# Signal Direction Persistence Dashboard

This notebook visualizes:
- % of daily emissions that maintain direction (i.e. have the same direction as the same symbol's previous emission)
- Average directional streak by signal (symbol)


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from IPython.display import display

# Notebook-wide pandas display defaults: both options are set to None so that
# pandas applies no fixed column-count cap or fixed output width.
for option_name in ('display.max_columns', 'display.width'):
    pd.set_option(option_name, None)

# Plot defaults: seaborn 'whitegrid' theme and a wide default figure size.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

from QuantConnect import *
from QuantConnect.Research import QuantBook
from config import TEAM_ID

# Create the QuantBook research session. read_csv_from_store() reads the
# logged CSV files through qb.ObjectStore.
qb = QuantBook()
print('QuantBook initialized')

def read_csv_from_store(key):
    """Load the CSV stored under ``key`` in the QuantBook ObjectStore.

    Args:
        key: ObjectStore key of the CSV text to read.

    Returns:
        A DataFrame parsed from the stored text, or None when the key is
        absent, the stored content is empty, or any step raises. Every
        failure path prints a one-line message instead of raising, so the
        caller decides whether a missing file is fatal.
    """
    frame = None
    try:
        if qb.ObjectStore.ContainsKey(key):
            raw_text = qb.ObjectStore.Read(key)
            if raw_text:
                frame = pd.read_csv(StringIO(raw_text))
            else:
                print(f'Empty ObjectStore key: {key}')
        else:
            print(f'ObjectStore key not found: {key}')
    except Exception as err:
        # Best-effort read: report the problem and fall through to None.
        print(f'Error reading {key}: {err}')
        frame = None
    return frame


In [None]:
# Load the logged signals for this team from the ObjectStore.
df_signals = read_csv_from_store(f'{TEAM_ID}/signals.csv')

if df_signals is None:
    raise ValueError('signals.csv is required. Run a backtest with signal logging enabled.')

required_cols = ['date', 'symbol', 'direction', 'magnitude']
missing = [c for c in required_cols if c not in df_signals.columns]
if missing:
    raise ValueError(f'signals.csv missing required columns: {missing}')

# Work on a copy so the raw frame stays available for inspection.
df = df_signals.copy()
df['date'] = pd.to_datetime(df['date'])
df['symbol'] = df['symbol'].astype(str)
# Unparseable magnitudes become NaN rather than raising.
df['magnitude'] = pd.to_numeric(df['magnitude'], errors='coerce')

# Normalise direction labels to 'Up' / 'Down'. A row whose label is not one
# of those two falls back to the sign of its magnitude.
df['direction'] = df['direction'].astype(str).str.title()
has_label = df['direction'].isin(['Up', 'Down'])
has_magnitude = df['magnitude'].notna()
df['direction'] = np.where(
    has_label,
    df['direction'],
    np.where(df['magnitude'] >= 0, 'Up', 'Down')
)

# A row with neither a recognised label nor a numeric magnitude has no
# determinable direction. `NaN >= 0` is False, so the fallback above would
# label it 'Down' and fabricate direction flips / streak breaks; drop such
# rows and report how many were removed instead.
undetermined = ~(has_label | has_magnitude)
if undetermined.any():
    print(f"Dropping {int(undetermined.sum()):,} rows with no usable direction or magnitude")
    df = df.loc[~undetermined].copy()

df['abs_magnitude'] = df['magnitude'].abs()
# Tier by absolute magnitude: >= 0.7 strong, >= 0.3 moderate, otherwise weak
# (a NaN magnitude fails both comparisons and therefore lands in 'weak').
df['tier'] = np.select(
    [df['abs_magnitude'] >= 0.7, df['abs_magnitude'] >= 0.3],
    ['strong', 'moderate'],
    default='weak'
)

# Compare every emission with the same symbol's previous emission.
df = df.sort_values(['symbol', 'date']).reset_index(drop=True)
df['prev_direction'] = df.groupby('symbol')['direction'].shift(1)
df['has_prev_emission'] = df['prev_direction'].notna()
# A symbol's first emission has no previous direction, so it counts as
# "not maintained" here; has_prev_emission lets it be excluded below.
df['maintains_direction'] = df['direction'].eq(df['prev_direction'])

# One row per distinct `date` value: total emissions, emissions that have a
# predecessor ("comparable"), and emissions that kept their direction.
daily_maintenance = (
    df.groupby('date', as_index=False)
      .agg(
          emissions=('symbol', 'count'),
          comparable_emissions=('has_prev_emission', 'sum'),
          maintained=('maintains_direction', 'sum')
      )
)
# Share of ALL emissions on the date that kept direction.
daily_maintenance['pct_emissions_maintained'] = (
    100.0 * daily_maintenance['maintained'] / daily_maintenance['emissions']
)
# Share of comparable emissions only; NaN when a date has none.
daily_maintenance['pct_comparable_maintained'] = np.where(
    daily_maintenance['comparable_emissions'] > 0,
    100.0 * daily_maintenance['maintained'] / daily_maintenance['comparable_emissions'],
    np.nan
)
# Rolling mean over 20 rows (dates with emissions), shown once 5 are available.
daily_maintenance['rolling_20d_pct'] = (
    daily_maintenance['pct_emissions_maintained']
      .rolling(20, min_periods=5)
      .mean()
)

overall_pct = 100.0 * df['maintains_direction'].mean()
overall_comparable_pct = 100.0 * df.loc[df['has_prev_emission'], 'maintains_direction'].mean()

print(f"Signal rows: {len(df):,}")
print(f"Date range: {df['date'].min().date()} -> {df['date'].max().date()}")
print(f"% of all emissions that maintain direction: {overall_pct:.2f}%")
print(f"% of comparable emissions that maintain direction: {overall_comparable_pct:.2f}%")
display(daily_maintenance.head())


## % of Daily Emissions That Maintain Direction

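Left: daily maintenance rate and its rolling mean over the last 20 emission dates (shown once at least 5 dates are available).
Right: maintenance rate by signal tier (strong: |magnitude| >= 0.7, moderate: |magnitude| >= 0.3, weak: otherwise).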


In [None]:
# Display order shared by the tier summary table and the bar chart.
tier_order = ['strong', 'moderate', 'weak']

# Per-tier counts and maintenance rates, reindexed so every tier in
# tier_order has a row (tiers absent from the data become NaN rows).
maintenance_by_tier = (
    df.groupby('tier', as_index=False)
      .agg(
          emissions=('symbol', 'count'),
          comparable_emissions=('has_prev_emission', 'sum'),
          maintained=('maintains_direction', 'sum'),
      )
      .assign(
          # Share of all emissions in the tier that kept direction.
          pct_emissions_maintained=lambda t: 100.0 * t['maintained'] / t['emissions'],
          # Share of emissions with a predecessor; NaN when the tier has none.
          pct_comparable_maintained=lambda t: (
              100.0 * t['maintained'] / t['comparable_emissions']
          ).where(t['comparable_emissions'] > 0),
      )
      .set_index('tier')
      .reindex(tier_order)
      .reset_index()
)

fig, axes = plt.subplots(1, 2, figsize=(18, 6))
trend_ax, tier_ax = axes

# Left panel: faint daily series with the rolling mean drawn on top.
line_specs = [
    ('pct_emissions_maintained', {'color': '#1f77b4', 'alpha': 0.35, 'label': 'Daily'}),
    ('rolling_20d_pct', {'color': '#d62728', 'linewidth': 2.0, 'label': '20D rolling'}),
]
for column, line_kwargs in line_specs:
    sns.lineplot(data=daily_maintenance, x='date', y=column, ax=trend_ax, **line_kwargs)
trend_ax.set(
    title='% of Daily Emissions Maintaining Direction',
    xlabel='Date',
    ylabel='Percent (%)',
    ylim=(0, 105),
)
trend_ax.grid(alpha=0.3)
trend_ax.legend()

# Right panel: one bar per tier, in tier_order.
sns.barplot(
    data=maintenance_by_tier,
    x='tier',
    y='pct_emissions_maintained',
    order=tier_order,
    ax=tier_ax,
    palette=['#4daf4a', '#377eb8', '#ff7f00']
)
tier_ax.set(
    title='Direction Maintenance by Signal Tier',
    xlabel='Tier',
    ylabel='Percent of Emissions (%)',
    ylim=(0, 105),
)
tier_ax.grid(axis='y', alpha=0.3)
# Label each bar just above its top; rows are already in bar order, so the
# row position is the bar's x coordinate. Missing tiers (NaN) get no label.
for position, pct in enumerate(maintenance_by_tier['pct_emissions_maintained']):
    if pd.isna(pct):
        continue
    tier_ax.text(position, pct + 1, f"{pct:.1f}%", ha='center', va='bottom', fontsize=10)

plt.tight_layout()
plt.show()

print('Maintenance summary by tier')
display(maintenance_by_tier)


## Average Directional Streak by Signal

A streak is a run of consecutive emissions for the same symbol with the same direction.


In [None]:
df_streak = df.copy()
# Within each symbol, start a new streak id whenever the direction differs
# from the previous row (df is sorted by symbol and date in the prep cell).
df_streak['streak_id'] = df_streak.groupby('symbol')['direction'].transform(lambda s: s.ne(s.shift()).cumsum())

# One row per streak: its date span, length in emissions, and mean |magnitude|.
streaks = (
    df_streak.groupby(['symbol', 'streak_id', 'direction'], as_index=False)
      .agg(
          start_date=('date', 'min'),
          end_date=('date', 'max'),
          streak_len=('date', 'size'),
          avg_abs_magnitude=('abs_magnitude', 'mean')
      )
)

# Streak-length statistics per symbol, longest average first.
avg_streak_by_symbol = (
    streaks.groupby('symbol', as_index=False)
      .agg(
          avg_streak=('streak_len', 'mean'),
          median_streak=('streak_len', 'median'),
          max_streak=('streak_len', 'max'),
          streak_count=('streak_len', 'count')
      )
      .sort_values('avg_streak', ascending=False)
)

# Streak-length statistics per direction.
avg_streak_by_direction = (
    streaks.groupby('direction', as_index=False)
      .agg(
          avg_streak=('streak_len', 'mean'),
          median_streak=('streak_len', 'median'),
          streaks=('streak_len', 'count')
      )
      .sort_values('direction')
)

overall_avg_streak = streaks['streak_len'].mean()
overall_median_streak = streaks['streak_len'].median()
print(f"Overall average streak length: {overall_avg_streak:.2f} emissions")
print(f"Overall median streak length: {overall_median_streak:.2f} emissions")

if streaks.empty:
    # With no streaks there is nothing to draw; skip the charts but still
    # show the (empty) summary tables below.
    print('No directional streaks found; skipping streak charts.')
else:
    fig, axes = plt.subplots(1, 2, figsize=(18, 6))

    # Streak lengths are integers: discrete=True uses unit-width bins centred
    # on each value, so every bar sits on its own streak-length tick instead
    # of spanning the gap between k and k+1.
    sns.histplot(streaks['streak_len'], discrete=True, ax=axes[0], color='#1f77b4', edgecolor='white')
    axes[0].set_title('Directional Streak Length Distribution')
    axes[0].set_xlabel('Streak Length (Emissions)')
    axes[0].set_ylabel('Count')
    axes[0].grid(axis='y', alpha=0.3)

    # The 15 symbols with the longest average streak, re-sorted ascending
    # for plotting.
    top_avg = avg_streak_by_symbol.head(15).sort_values('avg_streak', ascending=True)
    sns.barplot(data=top_avg, x='avg_streak', y='symbol', ax=axes[1], color='#2ca02c')
    axes[1].set_title('Top 15 Symbols by Average Directional Streak')
    axes[1].set_xlabel('Average Streak Length (Emissions)')
    axes[1].set_ylabel('Symbol')
    axes[1].grid(axis='x', alpha=0.3)

    plt.tight_layout()
    plt.show()

print('Average directional streak by direction')
display(avg_streak_by_direction)

print('Average directional streak by signal (symbol)')
display(avg_streak_by_symbol.head(20))
