# Signal Distribution Dashboard

This notebook answers:
- What signal types are we emitting (direction + tier + strength)?
- How are signals distributed across symbols and trend structures?
- What indicator states are behind each signal tier?


In [None]:
# --- Notebook setup: analysis/plotting imports, display options, QuantBook ---
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from IPython.display import display

# Show every column of wide DataFrames; width=None lets pandas auto-detect
# the display width.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# Shared plot defaults, used by any figure that does not pass its own figsize.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

# NOTE(review): this star import runs after the imports above, so any name it
# exports that collides with them would take precedence — confirm none do.
from QuantConnect import *
from QuantConnect.Research import QuantBook
# TEAM_ID comes from the project-local config module; it is used as the
# ObjectStore key prefix when signals.csv is loaded.
from config import TEAM_ID

# Single QuantBook instance; read_csv_from_store reads through its ObjectStore.
qb = QuantBook()
print('QuantBook initialized')


def read_csv_from_store(key):
    """Load the CSV stored under ``key`` in the QuantBook ObjectStore.

    Returns:
        A DataFrame parsed from the stored text, or None when the key does
        not exist, its content is empty, or any exception is raised while
        reading or parsing. The reason for a None result is printed.
    """
    frame = None
    try:
        store = qb.ObjectStore
        if store.ContainsKey(key):
            raw_text = store.Read(key)
            if raw_text:
                frame = pd.read_csv(StringIO(raw_text))
            else:
                print(f'Empty ObjectStore key: {key}')
        else:
            print(f'ObjectStore key not found: {key}')
    except Exception as err:
        # Best-effort loader: report the failure and let the caller decide.
        print(f'Error reading {key}: {err}')
        return None
    return frame


## Data Loading & Preprocessing

Loads the logged signal data from ObjectStore and applies preprocessing: parsing dates, computing absolute magnitude, assigning signal tiers (strong/moderate/weak), and deriving ATR-normalized distances and trend structure labels. This cell is a prerequisite for all charts and tables in the notebook. Run this once before executing any downstream cells.

## Signal Distribution — 4-Panel Overview

This panel visualizes the overall shape and behavior of the signal stream across four dimensions: raw magnitude distribution, signal count split by tier and direction, absolute magnitude spread per tier, and monthly signal volume alongside average strength. Together these charts reveal whether the strategy is generating balanced, diverse signals or concentrating in a narrow tier or direction. A heavy skew toward one tier or direction in these plots is a useful early indicator of model regime bias.

## ATR-Normalized Distances & Structure Mix

These two plots analyze the geometric relationship between price and the three trend horizons at signal emission. The left boxplot shows how far (in ATR units) price sits above or below each SMA when a long or short signal fires, distinguishing how extended prices are across short, medium, and long horizons. The right bar chart breaks down what market structure (full trend, pullback, mixed) underlies each tier, revealing whether strong signals tend to come from cleaner or messier trend conditions.

## Summary Tables — Tier, Symbol, and Structure

This section produces three summary tables that quantify signal composition at different aggregation levels. The tier table reports signal count, number of distinct symbols, average absolute magnitude, long share (the percentage of signals with direction Up), and average ATR-normalized SMA distances for each strong/moderate/weak tier; the symbol table lists the 20 most frequently signalled stocks with their average absolute magnitude and long share; and the structure table reports signal count, average absolute magnitude, and long share for each trend structure. These tables are the primary reference for understanding where and how the strategy finds opportunities across the Dow 30 universe.

In [None]:
# Load the logged signals and derive the columns the downstream cells use:
# abs_magnitude, tier, dist_short / dist_medium / dist_long, and structure.
df_signals = read_csv_from_store(f'{TEAM_ID}/signals.csv')

if df_signals is None:
    raise ValueError('signals.csv is required. Run a backtest with signal logging enabled.')

required_cols = ['date', 'symbol', 'direction', 'magnitude', 'price', 'sma_short', 'sma_medium', 'sma_long', 'atr']
missing = [c for c in required_cols if c not in df_signals.columns]
if missing:
    raise ValueError(f'signals.csv missing required columns: {missing}')

# Work on a copy so the raw frame stays available for inspection.
df = df_signals.copy()
df['date'] = pd.to_datetime(df['date'])
for col in ['magnitude', 'price', 'sma_short', 'sma_medium', 'sma_long', 'atr']:
    # Unparseable values become NaN instead of raising.
    df[col] = pd.to_numeric(df[col], errors='coerce')

# A row whose magnitude did not parse has neither a sign nor a tier. Left in
# place it would be counted as 'weak' (NaN fails both tier thresholds) and,
# when its logged direction is invalid, as 'Down' (NaN >= 0 is False).
# Drop such rows and report how many were removed.
unparsed_magnitude = df['magnitude'].isna()
if unparsed_magnitude.any():
    print(f'Dropping {int(unparsed_magnitude.sum()):,} rows with missing or non-numeric magnitude')
    df = df.loc[~unparsed_magnitude].copy()

# Normalise the logged direction (surrounding whitespace and letter case);
# anything that is still not Up/Down falls back to the sign of the magnitude.
logged_direction = df['direction'].astype(str).str.strip().str.title()
df['direction'] = np.where(
    logged_direction.isin(['Up', 'Down']),
    logged_direction,
    np.where(df['magnitude'] >= 0, 'Up', 'Down')
)
df['abs_magnitude'] = df['magnitude'].abs()
# Tiers by |magnitude|: >= 0.7 strong, >= 0.3 moderate, otherwise weak.
# np.select takes the first matching condition, so the order matters.
df['tier'] = np.select(
    [df['abs_magnitude'] >= 0.7, df['abs_magnitude'] >= 0.3],
    ['strong', 'moderate'],
    default='weak'
)

# Distance of price from each SMA in ATR units; a zero ATR is treated as
# missing so the division yields NaN rather than +/-inf.
safe_atr = df['atr'].replace(0, np.nan)
df['dist_short'] = (df['price'] - df['sma_short']) / safe_atr
df['dist_medium'] = (df['price'] - df['sma_medium']) / safe_atr
df['dist_long'] = (df['price'] - df['sma_long']) / safe_atr

# Trend structure: a full trend has price and all three SMAs strictly ordered;
# a pullback has price on the other side of the short SMA while it is still
# beyond the medium SMA and the medium/long ordering is intact.
trend_up = (df['price'] > df['sma_short']) & (df['sma_short'] > df['sma_medium']) & (df['sma_medium'] > df['sma_long'])
trend_down = (df['price'] < df['sma_short']) & (df['sma_short'] < df['sma_medium']) & (df['sma_medium'] < df['sma_long'])
pullback_up = (df['price'] < df['sma_short']) & (df['price'] > df['sma_medium']) & (df['sma_medium'] > df['sma_long'])
pullback_down = (df['price'] > df['sma_short']) & (df['price'] < df['sma_medium']) & (df['sma_medium'] < df['sma_long'])

# Rows matching none of the four patterns (including rows with NaN inputs,
# whose comparisons are all False) are labelled 'mixed'.
df['structure'] = np.select(
    [trend_up, trend_down, pullback_up, pullback_down],
    ['trend_up', 'trend_down', 'pullback_up', 'pullback_down'],
    default='mixed'
)

print(f"signal rows: {len(df):,}")
print(f"date range: {df['date'].min().date()} -> {df['date'].max().date()}")
print(f"symbols with signals: {df['symbol'].nunique()}")
display(df.head())


In [None]:
# Four-panel overview of the signal stream. Requires `df` from the data
# loading cell (columns: magnitude, abs_magnitude, tier, direction, symbol, date).
tier_order = ['strong', 'moderate', 'weak']
# Fixed order and colours per direction so Up is always green and Down always
# red, independent of which directions occur or how pandas sorts them.
direction_order = ['Up', 'Down']
direction_palette = {'Up': '#2ca02c', 'Down': '#d62728'}

fig, axes = plt.subplots(2, 2, figsize=(18, 12))

# Panel 1: distribution of signed magnitude, with a reference line at zero.
sns.histplot(data=df, x='magnitude', bins=40, kde=True, ax=axes[0, 0], color='#1f77b4')
axes[0, 0].set_title('Signal Magnitude Distribution')
axes[0, 0].axvline(0.0, color='black', linewidth=1)
axes[0, 0].grid(alpha=0.3)

# Panel 2: signal count per tier, stacked by direction. unstack() sorts the
# direction columns alphabetically (Down, Up), so reindex both axes explicitly;
# fill_value=0 keeps tiers or directions with no signals as zero-height bars
# instead of NaN.
pivot = (
    df.groupby(['tier', 'direction'])['symbol']
      .count()
      .unstack(fill_value=0)
      .reindex(index=tier_order, columns=direction_order, fill_value=0)
)
pivot.plot(
    kind='bar',
    stacked=True,
    ax=axes[0, 1],
    color=[direction_palette[d] for d in direction_order]
)
axes[0, 1].set_title('Signal Count by Tier and Direction')
axes[0, 1].set_xlabel('Tier')
axes[0, 1].set_ylabel('Signals')
axes[0, 1].tick_params(axis='x', rotation=0)
axes[0, 1].grid(axis='y', alpha=0.3)

# Panel 3: spread of |magnitude| within each tier, split by direction, using
# the same direction colours as panel 2.
sns.boxplot(
    data=df,
    x='tier',
    y='abs_magnitude',
    order=tier_order,
    hue='direction',
    hue_order=direction_order,
    palette=direction_palette,
    ax=axes[1, 0]
)
axes[1, 0].set_title('Absolute Magnitude by Tier')
axes[1, 0].set_xlabel('Tier')
axes[1, 0].set_ylabel('|magnitude|')
axes[1, 0].grid(alpha=0.3)

# Panel 4: monthly signal count (left axis) and mean |magnitude| (right axis).
# NOTE(review): pandas 2.2+ prefers the 'ME' alias for month-end; 'M' is kept
# here because older pandas versions do not accept 'ME'.
monthly = (
    df.set_index('date')
      .resample('M')
      .agg(signal_count=('symbol', 'count'), mean_abs_mag=('abs_magnitude', 'mean'))
      .reset_index()
)
ax_left = axes[1, 1]
ax_right = ax_left.twinx()
(count_line,) = ax_left.plot(monthly['date'], monthly['signal_count'], color='#1f77b4', linewidth=1.8, label='Signal count')
(strength_line,) = ax_right.plot(monthly['date'], monthly['mean_abs_mag'], color='#ff7f0e', linewidth=1.8, label='Mean |magnitude|')
ax_left.set_title('Monthly Signal Flow and Strength')
ax_left.set_ylabel('Signal count')
ax_right.set_ylabel('Mean |magnitude|')
ax_left.grid(alpha=0.3)
# One legend covering both axes, attached to the twin axis because it is
# drawn on top of the left axis.
ax_right.legend(handles=[count_line, strength_line], loc='upper left')

plt.tight_layout()
plt.show()


In [None]:
# ATR-normalised distance and structure-mix plots. Requires `df` from the data
# loading cell (columns: direction, dist_short, dist_medium, dist_long,
# structure, tier, symbol).

# Explicit category orders and colours: without them seaborn orders hue levels
# by first appearance in the data, so colours and legend order could differ
# between runs.
distance_components = ['dist_short', 'dist_medium', 'dist_long']
direction_order = ['Up', 'Down']
direction_palette = {'Up': '#2ca02c', 'Down': '#d62728'}
tier_order = ['strong', 'moderate', 'weak']

# Reshape to one row per (signal, horizon); drop rows where the distance is
# NaN (for example where ATR was zero or an input was missing).
dist_long = (
    df.melt(
        id_vars=['direction'],
        value_vars=distance_components,
        var_name='component',
        value_name='z_dist'
    )
    .dropna(subset=['z_dist'])
)

fig, axes = plt.subplots(1, 2, figsize=(18, 6))

# Left: how far price sits from each SMA (in ATR units), by signal direction.
sns.boxplot(
    data=dist_long,
    x='component',
    y='z_dist',
    order=distance_components,
    hue='direction',
    hue_order=direction_order,
    palette=direction_palette,
    ax=axes[0]
)
axes[0].axhline(0, color='black', linewidth=1)
axes[0].set_title('ATR-Normalized Distance by Horizon')
axes[0].set_xlabel('Component')
axes[0].set_ylabel('(price - SMA) / ATR')
axes[0].grid(alpha=0.3)

# Right: number of signals per trend structure, split by tier.
structure_counts = (
    df.groupby(['structure', 'tier'])['symbol']
      .count()
      .reset_index(name='signals')
)
order = ['trend_up', 'pullback_up', 'mixed', 'pullback_down', 'trend_down']
sns.barplot(
    data=structure_counts,
    x='structure',
    y='signals',
    hue='tier',
    order=order,
    hue_order=tier_order,
    ax=axes[1]
)
axes[1].set_title('Signal Structure Mix by Tier')
axes[1].set_xlabel('Structure')
axes[1].set_ylabel('Signals')
axes[1].tick_params(axis='x', rotation=20)
axes[1].grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Summary tables by tier, symbol, and trend structure. Requires `df` from the
# data loading cell.


def _up_fraction(directions):
    """Return the fraction of entries equal to 'Up'."""
    return (directions == 'Up').mean()


def _summarise(group_col, metrics):
    """Aggregate `df` per `group_col`, most frequent group first.

    `long_share` is computed as a fraction and then converted to a percentage.
    """
    table = df.groupby(group_col, as_index=False).agg(**metrics)
    table = table.sort_values('signals', ascending=False)
    return table.assign(long_share=lambda t: 100 * t['long_share'])


# Metrics shared by the tier and structure tables.
_base_metrics = {
    'avg_abs_magnitude': ('abs_magnitude', 'mean'),
    'long_share': ('direction', _up_fraction),
}

tier_summary = _summarise(
    'tier',
    {
        'signals': ('symbol', 'count'),
        'symbols': ('symbol', 'nunique'),
        **_base_metrics,
        'avg_dist_short': ('dist_short', 'mean'),
        'avg_dist_medium': ('dist_medium', 'mean'),
        'avg_dist_long': ('dist_long', 'mean'),
    },
)

# Per-symbol counts are taken on 'date' because 'symbol' is the grouping key.
symbol_summary = _summarise(
    'symbol',
    {'signals': ('date', 'count'), **_base_metrics},
)

structure_summary = _summarise(
    'structure',
    {'signals': ('symbol', 'count'), **_base_metrics},
)

for heading, table in (
    ('Tier summary', tier_summary),
    ('Top symbols by signal frequency', symbol_summary.head(20)),
    ('Structure summary', structure_summary),
):
    print(heading)
    display(table)
