# Weekly Selection Diagnostics

This notebook explains why the same symbols are repeatedly selected.
It compares weekly signal ranks vs weekly target weights and shows where names drop out.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from IPython.display import display

# Notebook-wide pandas display defaults: no cap on the number of columns
# shown, and no fixed display width.
pd.options.display.max_columns = None
pd.options.display.width = None

# Plot defaults shared by the charts in this notebook.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

from QuantConnect import *
from QuantConnect.Research import QuantBook
from config import TEAM_ID

# Create the QuantBook research handle. read_csv_from_store (defined below)
# reads the logged CSVs through qb.ObjectStore.
qb = QuantBook()
print('QuantBook initialized')


def read_csv_from_store(key):
    """Load the CSV stored under ``key`` in ``qb.ObjectStore``.

    Args:
        key: ObjectStore key of the CSV to read.

    Returns:
        A ``pandas.DataFrame`` parsed from the stored text, or ``None`` when
        the key does not exist, its content is empty, or any exception is
        raised while reading or parsing. A message is printed in each of
        those cases; nothing is raised to the caller.
    """
    try:
        store = qb.ObjectStore
        if store.ContainsKey(key):
            raw_text = store.Read(key)
            if raw_text:
                return pd.read_csv(StringIO(raw_text))
            print(f'Empty ObjectStore key: {key}')
        else:
            print(f'ObjectStore key not found: {key}')
    except Exception as e:
        # Best-effort helper for notebook use: report the problem and fall
        # through to the None return instead of aborting the cell.
        print(f'Error reading {key}: {e}')
    return None


In [None]:
# Static algorithm universe (models/universe.py).
# Kept as a whitespace-separated table; .split() turns it into the same
# ordered list of 30 ticker strings.
UNIVERSE_SYMBOLS = """
    AAPL AMGN AXP  BA   CAT  CRM
    CSCO CVX  DIS  DOW  GS   HD
    HON  IBM  INTC JNJ  JPM  KO
    MCD  MMM  MRK  MSFT NKE  PG
    TRV  UNH  V    VZ   WBA  WMT
""".split()

# Load both logs from the ObjectStore; the diagnostics need the pair, so a
# missing one aborts the cell (after both reads have reported their status).
df_signals = read_csv_from_store(f'{TEAM_ID}/signals.csv')
df_targets = read_csv_from_store(f'{TEAM_ID}/targets.csv')

if any(frame is None for frame in (df_signals, df_targets)):
    raise ValueError('signals.csv and targets.csv are required.')

# Signals: parse dates, coerce magnitude to numeric (unparseable -> NaN),
# and use the signal date itself as the week identifier.
df_signals = df_signals.assign(
    date=lambda d: pd.to_datetime(d['date']),
    magnitude=lambda d: pd.to_numeric(d['magnitude'], errors='coerce'),
    abs_magnitude=lambda d: d['magnitude'].abs(),
    week_id=lambda d: d['date'],
)

# Targets: prefer the logged week_id when present (unparseable -> NaT),
# otherwise fall back to the row date.
df_targets['date'] = pd.to_datetime(df_targets['date'])
df_targets['week_id'] = (
    pd.to_datetime(df_targets['week_id'], errors='coerce')
    if 'week_id' in df_targets.columns
    else df_targets['date']
)

# Weight columns are optional: coerce whichever exist to numeric and treat
# missing/unparseable values as a zero weight.
for col in ('weekly_target_w', 'start_w', 'actual_w', 'scheduled_w'):
    if col not in df_targets.columns:
        continue
    df_targets[col] = pd.to_numeric(df_targets[col], errors='coerce').fillna(0.0)

# Collapse the target log to one row per (week, symbol): rows are ordered by
# date first so 'first'/'last' pick the earliest/latest logged value.
weekly_targets = (
    df_targets.dropna(subset=['week_id'])
      .sort_values('date')
      .groupby(['week_id', 'symbol'], as_index=False)
      .agg(
          weekly_target_w=('weekly_target_w', 'last'),
          start_w=('start_w', 'first'),
          last_actual_w=('actual_w', 'last')
      )
)
# A symbol counts as selected when its weekly target weight is non-zero
# (beyond a 1e-6 tolerance).
weekly_targets = weekly_targets.assign(
    selected=lambda d: d['weekly_target_w'].abs().gt(1e-6)
)

# Signals with a usable week_id, ranked within each week by absolute
# magnitude (rank 1 = largest; ties broken by order of appearance).
weekly_signals = df_signals.loc[
    df_signals['week_id'].notna(),
    ['week_id', 'symbol', 'magnitude', 'abs_magnitude']
].copy()
weekly_signals['rank_abs_mag'] = (
    weekly_signals.groupby('week_id')['abs_magnitude']
                  .rank(method='first', ascending=False)
)

# Build the full (week x universe symbol) grid so that symbols with no signal
# and no target in a given week still appear as explicit rows.
week_ids = sorted(set(weekly_targets['week_id']).union(set(weekly_signals['week_id'])))
panel = pd.MultiIndex.from_product([week_ids, UNIVERSE_SYMBOLS], names=['week_id', 'symbol']).to_frame(index=False)
# NOTE(review): weekly_signals is not de-duplicated, so more than one signal
# row for the same (week, symbol) would duplicate the panel row here.
panel = panel.merge(weekly_signals, on=['week_id', 'symbol'], how='left')
panel = panel.merge(weekly_targets[['week_id', 'symbol', 'weekly_target_w', 'selected']], on=['week_id', 'symbol'], how='left')

# Grid rows without a target row mean "no target": weight 0, not selected.
panel['weekly_target_w'] = panel['weekly_target_w'].fillna(0.0)
# After the left merge `selected` holds True/False/NaN as object dtype.
# Convert through the nullable boolean dtype so the result is a genuine bool
# column on every pandas version, instead of relying on fillna's deprecated
# silent object->bool downcast. The `~` / `&` masks below need real booleans.
panel['selected'] = panel['selected'].astype('boolean').fillna(False).astype(bool)
panel['has_signal'] = panel['abs_magnitude'].notna()

# Attribute every (week, symbol) row to exactly one of four categories based
# on whether it was selected and whether a signal was logged.
panel['reason'] = np.select(
    [
        panel['selected'] & panel['has_signal'],
        panel['selected'] & ~panel['has_signal'],
        ~panel['selected'] & panel['has_signal'],
        ~panel['selected'] & ~panel['has_signal']
    ],
    [
        'selected_with_signal',
        'selected_without_logged_signal',
        'signaled_but_zero_target',
        'no_signal_logged'
    ],
    default='unknown'
)

print('Weeks:', len(week_ids))
print('Universe size:', len(UNIVERSE_SYMBOLS))


## Weekly Selection Summary Table

This table computes per-week diagnostics: how many symbols were signaled, how many received non-zero target weights, how many were signaled but dropped to zero target (filtered by portfolio construction), and how many selected symbols stayed, entered, or exited relative to the previous week. The `retention_from_prev_week` column is key — high retention means the selected set barely changes week to week, while low retention indicates high turnover in the selected names. The tail preview shows the 20 most recent weeks to confirm the pipeline is working end-to-end.

## Signaled vs Selected and Retention Time Series

These two stacked time-series charts reveal the selection funnel from universe signals to actual targets. The top panel plots the count of signaled symbols alongside those that received non-zero target weights, showing how much shrinkage the portfolio construction step applies each week. The bottom panel tracks week-over-week symbol retention as a percentage, revealing whether the selected universe is stable or rapidly rotating.

## Weekly Selection Attribution — Stacked Bar Chart

This stacked bar chart shows, for each rebalance week, how many symbols fell into each attribution category: `selected_with_signal`, `signaled_but_zero_target`, `no_signal_logged`, and `selected_without_logged_signal`. If `no_signal_logged` dominates, the strategy is selective — most of the 30-stock universe receives no signal in any given week. Weeks with a large `signaled_but_zero_target` bar indicate that portfolio construction constraints (vol cap, exposure limits, per-name cap) are actively filtering out valid signals.

## Latest Week Signal Rank Chart and Selection Table

This bar chart and table focus on the most recent rebalance week, ranking the top 20 universe symbols by absolute signal magnitude, with green bars for selected symbols and gray for non-selected; symbols with no logged signal are treated as magnitude 0. It immediately answers whether any high-magnitude signals were excluded from the portfolio and why — the table shows exact magnitude, target weight, and attribution reason for each symbol. This view makes the selection logic fully transparent for the most actionable, current week.

In [None]:
# Per-week headline counts taken from the (week x symbol) panel.
weekly_summary = (
    panel.groupby('week_id', as_index=False)
         .agg(
             selected_symbols=('selected', 'sum'),
             signaled_symbols=('has_signal', 'sum'),
             signaled_but_zero_target=('reason', lambda s: int((s == 'signaled_but_zero_target').sum())),
             no_signal_logged=('reason', lambda s: int((s == 'no_signal_logged').sum()))
         )
         .sort_values('week_id')
)

# Set of selected symbols per week. The groupby runs on selected rows only,
# so a week in which nothing was selected has no entry here.
selected_sets = (
    panel[panel['selected']]
      .groupby('week_id')['symbol']
      .apply(set)
      .sort_index()
)
selected_lookup = selected_sets.to_dict()

# Walk every week of the summary (not only weeks that had selections) so that
# "previous week" really is the preceding week. A week without selections is
# an empty set: everything held before it counts as exited, and the week
# after it has no previous holdings, so its retention is NaN.
retention_rows = []
weeks = list(weekly_summary['week_id'])
prev_set = set()
for week in weeks:
    current_set = selected_lookup.get(week, set())
    stayed = len(current_set & prev_set)
    entered = len(current_set - prev_set)
    exited = len(prev_set - current_set)
    # Retention is undefined when nothing was held the week before.
    retention = stayed / len(prev_set) if prev_set else np.nan
    retention_rows.append({
        'week_id': week,
        'stayed': stayed,
        'entered': entered,
        'exited': exited,
        'retention_from_prev_week': retention
    })
    prev_set = current_set

# Explicit columns keep the merge key present even when there are no weeks.
retention_df = pd.DataFrame(
    retention_rows,
    columns=['week_id', 'stayed', 'entered', 'exited', 'retention_from_prev_week']
)
# Match the merge-key dtype of weekly_summary (matters for the empty case,
# where the column would otherwise be object dtype).
retention_df['week_id'] = retention_df['week_id'].astype(weekly_summary['week_id'].dtype)
weekly_summary = weekly_summary.merge(retention_df, on='week_id', how='left')

display(weekly_summary.tail(20))


In [None]:
# Two vertically arranged panels sharing the week axis:
# top = signaled vs selected counts, bottom = retention percentage.
fig, axes = plt.subplots(2, 1, figsize=(16, 10), sharex=True)
ax_counts, ax_retention = axes
week_axis = weekly_summary['week_id']

# (summary column, legend label, line color) for each count series.
count_lines = (
    ('signaled_symbols', 'signaled symbols', '#1f77b4'),
    ('selected_symbols', 'selected symbols (non-zero target)', '#d62728'),
)
for column, label, color in count_lines:
    ax_counts.plot(week_axis, weekly_summary[column], label=label, color=color)
ax_counts.set(title='Weekly Symbol Counts: Signaled vs Selected', ylabel='Count')
ax_counts.legend()
ax_counts.grid(alpha=0.3)

# Retention is stored as a fraction; scale to percent for display.
retention_pct = 100 * weekly_summary['retention_from_prev_week']
ax_retention.plot(week_axis, retention_pct, color='#2ca02c')
ax_retention.set(
    title='Retention of Previous Week Selected Symbols',
    ylabel='Retention %',
    xlabel='Week',
)
ax_retention.grid(alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Count panel rows per (week, reason) and pivot reasons into columns;
# reasons that never occur in a week are filled with 0.
reason_counts = (
    panel.groupby(['week_id', 'reason'])
         .size()
         .unstack(fill_value=0)
         .reset_index()
         .sort_values('week_id')
)

# Fixed stacking order; only reasons that actually occur are plotted.
reason_order = [
    'selected_with_signal',
    'signaled_but_zero_target',
    'no_signal_logged',
    'selected_without_logged_signal',
]
plot_cols = [name for name in reason_order if name in reason_counts.columns]

ax = reason_counts.set_index('week_id')[plot_cols].plot.bar(
    stacked=True,
    figsize=(18, 6),
    width=0.9,
    colormap='tab20c'
)
ax.set(
    title='Weekly Selection Attribution by Reason',
    xlabel='Week',
    ylabel='Symbol count',
)
ax.legend(loc='upper right', frameon=False)
plt.tight_layout()
plt.show()


In [None]:
# Restrict the panel to the most recent week, treat "no logged signal" as a
# magnitude of 0, and order symbols from strongest to weakest signal.
latest_week = panel['week_id'].max()
latest_panel = (
    panel.loc[panel['week_id'] == latest_week]
         .assign(abs_magnitude=lambda d: d['abs_magnitude'].fillna(0.0))
         .sort_values('abs_magnitude', ascending=False)
)
# Rank follows the sorted row order (1 = largest absolute magnitude).
latest_panel['rank'] = np.arange(len(latest_panel)) + 1

# Only the top `view_n` rows are charted and tabulated.
view_n = 20
view = latest_panel.iloc[:view_n].copy()
colors = np.where(view['selected'], '#2ca02c', '#7f7f7f')

plt.figure(figsize=(14, 6))
plt.bar(view['symbol'], view['abs_magnitude'], color=colors)
plt.title(f'Latest Week ({latest_week.date()}) Signal Rank: green=selected, gray=not selected')
plt.ylabel('Abs signal magnitude')
plt.xticks(rotation=35)
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.show()

table_columns = ['symbol', 'rank', 'abs_magnitude', 'weekly_target_w', 'reason']
display(view[table_columns])
