# Rebalance Execution Load Dashboard

This notebook plots execution load and progress as a percentage of NAV for each rebalance cycle, including signal-tier breakdowns.

Included sections:
- Planned vs filled order load (% NAV) per rebalance
- Completion rate and unfilled carryover
- Progress quantiles across the 5-day rebalance cycle
- Signal-tier dashboard (planned %, filled %, completion %, average day-of-fill)
- Average holding period (strategy-level and by entry signal tier)
- Buy vs sell execution load
- Slippage cost (bps and % NAV)
- Participation vs liquidity (planned notional as % of ADV, optional)
- Target drift during cycle
- Turnover % NAV per rebalance
- Outlier annotations for stressed cycles


In [None]:
import re
import warnings
from io import StringIO

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from IPython.display import display

from QuantConnect import *
from QuantConnect.Research import QuantBook
from config import TEAM_ID

# Show every column and use a wide console so summary tables are not truncated.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', 180)

# Default styling and figure size for plots that do not pass their own figsize.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

# Silences every FutureWarning for the rest of the session (not only pandas').
warnings.filterwarnings('ignore', category=FutureWarning)


In [None]:
# Research-environment handle; read_csv_from_store uses qb.ObjectStore for its reads.
qb = QuantBook()
print('QuantBook initialized')


def read_csv_from_store(key):
    """Read a CSV stored under ``key`` in the QuantBook ObjectStore.

    Returns a DataFrame parsed from the stored text, or ``None`` when the key
    does not exist, its content is empty, or any exception is raised while
    reading/parsing. Each failure mode prints a one-line diagnostic instead of
    raising, so callers must check for ``None``.
    """
    try:
        store = qb.ObjectStore
        if store.ContainsKey(key):
            raw_text = store.Read(key)
            if raw_text:
                return pd.read_csv(StringIO(raw_text))
            print(f'Empty ObjectStore key: {key}')
        else:
            print(f'ObjectStore key not found: {key}')
    except Exception as e:
        # Best-effort loader: report the problem and fall through to None.
        print(f'Error reading {key}: {e}')
    return None


# Load each input CSV from the team's ObjectStore prefix. Any of these is None
# when the key is missing, empty, or unreadable (read_csv_from_store prints why).
df_snap = read_csv_from_store(f'{TEAM_ID}/daily_snapshots.csv')
df_targets = read_csv_from_store(f'{TEAM_ID}/targets.csv')
df_signals = read_csv_from_store(f'{TEAM_ID}/signals.csv')
df_events = read_csv_from_store(f'{TEAM_ID}/order_events.csv')
df_slippage = read_csv_from_store(f'{TEAM_ID}/slippage.csv')

# Snapshots and targets are mandatory; signals, order events and slippage are optional.
if df_snap is None or df_targets is None:
    raise ValueError('daily_snapshots.csv and targets.csv are required for this dashboard.')

# Row counts per input; optional inputs that failed to load are reported as 0.
print(f"Rows -> snapshots: {len(df_snap):,}, targets: {len(df_targets):,}, signals: {0 if df_signals is None else len(df_signals):,}, order_events: {0 if df_events is None else len(df_events):,}, slippage: {0 if df_slippage is None else len(df_slippage):,}")


In [None]:
def to_numeric(df, cols):
    """Coerce the listed columns of ``df`` to numeric dtype, in place.

    Names in ``cols`` that are not columns of ``df`` are skipped. Values that
    cannot be parsed become NaN (``errors='coerce'``). The same ``df`` object is
    returned so the call can be used in an assignment.
    """
    present = [name for name in cols if name in df.columns]
    for name in present:
        df[name] = pd.to_numeric(df[name], errors='coerce')
    return df


def parse_tag_value(tag, key):
    """Extract the value for ``key`` from a ``key=value;key=value`` order tag.

    Parameters
    ----------
    tag : object
        Raw tag; converted with ``str()`` before matching. A missing value
        (``pd.isna(tag)``) yields NaN.
    key : str
        Field name to look up. It is matched literally (regex metacharacters
        are escaped) and only as a whole field name, i.e. at the start of the
        tag or directly after a ``;`` separator.

    Returns
    -------
    str or float
        The value text up to the next ``;`` with surrounding whitespace
        removed, or ``np.nan`` when the key is absent or its value is blank.
    """
    if pd.isna(tag):
        return np.nan
    # Anchoring on start-of-string / ';' stops 'tier' from matching inside
    # 'signal_tier=...'; re.escape stops keys such as 'a.b' acting as patterns.
    pattern = rf'(?:^|;)\s*{re.escape(str(key))}\s*=([^;]+)'
    match = re.search(pattern, str(tag))
    if match is None:
        return np.nan
    value = match.group(1).strip()
    return value if value else np.nan


def tier_from_magnitude(m):
    """Bucket a signal magnitude into a tier label.

    The absolute value of ``m`` decides the tier: ``>= 0.70`` is 'strong',
    ``>= 0.30`` is 'moderate', anything smaller is 'weak'. A missing value
    (``pd.isna(m)``) maps to 'unknown'.
    """
    if pd.isna(m):
        return 'unknown'
    strength = abs(float(m))
    # (lower bound, label) pairs, checked from the highest threshold down.
    for floor, label in ((0.70, 'strong'), (0.30, 'moderate')):
        if strength >= floor:
            return label
    return 'weak'


# Display/sort order for signal tiers; 'unknown' is what tier_from_magnitude
# returns for a missing magnitude.
tier_order = ['strong', 'moderate', 'weak', 'unknown']

# Snapshots (portfolio NAV reference)
df_snap = df_snap.copy()
# Normalize timestamps to midnight so later merges on 'date' compare calendar days.
df_snap['date'] = pd.to_datetime(df_snap['date']).dt.normalize()
df_snap = to_numeric(df_snap, ['nav', 'gross_exposure', 'net_exposure', 'daily_pnl'])
df_snap = df_snap.sort_values('date')
# One NAV row per date (the first row for each date is kept).
nav_by_date = df_snap[['date', 'nav']].drop_duplicates(subset=['date'])

# Targets (weekly plan vs actual state)
dx = df_targets.copy()
dx['date'] = pd.to_datetime(dx['date']).dt.normalize()
dx['week_id'] = dx['week_id'].astype(str)
# After the string cast a missing id appears as '', 'nan' or 'None';
# those rows fall back to their own date (YYYY-MM-DD) as the week id.
missing_week = dx['week_id'].isin(['', 'nan', 'None'])
dx.loc[missing_week, 'week_id'] = dx.loc[missing_week, 'date'].dt.strftime('%Y-%m-%d')

dx = to_numeric(
    dx,
    ['start_w', 'weekly_target_w', 'scheduled_fraction', 'scheduled_w', 'actual_w', 'scale_day']
)
# Missing/unparseable scale_day is treated as 0 before the integer cast.
dx['scale_day'] = dx['scale_day'].fillna(0).astype(int)

# Join signal magnitude to assign tier at week/symbol level
if df_signals is not None and len(df_signals):
    sig = df_signals.copy()
    sig['date'] = pd.to_datetime(sig['date']).dt.normalize()
    sig = to_numeric(sig, ['magnitude'])
    # Signals carry no week_id of their own here; the signal date (YYYY-MM-DD) is used.
    # NOTE(review): this only joins to targets whose week_id is that same date string — confirm.
    sig['week_id'] = sig['date'].dt.strftime('%Y-%m-%d')
    # Keep one magnitude per (week_id, symbol): the last row after sorting by date.
    week_signal = (
        sig.sort_values(['week_id', 'symbol', 'date'])
           .drop_duplicates(subset=['week_id', 'symbol'], keep='last')[['week_id', 'symbol', 'magnitude']]
    )
else:
    # No signals: empty lookup so the merge below still adds a 'magnitude' column.
    week_signal = pd.DataFrame(columns=['week_id', 'symbol', 'magnitude'])

# Left join keeps every target row; unmatched rows get NaN magnitude -> tier 'unknown'.
dx = dx.merge(week_signal, on=['week_id', 'symbol'], how='left')
dx['signal_tier'] = dx['magnitude'].apply(tier_from_magnitude)

# Core execution-load metrics in weight-space (% NAV)
# delta_w: signed weight change planned for the week (target minus starting weight).
dx['delta_w'] = dx['weekly_target_w'] - dx['start_w']
dx['planned_order_abs'] = dx['delta_w'].abs()
# Distance still to travel to the weekly target, capped at the planned size so
# that filled = planned - remaining cannot go negative.
dx['remaining_abs'] = (dx['weekly_target_w'] - dx['actual_w']).abs()
dx['remaining_abs'] = np.minimum(dx['remaining_abs'], dx['planned_order_abs'])
dx['filled_abs'] = (dx['planned_order_abs'] - dx['remaining_abs']).clip(lower=0.0)
# Gap between the actual weight and the scheduled weight for that row.
dx['schedule_drift_abs'] = (dx['actual_w'] - dx['scheduled_w']).abs()

# Split planned/filled load by direction: delta_w > 0 is a buy, < 0 a sell, 0 counts as neither.
dx['buy_planned_abs'] = np.where(dx['delta_w'] > 0, dx['planned_order_abs'], 0.0)
dx['sell_planned_abs'] = np.where(dx['delta_w'] < 0, dx['planned_order_abs'], 0.0)
dx['buy_filled_abs'] = np.where(dx['delta_w'] > 0, dx['filled_abs'], 0.0)
dx['sell_filled_abs'] = np.where(dx['delta_w'] < 0, dx['filled_abs'], 0.0)

# 0-based row counter per (week_id, symbol) in date order.
dx = dx.sort_values(['week_id', 'symbol', 'date'])
dx['day_in_cycle'] = dx.groupby(['week_id', 'symbol']).cumcount()

# Portfolio-level daily progression inside each rebalance cycle
# One row per (week_id, date): symbol-level weight-space metrics summed across symbols.
portfolio_daily = (
    dx.groupby(['week_id', 'date'], as_index=False)
      .agg(
          planned_pct_nav=('planned_order_abs', 'sum'),
          remaining_pct_nav=('remaining_abs', 'sum'),
          filled_pct_nav=('filled_abs', 'sum'),
          schedule_drift_pct_nav=('schedule_drift_abs', 'sum'),
          buy_planned_pct_nav=('buy_planned_abs', 'sum'),
          sell_planned_pct_nav=('sell_planned_abs', 'sum'),
          buy_filled_pct_nav=('buy_filled_abs', 'sum'),
          sell_filled_pct_nav=('sell_filled_abs', 'sum')
      )
      .sort_values(['week_id', 'date'])
)

# Filled / planned, defined as 0 when nothing was planned (avoids dividing by ~0).
portfolio_daily['completion_rate'] = np.where(
    portfolio_daily['planned_pct_nav'] > 1e-12,
    portfolio_daily['filled_pct_nav'] / portfolio_daily['planned_pct_nav'],
    0.0
)
# 0-based position of each date within its week (rows are already date-sorted).
portfolio_daily['day_in_cycle'] = portfolio_daily.groupby('week_id').cumcount()

# Rebalance-level summary
# One row per week_id. Planned load is the maximum daily value in the week; the
# "final_*" columns take the last row per week, relying on the date sort of portfolio_daily.
weekly = (
    portfolio_daily.groupby('week_id', as_index=False)
                 .agg(
                     start_date=('date', 'min'),
                     end_date=('date', 'max'),
                     planned_pct_nav=('planned_pct_nav', 'max'),
                     final_filled_pct_nav=('filled_pct_nav', 'last'),
                     final_completion_rate=('completion_rate', 'last'),
                     final_remaining_pct_nav=('remaining_pct_nav', 'last')
                 )
                 .sort_values('start_date')
)
# Planned load that was not filled by the last day of the week, floored at 0.
weekly['unfilled_carryover_pct_nav'] = (
    weekly['planned_pct_nav'] - weekly['final_filled_pct_nav']
).clip(lower=0.0)
# Attach the NAV on the week's first date; NaN when no snapshot exists for that date.
weekly = weekly.merge(
    nav_by_date.rename(columns={'date': 'start_date', 'nav': 'week_start_nav'}),
    on='start_date',
    how='left'
)

# Symbol-level week summary (for tier breakdowns)
# One row per (week_id, symbol, signal_tier). 'last' relies on dx being sorted by
# week_id/symbol/date above.
# NOTE(review): delta_w uses the signed max while planned_pct_nav uses the max of |delta_w|;
# the two agree only if delta_w is constant within a week/symbol — confirm.
symbol_final = (
    dx.groupby(['week_id', 'symbol', 'signal_tier'], as_index=False)
      .agg(
          delta_w=('delta_w', 'max'),
          planned_pct_nav=('planned_order_abs', 'max'),
          final_filled_pct_nav=('filled_abs', 'last'),
          final_remaining_pct_nav=('remaining_abs', 'last')
      )
)
# Filled / planned per symbol-week, 0 when nothing was planned.
symbol_final['completion_rate'] = np.where(
    symbol_final['planned_pct_nav'] > 1e-12,
    symbol_final['final_filled_pct_nav'] / symbol_final['planned_pct_nav'],
    0.0
)
# Direction split using the sign of delta_w (positive = buy, negative = sell).
symbol_final['buy_planned_pct_nav'] = np.where(symbol_final['delta_w'] > 0, symbol_final['planned_pct_nav'], 0.0)
symbol_final['sell_planned_pct_nav'] = np.where(symbol_final['delta_w'] < 0, symbol_final['planned_pct_nav'], 0.0)
symbol_final['buy_filled_pct_nav'] = np.where(symbol_final['delta_w'] > 0, symbol_final['final_filled_pct_nav'], 0.0)
symbol_final['sell_filled_pct_nav'] = np.where(symbol_final['delta_w'] < 0, symbol_final['final_filled_pct_nav'], 0.0)

# Average day-of-fill for each week/symbol using fill increments
dx_fill = dx[['week_id', 'symbol', 'day_in_cycle', 'filled_abs']].copy()
# Previous row's cumulative fill within the same week/symbol; 0 for the first row.
dx_fill['prev_filled'] = dx_fill.groupby(['week_id', 'symbol'])['filled_abs'].shift(1).fillna(0.0)
# Row-over-row increase in cumulative fill; decreases are floored at 0.
dx_fill['fill_increment'] = (dx_fill['filled_abs'] - dx_fill['prev_filled']).clip(lower=0.0)

def weighted_fill_day(g):
    """Return the fill-increment-weighted mean of ``day_in_cycle`` for one group.

    ``g`` must provide 'fill_increment' (weights) and 'day_in_cycle' (values).
    NaN is returned when the weights sum to 1e-12 or less.
    """
    increments = g['fill_increment'].to_numpy(dtype=float)
    days = g['day_in_cycle'].to_numpy(dtype=float)
    weight_sum = increments.sum()
    return np.nan if weight_sum <= 1e-12 else float((days * increments).sum() / weight_sum)

# One weighted average fill day per (week_id, symbol); NaN where no fill increments exist.
fill_day = (
    dx_fill.groupby(['week_id', 'symbol'])
           .apply(weighted_fill_day)
           .reset_index(name='avg_fill_day')
)
# Left join keeps every symbol-week row of symbol_final.
symbol_final = symbol_final.merge(fill_day, on=['week_id', 'symbol'], how='left')

# Tier summary: averages across rebalances
# First step: total planned and filled load per (week_id, signal_tier).
tier_week = (
    symbol_final.groupby(['week_id', 'signal_tier'], as_index=False)
               .agg(
                   planned_pct_nav=('planned_pct_nav', 'sum'),
                   filled_pct_nav=('final_filled_pct_nav', 'sum')
               )
)
# Filled / planned per tier-week, 0 when nothing was planned.
tier_week['completion_rate'] = np.where(
    tier_week['planned_pct_nav'] > 1e-12,
    tier_week['filled_pct_nav'] / tier_week['planned_pct_nav'],
    0.0
)

def weighted_tier_fill_day(g):
    """Return the fill-weighted mean of ``avg_fill_day`` for one group.

    Only rows with a non-missing 'avg_fill_day' and a strictly positive
    'final_filled_pct_nav' take part; the latter is the weight. NaN is returned
    when no row qualifies.
    """
    usable = g[g['avg_fill_day'].notna() & (g['final_filled_pct_nav'] > 0)]
    if usable.empty:
        return np.nan
    days = usable['avg_fill_day'].to_numpy(dtype=float)
    weights = usable['final_filled_pct_nav'].to_numpy(dtype=float)
    return float((days * weights).sum() / weights.sum())

# Fill-weighted average fill day per (week_id, signal_tier).
tier_fill = (
    symbol_final.groupby(['week_id', 'signal_tier'])
               .apply(weighted_tier_fill_day)
               .reset_index(name='avg_fill_day')
)
tier_week = tier_week.merge(tier_fill, on=['week_id', 'signal_tier'], how='left')

# Second step: average the per-week tier figures across weeks (simple mean, one vote
# per week in which the tier appears); 'weeks' counts those weeks.
tier_summary = (
    tier_week.groupby('signal_tier', as_index=False)
             .agg(
                 planned_pct_nav=('planned_pct_nav', 'mean'),
                 filled_pct_nav=('filled_pct_nav', 'mean'),
                 completion_rate=('completion_rate', 'mean'),
                 avg_fill_day=('avg_fill_day', 'mean'),
                 weeks=('week_id', 'nunique')
             )
)
# Ordered categorical so sorting follows tier_order rather than alphabetical order.
tier_summary['signal_tier'] = pd.Categorical(tier_summary['signal_tier'], categories=tier_order, ordered=True)
tier_summary = tier_summary.sort_values('signal_tier')

# Parse order events for turnover/slippage-by-tier/ADV participation
if df_events is not None and len(df_events):
    events = df_events.copy()
    events['date'] = pd.to_datetime(events['date']).dt.normalize()
    events = to_numeric(events, ['quantity', 'fill_quantity', 'fill_price', 'limit_price', 'market_price_at_submit'])
    # Tier and week id are parsed out of the order tag; a missing tier becomes 'unknown'.
    events['tier'] = events['tag'].apply(lambda t: parse_tag_value(t, 'tier')).fillna('unknown')
    events['week_id'] = events['tag'].apply(lambda t: parse_tag_value(t, 'week_id'))
    # Events without a week id in the tag fall back to their own date (YYYY-MM-DD).
    missing_event_week = events['week_id'].isna() | (events['week_id'].astype(str).str.strip() == '')
    events.loc[missing_event_week, 'week_id'] = events.loc[missing_event_week, 'date'].dt.strftime('%Y-%m-%d')

    # Only rows with a non-zero fill quantity count as fills.
    fills = events[events['fill_quantity'].abs() > 1e-12].copy()
    fills['fill_notional'] = fills['fill_quantity'].abs() * fills['fill_price'].abs()
    fills['expected_notional'] = fills['fill_quantity'].abs() * fills['market_price_at_submit'].abs()
    # (fill price - price at submit) * fill_quantity; NaN when the submit price is ~0.
    # NOTE(review): this is a positive cost for buys and sells only if fill_quantity is
    # signed (negative for sells) — confirm against the event logger.
    fills['implied_slippage_dollars'] = np.where(
        fills['market_price_at_submit'].abs() > 1e-12,
        (fills['fill_price'] - fills['market_price_at_submit']) * fills['fill_quantity'],
        np.nan
    )
    # Same-day NAV is the denominator for the % NAV columns; NaN when NAV is missing or ~0.
    fills = fills.merge(nav_by_date, on='date', how='left')
    fills['fill_pct_nav'] = np.where(fills['nav'] > 1e-12, fills['fill_notional'] / fills['nav'], np.nan)
    fills['implied_slippage_pct_nav'] = np.where(fills['nav'] > 1e-12, fills['implied_slippage_dollars'] / fills['nav'], np.nan)
else:
    # No order events: empty frames with the expected columns so later cells can test len(fills).
    events = pd.DataFrame(columns=['date', 'week_id', 'tier'])
    fills = pd.DataFrame(columns=[
        'date', 'week_id', 'tier', 'symbol', 'fill_quantity', 'fill_price', 'market_price_at_submit',
        'fill_notional', 'expected_notional', 'implied_slippage_dollars', 'nav', 'fill_pct_nav', 'implied_slippage_pct_nav'
    ])

# Daily and weekly turnover / implied-slippage aggregates built from fills.
if len(fills):
    daily_turnover = (
        fills.groupby('date', as_index=False)
             .agg(
                 filled_notional=('fill_notional', 'sum'),
                 implied_slippage_dollars=('implied_slippage_dollars', 'sum'),
                 nav=('nav', 'last')
             )
    )
    # Turnover as a fraction of that day's NAV; NaN when NAV is missing or ~0.
    daily_turnover['turnover_pct_nav'] = np.where(
        daily_turnover['nav'] > 1e-12,
        daily_turnover['filled_notional'] / daily_turnover['nav'],
        np.nan
    )
    # Implied slippage in basis points of filled notional.
    daily_turnover['implied_slippage_bps'] = np.where(
        daily_turnover['filled_notional'] > 1e-12,
        10000.0 * daily_turnover['implied_slippage_dollars'] / daily_turnover['filled_notional'],
        np.nan
    )

    # Weekly turnover sums each fill's own-day % NAV, so each fill is scaled by the
    # NAV of its fill date rather than a single week-level NAV.
    weekly_turnover_raw = (
        fills.groupby('week_id', as_index=False)
             .agg(
                 turnover_pct_nav=('fill_pct_nav', 'sum'),
                 filled_notional=('fill_notional', 'sum'),
                 implied_slippage_dollars=('implied_slippage_dollars', 'sum')
             )
    )
else:
    # No fills: empty frames with the same columns keep the downstream merges valid.
    daily_turnover = pd.DataFrame(columns=['date', 'filled_notional', 'implied_slippage_dollars', 'nav', 'turnover_pct_nav', 'implied_slippage_bps'])
    weekly_turnover_raw = pd.DataFrame(columns=['week_id', 'turnover_pct_nav', 'filled_notional', 'implied_slippage_dollars'])

# Authoritative slippage stream (from slippage logger)
# Builds one row per snapshot date with slippage dollars, filled notional, and
# slippage expressed both as a fraction of NAV and in bps of filled notional.
if df_slippage is not None and len(df_slippage):
    slp = df_slippage.copy()
    slp['date'] = pd.to_datetime(slp['date']).dt.normalize()
    slp = to_numeric(slp, ['slippage_dollars'])
    daily_slippage = (
        slp.groupby('date', as_index=False)
           .agg(slippage_dollars=('slippage_dollars', 'sum'))
    )
else:
    # No slippage log: empty lookup so every snapshot date ends up with 0 slippage.
    daily_slippage = pd.DataFrame(columns=['date', 'slippage_dollars'])

# Start from the NAV calendar so days without slippage rows are still present.
daily_slippage = nav_by_date.merge(daily_slippage, on='date', how='left')
# Coerce before filling: when the lookup frame above is the empty fallback, the
# merged column is object-dtype, and relying on fillna to downcast it is deprecated.
daily_slippage['slippage_dollars'] = pd.to_numeric(
    daily_slippage['slippage_dollars'], errors='coerce'
).fillna(0.0)
daily_slippage = daily_slippage.merge(
    daily_turnover[['date', 'filled_notional']],
    on='date',
    how='left'
)
# Same coercion for filled notional (daily_turnover is an untyped empty frame when
# there are no fills); days without fills count as 0 notional.
daily_slippage['filled_notional'] = pd.to_numeric(
    daily_slippage['filled_notional'], errors='coerce'
).fillna(0.0)
# Slippage as a fraction of NAV; NaN when NAV is missing or ~0.
daily_slippage['slippage_pct_nav'] = np.where(
    daily_slippage['nav'] > 1e-12,
    daily_slippage['slippage_dollars'] / daily_slippage['nav'],
    np.nan
)
# Slippage in bps of that day's filled notional; NaN on days without fills.
daily_slippage['slippage_bps'] = np.where(
    daily_slippage['filled_notional'] > 1e-12,
    10000.0 * daily_slippage['slippage_dollars'] / daily_slippage['filled_notional'],
    np.nan
)

# Week-level unified analytics table
# Restrict turnover to weeks that exist in the targets-derived weekly table.
weekly_turnover = weekly[['week_id', 'start_date']].merge(weekly_turnover_raw, on='week_id', how='left')

# Map each date to a single week_id (first occurrence wins if a date appears in
# more than one week), then sum the slippage-logger dollars per week.
date_to_week = portfolio_daily[['date', 'week_id']].drop_duplicates(subset=['date'])
weekly_slippage = (
    date_to_week.merge(daily_slippage[['date', 'slippage_dollars']], on='date', how='left')
                .groupby('week_id', as_index=False)
                .agg(week_slippage_dollars=('slippage_dollars', 'sum'))
)

# Weeks with no fills or slippage rows are reported as 0 rather than NaN.
weekly_analytics = (
    weekly.merge(weekly_turnover[['week_id', 'turnover_pct_nav', 'filled_notional']], on='week_id', how='left')
          .merge(weekly_slippage, on='week_id', how='left')
          .fillna({'turnover_pct_nav': 0.0, 'filled_notional': 0.0, 'week_slippage_dollars': 0.0})
)

print(f'Weeks analyzed: {len(weekly_analytics):,}')
display(weekly_analytics.tail(10))


In [None]:
# Two stacked panels sharing the x-axis: planned vs filled load, then completion
# rate with unfilled carryover. Weeks are plotted at integer positions in start-date order.
plot_weekly = weekly_analytics.sort_values('start_date').reset_index(drop=True)
x = np.arange(len(plot_weekly))
# Label roughly 12 ticks at most so week ids stay readable.
step = max(1, len(plot_weekly) // 12)

fig, axes = plt.subplots(2, 1, figsize=(16, 11), sharex=True)

# Planned vs filled load per rebalance (% NAV)
# Weight-space values are fractions of NAV, hence the factor of 100.
width = 0.42
axes[0].bar(x - width / 2, 100 * plot_weekly['planned_pct_nav'], width=width, color='#1f77b4', label='Planned')
axes[0].bar(x + width / 2, 100 * plot_weekly['final_filled_pct_nav'], width=width, color='#2ca02c', label='Filled')
axes[0].set_ylabel('% NAV')
axes[0].set_title('Planned vs Filled Rebalance Load (% NAV)')
axes[0].legend()
axes[0].grid(axis='y', alpha=0.3)

# Completion rate + unfilled carryover
axes[1].plot(x, 100 * plot_weekly['final_completion_rate'], color='#d62728', marker='o', linewidth=1.8, label='Completion rate')
axes[1].set_ylabel('Completion (%)')
axes[1].set_ylim(0, 110)
axes[1].grid(axis='y', alpha=0.3)

# Carryover goes on a secondary y-axis because its scale (% NAV) differs from completion (%).
ax2 = axes[1].twinx()
ax2.bar(x, 100 * plot_weekly['unfilled_carryover_pct_nav'], color='#ff9896', alpha=0.55, label='Unfilled carryover')
ax2.set_ylabel('Unfilled (% NAV)')

# Combine handles from both y-axes into a single legend.
lines_1, labels_1 = axes[1].get_legend_handles_labels()
lines_2, labels_2 = ax2.get_legend_handles_labels()
axes[1].legend(lines_1 + lines_2, labels_1 + labels_2, loc='upper right')
axes[1].set_title('Completion Rate and Carryover by Rebalance')

axes[1].set_xticks(x[::step])
axes[1].set_xticklabels(plot_weekly['week_id'].iloc[::step], rotation=45, ha='right')
axes[1].set_xlabel('week_id (rebalance date)')

plt.tight_layout()
plt.show()


In [None]:
# Progress quantiles across the cycle (as filled load % NAV)
# For each day index, take the 25th/50th/75th percentile of filled load across weeks.
progress_quant = (
    portfolio_daily.groupby('day_in_cycle')['filled_pct_nav']
                 .quantile([0.25, 0.5, 0.75])
                 .unstack()
                 .rename(columns={0.25: 'p25', 0.5: 'p50', 0.75: 'p75'})
                 .reset_index()
)
# Cross-week mean of filled load for the same day index.
progress_mean = (
    portfolio_daily.groupby('day_in_cycle', as_index=False)['filled_pct_nav']
                 .mean()
                 .rename(columns={'filled_pct_nav': 'mean'})
)
progress_plot = progress_quant.merge(progress_mean, on='day_in_cycle', how='left').sort_values('day_in_cycle')

# Values are fractions of NAV; multiply by 100 for percent.
fig, ax = plt.subplots(figsize=(12, 6))
ax.plot(progress_plot['day_in_cycle'], 100 * progress_plot['p25'], marker='o', label='P25', color='#8da0cb')
ax.plot(progress_plot['day_in_cycle'], 100 * progress_plot['p50'], marker='o', label='P50 (Median)', color='#1f77b4')
ax.plot(progress_plot['day_in_cycle'], 100 * progress_plot['p75'], marker='o', label='P75', color='#66c2a5')
ax.plot(progress_plot['day_in_cycle'], 100 * progress_plot['mean'], marker='s', linestyle='--', label='Mean', color='#d62728')
ax.set_title('Execution Progress by Day in Rebalance Cycle (% NAV Filled)')
ax.set_xlabel('Day in cycle (0-indexed)')
ax.set_ylabel('Filled load (% NAV)')
ax.grid(alpha=0.3)
ax.legend()

plt.tight_layout()
plt.show()

display(progress_plot)


In [None]:
# Signal-tier split dashboard
# 2x2 grid: average planned load, filled load, completion rate and fill day per tier.
tier_plot = tier_summary.copy()
# Drops rows whose tier is not one of tier_order (such values are NaN after the
# Categorical cast applied when tier_summary was built).
tier_plot = tier_plot[tier_plot['signal_tier'].isin(tier_order)]

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# y is passed as a scaled Series (fraction of NAV -> percent) rather than a column name.
sns.barplot(data=tier_plot, x='signal_tier', y=100 * tier_plot['planned_pct_nav'], order=tier_order, ax=axes[0, 0], color='#1f77b4')
axes[0, 0].set_title('Avg Planned Load by Tier')
axes[0, 0].set_xlabel('Signal tier')
axes[0, 0].set_ylabel('Planned (% NAV)')
axes[0, 0].grid(axis='y', alpha=0.3)

sns.barplot(data=tier_plot, x='signal_tier', y=100 * tier_plot['filled_pct_nav'], order=tier_order, ax=axes[0, 1], color='#2ca02c')
axes[0, 1].set_title('Avg Filled Load by Tier')
axes[0, 1].set_xlabel('Signal tier')
axes[0, 1].set_ylabel('Filled (% NAV)')
axes[0, 1].grid(axis='y', alpha=0.3)

sns.barplot(data=tier_plot, x='signal_tier', y=100 * tier_plot['completion_rate'], order=tier_order, ax=axes[1, 0], color='#d62728')
axes[1, 0].set_title('Avg Completion Rate by Tier')
axes[1, 0].set_xlabel('Signal tier')
axes[1, 0].set_ylabel('Completion (%)')
axes[1, 0].set_ylim(0, 110)
axes[1, 0].grid(axis='y', alpha=0.3)

# Fill day is already in day units, so the column is used as-is.
sns.barplot(data=tier_plot, x='signal_tier', y='avg_fill_day', order=tier_order, ax=axes[1, 1], color='#9467bd')
axes[1, 1].set_title('Avg Day-of-Fill by Tier')
axes[1, 1].set_xlabel('Signal tier')
axes[1, 1].set_ylabel('Weighted average fill day (0-indexed)')
axes[1, 1].grid(axis='y', alpha=0.3)

plt.tight_layout()
plt.show()

display(tier_plot)


In [None]:
# Average holding period (strategy-level and by signal tier)
# An "episode" is a run of consecutive position rows (per symbol, in date order) in
# which the symbol is invested; its row count is reported as the holding period.
# Episodes still open on a symbol's last recorded date are excluded from the stats.
df_positions = read_csv_from_store(f'{TEAM_ID}/positions.csv')

if df_positions is None or len(df_positions) == 0:
    print('positions.csv not available; skipping holding-period analysis.')
else:
    pos = df_positions.copy()
    pos['date'] = pd.to_datetime(pos['date']).dt.normalize()
    pos = to_numeric(pos, ['quantity', 'invested'])

    if 'quantity' not in pos.columns:
        raise ValueError('positions.csv must contain a quantity column.')

    pos['quantity'] = pd.to_numeric(pos['quantity'], errors='coerce').fillna(0.0)

    # Prefer the explicit 'invested' flag; otherwise infer it from a non-zero quantity.
    if 'invested' in pos.columns:
        pos['is_invested'] = pd.to_numeric(pos['invested'], errors='coerce').fillna(0.0) > 0
    else:
        pos['is_invested'] = pos['quantity'].abs() > 1e-12

    # One row per (symbol, date), keeping the last duplicate.
    pos = pos.sort_values(['symbol', 'date'])
    pos = pos.drop_duplicates(['symbol', 'date'], keep='last')

    # Previous row's invested state per symbol. fill_value keeps the result boolean;
    # shift(1).fillna(False) produces an object column that is only boolean through a
    # deprecated implicit downcast, and `~` on an object column is integer bitwise-not.
    prev_invested = (
        pos.groupby('symbol')['is_invested']
           .shift(1, fill_value=False)
           .astype(bool)
    )
    # An episode starts on an invested row whose previous row was not invested
    # (or that has no previous row); the running count of starts is the episode id.
    pos['start_flag'] = pos['is_invested'] & (~prev_invested)
    pos['episode_id'] = pos.groupby('symbol')['start_flag'].cumsum()

    invested_rows = pos[pos['is_invested']].copy()

    episodes = (
        invested_rows.groupby(['symbol', 'episode_id'], as_index=False)
                    .agg(
                        start_date=('date', 'min'),
                        end_date=('date', 'max'),
                        holding_days=('date', 'count'),
                        entry_qty=('quantity', 'first'),
                        exit_qty=('quantity', 'last')
                    )
    )

    if len(episodes) == 0:
        print('No invested episodes found in positions.csv.')
    else:
        # Latest recorded row per symbol, used to detect episodes that are still open.
        last_state = (
            pos.sort_values('date')
               .groupby('symbol', as_index=False)
               .tail(1)[['symbol', 'date', 'is_invested']]
               .rename(columns={'date': 'symbol_last_date', 'is_invested': 'symbol_last_invested'})
        )
        episodes = episodes.merge(last_state, on='symbol', how='left')
        # Open (right-censored): the episode ends on the symbol's last date and is still invested.
        episodes['open_censored'] = (
            (episodes['end_date'] == episodes['symbol_last_date']) &
            (episodes['symbol_last_invested'])
        )

        # Direction comes from the sign of the first quantity in the episode.
        episodes['side'] = np.where(episodes['entry_qty'] >= 0, 'long', 'short')
        episodes['entry_sign'] = np.where(episodes['entry_qty'] >= 0, 1, -1)

        # Primary tier attribution from entry fills on episode start date.
        if len(fills):
            entry_fills = fills.copy()
            entry_fills['tier_clean'] = entry_fills['tier'].astype(str).str.lower().str.strip()
            entry_fills['fill_sign'] = np.where(entry_fills['fill_quantity'] >= 0, 1, -1)
            if 'fill_notional' not in entry_fills.columns:
                entry_fills['fill_notional'] = entry_fills['fill_quantity'].abs() * entry_fills['fill_price'].abs()

            # Only fills tagged with a real tier and a positive notional can attribute a tier.
            entry_fills = entry_fills[
                entry_fills['tier_clean'].isin(['strong', 'moderate', 'weak']) &
                (entry_fills['fill_notional'] > 0)
            ]

            fill_match = episodes.merge(
                entry_fills[['symbol', 'date', 'fill_sign', 'tier_clean', 'fill_notional']],
                left_on=['symbol', 'start_date'],
                right_on=['symbol', 'date'],
                how='left'
            )
            # Keep fills in the episode's direction; the largest notional decides the tier.
            fill_match = fill_match[fill_match['fill_sign'] == fill_match['entry_sign']]
            fill_match = fill_match.sort_values(['symbol', 'episode_id', 'fill_notional'], ascending=[True, True, False])
            tier_map = (
                fill_match.drop_duplicates(['symbol', 'episode_id'], keep='first')
                          [['symbol', 'episode_id', 'tier_clean']]
            )

            episodes = episodes.merge(tier_map, on=['symbol', 'episode_id'], how='left')
            episodes = episodes.rename(columns={'tier_clean': 'entry_tier'})
        else:
            # Object dtype so the string fills below need no float -> object upcast.
            episodes['entry_tier'] = pd.Series(np.nan, index=episodes.index, dtype=object)

        # Fallback: same-day signal tier at episode start.
        if df_signals is not None and len(df_signals):
            sig = df_signals[['date', 'symbol', 'magnitude']].copy()
            sig['date'] = pd.to_datetime(sig['date']).dt.normalize()
            sig = to_numeric(sig, ['magnitude'])
            sig['signal_tier'] = sig['magnitude'].apply(tier_from_magnitude)
            sig = sig.sort_values(['symbol', 'date']).drop_duplicates(['symbol', 'date'], keep='last')

            episodes = episodes.merge(
                sig[['symbol', 'date', 'signal_tier']].rename(columns={'date': 'start_date', 'signal_tier': 'entry_tier_signal'}),
                on=['symbol', 'start_date'],
                how='left'
            )
            # Fill-based tier wins; the signal tier only fills the gaps.
            episodes['entry_tier'] = episodes['entry_tier'].fillna(episodes['entry_tier_signal'])
            episodes = episodes.drop(columns=['entry_tier_signal'])

        episodes['entry_tier'] = episodes['entry_tier'].fillna('unknown')

        # Holding-period statistics use closed episodes only.
        closed = episodes[~episodes['open_censored']].copy()

        if len(closed) == 0:
            print('No closed episodes available yet; holding-period plot skipped.')
        else:
            tier_order_hp = tier_order  # tier_order already includes 'unknown'

            overall_avg = float(closed['holding_days'].mean())
            overall_med = float(closed['holding_days'].median())

            tier_holding = (
                closed.groupby('entry_tier', as_index=False)
                      .agg(
                          episodes=('holding_days', 'count'),
                          avg_holding_days=('holding_days', 'mean'),
                          median_holding_days=('holding_days', 'median')
                      )
            )
            tier_holding['entry_tier'] = pd.Categorical(tier_holding['entry_tier'], categories=tier_order_hp, ordered=True)
            tier_holding = tier_holding.sort_values('entry_tier')

            # Left: mean holding period per tier; right: per-tier distribution.
            # The dashed line on both panels is the overall mean.
            fig, axes = plt.subplots(1, 2, figsize=(16, 6))

            sns.barplot(
                data=tier_holding,
                x='entry_tier',
                y='avg_holding_days',
                order=tier_order_hp,
                ax=axes[0],
                color='#1f77b4'
            )
            axes[0].axhline(overall_avg, color='#d62728', linestyle='--', linewidth=2, label=f'Overall avg = {overall_avg:.2f}d')
            axes[0].set_title('Average Holding Period by Entry Signal Tier')
            axes[0].set_xlabel('Entry signal tier')
            axes[0].set_ylabel('Avg holding period (trading days)')
            axes[0].grid(axis='y', alpha=0.3)
            axes[0].legend()

            sns.boxplot(
                data=closed,
                x='entry_tier',
                y='holding_days',
                order=tier_order_hp,
                ax=axes[1]
            )
            axes[1].axhline(overall_avg, color='#d62728', linestyle='--', linewidth=2)
            axes[1].set_title('Holding Period Distribution by Entry Tier')
            axes[1].set_xlabel('Entry signal tier')
            axes[1].set_ylabel('Holding period (trading days)')
            axes[1].grid(axis='y', alpha=0.3)

            plt.tight_layout()
            plt.show()

            overall_summary = pd.DataFrame([
                {
                    'group': 'strategy_overall',
                    'episodes': int(len(closed)),
                    'avg_holding_days': overall_avg,
                    'median_holding_days': overall_med
                }
            ])

            print('Holding-period summary (closed episodes only):')
            display(overall_summary)
            display(tier_holding)

In [None]:
# Buy vs sell execution load by rebalance
# Planned figures take the weekly maximum; filled figures take the last row of the
# week (portfolio_daily is sorted by week_id/date).
weekly_side = (
    portfolio_daily.groupby('week_id', as_index=False)
                  .agg(
                      start_date=('date', 'min'),
                      planned_buy_pct_nav=('buy_planned_pct_nav', 'max'),
                      planned_sell_pct_nav=('sell_planned_pct_nav', 'max'),
                      filled_buy_pct_nav=('buy_filled_pct_nav', 'last'),
                      filled_sell_pct_nav=('sell_filled_pct_nav', 'last')
                  )
                  .sort_values('start_date')
)

x = np.arange(len(weekly_side))
# Label roughly 12 ticks at most so week ids stay readable.
step = max(1, len(weekly_side) // 12)

fig, axes = plt.subplots(2, 1, figsize=(16, 11), sharex=True)

# Buys are drawn above zero and sells mirrored below it (note the -100 factor).
axes[0].bar(x, 100 * weekly_side['planned_buy_pct_nav'], color='#2ca02c', label='Planned buys')
axes[0].bar(x, -100 * weekly_side['planned_sell_pct_nav'], color='#ff7f0e', label='Planned sells')
axes[0].axhline(0, color='black', linewidth=0.8)
axes[0].set_ylabel('% NAV')
axes[0].set_title('Planned Buy vs Sell Load (% NAV)')
axes[0].legend()
axes[0].grid(axis='y', alpha=0.3)

axes[1].bar(x, 100 * weekly_side['filled_buy_pct_nav'], color='#1f77b4', label='Filled buys')
axes[1].bar(x, -100 * weekly_side['filled_sell_pct_nav'], color='#d62728', label='Filled sells')
axes[1].axhline(0, color='black', linewidth=0.8)
axes[1].set_ylabel('% NAV')
axes[1].set_title('Filled Buy vs Sell Load (% NAV)')
axes[1].legend()
axes[1].grid(axis='y', alpha=0.3)

axes[1].set_xticks(x[::step])
axes[1].set_xticklabels(weekly_side['week_id'].iloc[::step], rotation=45, ha='right')
axes[1].set_xlabel('week_id (rebalance date)')

plt.tight_layout()
plt.show()

display(weekly_side.tail(10))


In [None]:
# Slippage: overall and by tier
# The per-tier table uses implied slippage from order-event fill prices; the daily
# series in the top panel uses daily_slippage (built from slippage.csv).
if len(fills):
    tier_slippage = (
        fills.groupby('tier', as_index=False)
             .agg(
                 fill_notional=('fill_notional', 'sum'),
                 slippage_dollars=('implied_slippage_dollars', 'sum'),
                 slippage_pct_nav=('implied_slippage_pct_nav', 'sum'),
                 fill_rows=('fill_quantity', 'count')
             )
    )
    # Slippage in bps of the tier's filled notional.
    tier_slippage['slippage_bps'] = np.where(
        tier_slippage['fill_notional'] > 1e-12,
        10000.0 * tier_slippage['slippage_dollars'] / tier_slippage['fill_notional'],
        np.nan
    )
    # Tier labels outside tier_order + ['exit'] become NaN under this cast and are
    # shown as 'nan' on the axis below.
    tier_slippage['tier'] = pd.Categorical(tier_slippage['tier'], categories=tier_order + ['exit'], ordered=True)
    tier_slippage = tier_slippage.sort_values('tier')
else:
    tier_slippage = pd.DataFrame(columns=['tier', 'fill_notional', 'slippage_dollars', 'slippage_pct_nav', 'fill_rows', 'slippage_bps'])

fig, axes = plt.subplots(2, 1, figsize=(15, 10))

# Top panel: daily slippage in bps of NAV (fraction of NAV * 10000) on the left axis.
daily_slip_plot = daily_slippage.sort_values('date')
axes[0].plot(daily_slip_plot['date'], 10000 * daily_slip_plot['slippage_pct_nav'], color='#d62728', linewidth=1.4, label='Slippage (bps of NAV)')
axes[0].set_title('Daily Slippage (from slippage.csv)')
axes[0].set_ylabel('bps of NAV')
axes[0].grid(alpha=0.3)

# Right axis: the same dollars expressed in bps of filled notional (NaN on days without fills).
ax2 = axes[0].twinx()
ax2.plot(daily_slip_plot['date'], daily_slip_plot['slippage_bps'], color='#1f77b4', linewidth=1.2, alpha=0.7, label='Slippage (bps of filled notional)')
ax2.set_ylabel('bps of filled notional')

# Combine handles from both y-axes into a single legend.
lines_1, labels_1 = axes[0].get_legend_handles_labels()
lines_2, labels_2 = ax2.get_legend_handles_labels()
axes[0].legend(lines_1 + lines_2, labels_1 + labels_2, loc='upper right')

# Bottom panel: per-tier implied slippage, or a placeholder message when there are no fills.
if len(tier_slippage):
    width = 0.38
    x = np.arange(len(tier_slippage))
    # Both bar series share one y-axis although their units differ (bps vs % of NAV).
    axes[1].bar(x - width / 2, tier_slippage['slippage_bps'], width=width, color='#9467bd', label='Slippage bps')
    axes[1].bar(x + width / 2, 100 * tier_slippage['slippage_pct_nav'], width=width, color='#ff9896', label='Slippage % NAV')
    axes[1].set_xticks(x)
    axes[1].set_xticklabels(tier_slippage['tier'].astype(str))
    axes[1].set_title('Implied Slippage by Signal Tier (from order_events fill prices)')
    axes[1].set_ylabel('bps / %NAV')
    axes[1].grid(axis='y', alpha=0.3)
    axes[1].legend()
else:
    axes[1].text(0.5, 0.5, 'No fill rows available in order_events.csv', ha='center', va='center')
    axes[1].set_axis_off()

plt.tight_layout()
plt.show()

display(tier_slippage)


In [None]:
# Participation vs liquidity (planned notional as % of ADV20)
#
# For every symbol-week with a positive planned notional, look up a 20-row
# rolling mean of daily dollar volume (close * volume) on the week's
# start_date and express the planned notional as a percentage of it.
# The whole section is best-effort: any failure while building the ADV
# history prints a message and leaves `participation` empty, in which case
# the plots are skipped.
symbol_week = symbol_final.merge(
    weekly[['week_id', 'start_date', 'week_start_nav']],
    on='week_id',
    how='left'
)
symbol_week['planned_notional'] = symbol_week['planned_pct_nav'] * symbol_week['week_start_nav']

participation = pd.DataFrame()

try:
    adv_input = symbol_week[symbol_week['planned_notional'] > 0].copy()
    adv_input = adv_input.dropna(subset=['start_date'])

    tickers = sorted(adv_input['symbol'].dropna().astype(str).unique())
    qc_symbols = {}
    unresolved_tickers = {}
    for ticker in tickers:
        try:
            qc_symbols[ticker] = qb.AddEquity(ticker, Resolution.Daily).Symbol
        except Exception as add_err:
            # Keep going without this ticker, but remember why it was
            # dropped so the gap in the chart is explainable.
            unresolved_tickers[ticker] = add_err

    if unresolved_tickers:
        sample = ', '.join(sorted(unresolved_tickers)[:10])
        print(
            f'ADV participation: {len(unresolved_tickers)} of {len(tickers)} tickers '
            f'could not be added and are excluded (e.g. {sample})'
        )

    if qc_symbols:
        # Pull 90 calendar days before the earliest rebalance so the rolling
        # window has data to work with on the first start_date.
        hist_start = adv_input['start_date'].min() - pd.Timedelta(days=90)
        hist_end = adv_input['start_date'].max() + pd.Timedelta(days=1)
        hist = qb.History(list(qc_symbols.values()), hist_start, hist_end, Resolution.Daily)

        if hist is not None and len(hist):
            hist = hist.reset_index()

            # Column names are probed defensively; fall back to positional
            # columns when the expected names are absent.
            symbol_col = 'symbol' if 'symbol' in hist.columns else hist.columns[0]
            if 'time' in hist.columns:
                time_col = 'time'
            elif 'end_time' in hist.columns:
                time_col = 'end_time'
            else:
                time_col = hist.columns[1]

            close_col = 'close' if 'close' in hist.columns else 'Close'
            volume_col = 'volume' if 'volume' in hist.columns else 'Volume'

            # Keep only the text before the first space of the symbol string
            # so it can be joined against the plain tickers in adv_input.
            hist['symbol'] = hist[symbol_col].astype(str).str.split(' ').str[0]
            hist['date'] = pd.to_datetime(hist[time_col]).dt.normalize()
            hist['close_val'] = pd.to_numeric(hist[close_col], errors='coerce')
            hist['volume_val'] = pd.to_numeric(hist[volume_col], errors='coerce')
            hist['dollar_volume'] = hist['close_val'] * hist['volume_val']

            hist = hist.sort_values(['symbol', 'date'])
            # 20-row rolling mean per symbol; at least 5 rows are required
            # before a value is produced.
            hist['adv20'] = hist.groupby('symbol')['dollar_volume'].transform(lambda s: s.rolling(20, min_periods=5).mean())

            adv_lookup = hist[['symbol', 'date', 'adv20']].dropna()

            # NOTE(review): this is an exact-date join, so a symbol-week only
            # gets an ADV20 when the history has a row dated exactly on
            # start_date. Confirm the history timestamps line up with the
            # rebalance dates if coverage (printed below) looks low.
            participation = adv_input.merge(
                adv_lookup,
                left_on=['symbol', 'start_date'],
                right_on=['symbol', 'date'],
                how='left'
            )
            participation['participation_pct_adv'] = 100.0 * participation['planned_notional'] / participation['adv20']
            # A zero ADV20 produces +/-inf; treat it like a missing value.
            participation = participation.replace([np.inf, -np.inf], np.nan)
            participation = participation.dropna(subset=['participation_pct_adv'])

            # Rows without a usable ADV20 are dropped above; report how many
            # survived so a sparse chart is not mistaken for full coverage.
            if len(participation) < len(adv_input):
                print(
                    f'ADV participation: ADV20 available for {len(participation)} of '
                    f'{len(adv_input)} planned symbol-weeks'
                )

except Exception as e:
    print(f'ADV participation section skipped due to error: {e}')

if len(participation):
    fig, axes = plt.subplots(1, 2, figsize=(15, 6))

    # Left: distribution per tier on a log scale.
    sns.boxplot(
        data=participation,
        x='signal_tier',
        y='participation_pct_adv',
        order=tier_order,
        ax=axes[0]
    )
    axes[0].set_title('Planned Notional as % of ADV20 by Tier')
    axes[0].set_xlabel('Signal tier')
    axes[0].set_ylabel('% ADV20')
    axes[0].set_yscale('log')
    axes[0].grid(axis='y', alpha=0.3)

    # Right: mean vs median participation per tier, ordered by tier_order.
    tier_adv = (
        participation.groupby('signal_tier', as_index=False)
                     .agg(
                         mean_participation=('participation_pct_adv', 'mean'),
                         median_participation=('participation_pct_adv', 'median'),
                         count=('participation_pct_adv', 'count')
                     )
    )
    tier_adv['signal_tier'] = pd.Categorical(tier_adv['signal_tier'], categories=tier_order, ordered=True)
    tier_adv = tier_adv.sort_values('signal_tier')

    width = 0.38
    x = np.arange(len(tier_adv))
    axes[1].bar(x - width / 2, tier_adv['mean_participation'], width=width, color='#1f77b4', label='Mean %ADV')
    axes[1].bar(x + width / 2, tier_adv['median_participation'], width=width, color='#2ca02c', label='Median %ADV')
    axes[1].set_xticks(x)
    axes[1].set_xticklabels(tier_adv['signal_tier'].astype(str))
    axes[1].set_title('Participation Summary by Tier')
    axes[1].set_xlabel('Signal tier')
    axes[1].set_ylabel('% ADV20')
    axes[1].grid(axis='y', alpha=0.3)
    axes[1].legend()

    plt.tight_layout()
    plt.show()

    display(tier_adv)
else:
    print('Participation vs ADV plot skipped: unable to build ADV20 history for this dataset.')


In [None]:
# Target drift while orders are working
#
# Top panel: the two drift columns plotted against calendar date.
# Bottom panel: the same two columns averaged per day_in_cycle.
drift_plot = portfolio_daily.sort_values('date')

fig, axes = plt.subplots(2, 1, figsize=(15, 10), sharex=False)
ax_time, ax_cycle = axes[0], axes[1]

time_series_specs = [
    ('remaining_pct_nav', dict(color='#d62728', linewidth=1.6, label='Distance to weekly target')),
    ('schedule_drift_pct_nav', dict(color='#1f77b4', linewidth=1.4, alpha=0.8, label='Distance to scheduled target')),
]
for drift_col, line_kwargs in time_series_specs:
    # Columns are scaled by 100 for display as % NAV.
    ax_time.plot(drift_plot['date'], 100 * drift_plot[drift_col], **line_kwargs)
ax_time.set_title('Target Drift Through Time (% NAV)')
ax_time.set_ylabel('% NAV')
ax_time.legend()
ax_time.grid(alpha=0.3)

# Mean of each drift column for every day_in_cycle value.
day_drift = portfolio_daily.groupby('day_in_cycle', as_index=False).agg(
    mean_target_drift=pd.NamedAgg(column='remaining_pct_nav', aggfunc='mean'),
    mean_schedule_drift=pd.NamedAgg(column='schedule_drift_pct_nav', aggfunc='mean'),
)

cycle_series_specs = [
    ('mean_target_drift', dict(marker='o', color='#d62728', label='Mean distance to weekly target')),
    ('mean_schedule_drift', dict(marker='s', color='#1f77b4', label='Mean distance to scheduled target')),
]
for mean_col, line_kwargs in cycle_series_specs:
    ax_cycle.plot(day_drift['day_in_cycle'], 100 * day_drift[mean_col], **line_kwargs)
ax_cycle.set_title('Average Drift Profile by Day in Cycle')
ax_cycle.set_xlabel('Day in cycle (0-indexed)')
ax_cycle.set_ylabel('% NAV')
ax_cycle.legend()
ax_cycle.grid(alpha=0.3)

plt.tight_layout()
plt.show()

display(day_drift)


In [None]:
# Turnover % NAV per rebalance (from order fills)
#
# Every week in `weekly` gets a row; weeks absent from weekly_turnover_raw
# are shown as zero turnover / zero filled notional.
turnover_plot = weekly[['week_id', 'start_date']].merge(weekly_turnover_raw, on='week_id', how='left')
turnover_plot = turnover_plot.fillna(
    {'turnover_pct_nav': 0.0, 'filled_notional': 0.0, 'implied_slippage_dollars': 0.0}
)
turnover_plot = turnover_plot.sort_values('start_date').reset_index(drop=True)

# Bars sit at integer positions; label roughly a dozen of them at most.
n_weeks = len(turnover_plot)
x = np.arange(n_weeks)
step = max(1, n_weeks // 12)

fig, ax = plt.subplots(figsize=(16, 6))
ax.bar(x, 100 * turnover_plot['turnover_pct_nav'], color='#17becf', alpha=0.75, label='Turnover % NAV')
ax.set(
    title='Turnover per Rebalance (% NAV)',
    ylabel='Turnover (% NAV)',
    xlabel='week_id (rebalance date)',
)
ax.set_xticks(x[::step])
ax.set_xticklabels(turnover_plot['week_id'].iloc[::step], rotation=45, ha='right')
ax.grid(axis='y', alpha=0.3)

# Dollar notional goes on a secondary y-axis so it does not share the % scale.
ax2 = ax.twinx()
ax2.plot(x, turnover_plot['filled_notional'], color='#1f77b4', linewidth=1.4, marker='o', alpha=0.7, label='Filled notional ($)')
ax2.set_ylabel('Filled notional ($)')

# One legend covering the artists on both y-axes.
legend_handles, legend_labels = [], []
for axis in (ax, ax2):
    axis_handles, axis_labels = axis.get_legend_handles_labels()
    legend_handles = legend_handles + axis_handles
    legend_labels = legend_labels + axis_labels
ax.legend(legend_handles, legend_labels, loc='upper right')

plt.tight_layout()
plt.show()

display(turnover_plot.tail(10))


In [None]:
# Annualized turnover (% NAV/year): compare three definitions weekly
#
#   gross      = (buy notional + sell notional) / average NAV of the week
#   one-way    = min(buy notional, sell notional) / average NAV of the week
#   half-gross = 0.5 * gross
#
# Each weekly ratio is scaled by the number of weeks per year and by 100 to
# give "% NAV / year", and a 4-week moving average is added per definition.
weeks_per_year = 52.0
notional_cols = ['buy_notional', 'sell_notional', 'gross_notional']

if len(fills):
    fill_defs = fills.copy()
    # Split fill notional by side using the sign of the filled quantity.
    fill_defs['buy_notional'] = np.where(fill_defs['fill_quantity'] > 0, fill_defs['fill_notional'], 0.0)
    fill_defs['sell_notional'] = np.where(fill_defs['fill_quantity'] < 0, fill_defs['fill_notional'], 0.0)

    weekly_trade_defs = (
        fill_defs.groupby('week_id', as_index=False)
                 .agg(
                     buy_notional=('buy_notional', 'sum'),
                     sell_notional=('sell_notional', 'sum'),
                     gross_notional=('fill_notional', 'sum')
                 )
    )
else:
    weekly_trade_defs = pd.DataFrame(columns=['week_id', 'buy_notional', 'sell_notional', 'gross_notional'])

# Per-week first date and mean NAV over the dates mapped to that week.
week_nav = (
    date_to_week.merge(nav_by_date, on='date', how='left')
               .groupby('week_id', as_index=False)
               .agg(
                   start_date=('date', 'min'),
                   avg_nav=('nav', 'mean')
               )
)

annual_turn = (
    week_nav.merge(weekly_trade_defs, on='week_id', how='left')
            .fillna({'buy_notional': 0.0, 'sell_notional': 0.0, 'gross_notional': 0.0})
            .sort_values('start_date')
            .reset_index(drop=True)
)
# The empty-fills placeholder above has object-dtype columns; force floats so
# the ratios, rolling means and plots below always operate on numeric data.
annual_turn[notional_cols] = annual_turn[notional_cols].astype(float)

# Mask out non-positive / missing NAV so those weeks yield NaN rather than
# inf or a meaningless ratio.
safe_nav = annual_turn['avg_nav'].where(annual_turn['avg_nav'] > 1e-12)

annual_turn['gross_weekly_pct_nav'] = annual_turn['gross_notional'] / safe_nav
annual_turn['one_way_weekly_pct_nav'] = (
    np.minimum(annual_turn['buy_notional'], annual_turn['sell_notional']) / safe_nav
)
annual_turn['half_gross_weekly_pct_nav'] = 0.5 * annual_turn['gross_weekly_pct_nav']

annual_turn['annualized_gross_pct_nav'] = 100.0 * annual_turn['gross_weekly_pct_nav'] * weeks_per_year
annual_turn['annualized_one_way_pct_nav'] = 100.0 * annual_turn['one_way_weekly_pct_nav'] * weeks_per_year
annual_turn['annualized_half_gross_pct_nav'] = 100.0 * annual_turn['half_gross_weekly_pct_nav'] * weeks_per_year

for col in ['annualized_gross_pct_nav', 'annualized_one_way_pct_nav', 'annualized_half_gross_pct_nav']:
    annual_turn[f'{col}_4w_ma'] = annual_turn[col].rolling(4, min_periods=1).mean()

fig, ax = plt.subplots(figsize=(16, 7))

# (column, line style) in draw order: the three raw series first, then their
# 4-week moving averages as dashed lines in matching colours.
turnover_line_specs = [
    ('annualized_gross_pct_nav',
     dict(color='#2ca02c', linewidth=1.8, alpha=0.65, label='Gross traded notional (buys + sells)')),
    ('annualized_one_way_pct_nav',
     dict(color='#1f77b4', linewidth=2.1, label='One-way turnover min(buys, sells)')),
    ('annualized_half_gross_pct_nav',
     dict(color='#ff7f0e', linewidth=1.8, label='Half-gross proxy 0.5 * (buys + sells)')),
    ('annualized_gross_pct_nav_4w_ma',
     dict(color='#2ca02c', linewidth=1.4, linestyle='--', alpha=0.9, label='Gross (4w MA)')),
    ('annualized_one_way_pct_nav_4w_ma',
     dict(color='#1f77b4', linewidth=1.6, linestyle='--', alpha=0.95, label='One-way (4w MA)')),
    ('annualized_half_gross_pct_nav_4w_ma',
     dict(color='#ff7f0e', linewidth=1.4, linestyle='--', alpha=0.9, label='Half-gross (4w MA)')),
]
for turnover_col, line_kwargs in turnover_line_specs:
    ax.plot(annual_turn['start_date'], annual_turn[turnover_col], **line_kwargs)

ax.set_title('Annualized Portfolio Turnover by Rebalance Week (Three Definitions)')
ax.set_xlabel('Rebalance week')
ax.set_ylabel('Annualized turnover (% NAV / year)')
ax.grid(axis='y', alpha=0.3)
ax.legend(ncol=2)

plt.tight_layout()
plt.show()

display(
    annual_turn[
        [
            'week_id',
            'start_date',
            'buy_notional',
            'sell_notional',
            'gross_notional',
            'avg_nav',
            'annualized_gross_pct_nav',
            'annualized_one_way_pct_nav',
            'annualized_half_gross_pct_nav'
        ]
    ].tail(12)
)


In [None]:
# Outlier annotation chart
#
# Each week is scored by the average of three percentile ranks: planned
# load, unfilled carryover and absolute slippage dollars. The ten highest
# scoring weeks are labelled on a scatter of planned load vs final
# completion, with bubble area driven by turnover.
out = weekly_analytics.copy().sort_values('start_date').reset_index(drop=True)

out['size_rank'] = out['planned_pct_nav'].rank(pct=True)
out['carry_rank'] = out['unfilled_carryover_pct_nav'].rank(pct=True)
out['slip_rank'] = out['week_slippage_dollars'].abs().rank(pct=True)

# Average whichever ranks are available. rank() leaves NaN where the input
# is NaN, and a plain (a + b + c) / 3 would turn the whole score into NaN,
# leaving that week uncoloured and unable to appear in the top list. With
# all three ranks present this equals the simple three-way average.
rank_cols = ['size_rank', 'carry_rank', 'slip_rank']
out['outlier_score'] = out[rank_cols].mean(axis=1)

top_outliers = out.nlargest(10, 'outlier_score').copy()

fig, ax = plt.subplots(figsize=(12, 7))
# The +0.01 offset keeps zero-turnover weeks visible as small bubbles.
bubble_size = 4000 * (out['turnover_pct_nav'].fillna(0.0) + 0.01)
sc = ax.scatter(
    100 * out['planned_pct_nav'],
    100 * out['final_completion_rate'],
    s=bubble_size,
    c=out['outlier_score'],
    cmap='YlOrRd',
    alpha=0.8,
    edgecolor='black',
    linewidth=0.3
)
plt.colorbar(sc, ax=ax, label='Outlier score')

for _, row in top_outliers.iterrows():
    label_x = 100 * row['planned_pct_nav']
    label_y = 100 * row['final_completion_rate']
    # A week with a missing coordinate has no bubble to attach a label to.
    if pd.isna(label_x) or pd.isna(label_y):
        continue
    ax.annotate(
        str(row['week_id']),
        (label_x, label_y),
        textcoords='offset points',
        xytext=(5, 4),
        fontsize=8
    )

ax.set_title('Outlier Cycles: Load Size vs Completion (bubble=turnover %NAV)')
ax.set_xlabel('Planned load (% NAV)')
ax.set_ylabel('Final completion (%)')
ax.set_ylim(0, 110)
ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()

display(
    top_outliers[
        [
            'week_id', 'start_date', 'planned_pct_nav', 'final_filled_pct_nav',
            'final_completion_rate', 'unfilled_carryover_pct_nav',
            'turnover_pct_nav', 'week_slippage_dollars', 'outlier_score'
        ]
    ]
)
