# TearSheet GPT

Comprehensive tear sheet notebook built from existing WolfpackTrend ObjectStore logs.

Design goals:
- No strategy values hardcoded in this notebook.
- Strategy settings are extracted at run time by parsing `main.py`; nothing is copied by hand.
- All plots degrade gracefully when optional datasets are missing.


## 1) Setup


In [None]:
import ast
import math
import re
from pathlib import Path
from io import StringIO

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as sps
from IPython.display import display

pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

from QuantConnect import *
from QuantConnect.Research import QuantBook
from config import TEAM_ID

qb = QuantBook()
print('QuantBook initialized')


## 2) Strategy Metadata Extraction (Dynamic)


In [None]:
TRADING_DAYS = 252


def _literal_value(node):
    """Resolve a small subset of AST literal nodes to plain Python values.

    Handles constants, negated int/float constants, and tuples whose elements
    are themselves resolvable. Returns ``None`` for every other node, and for
    any tuple containing an element that resolves to ``None``.
    """
    if isinstance(node, ast.Constant):
        return node.value

    is_negated_constant = (
        isinstance(node, ast.UnaryOp)
        and isinstance(node.op, ast.USub)
        and isinstance(node.operand, ast.Constant)
    )
    if is_negated_constant and isinstance(node.operand.value, (int, float)):
        return -node.operand.value

    if not isinstance(node, ast.Tuple):
        return None

    resolved = []
    for element in node.elts:
        item = _literal_value(element)
        if item is None:
            # One unresolvable element makes the whole tuple unresolvable.
            return None
        resolved.append(item)
    return tuple(resolved)


def extract_strategy_settings(main_path='main.py'):
    """Statically extract strategy settings from the algorithm source file.

    The file is parsed with ``ast`` (never imported or executed). Values are
    taken from:

    - the first positional argument of any ``SetBenchmark(...)`` call, when it
      is a string literal;
    - keyword arguments of ``TargetVolPortfolioConstructionModel(...)`` and
      ``SignalStrengthExecutionModel(...)`` calls whose names match a key of
      the returned dict and whose values are simple literals.

    Args:
        main_path: Path to the strategy source file.

    Returns:
        dict mapping setting name to the extracted value, or ``None`` for
        every setting that could not be found. If the file is missing,
        unreadable or not valid Python, a warning is printed and every value
        is ``None``.
    """
    settings = {
        'benchmark_symbol': None,
        'target_vol_annual': None,
        'scaling_days': None,
        'rebalance_interval_trading_days': None,
        'strong_threshold': None,
        'moderate_threshold': None,
    }

    p = Path(main_path)
    if not p.exists():
        print(f'WARNING: {main_path} not found. Settings fallback will be used only where necessary.')
        return settings

    try:
        # Parse from bytes so the source file's own encoding declaration is
        # honoured instead of the platform's locale encoding.
        tree = ast.parse(p.read_bytes())
    except (OSError, SyntaxError, ValueError) as exc:
        # Degrade the same way as the missing-file case rather than crashing.
        print(f'WARNING: could not parse {main_path}: {exc}. Settings fallback will be used only where necessary.')
        return settings

    # Constructors whose keyword arguments carry strategy settings.
    keyword_sources = {'TargetVolPortfolioConstructionModel', 'SignalStrengthExecutionModel'}

    for node in ast.walk(tree):
        if not isinstance(node, ast.Call):
            continue

        if isinstance(node.func, ast.Name):
            fn_name = node.func.id
        elif isinstance(node.func, ast.Attribute):
            fn_name = node.func.attr
        else:
            continue

        if fn_name == 'SetBenchmark' and node.args:
            val = _literal_value(node.args[0])
            if isinstance(val, str):
                settings['benchmark_symbol'] = val
        elif fn_name in keyword_sources:
            for kw in node.keywords:
                if kw.arg in settings:
                    val = _literal_value(kw.value)
                    if val is not None:
                        settings[kw.arg] = val

    return settings


# Parse main.py once; the benchmark cell below reads its ticker from STRATEGY.
STRATEGY = extract_strategy_settings('main.py')

# Echo what was found so missing (None) settings are visible up front.
print('Extracted strategy settings:')
for k, v in STRATEGY.items():
    print(f'  {k}: {v}')


## 3) Data Loading Helpers


In [None]:
def read_csv_from_store(key):
    """Load the CSV stored under ``key`` in the QuantBook ObjectStore.

    Returns a DataFrame, or ``None`` (after printing the reason) when the key
    is absent, its content is empty, or reading/parsing raises.
    """
    try:
        store = qb.ObjectStore
        if store.ContainsKey(key):
            raw = store.Read(key)
            if raw:
                return pd.read_csv(StringIO(raw))
            print(f'Empty ObjectStore key: {key}')
        else:
            print(f'ObjectStore key not found: {key}')
    except Exception as err:
        # Best effort: a failed read is reported and treated as missing data.
        print(f'Error reading {key}: {err}')
    return None


def to_numeric(df, cols):
    """Coerce the listed columns of ``df`` to numeric, in place.

    Columns named in ``cols`` that are absent from ``df`` are ignored.
    Unparseable entries become NaN. Returns the same ``df`` object.
    """
    present = [name for name in cols if name in df.columns]
    for name in present:
        df[name] = pd.to_numeric(df[name], errors='coerce')
    return df


def parse_tag_value(tag, key):
    """Extract the value of ``key`` from a ``key=value;key=value`` tag string.

    Args:
        tag: Tag text (converted with ``str``); NaN/None yields NaN.
        key: Key name, matched literally.

    Returns:
        The text after ``key=`` up to the next ``;`` (or end of string), or
        NaN when the tag is missing or does not contain the key.
    """
    if pd.isna(tag):
        return np.nan
    # Escape the key so regex metacharacters in it are matched literally, and
    # require that it is not the tail of a longer identifier (e.g. looking up
    # 'tier' must not match inside 'subtier=').
    pattern = rf'(?<!\w){re.escape(key)}=([^;]+)'
    m = re.search(pattern, str(tag))
    return m.group(1) if m else np.nan


def infer_cycle_days(df_targets, df_snapshots):
    """Estimate how many trading days make up one rebalance cycle.

    Sources are tried in order and the first that yields a value wins:

    1. ``max(scale_day) + 1`` from the targets frame;
    2. the median number of distinct dates per ``week_id`` in the targets;
    3. the median number of snapshot dates per weekly period;
    4. ``TRADING_DAYS / 52`` rounded, with a floor of 1.
    """
    has_targets = df_targets is not None

    # 1) Explicit scaling index recorded with the targets.
    if has_targets and 'scale_day' in df_targets.columns:
        scale_days = pd.to_numeric(df_targets['scale_day'], errors='coerce').dropna()
        if len(scale_days) > 0:
            last_index = int(scale_days.max())
            if last_index >= 0:
                return last_index + 1

    # 2) Distinct dates observed for each week_id.
    if has_targets and {'week_id', 'date'}.issubset(df_targets.columns):
        pairs = df_targets[['week_id', 'date']].dropna().copy()
        if len(pairs) > 0:
            pairs['date'] = pd.to_datetime(pairs['date'])
            days_per_week = pairs.groupby('week_id')['date'].nunique()
            days_per_week = days_per_week[days_per_week > 0]
            if len(days_per_week) > 0:
                return int(round(float(days_per_week.median())))

    # 3) Snapshot dates bucketed into weekly periods.
    if df_snapshots is not None and 'date' in df_snapshots.columns:
        weeks = pd.to_datetime(df_snapshots['date']).dropna().dt.to_period('W')
        if len(weeks) > 0:
            counts = weeks.value_counts()
            if len(counts) > 0:
                return int(round(float(counts.median())))

    # 4) Nothing usable: fall back to the average trading days per week.
    return max(1, int(round(TRADING_DAYS / 52)))


## 4) Load ObjectStore Data


In [None]:
# Load every log the backtest wrote to the ObjectStore. Each frame is None
# when its key is missing, empty or unreadable.
df_snapshots = read_csv_from_store(f'{TEAM_ID}/daily_snapshots.csv')
df_positions = read_csv_from_store(f'{TEAM_ID}/positions.csv')
df_signals = read_csv_from_store(f'{TEAM_ID}/signals.csv')
df_slippage = read_csv_from_store(f'{TEAM_ID}/slippage.csv')
df_trades = read_csv_from_store(f'{TEAM_ID}/trades.csv')
df_targets = read_csv_from_store(f'{TEAM_ID}/targets.csv')
df_orders = read_csv_from_store(f'{TEAM_ID}/order_events.csv')

# Snapshots are the only mandatory dataset; everything else is optional.
if df_snapshots is None:
    raise ValueError('daily_snapshots.csv is required. Run a backtest first.')

# Parse dates, drop rows whose date cannot be parsed, and order chronologically.
for frame in [df_snapshots, df_positions, df_signals, df_slippage, df_trades, df_targets, df_orders]:
    if frame is not None and 'date' in frame.columns:
        frame['date'] = pd.to_datetime(frame['date'], errors='coerce')
        frame.dropna(subset=['date'], inplace=True)
        frame.sort_values('date', inplace=True)
        frame.reset_index(drop=True, inplace=True)

# Snapshots core
# Both columns are used unconditionally below; fail here with a clear message
# instead of a KeyError in a later cell.
if 'date' not in df_snapshots.columns:
    raise ValueError('daily_snapshots.csv is missing date column')
if 'nav' not in df_snapshots.columns:
    raise ValueError('daily_snapshots.csv is missing nav column')

df_snapshots = to_numeric(df_snapshots, [
    'nav', 'cash', 'gross_exposure', 'net_exposure', 'long_exposure', 'short_exposure',
    'daily_pnl', 'cumulative_pnl', 'daily_slippage', 'num_positions', 'estimated_vol'
])
# Daily simple return from consecutive NAV observations (first row is NaN).
df_snapshots['daily_return'] = df_snapshots['nav'].pct_change()

# Positions core
if df_positions is not None:
    df_positions = to_numeric(df_positions, [
        'invested', 'quantity', 'price', 'market_value', 'weight', 'unrealized_pnl',
        'daily_pnl', 'daily_unrealized_pnl', 'daily_realized_pnl', 'daily_fees',
        'daily_dividends', 'daily_total_net_pnl', 'avg_price'
    ])

# Slippage core
if df_slippage is not None:
    df_slippage = to_numeric(df_slippage, ['quantity', 'expected_price', 'fill_price', 'slippage_dollars'])

# Targets core
if df_targets is not None:
    df_targets = to_numeric(df_targets, [
        'start_w', 'weekly_target_w', 'scheduled_fraction', 'scheduled_w', 'actual_w', 'scale_day'
    ])

# Orders core
if df_orders is not None:
    df_orders = to_numeric(df_orders, [
        'quantity', 'fill_quantity', 'fill_price', 'limit_price', 'market_price_at_submit'
    ])
    if 'tag' in df_orders.columns:
        # Pull the "tier=..." entry out of each order tag; untagged orders become 'unknown'.
        df_orders['tier_tag'] = df_orders['tag'].apply(lambda x: parse_tag_value(x, 'tier')).fillna('unknown')

print(f"Snapshots: {len(df_snapshots):,} rows | {df_snapshots['date'].min():%Y-%m-%d} -> {df_snapshots['date'].max():%Y-%m-%d}")
for label, frame in [
    ('Positions', df_positions), ('Signals', df_signals), ('Slippage', df_slippage),
    ('Trades', df_trades), ('Targets', df_targets), ('Orders', df_orders)
]:
    print(f'{label}: {0 if frame is None else len(frame):,} rows')


## 5) Benchmark and Risk-Free Series (Dynamic)


In [None]:
# Benchmark ticker from strategy metadata.
benchmark_symbol = STRATEGY.get('benchmark_symbol')
if not benchmark_symbol:
    raise ValueError('Could not extract benchmark symbol from main.py. Please set benchmark explicitly in strategy code.')

bench_symbol = qb.AddEquity(benchmark_symbol, Resolution.Daily).Symbol
start = df_snapshots['date'].min()
# One extra day so the last snapshot date is inside the requested range.
end = df_snapshots['date'].max() + pd.Timedelta(days=1)

bench_hist = qb.History(bench_symbol, start, end, Resolution.Daily)
if bench_hist is None or len(bench_hist) == 0:
    raise ValueError(f'Unable to load benchmark history for {benchmark_symbol}')

bench_df = bench_hist.copy()
if isinstance(bench_df.index, pd.MultiIndex):
    # The symbol may sit on any index level; take the first level where it is found.
    extracted = None
    for level in range(bench_df.index.nlevels):
        try:
            extracted = bench_df.xs(bench_symbol, level=level)
            break
        except Exception:
            continue
    if extracted is not None:
        bench_df = extracted

if 'close' in bench_df.columns:
    bench_close = bench_df['close']
elif 'value' in bench_df.columns:
    bench_close = bench_df['value']
else:
    raise ValueError('Benchmark history does not include close/value column')

bench_close = pd.Series(bench_close)
bench_close.index = pd.to_datetime(bench_close.index)
if getattr(bench_close.index, 'tz', None) is not None:
    bench_close.index = bench_close.index.tz_localize(None)
# Collapse duplicate timestamps to their last value.
bench_close = bench_close.groupby(bench_close.index).last().sort_index()

df_bench = pd.DataFrame({'date': bench_close.index, 'bench_close': bench_close.values})
df_bench['bench_return'] = df_bench['bench_close'].pct_change()
df_bench = df_bench.dropna(subset=['bench_return']).copy()

# Risk-free proxy (not strategy-specific).
rf_symbol = qb.AddEquity('SGOV', Resolution.Daily).Symbol
# Start a little early so the first snapshot date already has a return.
rf_hist = qb.History(rf_symbol, start - pd.Timedelta(days=10), end, Resolution.Daily)

# Default when no risk-free history is available.
df_snapshots['rf_daily'] = 0.0
if rf_hist is not None and len(rf_hist) > 0:
    rf_prices = rf_hist['close'].reset_index()
    date_col = 'time' if 'time' in rf_prices.columns else rf_prices.columns[0]
    rf_prices = rf_prices.rename(columns={date_col: 'date', 'close': 'rf_close'})
    rf_prices['date'] = pd.to_datetime(rf_prices['date']).dt.tz_localize(None).dt.normalize()
    rf_prices['rf_daily'] = rf_prices['rf_close'].pct_change()

    # Drop the 0.0 placeholder before merging. If it were kept and combined
    # with the merged column, every unmatched day would already be 0.0 and the
    # forward-fill below would have nothing left to fill.
    df_snapshots = df_snapshots.drop(columns=['rf_daily']).merge(
        rf_prices[['date', 'rf_daily']], on='date', how='left'
    )

    # Carry the last known rate across gaps; leading gaps fall back to 0.
    df_snapshots['rf_daily'] = df_snapshots['rf_daily'].ffill().fillna(0.0)

df_snapshots['excess_return'] = df_snapshots['daily_return'] - df_snapshots['rf_daily']

# Strategy and benchmark returns on their common dates only.
merged = df_snapshots[['date', 'daily_return']].merge(
    df_bench[['date', 'bench_return']], on='date', how='inner'
).dropna().sort_values('date').reset_index(drop=True)

returns = df_snapshots['daily_return'].dropna()
excess_returns = df_snapshots['excess_return'].dropna()
rf_annual = float(df_snapshots['rf_daily'].mean() * TRADING_DAYS)

print(f'Benchmark: {benchmark_symbol} | overlap rows: {len(merged):,}')
print(f'Risk-free proxy: SGOV | effective annualized RF: {rf_annual * 100:.2f}%')


## 6) Metric Helpers


In [None]:
def sharpe_ratio(rets, risk_free_rate=0.0, periods_per_year=252):
    """Annualized Sharpe ratio of a periodic return series.

    ``risk_free_rate`` is annual and is spread evenly over
    ``periods_per_year``. Returns NaN when the excess returns have zero
    standard deviation.
    """
    excess = rets - risk_free_rate / periods_per_year
    volatility = excess.std()
    if volatility == 0:
        return np.nan
    mean_excess = excess.mean()
    return (mean_excess / volatility) * np.sqrt(periods_per_year)


def sortino_ratio(rets, risk_free_rate=0.0, periods_per_year=252):
    """Annualized Sortino ratio of a periodic return series.

    The downside deviation is the root-mean-square of the negative excess
    returns only. Returns NaN when that deviation is zero.
    """
    excess = rets - risk_free_rate / periods_per_year
    losses = excess[excess < 0]
    downside_dev = np.sqrt((losses ** 2).mean())
    if downside_dev == 0:
        return np.nan
    return (excess.mean() / downside_dev) * np.sqrt(periods_per_year)


def calmar_ratio(rets, periods_per_year=252):
    """Calmar ratio: annualized compound return divided by maximum drawdown.

    Args:
        rets: Series of periodic simple returns.
        periods_per_year: Number of return periods in one year.

    Returns:
        The ratio, or NaN when ``rets`` is empty or the series never draws
        down.
    """
    # An empty series has no last cumulative value to read.
    if len(rets) == 0:
        return np.nan
    cumulative = (1 + rets).cumprod()
    total_return = cumulative.iloc[-1] - 1
    years = len(rets) / periods_per_year
    if years <= 0:
        return np.nan
    ann_return = (1 + total_return) ** (1 / years) - 1
    running_max = cumulative.cummax()
    max_dd = abs(((cumulative / running_max) - 1).min())
    if max_dd == 0:
        return np.nan
    return ann_return / max_dd


def max_drawdown(rets):
    """Largest peak-to-trough decline of the compounded return path, as a positive fraction."""
    wealth = (1 + rets).cumprod()
    drawdowns = (wealth / wealth.cummax()) - 1
    return abs(drawdowns.min())


def probabilistic_sharpe_ratio(rets, sr, benchmark_sr=0.0):
    """Probabilistic Sharpe Ratio: P(true Sharpe > ``benchmark_sr``).

    Implements ``Phi((sr - sr*) * sqrt(n - 1) / sqrt(1 - g3*sr + (g4 - 1)/4 * sr^2))``
    where ``g3`` is the skewness and ``g4`` the raw (non-excess) kurtosis of
    ``rets``.

    Args:
        rets: Return observations used for ``n``, skewness and kurtosis.
        sr: Observed Sharpe ratio. The formula assumes it is expressed per
            observation period of ``rets`` (not annualized).
        benchmark_sr: Threshold Sharpe ratio, in the same units as ``sr``.

    Returns:
        A probability in [0, 1], or NaN with fewer than 3 observations or when
        the estimated variance is not positive.
    """
    n = len(rets)
    if n < 3:
        return np.nan
    skew = sps.skew(rets)
    # The formula needs raw kurtosis (normal = 3). scipy returns excess
    # kurtosis (normal = 0) unless fisher=False is passed.
    kurt = sps.kurtosis(rets, fisher=False)
    variance = (1 - skew * sr + ((kurt - 1) / 4) * sr ** 2) / (n - 1)
    if variance <= 0:
        return np.nan
    return sps.norm.cdf((sr - benchmark_sr) / np.sqrt(variance))


def historical_var(rets, confidence=0.95):
    """Historical Value-at-Risk: the negated lower-tail percentile of ``rets``.

    A positive result is a loss of that size at the given confidence level.
    """
    tail_pct = (1 - confidence) * 100
    return -np.percentile(rets, tail_pct)


def historical_cvar(rets, confidence=0.95):
    """Historical conditional VaR: the negated mean of returns at or below the VaR cutoff.

    Returns NaN when no observation falls in the tail.
    """
    cutoff = historical_var(rets, confidence)
    losses = rets[rets <= -cutoff]
    if len(losses) == 0:
        return np.nan
    return -losses.mean()

print('Helper functions defined')


## Plot 1: Performance Summary Table


In [None]:
if len(returns) == 0:
    print('No return series available')
else:
    # Keep every NAV observation, including the first one (whose daily return
    # is NaN), so total return spans the same path as `returns`.
    df_tmp = df_snapshots[['date', 'nav']].dropna().copy()
    df_tmp['year'] = df_tmp['date'].dt.year
    df_tmp['month'] = df_tmp['date'].dt.month

    monthly = df_tmp.groupby(['year', 'month']).agg(nav_start=('nav', 'first'), nav_end=('nav', 'last')).reset_index()
    # Measure each month from the previous month's closing NAV so the return
    # earned on the first trading day of the month is included. The first
    # month has no prior close and falls back to its own first NAV.
    month_base = monthly['nav_end'].shift(1).fillna(monthly['nav_start'])
    monthly['monthly_return'] = (monthly['nav_end'] / month_base) - 1

    total_return = (df_tmp['nav'].iloc[-1] / df_tmp['nav'].iloc[0]) - 1
    years = len(returns) / TRADING_DAYS
    ann_return = (1 + total_return) ** (1 / years) - 1 if years > 0 else np.nan
    ann_vol = returns.std() * np.sqrt(TRADING_DAYS)

    sr = sharpe_ratio(returns, rf_annual, TRADING_DAYS)
    so = sortino_ratio(returns, rf_annual, TRADING_DAYS)
    ca = calmar_ratio(returns, TRADING_DAYS)
    mdd = max_drawdown(returns)

    # PSR uses n daily observations, so both the observed and the threshold
    # Sharpe must be per-day values. The SR*=1 label refers to an annualized
    # threshold of 1, hence the division by sqrt(TRADING_DAYS).
    sr_per_period = sr / np.sqrt(TRADING_DAYS)
    psr0 = probabilistic_sharpe_ratio(returns, sr_per_period, benchmark_sr=0.0)
    psr1 = probabilistic_sharpe_ratio(returns, sr_per_period, benchmark_sr=1.0 / np.sqrt(TRADING_DAYS))

    metrics = pd.DataFrame({
        'Metric': [
            'Total Return', 'Annualized Return', 'Annualized Volatility',
            'Sharpe Ratio', 'Sortino Ratio', 'Calmar Ratio',
            'PSR (SR*=0)', 'PSR (SR*=1)', 'Maximum Drawdown',
            'Best Day', 'Worst Day', 'Best Month', 'Worst Month',
            'Win Rate (Daily)', 'Win Rate (Monthly)', 'Skewness', 'Kurtosis'
        ],
        'Value': [
            f'{total_return * 100:.2f}%', f'{ann_return * 100:.2f}%', f'{ann_vol * 100:.2f}%',
            f'{sr:.4f}', f'{so:.4f}', f'{ca:.4f}',
            f'{psr0:.4f}', f'{psr1:.4f}', f'{mdd * 100:.2f}%',
            f'{returns.max() * 100:.4f}%', f'{returns.min() * 100:.4f}%',
            f'{monthly["monthly_return"].max() * 100:.2f}%', f'{monthly["monthly_return"].min() * 100:.2f}%',
            f'{(returns > 0).mean() * 100:.1f}%', f'{(monthly["monthly_return"] > 0).mean() * 100:.1f}%',
            f'{returns.skew():.4f}', f'{returns.kurtosis():.4f}'
        ]
    })

    print('=' * 80)
    print('PERFORMANCE SUMMARY')
    print('=' * 80)
    print(f'Period: {df_tmp["date"].min():%Y-%m-%d} to {df_tmp["date"].max():%Y-%m-%d}')
    print(f'Risk-free proxy annualized: {rf_annual * 100:.2f}%')
    display(metrics)


## Plot 2: Cumulative Return + Underwater Drawdown


In [None]:
# Two stacked panels sharing the date axis: cumulative return (strategy vs
# benchmark) on top, underwater drawdown below.
if len(returns) == 0:
    print('No return series available')
else:
    df_plot = df_snapshots[['date', 'daily_return']].dropna().copy()
    # Compounded growth of 1 unit, its running peak, and the distance below that peak.
    df_plot['cum'] = (1 + df_plot['daily_return']).cumprod()
    df_plot['run_max'] = df_plot['cum'].cummax()
    df_plot['drawdown'] = (df_plot['cum'] / df_plot['run_max']) - 1

    # Benchmark rebased to the same starting day used in strategy returns.
    bench_aligned = df_bench.merge(df_plot[['date']], on='date', how='inner').copy()
    bench_aligned['bench_cum'] = (1 + bench_aligned['bench_return']).cumprod()

    fig, axes = plt.subplots(2, 1, figsize=(14, 10), sharex=True)

    axes[0].plot(df_plot['date'], (df_plot['cum'] - 1) * 100, linewidth=2, label='Strategy')
    # The benchmark line is drawn only when it shares at least one date with the strategy.
    if len(bench_aligned):
        axes[0].plot(bench_aligned['date'], (bench_aligned['bench_cum'] - 1) * 100, linewidth=2, alpha=0.8,
                     label=f'{benchmark_symbol}')
    axes[0].axhline(0, color='black', alpha=0.3)
    axes[0].set_title('Cumulative Return', fontsize=14, fontweight='bold')
    axes[0].set_ylabel('Return (%)')
    axes[0].legend(loc='upper left')
    axes[0].grid(True, alpha=0.3)

    axes[1].fill_between(df_plot['date'], 0, df_plot['drawdown'] * 100, color='firebrick', alpha=0.5)
    axes[1].plot(df_plot['date'], df_plot['drawdown'] * 100, color='darkred', linewidth=1)
    axes[1].set_title('Underwater (Drawdown)', fontsize=14, fontweight='bold')
    axes[1].set_xlabel('Date')
    axes[1].set_ylabel('Drawdown (%)')
    axes[1].grid(True, alpha=0.3)

    # Mark and label the deepest drawdown point.
    i = df_plot['drawdown'].idxmin()
    axes[1].scatter(df_plot.loc[i, 'date'], df_plot.loc[i, 'drawdown'] * 100, color='black', zorder=5)
    axes[1].annotate(
        f"Max DD: {df_plot.loc[i, 'drawdown'] * 100:.2f}%",
        (df_plot.loc[i, 'date'], df_plot.loc[i, 'drawdown'] * 100),
        xytext=(10, 10), textcoords='offset points', fontsize=9,
        bbox=dict(boxstyle='round', fc='white', ec='black', alpha=0.8)
    )

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()


## Plot 3: Monthly Return Heatmap


In [None]:
# Year-by-month heatmap of monthly returns, in percent.
if len(returns) == 0:
    print('No return series available')
else:
    df_m = df_snapshots[['date', 'nav']].dropna().copy()
    df_m['year'] = df_m['date'].dt.year
    df_m['month'] = df_m['date'].dt.month

    monthly = df_m.groupby(['year', 'month']).agg(nav_start=('nav', 'first'), nav_end=('nav', 'last')).reset_index()
    # Measure each month from the previous month's closing NAV so the return
    # earned on the first trading day of the month is included. The first
    # month has no prior close and falls back to its own first NAV.
    month_base = monthly['nav_end'].shift(1).fillna(monthly['nav_start'])
    monthly['monthly_return'] = (monthly['nav_end'] / month_base) - 1

    heat = monthly.pivot(index='year', columns='month', values='monthly_return') * 100
    month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    # Only months present in the data appear as columns; relabel those.
    heat.columns = [month_labels[m - 1] for m in heat.columns]

    # Figure height grows with the number of years shown.
    plt.figure(figsize=(12, max(4, 0.8 * len(heat))))
    sns.heatmap(heat, annot=True, fmt='.1f', cmap='RdYlGn', center=0, linewidths=1,
                cbar_kws={'label': 'Return (%)'})
    plt.title('Monthly Returns Heatmap (%)', fontsize=14, fontweight='bold')
    plt.xlabel('Month')
    plt.ylabel('Year')
    plt.tight_layout()
    plt.show()


## Plot 4: Rolling Sharpe, Sortino, Volatility


In [None]:
# Rolling Sharpe, Sortino and annualized volatility over a data-dependent window.
if len(returns) < 30:
    print('Insufficient observations for rolling metrics')
else:
    # A third of the sample, clamped to between 30 and 252 observations.
    window = min(252, max(30, len(returns) // 3))

    # Keep date, return and risk-free rate on the same rows. Dropping NaNs from
    # each column separately would shift the risk-free series by one day
    # relative to the returns (only the return column has a leading NaN).
    roll = (
        df_snapshots[['date', 'daily_return', 'rf_daily']]
        .dropna(subset=['daily_return'])
        .reset_index(drop=True)
    )
    ret_vals = roll['daily_return']
    rf_vals = roll['rf_daily'].fillna(0.0)

    ex = ret_vals - rf_vals

    # Mean and standard deviation both use excess returns, matching sharpe_ratio().
    rolling_sharpe = (ex.rolling(window).mean() / ex.rolling(window).std()) * np.sqrt(TRADING_DAYS)

    def _rolling_sortino(ex_i):
        """Annualized Sortino of one window of excess returns (NumPy array)."""
        down = ex_i[ex_i < 0]
        if len(down) == 0:
            return np.nan
        dstd = np.sqrt((down ** 2).mean())
        if dstd == 0:
            return np.nan
        return (ex_i.mean() / dstd) * np.sqrt(TRADING_DAYS)

    rolling_sortino = ex.rolling(window).apply(_rolling_sortino, raw=True)
    rolling_vol = ret_vals.rolling(window).std() * np.sqrt(TRADING_DAYS)

    # All series come from `roll`, so they share one index and length.
    plot_dates = roll[['date']]
    n = len(roll)

    fig, axes = plt.subplots(3, 1, figsize=(14, 12), sharex=True)

    axes[0].plot(plot_dates['date'].iloc[:n], rolling_sharpe.iloc[:n], linewidth=2, color='steelblue')
    axes[0].axhline(0, color='red', linestyle='--', alpha=0.5)
    axes[0].axhline(1, color='green', linestyle='--', alpha=0.5)
    axes[0].set_title(f'Rolling Sharpe ({window}D)', fontsize=13, fontweight='bold')
    axes[0].set_ylabel('Sharpe')
    axes[0].grid(True, alpha=0.3)

    axes[1].plot(plot_dates['date'].iloc[:n], rolling_sortino.iloc[:n], linewidth=2, color='darkorange')
    axes[1].axhline(0, color='red', linestyle='--', alpha=0.5)
    axes[1].axhline(1, color='green', linestyle='--', alpha=0.5)
    axes[1].set_title(f'Rolling Sortino ({window}D)', fontsize=13, fontweight='bold')
    axes[1].set_ylabel('Sortino')
    axes[1].grid(True, alpha=0.3)

    axes[2].plot(plot_dates['date'].iloc[:n], 100 * rolling_vol.iloc[:n], linewidth=2, color='purple')
    axes[2].set_title(f'Rolling Volatility ({window}D)', fontsize=13, fontweight='bold')
    axes[2].set_ylabel('Vol (%)')
    axes[2].set_xlabel('Date')
    axes[2].grid(True, alpha=0.3)

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()


## Plot 5: Rolling Beta + Return Scatter vs Benchmark


In [None]:
# Left panel: rolling beta against the benchmark for several windows.
# Right panel: daily return scatter with a fitted line and summary statistics.
if len(merged) < 30:
    print('Insufficient benchmark overlap for beta analytics')
else:
    # Only windows that fit inside the overlapping history are used.
    windows = [w for w in [20, 60, 252] if w <= len(merged)]

    beta_frame = merged.copy()
    for w in windows:
        # Rolling beta = rolling covariance with the benchmark / rolling benchmark variance.
        cov = beta_frame['daily_return'].rolling(w).cov(beta_frame['bench_return'])
        var = beta_frame['bench_return'].rolling(w).var()
        beta_frame[f'beta_{w}'] = cov / var

    # Full-sample statistics over all overlapping days.
    full_beta = np.cov(beta_frame['daily_return'], beta_frame['bench_return'], ddof=1)[0, 1] / np.var(beta_frame['bench_return'], ddof=1)
    full_corr = beta_frame['daily_return'].corr(beta_frame['bench_return'])
    full_r2 = full_corr ** 2
    # Mean daily return left after removing beta times the benchmark mean, compounded over TRADING_DAYS.
    full_alpha = (1 + (beta_frame['daily_return'].mean() - full_beta * beta_frame['bench_return'].mean())) ** TRADING_DAYS - 1

    # Separate betas for benchmark up days and down days (each needs more than 10 observations).
    up = beta_frame['bench_return'] > 0
    down = beta_frame['bench_return'] < 0
    up_beta = np.nan
    down_beta = np.nan
    if up.sum() > 10:
        up_beta = np.cov(beta_frame.loc[up, 'daily_return'], beta_frame.loc[up, 'bench_return'], ddof=1)[0, 1] / np.var(beta_frame.loc[up, 'bench_return'], ddof=1)
    if down.sum() > 10:
        down_beta = np.cov(beta_frame.loc[down, 'daily_return'], beta_frame.loc[down, 'bench_return'], ddof=1)[0, 1] / np.var(beta_frame.loc[down, 'bench_return'], ddof=1)

    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    for w in windows:
        axes[0].plot(beta_frame['date'], beta_frame[f'beta_{w}'], linewidth=2, label=f'{w}D beta')
    axes[0].axhline(1.0, color='black', linestyle='--', alpha=0.6)
    axes[0].axhline(0.0, color='gray', linestyle=':', alpha=0.8)
    axes[0].set_title(f'Rolling Beta vs {benchmark_symbol}', fontsize=13, fontweight='bold')
    axes[0].set_ylabel('Beta')
    axes[0].set_xlabel('Date')
    axes[0].legend(loc='upper left')
    axes[0].grid(True, alpha=0.3)

    # Degree-1 polynomial fit of strategy returns on benchmark returns, drawn across the observed x range.
    x = beta_frame['bench_return'].values
    y = beta_frame['daily_return'].values
    slope, intercept = np.polyfit(x, y, 1)
    xline = np.linspace(x.min(), x.max(), 100)
    yline = slope * xline + intercept

    axes[1].scatter(100 * x, 100 * y, alpha=0.35, s=20, color='#1f77b4')
    axes[1].plot(100 * xline, 100 * yline, color='crimson', linewidth=2)
    axes[1].axhline(0, color='black', alpha=0.5)
    axes[1].axvline(0, color='black', alpha=0.5)
    axes[1].set_title(f'Daily Return Scatter vs {benchmark_symbol}', fontsize=13, fontweight='bold')
    axes[1].set_xlabel(f'{benchmark_symbol} Return (%)')
    axes[1].set_ylabel('Strategy Return (%)')
    axes[1].grid(True, alpha=0.3)

    # Statistics box in the top-left corner of the scatter panel.
    summary_text = "\n".join([
        f'Beta: {full_beta:.3f}',
        f'Corr: {full_corr:.3f}',
        f'R2: {full_r2:.3f}',
        f'Alpha (ann): {full_alpha * 100:.2f}%',
        f'Up beta: {up_beta:.3f}',
        f'Down beta: {down_beta:.3f}',
    ])
    axes[1].text(0.02, 0.98, summary_text, transform=axes[1].transAxes, va='top', fontsize=9,
                 bbox=dict(boxstyle='round', fc='white', ec='gray', alpha=0.9))

    plt.tight_layout()
    plt.show()


## Plot 6: Exposure Time Series (Gross / Net / Long / Short)


In [None]:
# Plot whichever exposure columns the snapshots provide; at least two are required.
needed = ['gross_exposure', 'net_exposure', 'long_exposure', 'short_exposure']
cols = [c for c in needed if c in df_snapshots.columns]

if len(cols) < 2:
    print('Insufficient exposure columns in snapshots')
else:
    fig, ax = plt.subplots(figsize=(14, 6))
    for c in cols:
        # Values are multiplied by 100 for the "% NAV" axis.
        # NOTE(review): assumes exposures are stored as fractions of NAV - confirm against the logger.
        ax.plot(df_snapshots['date'], 100 * df_snapshots[c], linewidth=1.8, label=c)

    ax.axhline(0, color='black', alpha=0.4)
    ax.set_title('Portfolio Exposures Over Time', fontsize=14, fontweight='bold')
    ax.set_xlabel('Date')
    ax.set_ylabel('Exposure (% NAV)')
    ax.legend(loc='upper left')
    ax.grid(True, alpha=0.3)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

    print('Exposure stats:')
    display(df_snapshots[cols].describe())


## Plot 7: VaR / CVaR (Static + Rolling)


In [None]:
# Static VaR/CVaR table at 95% and 99%, followed by rolling 95% VaR and CVaR.
if len(returns) < 30:
    print('Insufficient data for VaR/CVaR')
else:
    rows = []
    for cl in [0.95, 0.99]:
        v = historical_var(returns, cl)
        c = historical_cvar(returns, cl)
        rows.append({
            'Confidence': f'{int(cl * 100)}%',
            'VaR (daily)': v,
            'CVaR (daily)': c,
            # Annualized figures scale the daily value by sqrt(TRADING_DAYS).
            'VaR (annualized)': v * np.sqrt(TRADING_DAYS),
            'CVaR (annualized)': c * np.sqrt(TRADING_DAYS),
        })
    stat = pd.DataFrame(rows)
    print('Static VaR/CVaR')
    display(stat)

    windows = [w for w in [20, 60, 252] if w <= len(returns)]
    # Skip the first snapshot date to line up with `returns`.
    # NOTE(review): assumes only the first daily_return is NaN - confirm.
    plot_dates = df_snapshots[['date']].dropna().iloc[1:].reset_index(drop=True)

    fig, axes = plt.subplots(2, 1, figsize=(14, 10), sharex=True)
    ret_reset = returns.reset_index(drop=True)

    for w in windows:
        rv = ret_reset.rolling(w).apply(lambda x: historical_var(pd.Series(x), 0.95), raw=False)
        rc = ret_reset.rolling(w).apply(lambda x: historical_cvar(pd.Series(x), 0.95), raw=False)
        # Truncate to the shortest series so the x and y lengths match.
        n = min(len(plot_dates), len(rv), len(rc))
        axes[0].plot(plot_dates['date'].iloc[:n], 100 * rv.iloc[:n], linewidth=1.8, label=f'VaR {w}D')
        axes[1].plot(plot_dates['date'].iloc[:n], 100 * rc.iloc[:n], linewidth=1.8, label=f'CVaR {w}D')

    axes[0].set_title('Rolling 95% VaR', fontsize=13, fontweight='bold')
    axes[0].set_ylabel('VaR (%)')
    axes[0].legend(loc='upper left')
    axes[0].grid(True, alpha=0.3)

    axes[1].set_title('Rolling 95% CVaR', fontsize=13, fontweight='bold')
    axes[1].set_ylabel('CVaR (%)')
    axes[1].set_xlabel('Date')
    axes[1].legend(loc='upper left')
    axes[1].grid(True, alpha=0.3)

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()


## Plot 8: Return Distribution (Daily + Monthly)


In [None]:
# Histograms of daily and monthly returns, each with zero and mean reference lines.
if len(returns) == 0:
    print('No returns available')
else:
    df_m = df_snapshots[['date', 'nav']].dropna().copy()
    df_m['year'] = df_m['date'].dt.year
    df_m['month'] = df_m['date'].dt.month
    monthly = df_m.groupby(['year', 'month']).agg(nav_start=('nav', 'first'), nav_end=('nav', 'last')).reset_index()
    # Measure each month from the previous month's closing NAV so the return
    # earned on the first trading day of the month is included. The first
    # month has no prior close and falls back to its own first NAV.
    month_base = monthly['nav_end'].shift(1).fillna(monthly['nav_start'])
    monthly['monthly_return'] = (monthly['nav_end'] / month_base) - 1

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    axes[0].hist(100 * returns, bins=50, color='steelblue', alpha=0.75, edgecolor='black')
    axes[0].axvline(0, color='red', linestyle='--', linewidth=2)
    axes[0].axvline(100 * returns.mean(), color='green', linestyle='--', linewidth=2,
                    label=f'Mean: {100 * returns.mean():.3f}%')
    axes[0].set_title('Daily Return Distribution', fontsize=13, fontweight='bold')
    axes[0].set_xlabel('Daily Return (%)')
    axes[0].set_ylabel('Frequency')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    axes[1].hist(100 * monthly['monthly_return'], bins=20, color='coral', alpha=0.75, edgecolor='black')
    axes[1].axvline(0, color='red', linestyle='--', linewidth=2)
    axes[1].axvline(100 * monthly['monthly_return'].mean(), color='green', linestyle='--', linewidth=2,
                    label=f'Mean: {100 * monthly["monthly_return"].mean():.2f}%')
    axes[1].set_title('Monthly Return Distribution', fontsize=13, fontweight='bold')
    axes[1].set_xlabel('Monthly Return (%)')
    axes[1].set_ylabel('Frequency')
    axes[1].legend()
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()


## Plot 9: Concentration Risk (Top-N Shares + HHI + Effective N)


In [None]:
# Concentration over time: share of absolute weight held by the top-N
# positions, the Herfindahl-Hirschman index (HHI) and its inverse.
if df_positions is None or not {'date', 'symbol', 'weight'}.issubset(df_positions.columns):
    print('positions.csv missing required columns for concentration plot')
else:
    pos = df_positions.copy()

    # When the flag is available, keep only rows marked as invested.
    if 'invested' in pos.columns:
        inv = pd.to_numeric(pos['invested'], errors='coerce').fillna(0)
        pos = pos[inv > 0].copy()

    pos['abs_weight'] = pos['weight'].abs()

    recs = []
    for d, g in pos.groupby('date'):
        w = g['abs_weight'].dropna()
        tot = w.sum()
        if tot <= 0:
            continue
        # Normalize to shares of total absolute weight, largest first.
        s = (w / tot).sort_values(ascending=False)
        hhi = float((s ** 2).sum())
        recs.append({
            'date': d,
            'top_1_share': float(s.iloc[:1].sum()),
            'top_3_share': float(s.iloc[:3].sum()),
            'top_5_share': float(s.iloc[:5].sum()),
            'top_10_share': float(s.iloc[:10].sum()),
            'hhi': hhi,
            'effective_n': (1.0 / hhi) if hhi > 0 else np.nan,
            'n_positions': int(len(g))
        })

    # Check before building the frame: an empty frame has no 'date' column,
    # so sorting it would raise KeyError.
    if not recs:
        print('No concentration rows computed')
    else:
        conc = pd.DataFrame(recs).sort_values('date')

        fig, axes = plt.subplots(2, 1, figsize=(14, 10), sharex=True)

        axes[0].plot(conc['date'], 100 * conc['top_1_share'], linewidth=1.8, label='Top 1')
        axes[0].plot(conc['date'], 100 * conc['top_3_share'], linewidth=1.8, label='Top 3')
        axes[0].plot(conc['date'], 100 * conc['top_5_share'], linewidth=1.8, label='Top 5')
        axes[0].plot(conc['date'], 100 * conc['top_10_share'], linewidth=1.8, label='Top 10')
        axes[0].set_title('Top-N Share of Gross Exposure', fontsize=13, fontweight='bold')
        axes[0].set_ylabel('Share (%)')
        axes[0].legend(loc='upper right')
        axes[0].grid(True, alpha=0.3)

        axes[1].plot(conc['date'], conc['hhi'], linewidth=1.8, color='crimson', label='HHI')
        axes[1].plot(conc['date'], conc['effective_n'], linewidth=1.8, color='steelblue', label='Effective N')
        axes[1].set_title('HHI and Effective Number of Bets', fontsize=13, fontweight='bold')
        axes[1].set_ylabel('Value')
        axes[1].set_xlabel('Date')
        axes[1].legend(loc='upper right')
        axes[1].grid(True, alpha=0.3)

        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()

        latest = conc.iloc[-1]
        print('Latest concentration snapshot:')
        print(f"Top1={100*latest['top_1_share']:.2f}% | Top3={100*latest['top_3_share']:.2f}% | "
              f"Top5={100*latest['top_5_share']:.2f}% | HHI={latest['hhi']:.4f} | EffectiveN={latest['effective_n']:.2f}")


## Plot 10: Slippage Distribution + Cumulative Slippage


In [None]:
# Per-fill slippage distribution in basis points, plus cumulative dollar slippage over time.
if df_slippage is None or len(df_slippage) == 0:
    print('No slippage data available')
else:
    slp = df_slippage.copy()

    # 'date' is needed for the daily aggregation below, so it is checked with
    # the other required columns instead of failing later with a KeyError.
    if not {'date', 'slippage_dollars', 'expected_price', 'quantity'}.issubset(slp.columns):
        print('slippage.csv missing required columns')
    else:
        # Expected notional per fill; zero notional becomes NaN to avoid dividing by zero.
        denom = (slp['expected_price'].abs() * slp['quantity'].abs()).replace(0, np.nan)
        slp['slippage_bps'] = 10000.0 * slp['slippage_dollars'] / denom

        daily = slp.groupby('date', as_index=False)['slippage_dollars'].sum().sort_values('date')
        daily['cumulative_slippage'] = daily['slippage_dollars'].cumsum()

        fig, axes = plt.subplots(1, 2, figsize=(14, 5))

        axes[0].hist(slp['slippage_bps'].dropna(), bins=50, color='steelblue', alpha=0.75, edgecolor='black')
        axes[0].axvline(0, color='red', linestyle='--', linewidth=2)
        med = float(slp['slippage_bps'].median())
        axes[0].axvline(med, color='green', linestyle='--', linewidth=2, label=f'Median: {med:.2f} bps')
        axes[0].set_title('Per-Fill Slippage Distribution', fontsize=13, fontweight='bold')
        axes[0].set_xlabel('Slippage (bps)')
        axes[0].set_ylabel('Frequency')
        axes[0].legend()
        axes[0].grid(True, alpha=0.3)

        axes[1].plot(daily['date'], daily['cumulative_slippage'], linewidth=2, color='coral')
        axes[1].fill_between(daily['date'], 0, daily['cumulative_slippage'], alpha=0.2, color='coral')
        axes[1].set_title('Cumulative Slippage Over Time', fontsize=13, fontweight='bold')
        axes[1].set_xlabel('Date')
        axes[1].set_ylabel('Cumulative Slippage ($)')
        axes[1].grid(True, alpha=0.3)

        plt.tight_layout()
        plt.show()

        print('Slippage summary:')
        print(f"Fills: {len(slp):,}")
        print(f"Total slippage: ${slp['slippage_dollars'].sum():,.2f}")
        print(f"Mean bps: {slp['slippage_bps'].mean():.2f} | Median bps: {slp['slippage_bps'].median():.2f}")

## Plot 11: P&L by Long vs Short


In [None]:
# Plot 11: daily and cumulative P&L split by position side (sign of weight).
if df_positions is None or len(df_positions) == 0:
    print('No positions data available')
else:
    pos = df_positions.copy()

    # Use the first available P&L column, in order of preference.
    pnl_col = None
    for c in ['daily_total_net_pnl', 'daily_pnl', 'daily_unrealized_pnl']:
        if c in pos.columns:
            pnl_col = c
            break

    # 'date' is needed for the per-day grouping below, so guard it with 'weight'.
    if pnl_col is None or not {'date', 'weight'}.issubset(pos.columns):
        print('Required columns for side-PnL plot not available')
    else:
        pos['side'] = np.where(pos['weight'] > 0, 'Long', np.where(pos['weight'] < 0, 'Short', 'Flat'))
        pos = pos[pos['side'] != 'Flat']

        # Date-indexed frame with one P&L column per side; guarantee both columns exist.
        pnl = pos.groupby(['date', 'side'])[pnl_col].sum().unstack(fill_value=0).sort_index()
        for c in ['Long', 'Short']:
            if c not in pnl.columns:
                pnl[c] = 0.0

        fig, axes = plt.subplots(2, 1, figsize=(14, 10), sharex=True)

        # Stack Short on top of Long only when both have the same sign. With
        # opposite signs, stacking would draw the Short bar back over the Long
        # bar, so each side starts from zero instead.
        same_sign = np.sign(pnl['Long']) == np.sign(pnl['Short'])
        short_base = pnl['Long'].where(same_sign, 0.0)

        axes[0].bar(pnl.index, pnl['Long'], color='#2ca02c', alpha=0.7, label='Long')
        axes[0].bar(pnl.index, pnl['Short'], bottom=short_base, color='#9467bd', alpha=0.7, label='Short')
        axes[0].axhline(0, color='black', alpha=0.3)
        axes[0].set_title('Daily P&L by Side', fontsize=13, fontweight='bold')
        axes[0].set_ylabel('P&L ($)')
        axes[0].legend(loc='upper left')
        axes[0].grid(True, alpha=0.3)

        axes[1].plot(pnl.index, pnl['Long'].cumsum(), linewidth=2, color='#2ca02c', label='Long cumulative')
        axes[1].plot(pnl.index, pnl['Short'].cumsum(), linewidth=2, color='#9467bd', label='Short cumulative')
        axes[1].plot(pnl.index, (pnl['Long'] + pnl['Short']).cumsum(), linewidth=2, linestyle='--', color='steelblue',
                     label='Total cumulative')
        axes[1].axhline(0, color='black', alpha=0.3)
        axes[1].set_title('Cumulative P&L by Side', fontsize=13, fontweight='bold')
        axes[1].set_xlabel('Date')
        axes[1].set_ylabel('Cumulative P&L ($)')
        axes[1].legend(loc='upper left')
        axes[1].grid(True, alpha=0.3)

        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()


## Plot 12: Top / Bottom Contributors


In [None]:
# Plot 12: horizontal bar chart of the largest negative and positive per-symbol
# P&L totals.
if df_positions is None or len(df_positions) == 0:
    print('No positions data available')
else:
    # Use the first available P&L column, in order of preference.
    pnl_col = None
    for c in ['daily_total_net_pnl', 'daily_pnl', 'daily_unrealized_pnl']:
        if c in df_positions.columns:
            pnl_col = c
            break

    if pnl_col is None or 'symbol' not in df_positions.columns:
        print('Required columns for contributors plot not available')
    else:
        # Total P&L per symbol, ascending (worst contributors first).
        sym = df_positions.groupby('symbol')[pnl_col].sum().sort_values()
        if len(sym) == 0:
            print('No contributor rows available')
        else:
            n = min(10, len(sym))
            if len(sym) >= 2 * n:
                # Enough symbols for two non-overlapping groups.
                combined = pd.concat([sym.head(n), sym.tail(n)])
                title = f'Top {n} and Bottom {n} Contributors'
            else:
                # head(n) and tail(n) would overlap here and plot the same
                # symbols twice, so show every symbol exactly once instead.
                combined = sym
                title = f'All {len(sym)} Contributors'
            colors = ['#d62728' if v < 0 else '#2ca02c' for v in combined.values]

            fig, ax = plt.subplots(figsize=(10, max(6, 0.35 * len(combined))))
            ax.barh(combined.index.astype(str), combined.values, color=colors, alpha=0.85)
            ax.axvline(0, color='black', alpha=0.3)
            ax.set_title(title, fontsize=13, fontweight='bold')
            ax.set_xlabel('Total P&L ($)')
            ax.grid(True, alpha=0.3, axis='x')
            plt.tight_layout()
            plt.show()


## Plot 13: Turnover Over Time


In [None]:
# Plot 13: rolling turnover computed from day-over-day changes in position weights.
if df_positions is None or len(df_positions) == 0 or not {'date', 'symbol', 'weight'}.issubset(df_positions.columns):
    print('positions.csv missing required columns for turnover')
else:
    # Date x symbol weight matrix; (date, symbol) pairs with no row are filled with 0.
    w = df_positions.pivot_table(index='date', columns='symbol', values='weight', aggfunc='last', fill_value=0).sort_index()
    # Half the sum of absolute weight changes per date.
    daily_turn = 0.5 * w.diff().abs().sum(axis=1)

    # Coerce the inferred cycle to a positive int and cap min_periods at the
    # window size: pandas raises ValueError when min_periods exceeds the window
    # (which the original expression did for a 1-day cycle).
    cycle_days = max(1, int(infer_cycle_days(df_targets, df_snapshots)))
    min_obs = min(cycle_days, max(2, cycle_days // 2))
    roll = daily_turn.rolling(cycle_days, min_periods=min_obs).sum()

    # Annualize by scaling total turnover by the sample length in trading years.
    years = len(daily_turn) / TRADING_DAYS if len(daily_turn) else np.nan
    ann_turn = daily_turn.sum() / years if years and years > 0 else np.nan

    fig, ax = plt.subplots(figsize=(14, 6))
    ax.plot(roll.index, 100 * roll.values, linewidth=1.8, color='steelblue')
    ax.fill_between(roll.index, 0, 100 * roll.values, alpha=0.15, color='steelblue')
    ax.set_title(f'Rolling Turnover ({cycle_days} trading days)', fontsize=13, fontweight='bold')
    ax.set_xlabel('Date')
    ax.set_ylabel('Turnover (% NAV)')
    ax.grid(True, alpha=0.3)

    if pd.notna(ann_turn):
        ax.text(0.02, 0.95, f'Annualized turnover: {100 * ann_turn:.1f}%', transform=ax.transAxes,
                va='top', fontsize=10, bbox=dict(boxstyle='round', fc='wheat', alpha=0.5))

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()


## Plot 14: Position Count Over Time


In [None]:
# Plot 14: line + shaded area of the snapshot `num_positions` column over time.
if 'num_positions' in df_snapshots.columns:
    snap_dates = df_snapshots['date']
    open_count = df_snapshots['num_positions']

    fig, ax = plt.subplots(figsize=(14, 5))
    ax.plot(snap_dates, open_count, linewidth=1.8, color='steelblue')
    ax.fill_between(snap_dates, 0, open_count, alpha=0.15, color='steelblue')

    # Axis cosmetics.
    ax.set_title('Open Position Count Over Time', fontsize=13, fontweight='bold')
    ax.set_xlabel('Date')
    ax.set_ylabel('Positions')
    ax.grid(True, alpha=0.3)

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()
else:
    print('num_positions not found in snapshots')


## Plot 15: Estimated Vol vs Realized Vol vs Target


In [None]:
# Plot 15: overlay the logged volatility estimate, a rolling realized volatility
# computed from `returns`, and the strategy's target volatility (when parsed).
if len(returns) >= 20:
    has_estimate = 'estimated_vol' in df_snapshots.columns

    # Target comes from the dynamically extracted strategy settings.
    vol_target = STRATEGY.get('target_vol_annual')

    # Rolling window is a quarter of the sample, bounded to [20, 60] observations.
    daily_ret = returns.reset_index(drop=True)
    window = min(60, max(20, len(daily_ret) // 4))
    rolling_vol = daily_ret.rolling(window).std() * np.sqrt(TRADING_DAYS)

    # Snapshot dates with the first one dropped, truncated to the shorter series.
    # NOTE(review): this presumes `returns` has one fewer row than the snapshots — confirm.
    date_axis = df_snapshots[['date']].dropna().iloc[1:].reset_index(drop=True)
    usable = min(len(date_axis), len(rolling_vol))

    fig, ax = plt.subplots(figsize=(14, 6))

    if has_estimate:
        est_vol = pd.to_numeric(df_snapshots['estimated_vol'], errors='coerce')
        ax.plot(df_snapshots['date'], 100 * est_vol, linewidth=1.8, color='coral', label='Estimated vol')

    ax.plot(
        date_axis['date'].iloc[:usable],
        100 * rolling_vol.iloc[:usable],
        linewidth=1.8,
        color='steelblue',
        label=f'Realized vol ({window}D)',
    )

    if vol_target is not None:
        target_pct = 100 * float(vol_target)
        ax.axhline(target_pct, color='green', linestyle='--', linewidth=1.6,
                   label=f'Target vol ({target_pct:.1f}%)')

    ax.set_title('Estimated vs Realized Volatility', fontsize=13, fontweight='bold')
    ax.set_xlabel('Date')
    ax.set_ylabel('Annualized Volatility (%)')
    ax.legend(loc='upper left')
    ax.grid(True, alpha=0.3)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()
else:
    print('Insufficient data for volatility comparison')


## Plot 16: Order Lifecycle Quality (Status Mix + Fill Ratio by Tier)


In [None]:
# Plot 16: per-order summary built from order events -> final status counts and
# fill-ratio distribution by tier tag.
# 'date' is guarded together with 'order_id' because the events are sorted and
# aggregated on it below.
if df_orders is None or len(df_orders) == 0 or not {'order_id', 'date'}.issubset(df_orders.columns):
    print('No order lifecycle data available')
else:
    # Stable sort: events sharing a timestamp keep their logged order, so the
    # "last event per order" used for final_status is deterministic. The default
    # quicksort gives no such guarantee for ties.
    ev = df_orders.copy().sort_values('date', kind='mergesort')

    # Tier from tags when present; no hardcoded fallback mapping to strategy internals.
    if 'tier_tag' not in ev.columns:
        ev['tier_tag'] = 'unknown'

    # One row per order: first symbol/tier/quantity plus first and last event time.
    # When 'symbol' or 'quantity' is absent, a placeholder derived from order_id is used.
    grp = ev.groupby('order_id', as_index=False)
    order_summary = grp.agg(
        symbol=('symbol', 'first') if 'symbol' in ev.columns else ('order_id', 'first'),
        tier=('tier_tag', 'first'),
        quantity=('quantity', 'first') if 'quantity' in ev.columns else ('order_id', 'size'),
        submitted_at=('date', 'min'),
        final_at=('date', 'max')
    )

    # Final status = status on the last event of each order (in sorted order).
    if 'status' in ev.columns:
        final_status = ev.groupby('order_id').tail(1)[['order_id', 'status']].rename(columns={'status': 'final_status'})
        order_summary = order_summary.merge(final_status, on='order_id', how='left')
    else:
        order_summary['final_status'] = 'unknown'

    # Fill ratio = |sum of fill quantities| / |order quantity|, clipped to [0, 1];
    # zero or unparseable quantities yield 0 via the NaN -> fillna(0) path.
    if 'fill_quantity' in ev.columns:
        fills = ev.groupby('order_id', as_index=False)['fill_quantity'].sum().rename(columns={'fill_quantity': 'filled_qty'})
        order_summary = order_summary.merge(fills, on='order_id', how='left')
        qty = pd.to_numeric(order_summary['quantity'], errors='coerce').abs().replace(0, np.nan)
        order_summary['fill_ratio'] = (pd.to_numeric(order_summary['filled_qty'], errors='coerce').abs() / qty).fillna(0).clip(0, 1)
    else:
        order_summary['fill_ratio'] = np.nan

    # Whole days between the first and last event of each order.
    order_summary['days_to_final'] = (order_summary['final_at'] - order_summary['submitted_at']).dt.days.fillna(0)

    fig, axes = plt.subplots(1, 2, figsize=(16, 6))

    order_summary['final_status'].value_counts().plot(kind='bar', ax=axes[0], color='#1f77b4')
    axes[0].set_title('Final Order Status Counts', fontsize=13, fontweight='bold')
    axes[0].set_ylabel('Count')
    axes[0].grid(axis='y', alpha=0.3)

    tier_order = sorted(order_summary['tier'].dropna().unique().tolist())
    sns.boxplot(data=order_summary, x='tier', y='fill_ratio', order=tier_order, ax=axes[1])
    axes[1].set_title('Fill Ratio by Tier (from order tags)', fontsize=13, fontweight='bold')
    axes[1].set_xlabel('Tier')
    axes[1].set_ylabel('Fill ratio')
    axes[1].grid(alpha=0.3)

    plt.tight_layout()
    plt.show()


## Plot 17: Scaling Adherence (Planned vs Actual Progress)


In [None]:
# Plot 17: compare scheduled vs actual progress from start weight toward the
# weekly target weight, per scale day.
if df_targets is None or len(df_targets) == 0:
    print('No targets data available')
else:
    needed = {'week_id', 'symbol', 'start_w', 'weekly_target_w', 'scheduled_w', 'actual_w', 'date'}
    if not needed.issubset(df_targets.columns):
        print(f'targets.csv missing required columns: {sorted(needed - set(df_targets.columns))}')
    else:
        d = df_targets.copy().sort_values(['week_id', 'symbol', 'date'])

        for col in ['start_w', 'weekly_target_w', 'scheduled_w', 'actual_w', 'scale_day']:
            if col in d.columns:
                d[col] = pd.to_numeric(d[col], errors='coerce')

        # Keep only rows with a non-trivial order (target differs from start).
        order_delta = d['weekly_target_w'] - d['start_w']
        total = order_delta.abs()
        d = d[total > 1e-10].copy()
        d['order_dir'] = np.sign(order_delta.loc[d.index])
        d['total_order_abs'] = total.loc[d.index]

        # Prefer explicit trading-day index from strategy target state.
        if 'scale_day' in d.columns and d['scale_day'].notna().any():
            d['day_idx'] = d['scale_day'].round().astype('Int64')
        else:
            d['day_idx'] = d.groupby(['week_id', 'symbol']).cumcount().astype('Int64')

        # If logger emits is_scaling, keep only active scaling rows.
        # Accepts both textual truthy values and positive numerics.
        if 'is_scaling' in d.columns:
            raw = d['is_scaling']
            is_scaling = (
                raw.astype(str).str.strip().str.lower().isin(['true', '1', 'yes', 'y']) |
                pd.to_numeric(raw, errors='coerce').fillna(0).astype(float).gt(0)
            )
            d = d[is_scaling].copy()

        d = d[d['day_idx'].notna() & (d['day_idx'] >= 0)].copy()

        # Scaling window length: parsed strategy setting first, inferred otherwise.
        cycle_days = STRATEGY.get('scaling_days')
        if cycle_days is None:
            cycle_days = infer_cycle_days(df_targets, df_snapshots)
        cycle_days = max(1, int(cycle_days))
        d = d[d['day_idx'] < cycle_days].copy()

        if len(d) == 0:
            print('No active scaling rows after filtering')
        else:
            # Signed progress: project the move from start_w onto the order
            # direction, so a weight drifting AWAY from the target counts as zero
            # progress. An absolute-value ratio would report it as progress made.
            d['planned_progress'] = (
                d['order_dir'] * (d['scheduled_w'] - d['start_w']) / d['total_order_abs']
            ).clip(0, 1)
            d['actual_progress'] = (
                d['order_dir'] * (d['actual_w'] - d['start_w']) / d['total_order_abs']
            ).clip(0, 1)
            d['progress_gap'] = d['actual_progress'] - d['planned_progress']

            fig, axes = plt.subplots(1, 2, figsize=(16, 6))

            sns.boxplot(data=d, y='progress_gap', ax=axes[0], color='steelblue')
            axes[0].axhline(0, color='black', alpha=0.5)
            axes[0].set_title('Progress Gap Distribution (Actual - Planned)', fontsize=13, fontweight='bold')
            axes[0].set_ylabel('Gap')
            axes[0].grid(alpha=0.3)

            # Mean planned/actual progress per scale day, with row counts.
            profile = d.groupby('day_idx', as_index=False).agg(
                planned=('planned_progress', 'mean'),
                actual=('actual_progress', 'mean'),
                rows=('day_idx', 'size')
            ).sort_values('day_idx')

            axes[1].plot(profile['day_idx'], profile['planned'], marker='o', linewidth=2, label='Planned')
            axes[1].plot(profile['day_idx'], profile['actual'], marker='o', linewidth=2, label='Actual')
            axes[1].set_title('Planned vs Actual Progress by Scale Day', fontsize=13, fontweight='bold')
            axes[1].set_xlabel('Scale day index')
            axes[1].set_ylabel('Progress')
            axes[1].set_xticks(sorted(profile['day_idx'].astype(int).unique().tolist()))
            axes[1].legend()
            axes[1].grid(alpha=0.3)

            plt.tight_layout()
            plt.show()

            print(f'Using cycle_days={cycle_days}; plotted day_idx range: {int(profile["day_idx"].min())}..{int(profile["day_idx"].max())}')


## Plot 18: Stale Signal Risk Proxy (Adverse Move During Scaling)


In [None]:
# Plot 18: for each scale day, measure how far price has moved against the
# order direction since the first priced scaling day of that symbol-week.
# Prices come from the positions log, with QuantBook history as a fallback.
if df_targets is None or len(df_targets) == 0:
    print('Need targets.csv for stale-signal risk proxy')
else:
    req_t = {'date', 'week_id', 'symbol', 'start_w', 'weekly_target_w'}
    if not req_t.issubset(df_targets.columns):
        print(f'Required target columns missing: {sorted(req_t - set(df_targets.columns))}')
    else:
        # Keep the required columns plus the optional scale_day / is_scaling when present.
        tgt = df_targets[list(req_t.union({'scale_day', 'is_scaling'}).intersection(df_targets.columns))].copy()
        tgt['date'] = pd.to_datetime(tgt['date'])

        for col in ['start_w', 'weekly_target_w', 'scale_day']:
            if col in tgt.columns:
                tgt[col] = pd.to_numeric(tgt[col], errors='coerce')

        # Order size and direction implied by start -> weekly target; drop no-op rows.
        tgt['order_delta_w'] = tgt['weekly_target_w'] - tgt['start_w']
        tgt['order_dir'] = np.sign(tgt['order_delta_w'])
        tgt['order_abs'] = tgt['order_delta_w'].abs()
        tgt = tgt[tgt['order_abs'] > 1e-10].copy()

        if len(tgt) == 0:
            print('No non-zero weekly orders found for stale-signal analysis')
        else:
            # Day index: logged scale_day when any value is present, otherwise the
            # row position within each (week_id, symbol) group ordered by date.
            if 'scale_day' in tgt.columns and tgt['scale_day'].notna().any():
                tgt['day_idx'] = tgt['scale_day'].round().astype('Int64')
            else:
                tgt = tgt.sort_values(['week_id', 'symbol', 'date'])
                tgt['day_idx'] = tgt.groupby(['week_id', 'symbol']).cumcount().astype('Int64')

            # Keep only rows flagged as scaling (textual truthy values or positive numerics).
            if 'is_scaling' in tgt.columns:
                raw = tgt['is_scaling']
                is_scaling = (
                    raw.astype(str).str.strip().str.lower().isin(['true', '1', 'yes', 'y']) |
                    pd.to_numeric(raw, errors='coerce').fillna(0).astype(float).gt(0)
                )
                tgt = tgt[is_scaling].copy()

            # Scaling window length: parsed strategy setting first, inferred otherwise.
            cycle_days = STRATEGY.get('scaling_days')
            if cycle_days is None:
                cycle_days = infer_cycle_days(df_targets, df_snapshots)
            cycle_days = max(1, int(cycle_days))

            tgt = tgt[tgt['day_idx'].notna() & (tgt['day_idx'] >= 0) & (tgt['day_idx'] < cycle_days)].copy()

            if len(tgt) == 0:
                print('No scaling rows after day-index filtering')
            else:
                # Primary price source: positions log.
                # Starts as an empty frame so the merge below still works without positions.
                px = pd.DataFrame(columns=['date', 'symbol', 'price'])
                if df_positions is not None and {'date', 'symbol', 'price'}.issubset(df_positions.columns):
                    px = df_positions[['date', 'symbol', 'price']].copy()
                    px['date'] = pd.to_datetime(px['date']).dt.normalize()
                    px['price'] = pd.to_numeric(px['price'], errors='coerce')
                    # One price per (date, symbol): the last logged row wins.
                    px = px.dropna(subset=['price']).drop_duplicates(['date', 'symbol'], keep='last')

                # Normalize target dates to midnight so they join on calendar day.
                tgt['date'] = pd.to_datetime(tgt['date']).dt.normalize()
                m = tgt.merge(px, on=['date', 'symbol'], how='left')

                # Fallback: QC price history for missing date-symbol pairs.
                miss_rate = float(m['price'].isna().mean()) if len(m) else 1.0
                if miss_rate > 0:
                    syms = sorted(m.loc[m['price'].isna(), 'symbol'].dropna().unique().tolist())
                    if len(syms):
                        # Map ticker -> QC Symbol; tickers that fail to subscribe are
                        # skipped silently (best effort).
                        qc_map = {}
                        for s in syms:
                            try:
                                qc_map[s] = qb.AddEquity(str(s), Resolution.Daily).Symbol
                            except Exception:
                                pass

                        if len(qc_map):
                            # Request daily history over the merged date range, end date padded by one day.
                            hist = qb.History(list(qc_map.values()), m['date'].min(), m['date'].max() + pd.Timedelta(days=1), Resolution.Daily)
                            if hist is not None and len(hist):
                                h = hist.reset_index()
                                # Locate symbol/time/close columns by name, falling back to
                                # position for symbol and time.
                                # NOTE(review): positional fallbacks assume a (symbol, time) index order — confirm.
                                sym_col = 'symbol' if 'symbol' in h.columns else h.columns[0]
                                if 'time' in h.columns:
                                    tcol = 'time'
                                elif 'end_time' in h.columns:
                                    tcol = 'end_time'
                                else:
                                    tcol = h.columns[1]
                                pcol = 'close' if 'close' in h.columns else ('Close' if 'Close' in h.columns else None)

                                if pcol is not None:
                                    # Keep the text before the first space of the symbol string as the ticker.
                                    h['symbol'] = h[sym_col].astype(str).str.split(' ').str[0]
                                    h['date'] = pd.to_datetime(h[tcol]).dt.tz_localize(None).dt.normalize()
                                    h['price_hist'] = pd.to_numeric(h[pcol], errors='coerce')
                                    h = h[['date', 'symbol', 'price_hist']].dropna().drop_duplicates(['date', 'symbol'], keep='last')

                                    # Fill only the gaps; positions-log prices take precedence.
                                    m = m.merge(h, on=['date', 'symbol'], how='left')
                                    m['price'] = m['price'].fillna(m['price_hist'])
                                    m.drop(columns=['price_hist'], inplace=True)

                # Rebalance reference: first available scaling-day price in each symbol-week.
                ref = (
                    m.dropna(subset=['price'])
                     .sort_values(['week_id', 'symbol', 'day_idx', 'date'])
                     .groupby(['week_id', 'symbol'], as_index=False)
                     .first()[['week_id', 'symbol', 'price']]
                     .rename(columns={'price': 'rebalance_price'})
                )

                m = m.merge(ref, on=['week_id', 'symbol'], how='left')
                m['ret_since_rebal'] = (m['price'] / m['rebalance_price']) - 1.0
                # Positive when price moved against the order direction
                # (down for buys, up for sells).
                m['adverse_move'] = -m['order_dir'] * m['ret_since_rebal']

                # Per-day mean/median adverse move plus the count of non-null observations.
                prof = (
                    m.groupby('day_idx', as_index=False)
                     .agg(mean_adverse_move=('adverse_move', 'mean'),
                          med_adverse_move=('adverse_move', 'median'),
                          obs=('adverse_move', lambda s: int(s.notna().sum())))
                     .sort_values('day_idx')
                )

                if len(prof) == 0 or prof['obs'].sum() == 0:
                    print('No valid adverse-move observations after price alignment')
                else:
                    fig, ax = plt.subplots(figsize=(12, 5))
                    ax.plot(prof['day_idx'], 100 * prof['mean_adverse_move'], marker='o', linewidth=2, color='firebrick', label='Mean')
                    ax.plot(prof['day_idx'], 100 * prof['med_adverse_move'], marker='s', linewidth=1.5, color='steelblue', alpha=0.85, label='Median')
                    ax.axhline(0, color='black', alpha=0.5)
                    ax.set_title('Adverse Move During Scaling Window', fontsize=13, fontweight='bold')
                    ax.set_xlabel('Scale day index')
                    ax.set_ylabel('Adverse move (%)')
                    ax.set_xticks(sorted(prof['day_idx'].astype(int).unique().tolist()))
                    ax.legend()
                    ax.grid(alpha=0.3)
                    plt.tight_layout()
                    plt.show()

                    print(f'Cycle days used: {cycle_days}')
                    print('Observation count by day:')
                    display(prof[['day_idx', 'obs']])


## Notes

- This notebook intentionally derives strategy-specific settings (benchmark, volatility target, execution thresholds) from `main.py`.
- If the strategy wiring changes substantially, rerun the metadata extraction cell and confirm parsed values.
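- Optional datasets (`slippage.csv`, `targets.csv`, `order_events.csv`, and `positions.csv`) are handled with skip logic: when a dataset is missing or lacks the columns a plot needs, that plot prints a short notice instead of rendering.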
