# Strategy Tear Sheet

Comprehensive tear sheet generated from ObjectStore CSVs logged by the backtest.

**Data Sources:**
- `{TEAM_ID}/daily_snapshots.csv` — Daily NAV, exposure, P&L, volatility
- `{TEAM_ID}/positions.csv` — Per-symbol daily positions and P&L
- `{TEAM_ID}/signals.csv` — Signal scores on rebalance days
- `{TEAM_ID}/slippage.csv` — Per-fill slippage data
- `{TEAM_ID}/trades.csv` — Completed round-trip trades
- `{TEAM_ID}/targets.csv` — Scaling schedule targets
- `{TEAM_ID}/order_events.csv` — Full order lifecycle events
- SPY (benchmark) and SGOV (risk-free proxy) daily price history, fetched via QuantBook

**Prerequisites:** Run a cloud backtest first to populate ObjectStore.

## Setup

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
import scipy.stats as sps
import re
from io import StringIO
from IPython.display import display

# Show every column and do not wrap wide frames when displaying DataFrames.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# Default plot styling for every figure in this notebook.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

from QuantConnect import *
from QuantConnect.Research import QuantBook
# TEAM_ID is the prefix of every ObjectStore key read below.
from config import TEAM_ID

# Single QuantBook instance shared by all cells (ObjectStore reads and price history).
qb = QuantBook()
print("QuantBook initialized")

In [None]:
def read_csv_from_store(key):
    """Load a CSV stored under ``key`` in the ObjectStore as a DataFrame.

    Returns ``None`` (after printing a diagnostic) when the key is missing,
    when its content is empty, or when reading/parsing raises. This is a
    deliberate best-effort loader: callers check for ``None`` and skip the
    sections that depend on the missing file.
    """
    try:
        store = qb.ObjectStore
        if store.ContainsKey(key):
            raw = store.Read(key)
            if raw:
                return pd.read_csv(StringIO(raw))
            print(f'Empty ObjectStore key: {key}')
        else:
            print(f'ObjectStore key not found: {key}')
    except Exception as e:
        print(f'Error reading {key}: {e}')
    return None

## Load All CSVs

In [None]:
# Annualization factor used throughout the notebook.
TRADING_DAYS = 252

# --- Load CSVs ---
# Each frame is None when its key is missing, empty, or unreadable.
df_snapshots = read_csv_from_store(f"{TEAM_ID}/daily_snapshots.csv")
df_positions = read_csv_from_store(f"{TEAM_ID}/positions.csv")
df_signals   = read_csv_from_store(f"{TEAM_ID}/signals.csv")
df_slippage  = read_csv_from_store(f"{TEAM_ID}/slippage.csv")
df_trades    = read_csv_from_store(f"{TEAM_ID}/trades.csv")
df_targets   = read_csv_from_store(f"{TEAM_ID}/targets.csv")
df_orders    = read_csv_from_store(f"{TEAM_ID}/order_events.csv")

# --- Validate the minimum requirement ---
# Only the snapshots file is mandatory; the other frames may stay None.
if df_snapshots is None:
    raise ValueError("daily_snapshots.csv is required. Run a backtest first.")

# --- Parse dates and sort ---
# All operations are in place so the module-level df_* names see the changes.
for name, frame in [('df_snapshots', df_snapshots),
                    ('df_positions', df_positions),
                    ('df_signals',   df_signals),
                    ('df_slippage',  df_slippage),
                    ('df_trades',    df_trades),
                    ('df_targets',   df_targets),
                    ('df_orders',    df_orders)]:
    if frame is not None and 'date' in frame.columns:
        frame['date'] = pd.to_datetime(frame['date'])
        frame.sort_values('date', inplace=True)
        frame.reset_index(drop=True, inplace=True)

# Simple daily return from NAV; the first row is NaN by construction.
df_snapshots['daily_return'] = df_snapshots['nav'].pct_change()

# --- Report what was loaded ---
print(f"Snapshots: {len(df_snapshots)} rows, "
      f"{df_snapshots['date'].min():%Y-%m-%d} to {df_snapshots['date'].max():%Y-%m-%d}")
for label, frame in [('Positions', df_positions), ('Signals', df_signals),
                     ('Slippage', df_slippage), ('Trades', df_trades),
                     ('Targets', df_targets), ('Orders', df_orders)]:
    print(f"{label}: {len(frame) if frame is not None else 'NOT FOUND'} rows")

## Load SGOV Risk-Free Rate

In [None]:
# SGOV daily price changes serve as the daily risk-free return series.
sgov_symbol = qb.AddEquity("SGOV", Resolution.Daily).Symbol

# Request a few extra days before the first snapshot so pct_change() has a
# prior close available for the first snapshot date.
sgov_start = df_snapshots['date'].min() - pd.Timedelta(days=10)
sgov_end   = df_snapshots['date'].max() + pd.Timedelta(days=1)

sgov_history = qb.History(sgov_symbol, sgov_start, sgov_end, Resolution.Daily)
sgov_prices  = sgov_history['close'].reset_index()

# Normalize tz-aware timestamps from QuantBook
# Falls back to the first column when no 'time' column is present.
date_col = 'time' if 'time' in sgov_prices.columns else sgov_prices.columns[0]
sgov_prices = sgov_prices.rename(columns={date_col: 'date', 'close': 'sgov_close'})
sgov_prices['date'] = pd.to_datetime(sgov_prices['date']).dt.tz_localize(None).dt.normalize()
sgov_prices['sgov_daily_rf'] = sgov_prices['sgov_close'].pct_change()

# Left merge keeps every snapshot row; days without an SGOV observation take
# the previous value, and any leading gap is treated as a 0% risk-free return.
df_snapshots = df_snapshots.merge(sgov_prices[['date', 'sgov_daily_rf']], on='date', how='left')
df_snapshots['sgov_daily_rf'] = df_snapshots['sgov_daily_rf'].ffill().fillna(0.0)

# Annualized average risk-free rate (arithmetic: mean daily rate * trading days).
RISK_FREE_RATE = df_snapshots['sgov_daily_rf'].mean() * TRADING_DAYS
df_snapshots['excess_return'] = df_snapshots['daily_return'] - df_snapshots['sgov_daily_rf']

# Drop the NaN produced by pct_change(); these Series keep the snapshot index.
returns = df_snapshots['daily_return'].dropna()
excess_returns = df_snapshots['excess_return'].dropna()

print(f"Effective annualized risk-free rate (SGOV): {RISK_FREE_RATE * 100:.2f}%")
print(f"Return observations: {len(returns)}")

## Load SPY Benchmark

In [None]:
spy_symbol = qb.AddEquity('SPY', Resolution.Daily).Symbol

spy_start = df_snapshots['date'].min()
spy_end   = df_snapshots['date'].max() + pd.Timedelta(days=1)

spy_hist = qb.History(spy_symbol, spy_start, spy_end, Resolution.Daily)

# --- Handle QuantBook's MultiIndex return format ---
# Try each index level until one contains the SPY symbol, then select on it
# so the remaining index is the timestamp.
spy_df = spy_hist.copy()
if isinstance(spy_df.index, pd.MultiIndex):
    for level in range(spy_df.index.nlevels):
        try:
            spy_df = spy_df.xs(spy_symbol, level=level)
            break
        except Exception:
            continue

# Accept either a 'close' or a 'value' price column.
if 'close' in spy_df.columns:
    spy_close = spy_df['close']
elif 'value' in spy_df.columns:
    spy_close = spy_df['value']
else:
    raise ValueError('SPY history does not include close/value price column.')

# Normalize the index to tz-naive midnight dates so it can be joined against
# the snapshot dates; keep the last observation if a date appears twice.
spy_close = pd.Series(spy_close)
spy_close.index = pd.to_datetime(spy_close.index)
if getattr(spy_close.index, 'tz', None) is not None:
    spy_close.index = spy_close.index.tz_localize(None)
spy_close.index = spy_close.index.normalize()
spy_close = spy_close.groupby(spy_close.index).last().sort_index()

# The first SPY row has no prior close, so it is dropped along with its NaN return.
df_spy = pd.DataFrame({'date': spy_close.index, 'spy_close': spy_close.values})
df_spy['spy_return'] = df_spy['spy_close'].pct_change()
df_spy = df_spy.dropna(subset=['spy_return']).copy()

# --- Merge onto snapshots ---
# Inner join: only dates with both a strategy return and a SPY return survive.
merged = df_snapshots[['date', 'daily_return']].merge(
    df_spy[['date', 'spy_return']],
    on='date', how='inner'
).dropna().sort_values('date').reset_index(drop=True)

print(f"SPY overlap: {len(merged)} trading days")
if len(merged) == 0:
    # Print a sample of both date columns to help diagnose a format mismatch.
    print("WARNING: No date overlap between snapshots and SPY.")
    print(f"  Snapshot dates sample: {df_snapshots['date'].head(3).tolist()}")
    print(f"  SPY dates sample:     {df_spy['date'].head(3).tolist()}")

## Helper Functions

In [None]:
def sharpe_ratio(rets, risk_free_rate=0.0, periods_per_year=252):
    """Annualized Sharpe ratio of a periodic return series.

    ``risk_free_rate`` is an annual rate; it is spread evenly across
    ``periods_per_year`` and subtracted from every return. Returns NaN when
    the excess returns have zero standard deviation.
    """
    excess = rets - risk_free_rate / periods_per_year
    volatility = excess.std()
    if volatility == 0:
        return np.nan
    per_period_sharpe = excess.mean() / volatility
    return per_period_sharpe * np.sqrt(periods_per_year)


def sortino_ratio(rets, risk_free_rate=0.0, periods_per_year=252):
    """Annualized Sortino ratio of a periodic return series.

    The downside deviation is the root-mean-square of the *negative* excess
    returns only (the mean is taken over losing periods, not all periods).
    Returns NaN when that deviation is zero; with no losing periods the
    deviation is NaN and so is the result.
    """
    excess = rets - risk_free_rate / periods_per_year
    losses = excess[excess < 0]
    downside_dev = np.sqrt(losses.pow(2).mean())
    if downside_dev == 0:
        return np.nan
    per_period_sortino = excess.mean() / downside_dev
    return per_period_sortino * np.sqrt(periods_per_year)


def calmar_ratio(rets, periods_per_year=252):
    """Calmar ratio: annualized compound return divided by maximum drawdown.

    Args:
        rets: Series of simple periodic returns.
        periods_per_year: Number of return periods in a year.

    Returns:
        The ratio, or NaN when ``rets`` is empty (no period to annualize
        over) or when the series never draws down.
    """
    # An empty series has no final value and would give a zero-year horizon.
    if len(rets) == 0:
        return np.nan
    cumulative = (1 + rets).cumprod()
    total_return = cumulative.iloc[-1] - 1
    years = len(rets) / periods_per_year
    ann_return = (1 + total_return) ** (1 / years) - 1
    running_max = cumulative.cummax()
    max_dd = abs(((cumulative / running_max) - 1).min())
    if max_dd == 0:
        return np.nan
    return ann_return / max_dd


def max_drawdown(rets):
    """Largest peak-to-trough decline of the compounded return path.

    Returned as a non-negative fraction (0.25 means a 25% drawdown).
    """
    wealth = rets.add(1).cumprod()
    peak = wealth.cummax()
    underwater = wealth.div(peak).sub(1)
    return abs(underwater.min())


def probabilistic_sharpe_ratio(rets, sr, benchmark_sr=0.0):
    """Probabilistic Sharpe Ratio: P(true Sharpe > ``benchmark_sr``).

    Implements PSR = Phi((sr - sr*) * sqrt(n - 1) /
    sqrt(1 - skew * sr + (kurt - 1) / 4 * sr**2)), where ``kurt`` is the
    Pearson (non-excess) kurtosis, i.e. 3 for a normal distribution.

    Args:
        rets: Return observations used for the sample size, skewness and
            kurtosis.
        sr: Observed Sharpe ratio. It must be expressed at the same
            frequency as ``rets`` (not annualized), because the standard
            error is scaled by the number of observations in ``rets``.
        benchmark_sr: Threshold Sharpe ratio, at the same frequency as ``sr``.

    Returns:
        A probability in [0, 1], or NaN when fewer than two observations are
        supplied or the estimated variance is not positive.
    """
    n = len(rets)
    # The standard error divides by (n - 1); it is undefined below two points.
    if n < 2:
        return np.nan
    skew = sps.skew(rets)
    # scipy returns excess kurtosis by default; the (kurt - 1) / 4 term in
    # the formula expects the raw fourth standardized moment.
    kurt = sps.kurtosis(rets, fisher=False)
    variance = (1 - skew * sr + ((kurt - 1) / 4) * sr ** 2) / (n - 1)
    if variance <= 0:
        return np.nan
    return sps.norm.cdf((sr - benchmark_sr) / np.sqrt(variance))


def tier_from_mag(m):
    """Map a signal magnitude to 'strong', 'moderate', 'weak' or 'unknown'.

    Missing values are 'unknown'; otherwise the first threshold that ``m``
    meets or exceeds (0.7, then 0.3) decides the tier, with 'weak' as the
    fallback.
    """
    if pd.isna(m):
        return 'unknown'
    for floor, tier in ((0.7, 'strong'), (0.3, 'moderate')):
        if m >= floor:
            return tier
    return 'weak'


print("Helper functions defined")

---
# Performance Core

## Plot 1: Performance Summary Table

In [None]:
# --- Guard: every metric below needs at least one daily return ---
if returns.empty:
    raise ValueError("At least two NAV snapshots are required to compute performance metrics.")

# --- Monthly returns for monthly stats ---
df_snapshots['year']  = df_snapshots['date'].dt.year
df_snapshots['month'] = df_snapshots['date'].dt.month

monthly = df_snapshots.groupby(['year', 'month']).agg(
    nav_start=('nav', 'first'),
    nav_end=('nav', 'last')
)
# Measure each month from the previous month-end NAV so the return earned on
# the month's first trading day is included. The first month has no prior
# month-end and falls back to its own first NAV.
monthly['nav_start'] = monthly['nav_end'].shift(1).fillna(monthly['nav_start'])
monthly['monthly_return'] = (monthly['nav_end'] / monthly['nav_start']) - 1
monthly = monthly.reset_index()

# --- Compute all metrics ---
total_return    = (df_snapshots['nav'].iloc[-1] / df_snapshots['nav'].iloc[0]) - 1
years           = len(returns) / TRADING_DAYS
ann_return      = (1 + total_return) ** (1 / years) - 1
ann_vol         = returns.std() * np.sqrt(TRADING_DAYS)
sr              = sharpe_ratio(returns, RISK_FREE_RATE, TRADING_DAYS)
sort            = sortino_ratio(returns, RISK_FREE_RATE, TRADING_DAYS)
cal             = calmar_ratio(returns, TRADING_DAYS)
mdd             = max_drawdown(returns)

# PSR scales its standard error by the number of *daily* observations, so the
# Sharpe ratios passed in must be daily too. Feeding the annualized values
# inflates the z-score by sqrt(TRADING_DAYS) and pushes PSR towards 1.
sr_daily        = sr / np.sqrt(TRADING_DAYS)
psr_zero        = probabilistic_sharpe_ratio(returns, sr_daily, benchmark_sr=0.0)
psr_one         = probabilistic_sharpe_ratio(returns, sr_daily,
                                             benchmark_sr=1.0 / np.sqrt(TRADING_DAYS))

# Ratio of the average winning day to the average losing day (NaN if either is absent).
win_loss_daily  = abs(returns[returns > 0].mean() / returns[returns < 0].mean())

metrics = pd.DataFrame({
    'Metric': [
        'Period',
        'Total Return', 'CAGR', 'Annualized Volatility',
        'Sharpe Ratio', 'Sortino Ratio', 'Calmar Ratio',
        'PSR (SR* = 0)', 'PSR (SR* = 1)',
        'Maximum Drawdown',
        'Best Day', 'Worst Day', 'Best Month', 'Worst Month',
        'Win Rate (Daily)', 'Win Rate (Monthly)',
        'Avg Win / Avg Loss (Daily)',
        'Skewness (Daily)', 'Kurtosis (Daily)',
    ],
    'Value': [
        f"{df_snapshots['date'].min():%Y-%m-%d} to {df_snapshots['date'].max():%Y-%m-%d}",
        f"{total_return * 100:.2f}%",
        f"{ann_return * 100:.2f}%",
        f"{ann_vol * 100:.2f}%",
        f"{sr:.4f}", f"{sort:.4f}", f"{cal:.4f}",
        f"{psr_zero:.4f}", f"{psr_one:.4f}",
        f"{mdd * 100:.2f}%",
        f"{returns.max() * 100:.4f}%", f"{returns.min() * 100:.4f}%",
        f"{monthly['monthly_return'].max() * 100:.2f}%",
        f"{monthly['monthly_return'].min() * 100:.2f}%",
        f"{(returns > 0).mean() * 100:.1f}%",
        f"{(monthly['monthly_return'] > 0).mean() * 100:.1f}%",
        f"{win_loss_daily:.2f}",
        f"{returns.skew():.4f}", f"{returns.kurtosis():.4f}",
    ]
})

display(metrics)

## Plot 2: Cumulative Return + Underwater Drawdown

In [None]:
# --- Strategy cumulative return ---
# The first day's NaN return is treated as 0 so the curve starts at 1.0.
df_snapshots['cumulative_return'] = (1 + df_snapshots['daily_return'].fillna(0)).cumprod()
df_snapshots['running_max'] = df_snapshots['cumulative_return'].cummax()
# Drawdown is <= 0: fractional distance below the running peak.
df_snapshots['drawdown'] = (df_snapshots['cumulative_return'] / df_snapshots['running_max']) - 1

# --- SPY cumulative return (aligned to same start date) ---
spy_aligned = df_spy[df_spy['date'] >= df_snapshots['date'].min()].copy()
spy_aligned['spy_cumulative'] = (1 + spy_aligned['spy_return'].fillna(0)).cumprod()

fig, axes = plt.subplots(2, 1, figsize=(14, 10), sharex=True)

# -- Top: cumulative return --
# Both curves are plotted as percentage gain relative to the start.
axes[0].plot(df_snapshots['date'],
             (df_snapshots['cumulative_return'] - 1) * 100,
             linewidth=2, color='steelblue', label='Strategy')
axes[0].plot(spy_aligned['date'],
             (spy_aligned['spy_cumulative'] - 1) * 100,
             linewidth=1.5, color='grey', alpha=0.7, label='SPY')
axes[0].fill_between(df_snapshots['date'], 0,
                     (df_snapshots['cumulative_return'] - 1) * 100,
                     alpha=0.15, color='steelblue')
axes[0].axhline(y=0, color='black', linestyle='-', alpha=0.3)
axes[0].set_title('Cumulative Return', fontsize=14, fontweight='bold')
axes[0].set_ylabel('Cumulative Return (%)')
axes[0].legend(loc='upper left')
axes[0].grid(True, alpha=0.3)

# -- Bottom: underwater --
axes[1].fill_between(df_snapshots['date'], 0,
                     df_snapshots['drawdown'] * 100,
                     color='red', alpha=0.5)
axes[1].plot(df_snapshots['date'], df_snapshots['drawdown'] * 100,
             linewidth=1, color='darkred')
axes[1].set_title('Underwater Plot (Drawdown)', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Date')
axes[1].set_ylabel('Drawdown (%)')
axes[1].grid(True, alpha=0.3)

# -- Annotate max drawdown --
# idxmin() locates the deepest point; the label is offset 30 days to the
# right and 1 point lower so it does not sit on top of the trough.
worst_idx = df_snapshots['drawdown'].idxmin()
worst_date = df_snapshots.loc[worst_idx, 'date']
worst_dd   = df_snapshots.loc[worst_idx, 'drawdown'] * 100
axes[1].annotate(f'Max DD: {worst_dd:.1f}%',
                 xy=(worst_date, worst_dd),
                 xytext=(worst_date + pd.Timedelta(days=30), worst_dd - 1),
                 fontsize=10, color='darkred',
                 arrowprops=dict(arrowstyle='->', color='darkred'))

plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

## Plot 3: Monthly Return Heatmap

In [None]:
# Year x month grid of monthly returns, in percent. Months with no data are
# left as NaN by the pivot.
heatmap_data = monthly.pivot(index='year', columns='month', values='monthly_return') * 100

# Dynamically label only the months that appear
month_labels = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
heatmap_data.columns = [month_labels[m - 1] for m in heatmap_data.columns]

# Figure height grows with the number of years, with a minimum of 4 inches.
fig, ax = plt.subplots(figsize=(12, max(4, len(heatmap_data) * 0.8)))
# center=0 keeps the diverging colormap's midpoint at a 0% return.
sns.heatmap(heatmap_data, annot=True, fmt='.1f', cmap='RdYlGn', center=0,
            linewidths=1, ax=ax, cbar_kws={'label': 'Return (%)'})
ax.set_title('Monthly Returns (%)', fontsize=14, fontweight='bold')
ax.set_xlabel('Month')
ax.set_ylabel('Year')
plt.tight_layout()
plt.show()

## Plot 4: Rolling Sharpe, Sortino, Volatility

In [None]:
window = TRADING_DAYS  # 252

# Re-index everything to 0..n-1 so returns, risk-free rates and dates line up
# positionally. Row 0 of the snapshots is skipped because it has no return.
# NOTE(review): this assumes the only NaN in daily_return is the first row.
sgov_rf = df_snapshots['sgov_daily_rf'].iloc[1:].reset_index(drop=True)
ret_vals = returns.reset_index(drop=True)
excess_ser = pd.Series(ret_vals.values - sgov_rf.values, index=ret_vals.index)
plot_dates = df_snapshots['date'].iloc[1:].reset_index(drop=True)

# -- Rolling Sharpe --
# Both the mean and the volatility are taken from the excess-return series,
# matching the definition used by sharpe_ratio().
rolling_sharpe = (excess_ser.rolling(window).mean()
                  / excess_ser.rolling(window).std()) * np.sqrt(TRADING_DAYS)

# -- Rolling Sortino --
# Plain array of daily risk-free rates, looked up by position inside the
# rolling callback.
sgov_rf_arr = sgov_rf.values

def _rolling_sortino(x):
    """Annualized Sortino ratio for one rolling window of returns.

    ``x`` is the window as a Series (``raw=False``); its index holds the
    positions used to pick the matching risk-free rates from
    ``sgov_rf_arr``. Returns NaN when the window has no negative excess
    return or when the downside deviation is zero.
    """
    window_excess = x.values - sgov_rf_arr[x.index]
    losses = window_excess[window_excess < 0]
    if len(losses) == 0:
        return np.nan
    downside_dev = np.sqrt((losses ** 2).mean())
    if downside_dev == 0:
        return np.nan
    per_period = window_excess.mean() / downside_dev
    return per_period * np.sqrt(TRADING_DAYS)

# raw=False passes each window as a Series so the callback can read its index.
rolling_sortino = ret_vals.rolling(window).apply(_rolling_sortino, raw=False)

# -- Rolling Volatility --
rolling_vol = ret_vals.rolling(window).std() * np.sqrt(TRADING_DAYS)

# Three stacked panels sharing the date axis: Sharpe, Sortino, volatility.
fig, axes = plt.subplots(3, 1, figsize=(14, 14), sharex=True)

axes[0].plot(plot_dates, rolling_sharpe, linewidth=2, color='steelblue')
axes[0].axhline(y=0, color='red', linestyle='--', alpha=0.5)
axes[0].axhline(y=1, color='green', linestyle='--', alpha=0.5, label='Sharpe = 1')
axes[0].set_title(f'Rolling {window}-Day Sharpe Ratio (Risk-Free: SGOV)',
                  fontsize=14, fontweight='bold')
axes[0].set_ylabel('Sharpe Ratio')
axes[0].legend(loc='upper left')
axes[0].grid(True, alpha=0.3)

axes[1].plot(plot_dates, rolling_sortino, linewidth=2, color='coral')
axes[1].axhline(y=0, color='red', linestyle='--', alpha=0.5)
axes[1].axhline(y=1, color='green', linestyle='--', alpha=0.5, label='Sortino = 1')
axes[1].set_title(f'Rolling {window}-Day Sortino Ratio (Risk-Free: SGOV)',
                  fontsize=14, fontweight='bold')
axes[1].set_ylabel('Sortino Ratio')
axes[1].legend(loc='upper left')
axes[1].grid(True, alpha=0.3)

# Volatility is shown in percent; the dashed line marks the 10% reference level.
axes[2].plot(plot_dates, rolling_vol * 100, linewidth=2, color='#1f77b4', label='Realized Vol')
axes[2].axhline(y=10, color='green', linestyle='--', alpha=0.5, label='10% Target')
axes[2].set_title(f'Rolling {window}-Day Annualized Volatility',
                  fontsize=14, fontweight='bold')
axes[2].set_xlabel('Date')
axes[2].set_ylabel('Volatility (%)')
axes[2].legend(loc='upper left')
axes[2].grid(True, alpha=0.3)

plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

## Plot 5: Rolling Beta + Scatter vs SPY

In [None]:
def _sample_beta(strategy, benchmark):
    """Slope of strategy on benchmark from sample moments; NaN with fewer than 3 points."""
    if len(benchmark) <= 2:
        return np.nan
    # np.cov defaults to ddof=1, so the variance must use ddof=1 as well;
    # mixing it with np.var's default ddof=0 inflates beta by n / (n - 1).
    return np.cov(strategy, benchmark)[0, 1] / np.var(benchmark, ddof=1)


if len(merged) < 10:
    print(f"Insufficient SPY overlap ({len(merged)} days) — skipping beta/scatter plot.")
else:
    # --- Rolling beta ---
    fig, axes = plt.subplots(2, 1, figsize=(14, 12))

    # beta = cov(strategy, SPY) / var(SPY) over each trailing window.
    for win, color, label in [(60, 'coral', '60-Day'), (TRADING_DAYS, 'steelblue', '252-Day')]:
        cov  = merged['daily_return'].rolling(win).cov(merged['spy_return'])
        var  = merged['spy_return'].rolling(win).var()
        beta = cov / var
        axes[0].plot(merged['date'], beta, linewidth=1.5, color=color, label=f'{label} Beta')

    axes[0].axhline(y=0, color='black', linestyle='-', alpha=0.3)
    axes[0].axhline(y=1, color='red', linestyle='--', alpha=0.3, label='Beta = 1')
    axes[0].set_title('Rolling Beta to SPY', fontsize=14, fontweight='bold')
    axes[0].set_ylabel('Beta')
    axes[0].legend(loc='upper left')
    axes[0].grid(True, alpha=0.3)

    # --- Full-period regression stats ---
    x = merged['spy_return'].values
    y = merged['daily_return'].values
    slope, intercept, r_value, p_value, std_err = sps.linregress(x, y)
    # Daily intercept scaled arithmetically to a yearly figure.
    ann_alpha = intercept * TRADING_DAYS
    corr = np.corrcoef(x, y)[0, 1]

    # Up/down market beta: the same slope estimated separately on days when
    # SPY rose and on days when it fell.
    up_mask   = (merged['spy_return'] > 0).values
    down_mask = (merged['spy_return'] < 0).values
    up_beta   = _sample_beta(y[up_mask], x[up_mask])
    down_beta = _sample_beta(y[down_mask], x[down_mask])

    # --- Scatter ---
    axes[1].scatter(x * 100, y * 100, alpha=0.3, s=10, color='steelblue')
    fit_x = np.linspace(x.min(), x.max(), 100)
    axes[1].plot(fit_x * 100, (slope * fit_x + intercept) * 100,
                 color='crimson', linewidth=2, label='OLS Fit')
    axes[1].axhline(y=0, color='black', linestyle='-', alpha=0.3)
    axes[1].axvline(x=0, color='black', linestyle='-', alpha=0.3)
    axes[1].set_title('Daily Returns: Strategy vs SPY', fontsize=14, fontweight='bold')
    axes[1].set_xlabel('SPY Daily Return (%)')
    axes[1].set_ylabel('Strategy Daily Return (%)')
    axes[1].legend(loc='upper left')
    axes[1].grid(True, alpha=0.3)

    # Annotation box
    stats_text = (f"Beta: {slope:.3f}\n"
                  f"Alpha (ann): {ann_alpha * 100:.2f}%\n"
                  f"Correlation: {corr:.3f}\n"
                  f"R\u00b2: {r_value**2:.3f}\n"
                  f"Up Beta: {up_beta:.3f}\n"
                  f"Down Beta: {down_beta:.3f}")
    axes[1].text(0.02, 0.97, stats_text, transform=axes[1].transAxes,
                 fontsize=10, verticalalignment='top',
                 bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))

    plt.tight_layout()
    plt.show()

---
# Exposure & Risk

## Plot 6: Exposure Time Series

In [None]:
# One line per exposure measure, read directly from the snapshot columns.
fig, ax = plt.subplots(figsize=(14, 6))

ax.plot(df_snapshots['date'], df_snapshots['gross_exposure'],
        linewidth=1.5, color='#1f77b4', label='Gross')
ax.plot(df_snapshots['date'], df_snapshots['net_exposure'],
        linewidth=1.5, color='#d62728', label='Net')
ax.plot(df_snapshots['date'], df_snapshots['long_exposure'],
        linewidth=1.5, color='#2ca02c', label='Long')
ax.plot(df_snapshots['date'], df_snapshots['short_exposure'],
        linewidth=1.5, color='#9467bd', label='Short')

ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
ax.set_title('Portfolio Exposure Over Time', fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Exposure (fraction of NAV)')
ax.legend(loc='upper left')
ax.grid(True, alpha=0.3)

plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

# -- Summary stats --
# Mean / min / max of each exposure column over the whole period.
print("\nExposure Summary:")
print("=" * 60)
for col in ['gross_exposure', 'net_exposure', 'long_exposure', 'short_exposure']:
    vals = df_snapshots[col]
    print(f"  {col:20s}  mean={vals.mean():.3f}  min={vals.min():.3f}  max={vals.max():.3f}")

## Plot 7: VaR / CVaR

In [None]:
# --- Static VaR / CVaR ---
# Historical (empirical) estimates: VaR is the (1 - confidence) percentile of
# daily returns; CVaR is the mean of the returns at or below that level.
confidence_levels = [0.95, 0.99]
var_cvar_rows = []
for cl in confidence_levels:
    var = np.percentile(returns, (1 - cl) * 100)
    cvar = returns[returns <= var].mean()
    var_cvar_rows.append({
        'Confidence': f'{cl * 100:.0f}%',
        'VaR (daily)':  f'{var * 100:.4f}%',
        'CVaR (daily)': f'{cvar * 100:.4f}%',
    })
display(pd.DataFrame(var_cvar_rows))

# --- Rolling VaR (95%) ---
# The 5th percentile of daily returns over each trailing window.
fig, ax = plt.subplots(figsize=(14, 6))
for win, color in [(20, 'coral'), (60, '#1f77b4'), (TRADING_DAYS, 'steelblue')]:
    rolling_var = returns.rolling(win).quantile(0.05)
    # Dates skip the first snapshot row, which has no return.
    ax.plot(df_snapshots['date'].iloc[1:].reset_index(drop=True),
            rolling_var * 100, linewidth=1.5, color=color, label=f'{win}d VaR (95%)')

ax.set_title('Rolling 95% VaR (Daily)', fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('VaR (%)')
ax.legend(loc='lower left')
ax.grid(True, alpha=0.3)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

## Plot 8: Return Distribution

In [None]:
# Side-by-side histograms of daily and monthly returns, in percent. Each has
# a red dashed line at zero and a green dashed line at the sample mean.
fig, axes = plt.subplots(1, 2, figsize=(14, 5))

# Daily
axes[0].hist(returns * 100, bins=50, color='steelblue', alpha=0.7, edgecolor='black')
axes[0].axvline(x=0, color='red', linestyle='--', linewidth=2)
axes[0].axvline(x=returns.mean() * 100, color='green', linestyle='--', linewidth=2,
                label=f'Mean: {returns.mean() * 100:.4f}%')
axes[0].set_title('Daily Return Distribution', fontsize=14, fontweight='bold')
axes[0].set_xlabel('Daily Return (%)')
axes[0].set_ylabel('Frequency')
axes[0].legend()
axes[0].grid(True, alpha=0.3)

# Monthly
axes[1].hist(monthly['monthly_return'] * 100, bins=20, color='coral', alpha=0.7, edgecolor='black')
axes[1].axvline(x=0, color='red', linestyle='--', linewidth=2)
axes[1].axvline(x=monthly['monthly_return'].mean() * 100, color='green', linestyle='--',
                linewidth=2, label=f'Mean: {monthly["monthly_return"].mean() * 100:.2f}%')
axes[1].set_title('Monthly Return Distribution', fontsize=14, fontweight='bold')
axes[1].set_xlabel('Monthly Return (%)')
axes[1].set_ylabel('Frequency')
axes[1].legend()
axes[1].grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## Plot 9: Concentration Risk

In [None]:
if df_positions is None:
    print("positions.csv not available — skipping concentration plot.")
else:
    # Only rows flagged as invested contribute to concentration.
    pos = df_positions[df_positions['invested'] == True].copy()
    pos['abs_weight'] = pos['weight'].abs()

    # Per-day concentration measures based on each position's share of the
    # day's total absolute weight.
    daily_conc = []
    for date, group in pos.groupby('date'):
        total = group['abs_weight'].sum()
        if total == 0:
            continue
        shares = (group['abs_weight'] / total).sort_values(ascending=False)
        hhi = (shares ** 2).sum()
        daily_conc.append({
            'date': date,
            'top_1': shares.iloc[0],
            # Slicing past the end is safe: with fewer than N positions the
            # top-N share is simply the sum of all shares.
            'top_3': shares.iloc[:3].sum(),
            'top_5': shares.iloc[:5].sum(),
            'hhi': hhi,
            'eff_n': 1.0 / hhi if hhi > 0 else np.nan,
            'n_positions': len(group),
        })
    conc = pd.DataFrame(daily_conc)

    if conc.empty:
        # Without this guard the column lookups below raise KeyError.
        print("No invested positions with non-zero weight — skipping concentration plot.")
    else:
        fig, axes = plt.subplots(3, 1, figsize=(14, 14), sharex=True)

        for col, label, color in [('top_1', 'Top 1', '#d62728'),
                                   ('top_3', 'Top 3', '#1f77b4'),
                                   ('top_5', 'Top 5', '#2ca02c')]:
            axes[0].plot(conc['date'], conc[col] * 100, linewidth=1.5, color=color, label=label)
        axes[0].set_title('Concentration: Top-N Share of Gross Exposure',
                          fontsize=14, fontweight='bold')
        axes[0].set_ylabel('Share (%)')
        axes[0].legend(loc='upper right')
        axes[0].grid(True, alpha=0.3)

        axes[1].plot(conc['date'], conc['hhi'], linewidth=1.5, color='steelblue')
        axes[1].axhline(y=0.12, color='red', linestyle='--', alpha=0.5, label='HHI = 0.12 threshold')
        axes[1].set_title('Herfindahl-Hirschman Index (HHI)', fontsize=14, fontweight='bold')
        axes[1].set_ylabel('HHI')
        axes[1].legend(loc='upper right')
        axes[1].grid(True, alpha=0.3)

        axes[2].plot(conc['date'], conc['eff_n'], linewidth=1.5, color='#2ca02c')
        axes[2].set_title('Effective Number of Bets (1 / HHI)', fontsize=14, fontweight='bold')
        axes[2].set_xlabel('Date')
        axes[2].set_ylabel('Effective N')
        axes[2].grid(True, alpha=0.3)

        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()

## Plot 14: Position Count Over Time

In [None]:
# Line plus light fill of the snapshot's num_positions column over time.
fig, ax = plt.subplots(figsize=(14, 5))
ax.plot(df_snapshots['date'], df_snapshots['num_positions'],
        linewidth=1.5, color='steelblue')
ax.fill_between(df_snapshots['date'], 0, df_snapshots['num_positions'],
                alpha=0.15, color='steelblue')
ax.set_title('Number of Open Positions', fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Position Count')
ax.grid(True, alpha=0.3)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

## Plot 15: Estimated vs Realized Volatility

In [None]:
# Realized volatility: trailing 60-day standard deviation of daily returns,
# annualized with sqrt(TRADING_DAYS).
ret_ser = df_snapshots['daily_return'].dropna()
realized_vol_60 = ret_ser.rolling(60).std() * np.sqrt(TRADING_DAYS)

fig, ax = plt.subplots(figsize=(14, 6))

# estimated_vol comes straight from the snapshots and is plotted in percent.
# NOTE(review): assumes estimated_vol is stored as an annualized fraction — confirm.
ax.plot(df_snapshots['date'], df_snapshots['estimated_vol'] * 100,
        linewidth=1.5, color='coral', label='Estimated Vol (PCM)')
# Dates skip the first snapshot row, which has no return.
ax.plot(df_snapshots['date'].iloc[1:].reset_index(drop=True),
        realized_vol_60 * 100,
        linewidth=1.5, color='steelblue', label='Realized Vol (60d)')
ax.axhline(y=10, color='green', linestyle='--', linewidth=1.5, alpha=0.7, label='10% Target')

ax.set_title('Estimated vs Realized Volatility', fontsize=14, fontweight='bold')
ax.set_xlabel('Date')
ax.set_ylabel('Annualized Volatility (%)')
ax.legend(loc='upper left')
ax.grid(True, alpha=0.3)
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

## Plot 16: Drawdown Episode Table

In [None]:
# Recompute the drawdown series if the cumulative-return cell was not run.
if 'drawdown' not in df_snapshots.columns:
    df_snapshots['cumulative_return'] = (1 + df_snapshots['daily_return'].fillna(0)).cumprod()
    df_snapshots['running_max'] = df_snapshots['cumulative_return'].cummax()
    df_snapshots['drawdown'] = (df_snapshots['cumulative_return'] / df_snapshots['running_max']) - 1

df_snapshots['in_drawdown'] = df_snapshots['drawdown'] < 0

# shift(..., fill_value=False) keeps the boolean dtype. shift(1).fillna(False)
# yields an object Series on which `~` returns integers instead of negating.
was_in_drawdown = df_snapshots['in_drawdown'].shift(1, fill_value=False)
# Start: first day below the peak. End: first day back at the peak.
df_snapshots['drawdown_start'] = df_snapshots['in_drawdown'] & ~was_in_drawdown
df_snapshots['drawdown_end'] = ~df_snapshots['in_drawdown'] & was_in_drawdown

# Pair each start with the next end. A drawdown still open on the last
# snapshot has no end marker and is therefore not reported.
drawdown_periods = []
start_date = None

for snap_date, is_start, is_end in zip(df_snapshots['date'],
                                       df_snapshots['drawdown_start'],
                                       df_snapshots['drawdown_end']):
    if is_start:
        start_date = snap_date
    if is_end and start_date is not None:
        episode = df_snapshots[
            (df_snapshots['date'] >= start_date) & (df_snapshots['date'] <= snap_date)
        ]
        trough_idx = episode['drawdown'].idxmin()
        drawdown_periods.append({
            'start': start_date,
            'trough': df_snapshots.loc[trough_idx, 'date'],
            'recovery': snap_date,
            # Calendar days from the first day under water to recovery.
            'duration_days': (snap_date - start_date).days,
            'max_drawdown_pct': episode['drawdown'].min() * 100,
        })
        start_date = None

if drawdown_periods:
    # Most negative first, i.e. the deepest episodes at the top.
    dd_df = pd.DataFrame(drawdown_periods).sort_values('max_drawdown_pct')
    print("Top Drawdown Episodes (sorted by severity):")
    print("=" * 80)
    display(dd_df.head(10))
    print(f"\nAverage drawdown duration: {dd_df['duration_days'].mean():.1f} days")
    print(f"Maximum drawdown duration: {dd_df['duration_days'].max()} days")
    print(f"Total completed drawdown episodes: {len(dd_df)}")
else:
    print("No completed drawdown periods found.")

## Plot 17: Exposure Regime Scorecard

In [None]:
# Regime thresholds, as fractions of NAV (printed as percentages below).
GROSS_HIGH = 1.20   # gross exposure at or above this is "high gross"
GROSS_LOW  = 0.80   # gross exposure at or below this is "low gross"
NET_BULL   = 0.30   # net exposure at or above this is "net long"
NET_BEAR   = -0.30  # net exposure at or below this is "net short"

# Coerce to numeric so the comparisons below work; unparseable values become NaN.
for c in ['gross_exposure', 'net_exposure', 'long_exposure', 'short_exposure', 'nav']:
    df_snapshots[c] = pd.to_numeric(df_snapshots[c], errors='coerce')

def classify_regime(row):
    gross = row['gross_exposure']
    net   = row['net_exposure']
    gross_bucket = 'high_gross' if gross >= GROSS_HIGH else ('low_gross' if gross <= GROSS_LOW else 'mid_gross')
    net_bucket   = 'net_long'   if net >= NET_BULL    else ('net_short'  if net <= NET_BEAR   else 'market_neutral')
    return f'{gross_bucket}__{net_bucket}'

df_snapshots['regime'] = df_snapshots.apply(classify_regime, axis=1)
df_snapshots['risk_state'] = np.select(
    [df_snapshots['gross_exposure'] >= GROSS_HIGH,
     df_snapshots['gross_exposure'] <= GROSS_LOW],
    ['risk_on', 'risk_off'],
    default='balanced'
)

# --- Scorecard ---
gross_breach = (df_snapshots['gross_exposure'] > GROSS_HIGH).mean()
gross_under  = (df_snapshots['gross_exposure'] < GROSS_LOW).mean()
net_long_pct = (df_snapshots['net_exposure'] > NET_BULL).mean()
net_short_pct = (df_snapshots['net_exposure'] < NET_BEAR).mean()

print('Exposure Regime Scorecard:')
print('=' * 60)
print(f'  Gross > {GROSS_HIGH * 100:.0f}% NAV: {gross_breach * 100:.2f}% of days')
print(f'  Gross < {GROSS_LOW * 100:.0f}% NAV: {gross_under * 100:.2f}% of days')
print(f'  Net > +{NET_BULL * 100:.0f}% NAV: {net_long_pct * 100:.2f}% of days')
print(f'  Net < -{abs(NET_BEAR) * 100:.0f}% NAV: {net_short_pct * 100:.2f}% of days')

risk_state_counts = (
    df_snapshots.groupby('risk_state', as_index=False)
      .agg(days=('risk_state', 'size'))
      .sort_values('days', ascending=False)
)
risk_state_counts['pct_days'] = risk_state_counts['days'] / len(df_snapshots)
print('\nRisk State Distribution:')
display(risk_state_counts)

# --- Return / Vol / Sharpe by risk state ---
if df_snapshots['daily_return'].notna().sum() > 30:
    by_state = (
        df_snapshots.groupby('risk_state', as_index=False)
          .agg(
              observations=('daily_return', lambda s: s.notna().sum()),
              avg_daily_return=('daily_return', 'mean'),
              annualized_return=('daily_return',
                  lambda s: (1 + s.dropna()).prod() ** (TRADING_DAYS / max(len(s.dropna()), 1)) - 1),
              annualized_vol=('daily_return',
                  lambda s: s.dropna().std() * np.sqrt(TRADING_DAYS)),
          )
    )
    by_state['sharpe'] = np.where(
        by_state['annualized_vol'].abs() > 1e-12,
        by_state['annualized_return'] / by_state['annualized_vol'],
        np.nan
    )
    print('\nPerformance by Risk State:')
    display(by_state)

    fig, ax = plt.subplots(figsize=(10, 5))
    colors = {'risk_on': '#d62728', 'balanced': '#1f77b4', 'risk_off': '#2ca02c'}
    bar_colors = [colors.get(s, '#999999') for s in by_state['risk_state']]
    ax.bar(by_state['risk_state'], by_state['annualized_vol'] * 100, color=bar_colors)
    ax.set_title('Annualized Volatility by Risk State', fontsize=14, fontweight='bold')
    ax.set_ylabel('Annualized Vol (%)')
    ax.set_xlabel('Risk State')
    ax.grid(axis='y', alpha=0.3)
    plt.tight_layout()
    plt.show()

---
# Execution & Implementation

## Plot 10: Slippage Distribution + Cumulative

In [None]:
if df_slippage is not None and len(df_slippage) > 0:
    # Per-fill slippage in basis points of expected notional
    # (expected_price * |quantity|). A zero notional is mapped to NaN so the
    # ratio cannot become +/-inf: dropna() does not remove inf, which would
    # break the histogram binning and turn the mean into inf.
    notional = (
        df_slippage['expected_price'] * df_slippage['quantity'].abs()
    ).replace(0, np.nan)
    df_slippage['slippage_bps'] = df_slippage['slippage_dollars'] / notional * 10000

    fig, axes = plt.subplots(1, 2, figsize=(14, 5))

    # Left panel: distribution of per-fill slippage with zero and median markers.
    axes[0].hist(df_slippage['slippage_bps'].dropna(), bins=50,
                 color='steelblue', alpha=0.7, edgecolor='black')
    axes[0].axvline(x=0, color='red', linestyle='--', linewidth=2)
    med_bps = df_slippage['slippage_bps'].median()
    axes[0].axvline(x=med_bps, color='green', linestyle='--', linewidth=2,
                    label=f'Median: {med_bps:.1f} bps')
    axes[0].set_title('Slippage Distribution (per fill)', fontsize=14, fontweight='bold')
    axes[0].set_xlabel('Slippage (bps)')
    axes[0].set_ylabel('Frequency')
    axes[0].legend()
    axes[0].grid(True, alpha=0.3)

    # Right panel: running total of dollar slippage, summed per date.
    daily_slip = df_slippage.groupby('date')['slippage_dollars'].sum().reset_index()
    daily_slip = daily_slip.sort_values('date')
    daily_slip['cumulative_slippage'] = daily_slip['slippage_dollars'].cumsum()

    axes[1].plot(daily_slip['date'], daily_slip['cumulative_slippage'],
                 linewidth=2, color='coral')
    axes[1].fill_between(daily_slip['date'], 0, daily_slip['cumulative_slippage'],
                         alpha=0.2, color='coral')
    axes[1].set_title('Cumulative Slippage ($)', fontsize=14, fontweight='bold')
    axes[1].set_xlabel('Date')
    axes[1].set_ylabel('Cumulative Slippage ($)')
    axes[1].grid(True, alpha=0.3)

    plt.tight_layout()
    plt.show()

    print("\nSlippage Summary:")
    print("=" * 60)
    print(f"  Total fills: {len(df_slippage)}")
    print(f"  Total slippage: ${df_slippage['slippage_dollars'].sum():,.2f}")
    print(f"  Mean slippage: {df_slippage['slippage_bps'].mean():.1f} bps")
    print(f"  Median slippage: {df_slippage['slippage_bps'].median():.1f} bps")
else:
    print("slippage.csv not available — skipping slippage plots.")

## Plot 13: Turnover Over Time

In [None]:
if df_positions is None:
    print("positions.csv not available — skipping turnover chart.")
else:
    # Date x symbol weight matrix; a symbol absent on a date counts as weight 0.
    weights = df_positions.pivot_table(index='date', columns='symbol',
                                       values='weight', fill_value=0)

    # Half the summed absolute day-over-day weight change per date.
    # The first date has no prior row, so its diff is NaN and sums to 0.
    daily_turnover = (
        weights.diff().abs().sum(axis=1)
        .div(2)
        .rename('half_turn')
        .reset_index()
    )
    daily_turnover['weekly_turn'] = daily_turnover['half_turn'].rolling(5).sum()

    # Annualize by the number of dates observed.
    trading_days_total = len(daily_turnover)
    years_total = trading_days_total / TRADING_DAYS
    if years_total > 0:
        ann_turnover = daily_turnover['half_turn'].sum() / years_total
    else:
        ann_turnover = 0

    weekly_pct = daily_turnover['weekly_turn'] * 100

    fig, ax = plt.subplots(figsize=(14, 6))
    ax.plot(daily_turnover['date'], weekly_pct, linewidth=1.5, color='steelblue')
    ax.fill_between(daily_turnover['date'], 0, weekly_pct, alpha=0.15, color='steelblue')
    ax.set_title('Rolling 5-Day Turnover', fontsize=14, fontweight='bold')
    ax.set_xlabel('Date')
    ax.set_ylabel('Turnover (% NAV)')
    ax.text(0.02, 0.95, f'Annualized turnover: {ann_turnover * 100:.0f}%',
            transform=ax.transAxes, fontsize=11,
            bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.5))
    ax.grid(True, alpha=0.3)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

## Plot 18: Order Lifecycle

In [None]:
if df_orders is not None:
    df_events = df_orders.copy()
    # Numeric order fields; missing or unparseable values are treated as 0.
    for col in ['quantity', 'fill_quantity', 'fill_price', 'limit_price']:
        if col in df_events.columns:
            df_events[col] = pd.to_numeric(df_events[col], errors='coerce').fillna(0.0)

    def parse_tag_value(tag, key):
        """Return the text after `key=` (up to the next ';') in `tag`, or NaN."""
        if pd.isna(tag):
            return np.nan
        # Escape the key so regex metacharacters in it are matched literally.
        m = re.search(rf'{re.escape(key)}=([^;]+)', str(tag))
        return m.group(1) if m else np.nan

    df_events['tier'] = df_events['tag'].apply(
        lambda t: parse_tag_value(t, 'tier')).fillna('unknown')

    # One stable sort shared by every per-order aggregate. The default sort is
    # not stable, so events of one order sharing a timestamp could be reordered
    # and 'first' / tail(1) would pick an arbitrary row. With a stable sort,
    # ties keep the logged row order (presumably chronological — confirm
    # against the logger).
    events_sorted = df_events.sort_values('date', kind='mergesort')

    grp = events_sorted.groupby('order_id', as_index=False)
    order_summary = grp.agg(
        symbol=('symbol', 'first'),
        tier=('tier', 'first'),
        order_type=('order_type', 'first'),
        quantity=('quantity', 'first'),
        submitted_at=('date', 'min'),
        final_at=('date', 'max')
    )

    # Status carried by the last event of each order.
    final_status = (
        events_sorted.groupby('order_id').tail(1)[['order_id', 'status']]
                     .rename(columns={'status': 'final_status'})
    )
    # Total filled quantity per order, summed across its events.
    fills = (
        df_events.groupby('order_id', as_index=False)['fill_quantity']
                 .sum().rename(columns={'fill_quantity': 'filled_qty'})
    )

    order_summary = order_summary.merge(final_status, on='order_id', how='left')
    order_summary = order_summary.merge(fills, on='order_id', how='left')
    # Zero-quantity orders get NaN here so the ratio below becomes 0, not inf.
    order_summary['abs_qty'] = order_summary['quantity'].abs().replace(0, np.nan)
    order_summary['fill_ratio'] = (
        order_summary['filled_qty'].abs() / order_summary['abs_qty']
    ).fillna(0.0).clip(0, 1)
    order_summary['days_to_final'] = (
        order_summary['final_at'] - order_summary['submitted_at']
    ).dt.days.fillna(0)

    fig, axes = plt.subplots(2, 2, figsize=(16, 10))

    status_counts = order_summary['final_status'].value_counts().sort_values(ascending=False)
    status_counts.plot(kind='bar', ax=axes[0, 0], color='#1f77b4')
    axes[0, 0].set_title('Final Order Status Counts', fontsize=12, fontweight='bold')
    axes[0, 0].set_ylabel('Count')
    axes[0, 0].grid(axis='y', alpha=0.3)

    sns.histplot(order_summary['fill_ratio'], bins=20, ax=axes[0, 1], color='#2ca02c')
    axes[0, 1].set_title('Fill Ratio Distribution', fontsize=12, fontweight='bold')
    axes[0, 1].set_xlabel('Fill Ratio')
    axes[0, 1].grid(alpha=0.3)

    # Fixed display order for tiers; only tiers present in the data are drawn.
    tier_order = ['strong', 'moderate', 'weak', 'exit', 'unknown']
    tier_status = pd.crosstab(order_summary['tier'], order_summary['final_status'], normalize='index')
    tier_status = tier_status.reindex([t for t in tier_order if t in tier_status.index]).dropna(how='all')
    tier_status.plot(kind='bar', stacked=True, ax=axes[1, 0], colormap='tab20')
    axes[1, 0].set_title('Final Status Mix by Tier', fontsize=12, fontweight='bold')
    axes[1, 0].set_ylabel('Share')
    axes[1, 0].grid(axis='y', alpha=0.3)

    sns.boxplot(data=order_summary, x='tier', y='days_to_final',
                order=[t for t in tier_order if t in order_summary['tier'].values],
                ax=axes[1, 1])
    axes[1, 1].set_title('Days to Final Status by Tier', fontsize=12, fontweight='bold')
    axes[1, 1].set_xlabel('Tier')
    axes[1, 1].set_ylabel('Days')
    axes[1, 1].grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

    # An order counts as canceled when its final status contains 'Canceled'
    # (case-insensitive).
    cancel_rate = (
        order_summary
            .assign(is_canceled=order_summary['final_status']
                                 .astype(str)
                                 .str.contains('Canceled', case=False, na=False))
            .groupby('tier', as_index=False)
            .agg(orders=('order_id', 'count'),
                 cancel_rate=('is_canceled', 'mean'),
                 avg_fill_ratio=('fill_ratio', 'mean'))
            .sort_values('orders', ascending=False)
    )
    print('\nCancel Rate and Fill Ratio by Tier:')
    display(cancel_rate)
else:
    print("order_events.csv not available — skipping order lifecycle.")

## Plot 19: Scaling Adherence

In [None]:
if df_targets is None:
    print("targets.csv not available — skipping scaling adherence.")
else:
    tgt = df_targets.copy()
    # Weight/schedule columns as numbers; anything missing or unparseable becomes 0.
    for col in ['start_w', 'weekly_target_w', 'scheduled_w', 'actual_w', 'scale_day']:
        tgt[col] = pd.to_numeric(tgt[col], errors='coerce').fillna(0.0)

    # Attach the absolute signal magnitude for each (week_id, symbol) when signals exist.
    if df_signals is None:
        tgt['magnitude'] = np.nan
    else:
        sig = df_signals.copy()
        sig['week_id'] = sig['date'].dt.strftime('%Y-%m-%d')
        sig['magnitude'] = pd.to_numeric(sig['magnitude'], errors='coerce').fillna(0.0).abs()
        sig = sig[['week_id', 'symbol', 'magnitude']].drop_duplicates(['week_id', 'symbol'])
        tgt = tgt.merge(sig, on=['week_id', 'symbol'], how='left')

    tgt['tier'] = tgt['magnitude'].apply(tier_from_mag)

    # Size of the full weekly move from the starting weight to the weekly target.
    tgt['total_week_order_abs'] = (tgt['weekly_target_w'] - tgt['start_w']).abs()

    def _progress(weight_col):
        # Fraction of the weekly move covered by `weight_col`, clipped to [0, 1].
        # Rows with (numerically) no move to make count as fully done.
        covered = (tgt[weight_col] - tgt['start_w']).abs()
        has_move = tgt['total_week_order_abs'] > 1e-10
        return np.clip(np.where(has_move, covered / tgt['total_week_order_abs'], 1.0), 0, 1)

    tgt['planned_progress'] = _progress('scheduled_w')
    tgt['actual_progress'] = _progress('actual_w')
    tgt['progress_gap'] = tgt['actual_progress'] - tgt['planned_progress']

    # Ordinal position of each row within its (week_id, symbol) group, by date.
    tgt = tgt.sort_values(['week_id', 'symbol', 'date'])
    tgt['day_in_week'] = tgt.groupby(['week_id', 'symbol']).cumcount()

    tier_order = ['strong', 'moderate', 'weak', 'unknown']
    tiers_present = [t for t in tier_order if t in tgt['tier'].values]

    fig, ax = plt.subplots(figsize=(10, 5))
    sns.boxplot(data=tgt, x='tier', y='progress_gap', order=tiers_present, ax=ax)
    ax.axhline(0, color='black', linewidth=1)
    ax.set_title('Progress Gap by Signal Tier (Actual - Planned)', fontsize=14, fontweight='bold')
    ax.set_ylabel('Progress Gap')
    ax.set_xlabel('Tier')
    ax.grid(alpha=0.2)
    plt.tight_layout()
    plt.show()

    # Mean planned vs. actual progress for each tier and day-in-week.
    profile = (
        tgt.groupby(['tier', 'day_in_week'], as_index=False)
           .agg(planned=('planned_progress', 'mean'), actual=('actual_progress', 'mean'))
    )

    fig, axes = plt.subplots(1, 2, figsize=(16, 5), sharey=True)
    panels = [('planned', 'Planned Progress by Tier'), ('actual', 'Actual Progress by Tier')]
    for panel_ax, (value_col, panel_title) in zip(axes, panels):
        sns.lineplot(data=profile, x='day_in_week', y=value_col, hue='tier', marker='o', ax=panel_ax)
        panel_ax.set_title(panel_title, fontsize=12, fontweight='bold')
        panel_ax.set_xlabel('Day-in-Week')
        panel_ax.grid(alpha=0.3)
    # Only the left panel gets an explicit y label.
    axes[0].set_ylabel('Progress')

    plt.tight_layout()
    plt.show()

    worst_lag = (
        tgt.groupby('symbol', as_index=False)['progress_gap']
           .mean()
           .sort_values('progress_gap')
    )
    print('Most Lagging Symbols (most negative average gap):')
    display(worst_lag.head(15))

## Plot 20: Stale Signal Risk

In [None]:
if df_targets is not None and df_positions is not None:
    # Use the same P&L column fallback as the other attribution cells, so a
    # positions file that only has 'daily_pnl' does not raise a KeyError here.
    # The column is renamed so the rest of the cell keeps one fixed name.
    pnl_col = 'daily_total_net_pnl' if 'daily_total_net_pnl' in df_positions.columns else 'daily_pnl'

    # One price / P&L row per (date, symbol).
    px = (
        df_positions[['date', 'symbol', 'price', pnl_col]]
        .rename(columns={pnl_col: 'daily_total_net_pnl'})
        .dropna(subset=['price'])
        .drop_duplicates(['date', 'symbol'])
        .sort_values(['symbol', 'date'])
    )
    for col in ['price', 'daily_total_net_pnl']:
        px[col] = pd.to_numeric(px[col], errors='coerce')

    tgt_stale = df_targets[['date', 'week_id', 'symbol',
                             'start_w', 'weekly_target_w', 'actual_w']].drop_duplicates()
    for col in ['start_w', 'weekly_target_w', 'actual_w']:
        tgt_stale[col] = pd.to_numeric(tgt_stale[col], errors='coerce').fillna(0.0)

    m = tgt_stale.merge(px, on=['date', 'symbol'], how='left')

    # Reference price per (week_id, symbol): the earliest non-missing price in
    # the group (groupby.first skips NaN).
    rebalance_px = (
        m.sort_values('date')
         .groupby(['week_id', 'symbol'], as_index=False)
         .first()[['week_id', 'symbol', 'price']]
         .rename(columns={'price': 'rebalance_price'})
    )
    m = m.merge(rebalance_px, on=['week_id', 'symbol'], how='left')

    # cumcount runs on the date-sorted frame; assignment realigns on the index.
    m['day_in_week'] = m.sort_values('date').groupby(['week_id', 'symbol']).cumcount()
    # Direction comes from the sign of the weekly target; a zero target has no
    # direction and yields NaN.
    m['signal_direction'] = np.sign(m['weekly_target_w']).replace(0, np.nan)
    m['return_since_rebalance'] = (m['price'] / m['rebalance_price']) - 1.0
    # Positive when the price has moved against the target's direction.
    m['adverse_move'] = -m['signal_direction'] * m['return_since_rebalance']

    if df_signals is not None:
        sig = df_signals.copy()
        sig['week_id'] = sig['date'].dt.strftime('%Y-%m-%d')
        sig['mag_abs'] = pd.to_numeric(sig['magnitude'], errors='coerce').fillna(0.0).abs()
        sig = sig[['week_id', 'symbol', 'mag_abs']].drop_duplicates(['week_id', 'symbol'])
        m = m.merge(sig, on=['week_id', 'symbol'], how='left')
    else:
        m['mag_abs'] = np.nan

    m['tier'] = m['mag_abs'].apply(tier_from_mag)

    profile = (
        m.groupby(['tier', 'day_in_week'], as_index=False)
         .agg(
             mean_adverse_move=('adverse_move', 'mean'),
             mean_daily_net_pnl=('daily_total_net_pnl', 'mean')
         )
    )

    fig, axes = plt.subplots(1, 2, figsize=(16, 5))

    sns.lineplot(data=profile, x='day_in_week', y='mean_adverse_move',
                 hue='tier', marker='o', ax=axes[0])
    axes[0].axhline(0, color='black', linewidth=1)
    axes[0].set_title('Mean Adverse Move Since Rebalance', fontsize=12, fontweight='bold')
    axes[0].set_ylabel('Adverse Move (fraction)')
    axes[0].set_xlabel('Day-in-Week')
    axes[0].grid(alpha=0.3)

    sns.lineplot(data=profile, x='day_in_week', y='mean_daily_net_pnl',
                 hue='tier', marker='o', ax=axes[1])
    axes[1].axhline(0, color='black', linewidth=1)
    axes[1].set_title('Mean Daily Net P&L During Scaling', fontsize=12, fontweight='bold')
    axes[1].set_ylabel('Daily Net P&L ($)')
    axes[1].set_xlabel('Day-in-Week')
    axes[1].grid(alpha=0.3)

    plt.tight_layout()
    plt.show()

    week_risk = (
        m.groupby('week_id', as_index=False)
         .agg(
             avg_adverse_move=('adverse_move', 'mean'),
             max_adverse_move=('adverse_move', 'max'),
             week_net_pnl=('daily_total_net_pnl', 'sum')
         )
         .sort_values('week_id')
    )
    print('\nWeek-Level Stale Signal Risk (last 15 weeks):')
    display(week_risk.tail(15))
else:
    print("targets.csv and/or positions.csv not available — skipping stale signal risk.")

---
# P&L Attribution

## Plot 11: P&L by Long vs Short

In [None]:
if df_positions is None:
    print("positions.csv not available — skipping P&L by side.")
else:
    pnl_col = 'daily_total_net_pnl' if 'daily_total_net_pnl' in df_positions.columns else 'daily_pnl'

    # Label each row by the sign of its weight and drop rows with no side.
    pos = df_positions.copy()
    pos['side'] = np.select(
        [pos['weight'] > 0, pos['weight'] < 0],
        ['Long', 'Short'],
        default='Flat'
    )
    pos = pos[pos['side'] != 'Flat']

    # Date x side P&L table; make sure both side columns exist.
    pnl_by_side = pos.groupby(['date', 'side'])[pnl_col].sum().unstack(fill_value=0)
    for col in ['Long', 'Short']:
        if col not in pnl_by_side.columns:
            pnl_by_side[col] = 0.0
    pnl_by_side = pnl_by_side.sort_index()

    dates = pnl_by_side.index
    long_pnl = pnl_by_side['Long']
    short_pnl = pnl_by_side['Short']

    fig, axes = plt.subplots(2, 1, figsize=(14, 10), sharex=True)
    daily_ax, cum_ax = axes[0], axes[1]

    # Top panel: daily bars, with the short bar drawn starting from the long value.
    daily_ax.bar(dates, long_pnl, color='#2ca02c', alpha=0.7, label='Long')
    daily_ax.bar(dates, short_pnl, bottom=long_pnl, color='#9467bd', alpha=0.7, label='Short')
    daily_ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
    daily_ax.set_title('Daily P&L by Side', fontsize=14, fontweight='bold')
    daily_ax.set_ylabel('P&L ($)')
    daily_ax.legend(loc='upper left')
    daily_ax.grid(True, alpha=0.3)

    # Bottom panel: running totals per side plus their sum.
    cum_lines = [
        (long_pnl, '#2ca02c', '-', 'Long (cumulative)'),
        (short_pnl, '#9467bd', '-', 'Short (cumulative)'),
        (long_pnl + short_pnl, 'steelblue', '--', 'Total'),
    ]
    for series, line_color, line_style, line_label in cum_lines:
        cum_ax.plot(dates, series.cumsum(),
                    linewidth=2, color=line_color, linestyle=line_style, label=line_label)
    cum_ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
    cum_ax.set_title('Cumulative P&L by Side', fontsize=14, fontweight='bold')
    cum_ax.set_xlabel('Date')
    cum_ax.set_ylabel('Cumulative P&L ($)')
    cum_ax.legend(loc='upper left')
    cum_ax.grid(True, alpha=0.3)

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

    print(f"\nUsing P&L column: {pnl_col}")
    print(f"  Long total:  ${long_pnl.sum():,.2f}")
    print(f"  Short total: ${short_pnl.sum():,.2f}")

## Plot 12: Top / Bottom Contributors

In [None]:
if df_positions is not None:
    pnl_col = 'daily_total_net_pnl' if 'daily_total_net_pnl' in df_positions.columns else 'daily_pnl'
    # Total P&L per symbol, ascending (worst contributor first).
    symbol_pnl = df_positions.groupby('symbol')[pnl_col].sum().sort_values()

    n = min(10, len(symbol_pnl))
    bottom = symbol_pnl.head(n)
    top    = symbol_pnl.tail(n)
    # With fewer than 2*n symbols the two slices overlap. De-duplicate on the
    # symbol (index), not on the value: Series.drop_duplicates() compares
    # values and would also discard distinct symbols that happen to share the
    # same total P&L (e.g. several symbols at exactly 0.0).
    combined = pd.concat([bottom, top])
    combined = combined[~combined.index.duplicated(keep='first')]

    # Red for losers, green for everything else.
    colors = ['#d62728' if v < 0 else '#2ca02c' for v in combined.values]

    fig, ax = plt.subplots(figsize=(10, max(6, len(combined) * 0.4)))
    ax.barh(combined.index.astype(str), combined.values, color=colors, alpha=0.8)
    ax.axvline(x=0, color='black', linestyle='-', alpha=0.3)
    ax.set_title(f'Top {n} and Bottom {n} Contributors by Total P&L',
                 fontsize=14, fontweight='bold')
    ax.set_xlabel('Total P&L ($)')
    ax.grid(True, alpha=0.3, axis='x')
    plt.tight_layout()
    plt.show()

    print(f"\nUsing P&L column: {pnl_col}")
    print(f"Total positive contributors: {(symbol_pnl > 0).sum()}")
    print(f"Total negative contributors: {(symbol_pnl < 0).sum()}")
else:
    print("positions.csv not available — skipping contributor chart.")

## Plot 21: P&L Reconciliation

In [None]:
if df_positions is None:
    print("positions.csv not available — skipping P&L reconciliation.")
else:
    pnl_col = 'daily_total_net_pnl' if 'daily_total_net_pnl' in df_positions.columns else 'daily_pnl'

    # Position-level P&L summed to one row per date.
    daily_position_pnl = (
        df_positions.groupby('date')[pnl_col].sum()
        .rename('position_pnl')
        .reset_index()
    )

    # Fees per date (0 when the column is not logged).
    if 'daily_fees' in df_positions.columns:
        daily_fees = df_positions.groupby('date')['daily_fees'].sum().rename('fees').reset_index()
        daily_position_pnl = daily_position_pnl.merge(daily_fees, on='date', how='left')
    else:
        daily_position_pnl['fees'] = 0.0
    daily_position_pnl['fees'] = daily_position_pnl['fees'].fillna(0.0)

    # Dividends per date (0 when the column is not logged).
    if 'daily_dividends' in df_positions.columns:
        daily_divs = (
            df_positions.groupby('date')['daily_dividends'].sum().rename('dividends').reset_index()
        )
        daily_position_pnl = daily_position_pnl.merge(daily_divs, on='date', how='left')
    else:
        daily_position_pnl['dividends'] = 0.0
    daily_position_pnl['dividends'] = daily_position_pnl['dividends'].fillna(0.0)

    # The attributed figure is the position P&L as-is; fees and dividends are
    # carried alongside but not added to or subtracted from it here.
    daily_position_pnl['attributed_net'] = daily_position_pnl['position_pnl']

    # NAV change: the logged daily P&L when present, otherwise the NAV first difference.
    snap = df_snapshots[['date', 'nav']].copy()
    snap['nav_change'] = (
        df_snapshots['daily_pnl'] if 'daily_pnl' in df_snapshots.columns else snap['nav'].diff()
    )

    reconcile = snap[['date', 'nav_change']].merge(daily_position_pnl, on='date', how='left')
    for col in ['position_pnl', 'attributed_net', 'fees', 'dividends']:
        reconcile[col] = reconcile[col].fillna(0)
    reconcile['unexplained'] = reconcile['nav_change'] - reconcile['attributed_net']

    nav_total = reconcile['nav_change'].sum()
    unexplained_total = reconcile['unexplained'].sum()

    print('=' * 80)
    print(f'P&L RECONCILIATION: Attributed ({pnl_col}) vs NAV Change')
    print('=' * 80)
    print(f"Total NAV Change:         ${nav_total:,.2f}")
    print(f"Total Attributed Net:     ${reconcile['attributed_net'].sum():,.2f}")
    print(f"Total Unexplained:        ${unexplained_total:,.2f}")
    if nav_total != 0:
        print(f"Unexplained as % of NAV:  {abs(unexplained_total) / abs(nav_total) * 100:.2f}%")

    reconcile['cum_nav'] = reconcile['nav_change'].cumsum()
    reconcile['cum_attrib'] = reconcile['attributed_net'].cumsum()

    fig, axes = plt.subplots(3, 1, figsize=(14, 12), sharex=True)
    daily_ax, gap_ax, cum_ax = axes[0], axes[1], axes[2]
    dates = reconcile['date']

    # Panel 1: daily NAV change against attributed P&L.
    daily_ax.plot(dates, reconcile['nav_change'],
                  'b-', linewidth=2, label='NAV Change', alpha=0.8)
    daily_ax.plot(dates, reconcile['attributed_net'],
                  'g--', linewidth=2, label='Attributed Net P&L', alpha=0.8)
    daily_ax.set_title('Daily P&L Reconciliation: NAV Change vs Attributed Net',
                       fontsize=14, fontweight='bold')
    daily_ax.set_ylabel('P&L ($)')
    daily_ax.legend(loc='upper left')
    daily_ax.axhline(y=0, color='k', linestyle='-', alpha=0.3)
    daily_ax.grid(True, alpha=0.3)

    # Panel 2: daily gap, coloured by sign.
    gap_colors = np.where(reconcile['unexplained'] >= 0, 'orange', 'purple')
    gap_ax.bar(dates, reconcile['unexplained'], color=gap_colors, alpha=0.7)
    gap_ax.set_title('Daily Unexplained P&L', fontsize=14, fontweight='bold')
    gap_ax.set_ylabel('Unexplained P&L ($)')
    gap_ax.axhline(y=0, color='k', linestyle='-', alpha=0.3)
    gap_ax.grid(True, alpha=0.3)

    # Panel 3: cumulative series with the gap between them shaded.
    cum_ax.plot(dates, reconcile['cum_nav'],
                'b-', linewidth=2, label='Cumulative NAV Change')
    cum_ax.plot(dates, reconcile['cum_attrib'],
                'g--', linewidth=2, label='Cumulative Attributed Net')
    cum_ax.fill_between(dates, reconcile['cum_attrib'], reconcile['cum_nav'],
                        alpha=0.3, color='red', label='Cumulative Gap')
    cum_ax.set_title('Cumulative P&L: NAV Change vs Attributed Net',
                     fontsize=14, fontweight='bold')
    cum_ax.set_xlabel('Date')
    cum_ax.set_ylabel('Cumulative P&L ($)')
    cum_ax.legend(loc='upper left')
    cum_ax.axhline(y=0, color='k', linestyle='-', alpha=0.3)
    cum_ax.grid(True, alpha=0.3)

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

## Plot 22: P&L by Signal Horizon

In [None]:
if df_positions is None or df_signals is None:
    print("positions.csv and/or signals.csv not available — skipping horizon attribution.")
else:
    sig = df_signals.copy()

    # Keep only signal rows where every input is numeric, then the last row
    # per (date, symbol).
    required_cols = ['price', 'sma_short', 'sma_medium', 'sma_long', 'atr']
    for col in required_cols:
        sig[col] = pd.to_numeric(sig[col], errors='coerce')
    sig = sig.dropna(subset=required_cols).copy()
    sig = sig.sort_values('date').groupby(['date', 'symbol']).tail(1)

    horizons = ('short', 'medium', 'long')
    comp_cols = [f'comp_{h}' for h in horizons]
    pnl_cols = [f'{h}_pnl' for h in horizons]

    # Distance of price from each SMA in ATR units (zero ATR -> NaN).
    atr_safe = sig['atr'].replace(0, np.nan)
    for h in horizons:
        sig[f'dist_{h}'] = (sig['price'] - sig[f'sma_{h}']) / atr_safe

    # Fixed horizon weights applied to the distances.
    weights = {'short': 0.5, 'medium': 0.3, 'long': 0.2}
    for h in horizons:
        sig[f'comp_{h}'] = weights[h] * sig[f'dist_{h}']
    sig['abs_total'] = sig[comp_cols].abs().sum(axis=1)
    sig = sig[sig['abs_total'] > 0].copy()

    pnl_col = 'daily_total_net_pnl' if 'daily_total_net_pnl' in df_positions.columns else 'daily_pnl'
    pos_pnl = (
        df_positions.groupby(['date', 'symbol'])[pnl_col].sum()
        .reset_index()
        .rename(columns={pnl_col: 'position_pnl'})
    )

    # Join signals onto daily P&L and carry each symbol's latest components
    # forward to the dates that have no signal row.
    carried_cols = comp_cols + ['abs_total']
    merged_h = pos_pnl.merge(
        sig[['date', 'symbol'] + carried_cols],
        on=['symbol', 'date'], how='left'
    ).sort_values(['symbol', 'date'])
    merged_h[carried_cols] = merged_h.groupby('symbol')[carried_cols].ffill()
    merged_h = merged_h[merged_h['abs_total'].notna() & (merged_h['abs_total'] > 0)].copy()

    # Split each position's P&L in proportion to the absolute component sizes.
    for h in horizons:
        share = merged_h[f'comp_{h}'].abs() / merged_h['abs_total']
        merged_h[f'{h}_pnl'] = merged_h['position_pnl'] * share

    daily_horizon = merged_h.groupby('date')[pnl_cols].sum().reset_index()

    totals = daily_horizon[pnl_cols].sum()
    totals = totals.rename({'short_pnl': 'Short (SMA-20)', 'medium_pnl': 'Medium (SMA-63)', 'long_pnl': 'Long (SMA-252)'})
    totals_df = totals.to_frame('Total P&L')
    total_attributed = totals_df['Total P&L'].sum()
    if total_attributed != 0:
        totals_df['Pct of Attributed'] = totals_df['Total P&L'] / total_attributed
    else:
        totals_df['Pct of Attributed'] = 0.0
    print('\nHorizon Attribution Summary:')
    print('=' * 60)
    display(totals_df)

    horizon_styles = [
        ('short_pnl', 'Short (SMA-20)', '#1f77b4'),
        ('medium_pnl', 'Medium (SMA-63)', '#ff7f0e'),
        ('long_pnl', 'Long (SMA-252)', '#2ca02c'),
    ]

    # Cumulative P&L per horizon.
    cum_h = daily_horizon.set_index('date').cumsum()
    fig, ax = plt.subplots(figsize=(14, 6))
    for value_col, line_label, line_color in horizon_styles:
        ax.plot(cum_h.index, cum_h[value_col], label=line_label, color=line_color, linewidth=2)
    ax.set_title('Cumulative P&L by Signal Horizon', fontsize=14, fontweight='bold')
    ax.set_xlabel('Date')
    ax.set_ylabel('Cumulative P&L ($)')
    ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
    ax.grid(True, alpha=0.3)
    ax.legend(loc='upper left')
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

    # Monthly totals per horizon as stacked bars.
    monthly_h = daily_horizon.copy()
    monthly_h['year_month'] = monthly_h['date'].dt.to_period('M')
    monthly_h = monthly_h.groupby('year_month')[pnl_cols].sum()

    fig, ax = plt.subplots(figsize=(14, 6))
    monthly_h.plot(kind='bar', stacked=True, ax=ax, color=['#1f77b4', '#ff7f0e', '#2ca02c'])
    ax.set_title('Monthly P&L by Signal Horizon (Stacked)', fontsize=14, fontweight='bold')
    ax.set_xlabel('Month')
    ax.set_ylabel('P&L ($)')
    ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
    ax.grid(True, alpha=0.3, axis='y')
    ax.set_xticklabels([str(p) for p in monthly_h.index], rotation=45, ha='right')
    ax.legend(loc='upper left')
    plt.tight_layout()
    plt.show()

## Plot 23: Slippage Cost Overlay (Gross vs Net P&L)

In [None]:
if df_slippage is not None:
    # Prefer a logged dollar-slippage column, in this order of preference.
    slippage_col = None
    for col in ['slippage_dollars', 'slippage', 'cost']:
        if col in df_slippage.columns:
            slippage_col = col
            break

    # Fallback: derive dollar slippage from prices. The price difference is
    # per share, so it must be scaled by quantity before it can be added to
    # dollar P&L. Assumes quantity is signed (+buy / -sell), which makes an
    # adverse fill positive on both sides — TODO confirm against the logger.
    # Without a quantity column no dollar figure can be derived, so the
    # overlay is skipped instead of plotting a per-share number as dollars.
    if slippage_col is None and {'expected_price', 'fill_price', 'quantity'}.issubset(df_slippage.columns):
        df_slippage['slippage_dollars'] = (
            (df_slippage['fill_price'] - df_slippage['expected_price'])
            * df_slippage['quantity']
        )
        slippage_col = 'slippage_dollars'

    if slippage_col:
        daily_slip = df_slippage.groupby('date')[slippage_col].sum().reset_index()
        daily_slip.columns = ['date', 'daily_slippage']

        # Days without fills carry zero slippage. Gross P&L adds the slippage
        # back to the net figure, i.e. positive slippage is treated as a cost.
        overlay = df_snapshots[['date', 'daily_pnl']].merge(daily_slip, on='date', how='left')
        overlay['daily_slippage'] = overlay['daily_slippage'].fillna(0)
        overlay['gross_pnl'] = overlay['daily_pnl'] + overlay['daily_slippage']

        print('\nSlippage Impact:')
        print('=' * 60)
        print(f"Total slippage cost:         ${overlay['daily_slippage'].sum():,.2f}")
        print(f"Net P&L:                     ${overlay['daily_pnl'].sum():,.2f}")
        print(f"Gross P&L (before slippage): ${overlay['gross_pnl'].sum():,.2f}")

        overlay['cum_net']      = overlay['daily_pnl'].cumsum()
        overlay['cum_gross']    = overlay['gross_pnl'].cumsum()
        overlay['cum_slippage'] = overlay['daily_slippage'].cumsum()

        # The shaded band between the two cumulative lines is the running
        # slippage total.
        fig, ax = plt.subplots(figsize=(14, 6))
        ax.plot(overlay['date'], overlay['cum_gross'],
                linewidth=2, label='Gross P&L', color='blue', alpha=0.7)
        ax.plot(overlay['date'], overlay['cum_net'],
                linewidth=2, label='Net P&L', color='green', alpha=0.7)
        ax.fill_between(overlay['date'], overlay['cum_gross'], overlay['cum_net'],
                        alpha=0.3, color='red', label='Slippage Cost')
        ax.set_title('Cumulative P&L: Gross vs Net (Slippage Impact)',
                     fontsize=14, fontweight='bold')
        ax.set_xlabel('Date')
        ax.set_ylabel('Cumulative P&L ($)')
        ax.legend(loc='upper left')
        ax.axhline(y=0, color='black', linestyle='-', alpha=0.3)
        ax.grid(True, alpha=0.3)
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.show()
    else:
        print("Could not identify slippage column — skipping overlay.")
else:
    print("slippage.csv not available — skipping slippage overlay.")