# Portfolio Volatility vs 10% Target

Compares realized volatility and model-estimated volatility (if logged) against a 10% annualized target.

Data source: `wolfpack/daily_snapshots.csv`.


In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from IPython.display import display

# Notebook-wide pandas display settings: per the pandas options docs, None for
# 'display.max_columns' removes the column limit and None for 'display.width'
# lets pandas auto-detect the output width.
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# Default chart look for every figure; individual cells may still pass their
# own figsize to plt.subplots().
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

# NOTE(review): the star import pulls every public QuantConnect name into the
# notebook namespace; only QuantBook is used in the cells visible here.
from QuantConnect import *
from QuantConnect.Research import QuantBook

# Shared research handle; read_csv_from_store() reads through qb.ObjectStore.
qb = QuantBook()
print('QuantBook initialized')


def read_csv_from_store(key):
    """Load a CSV saved in the QuantBook ObjectStore into a DataFrame.

    Args:
        key: ObjectStore key of the CSV file.

    Returns:
        A pandas DataFrame parsed from the stored text, or None when the key
        does not exist, its content is empty, or any error occurs while
        reading or parsing. Each of those cases prints a short diagnostic
        instead of raising, so callers must check for None.
    """
    try:
        store = qb.ObjectStore
        if store.ContainsKey(key):
            raw_text = store.Read(key)
            if raw_text:
                return pd.read_csv(StringIO(raw_text))
            print(f'Empty ObjectStore key: {key}')
        else:
            print(f'ObjectStore key not found: {key}')
    except Exception as e:
        # Best-effort loader: report the problem and fall through to None.
        print(f'Error reading {key}: {e}')
    return None


In [None]:
# --- Configuration ---------------------------------------------------------
TARGET_VOL = 0.10             # annualized volatility target (10%)
ROLL_WINDOWS = [20, 60, 252]  # rolling window lengths, in rows of daily data
ANNUALIZATION = np.sqrt(252)  # scales a daily std to an annualized figure
MIN_NAV_POINTS = 3            # 3 NAVs -> 2 returns, the minimum for a sample std

# --- Load -------------------------------------------------------------------
# read_csv_from_store() returns None (after printing why) on any failure.
df_snapshots = read_csv_from_store('wolfpack/daily_snapshots.csv')
if df_snapshots is None:
    raise ValueError('daily_snapshots.csv is required for volatility analysis.')

required_cols = ['date', 'nav']
missing = [c for c in required_cols if c not in df_snapshots.columns]
if missing:
    raise ValueError(f'Missing required columns in daily_snapshots.csv: {missing}')

# --- Clean ------------------------------------------------------------------
# Work on a copy so the raw snapshot frame stays untouched. Unparseable dates
# and numbers are coerced to NaT/NaN and filtered out below.
df = df_snapshots.copy()
df['date'] = pd.to_datetime(df['date'], errors='coerce')
df['nav'] = pd.to_numeric(df['nav'], errors='coerce')
if 'estimated_vol' in df.columns:
    df['estimated_vol'] = pd.to_numeric(df['estimated_vol'], errors='coerce')
else:
    # Always provide the column so later cells can test it uniformly.
    df['estimated_vol'] = np.nan

df = df[df['date'].notna() & df['nav'].notna()].sort_values('date').reset_index(drop=True)
# A single return has no sample standard deviation (it would print as nan),
# so at least two returns, i.e. three NAV points, are required.
if len(df) < MIN_NAV_POINTS:
    raise ValueError(
        f'Need at least {MIN_NAV_POINTS} NAV points (2 returns) to compute realized volatility.'
    )

# --- Returns and rolling realized volatility --------------------------------
# A zero NAV makes pct_change() emit +/-inf; treat those as missing so they do
# not turn the full-sample std into NaN.
df['daily_return'] = df['nav'].pct_change().replace([np.inf, -np.inf], np.nan)
returns = df['daily_return'].dropna()

for w in ROLL_WINDOWS:
    # rolling(w) yields NaN until w non-missing returns are available.
    df[f'realized_vol_{w}d'] = df['daily_return'].rolling(w).std() * ANNUALIZATION

print(f'Rows: {len(df):,}')
print(f'Date range: {df["date"].min().date()} to {df["date"].max().date()}')
print(f'Realized vol (full sample): {returns.std() * ANNUALIZATION * 100:.2f}%')
print(f'Target vol: {TARGET_VOL * 100:.2f}%')


## Realized Volatility Scorecard

This table summarizes rolling realized volatility at 20-, 60-, and 252-day windows, along with the fraction of days spent above and below the 10% annualized target. It also reports the observation count, average, and latest value of the model-estimated volatility from the logger when that column is present and populated. Use this scorecard to judge at a glance whether the volatility-targeting system is keeping realized risk close to its objective.

## Portfolio Volatility vs Target — Time Series

These two panels compare realized portfolio volatility against the 10% annualized target across the full backtest. The top chart overlays all rolling windows and the model's own estimated volatility (when available), while the bottom panel shows the signed gap, in percentage points, between 20-day realized vol and the target — positive means above target, negative means below. Persistent gaps in one direction suggest the vol-targeting scaler is systematically under- or over-sizing positions.

## Estimated vs Realized Volatility Comparison

This section runs only when the logger's estimated volatility column has enough observations (more than 20), and directly compares the model's vol estimate to 20-day realized vol via a scatter plot and an error-distribution histogram (errors in basis points, estimated minus realized). The scatter shows whether estimates track realized vol on a level-by-level basis, while the histogram reveals whether errors are symmetric or biased in a consistent direction. A mean error near zero and a tight distribution indicate the diagonal-covariance approximation is working well.

In [None]:
# Build one scorecard row per rolling window that has at least one value.
score_rows = []
for w in ROLL_WINDOWS:
    s = df[f'realized_vol_{w}d'].dropna()
    n_obs = len(s)
    if n_obs:
        score_rows.append({
            'window_days': w,
            'observations': n_obs,
            'mean_realized_vol': s.mean(),
            'median_realized_vol': s.median(),
            'latest_realized_vol': s.iloc[-1],
            # Stored as fractions of observations (0-1), not percentages.
            'pct_days_above_target': s.gt(TARGET_VOL).sum() / n_obs,
            'pct_days_below_target': s.lt(TARGET_VOL).sum() / n_obs,
            'max_realized_vol': s.max(),
            'min_realized_vol': s.min(),
        })

vol_scorecard = pd.DataFrame(score_rows)
if len(vol_scorecard.index) > 0:
    display(vol_scorecard)

# Summarize the logger's own volatility estimate when any value was recorded.
if df['estimated_vol'].notna().any():
    est = df['estimated_vol'].dropna()
    print(f'Estimated vol observations: {len(est):,}')
    print(f'Average estimated vol: {100 * est.mean():.2f}%')
    print(f'Latest estimated vol: {100 * est.iloc[-1]:.2f}%')
else:
    print('No estimated_vol values found in snapshots (column missing or empty).')


In [None]:
# Two stacked panels sharing the date axis: volatility levels on top, the gap
# between 20d realized vol and the target underneath.
fig, axes = plt.subplots(2, 1, figsize=(16, 10), sharex=True)
vol_ax, gap_ax = axes
dates = df['date']

# Top panel: one line per rolling window, in percent.
for w in ROLL_WINDOWS:
    vol_col = f'realized_vol_{w}d'
    if vol_col not in df.columns:
        continue
    vol_ax.plot(dates, 100 * df[vol_col], label=f'Realized {w}d', linewidth=2)

# Overlay the logger's estimate only when at least one value exists.
if df['estimated_vol'].notna().any():
    vol_ax.plot(
        dates,
        100 * df['estimated_vol'],
        label='Estimated vol (logger)',
        linewidth=2,
        color='#111111',
        alpha=0.8,
    )

vol_ax.axhline(100 * TARGET_VOL, color='red', linestyle='--', linewidth=2, label='Target 10%')
vol_ax.set_title('Portfolio Volatility vs Target')
vol_ax.set_ylabel('Annualized vol (%)')
vol_ax.legend(loc='upper left')
vol_ax.grid(alpha=0.3)

# Bottom panel: signed gap in percentage points (positive = above target).
if 'realized_vol_20d' in df.columns:
    gap_pp = 100 * (df['realized_vol_20d'] - TARGET_VOL)
    gap_ax.fill_between(dates, 0, gap_pp, color='#1f77b4', alpha=0.25)
    gap_ax.plot(dates, gap_pp, color='#1f77b4', linewidth=2)
    gap_ax.axhline(0, color='black', linewidth=1)
    gap_ax.set_title('20d Realized Vol Minus Target')
    gap_ax.set_ylabel('Vol gap (percentage points)')
    gap_ax.set_xlabel('Date')
    gap_ax.grid(alpha=0.3)

plt.tight_layout()
plt.show()


In [None]:
# Model-vs-realized diagnostics.
#
# The comparison is only meaningful on dates where BOTH the logger's estimate
# and the 20d realized vol exist, so the gate counts overlapping rows rather
# than estimated_vol observations alone. Otherwise a well-populated
# estimated_vol column with little or no overlap would be analysed on a
# handful of points (or none, which yields NaN axis limits).
MIN_OVERLAP_OBS = 20  # the diagnostics need more than this many paired rows

compare = None
if 'realized_vol_20d' in df.columns:
    compare = df[['date', 'estimated_vol', 'realized_vol_20d']].dropna().copy()

if compare is not None and len(compare) > MIN_OVERLAP_OBS:
    # Signed error in basis points: positive means the model over-estimated.
    compare['error_bps'] = 10000 * (compare['estimated_vol'] - compare['realized_vol_20d'])

    print('Estimated vol vs realized 20d diagnostics:')
    print(f"  Correlation: {compare['estimated_vol'].corr(compare['realized_vol_20d']):.3f}")
    print(f"  Mean error (estimated - realized): {compare['error_bps'].mean():.1f} bps")
    print(f"  MAE: {compare['error_bps'].abs().mean():.1f} bps")

    fig, axes = plt.subplots(1, 2, figsize=(15, 5))

    # Left: scatter of estimate vs realized (both in percent) with a 45-degree
    # reference line; points on the line are perfect estimates.
    axes[0].scatter(100 * compare['estimated_vol'], 100 * compare['realized_vol_20d'], alpha=0.6, color='#2ca02c')
    # Same limits on both axes, with 5% headroom, so the diagonal is the identity line.
    lims = [0, max(100 * compare['estimated_vol'].max(), 100 * compare['realized_vol_20d'].max()) * 1.05]
    axes[0].plot(lims, lims, '--', color='black', linewidth=1)
    axes[0].set_xlim(lims)
    axes[0].set_ylim(lims)
    axes[0].set_title('Estimated vs Realized 20d Vol')
    axes[0].set_xlabel('Estimated vol (%)')
    axes[0].set_ylabel('Realized vol (%)')
    axes[0].grid(alpha=0.3)

    # Right: distribution of the signed error with its mean marked.
    axes[1].hist(compare['error_bps'], bins=30, color='#ff7f0e', edgecolor='black', alpha=0.8)
    axes[1].axvline(compare['error_bps'].mean(), color='red', linestyle='--', linewidth=2, label='Mean error')
    axes[1].set_title('Estimated Vol Error Distribution')
    axes[1].set_xlabel('Error (bps, estimated - realized 20d)')
    axes[1].set_ylabel('Frequency')
    axes[1].legend()
    axes[1].grid(alpha=0.3)

    plt.tight_layout()
    plt.show()
else:
    print('Not enough overlapping estimated_vol and realized 20d data for model-vs-realized diagnostics.')


In [None]:
# Target-breach summary for the 20d realized volatility series.
if 'realized_vol_20d' in df.columns:
    s = df['realized_vol_20d'].dropna()
    if not s.empty:
        above = s.gt(TARGET_VOL)

        # Every "not above" observation starts a new run id, so summing the
        # flags within each id gives the length of each consecutive breach run.
        run_id = (~above).cumsum()
        max_streak = int(above.groupby(run_id).sum().max())

        latest_vol = s.iloc[-1]
        summary_lines = (
            '20d realized vol target-breach summary:',
            f'  Pct days above target: {100 * above.mean():.2f}%',
            f'  Longest consecutive streak above target: {max_streak} days',
            f'  Latest 20d vol: {100 * latest_vol:.2f}%',
            f'  Latest gap vs target: {100 * (latest_vol - TARGET_VOL):.2f} percentage points',
        )
        for line in summary_lines:
            print(line)
