# Beta vs SPY

Estimate portfolio market beta relative to SPY, plus rolling beta over time.

**Data Source:**
- `wolfpack/daily_snapshots.csv` - daily portfolio NAV (`date` and `nav` columns), used to compute portfolio returns
- QuantConnect (QC) market data for SPY daily closes, used to compute benchmark returns

**Analysis:**
- Full-period beta, correlation, alpha, and R-squared
- Up-market and down-market beta
- Rolling beta (20/60/252 days)
- Portfolio vs SPY return scatter and fit line

**Prerequisites:** Run a WolfpackTrend backtest so that `wolfpack/daily_snapshots.csv` exists in the ObjectStore.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from IPython.display import display

# Set both pandas display limits to None so wide frames are not truncated by them.
for _display_option in ('display.max_columns', 'display.width'):
    pd.set_option(_display_option, None)

# Plot defaults: seaborn "whitegrid" style and a wide default figure size.
sns.set_style('whitegrid')
plt.rcParams.update({'figure.figsize': (14, 6)})

from QuantConnect import *
from QuantConnect.Research import QuantBook

# Research-environment handle; later cells use it for ObjectStore reads and the SPY history request.
qb = QuantBook()
print('QuantBook initialized')


def read_csv_from_store(key):
    """Load a CSV stored in the QuantBook ObjectStore into a DataFrame.

    Args:
        key: ObjectStore key of the CSV to read.

    Returns:
        The parsed ``pandas.DataFrame``, or ``None`` when the key does not
        exist, its content is empty, or any exception is raised while reading
        or parsing. A diagnostic message is printed in each ``None`` case.
    """
    try:
        if qb.ObjectStore.ContainsKey(key):
            raw_text = qb.ObjectStore.Read(key)
            if raw_text:
                return pd.read_csv(StringIO(raw_text))
            print(f'Empty ObjectStore key: {key}')
        else:
            print(f'ObjectStore key not found: {key}')
    except Exception as e:
        # Best-effort loader: report the failure and let the caller decide what to do.
        print(f'Error reading {key}: {e}')
    return None

## Load Portfolio Returns

In [None]:
# Read the daily NAV snapshots from the ObjectStore.
df_snapshots = read_csv_from_store('wolfpack/daily_snapshots.csv')

if df_snapshots is None:
    raise ValueError('daily_snapshots.csv is required. Run a backtest first.')

required_cols = ['date', 'nav']
missing = [c for c in required_cols if c not in df_snapshots.columns]
if missing:
    raise ValueError(f'daily_snapshots.csv missing required columns: {missing}')

portfolio = df_snapshots[['date', 'nav']].copy()
# Coerce both columns so malformed cells become NaT/NaN and are removed by the
# dropna below, rather than one unparseable date aborting the whole load.
portfolio['date'] = pd.to_datetime(portfolio['date'], errors='coerce')
portfolio['nav'] = pd.to_numeric(portfolio['nav'], errors='coerce')

# Make dropped rows visible instead of discarding them silently.
invalid_rows = int(portfolio[['date', 'nav']].isna().any(axis=1).sum())
if invalid_rows:
    print(f'Dropped {invalid_rows} snapshot rows with an invalid date or nav')

portfolio = portfolio.dropna(subset=['date', 'nav']).sort_values('date').reset_index(drop=True)
# Simple daily return from consecutive NAV values; the first row has no prior NAV and is dropped.
portfolio['portfolio_return'] = portfolio['nav'].pct_change()
portfolio = portfolio.dropna(subset=['portfolio_return']).copy()

# Without this guard the date-range print below fails on NaT with an unhelpful error.
if portfolio.empty:
    raise ValueError('daily_snapshots.csv needs at least two valid date/nav rows to compute returns.')

print(f'Loaded {len(portfolio)} portfolio return rows')
print(f'Date range: {portfolio.date.min().strftime("%Y-%m-%d")} to {portfolio.date.max().strftime("%Y-%m-%d")}')

## Load SPY Returns

In [None]:
spy_symbol = qb.AddEquity('SPY', Resolution.Daily).Symbol

# Request a few extra calendar days before the first portfolio return. pct_change()
# below leaves the first SPY bar without a return, so starting exactly on the first
# portfolio date would leave that date with no SPY return to pair with. The surplus
# rows fall outside the portfolio dates and are discarded by the later inner merge.
start = portfolio['date'].min() - pd.Timedelta(days=7)
# One extra day at the end, presumably so the final portfolio date falls inside the window.
end = portfolio['date'].max() + pd.Timedelta(days=1)

spy_hist = qb.History(spy_symbol, start, end, Resolution.Daily)

if spy_hist is None or len(spy_hist) == 0:
    raise ValueError('Unable to load SPY history from QuantBook.')

if isinstance(spy_hist, pd.Series):
    spy_close = spy_hist.copy()
else:
    spy_df = spy_hist.copy()

    if isinstance(spy_df.index, pd.MultiIndex):
        # Probe each index level for the SPY symbol and keep the first cross-section that works.
        extracted = None
        for level in range(spy_df.index.nlevels):
            try:
                extracted = spy_df.xs(spy_symbol, level=level)
                break
            except Exception:
                continue
        if extracted is None or not isinstance(extracted, pd.DataFrame):
            # Fail here with a clear message; continuing with the multi-level index
            # would only fail later when the index is converted to datetimes.
            raise ValueError('Unable to isolate SPY rows from the multi-indexed history frame.')
        spy_df = extracted

    if 'close' in spy_df.columns:
        spy_close = spy_df['close']
    elif 'value' in spy_df.columns:
        spy_close = spy_df['value']
    else:
        raise ValueError('SPY history does not include close/value price column.')

# Normalise to a tz-naive, datetime-indexed Series.
spy_close = pd.Series(spy_close)
spy_close.index = pd.to_datetime(spy_close.index)
if getattr(spy_close.index, 'tz', None) is not None:
    spy_close.index = spy_close.index.tz_localize(None)

# Collapse duplicate timestamps to their last value and order chronologically.
spy_close = spy_close.groupby(spy_close.index).last().sort_index()

benchmark = pd.DataFrame({'date': spy_close.index, 'spy_close': spy_close.values})
benchmark['spy_return'] = benchmark['spy_close'].pct_change()
benchmark = benchmark.dropna(subset=['spy_return']).copy()

print(f'Loaded {len(benchmark)} SPY return rows')
display(benchmark.head())

## Align Data and Define Helpers

In [None]:
# Join on calendar day rather than exact timestamp: both keys are normalised to
# midnight so a time-of-day component on either side cannot silently empty the
# inner join.
merged = (
    portfolio[['date', 'portfolio_return']]
    .assign(date=lambda frame: frame['date'].dt.normalize())
    .merge(
        benchmark[['date', 'spy_return']].assign(date=lambda frame: frame['date'].dt.normalize()),
        on='date',
        how='inner',
    )
    .dropna()
    .sort_values('date')
    .reset_index(drop=True)
)

# The statistics below are not meaningful on a handful of points.
if len(merged) < 30:
    raise ValueError(f'Need at least 30 overlapping observations. Found {len(merged)}.')

print(f'Overlapping return rows: {len(merged)}')


def compute_beta(portfolio_returns, benchmark_returns):
    """Return the beta of portfolio returns with respect to benchmark returns.

    Beta is the sample covariance of the two series divided by the sample
    variance of the benchmark (both computed with ``ddof=1``).

    Args:
        portfolio_returns: Portfolio return observations.
        benchmark_returns: Benchmark return observations, paired with
            ``portfolio_returns``.

    Returns:
        The beta estimate, or ``np.nan`` when the benchmark variance is zero
        or NaN.
    """
    benchmark_variance = np.var(benchmark_returns, ddof=1)
    # A flat or undefined benchmark has no usable denominator.
    if np.isnan(benchmark_variance) or benchmark_variance == 0:
        return np.nan
    covariance_matrix = np.cov(portfolio_returns, benchmark_returns, ddof=1)
    return covariance_matrix[0, 1] / benchmark_variance


def compute_r_squared(portfolio_returns, benchmark_returns):
    """Return the squared Pearson correlation between the two return series.

    Args:
        portfolio_returns: Portfolio return observations.
        benchmark_returns: Benchmark return observations, paired with
            ``portfolio_returns``.

    Returns:
        The off-diagonal entry of ``np.corrcoef`` for the two series, squared.
    """
    correlation_matrix = np.corrcoef(portfolio_returns, benchmark_returns)
    pearson_r = correlation_matrix[0, 1]
    return pearson_r ** 2


def compute_annualized_alpha(portfolio_returns, benchmark_returns, beta_value, periods_per_year=252):
    """Return the compounded annualized alpha of the portfolio against the benchmark.

    The per-period alpha is the mean portfolio return minus ``beta_value``
    times the mean benchmark return; it is then compounded over
    ``periods_per_year`` periods.

    Args:
        portfolio_returns: Per-period portfolio returns; must provide ``.mean()``
            (e.g. a pandas Series or NumPy array).
        benchmark_returns: Per-period benchmark returns; must provide ``.mean()``.
        beta_value: Beta used to scale the benchmark's mean return.
        periods_per_year: Number of return periods in a year. Defaults to 252,
            the value previously hard-coded for daily returns.

    Returns:
        ``(1 + per_period_alpha) ** periods_per_year - 1``.
    """
    per_period_alpha = portfolio_returns.mean() - beta_value * benchmark_returns.mean()
    return (1 + per_period_alpha) ** periods_per_year - 1


## Full-Period Beta Statistics

In [None]:
# Full-sample statistics over every overlapping daily return.
full_beta = compute_beta(merged['portfolio_return'], merged['spy_return'])
full_corr = merged['portfolio_return'].corr(merged['spy_return'])
full_r2 = compute_r_squared(merged['portfolio_return'], merged['spy_return'])
full_alpha = compute_annualized_alpha(merged['portfolio_return'], merged['spy_return'], full_beta)

# Split the sample by the sign of the SPY return; days with a zero return fall in neither group.
up_mask = merged['spy_return'] > 0
down_mask = merged['spy_return'] < 0

# A conditional beta is only estimated from more than 10 observations; otherwise it stays NaN.
up_beta = np.nan
if up_mask.sum() > 10:
    up_beta = compute_beta(merged.loc[up_mask, 'portfolio_return'], merged.loc[up_mask, 'spy_return'])

down_beta = np.nan
if down_mask.sum() > 10:
    down_beta = compute_beta(merged.loc[down_mask, 'portfolio_return'], merged.loc[down_mask, 'spy_return'])

summary = pd.DataFrame(
    [
        ('Beta (full period)', full_beta),
        ('Correlation', full_corr),
        ('R-squared', full_r2),
        ('Annualized alpha', full_alpha),
        ('Up-market beta', up_beta),
        ('Down-market beta', down_beta),
    ],
    columns=['Metric', 'Value'],
)

# Alpha is rendered as a percentage; every other metric as a 4-decimal number.
summary['Formatted'] = [
    f"{value * 100:.2f}%" if metric == 'Annualized alpha' else f"{value:.4f}"
    for metric, value in zip(summary['Metric'], summary['Value'])
]

display(summary)

## Rolling Beta

In [None]:
windows = [20, 60, 252]

# Rolling beta per window: rolling covariance with SPY divided by rolling SPY variance.
for window in windows:
    windowed_cov = merged['portfolio_return'].rolling(window).cov(merged['spy_return'])
    windowed_var = merged['spy_return'].rolling(window).var()
    merged[f'beta_{window}'] = windowed_cov / windowed_var

fig, ax = plt.subplots(figsize=(14, 6))

# Draw one line per window, skipping windows that produced no values at all.
for window in windows:
    beta_col = f'beta_{window}'
    if beta_col not in merged.columns or not merged[beta_col].notna().any():
        continue
    ax.plot(merged['date'], merged[beta_col], linewidth=2, label=f'{window}-day beta')

# Reference levels at beta = 1 and beta = 0.
ax.axhline(1.0, color='black', linestyle='--', linewidth=1.5, label='Market beta = 1.0')
ax.axhline(0.0, color='gray', linestyle=':', linewidth=1)

ax.set_title('Rolling Beta vs SPY', fontsize=14, fontweight='bold')
ax.set(xlabel='Date', ylabel='Beta')
ax.grid(alpha=0.3)
ax.legend(loc='upper left')

plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()

## Return Scatter (Portfolio vs SPY)

In [None]:
x = merged['spy_return'].values
y = merged['portfolio_return'].values

# Degree-1 least-squares fit of portfolio returns on SPY returns.
slope, intercept = np.polyfit(x, y, 1)
x_line = np.linspace(x.min(), x.max(), 100)
y_line = intercept + slope * x_line

# Returns are scaled by 100 so both axes read in percent.
scatter_fig, scatter_ax = plt.subplots(figsize=(10, 7))
scatter_ax.scatter(x * 100, y * 100, alpha=0.4, s=22, color='#1f77b4')
scatter_ax.plot(x_line * 100, y_line * 100, color='crimson', linewidth=2, label=f'Fit: y = {slope:.2f}x + {intercept:.4f}')
scatter_ax.axhline(0, color='black', linewidth=1, alpha=0.6)
scatter_ax.axvline(0, color='black', linewidth=1, alpha=0.6)

scatter_ax.set_title('Daily Return Scatter: Portfolio vs SPY', fontsize=14, fontweight='bold')
scatter_ax.set_xlabel('SPY Return (%)')
scatter_ax.set_ylabel('Portfolio Return (%)')
scatter_ax.grid(alpha=0.3)
scatter_ax.legend(loc='upper left')
plt.tight_layout()
plt.show()

## Summary

In [None]:
# Last aligned row; it carries the most recent rolling-beta values.
latest = merged.iloc[-1]
banner = '=' * 80

header_lines = [
    banner,
    'BETA SUMMARY',
    banner,
    f"Period: {merged['date'].min().strftime('%Y-%m-%d')} to {merged['date'].max().strftime('%Y-%m-%d')}",
    f'Observations: {len(merged)}',
    f'Beta (full period): {full_beta:.4f}',
    f'Correlation: {full_corr:.4f}',
    f'R-squared: {full_r2:.4f}',
    f'Annualized alpha: {full_alpha * 100:.2f}%',
]
for line in header_lines:
    print(line)

# Report the latest rolling beta for each window that has a value on the last row.
for window in windows:
    col = f'beta_{window}'
    if col not in merged.columns or np.isnan(latest[col]):
        continue
    print(f'Latest {window}-day beta: {latest[col]:.4f}')

print(banner)