# Correlation Risk

Assess how concentrated the portfolio is in symbols that move together, using rolling correlations of symbol returns.

**Data Source:**
- `wolfpack/positions.csv` - daily symbol price and portfolio weight

**Analysis:**
- Build a symbol return matrix from position history
- Rolling pairwise correlation metrics (20/60/252 days)
- Latest correlation heatmap for top-exposure symbols
- Highest-correlation symbol pairs

**Prerequisites:** Run a WolfpackTrend backtest with position logging enabled.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from io import StringIO
from IPython.display import display

# Remove the pandas display limits on column count and line width so wide
# frames are shown in full by display().
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)

# Plot defaults shared by every figure that does not set its own figsize.
sns.set_style('whitegrid')
plt.rcParams['figure.figsize'] = (14, 6)

from QuantConnect import *
from QuantConnect.Research import QuantBook

# `qb` is the module-level handle that read_csv_from_store() uses to reach
# the ObjectStore.
qb = QuantBook()
print('QuantBook initialized')


def read_csv_from_store(key):
    """Load a CSV stored under ``key`` in the ObjectStore as a DataFrame.

    Returns ``None`` (after printing a short diagnostic) when the key does
    not exist, when its content is empty, or when any error is raised while
    reading or parsing. The function never raises.
    """
    frame = None
    try:
        store = qb.ObjectStore
        if store.ContainsKey(key):
            raw_text = store.Read(key)
            if raw_text:
                frame = pd.read_csv(StringIO(raw_text))
            else:
                print(f'Empty ObjectStore key: {key}')
        else:
            print(f'ObjectStore key not found: {key}')
    except Exception as err:
        # Best-effort loader: report the problem and let the caller decide
        # what a missing frame means.
        print(f'Error reading {key}: {err}')
        frame = None
    return frame

## Load Position Data

In [None]:
# Load the position log from the ObjectStore; read_csv_from_store() returns
# None when the key is missing, empty or unreadable.
df_positions = read_csv_from_store('wolfpack/positions.csv')

if df_positions is None:
    raise ValueError('positions.csv is required. Run a backtest with position logging enabled.')

# The symbol and weight columns may appear under several aliases; take the
# first alias present, in priority order.
symbol_col = next(
    (col for col in ['symbol', 'ticker', 'underlying'] if col in df_positions.columns),
    None,
)
weight_col = next(
    (col for col in ['weight', 'portfolio_weight', 'actual_w'] if col in df_positions.columns),
    None,
)

if symbol_col is None:
    raise ValueError('Could not find symbol column in positions.csv (expected one of: symbol, ticker, underlying).')
if weight_col is None:
    raise ValueError('Could not find weight column in positions.csv (expected one of: weight, portfolio_weight, actual_w).')
if 'price' not in df_positions.columns:
    raise ValueError('positions.csv missing required price column for return calculation.')
# Validate the date column like the others instead of failing later with a
# bare KeyError from the column selection below.
if 'date' not in df_positions.columns:
    raise ValueError('positions.csv missing required date column.')

# Normalise to a fixed schema so the rest of the notebook does not depend on
# which aliases the log used.
df = df_positions[['date', symbol_col, weight_col, 'price']].copy()
df.columns = ['date', 'symbol', 'weight', 'price']

df['date'] = pd.to_datetime(df['date'])
# Unparseable weights count as no exposure; unparseable prices become NaN
# and are dropped below.
df['weight'] = pd.to_numeric(df['weight'], errors='coerce').fillna(0.0)
df['price'] = pd.to_numeric(df['price'], errors='coerce')

df = df.dropna(subset=['date', 'symbol', 'price'])
# Sort by symbol then date so per-symbol price changes are computed in
# chronological order.
df = df.sort_values(['symbol', 'date']).reset_index(drop=True)

# Without this guard the date-range print below fails on NaT.strftime with
# a message that hides the real cause.
if df.empty:
    raise ValueError('positions.csv contains no rows with a valid date, symbol and price.')

print(f'Loaded {len(df):,} position rows')
print(f'Date range: {df.date.min().strftime("%Y-%m-%d")} to {df.date.max().strftime("%Y-%m-%d")}')
print(f'Symbols: {df.symbol.nunique()}')
display(df.head())

## Build Return Matrix

In [None]:
# Simple per-symbol returns from consecutive logged prices.
df['symbol_return'] = df.groupby('symbol')['price'].pct_change()

# Keep only rows whose return lies strictly inside (-0.9, 0.9). NaN (the first
# row of each symbol) and infinite returns fail the comparison and drop out.
panel = df[df['symbol_return'].abs() < 0.9].copy()

if panel.empty:
    raise ValueError('No usable symbol returns after cleaning. Check price history in positions.csv.')

# Rank symbols by their average absolute weight and keep the 20 largest.
mean_abs_weight = panel.groupby('symbol')['weight'].apply(lambda w: w.abs().mean())
top_symbols = mean_abs_weight.sort_values(ascending=False).index[:20].tolist()

panel = panel[panel['symbol'].isin(top_symbols)].copy()

# Both wide frames are date x symbol, taking the last value when a
# (date, symbol) pair occurs more than once.
pivot_kwargs = dict(index='date', columns='symbol', aggfunc='last')

returns_wide = panel.pivot_table(values='symbol_return', **pivot_kwargs).sort_index()

weights_wide = panel.pivot_table(values='weight', **pivot_kwargs).sort_index()
# Align weights to the return dates; a missing weight means no position.
weights_wide = weights_wide.reindex(returns_wide.index).fillna(0.0)

n_days, n_symbols = returns_wide.shape
print(f'Return matrix shape: {n_days} days x {n_symbols} symbols')
display(returns_wide.tail())

## Rolling Correlation Risk

In [None]:
def rolling_corr_metrics(returns_frame, windows=(20, 60, 252)):
    """Summarise pairwise return correlations over trailing windows.

    For every window length and every row position that has a full trailing
    window, the correlation matrix of the columns with enough observations
    is computed and reduced to summary statistics of its distinct pairs.

    Parameters
    ----------
    returns_frame : pandas.DataFrame
        One row per date, one column per symbol, values are returns.
    windows : iterable of int
        Trailing window lengths in rows. A window longer than the frame is
        skipped.

    Returns
    -------
    pandas.DataFrame
        Columns ``date``, ``window``, ``symbols_used``,
        ``mean_pairwise_corr``, ``mean_abs_corr`` and ``max_abs_corr``.
        The columns are present even when no row could be produced, so
        callers can select them without checking for emptiness first.

    Notes
    -----
    A column is used in a window only if it has at least
    ``max(10, int(window * 0.8))`` non-missing values there, so windows
    shorter than 10 rows never produce output.
    """
    columns = [
        'date',
        'window',
        'symbols_used',
        'mean_pairwise_corr',
        'mean_abs_corr',
        'max_abs_corr',
    ]
    metrics = []
    n_rows = len(returns_frame)

    for window in windows:
        if n_rows < window:
            continue

        # Minimum non-missing observations a symbol needs inside a window.
        min_obs = max(10, int(window * 0.8))

        for end_idx in range(window - 1, n_rows):
            window_slice = returns_frame.iloc[end_idx - window + 1:end_idx + 1]

            valid = window_slice.dropna(axis=1, thresh=min_obs)
            if valid.shape[1] < 2:
                continue

            # Read the strict upper triangle directly so each pair is
            # counted once, without depending on how DataFrame.stack()
            # treats missing values.
            corr_values = valid.corr().to_numpy()
            pairwise = corr_values[np.triu_indices_from(corr_values, k=1)]
            # Undefined correlations (e.g. a constant column) carry no
            # information and are excluded from the statistics.
            pairwise = pairwise[~np.isnan(pairwise)]

            if pairwise.size == 0:
                continue

            abs_pairwise = np.abs(pairwise)
            metrics.append({
                'date': returns_frame.index[end_idx],
                'window': window,
                'symbols_used': valid.shape[1],
                'mean_pairwise_corr': pairwise.mean(),
                'mean_abs_corr': abs_pairwise.mean(),
                'max_abs_corr': abs_pairwise.max()
            })

    return pd.DataFrame(metrics, columns=columns)


# Rolling correlation statistics for the default 20/60/252-row windows.
metrics_df = rolling_corr_metrics(returns_wide)

# Show the most recent rows, or explain why there is nothing to show.
if not metrics_df.empty:
    display(metrics_df.tail(10))
else:
    print('No rolling correlation metrics available (insufficient history).')

## Visualize Correlation Risk

In [None]:
if not metrics_df.empty:
    fig, axes = plt.subplots(2, 1, figsize=(14, 10), sharex=True)
    mean_ax, max_ax = axes

    # One line per window length on each panel, in ascending window order.
    for window, subset in metrics_df.groupby('window'):
        series_label = f'{window}-day'
        mean_ax.plot(subset['date'], subset['mean_abs_corr'], label=series_label, linewidth=2)
        max_ax.plot(subset['date'], subset['max_abs_corr'], label=series_label, linewidth=2)

    # Shared styling for the two panels; only the title and y-label differ.
    panel_specs = (
        (mean_ax, 'Rolling Mean Absolute Pairwise Correlation', 'Mean |Correlation|'),
        (max_ax, 'Rolling Max Absolute Pairwise Correlation', 'Max |Correlation|'),
    )
    for ax, title, y_label in panel_specs:
        ax.set_title(title, fontsize=14, fontweight='bold')
        ax.set_ylabel(y_label)
        ax.grid(alpha=0.3)
        ax.legend(loc='upper left')

    # The x axis is shared, so only the bottom panel gets a label.
    max_ax.set_xlabel('Date')

    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    plt.show()

## Latest Heatmap and Top Pairs

In [None]:
# Prefer a 60-row window; fall back to 20 rows when history is shorter.
window = 60 if len(returns_wide) >= 60 else 20

if len(returns_wide) >= window:
    # Keep symbols with enough non-missing returns in the window.
    min_obs = max(5, int(window * 0.7))
    recent = returns_wide.tail(window).dropna(axis=1, thresh=min_obs)

    if recent.shape[1] >= 2:
        corr = recent.corr()

        # Order the matrix by absolute weight on the last date, largest first.
        latest_weights = weights_wide.reindex(columns=corr.columns).iloc[-1]
        latest_weights = latest_weights.abs().sort_values(ascending=False)
        order = latest_weights.index.tolist()

        corr = corr.reindex(index=order, columns=order)

        plt.figure(figsize=(12, 9))
        sns.heatmap(corr, cmap='RdBu_r', vmin=-1, vmax=1, center=0, linewidths=0.4)
        plt.title(f'Latest {window}-Day Correlation Matrix (Top Exposure Symbols)', fontsize=14, fontweight='bold')
        plt.tight_layout()
        plt.show()

        # Build the pair table from the strict upper triangle. The index and
        # the columns of `corr` carry the same axis name, so
        # stack().reset_index() would fail trying to insert two columns with
        # that name; indexing the raw values avoids that and lists each pair
        # exactly once.
        corr_values = corr.to_numpy()
        row_idx, col_idx = np.triu_indices_from(corr_values, k=1)
        pairwise = pd.DataFrame({
            'symbol_a': corr.index.to_numpy()[row_idx],
            'symbol_b': corr.columns.to_numpy()[col_idx],
            'correlation': corr_values[row_idx, col_idx],
        })
        # Undefined correlations (e.g. a constant return series) are not
        # rankable, so leave them out.
        pairwise = pairwise.dropna(subset=['correlation']).reset_index(drop=True)
        pairwise['abs_correlation'] = pairwise['correlation'].abs()

        top_pairs = pairwise.sort_values('abs_correlation', ascending=False).head(15)
        print('Top correlation pairs:')
        display(top_pairs)
    else:
        print('Not enough symbols with valid history to build a correlation heatmap.')
else:
    print('Insufficient history for the selected heatmap window.')

## Summary

In [None]:
if metrics_df.empty:
    print('No summary available due to insufficient rolling-correlation observations.')
else:
    # Most recent metrics row for each window length, ordered by window.
    latest_summary = (
        metrics_df.sort_values('date')
                  .drop_duplicates(subset='window', keep='last')
                  .sort_values('window')
    )

    summary_cols = ['window', 'date', 'symbols_used', 'mean_pairwise_corr', 'mean_abs_corr', 'max_abs_corr']
    display(latest_summary[summary_cols])

    # One row per window, sorted ascending: the last row is the longest window.
    latest_row = latest_summary.iloc[-1]
    snapshot_lines = [
        '\nCorrelation risk snapshot:',
        f"  Window: {int(latest_row['window'])} days",
        f"  Symbols used: {int(latest_row['symbols_used'])}",
        f"  Mean abs correlation: {latest_row['mean_abs_corr']:.3f}",
        f"  Max abs correlation: {latest_row['max_abs_corr']:.3f}",
    ]
    for line in snapshot_lines:
        print(line)
