### Slides 10–13: Timeframe-Aware Plots
- Heatmaps: IV level and IV return correlations.
- Optional: Rolling correlation of IV returns (target vs peers).
- Pooled XGBoost importances for levels and returns.

In [None]:
# Setup
import os
from pathlib import Path
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import xgboost as xgb

# python-dotenv is an optional dependency: without it, the SLIDE_* / IV_DB_PATH
# settings are simply read from the process environment as-is.
try:
    from dotenv import load_dotenv
except ImportError:
    pass
else:
    try:
        load_dotenv()
    except Exception as exc:
        # Best-effort: a broken .env file must not abort the notebook, but the
        # failure should be visible instead of silently ignored.
        print(f'[WARN] Could not load .env file: {exc}')

from baseline_correlation import compute_baseline_correlations
from data_loader_coordinator import load_cores_with_auto_fetch
from feature_engineering import build_iv_panel, DEFAULT_DB_PATH, build_pooled_iv_return_dataset_time_safe

# User-configurable parameters
tickers = ["QBTS", "IONQ", "RGTI", "QUBT"]  # edit as needed
target_ticker = "QBTS"                                # for rolling correlation
start = os.getenv("SLIDE_START", "2025-01-02")
end = os.getenv("SLIDE_END", "2025-08-27")
timeframe = os.getenv("SLIDE_TIMEFRAME", "1h").strip().lower()  # '1h' or '1m'
if timeframe not in ('1h', '1m'):
    # Every setting below treats anything other than '1m' as hourly; make that
    # fallback explicit rather than printing a timeframe that is not in effect.
    print(f"[WARN] Unsupported SLIDE_TIMEFRAME={timeframe!r}; falling back to '1h'")
    timeframe = '1h'

# Resolve DB path: env IV_DB_PATH > DEFAULT_DB_PATH; override for timeframe if needed
env_db = os.getenv("IV_DB_PATH")
db_path = Path(env_db) if env_db else DEFAULT_DB_PATH
if timeframe == '1m' and str(db_path).endswith('iv_data_1h.db'):
    # Swap only the file name so the 1m database is looked up next to the
    # configured 1h database instead of relative to the current directory.
    db_path = Path(db_path).with_name('iv_data_1m.db')

# Timeframe-aware defaults
forward_steps = 60 if timeframe == '1m' else 15   # forward horizon, in bars
tolerance = '30s' if timeframe == '1m' else '15s'  # passed through to the panel/dataset builders
# Rolling-correlation window in bars; 23400 == 60 * 390.
# NOTE(review): presumably ~60 sessions of 1-minute bars — confirm the 1h value.
roll_window = 23400 if timeframe == '1m' else 390

# All figures produced by this notebook are written here.
plots_dir = Path('plots')
plots_dir.mkdir(parents=True, exist_ok=True)

print(f'Tickers: {tickers}')
print(f'Date range: {start} to {end}')
print(f'Database: {db_path} | timeframe={timeframe} | tol={tolerance} | fwd={forward_steps}')

#### Slide 10 — Correlation Heatmaps

In [None]:
# Baseline correlation matrices for the configured tickers and date range.
corrs = compute_baseline_correlations(
    tickers=tickers,
    start=start,
    end=end,
    db_path=db_path,
)
# A missing entry falls back to an empty frame, which the plotting helper
# reports as "not enough data".
clip_corr, ivret_corr = (
    corrs.get(key, pd.DataFrame()) for key in ('clip', 'iv_returns')
)

def _plot_heatmap(df, title, fname):
    """Render ``df`` as an annotated heatmap, save it under ``plots_dir`` and show it.

    Args:
        df: Matrix to plot; ``None`` or an empty frame is skipped with a warning.
        title: Figure title, also used in the warning message.
        fname: File name of the PNG written inside ``plots_dir``.
    """
    has_data = df is not None and not df.empty
    if not has_data:
        print(f'[WARN] {title}: not enough data to plot')
        return

    # Fixed [-1, 1] colour range so separate heatmaps are directly comparable.
    heatmap_style = dict(annot=True, cmap='coolwarm', vmin=-1, vmax=1, fmt='.2f')

    plt.figure(figsize=(6, 5))
    sns.heatmap(df, **heatmap_style)
    plt.title(title)
    plt.tight_layout()

    target_path = plots_dir / fname
    plt.savefig(target_path, dpi=150)
    plt.show()
    print(f'[SAVED] {target_path}')

_plot_heatmap(clip_corr, 'IV Level Correlations', 'slide10_iv_level_corr_heatmap.png')
_plot_heatmap(ivret_corr, 'IV Return Correlations', 'slide10_iv_return_corr_heatmap.png')

# A notebook cell only echoes its final expression, so the level-correlation
# table has to be displayed explicitly; the return table is still echoed last.
display(clip_corr)
ivret_corr

#### Slide 10 — Rolling IV Return Correlations (optional)

In [None]:
# Load per-ticker data and align it into one panel; an empty load skips the plot.
cores = load_cores_with_auto_fetch(list(tickers), start, end, db_path)
panel = build_iv_panel(cores, tolerance=tolerance) if cores else None
tgt_col = f'IVRET_{target_ticker}'

if panel is None or panel.empty:
    print('[WARN] Panel empty — skipping rolling correlations')
elif tgt_col not in panel.columns:
    print(f'[WARN] Missing target return column: {tgt_col}')
else:
    # Require at least a quarter of the window (never fewer than 5 bars)
    # before a correlation value is emitted.
    min_obs = max(5, roll_window // 4)
    target_returns = panel[tgt_col]

    plt.figure(figsize=(8, 5))
    for peer in tickers:
        peer_col = f'IVRET_{peer}'
        # Skip the target itself and any peer without a return column.
        if peer == target_ticker or peer_col not in panel.columns:
            continue
        rolling_corr = (
            target_returns
            .rolling(roll_window, min_periods=min_obs)
            .corr(panel[peer_col])
            .dropna()
        )
        if rolling_corr.empty:
            continue
        rolling_corr.rename(peer).plot(label=peer, alpha=0.9)

    plt.axhline(0, color='k', lw=0.8)
    plt.title(f'Rolling window {roll_window} bars — IV Return Corr vs {target_ticker}')
    plt.legend(title='Peer')
    plt.tight_layout()
    out = plots_dir / f'slide10_rolling_corr_{target_ticker}.png'
    plt.savefig(out, dpi=150)
    plt.show()
    print(f'[SAVED] {out}')

#### Slides 12–13 — Pooled Dataset + Feature Importances

In [None]:
# The pooled-dataset builder is given UTC-aware timestamps rather than the raw
# date strings.
start_ts, end_ts = (pd.Timestamp(bound, tz='UTC') for bound in (start, end))

pooled = build_pooled_iv_return_dataset_time_safe(
    tickers=tickers,
    start=start_ts,
    end=end_ts,
    forward_steps=forward_steps,
    tolerance=tolerance,
    db_path=db_path,
)
print(f'Pooled rows: {len(pooled):,}, columns: {pooled.shape[1]}')
# Preview the first rows as the cell output.
pooled.head(3)

In [None]:
def train_and_importance(df: pd.DataFrame, target: str):
    """Fit an XGBoost regressor on ``target`` and summarise gain importances.

    The first 80% of rows (in the order given) train the model; the remaining
    20% are used for the reported RMSE.
    NOTE(review): the split is positional, so it is only chronological if
    ``df`` is already sorted by time — confirm against the dataset builder.

    Args:
        df: Dataset containing ``target`` plus feature columns. Only numeric
            and boolean columns are used as features.
        target: Name of the column to predict. For ``'iv_clip'`` and
            ``'iv_ret_fwd'`` the known forward-looking columns are removed
            from the features.

    Returns:
        Tuple ``(model, rmse, importances, agg)``: the fitted regressor, the
        hold-out RMSE, a per-feature gain table (columns ``feature``, ``gain``,
        ``ticker``) and the gain summed per ticker (columns ``ticker``,
        ``agg_gain``). Features are attributed to the first entry of the
        module-level ``tickers`` list contained in their name; unattributed
        features are left out of ``agg``.

    Raises:
        ValueError: If the dataset is empty, has no finite target values, has
            fewer than two usable rows, or has no numeric feature columns.
        KeyError: If ``target`` is not a column of ``df``.
    """
    if df is None or df.empty:
        raise ValueError('Empty dataset')
    if target not in df.columns:
        raise KeyError(f'Missing target: {target}')

    y = pd.to_numeric(df[target], errors='coerce')
    # Coercion can introduce NaN, and XGBoost rejects NaN/inf labels, so rows
    # without a finite target are dropped. A positional mask is used so that
    # duplicate index labels cannot break the alignment.
    finite = (y.notna() & ~y.isin([np.inf, -np.inf])).to_numpy(dtype=bool)
    if not finite.any():
        raise ValueError(f'No finite target values: {target}')
    y = y.loc[finite]
    X = df.loc[finite].drop(columns=[target])

    # Drop columns that encode the forward return and would leak the answer.
    leak_cols = []
    if target == 'iv_clip':
        leak_cols += [c for c in ['iv_ret_fwd', 'iv_ret_fwd_abs', 'core_iv_ret_fwd_abs'] if c in X.columns]
    elif target == 'iv_ret_fwd':
        leak_cols += [c for c in ['iv_ret_fwd_abs', 'core_iv_ret_fwd_abs'] if c in X.columns]
    X = X.drop(columns=leak_cols, errors='ignore')
    X = X.select_dtypes(include=['number', 'bool']).astype(float)
    if X.shape[1] == 0:
        raise ValueError('No numeric feature columns')

    n = len(X)
    split = int(n * 0.8)
    if split < 1:
        # A single row leaves nothing to train on.
        raise ValueError('Need at least 2 rows with a finite target')

    model = xgb.XGBRegressor(
        objective='reg:squarederror',
        n_estimators=350,
        learning_rate=0.05,
        max_depth=6,
        subsample=0.9,
        colsample_bytree=0.9,
        random_state=42,
        n_jobs=-1,
    )
    model.fit(X.iloc[:split], y.iloc[:split])
    pred = model.predict(X.iloc[split:])
    rmse = float(np.sqrt(((y.iloc[split:] - pred) ** 2).mean()))

    gain = model.get_booster().get_score(importance_type='gain')
    # Explicit columns keep the frame well-formed even when no feature was
    # ever used in a split (empty gain dict).
    importances = pd.DataFrame(list(gain.items()), columns=['feature', 'gain']).sort_values('gain', ascending=False)

    def feature_ticker(feat: str):
        return next((t for t in tickers if t in feat), None)

    importances['ticker'] = importances['feature'].apply(feature_ticker)
    agg = (
        importances.groupby('ticker', dropna=True)['gain']
        .sum()
        .sort_values(ascending=False)
        .reset_index()
        .rename(columns={'gain': 'agg_gain'})
    )
    return model, rmse, importances, agg

def _importance_color(ticker):
    """Return the bar colour for ``ticker``: red for IONQ/RGTI, grey for QBTS/QUBT, blue otherwise."""
    if ticker in ('IONQ', 'RGTI'):
        return '#d62728'
    if ticker in ('QBTS', 'QUBT'):
        return '#7f7f7f'
    return '#1f77b4'


def _plot_ticker_importance(agg, title, fname):
    """Plot aggregate gain per ticker as a bar chart and save it under ``plots_dir``.

    Args:
        agg: Frame with ``ticker`` and ``agg_gain`` columns.
        title: Figure title.
        fname: File name of the PNG written inside ``plots_dir``.
    """
    if agg is None or agg.empty:
        # Same behaviour as the heatmap helper: skip rather than save a blank figure.
        print(f'[WARN] {title}: not enough data to plot')
        return
    palette = {t: _importance_color(t) for t in agg['ticker']}
    plt.figure(figsize=(6, 4))
    # seaborn >= 0.13 deprecates `palette` without `hue`; mapping hue to the
    # x variable keeps the per-ticker colours on both old and new versions.
    ax = sns.barplot(data=agg, x='ticker', y='agg_gain', hue='ticker', palette=palette, dodge=False)
    legend = ax.get_legend()
    if legend is not None:
        # Older seaborn versions add a legend that merely repeats the x axis.
        legend.remove()
    plt.title(title)
    plt.ylabel('Aggregate gain')
    plt.tight_layout()
    plt.savefig(plots_dir / fname, dpi=150)
    plt.show()


# Levels
model_lvl, rmse_lvl, imp_lvl, agg_lvl = train_and_importance(pooled.copy(), 'iv_clip')
print(f'Levels RMSE: {rmse_lvl:.6f}')
display(agg_lvl.head(10))
_plot_ticker_importance(
    agg_lvl,
    'Slide 12: Pooled IV Level — Per-Ticker Importance (gain)',
    'slide12_importance_levels.png',
)

# Returns
model_ret, rmse_ret, imp_ret, agg_ret = train_and_importance(pooled.copy(), 'iv_ret_fwd')
print(f'Returns RMSE: {rmse_ret:.6f}')
display(agg_ret.head(10))
_plot_ticker_importance(
    agg_ret,
    'Slide 13: Pooled IV Return — Per-Ticker Importance (gain)',
    'slide13_importance_returns.png',
)