In [5]:
import json
import pandas as pd
import numpy as np
from scipy.stats import spearmanr

# Load data correctly
with open('json_data/historical.json') as f:
    hist = json.load(f)

# Extract from nested structure
data = hist['historical']
dates = pd.to_datetime(data['dates'])
ensemble = pd.Series(data['ensemble_scores'], index=dates)
spx = pd.Series(data['spx_close'], index=dates)
spx_forward_10d = pd.Series(data['spx_forward_10d'], index=dates)

# Need VIX - check if it's in model_cache or fetch it
try:
    import yfinance as yf
    vix = yf.download('^VIX', start=dates[0], end=dates[-1], progress=False)['Close']
    vix = vix.reindex(dates, method='ffill')
except:
    print("⚠️  Cannot load VIX - skipping VIX tests")
    vix = None

print("="*60)
print("DATA LOADED")
print("="*60)
print(f"Dates: {dates[0]} to {dates[-1]}")
print(f"Samples: {len(ensemble)}")
print(f"Ensemble range: {ensemble.min():.3f} to {ensemble.max():.3f}")
print(f"SPX range: ${spx.min():.0f} to ${spx.max():.0f}")

# ============================================================
# TEST 1: Does ensemble predict SPX REALIZED volatility?
# ============================================================
print("\n" + "="*60)
print("TEST 1: Predicting SPX Realized Volatility")
print("="*60)

spx_returns = spx.pct_change()
spx_realized_vol_10d = spx_returns.rolling(10).std() * np.sqrt(252) * 100

# Shift forward to avoid lookahead
future_vol = spx_realized_vol_10d.shift(-10)
valid_mask = ensemble.notna() & future_vol.notna()

corr_vol, p_vol = spearmanr(ensemble[valid_mask], future_vol[valid_mask])
print(f"Ensemble → SPX realized vol (10d ahead): {corr_vol:.3f} (p={p_vol:.4f})")

if corr_vol > 0.20:
    print("✅ STRONG: System predicts volatility")
elif corr_vol > 0.10:
    print("⚠️  WEAK: Marginal signal")
else:
    print("❌ NONE: No volatility prediction")

# ============================================================
# TEST 2: Does ensemble predict SPX TAIL EVENTS?
# ============================================================
print("\n" + "="*60)
print("TEST 2: Predicting SPX Tail Events (5%+ drops)")
print("="*60)

spx_10d_return = spx.pct_change(10)
tail_events = (spx_10d_return < -0.05).astype(int).shift(-10)  # 5%+ drop ahead

valid_mask = ensemble.notna() & tail_events.notna()
X = ensemble[valid_mask].values
y = tail_events[valid_mask].values

if y.sum() > 10:  # Need enough events
    from sklearn.metrics import roc_auc_score
    auc_tail = roc_auc_score(y, X)
    
    baseline_rate = y.mean()
    high_ensemble_mask = X > np.percentile(X, 75)
    conditional_rate = y[high_ensemble_mask].mean()
    
    print(f"Baseline tail event rate: {baseline_rate:.1%}")
    print(f"When ensemble > 75th percentile: {conditional_rate:.1%}")
    print(f"AUC: {auc_tail:.3f}")
    print(f"Lift: {conditional_rate/baseline_rate:.2f}x")
    
    if auc_tail > 0.60:
        print("✅ STRONG: Good tail risk detector")
    elif auc_tail > 0.55:
        print("⚠️  WEAK: Some predictive power")
    else:
        print("❌ NONE: No better than random")
else:
    print("⚠️  Insufficient tail events for test")

# ============================================================
# TEST 3: Using YOUR forward return data
# ============================================================
print("\n" + "="*60)
print("TEST 3: Using Built-in Forward Returns")
print("="*60)

valid_mask = ensemble.notna() & spx_forward_10d.notna()
corr_fwd, p_fwd = spearmanr(ensemble[valid_mask], -spx_forward_10d[valid_mask])
print(f"Ensemble → SPX forward return: {corr_fwd:.3f} (p={p_fwd:.4f})")
print("(Negative correlation = high ensemble predicts drops)")

if abs(corr_fwd) > 0.15:
    print("✅ DIRECTIONAL: System predicts returns")
elif abs(corr_fwd) > 0.08:
    print("⚠️  WEAK: Marginal signal")
else:
    print("❌ NONE: No directional edge")

# ============================================================
# TEST 4: VIX tests (if available)
# ============================================================
if vix is not None:
    print("\n" + "="*60)
    print("TEST 4: VIX Prediction (Your Original Test)")
    print("="*60)
    
    vix_change_10d = (vix.shift(-10) - vix)
    valid_mask = ensemble.notna() & vix_change_10d.notna()
    
    corr_vix, p_vix = spearmanr(ensemble[valid_mask], vix_change_10d[valid_mask])
    print(f"Ensemble → VIX change (10d): {corr_vix:.3f} (p={p_vix:.4f})")
    
    if corr_vix > 0.15:
        print("✅ VIX predictor confirmed")
    else:
        print("❌ Not a VIX predictor (expected)")

# ============================================================
# FINAL VERDICT
# ============================================================
print("\n" + "="*60)
print("VERDICT: What is this system actually good for?")
print("="*60)

results = {
    'SPX Vol Prediction': corr_vol if 'corr_vol' in locals() else 0,
    'Tail Event Detection': auc_tail if 'auc_tail' in locals() else 0.5,
    'Directional Signal': abs(corr_fwd) if 'corr_fwd' in locals() else 0
}

best_use_case = max(results, key=results.get)
best_score = results[best_use_case]

print(f"\nStrongest signal: {best_use_case} ({best_score:.3f})")

if best_score > 0.20 or (best_use_case == 'Tail Event Detection' and best_score > 0.58):
    print("\n✅ SALVAGEABLE: Reframe system around this use case")
else:
    print("\n❌ WEAK SIGNAL: Consider major rework or pivot to monitoring only")

  vix = yf.download('^VIX', start=dates[0], end=dates[-1], progress=False)['Close']


DATA LOADED
Dates: 2009-09-18 00:00:00 to 2025-10-31 00:00:00
Samples: 4056
Ensemble range: 0.054 to 0.999
SPX range: $1023 to $6891

TEST 1: Predicting SPX Realized Volatility
Ensemble → SPX realized vol (10d ahead): 0.527 (p=0.0000)
✅ STRONG: System predicts volatility

TEST 2: Predicting SPX Tail Events (5%+ drops)
Baseline tail event rate: 4.3%
When ensemble > 75th percentile: 7.0%
AUC: 0.618
Lift: 1.64x
✅ STRONG: Good tail risk detector

TEST 3: Using Built-in Forward Returns
Ensemble → SPX forward return: -0.174 (p=0.0000)
(Negative correlation = high ensemble predicts drops)
✅ DIRECTIONAL: System predicts returns

TEST 4: VIX Prediction (Your Original Test)


TypeError: Indexing a Series with DataFrame is not supported, use the appropriate DataFrame column

In [4]:
# Check the 'historical' key structure
import json

with open('json_data/historical.json') as f:
    hist = json.load(f)

print("Historical data keys:")
print(list(hist['historical'].keys()))
print("\nSample of each:")
for key in list(hist['historical'].keys())[:5]:
    print(f"{key}: {str(hist['historical'][key])[:100]}...")

Historical data keys:
['dates', 'ensemble_scores', 'spx_close', 'spx_forward_10d', 'regime_stats', 'thresholds']

Sample of each:
dates: ['2009-09-18', '2009-09-21', '2009-09-22', '2009-09-23', '2009-09-24', '2009-09-25', '2009-09-28', '...
ensemble_scores: [0.6783859303090072, 0.599276791584484, 0.5954635108481262, 0.5936226166995398, 0.6195923734385272, ...
spx_close: [1068.300048828125, 1064.6600341796875, 1071.6600341796875, 1060.8699951171875, 1050.780029296875, 1...
spx_forward_10d: [-4.033519228787153, -2.273032936831665, -1.5807311028006588, -0.3101265072669501, 1.398956086147862...
regime_stats: {'metadata': {'total_trading_days': 9026, 'start_date': '1990-01-02', 'end_date': '2025-10-31', 'gen...
