In [1]:
# All imports for this cell live at the top so a fresh-kernel run surfaces
# missing dependencies before any training work starts.
from datetime import datetime, timedelta

from spx_predictor import SPXPredictor
from spx_feature_explorer import SPXFeatureExplorer
from UnifiedDataFetcher import UnifiedDataFetcher

# Single source of truth for the history window: it drives both the
# predictor's training span and the SPX price fetch below, so the two
# cannot silently drift apart.
LOOKBACK_YEARS = 7
DATE_FORMAT = '%Y-%m-%d'

# Train baseline
predictor = SPXPredictor()
predictor.train(years=LOOKBACK_YEARS)

# Fetch the SPX price history separately for the explorer.
# NOTE(review): the predictor presumably loads the same data internally during
# train(); if it exposes that series, reuse it instead of fetching again.
fetcher = UnifiedDataFetcher()
end_date = datetime.now()
# Years are approximated as 365 days (leap days are ignored).
start_date = end_date - timedelta(days=LOOKBACK_YEARS * 365)
spx_df = fetcher.fetch_spx(start_date.strftime(DATE_FORMAT), end_date.strftime(DATE_FORMAT))
# squeeze() collapses a single-column 'Close' selection down to a Series.
spx = spx_df['Close'].squeeze()
# Drop any timezone from the index.
# NOTE(review): presumably needed so the index lines up with
# predictor.features_scaled -- confirm against the explorer.
spx.index = spx.index.tz_localize(None)

# Explore: evaluate candidate features on top of the baseline feature matrix,
# targeting the 'direction_21d' label.
explorer = SPXFeatureExplorer()
results = explorer.run_full_analysis(
    base_features=predictor.features_scaled,
    spx=spx,
    target='direction_21d'
)


SPX PREDICTION SYSTEM

📊 Fetching data: 2018-10-23 to 2025-10-21
✓ Using cached: yahoo_^GSPC_2018-10-23_2025-10-21.parquet
✓ Using cached: vix_2018-10-23_2025-10-21.parquet
✓ Using cached: macro_2018-10-23_2025-10-21.parquet
✓ Using cached: fred_all_2018-10-23_2025-10-21.parquet
📊 Calculating IV-RV spread...
✅ Data loaded

🔧 Building features...
✅ Features built: 1463 samples, 45 features


🔍 Selecting top 30 features...
✅ Selected 30 features

📊 TOP 30 FEATURES BY IMPORTANCE:
    1. iv_rv_spread                                       0.1933
    2. iv_rv_vs_avg                                       0.1300
    3. iv_rv_momentum_21                                  0.0681
    4. 10Y-2Y Yield Spread_change_63                      0.0472
    5. yield_slope                                        0.0411
    6. 10Y Breakeven Inflation_level                      0.0399
    7. spx_realized_vol_63                                0.0337
    8. 10Y-2Y Yield Spread_level 

In [1]:
from spx_predictor import SPXPredictor
from spx_feature_explorer import SPXFeatureExplorer
from UnifiedDataFetcher import UnifiedDataFetcher
from datetime import datetime, timedelta

# Single source of truth for the history window: it drives both the
# predictor's training span and the SPX price fetch, so they cannot drift.
LOOKBACK_YEARS = 7
DATE_FORMAT = '%Y-%m-%d'
RULE = "=" * 70


def print_banner(title, leading_blank=True):
    """Print ``title`` framed by two 70-character '=' rules.

    Args:
        title: Text shown between the rules.
        leading_blank: When True, the first rule is prefixed with a newline
            so the banner is visually separated from preceding output.
    """
    print(("\n" if leading_blank else "") + RULE)
    print(title)
    print(RULE)


# ========================================
# STEP 1: Train Baseline
# ========================================
print_banner("STEP 1: TRAINING BASELINE MODEL", leading_blank=False)

predictor = SPXPredictor()
predictor.train(years=LOOKBACK_YEARS)

# Get SPX data (years approximated as 365 days; leap days are ignored).
fetcher = UnifiedDataFetcher()
end_date = datetime.now()
start_date = end_date - timedelta(days=LOOKBACK_YEARS * 365)
spx_df = fetcher.fetch_spx(start_date.strftime(DATE_FORMAT), end_date.strftime(DATE_FORMAT))
# squeeze() collapses a single-column 'Close' selection down to a Series.
spx = spx_df['Close'].squeeze()
# Drop any timezone from the index.
# NOTE(review): presumably needed so the index lines up with
# predictor.features_scaled -- confirm against the explorer.
spx.index = spx.index.tz_localize(None)

explorer = SPXFeatureExplorer()

# ========================================
# STEP 2: Baseline Walk-Forward Validation
# ========================================
print_banner("STEP 2: BASELINE STABILITY CHECK")
# TODO(review): 91.5% is a hard-coded figure from an earlier run, not a value
# read from `predictor`; it will go stale when the model or data changes.
print("Testing if 91.5% accuracy is robust across time periods...")

baseline_stability = explorer.walk_forward_validation(
    features=predictor.features_scaled,
    spx=spx,
    n_splits=5,
    target='direction_21d'
)

print("\n📊 BASELINE STABILITY RESULTS:")
# The summary section below already treats a None result as possible, so the
# table print is guarded the same way instead of failing on None[...].
if baseline_stability is not None:
    print(baseline_stability[['split', 'test_acc', 'gap', 'stability']])
else:
    print("   (walk-forward validation returned no results)")

# ========================================
# STEP 3: Test New Features (Fixed Alignment)
# ========================================
print_banner("STEP 3: TESTING INTERACTION FEATURES (NO DATA LOSS)")

results = explorer.run_full_analysis(
    base_features=predictor.features_scaled,
    spx=spx,
    target='direction_21d'
)
# Read the accept/reject verdict once; it drives two branches below.
interactions_accepted = results['accept']

# ========================================
# STEP 4: Summary & Recommendations
# ========================================
print_banner("FINAL SUMMARY & RECOMMENDATIONS")

# TODO(review): the feature count, accuracy and gap below are hard-coded
# literals, not values computed in this run -- verify or source them from
# the predictor before relying on this summary.
print("\n1️⃣ BASELINE MODEL:")
print("   Features: 30")
print("   Test Accuracy: 91.5%")
print("   Gap: -1.7% (NEGATIVE = Good!)")

if baseline_stability is not None:
    mean_acc = baseline_stability['test_acc'].mean()
    std_acc = baseline_stability['test_acc'].std()
    # NOTE(review): only the first row's 'stability' value is reported;
    # presumably the rating is identical across splits -- confirm.
    stability_rating = baseline_stability['stability'].iloc[0]
    print(f"\n   Walk-Forward: {mean_acc:.1%} ± {std_acc:.1%}")
    print(f"   Stability: {stability_rating}")

print("\n2️⃣ WITH INTERACTION FEATURES:")
if interactions_accepted:
    print("   ✅ ACCEPTED - Interactions improve model")
    print("   Action: Consider adding to production")
else:
    improvement = results['test_results']['improvement']
    print(f"   ❌ REJECTED - No meaningful improvement ({improvement:+.1%})")
    print("   Action: Keep current 30 features")

# TODO(review): the percentages in these insights are hard-coded, not derived
# from the importances computed in this run.
print("\n3️⃣ KEY INSIGHTS:")
print("   • IV-RV spread (19.3%) remains dominant signal")
print("   • Top 3 features contribute 38% of predictive power")
print("   • Macro indicators (yield curve, inflation) add 15%+")
print("   • Current feature set is well-balanced")

print("\n4️⃣ NEXT STEPS:")
if interactions_accepted:
    print("   1. Manually add accepted features to spx_features.py")
    print("   2. Retrain full model")
    print("   3. Deploy with new features")
else:
    print("   1. Focus on walk-forward robustness validation")
    print("   2. Test different prediction horizons (10d, 30d)")
    print("   3. Build P&L backtester for trading strategy")
    print("   4. Feature engineering complete - move to trading logic!")

print("\n" + RULE)

STEP 1: TRAINING BASELINE MODEL

SPX PREDICTION SYSTEM

📊 Fetching data: 2018-10-23 to 2025-10-21
✓ Using cached: yahoo_^GSPC_2018-10-23_2025-10-21.parquet
✓ Using cached: vix_2018-10-23_2025-10-21.parquet
✓ Using cached: macro_2018-10-23_2025-10-21.parquet
✓ Using cached: fred_all_2018-10-23_2025-10-21.parquet
📊 Calculating IV-RV spread...
✅ Data loaded

🔧 Building features...
✅ Features built: 1463 samples, 45 features


🔍 Selecting top 30 features...
✅ Selected 30 features

📊 TOP 30 FEATURES BY IMPORTANCE:
    1. iv_rv_spread                                       0.1933
    2. iv_rv_vs_avg                                       0.1300
    3. iv_rv_momentum_21                                  0.0681
    4. 10Y-2Y Yield Spread_change_63                      0.0472
    5. yield_slope                                        0.0411
    6. 10Y Breakeven Inflation_level                      0.0399
    7. spx_realized_vol_63                                0.0337
 

In [3]:
import yfinance as yf

# Look up the most recent S&P 500 index level via yfinance's fast_info mapping.
# The Ticker handle gets its own name: `spx` is used by the analysis cells in
# this notebook for the historical close Series, and rebinding it to a Ticker
# would leave later re-runs working against an object of the wrong kind.
spx_ticker = yf.Ticker("^GSPC")
price = spx_ticker.fast_info["last_price"]
print("Current S&P 500 price:", price)


Current S&P 500 price: 6745.33984375
