# 05: Walk-Forward Backtest

Run walk-forward backtesting and evaluate ensemble performance.

In [ ]:
# Setup: make the project's src/ directory importable, then import the model,
# ensemble, backtest and config names used by the rest of this notebook.
import sys
from pathlib import Path
import pandas as pd
import numpy as np

# PROJECT_ROOT is two levels above the current working directory, and its
# src/ folder is pushed to the front of sys.path. This has to run before the
# project imports that follow, which presumably resolve from src/.
# NOTE(review): the result depends on where the kernel was started — confirm
# the working directory is two levels below the repository root.
PROJECT_ROOT = Path().absolute().parent.parent
sys.path.insert(0, str(PROJECT_ROOT / "src"))

from models import XGBoostModel, LightGBMModel, SentimentModel, RuleBasedModel
from ensemble import MetaEnsemble
from backtest import WalkForwardBacktest
from utils.config import PROCESSED_DATA_DIR, SPECIALIST_MODELS_DIR, META_MODEL_DIR, RESULTS_DIR

# Ticker symbol interpolated into every file name read or written below.
TICKER = "AAPL"

In [ ]:
# Load the four specialist models, then the meta-ensemble built on top of them
def _load_specialist(model_cls, filename):
    """Instantiate ``model_cls`` and call its ``load`` on ``filename`` under SPECIALIST_MODELS_DIR."""
    model = model_cls()
    model.load(SPECIALIST_MODELS_DIR / filename)
    return model


# Each specialist is constructed and loaded in turn; the file suffix differs
# per model type.
xgb = _load_specialist(XGBoostModel, f"{TICKER}_xgb.model")
lgb = _load_specialist(LightGBMModel, f"{TICKER}_lgb.txt")
sentiment = _load_specialist(SentimentModel, f"{TICKER}_sentiment.pkl")
rule = _load_specialist(RuleBasedModel, f"{TICKER}_rule.pkl")

# The ensemble receives the specialists in this order before its own file is loaded.
specialists = [xgb, lgb, sentiment, rule]
ensemble = MetaEnsemble(specialists)
ensemble.load(META_MODEL_DIR / f"{TICKER}_meta_ensemble.pkl")

print("✓ Loaded all models")

In [ ]:
# Read the processed feature and price tables for TICKER. The first CSV column
# becomes the index and pandas is asked to parse it as dates.
features_path = PROCESSED_DATA_DIR / f"{TICKER}_features.csv"
prices_path = PROCESSED_DATA_DIR / f"{TICKER}_prices.csv"
features = pd.read_csv(features_path, index_col=0, parse_dates=True)
prices = pd.read_csv(prices_path, index_col=0, parse_dates=True)

# Target columns and the raw OHLCV columns are excluded from the model inputs.
non_feature_cols = {
    'target_return_1d', 'target_direction',
    'open', 'high', 'low', 'close', 'volume',
}
feature_cols = [name for name in features.columns if name not in non_feature_cols]

# Missing feature values are replaced with 0; the target is left untouched.
X = features[feature_cols].fillna(0)
# The backtest is driven by the 1-day return rather than the direction label.
y = features['target_return_1d']

print(f"Backtest data: {len(X)} samples")

In [ ]:
# Walk-forward backtest of the ensemble: 252-day training window, 21-day test window
backtest = WalkForwardBacktest(
    ensemble,
    train_window_days=252,
    test_window_days=21,
)
results = backtest.run(X, prices, y)

# Print every entry of results['metrics'] to four decimals under a banner.
separator = "=" * 60
print(f"\n{separator}")
print("BACKTEST RESULTS")
print(separator)
for metric_name, metric_value in results['metrics'].items():
    print(f"{metric_name}: {metric_value:.4f}")

In [ ]:
# Save results: the backtest object's own dump plus the prediction and actual
# tables as CSV, all under RESULTS_DIR and prefixed with TICKER.

# DataFrame.to_csv does not create missing parent directories, so make sure
# the output directory exists first (a no-op when it already does).
# NOTE(review): assumes RESULTS_DIR is a pathlib.Path, as its use with "/" suggests.
RESULTS_DIR.mkdir(parents=True, exist_ok=True)

backtest.save_results(RESULTS_DIR / f"{TICKER}_backtest_results.pkl")
results['predictions'].to_csv(RESULTS_DIR / f"{TICKER}_predictions.csv")
results['actuals'].to_csv(RESULTS_DIR / f"{TICKER}_actuals.csv")

print(f"✓ Saved results to {RESULTS_DIR}")

In [ ]:
# Visualize results on a 2x2 grid of axes
import matplotlib.pyplot as plt

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# Signal distribution. The signal is discrete (-1, 0, 1 per the axis label),
# so use fixed bin edges centred on those values. With bins=3 matplotlib
# derives the edges from the observed min/max, which shifts the bars away
# from -1/0/1 whenever one of the three values never occurs.
signal_bin_edges = [-1.5, -0.5, 0.5, 1.5]
axes[0, 0].hist(results['predictions']['signal'], bins=signal_bin_edges, alpha=0.7)
axes[0, 0].set_xticks([-1, 0, 1])
axes[0, 0].set_title('Signal Distribution')
axes[0, 0].set_xlabel('Signal (-1=sell, 0=abstain, 1=buy)')

# Confidence over time (x axis is the index of the predictions table)
axes[0, 1].plot(results['predictions']['confidence'])
axes[0, 1].set_title('Confidence Over Time')
axes[0, 1].set_ylabel('Confidence')

# Accuracy by confidence: directional hit rate of the non-abstaining
# predictions, grouped into equal-width confidence bins on [0, 1].
predictions = results['predictions']
actuals = results['actuals']

# n + 1 edges give n bins of width 1/n (linspace(0, 1, 10) produced only
# nine bins of width 1/9).
n_conf_bins = 10
conf_bins = np.linspace(0, 1, n_conf_bins + 1)
bin_centers = (conf_bins[:-1] + conf_bins[1:]) / 2

confidence = predictions['confidence']
# A signal of 0 is an abstention, not a sell call, so it must not be scored
# as a -1 prediction.
has_position = predictions['signal'] != 0

accuracies = []
for i, (lower, upper) in enumerate(zip(conf_bins[:-1], conf_bins[1:])):
    # Bins are half-open [lower, upper), except the last one, which also
    # includes its right edge so that a confidence of exactly 1.0 is counted.
    if i == n_conf_bins - 1:
        below_upper = confidence <= upper
    else:
        below_upper = confidence < upper
    mask = has_position & (confidence >= lower) & below_upper

    if mask.any():
        pred_dir = np.where(predictions.loc[mask, 'signal'] > 0, 1, -1)
        # NOTE(review): assumes 'actual_direction' uses the same -1/+1
        # encoding as pred_dir and that both tables share an index — confirm.
        actual_dir = actuals.loc[mask, 'actual_direction']
        accuracies.append((pred_dir == actual_dir).mean())
    else:
        # NaN leaves a gap in the line instead of plotting a misleading 0%.
        accuracies.append(np.nan)

axes[1, 0].plot(bin_centers, accuracies, 'o-')
axes[1, 0].set_title('Accuracy by Confidence Level')
axes[1, 0].set_xlabel('Confidence')
axes[1, 0].set_ylabel('Accuracy')

# Histogram of the realised returns in the bottom-right panel
returns_ax = axes[1, 1]
returns_ax.hist(results['actuals']['actual_return'], bins=50, alpha=0.7)
returns_ax.set_title('Actual Returns Distribution')
returns_ax.set_xlabel('Return')

# Write the figure to disk before showing it.
plots_path = RESULTS_DIR / f"{TICKER}_backtest_plots.png"
plt.tight_layout()
plt.savefig(plots_path)
print(f"✓ Saved plots to {plots_path}")
plt.show()