# 08 — Multi-Strategy Analysis (Phase 4)

This notebook explores the multi-strategy framework introduced in Phase 4:

1. **Short-term vs Medium-term** classification comparison
2. **Regression** — predicting return magnitude (not just direction)
3. **Macro & Calendar features** impact analysis
4. **Ensemble regression** walk-forward validation

Key concepts:
- Medium-term (5-10 day) horizons reduce daily noise
- Regression provides richer signal for position sizing
- Calendar features capture cyclical patterns (month-end, options expiry, etc.)
- A regression model with directional accuracy > 55% is considered useful

In [None]:
import sys
sys.path.insert(0, "..")

import warnings
warnings.filterwarnings("ignore")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta

from src.data.fetcher import YFinanceFetcher
from src.data.market_config import load_market_config, load_strategy_config
from src.data.preprocessing import preprocess_ohlcv
from src.features.technical import compute_technical_indicators
from src.features.returns import compute_return_features
from src.features.market_adaptive import compute_market_adaptive_features
from src.features.macro import (
    compute_macro_features,
    compute_calendar_features,
    compute_vix_proxy_features,
    compute_trend_context_features,
    get_macro_feature_names,
)
from src.analysis.regime import detect_regime
from src.features.labels import generate_labels, get_clean_features_and_labels
from src.models.ensemble import MarketPulseEnsemble
from src.models.xgboost_classifier import MarketPulseXGBClassifier
from src.models.xgboost_regressor import MarketPulseXGBRegressor
from src.models.evaluator import MarketPulseEvaluator
from src.models.regression_evaluator import RegressionEvaluator
from src.utils.validation import WalkForwardValidator

# Apply the "seaborn-v0_8-whitegrid" matplotlib style to every figure created below.
plt.style.use("seaborn-v0_8-whitegrid")
# Reached only if all of the imports above succeeded.
print("Phase 4 imports OK")

## 1. Fetch & Enrich Data

We'll use MSFT (Microsoft) as our main example, then compare across markets.

In [None]:
# Load the market config and the three strategy configs used throughout the notebook.
market_config = load_market_config("stocks")
short_strategy = load_strategy_config("short_term")
medium_strategy = load_strategy_config("medium_term")
regression_strategy = load_strategy_config("medium_term_regression")

# Fetch roughly 6 years of history (6 * 365 calendar days back from today).
# The clock is read once so the start and end dates derive from the same instant.
fetcher = YFinanceFetcher(market_config=market_config)
now = datetime.now()
end_date = now.strftime("%Y-%m-%d")
start_date = (now - timedelta(days=6 * 365)).strftime("%Y-%m-%d")

ticker = "MSFT"
raw = fetcher.fetch(ticker, start=start_date, end=end_date)

# Fail fast with a clear message: an empty download would otherwise surface
# later as an opaque error inside the feature pipeline or at df.index[0].
if raw.empty:
    raise ValueError(
        f"No data returned for {ticker} between {start_date} and {end_date}"
    )

# Full feature pipeline: each step takes the frame from the previous step and
# returns the enriched frame. Medium-term strategy settings drive the
# market-adaptive and macro feature steps.
df = preprocess_ohlcv(raw, market_config=market_config)
df = compute_technical_indicators(df)
df = compute_return_features(df)
df = compute_market_adaptive_features(df, market_name="stocks", strategy_config=medium_strategy)
df = detect_regime(df, market_name="stocks")
df = compute_macro_features(df, strategy_config=medium_strategy)

print(f"Enriched dataset: {len(df)} rows × {df.shape[1]} columns")
print(f"Date range: {df.index[0].date()} to {df.index[-1].date()}")

## 2. Macro Feature Exploration

Let's see what calendar & macro features look like.

In [None]:
# Keep only the macro/calendar feature names that are actually present in df,
# in the order reported by get_macro_feature_names().
macro_names = get_macro_feature_names()
present_columns = df.columns
macro_cols = list(filter(lambda name: name in present_columns, macro_names))
print(f"Macro features in dataset: {len(macro_cols)}")
print("Features:", macro_cols[:10], "...")

# Descriptive statistics for those columns, rounded to 3 decimals
# (the last expression is rendered as the cell output).
macro_summary = df[macro_cols].describe()
macro_summary.round(3)

In [None]:
# Calendar effect: mean / std / count of the "returns" column per weekday group.
if "day_of_week" in df.columns:
    weekday_labels = ["Mon", "Tue", "Wed", "Thu", "Fri"]
    dow_returns = df.groupby("day_of_week")["returns"].agg(["mean", "std", "count"])

    # Relabel positionally: the first five groups become Mon..Fri and any
    # further groups keep their original key.
    # NOTE(review): this assumes the first five sorted group keys correspond to
    # Monday..Friday — confirm against the encoding of "day_of_week".
    extra_labels = list(dow_returns.index[5:])
    dow_returns.index = weekday_labels + extra_labels

    fig, ax = plt.subplots(figsize=(8, 4))
    dow_returns["mean"].plot.bar(ax=ax, color="steelblue")
    ax.set(
        title=f"{ticker}: Average Daily Return by Day of Week",
        ylabel="Mean Return",
    )
    # Zero line separates positive from negative average returns.
    ax.axhline(y=0, color="red", linestyle="--")
    plt.tight_layout()
    plt.show()

In [None]:
# Two stacked panels sharing the x-axis: close price on top, the
# "vix_percentile" column below, shaded by whether it exceeds 0.8.
if "vix_percentile" in df.columns:
    vix_pct = df["vix_percentile"]
    fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(12, 6), sharex=True)

    ax1.plot(df.index, df["close"], color="blue", linewidth=0.8)
    ax1.set(title=f"{ticker}: Price", ylabel="Price ($)")

    # (mask, fill colour, legend label) for each shaded regime.
    shaded_regions = [
        (vix_pct > 0.8, "red", "High Fear"),
        (vix_pct <= 0.8, "green", "Normal"),
    ]
    for region_mask, region_color, region_label in shaded_regions:
        ax2.fill_between(
            df.index, 0, vix_pct,
            where=region_mask,
            color=region_color, alpha=0.3, label=region_label,
        )
    ax2.set(title="VIX Proxy (Realized Vol Percentile)", ylabel="Percentile")
    ax2.legend()

    plt.tight_layout()
    plt.show()

## 3. Short-Term vs Medium-Term Classification

Compare 1-day and 5-day prediction horizons.

In [None]:
# Walk-forward 3-class classification for each (horizon, strategy) pair.
# results maps "<strategy> (<horizon>d)" -> list of per-fold accuracies.
# NOTE(review): whether the validator leaves a gap between train and test to
# account for the label horizon is not visible here — confirm.
results = {}
horizon_strategies = [(1, "short_term"), (5, "medium_term")]

for horizon, strategy_name in horizon_strategies:
    strategy = load_strategy_config(strategy_name)
    threshold = strategy.get("threshold", 0.01)

    # Build classification labels for this horizon, then the clean feature matrix / target.
    labeled = generate_labels(
        df,
        horizon=horizon,
        label_type="classification",
        num_classes=3,
        threshold=threshold,
    )
    X, y = get_clean_features_and_labels(labeled)

    # Validation windows come from the strategy config, with fallbacks.
    val_cfg = strategy.get("validation", {})
    validator = WalkForwardValidator(
        initial_train_days=val_cfg.get("initial_train_days", 504),
        test_days=val_cfg.get("test_days", 21),
        step_days=val_cfg.get("step_days", 21),
    )
    folds = validator.split(X)
    evaluator = MarketPulseEvaluator(num_classes=3)

    fold_accs = []
    for fold in folds:
        X_train, y_train, X_test, y_test = validator.get_fold_data(X, y, fold)

        # A fresh ensemble is fitted for every fold.
        model = MarketPulseEnsemble.from_strategy_config(strategy)
        model.fit(X_train, y_train)
        y_pred = model.predict(X_test)
        y_proba = model.predict_proba(X_test)

        result = evaluator.evaluate_fold(
            y_true=y_test.values.astype(int),
            y_pred=y_pred,
            y_proba=y_proba,
            fold_number=fold.fold_number,
            train_size=fold.train_size,
        )
        fold_accs.append(result.accuracy)

    results[f"{strategy_name} ({horizon}d)"] = fold_accs
    mean_acc = np.mean(fold_accs)
    std_acc = np.std(fold_accs)
    print(f"{strategy_name} ({horizon}d): mean acc = {mean_acc:.4f} ± {std_acc:.4f}")

In [None]:
# Box plot comparing the per-fold accuracy distribution of each strategy in `results`.
fig, ax = plt.subplots(figsize=(8, 5))
labels = list(results.keys())
data = [results[k] for k in labels]

# Tick labels are applied after plotting instead of via boxplot(labels=...):
# that keyword is deprecated since Matplotlib 3.9 (renamed to tick_labels),
# while set_xticklabels works on old and new versions alike.
bp = ax.boxplot(data, patch_artist=True)
ax.set_xticklabels(labels)

# Fill each box with its own colour (patch_artist=True makes boxes fillable).
colors = ["#4C72B0", "#55A868"]
for patch, color in zip(bp["boxes"], colors):
    patch.set_facecolor(color)
    patch.set_alpha(0.7)

ax.set_title(f"{ticker}: Short-Term vs Medium-Term Accuracy")
ax.set_ylabel("Fold Accuracy")
# With three classes, uniform random guessing scores 1/3.
ax.axhline(y=1/3, color="red", linestyle="--", label="Random (33%)")
ax.legend()
plt.tight_layout()
plt.show()

## 4. Regression — Predicting Return Magnitude

Instead of UP/FLAT/DOWN, we predict the actual return. This is useful because:
- Position sizing: bigger predicted move → bigger position
- Even if R² is low, **directional accuracy** > 50% is valuable
- **Information Coefficient** (rank corr) > 0.05 is considered useful in quant trading

In [None]:
# Regression target: labels generated with horizon=5 and label_type="regression".
reg_labeled = generate_labels(df, horizon=5, label_type="regression")
X_reg, y_reg = get_clean_features_and_labels(reg_labeled)

n_samples, n_features = len(X_reg), X_reg.shape[1]
print(f"Regression dataset: {n_samples} samples, {n_features} features")
print(f"Target (5d return) stats:")
target_summary = y_reg.describe().round(4)
print(target_summary)

# Histogram of the target with a reference line at zero return.
fig, ax = plt.subplots(figsize=(8, 4))
y_reg.hist(bins=50, ax=ax, color="steelblue", alpha=0.7)
ax.axvline(x=0, color="red", linestyle="--")
ax.set(
    title=f"{ticker}: Distribution of 5-Day Forward Returns",
    xlabel="Return",
)
plt.tight_layout()
plt.show()

In [None]:
# Walk-forward evaluation of the regression ensemble on (X_reg, y_reg).
# Produces: fold_results (one evaluator result per fold), plus all_true /
# all_pred, the actual and predicted values pooled across all test folds.
reg_strategy = load_strategy_config("medium_term_regression")
val_cfg = reg_strategy.get("validation", {})
validator = WalkForwardValidator(
    initial_train_days=val_cfg.get("initial_train_days", 504),
    test_days=val_cfg.get("test_days", 42),
    step_days=val_cfg.get("step_days", 21),
)
folds = validator.split(X_reg)

reg_evaluator = RegressionEvaluator()
fold_results, all_true, all_pred = [], [], []

for fold in folds:
    X_train, y_train, X_test, y_test = validator.get_fold_data(X_reg, y_reg, fold)

    # A fresh ensemble is fitted on each fold's training window.
    ensemble = MarketPulseEnsemble.from_strategy_config(reg_strategy)
    ensemble.fit(X_train, y_train)
    y_pred = ensemble.predict(X_test)
    y_actual = y_test.values

    fold_results.append(
        reg_evaluator.evaluate_fold(
            y_true=y_actual,
            y_pred=y_pred,
            fold_number=fold.fold_number,
            train_size=fold.train_size,
            test_start_date=fold.test_start_date,
            test_end_date=fold.test_end_date,
        )
    )
    # NOTE(review): with the fallback test_days=42 / step_days=21, test windows
    # may overlap, so pooled samples could repeat — confirm against the validator.
    all_true.extend(y_actual)
    all_pred.extend(y_pred)

report = reg_evaluator.aggregate_results(fold_results, ticker=ticker, horizon=5)
reg_evaluator.print_report(report)

In [None]:
# Two diagnostics side by side: pooled predicted-vs-actual scatter (left)
# and directional accuracy for each fold (right).
all_true_arr = np.array(all_true)
all_pred_arr = np.array(all_pred)

fig, axes = plt.subplots(1, 2, figsize=(14, 5))
scatter_ax, da_ax = axes[0], axes[1]

# Left panel: scatter with a y = x reference line spanning both value ranges.
lower = min(all_true_arr.min(), all_pred_arr.min())
upper = max(all_true_arr.max(), all_pred_arr.max())
lims = [lower, upper]
scatter_ax.scatter(all_true_arr, all_pred_arr, alpha=0.3, s=10, color="steelblue")
scatter_ax.plot(lims, lims, "r--", label="Perfect")
scatter_ax.set(
    xlabel="Actual Return",
    ylabel="Predicted Return",
    title="Predicted vs Actual 5d Returns",
)
scatter_ax.legend()

# Right panel: one bar per fold, with the 50% baseline and the mean across folds.
das = [f.directional_accuracy for f in fold_results]
mean_da = np.mean(das)
da_ax.bar(range(len(das)), das, color="steelblue", alpha=0.7)
da_ax.axhline(y=0.5, color="red", linestyle="--", label="Random (50%)")
da_ax.axhline(y=mean_da, color="green", linestyle="--", label=f"Mean: {mean_da:.3f}")
da_ax.set(
    xlabel="Fold",
    ylabel="Directional Accuracy",
    title="Directional Accuracy per Fold",
)
da_ax.legend()

# Keep `ax` bound to the last panel, as later cells may expect.
ax = da_ax

plt.tight_layout()
plt.show()

## 5. Feature Importance — Macro Features

How much do calendar, VIX proxy, and trend context features contribute?

In [None]:
# Fit a single XGBoost regressor on all rows except the most recent 42, then
# inspect its feature importances. MarketPulseXGBRegressor comes from the
# notebook's import cell, so it is not re-imported here.
full_model = MarketPulseXGBRegressor.from_strategy_config(reg_strategy)
train_end = len(X_reg) - 42
full_model.fit(X_reg.iloc[:train_end], y_reg.iloc[:train_end])

# NOTE(review): the code below treats `importance` as a Series indexed by
# feature name, ordered most-important first — confirm against the model class.
importance = full_model.get_feature_importance()

# Flag which of the top features are macro/calendar features.
macro_names_set = set(get_macro_feature_names())
imp_df = importance.head(30).reset_index()
imp_df.columns = ["feature", "importance"]
imp_df["is_macro"] = imp_df["feature"].isin(macro_names_set)

fig, ax = plt.subplots(figsize=(10, 8))
colors = ["#DD8452" if m else "#4C72B0" for m in imp_df["is_macro"]]
# Reverse the order so the most important feature is drawn at the top.
ax.barh(imp_df["feature"][::-1], imp_df["importance"][::-1], color=colors[::-1])
# Report the number of bars actually drawn (fewer than 30 if the model has fewer features).
ax.set_title(f"Top {len(imp_df)} Features (orange = macro/calendar)")
ax.set_xlabel("Importance")
plt.tight_layout()
plt.show()

# Share of macro features among the top-N. The denominator is the number of
# features actually available, so the percentage stays correct when the model
# exposes fewer than N features.
for n in [10, 20, 30]:
    top_n = importance.head(n)
    n_available = len(top_n)
    n_macro = sum(1 for f in top_n.index if f in macro_names_set)
    share = n_macro / n_available * 100 if n_available else 0.0
    print(f"Macro features in top-{n}: {n_macro}/{n_available} ({share:.0f}%)")

## 6. Cross-Market Regression Comparison

How does the regression ensemble perform across stocks and indices?

In [None]:
# Run the same regression walk-forward for one representative ticker per market
# and collect the aggregate metrics in market_results (market name -> metrics).
# Reuses `validator` and `reg_evaluator` created in the regression walk-forward
# cell, and the `start_date` / `end_date` from the data-fetch cell.
market_tickers = {
    "stocks": "MSFT",
    "indices": "^GSPC",
}

market_results = {}
banner_rule = "=" * 40

for market_name, mkt_ticker in market_tickers.items():
    print(f"\n{banner_rule} {market_name.upper()}: {mkt_ticker} {banner_rule}")

    # Download with the market-specific configuration.
    mkt_config = load_market_config(market_name)
    fetcher_m = YFinanceFetcher(market_config=mkt_config)
    raw_m = fetcher_m.fetch(mkt_ticker, start=start_date, end=end_date)

    if raw_m.empty:
        print(f"  No data for {mkt_ticker}")
        continue

    # Same feature pipeline as the main example, driven by the regression strategy.
    df_m = preprocess_ohlcv(raw_m, market_config=mkt_config)
    df_m = compute_technical_indicators(df_m)
    df_m = compute_return_features(df_m)
    df_m = compute_market_adaptive_features(
        df_m, market_name=market_name, strategy_config=regression_strategy
    )
    df_m = detect_regime(df_m, market_name=market_name)
    df_m = compute_macro_features(df_m, strategy_config=regression_strategy)

    reg_labeled_m = generate_labels(df_m, horizon=5, label_type="regression")
    X_m, y_m = get_clean_features_and_labels(reg_labeled_m)

    # Skip markets with fewer than 600 usable samples.
    if len(X_m) < 600:
        print(f"  Insufficient data: {len(X_m)}")
        continue

    folds_m = validator.split(X_m)
    fold_res_m = []

    for fold in folds_m:
        Xtr, ytr, Xte, yte = validator.get_fold_data(X_m, y_m, fold)

        # Fresh ensemble per fold.
        ens = MarketPulseEnsemble.from_strategy_config(regression_strategy)
        ens.fit(Xtr, ytr)
        yp = ens.predict(Xte)

        res = reg_evaluator.evaluate_fold(
            y_true=yte.values,
            y_pred=yp,
            fold_number=fold.fold_number,
            train_size=fold.train_size,
        )
        fold_res_m.append(res)

    rpt = reg_evaluator.aggregate_results(fold_res_m, ticker=mkt_ticker, horizon=5)
    market_results[market_name] = {
        "DA": rpt.mean_directional_accuracy,
        "MAE": rpt.mean_mae,
        "R2": rpt.mean_r2,
        "IC": rpt.mean_ic,
    }
    print(f"  DA={rpt.mean_directional_accuracy:.3f}  MAE={rpt.mean_mae:.5f}  R²={rpt.mean_r2:.4f}  IC={rpt.mean_ic:.4f}")

# Summary table: one row per market, one column per metric (rendered as the cell output).
pd.DataFrame(market_results).T.round(4)

## 7. Key Takeaways

### Phase 4 Additions
- **Medium-term strategy**: 5-10 day horizons reduce noise, wider thresholds (±2%)
- **Regression models**: XGBRegressor + LGBMRegressor ensemble predicts return magnitude
- **Macro features**: ~35 calendar, VIX proxy, and trend context features
- **Dashboard upgraded**: now uses ensemble, market-adaptive, regime, macro features, and supports regression view

### Interpretation
- **Directional Accuracy > 55%** = model has real predictive power
- **Information Coefficient > 0.05** = useful for portfolio construction
- Medium-term is generally easier to predict than short-term
- Calendar features can add 1-3% improvement if they capture real effects