In [7]:
# === 1. LIBRARIES & LOAD DATA ===
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import TimeSeriesSplit
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score
from sklearn.inspection import permutation_importance
import matplotlib.pyplot as plt
import seaborn as sns

# Load data
df = pd.read_csv("monthly_all.csv")
df["Date"] = pd.to_datetime(df["Date"])
df = df.sort_values("Date").drop_duplicates("Date").reset_index(drop=True)
df.set_index("Date", inplace=True)

# === 2. COMPUTE RETURNS & TARGET ===
df["cad_ig_er_return"] = df["cad_ig_er_index"].pct_change()
df["target"] = (df["cad_ig_er_return"].shift(-1) > 0).astype(int)
df = df.dropna(subset=["target"])

# === 3. BASE & ENGINEERED FEATURES ===
base_features = [
    "cad_oas", "us_hy_oas", "us_ig_oas", "tsx", "vix", "us_3m_10y",
    "us_growth_surprises", "us_inflation_surprises", "us_lei_yoy",
    "us_hard_data_surprises", "us_equity_revisions"
]

# Feature Engineering
for col in base_features:
    df[f"{col}_chg"] = df[col].diff()
    df[f"{col}_z"] = (df[col] - df[col].rolling(12).mean()) / df[col].rolling(12).std()

df["oasis_risk_aversion"] = df["cad_oas"] * df["vix"]
df["value_growth_spread"] = df["us_growth_surprises"] - df["us_inflation_surprises"]
df["recession_flag"] = (df["us_lei_yoy"] < 0).astype(int)
df["curve_inversion"] = (df["us_3m_10y"] < 0).astype(int)

engineered_features = (
    [f"{col}_chg" for col in base_features] +
    [f"{col}_z" for col in base_features] +
    ["oasis_risk_aversion", "value_growth_spread", "recession_flag", "curve_inversion"]
)

df = df.dropna(subset=engineered_features)  # drop NA from z-score or lagged

# === 4. SCALING & MODEL TRAINING ===
X = df[engineered_features]
y = df["target"]
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

rf = RandomForestClassifier(n_estimators=100, max_depth=5, random_state=42)
rf.fit(X_scaled, y)

# === 5. SIGNAL GENERATION (unchanged) ===
df["signal"] = (
    pd.Series(rf.predict(X_scaled), index=df.index)   # model prediction
      .shift(1)                                       # trade next bar
      .fillna(0)
      .astype(int)
)

# === 6. BACKTEST WITH VECTORBT ==========================================
# pip install vectorbt -q  # run once if not installed
import vectorbt as vbt
import pandas as pd

price   = df["cad_ig_er_index"]
entries = df["signal"].diff() ==  1          # flat -> long
exits   = df["signal"].diff() == -1          # long -> flat
freq_td = pd.Timedelta(days=30)              # 30-day bar ≈ monthly

pf_ml = vbt.Portfolio.from_signals(
    price, entries, exits,
    init_cash=1_000_000,
    freq=freq_td
)

pf_bh = vbt.Portfolio.from_holding(
    price,
    init_cash=1_000_000,
    freq=freq_td
)

# === 7. PRINT FULL STATS ===============================================
print("\n=== ML Strategy Stats ===")
print(pf_ml.stats())          # full vectorbt stats table

print("\n=== Buy & Hold Stats ===")
print(pf_bh.stats())

# Optional: side-by-side DataFrame
both_stats = pd.concat(
    [pf_ml.stats(), pf_bh.stats()],
    axis=1, keys=["ML Strategy", "Buy & Hold"]
)
print("\n=== Combined Stats ===")
print(both_stats)




=== ML Strategy Stats ===
Start                         2004-07-31 00:00:00
End                           2025-03-31 00:00:00
Period                         9540 days 00:00:00
Start Value                             1000000.0
End Value                          1251305.108514
Total Return [%]                        25.130511
Benchmark Return [%]                     29.94169
Max Gross Exposure [%]                      100.0
Total Fees Paid                               0.0
Max Drawdown [%]                         5.811461
Max Drawdown Duration          1140 days 00:00:00
Total Trades                                   87
Total Closed Trades                            87
Total Open Trades                               0
Open Trade PnL                                0.0
Win Rate [%]                            45.977011
Best Trade [%]                           5.507914
Worst Trade [%]                           -2.8485
Avg Winning Trade [%]                    0.931041
Avg Losing Trade [%]   