In [49]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from statsmodels.tsa.stattools import adfuller
from abc import ABC, abstractmethod
import warnings
from datetime import datetime, timedelta
warnings.filterwarnings('ignore')

## Feature Lab

In [50]:
class FeatureLab:
    """Shared mathematical engine for technical and statistical features."""
    
    @staticmethod
    def get_weights_frac_diff(d, size, threshold=1e-5):
        w = [1.0]
        for k in range(1, size):
            w_k = -w[-1] / k * (d - k + 1)
            w.append(w_k)
        w = np.array(w[::-1])
        w = w[np.abs(w) > threshold]
        return w

    @staticmethod
    def frac_diff_fixed(series, d, window=50):
        # Solves Stationarity Dilemma [cite: 61]
        weights = FeatureLab.get_weights_frac_diff(d, window)
        res = series.rolling(window=len(weights)).apply(lambda x: np.dot(x, weights), raw=True)
        return res

    @staticmethod
    def yang_zhang_volatility(df, window=30):
        # Captures intraday energy/gaps [cite: 82]
        log_ho = (df['High'] / df['Open']).apply(np.log)
        log_lo = (df['Low'] / df['Open']).apply(np.log)
        log_co = (df['Close'] / df['Open']).apply(np.log)
        log_oc = (df['Open'] / df['Close'].shift(1)).apply(np.log)
        log_cc = (df['Close'] / df['Close'].shift(1)).apply(np.log)
        
        rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co)
        close_vol = log_cc.rolling(window=window).var()
        open_vol = log_oc.rolling(window=window).var()
        window_rs = rs.rolling(window=window).mean()

        k = 0.34 / (1.34 + (window + 1) / (window - 1))
        return np.sqrt(open_vol + k * window_rs)

    @staticmethod
    def compute_rsi(series, window=14):
        delta = series.diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
        rs = gain / loss
        return 100 - (100 / (1 + rs))

    @staticmethod
    def triple_barrier_labels(prices, vol, pt=1.0, sl=1.0, barrier_window=10):
        """
        Implements the Triple Barrier Method.
        Labels: 1 (Profit Target Hit), -1 (Stop Loss Hit), 0 (Time Limit/Neutral)
        """
        labels = pd.Series(0, index=prices.index)
        # Shift prices to align future outcome with current row
        # However, to avoid look-ahead in features, we usually compute label for row t based on t+1...t+k
        # This function generates the TARGET variable (y) for training.
        
        limit = len(prices) - barrier_window
        p_values = prices.values
        v_values = vol.values
        
        for i in range(limit):
            current_p = p_values[i]
            current_vol = v_values[i]
            
            # Dynamic barriers based on volatility [cite: 215]
            target = current_p * (1 + pt * current_vol)
            stop = current_p * (1 - sl * current_vol)
            
            future_window = p_values[i+1 : i+1+barrier_window]
            
            hit_target = np.where(future_window >= target)[0]
            hit_stop = np.where(future_window <= stop)[0]
            
            first_target = hit_target[0] if len(hit_target) > 0 else barrier_window + 1
            first_stop = hit_stop[0] if len(hit_stop) > 0 else barrier_window + 1
            
            if first_target < first_stop and first_target <= barrier_window:
                labels.iloc[i] = 1
            elif first_stop < first_target and first_stop <= barrier_window:
                labels.iloc[i] = 0 # In Meta-Labeling, we often treat Stop (-1) as 0 (Do Not Trade)
            # Else 0 (Time limit reached or neutral)
            
        return labels

## Base Strategy

In [51]:
class BaseStrategy(ABC):
    def __init__(self, ticker, start_date, end_date):
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date
        self.data = None
        self.results = None
        self.metrics = {}

    def fetch_data(self, warmup_years=2):
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d")
        warmup_start_dt = start_dt - timedelta(days=warmup_years*365)
        warmup_start_str = warmup_start_dt.strftime("%Y-%m-%d")
        
        try:
            self.data = yf.download(self.ticker, start=warmup_start_str, end=self.end_date, progress=False, auto_adjust=False)
            if isinstance(self.data.columns, pd.MultiIndex): 
                self.data.columns = self.data.columns.get_level_values(0)
            if 'Adj Close' not in self.data.columns: 
                self.data['Adj Close'] = self.data['Close']
            self.data['Returns'] = self.data['Adj Close'].pct_change()
            self.data.dropna(inplace=True)
        except Exception as e:
            print(f"Error fetching {self.ticker}: {e}")
            self.data = pd.DataFrame()

    @abstractmethod
    def generate_signals(self):
        pass

    def run_backtest(self, transaction_cost=0.0005, rebalance_threshold=0.1):
        if self.data is None or self.data.empty: return
        
        backtest_mask = self.data.index >= self.start_date
        df = self.data.loc[backtest_mask].copy()
        if df.empty: return

        # Position Smoothing
        clean_positions = []
        current_pos = 0.0
        raw_signals = df['Signal'].values
        
        for target in raw_signals:
            if abs(target - current_pos) > rebalance_threshold:
                current_pos = target
            clean_positions.append(current_pos)
            
        df['Position'] = clean_positions
        df['Prev_Position'] = df['Position'].shift(1).fillna(0)
        df['Turnover'] = (df['Prev_Position'] - df['Position'].shift(2).fillna(0)).abs()
        df['Gross_Returns'] = df['Prev_Position'] * df['Returns']
        df['Net_Returns'] = df['Gross_Returns'] - (df['Turnover'] * transaction_cost)
        df['Net_Returns'].fillna(0, inplace=True)
        
        df['Cumulative_Strategy'] = (1 + df['Net_Returns']).cumprod()
        df['Cumulative_Market'] = (1 + df['Returns']).cumprod()
        
        roll_max = df['Cumulative_Strategy'].cummax()
        df['Drawdown'] = (df['Cumulative_Strategy'] / roll_max) - 1.0
        
        self.results = df
        
        # Performance Calculation
        total_ret = df['Cumulative_Strategy'].iloc[-1] - 1
        vol = df['Net_Returns'].std() * np.sqrt(252)
        sharpe = (df['Net_Returns'].mean() / df['Net_Returns'].std()) * np.sqrt(252) if vol > 0 else 0
        max_dd = df['Drawdown'].min()
        
        self.metrics = {
            'Total Return': total_ret,
            'Sharpe Ratio': sharpe,
            'Max Drawdown': max_dd
        }
        return df

## Model V1-V4

In [52]:
class StrategyV1_Baseline(BaseStrategy):
    """V1: Fixed FracDiff, Standard GMM."""
    def generate_signals(self):
        if self.data is None or self.data.empty: return
        df = self.data.copy()
        
        df['Volatility'] = FeatureLab.yang_zhang_volatility(df)
        df['FracDiff'] = FeatureLab.frac_diff_fixed(df['Adj Close'].apply(np.log), d=0.4, window=50)
        df['RSI'] = FeatureLab.compute_rsi(df['Adj Close'])
        df['Returns_Smoothed'] = df['Returns'].rolling(5).mean()
        df['Vol_Smoothed'] = df['Volatility'].rolling(5).mean()
        df.dropna(inplace=True)
        
        X = df[['Returns_Smoothed', 'Vol_Smoothed']].values
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        gmm = GaussianMixture(n_components=3, random_state=42)
        df['Cluster'] = gmm.fit_predict(X_scaled)
        
        stats = df.groupby('Cluster')['Returns_Smoothed'].mean().sort_values().index
        mapping = {stats[0]: -1, stats[1]: 0, stats[2]: 1}
        df['Regime'] = df['Cluster'].map(mapping)
        
        df['Signal'] = 0
        df.loc[(df['Regime'] == 1) & (df['FracDiff'] > 0), 'Signal'] = 1
        df.loc[(df['Regime'] == 0) & (df['RSI'] < 40), 'Signal'] = 1
        
        target_vol = 0.15 / np.sqrt(252)
        df['Vol_Scaler'] = (target_vol / df['Volatility']).clip(upper=1.5)
        df['Signal'] = df['Signal'] * df['Vol_Scaler']
        self.data = df

In [53]:
class StrategyV3_Macro(BaseStrategy):
    """V3: Macro (SPY) Filter."""
    def __init__(self, ticker, start_date, end_date):
        super().__init__(ticker, start_date, end_date)
        self.spy_data = None

    def fetch_data(self, warmup_years=2):
        super().fetch_data(warmup_years)
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d") - timedelta(days=warmup_years*365)
        try:
            spy = yf.download("SPY", start=start_dt.strftime("%Y-%m-%d"), end=self.end_date, progress=False, auto_adjust=False)
            if isinstance(spy.columns, pd.MultiIndex): spy.columns = spy.columns.get_level_values(0)
            self.spy_data = spy[['Adj Close']].rename(columns={'Adj Close': 'SPY_Price'})
        except: pass

    def generate_signals(self):
        if self.data is None or self.data.empty: return
        df = self.data.copy()
        
        if self.spy_data is not None:
            df = df.join(self.spy_data, how='left')
            df['SPY_MA200'] = df['SPY_Price'].rolling(window=200).mean()
            df['Macro_Bull'] = df['SPY_Price'] > df['SPY_MA200']
        else:
            df['Macro_Bull'] = True
            
        df['Volatility'] = FeatureLab.yang_zhang_volatility(df)
        df['FracDiff'] = FeatureLab.frac_diff_fixed(df['Adj Close'].apply(np.log), d=0.4, window=50)
        df['RSI'] = FeatureLab.compute_rsi(df['Adj Close'])
        df.dropna(inplace=True)
        
        df['Signal'] = 0
        df.loc[(df['FracDiff'] > 0), 'Signal'] = 1
        df.loc[df['Macro_Bull'] == False, 'Signal'] = 0
        
        target_vol = 0.15 / np.sqrt(252)
        df['Signal'] = df['Signal'] * (target_vol / df['Volatility']).clip(upper=1.5)
        self.data = df

In [54]:
class StrategyV9_RegimeUnshackled(BaseStrategy):
    """
    V9 (Robust): The 'Unshackled' Regime Model with Walk-Forward Learning.
    
    CRITICAL FIX:
    - Replaces static gmm.fit_predict() with a Rolling Walk-Forward loop.
    - Eliminates Look-Ahead Bias by retraining the regime model every month
      using only the trailing 1-year window.
    - Solves the 'Label Switching' problem dynamically at each step.
    """
    def __init__(self, ticker, start_date, end_date):
        super().__init__(ticker, start_date, end_date)
        self.spy_data = None

    def fetch_data(self, warmup_years=2):
        super().fetch_data(warmup_years)
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d") - timedelta(days=warmup_years*365)
        try:
            # Fetch Macro Context (SPY) for the 'Alpha Clause'
            spy = yf.download("SPY", start=start_dt.strftime("%Y-%m-%d"), end=self.end_date, progress=False, auto_adjust=False)
            if isinstance(spy.columns, pd.MultiIndex): spy.columns = spy.columns.get_level_values(0)
            
            # Simple Macro Trend (200 SMA)
            spy['Macro_Trend'] = (spy['Adj Close'] > spy['Adj Close'].rolling(200).mean()).astype(int)
            self.spy_data = spy[['Macro_Trend']]
        except: 
            pass

    def _apply_kalman_filter(self, prices):
        # [cite: 151] Kalman Filter for recursive state estimation
        x = prices.values
        n = len(x)
        state = np.zeros(n)
        slope = np.zeros(n)
        state[0] = x[0]
        P, Q, R = 1.0, 0.001, 0.1 # Static params for robustness
        
        for t in range(1, n):
            pred_state = state[t-1] + slope[t-1]
            pred_P = P + Q
            measurement = x[t]
            residual = measurement - pred_state
            
            K = pred_P / (pred_P + R)
            state[t] = pred_state + K * residual
            slope[t] = 0.9 * slope[t-1] + 0.1 * (state[t] - state[t-1])
            P = (1 - K) * pred_P
            
        return pd.Series(slope, index=prices.index)

    def generate_signals(self):
        if self.data is None or self.data.empty: return
        df = self.data.copy()
        
        # --- 1. Feature Engineering ---
        if self.spy_data is not None:
            df = df.join(self.spy_data, how='left').fillna(method='ffill')
        else: 
            df['Macro_Trend'] = 1
            
        # Volatility & Kalman Slope [cite: 82, 151]
        df['Volatility'] = FeatureLab.yang_zhang_volatility(df)
        df['Kalman_Slope'] = self._apply_kalman_filter(np.log(df['Adj Close']))
        df['RSI'] = FeatureLab.compute_rsi(df['Adj Close'])
        
        # Features for GMM (Smoothed to reduce noise)
        df['Returns_Smoothed'] = df['Returns'].rolling(5).mean()
        df['Vol_Smoothed'] = df['Volatility'].rolling(5).mean()
        
        # Drop NaN from warmup
        df.dropna(inplace=True)
        
        # --- 2. Rolling Walk-Forward GMM ---
        # We need to predict 'Regime_Type' for every row without looking ahead.
        
        train_window = 252  # Lookback: 1 Year
        refit_step = 21     # Re-train every Month
        
        # Initialize default regime (CHOP)
        df['Regime_Type'] = 'CHOP' 
        
        # Prepare Feature Matrix
        X = df[['Returns_Smoothed', 'Vol_Smoothed']].values
        scaler = StandardScaler()
        
        indices = np.arange(len(df))
        
        # Start loop after the first training window
        for t in range(train_window, len(df), refit_step):
            start_idx = t - train_window
            end_idx = t
            predict_end_idx = min(t + refit_step, len(df))
            
            # A. Train on PAST window
            X_train = X[start_idx:end_idx]
            
            # Scale locally (Standardization must also be walk-forward)
            #  "Standard algorithms assume training data... same probability distribution"
            scaler_local = StandardScaler()
            X_train_scaled = scaler_local.fit_transform(X_train)
            
            try:
                # Fit GMM
                gmm = GaussianMixture(n_components=3, random_state=42, n_init=5)
                gmm.fit(X_train_scaled)
                
                # B. Predict on NEXT window (Future)
                X_future = X[end_idx:predict_end_idx]
                X_future_scaled = scaler_local.transform(X_future)
                clusters_future = gmm.predict(X_future_scaled)
                
                # C. Dynamic Label Mapping (Solve Label Switching)
                # We map clusters to Bull/Bear based on the *Training* means
                # Calculate mean return for each cluster center in the training set
                # We can approximate this by predicting the training set again
                train_clusters = gmm.predict(X_train_scaled)
                
                # Create a temp dataframe to sort clusters
                temp_df = pd.DataFrame({
                    'Ret': X_train[:, 0], # Returns is column 0
                    'Cluster': train_clusters
                })
                
                stats = temp_df.groupby('Cluster')['Ret'].mean().sort_values()
                
                # The cluster with lowest mean return = BEAR
                # The cluster with highest mean return = BULL
                # Middle = CHOP
                if len(stats) == 3:
                    bear_c = stats.index[0]
                    chop_c = stats.index[1]
                    bull_c = stats.index[2]
                    
                    mapping = {bear_c: 'BEAR', chop_c: 'CHOP', bull_c: 'BULL'}
                else:
                    # Fallback if GMM collapses to fewer clusters
                    mapping = {c: 'CHOP' for c in stats.index}

                # Apply mapping to the prediction window
                regimes_mapped = [mapping.get(c, 'CHOP') for c in clusters_future]
                
                # Store results in the main DataFrame
                # Use iloc for integer-based indexing on the slice
                df.iloc[end_idx:predict_end_idx, df.columns.get_loc('Regime_Type')] = regimes_mapped
                
            except Exception as e:
                # Keep default 'CHOP' on failure
                pass

        # --- 3. Unshackled Signal Logic (unchanged from V9) ---
        df['Signal'] = 0.0
        
        # A. BULL REGIME: "Trust The Trend"
        bull_signal = (df['Regime_Type'] == 'BULL')
        df.loc[bull_signal, 'Signal'] = 1.0
        
        # Boost: If Kalman agrees (Strong Trend), increase leverage
        strong_trend = bull_signal & (df['Kalman_Slope'] > 0)
        df.loc[strong_trend, 'Signal'] = 1.3
        
        # B. CHOP REGIME: "Buy Dips"
        # chop_buy = (df['Regime_Type'] == 'CHOP') & (df['RSI'] < 45)
        # df.loc[chop_buy, 'Signal'] = 1.0

        # 1. Calculate a "Floor" (e.g., Lower Bollinger Band)
        df['SMA_20'] = df['Adj Close'].rolling(20).mean()
        df['BB_Lower'] = df['SMA_20'] - 2 * df['SMA_20'].rolling(20).std()

        # 2. Strict Chop Entry
        # OLD: RSI < 45
        # NEW: RSI < 45 AND Price > BB_Lower (Don't buy if it's crashing through the floor)
        #      AND FracDiff > -0.1 (Don't buy if trend is catastrophically negative)

        chop_buy = (
            (df['Regime_Type'] == 'CHOP') & 
            (df['RSI'] < 45) & 
            (df['Adj Close'] > df['BB_Lower']) # The Falling Knife Filter
        )
        df.loc[chop_buy, 'Signal'] = 1.0
        
        # C. BEAR REGIME: "Survival" (Cash unless deep panic)
        panic_buy = (df['Regime_Type'] == 'BEAR') & (df['RSI'] < 25)
        df.loc[panic_buy, 'Signal'] = 1.0
        
        # --- 4. The Alpha Clause (Macro Handling) ---
        target_vol = 0.15 / np.sqrt(252)
        
        # Volatility Sizing (Risk Parity) [cite: 254]
        vol_scaler = (target_vol / df['Volatility']).clip(upper=1.5)
        
        # Macro Filter: Halve size if SPY is bearish, but don't exit fully (Alpha Clause)
        macro_scaler = df['Macro_Trend'].map({1: 1.0, 0: 0.5})
        
        df['Signal'] = df['Signal'] * vol_scaler * macro_scaler
        
        self.data = df

## Q.L. Fang's Models

In [55]:
class U0_MeanReversion(BaseStrategy):
    """
    U0: Hourly-Granularity Mean Reversion.
    Uses 'Wick-Free' Daily Highs/Lows derived from Hourly closes.
    """
    def __init__(self, ticker, start_date, end_date, sensitivity=20, period=14, k=2.5):
        super().__init__(ticker, start_date, end_date)
        self.sensitivity = sensitivity
        self.period = period
        self.k = k

    def fetch_data(self, warmup_years=2):
        # Clamp start date to Yahoo's 730-day hourly limit
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d") - timedelta(days=warmup_years*365)
        limit_dt = datetime.now() - timedelta(days=725)
        if start_dt < limit_dt: start_dt = limit_dt
        
        try:
            # 1. Hourly Data
            df_h = yf.download(self.ticker, interval="1h", start=start_dt.strftime("%Y-%m-%d"), end=self.end_date, progress=False, auto_adjust=True)
            if df_h.empty: return
            if isinstance(df_h.columns, pd.MultiIndex): df_h.columns = df_h.columns.get_level_values(0)
            df_h = df_h[['Close']].dropna()
            df_h.index = df_h.index.tz_localize(None)

            # 2. Custom Daily Aggregation
            daily_max = df_h['Close'].resample('1D').max()
            daily_min = df_h['Close'].resample('1D').min()
            daily_diff = daily_max - daily_min
            
            # 3. Standard Daily Data
            df_d = yf.download(self.ticker, interval="1d", start=start_dt.strftime("%Y-%m-%d"), end=self.end_date, progress=False, auto_adjust=True)
            if isinstance(df_d.columns, pd.MultiIndex): df_d.columns = df_d.columns.get_level_values(0)
            
            df_d['Returns'] = df_d['Close'].pct_change()
            df_d['Daily_Max_H'] = daily_max
            df_d['Daily_Min_H'] = daily_min
            df_d['Daily_Diff_H'] = daily_diff
            df_d.dropna(inplace=True)
            self.data = df_d
        except: self.data = pd.DataFrame()

    def generate_signals(self):
        if self.data is None or self.data.empty: return
        df = self.data.copy()
        
        # Features
        df['ATR'] = df['Daily_Diff_H'].rolling(self.period).mean()
        df['Roll_Max'] = df['Daily_Max_H'].rolling(self.sensitivity).max()
        df['Roll_Min'] = df['Daily_Min_H'].rolling(self.sensitivity).min()
        
        # Shift (No Lookahead)
        cols = ['ATR', 'Roll_Max', 'Roll_Min', 'Daily_Max_H', 'Daily_Min_H']
        df[cols] = df[cols].shift(1)
        df.dropna(inplace=True)
        
        # Bands
        df['Upper'] = df['Roll_Max'] + self.k * df['ATR']
        df['Lower'] = df['Roll_Min'] - self.k * df['ATR']
        
        # State Machine Vectorization
        pos = 0
        signals = []
        close = df['Close'].values
        lower = df['Lower'].values
        upper = df['Upper'].values
        dmax = df['Daily_Max_H'].values
        dmin = df['Daily_Min_H'].values
        
        for i in range(len(df)):
            if pos == 0:
                if close[i] < lower[i]: pos = 1
                elif close[i] > upper[i]: pos = -1
            elif pos == 1:
                if close[i] > dmax[i]: pos = 0
            elif pos == -1:
                if close[i] < dmin[i]: pos = 0
            signals.append(pos)
            
        df['Signal'] = signals
        # Normalize signal to same scale as V3/V9 (approx 0 to 1 leverage)
        # U0 is aggressive, so we scale it down slightly
        df['Signal'] = df['Signal'] * 0.7 
        self.data = df

# ==========================================
# 3. HRP MATH ENGINE (The Allocator)
# ==========================================
class HRP_Allocator:
    @staticmethod
    def getIVP(cov):
        ivp = 1. / np.diag(cov)
        ivp /= ivp.sum()
        return ivp

    @staticmethod
    def getClusterVar(cov, cItems):
        cov_ = cov.loc[cItems, cItems] 
        w_ = HRP_Allocator.getIVP(cov_).reshape(-1, 1)
        cVar = np.dot(np.dot(w_.T, cov_), w_)[0, 0]
        return cVar

    @staticmethod
    def getQuasiDiag(link):
        link = link.astype(int)
        sortIx = pd.Series([link[-1, 0], link[-1, 1]])
        numItems = link[-1, 3] 
        while sortIx.max() >= numItems:
            sortIx.index = range(0, sortIx.shape[0] * 2, 2) 
            df0 = sortIx[sortIx >= numItems] 
            i = df0.index
            j = df0.values - numItems
            sortIx[i] = link[j, 0] 
            df0 = pd.Series(link[j, 1], index=i + 1)
            sortIx = sortIx.append(df0) 
            sortIx = sortIx.sort_index() 
            sortIx.index = range(sortIx.shape[0]) 
        return sortIx.tolist()

    @staticmethod
    def getRecBipart(cov, sortIx):
        w = pd.Series(1, index=sortIx)
        cItems = [sortIx] 
        while len(cItems) > 0:
            cItems = [i[j:k] for i in cItems for j, k in ((0, len(i) // 2), (len(i) // 2, len(i))) if len(i) > 1]
            for i in range(0, len(cItems), 2):
                cItems0 = cItems[i] 
                cItems1 = cItems[i + 1] 
                cVar0 = HRP_Allocator.getClusterVar(cov, cItems0)
                cVar1 = HRP_Allocator.getClusterVar(cov, cItems1)
                alpha = 1 - cVar0 / (cVar0 + cVar1)
                w[cItems0] *= alpha 
                w[cItems1] *= 1 - alpha 
        return w

    @staticmethod
    def optimize(returns_df):
        # Handle constant/zero returns to avoid NaN correlations
        if returns_df.std().min() < 1e-6:
            return pd.Series(1/len(returns_df.columns), index=returns_df.columns)
            
        corr = returns_df.corr().fillna(0)
        cov = returns_df.cov().fillna(0)
        dist = ((1 - corr) / 2.) ** .5
        link = sch.linkage(dist, 'single')
        sortIx = HRP_Allocator.getQuasiDiag(link)
        sortIx = corr.index[sortIx].tolist()
        return HRP_Allocator.getRecBipart(cov, sortIx)

class Strategy_Ensemble_HRP(BaseStrategy):
    """
    Phase 1 Upgrade: The HRP Ensemble.
    Allocates capital dynamically between V3 (Trend), V9 (Regime), and U0 (Mean Rev)
    based on their CORRELATION structure, not just returns.
    """
    def __init__(self, ticker, start_date, end_date):
        super().__init__(ticker, start_date, end_date)
        self.v3 = StrategyV3_Macro(ticker, start_date, end_date)
        self.v9 = StrategyV9_RegimeUnshackled(ticker, start_date, end_date)
        self.u0 = U0_MeanReversion(ticker, start_date, end_date)
    
    def fetch_data(self, warmup_years=2):
        self.u0.fetch_data(warmup_years) # U0 has strictest data reqs
        if self.u0.data is None or self.u0.data.empty: return
        
        # Sync V3/V9 to U0's available timeframe
        valid_start = self.u0.data.index[0].strftime("%Y-%m-%d")
        self.v3.start_date = valid_start
        self.v9.start_date = valid_start
        
        self.v3.fetch_data(warmup_years)
        self.v9.fetch_data(warmup_years)
        self.data = self.u0.data.copy() # Use U0 as master index

    def generate_signals(self):
        if self.u0.data is None: return
        
        # Run Sub-Strategies
        self.v3.generate_signals()
        self.v9.generate_signals()
        self.u0.generate_signals()
        
        # Merge Signals
        df = self.data.copy()
        # Join by index to handle missing rows
        df = df.join(self.v3.data[['Signal']].rename(columns={'Signal':'S_V3'}), how='inner')
        df = df.join(self.v9.data[['Signal']].rename(columns={'Signal':'S_V9'}), how='inner')
        df = df.join(self.u0.data[['Signal']].rename(columns={'Signal':'S_U0'}), how='inner')
        
        # Simulate Returns for HRP
        df['R_V3'] = df['S_V3'].shift(1) * df['Returns']
        df['R_V9'] = df['S_V9'].shift(1) * df['Returns']
        df['R_U0'] = df['S_U0'].shift(1) * df['Returns']
        
        # HRP Walk-Forward Loop
        lookback = 63  # Short lookback for correlation (3 months)
        rebal = 21     # Monthly rebalance
        
        df['W_V3'], df['W_V9'], df['W_U0'] = 0.33, 0.33, 0.33
        
        indices = df.index
        for t in range(lookback, len(df), rebal):
            start = indices[t-lookback]
            end = indices[t]
            test_end = indices[min(t+rebal, len(df)-1)]
            
            # Extract Returns History for Optimization
            ret_hist = df.loc[start:end, ['R_V3', 'R_V9', 'R_U0']]
            
            # Run HRP
            try:
                weights = HRP_Allocator.optimize(ret_hist)
                df.loc[end:test_end, 'W_V3'] = weights['R_V3']
                df.loc[end:test_end, 'W_V9'] = weights['R_V9']
                df.loc[end:test_end, 'W_U0'] = weights['R_U0']
            except: pass
            
        # Final Ensemble Signal
        df['Signal'] = (df['S_V3']*df['W_V3']) + (df['S_V9']*df['W_V9']) + (df['S_U0']*df['W_U0'])
        self.data = df

## Benchmark

In [56]:
class RobustBenchmark:
    """
    Implements Walk-Forward Analysis and Deflated Sharpe Ratio logic.
    Benchmarks multiple strategies without look-ahead bias[cite: 275].
    """
    def __init__(self, tickers, start_date, end_date):
        self.tickers = tickers
        self.start_date = start_date
        self.end_date = end_date
        self.results = []

    def run(self):
        print(f"{'STRATEGY':<10} | {'TICKER':<6} | {'ANN RET':<7} | {'SHARPE':<6} | {'MAX DD':<7} | {'NOTES'}")
        print("-" * 75)
        
        strategies = {
            "V1_Base": StrategyV1_Baseline,
            # "V2_GMM": StrategyV2_Advanced,
            "V3_Macro": StrategyV3_Macro,
            # "V4_Meta": StrategyV4_Meta,
            # "V5_Kalman": StrategyV5_KalmanState,
            # "V6_Inst": StrategyV6_MetaLabeling,
            "V7_Optim": StrategyV7_AdaptiveOptim,
            "V8_Final": StrategyV8_GrandUnification,
            "V9_Unshack": StrategyV9_RegimeUnshackled,
            "U0_MeanRev": U0_MeanReversion
        }

        for ticker in self.tickers:
            # Capture Buy & Hold first
            bh = StrategyV1_Baseline(ticker, self.start_date, self.end_date)
            bh.fetch_data()
            bh.data['Signal'] = 1 # Force Buy
            bh.run_backtest()
            self._print_row("Buy&Hold", ticker, bh.metrics)
            
            for name, StratClass in strategies.items():
                try:
                    strat = StratClass(ticker, self.start_date, self.end_date)
                    strat.fetch_data(warmup_years=2)
                    strat.generate_signals()
                    strat.run_backtest()
                    
                    self._print_row(name, ticker, strat.metrics)
                    
                    # Store for portfolio level (optional)
                    self.results.append({
                        'Ticker': ticker,
                        'Strategy': name,
                        'Returns': strat.results['Net_Returns']
                    })
                except Exception as e:
                    print(f"Failed {name} {ticker}: {e}")
            print("-" * 75)

    def _print_row(self, name, ticker, metrics):
        if not metrics: return
        ret = metrics['Total Return']
        # Annualize return approx
        ann_ret = (1 + ret) ** (252 / len(metrics.get('Returns', [1]*252))) - 1 if 'Returns' in metrics else ret
        print(f"{name:<10} | {ticker:<6} | {ret:.1%}   | {metrics['Sharpe Ratio']:.2f}   | {metrics['Max Drawdown']:.1%}   |")

## Ensemble

In [57]:
class Strategy_Ensemble(BaseStrategy):
    """
    The 'All-Weather' Ensemble.
    
    Combines V3 (Macro Trend) and V9 (Regime Unshackled) into a single
    portfolio-level signal.
    
    Logic:
    1. Runs V3 to capture high-beta trends (NVDA, Bitcoin).
    2. Runs V9 to capture regime-based alpha and protect downside (JPM, BABA).
    3. Blends signals using a 'Correlation-Adjusted' weighting or fixed 50/50.
    4. Applies a final Volatility Target to the combined equity curve to ensure
       the two strategies don't stack up to dangerous leverage.
    """
    def __init__(self, ticker, start_date, end_date, w_v3=0.5, w_v9=0.5):
        super().__init__(ticker, start_date, end_date)
        self.w_v3 = w_v3
        self.w_v9 = w_v9
        # Instantiate sub-strategies
        self.strat_v3 = StrategyV3_Macro(ticker, start_date, end_date)
        self.strat_v9 = StrategyV9_RegimeUnshackled(ticker, start_date, end_date)

    def fetch_data(self, warmup_years=2):
        # Fetch once for efficiency (logic could be optimized to share DF, 
        # but separate fetch ensures cleaner encapsulation)
        self.strat_v3.fetch_data(warmup_years)
        self.strat_v9.fetch_data(warmup_years)
        
        # We share the index/data from one of them for the main wrapper
        if self.strat_v3.data is not None and not self.strat_v3.data.empty:
            self.data = self.strat_v3.data.copy()
        elif self.strat_v9.data is not None:
            self.data = self.strat_v9.data.copy()

    def generate_signals(self):
        if self.strat_v3.data is None or self.strat_v9.data is None: return
        
        # 1. Generate Sub-Signals
        self.strat_v3.generate_signals()
        self.strat_v9.generate_signals()
        
        # Align Indices (Inner Join to be safe)
        df = self.data.copy()
        s3 = self.strat_v3.data['Signal']
        s9 = self.strat_v9.data['Signal']
        
        # Merge signals into main DF
        df['Sig_V3'] = s3
        df['Sig_V9'] = s9
        df.dropna(inplace=True)
        
        # 2. The Allocation Logic
        # Default: Fixed Weight (Core-Satellite Approach)
        # V3 (Beta) + V9 (Alpha)
        
        # We blend the RAW signals.
        # Note: Signals are already Vol-Targeted to ~15% inside sub-classes.
        # Simple addition would double vol if correlation=1.
        raw_blend = (df['Sig_V3'] * self.w_v3) + (df['Sig_V9'] * self.w_v9)
        
        # 3. Ensemble Volatility Control
        # If V3 and V9 agree (both Long), we get high exposure.
        # If they disagree (V3 Long, V9 Cash), we get half exposure.
        # This naturally deleverages during uncertainty.
        
        df['Signal'] = raw_blend
        
        # Optional: Re-Target Volatility of the *Ensemble*
        # (Prevents leverage creep if strategies are highly correlated)
        # For now, we trust the weighted sum to act as a diversification benefit.
        
        self.data = df

In [58]:
class Strategy_Ensemble_Adaptive(BaseStrategy):
    """
    V10: The Adaptive Ensemble (Dynamic Weighting).
    
    Instead of fixed weights, this strategy re-allocates capital quarterly 
    based on the recent Risk-Adjusted Performance (Sharpe) of the sub-strategies.
    
    Logic:
    1. Lookback: 126 Days (6 Months).
    2. Rebalance: Every 63 Days (Quarterly).
    3. Weighting:
       - Calculate Sharpe Ratio for V3 and V9 in the lookback window.
       - If Sharpe > 0: Weight is proportional to Sharpe.
       - If Sharpe < 0: Weight is set to 0.
       - Normalize weights to sum to 1.0.
       
    This allows the portfolio to automatically 'Risk On' into V3 during strong bulls
    and 'Risk Off' into V9 during bears/chop.
    """
    def __init__(self, ticker, start_date, end_date):
        super().__init__(ticker, start_date, end_date)
        # Instantiate sub-strategies
        self.strat_v3 = StrategyV3_Macro(ticker, start_date, end_date)
        self.strat_v9 = StrategyV9_RegimeUnshackled(ticker, start_date, end_date)

    def fetch_data(self, warmup_years=2):
        self.strat_v3.fetch_data(warmup_years)
        self.strat_v9.fetch_data(warmup_years)
        
        # Use one of the dataframes as the base
        if self.strat_v3.data is not None and not self.strat_v3.data.empty:
            self.data = self.strat_v3.data.copy()
        elif self.strat_v9.data is not None:
            self.data = self.strat_v9.data.copy()

    def generate_signals(self):
        if self.strat_v3.data is None or self.strat_v9.data is None: return
        
        # 1. Generate Sub-Signals
        self.strat_v3.generate_signals()
        self.strat_v9.generate_signals()
        
        # Merge Data
        df = self.data.copy()
        df['Sig_V3'] = self.strat_v3.data['Signal']
        df['Sig_V9'] = self.strat_v9.data['Signal']
        df.dropna(inplace=True)
        
        # 2. Simulate Sub-Strategy Returns (for metric calculation)
        # We need to know how they *would* have performed to weight them.
        # Lag signals by 1 to avoid lookahead bias when calculating returns.
        df['Ret_V3'] = df['Sig_V3'].shift(1) * df['Returns']
        df['Ret_V9'] = df['Sig_V9'].shift(1) * df['Returns']
        
        # 3. Walk-Forward Weight Optimization
        df['W_V3'] = 0.5 # Default start
        df['W_V9'] = 0.5
        
        lookback = 126      # 6 Months Lookback
        rebalance_freq = 21 # Monthly Rebalance (Faster adaptation)
        
        indices = df.index
        
        if len(df) > lookback:
            for t in range(lookback, len(df), rebalance_freq):
                train_start = indices[t - lookback]
                train_end = indices[t]
                test_end_idx = min(t + rebalance_freq, len(df))
                test_end = indices[test_end_idx - 1]
                
                # Calculate Sharpe in Lookback Window
                # Add small epsilon to std to avoid division by zero
                v3_mean = df.loc[train_start:train_end, 'Ret_V3'].mean()
                v3_std = df.loc[train_start:train_end, 'Ret_V3'].std() + 1e-9
                sharpe_v3 = (v3_mean / v3_std) * np.sqrt(252)
                
                v9_mean = df.loc[train_start:train_end, 'Ret_V9'].mean()
                v9_std = df.loc[train_start:train_end, 'Ret_V9'].std() + 1e-9
                sharpe_v9 = (v9_mean / v9_std) * np.sqrt(252)
                
                # Weighting Logic
                # 1. Filter: If Sharpe is negative, set score to 0
                score_v3 = max(0, sharpe_v3)
                score_v9 = max(0, sharpe_v9)
                
                # 2. Normalize
                total_score = score_v3 + score_v9
                
                if total_score > 0:
                    w_v3 = score_v3 / total_score
                    w_v9 = score_v9 / total_score
                else:
                    # Both are failing? Default to Defensive (V9) or Cash (0)
                    # Let's default to V9 (Safety) as the 'bunker'
                    w_v3 = 0.0
                    w_v9 = 1.0
                
                # Apply weights to NEXT window
                df.loc[train_end:test_end, 'W_V3'] = w_v3
                df.loc[train_end:test_end, 'W_V9'] = w_v9
                
        # 4. Final Signal Generation
        # Blend the signals using the dynamic weights
        df['Signal'] = (df['Sig_V3'] * df['W_V3']) + (df['Sig_V9'] * df['W_V9'])
        
        self.data = df

In [59]:
class StrategyV12_Macro_Switch(BaseStrategy):
    """
    V12: The Macro-Guided Ensemble.
    
    Replaces lookback windows with Real-Time Economic Data.
    
    DATA SOURCES (Yahoo Finance):
    1. ^VIX: CBOE Volatility Index.
    2. ^TNX: 10-Year Treasury Yield.
    
    LOGIC:
    1. Calculate 'Macro Stress Score' (0.0 to 1.0).
       - VIX Component: Normalized against recent history. High VIX = High Stress.
       - Yield Component: Rate of Change (ROC) of TNX. Spiking rates = High Stress.
    
    2. Dynamic Weighting:
       - Weight_V3 (Trend) = 1.0 - Stress_Score
       - Weight_V9 (Safety) = Stress_Score
       
    HYPOTHESIS:
    VIX and Rates often spike BEFORE the price crash is fully realized. 
    This allows the model to switch to safety faster than a Moving Average.
    """
    def __init__(self, ticker, start_date, end_date):
        super().__init__(ticker, start_date, end_date)
        self.v3 = StrategyV3_Macro(ticker, start_date, end_date)
        self.v9 = StrategyV9_RegimeUnshackled(ticker, start_date, end_date)
        # We store macro data separately
        self.macro_data = None

    def fetch_data(self, warmup_years=2):
        # 1. Fetch Ticker Data
        self.v3.fetch_data(warmup_years)
        self.v9.fetch_data(warmup_years)
        
        if self.v3.data is None or self.v3.data.empty: return
        self.data = self.v3.data.copy()
        
        # 2. Fetch Macro Data (VIX and TNX)
        start_dt = (self.data.index[0] - timedelta(days=365)).strftime("%Y-%m-%d")
        end_dt = self.end_date
        
        try:
            vix = yf.download("^VIX", start=start_dt, end=end_dt, progress=False, auto_adjust=True)
            tnx = yf.download("^TNX", start=start_dt, end=end_dt, progress=False, auto_adjust=True)
            
            # Cleaning
            if isinstance(vix.columns, pd.MultiIndex): vix.columns = vix.columns.get_level_values(0)
            if isinstance(tnx.columns, pd.MultiIndex): tnx.columns = tnx.columns.get_level_values(0)
            
            macro_df = pd.DataFrame(index=self.data.index)
            # Align macro data to the ticker's trading days (ffill for holidays)
            macro_df['VIX'] = vix['Close'].reindex(self.data.index, method='ffill')
            macro_df['TNX'] = tnx['Close'].reindex(self.data.index, method='ffill')
            
            self.macro_data = macro_df
            
        except Exception as e:
            print(f"Macro Data Fetch Error: {e}")
            # Fallback: Zero stress
            self.macro_data = pd.DataFrame({'VIX': 20, 'TNX': 4}, index=self.data.index)

    def generate_signals(self):
        if self.data is None or self.macro_data is None: return
        
        # 1. Run Sub-Strategies
        self.v3.generate_signals()
        self.v9.generate_signals()
        
        # 2. Sync Data
        df = self.data.copy()
        df = df.join(self.v3.data[['Signal']].rename(columns={'Signal':'S_V3'}), how='left')
        df = df.join(self.v9.data[['Signal']].rename(columns={'Signal':'S_V9'}), how='left')
        
        # 3. Calculate Macro Stress Score
        macro = self.macro_data.copy()
        
        # A. VIX Stress (Fear)
        # Normalize VIX: If VIX > 30, Stress = 1.0. If VIX < 15, Stress = 0.0.
        # Uses a rolling Z-score or simple clamp? Simple clamp is more robust to regime shifts.
        macro['VIX_Stress'] = ((macro['VIX'] - 15) / (30 - 15)).clip(0, 1)
        
        # B. Yield Stress (Rate Shock)
        # We care about SPEED of rate rise, not just level.
        # Calculate 20-day Rate of Change of TNX
        macro['TNX_ROC'] = macro['TNX'].pct_change(20)
        # If Yields rise > 10% in a month, that's a shock.
        macro['TNX_Stress'] = (macro['TNX_ROC'] / 0.10).clip(0, 1)
        
        # Combined Stress (Max of either Fear or Rate Shock)
        # We use Max because either one can crash the market independently.
        macro['Total_Stress'] = macro[['VIX_Stress', 'TNX_Stress']].max(axis=1)
        
        # 4. Allocate Weights
        # Smooth the stress signal to avoid daily jitter (3-day avg)
        stress_signal = macro['Total_Stress'].rolling(3).mean().fillna(0)
        
        df['W_V9'] = stress_signal        # High Stress -> More Safety
        df['W_V3'] = 1.0 - stress_signal  # Low Stress -> More Trend
        
        # 5. Final Signal
        df['Signal'] = (df['S_V3'] * df['W_V3']) + (df['S_V9'] * df['W_V9'])
        
        self.data = df

In [60]:
class StrategyV14_AI_Selector(BaseStrategy):
    """
    V14: The AI 'Mixture of Experts'.
    
    Uses a Walk-Forward Random Forest to dynamically select the best sub-strategy.
    
    LOGIC:
    1. Simulate Returns for V12 (Macro) and V9 (Regime).
    2. Create Target Label:
       - y = 1 if V12 outperforms V9 over the next 5 days.
       - y = 0 if V9 outperforms V12.
    3. Train Classifier:
       - Features: VIX, TNX, Asset Volatility, RSI, Correlation.
    4. Execution:
       - If Model predicts 1 (High Confidence) -> Allocate to V12.
       - If Model predicts 0 -> Allocate to V9.
       
    This removes the brittle '-0.2' threshold and allows the data to decide.
    """
    def __init__(self, ticker, start_date, end_date):
        super().__init__(ticker, start_date, end_date)
        self.v12 = StrategyV12_Macro_Switch(ticker, start_date, end_date)
        self.v9 = StrategyV9_RegimeUnshackled(ticker, start_date, end_date)
        self.macro_data = None

    def fetch_data(self, warmup_years=2):
        # Fetch Sub-Strategies
        self.v12.fetch_data(warmup_years)
        self.v9.fetch_data(warmup_years)
        
        if self.v12.data is None or self.v12.data.empty: return
        self.data = self.v12.data.copy()
        
        # Extract Macro Data (for ML Features)
        if self.v12.macro_data is not None:
            self.macro_data = self.v12.macro_data.reindex(self.data.index).fillna(method='ffill')
        else:
            self.macro_data = pd.DataFrame({'VIX': 20, 'TNX': 4}, index=self.data.index)

    def generate_signals(self):
        if self.data is None or self.macro_data is None: return
        
        # 1. Run Experts
        self.v12.generate_signals()
        self.v9.generate_signals()
        
        # 2. Prepare Training Data
        df = self.data.copy()
        # Signals
        df['S_V12'] = self.v12.data['Signal']
        df['S_V9']  = self.v9.data['Signal']
        
        # Future Returns (Target)
        # Which strategy performs better over next 5 days?
        # We assume 1-day lag for implementation, so we look at t+1 to t+5
        lookahead = 5
        df['Ret_V12'] = (df['S_V12'].shift(1) * df['Returns']).rolling(lookahead).sum().shift(-lookahead)
        df['Ret_V9']  = (df['S_V9'].shift(1) * df['Returns']).rolling(lookahead).sum().shift(-lookahead)
        
        # Target: 1 if V12 wins, 0 if V9 wins
        df['Target'] = (df['Ret_V12'] > df['Ret_V9']).astype(int)
        
        # 3. Construct Features (X)
        # Macro
        df['VIX'] = self.macro_data['VIX']
        df['TNX'] = self.macro_data['TNX']
        df['VIX_Trend'] = df['VIX'].diff(5)
        
        # Micro
        df['RSI'] = FeatureLab.compute_rsi(df['Adj Close'])
        df['Vol'] = FeatureLab.yang_zhang_volatility(df)
        
        # Correlation (The V13 Feature)
        df['Corr_VIX'] = df['Returns'].rolling(63).corr(self.macro_data['VIX'].pct_change())
        
        feature_cols = ['VIX', 'TNX', 'VIX_Trend', 'RSI', 'Vol', 'Corr_VIX']
        df.dropna(inplace=True)
        
        # 4. Walk-Forward ML Loop
        df['Prob_V12'] = 0.5 # Default Neutral
        
        train_window = 252 # 1 Year History to learn
        rebal_freq = 21    # Retrain monthly
        
        clf = RandomForestClassifier(n_estimators=50, max_depth=3, random_state=42)
        
        indices = df.index
        if len(df) > train_window:
            for t in range(train_window, len(df), rebal_freq):
                start = indices[t - train_window]
                end = indices[t]
                test_end = indices[min(t + rebal_freq, len(df) - 1)]
                
                # Train
                X_train = df.loc[start:end, feature_cols]
                y_train = df.loc[start:end, 'Target']
                
                try:
                    clf.fit(X_train, y_train)
                    
                    # Predict
                    X_test = df.loc[end:test_end, feature_cols]
                    probs = clf.predict_proba(X_test)
                    
                    # Get Prob of Class 1 (V12 Wins)
                    if probs.shape[1] == 2:
                        p = probs[:, 1]
                    else:
                        p = probs[:, 0] if clf.classes_[0] == 1 else 0.0
                        
                    df.loc[end:test_end, 'Prob_V12'] = p
                except: pass
                
        # 5. Signal Blending (Soft Voting)
        # If Prob > 0.5, we lean to V12. If Prob < 0.5, we lean to V9.
        # We map 0.0-1.0 probability to weights
        
        # Clip probability to avoid extreme overconfidence
        prob = df['Prob_V12'].clip(0.1, 0.9)
        
        df['W_V12'] = prob
        df['W_V9'] = 1.0 - prob
        
        df['Signal'] = (df['S_V12'] * df['W_V12']) + (df['S_V9'] * df['W_V9'])
        
        self.data = df

## Kalman

In [61]:
from pykalman import KalmanFilter
import numpy as np
import pandas as pd
import yfinance as yf
from datetime import datetime, timedelta
# Assuming BaseStrategy and FeatureLab are already defined in your environment

class StrategyV15_StatArb(BaseStrategy):
    """
    V15: Market Neutral Statistical Arbitrage (Native pykalman).
    
    Logic:
    1. Model the relationship: Price_Y = Alpha + Beta * Price_X + Noise
    2. Use Kalman Filter to estimate dynamic states [Alpha, Beta] in real-time.
    3. Construct the Spread: Spread_t = Y_t - (Alpha_t + Beta_t * X_t).
    4. Trade Mean Reversion of the Spread using Z-Scores.
    """
    def __init__(self, ticker_y, ticker_x, start_date, end_date):
        super().__init__(ticker_y, start_date, end_date)
        self.ticker_x = ticker_x

    def fetch_data(self, warmup_years=2):
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d") - timedelta(days=warmup_years*365)
        start_str = start_dt.strftime("%Y-%m-%d")
        
        try:
            # Download aligned data
            data_y = yf.download(self.ticker, start=start_str, end=self.end_date, progress=False, auto_adjust=True)
            data_x = yf.download(self.ticker_x, start=start_str, end=self.end_date, progress=False, auto_adjust=True)
            
            if isinstance(data_y.columns, pd.MultiIndex): data_y.columns = data_y.columns.get_level_values(0)
            if isinstance(data_x.columns, pd.MultiIndex): data_x.columns = data_x.columns.get_level_values(0)
            
            df = pd.DataFrame(index=data_y.index)
            df['Y'] = data_y['Close']
            df['X'] = data_x['Close'].reindex(df.index).ffill()
            df.dropna(inplace=True)
            self.data = df
        except Exception as e:
            print(f"StatArb Fetch Error: {e}")
            self.data = pd.DataFrame()

    def generate_signals(self):
        if self.data is None or self.data.empty: return
        df = self.data.copy()
        
        # --- 1. Kalman Filter Setup ---
        # State: [Alpha, Beta]
        # Observation: Y ~ Alpha + Beta * X
        
        # Observation Matrices: Shape (N_samples, N_obs_dim, N_state_dim) -> (N, 1, 2)
        # For each time t, Obs_Mat = [[1, X_t]]
        obs_mat = np.expand_dims(np.vstack([[1] * len(df), df['X']]).T, axis=1)
        
        kf = KalmanFilter(
            n_dim_obs=1, 
            n_dim_state=2, 
            initial_state_mean=[0, 0],
            initial_state_covariance=np.ones((2, 2)),
            transition_matrices=np.eye(2),       # Random Walk (Identity)
            observation_matrices=obs_mat,        # Dynamic relationship
            observation_covariance=1.0,          # Noise in price
            transition_covariance=np.eye(2)*1e-4 # Noise in slope evolution (delta)
        )
        
        # Run Filter (Causal, no lookahead)
        # state_means contains [Alpha_t, Beta_t] for each step
        state_means, _ = kf.filter(df['Y'].values)
        
        df['Alpha'] = state_means[:, 0]
        df['Beta'] = state_means[:, 1]
        
        # --- 2. Spread Construction ---
        # Spread = Actual Y - Predicted Y
        df['Spread'] = df['Y'] - (df['Alpha'] + df['Beta'] * df['X'])
        
        # --- 3. Z-Score Logic ---
        window = 20
        df['Spread_Mean'] = df['Spread'].rolling(window).mean()
        df['Spread_Std'] = df['Spread'].rolling(window).std()
        df['Z_Score'] = (df['Spread'] - df['Spread_Mean']) / df['Spread_Std']
        
        # --- 4. Signal Generation (Mean Reversion) ---
        df['Signal'] = 0.0
        
        # Entry Logic
        short_spread = (df['Z_Score'] > 2.0)  # Spread is too high -> Short Y, Long X
        long_spread = (df['Z_Score'] < -2.0)  # Spread is too low -> Long Y, Short X
        exit_cond = (df['Z_Score'].abs() < 0.5)
        
        # State Machine
        curr_pos = 0 # 1 = Long Spread, -1 = Short Spread
        pos_history = []
        
        # Vector -> Array for speed
        ls_arr = long_spread.values
        ss_arr = short_spread.values
        ex_arr = exit_cond.values
        
        for i in range(len(df)):
            if curr_pos == 0:
                if ls_arr[i]: curr_pos = 1
                elif ss_arr[i]: curr_pos = -1
            elif curr_pos == 1:
                if ex_arr[i]: curr_pos = 0
            elif curr_pos == -1:
                if ex_arr[i]: curr_pos = 0
            pos_history.append(curr_pos)
            
        # Shift position to avoid lookahead (Trade executed next open)
        df['Spread_Position'] = pd.Series(pos_history, index=df.index).shift(1).fillna(0)
        
        # --- 5. Return Calculation ---
        # Long Spread Profit = (Ret_Y) - Beta * (Ret_X)
        df['Ret_Y'] = df['Y'].pct_change()
        df['Ret_X'] = df['X'].pct_change()
        
        # We use the previous day's Hedge Ratio for the return calculation
        df['Net_Returns'] = df['Spread_Position'] * (df['Ret_Y'] - df['Beta'].shift(1) * df['Ret_X'])
        
        # "Signal" column for compatibility with Portfolio Allocators
        # Note: This signal is for PnL calculation only, logic is handled above
        df['Signal'] = df['Spread_Position'] 
        
        self.data = df

    def run_backtest(self, transaction_cost=0.0005):
        # Custom Backtest for Pairs (2x Transaction Costs)
        if self.data is None or self.data.empty: return
        df = self.data.copy()
        
        # Turnover: Change in position direction
        df['Turnover'] = df['Spread_Position'].diff().abs().fillna(0)
        
        # Cost: 2 assets * cost
        df['Strategy_Net'] = df['Net_Returns'] - (df['Turnover'] * transaction_cost * 2)
        
        df['Cumulative_Strategy'] = (1 + df['Strategy_Net']).cumprod()
        roll_max = df['Cumulative_Strategy'].cummax()
        df['Drawdown'] = (df['Cumulative_Strategy'] / roll_max) - 1.0
        
        self.results = df
        
        total_ret = df['Cumulative_Strategy'].iloc[-1] - 1
        vol = df['Strategy_Net'].std() * np.sqrt(252)
        sharpe = (df['Strategy_Net'].mean() / df['Strategy_Net'].std()) * np.sqrt(252) if vol > 0 else 0
        max_dd = df['Drawdown'].min()
        
        self.metrics = {
            'Total Return': total_ret,
            'Sharpe Ratio': sharpe,
            'Max Drawdown': max_dd
        }

In [62]:
from statsmodels.tsa.stattools import adfuller

class StrategyV16_Gated_StatArb(StrategyV15_StatArb):
    """
    V16: Cointegration-Gated Statistical Arbitrage.
    
    Improvements over V15:
    1. STATIONARITY FILTER:
       - Before taking a trade, we calculate the Rolling ADF P-Value of the spread.
       - If P-Value > 0.05 (Non-Stationary), we set Signal to 0.
       - This prevents the model from fighting a structural trend (e.g., NVDA vs AMD).
       
    2. HARD STOP LOSS:
       - If Z-Score > 4.0, we assume the relationship has broken permanently ('Divorce').
       - We force exit to prevent infinite drawdowns.
    """
    def generate_signals(self):
        if self.data is None or self.data.empty: return
        
        # 1. Run Standard Kalman Filter (Inherited from V15)
        # This populates self.data with 'Spread', 'Z_Score', 'Hedge_Ratio'
        super().generate_signals()
        
        df = self.data.copy()
        
        # 2. Rolling ADF Test (The Gatekeeper)
        # We need a window (e.g., 60 days) to test for mean reversion
        window = 60
        adf_pvalues = []
        spread_values = df['Spread'].values
        
        # This loop can be slow, but necessary for Walk-Forward validity
        for i in range(len(df)):
            if i < window:
                adf_pvalues.append(1.0) # Assume non-stationary (safe) at start
            else:
                # Check stationarity of the recent spread history
                window_spread = spread_values[i-window:i]
                # Check if spread is constant (to avoid adfuller error)
                if np.std(window_spread) < 1e-6:
                    p_val = 1.0
                else:
                    try:
                        # regression='c' (constant mean) or 'ct' (trend)
                        res = adfuller(window_spread, regression='c')
                        p_val = res[1]
                    except:
                        p_val = 1.0
                adf_pvalues.append(p_val)
                
        df['ADF_PValue'] = adf_pvalues
        
        # 3. Apply The Filter
        # Mask: 1 if Stationary (Trade), 0 if Trending (Don't Trade)
        # We use a lag to avoid lookahead bias
        is_stationary = (df['ADF_PValue'].shift(1) < 0.05).astype(int)
        
        # 4. Apply Hard Stop Loss (The "Divorce" Clause)
        # If Spread Z-Score is > 4 sigma, the pair is likely broken.
        is_broken = (df['Z_Score'].abs().shift(1) > 4.0).astype(int)
        
        # 5. Final Signal Logic
        # Original Signal (from V15) * Stationarity Mask * (Not Broken)
        # Note: 'Signal' in V15 contains the position (-1, 0, 1)
        
        original_signal = df['Spread_Position'] # This was the calculated position in V15
        
        # If not stationary, force position to 0
        filtered_signal = original_signal * is_stationary
        
        # If broken, force position to 0
        filtered_signal = np.where(is_broken == 1, 0, filtered_signal)
        
        df['Spread_Position'] = filtered_signal
        
        # Recalculate Returns with filtered positions
        df['Net_Returns'] = df['Spread_Position'] * (df['Ret_Y'] - df['Beta'].shift(1) * df['Ret_X'])
        
        # Update Data
        self.data = df

## Execution

In [63]:
if __name__ == "__main__":
    print(f"{'STRATEGY':<12} | {'TICKER':<6} | {'ANN RET':<7} | {'SHARPE':<6} | {'MAX DD':<7} | {'NOTES'}")
    print("-" * 79)
    
    # Helper wrappers for the dictionary
    class Strategy_Ensemble_5050(Strategy_Ensemble):
        def __init__(self, ticker, start, end): super().__init__(ticker, start, end, 0.5, 0.5)

    class Strategy_Ensemble_Growth(Strategy_Ensemble):
        def __init__(self, ticker, start, end): super().__init__(ticker, start, end, 0.7, 0.3)

    strategies = {
        "V3_Macro": StrategyV3_Macro,
        "V9_Unshack": StrategyV9_RegimeUnshackled,
        # "Ens_Bal": Strategy_Ensemble_5050,      # Static 50/50
        # "Ens_Grow": Strategy_Ensemble_Growth,   # Static 70/30
        "Ens_Adapt": Strategy_Ensemble_Adaptive, # Dynamic V10
        # "HRP_Base": Strategy_Ensemble_HRP,
        "V12_Macro": StrategyV12_Macro_Switch,
        "V14_AI": StrategyV14_AI_Selector,
    }

    # Same Stress Test Basket
    tickers = ["NVDA", "JPM", "TSLA", "BABA", "XLE"]

    bench = RobustBenchmark(
        tickers=tickers, 
        start_date="2022-01-01", 
        end_date="2024-12-30"
    )
    
    # Manual run loop to handle the specific classes
    for ticker in tickers:
        # Buy & Hold
        bh = StrategyV1_Baseline(ticker, bench.start_date, bench.end_date)
        bh.fetch_data()
        bh.data['Signal'] = 1
        bh.run_backtest()
        bench._print_row("Buy&Hold", ticker, bh.metrics)
        
        for name, StratClass in strategies.items():
            try:
                strat = StratClass(ticker, bench.start_date, bench.end_date)
                strat.fetch_data(warmup_years=2)
                strat.generate_signals()
                strat.run_backtest()
                bench._print_row(name, ticker, strat.metrics)
            except Exception as e:
                print(f"Err {name} {ticker}: {e}")
        print("-" * 79)

STRATEGY     | TICKER | ANN RET | SHARPE | MAX DD  | NOTES
-------------------------------------------------------------------------------
Buy&Hold   | NVDA   | 355.4%   | 1.19   | -62.7%   |
V3_Macro   | NVDA   | 173.4%   | 1.51   | -23.1%   |
V9_Unshack | NVDA   | 61.9%   | 0.88   | -16.7%   |
Ens_Adapt  | NVDA   | 111.8%   | 1.31   | -20.6%   |
V12_Macro  | NVDA   | 159.6%   | 1.57   | -16.7%   |
V14_AI     | NVDA   | 91.8%   | 1.19   | -16.7%   |
-------------------------------------------------------------------------------
Buy&Hold   | JPM    | 62.1%   | 0.77   | -37.9%   |
V3_Macro   | JPM    | 71.9%   | 0.97   | -28.5%   |
V9_Unshack | JPM    | 39.2%   | 0.70   | -16.7%   |
Ens_Adapt  | JPM    | 80.4%   | 1.16   | -17.9%   |
V12_Macro  | JPM    | 93.1%   | 1.25   | -13.7%   |
V14_AI     | JPM    | 55.7%   | 0.93   | -15.9%   |
-------------------------------------------------------------------------------
Buy&Hold   | TSLA   | 7.9%   | 0.35   | -73.0%   |
V3_Macro   | TSLA   | 

In [64]:
if __name__ == "__main__":
    print(f"{'STRATEGY':<12} | {'PAIR':<9} | {'ANN RET':<7} | {'SHARPE':<6} | {'MAX DD':<7} | {'NOTES'}")
    print("-" * 85)
    
    pairs = [
        ("NVDA", "AMD"),  # The Broken Pair
        ("PEP", "KO"),    # The Stable Pair
        ("JPM", "BAC")    # The Correlated Pair
    ]
    
    start_date = "2023-01-01"
    end_date = "2024-12-30"
    
    strategies = {
        "V15_StatArb": StrategyV15_StatArb,
        "V16_Gated": StrategyV16_Gated_StatArb
    }
    
    for y, x in pairs:
        for name, StratClass in strategies.items():
            try:
                strat = StratClass(y, x, start_date, end_date)
                strat.fetch_data()
                strat.generate_signals()
                strat.run_backtest()
                
                m = strat.metrics
                if m:
                    ann_ret = (1 + m['Total Return'])**(252/len(strat.results)) - 1
                    print(f"{name:<12} | {y}/{x:<5} | {ann_ret:.1%}   | {m['Sharpe Ratio']:.2f}   | {m['Max Drawdown']:.1%}   | {'Stationarity Check' if 'Gated' in name else 'Raw Kalman'}")
            except Exception as e:
                print(f"Error {name} {y}/{x}: {e}")
        print("-" * 85)

STRATEGY     | PAIR      | ANN RET | SHARPE | MAX DD  | NOTES
-------------------------------------------------------------------------------------
V15_StatArb  | NVDA/AMD   | -11.0%   | -0.67   | -55.9%   | Raw Kalman
V16_Gated    | NVDA/AMD   | -8.9%   | -0.55   | -52.2%   | Stationarity Check
-------------------------------------------------------------------------------------
V15_StatArb  | PEP/KO    | -9.8%   | -0.87   | -35.5%   | Raw Kalman
V16_Gated    | PEP/KO    | -5.4%   | -0.53   | -21.6%   | Stationarity Check
-------------------------------------------------------------------------------------
V15_StatArb  | JPM/BAC   | -7.7%   | 0.03   | -64.8%   | Raw Kalman
V16_Gated    | JPM/BAC   | 33.2%   | 1.09   | -30.2%   | Stationarity Check
-------------------------------------------------------------------------------------
