In [34]:
import yfinance as yf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.mixture import GaussianMixture
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestClassifier
from statsmodels.tsa.stattools import adfuller
from abc import ABC, abstractmethod
import warnings
from datetime import datetime, timedelta
warnings.filterwarnings('ignore')

## 1. FEATURE ENGINEERING LAB (The Math)

In [35]:
class FeatureLab:
    """Shared mathematical engine for technical and statistical features."""
    
    @staticmethod
    def get_weights_frac_diff(d, size, threshold=1e-5):
        w = [1.0]
        for k in range(1, size):
            w_k = -w[-1] / k * (d - k + 1)
            w.append(w_k)
        w = np.array(w[::-1])
        w = w[np.abs(w) > threshold]
        return w

    @staticmethod
    def frac_diff_fixed(series, d, window=50):
        # Solves Stationarity Dilemma [cite: 61]
        weights = FeatureLab.get_weights_frac_diff(d, window)
        res = series.rolling(window=len(weights)).apply(lambda x: np.dot(x, weights), raw=True)
        return res

    @staticmethod
    def yang_zhang_volatility(df, window=30):
        # Captures intraday energy/gaps [cite: 82]
        log_ho = (df['High'] / df['Open']).apply(np.log)
        log_lo = (df['Low'] / df['Open']).apply(np.log)
        log_co = (df['Close'] / df['Open']).apply(np.log)
        log_oc = (df['Open'] / df['Close'].shift(1)).apply(np.log)
        log_cc = (df['Close'] / df['Close'].shift(1)).apply(np.log)
        
        rs = log_ho * (log_ho - log_co) + log_lo * (log_lo - log_co)
        close_vol = log_cc.rolling(window=window).var()
        open_vol = log_oc.rolling(window=window).var()
        window_rs = rs.rolling(window=window).mean()

        k = 0.34 / (1.34 + (window + 1) / (window - 1))
        return np.sqrt(open_vol + k * window_rs)

    @staticmethod
    def compute_rsi(series, window=14):
        delta = series.diff()
        gain = (delta.where(delta > 0, 0)).rolling(window=window).mean()
        loss = (-delta.where(delta < 0, 0)).rolling(window=window).mean()
        rs = gain / loss
        return 100 - (100 / (1 + rs))

    @staticmethod
    def triple_barrier_labels(prices, vol, pt=1.0, sl=1.0, barrier_window=10):
        """
        Implements the Triple Barrier Method.
        Labels: 1 (Profit Target Hit), -1 (Stop Loss Hit), 0 (Time Limit/Neutral)
        """
        labels = pd.Series(0, index=prices.index)
        # Shift prices to align future outcome with current row
        # However, to avoid look-ahead in features, we usually compute label for row t based on t+1...t+k
        # This function generates the TARGET variable (y) for training.
        
        limit = len(prices) - barrier_window
        p_values = prices.values
        v_values = vol.values
        
        for i in range(limit):
            current_p = p_values[i]
            current_vol = v_values[i]
            
            # Dynamic barriers based on volatility [cite: 215]
            target = current_p * (1 + pt * current_vol)
            stop = current_p * (1 - sl * current_vol)
            
            future_window = p_values[i+1 : i+1+barrier_window]
            
            hit_target = np.where(future_window >= target)[0]
            hit_stop = np.where(future_window <= stop)[0]
            
            first_target = hit_target[0] if len(hit_target) > 0 else barrier_window + 1
            first_stop = hit_stop[0] if len(hit_stop) > 0 else barrier_window + 1
            
            if first_target < first_stop and first_target <= barrier_window:
                labels.iloc[i] = 1
            elif first_stop < first_target and first_stop <= barrier_window:
                labels.iloc[i] = 0 # In Meta-Labeling, we often treat Stop (-1) as 0 (Do Not Trade)
            # Else 0 (Time limit reached or neutral)
            
        return labels

## 2. BASE STRATEGY INFRASTRUCTURE

In [36]:
class BaseStrategy(ABC):
    def __init__(self, ticker, start_date, end_date):
        self.ticker = ticker
        self.start_date = start_date
        self.end_date = end_date
        self.data = None
        self.results = None
        self.metrics = {}

    def fetch_data(self, warmup_years=2):
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d")
        warmup_start_dt = start_dt - timedelta(days=warmup_years*365)
        warmup_start_str = warmup_start_dt.strftime("%Y-%m-%d")
        
        try:
            self.data = yf.download(self.ticker, start=warmup_start_str, end=self.end_date, progress=False, auto_adjust=False)
            if isinstance(self.data.columns, pd.MultiIndex): 
                self.data.columns = self.data.columns.get_level_values(0)
            if 'Adj Close' not in self.data.columns: 
                self.data['Adj Close'] = self.data['Close']
            self.data['Returns'] = self.data['Adj Close'].pct_change()
            self.data.dropna(inplace=True)
        except Exception as e:
            print(f"Error fetching {self.ticker}: {e}")
            self.data = pd.DataFrame()

    @abstractmethod
    def generate_signals(self):
        pass

    def run_backtest(self, transaction_cost=0.0005, rebalance_threshold=0.1):
        if self.data is None or self.data.empty: return
        
        backtest_mask = self.data.index >= self.start_date
        df = self.data.loc[backtest_mask].copy()
        if df.empty: return

        # Position Smoothing
        clean_positions = []
        current_pos = 0.0
        raw_signals = df['Signal'].values
        
        for target in raw_signals:
            if abs(target - current_pos) > rebalance_threshold:
                current_pos = target
            clean_positions.append(current_pos)
            
        df['Position'] = clean_positions
        df['Prev_Position'] = df['Position'].shift(1).fillna(0)
        df['Turnover'] = (df['Prev_Position'] - df['Position'].shift(2).fillna(0)).abs()
        df['Gross_Returns'] = df['Prev_Position'] * df['Returns']
        df['Net_Returns'] = df['Gross_Returns'] - (df['Turnover'] * transaction_cost)
        df['Net_Returns'].fillna(0, inplace=True)
        
        df['Cumulative_Strategy'] = (1 + df['Net_Returns']).cumprod()
        df['Cumulative_Market'] = (1 + df['Returns']).cumprod()
        
        roll_max = df['Cumulative_Strategy'].cummax()
        df['Drawdown'] = (df['Cumulative_Strategy'] / roll_max) - 1.0
        
        self.results = df
        
        # Performance Calculation
        total_ret = df['Cumulative_Strategy'].iloc[-1] - 1
        vol = df['Net_Returns'].std() * np.sqrt(252)
        sharpe = (df['Net_Returns'].mean() / df['Net_Returns'].std()) * np.sqrt(252) if vol > 0 else 0
        max_dd = df['Drawdown'].min()
        
        self.metrics = {
            'Total Return': total_ret,
            'Sharpe Ratio': sharpe,
            'Max Drawdown': max_dd
        }
        return df

## 3.  MODELS (V1-V4)

In [37]:
class StrategyV1_Baseline(BaseStrategy):
    """V1: Fixed FracDiff, Standard GMM."""
    def generate_signals(self):
        if self.data is None or self.data.empty: return
        df = self.data.copy()
        
        df['Volatility'] = FeatureLab.yang_zhang_volatility(df)
        df['FracDiff'] = FeatureLab.frac_diff_fixed(df['Adj Close'].apply(np.log), d=0.4, window=50)
        df['RSI'] = FeatureLab.compute_rsi(df['Adj Close'])
        df['Returns_Smoothed'] = df['Returns'].rolling(5).mean()
        df['Vol_Smoothed'] = df['Volatility'].rolling(5).mean()
        df.dropna(inplace=True)
        
        X = df[['Returns_Smoothed', 'Vol_Smoothed']].values
        scaler = StandardScaler()
        X_scaled = scaler.fit_transform(X)
        gmm = GaussianMixture(n_components=3, random_state=42)
        df['Cluster'] = gmm.fit_predict(X_scaled)
        
        stats = df.groupby('Cluster')['Returns_Smoothed'].mean().sort_values().index
        mapping = {stats[0]: -1, stats[1]: 0, stats[2]: 1}
        df['Regime'] = df['Cluster'].map(mapping)
        
        df['Signal'] = 0
        df.loc[(df['Regime'] == 1) & (df['FracDiff'] > 0), 'Signal'] = 1
        df.loc[(df['Regime'] == 0) & (df['RSI'] < 40), 'Signal'] = 1
        
        target_vol = 0.15 / np.sqrt(252)
        df['Vol_Scaler'] = (target_vol / df['Volatility']).clip(upper=1.5)
        df['Signal'] = df['Signal'] * df['Vol_Scaler']
        self.data = df

In [38]:
class StrategyV2_Advanced(BaseStrategy):
    """V2: Rolling GMM."""
    def generate_signals(self):
        if self.data is None or self.data.empty: return
        df = self.data.copy()
        
        df['Volatility'] = FeatureLab.yang_zhang_volatility(df)
        df['FracDiff'] = FeatureLab.frac_diff_fixed(df['Adj Close'].apply(np.log), d=0.4, window=50)
        df['RSI'] = FeatureLab.compute_rsi(df['Adj Close'])
        df['Returns_Smoothed'] = df['Returns'].rolling(5).mean()
        df['Vol_Smoothed'] = df['Volatility'].rolling(5).mean()
        df.dropna(inplace=True)
        
        df['Regime'] = 0
        window_size, step_size = 504, 126
        preds, indices = [], []
        
        if len(df) > window_size:
            for t in range(window_size, len(df), step_size):
                train = df.iloc[t-window_size:t]
                test = df.iloc[t:t+step_size]
                if test.empty: break
                
                scaler = StandardScaler()
                X_train_s = scaler.fit_transform(train[['Returns_Smoothed', 'Vol_Smoothed']].values)
                X_test_s = scaler.transform(test[['Returns_Smoothed', 'Vol_Smoothed']].values)
                
                gmm = GaussianMixture(n_components=3, random_state=42).fit(X_train_s)
                train['Clust'] = gmm.predict(X_train_s)
                stats = train.groupby('Clust')['Returns_Smoothed'].mean().sort_values().index
                mapping = {stats[0]: -1, stats[1]: 0, stats[2]: 1}
                
                preds.extend([mapping[x] for x in gmm.predict(X_test_s)])
                indices.extend(test.index)
            
            df.loc[indices, 'Regime'] = pd.Series(preds, index=indices)
        
        df['Signal'] = 0
        df.loc[(df['Regime'] == 1) & (df['FracDiff'] > 0), 'Signal'] = 1
        df.loc[(df['Regime'] == 0) & (df['RSI'] < 45), 'Signal'] = 1
        
        target_vol = 0.15 / np.sqrt(252)
        df['Vol_Scaler'] = (target_vol / df['Volatility']).clip(upper=1.0)
        df['Signal'] = df['Signal'] * df['Vol_Scaler']
        self.data = df

In [39]:
class StrategyV3_Macro(BaseStrategy):
    """V3: Macro (SPY) Filter."""
    def __init__(self, ticker, start_date, end_date):
        super().__init__(ticker, start_date, end_date)
        self.spy_data = None

    def fetch_data(self, warmup_years=2):
        super().fetch_data(warmup_years)
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d") - timedelta(days=warmup_years*365)
        try:
            spy = yf.download("SPY", start=start_dt.strftime("%Y-%m-%d"), end=self.end_date, progress=False, auto_adjust=False)
            if isinstance(spy.columns, pd.MultiIndex): spy.columns = spy.columns.get_level_values(0)
            self.spy_data = spy[['Adj Close']].rename(columns={'Adj Close': 'SPY_Price'})
        except: pass

    def generate_signals(self):
        if self.data is None or self.data.empty: return
        df = self.data.copy()
        
        if self.spy_data is not None:
            df = df.join(self.spy_data, how='left')
            df['SPY_MA200'] = df['SPY_Price'].rolling(window=200).mean()
            df['Macro_Bull'] = df['SPY_Price'] > df['SPY_MA200']
        else:
            df['Macro_Bull'] = True
            
        df['Volatility'] = FeatureLab.yang_zhang_volatility(df)
        df['FracDiff'] = FeatureLab.frac_diff_fixed(df['Adj Close'].apply(np.log), d=0.4, window=50)
        df['RSI'] = FeatureLab.compute_rsi(df['Adj Close'])
        df.dropna(inplace=True)
        
        df['Signal'] = 0
        df.loc[(df['FracDiff'] > 0), 'Signal'] = 1
        df.loc[df['Macro_Bull'] == False, 'Signal'] = 0
        
        target_vol = 0.15 / np.sqrt(252)
        df['Signal'] = df['Signal'] * (target_vol / df['Volatility']).clip(upper=1.5)
        self.data = df

In [40]:
class StrategyV4_Meta(BaseStrategy):
    """V4: Dynamic Profiling with OBV."""
    def generate_signals(self):
        if self.data is None or self.data.empty: return
        df = self.data.copy()
        
        df['Volatility'] = FeatureLab.yang_zhang_volatility(df)
        df['FracDiff'] = FeatureLab.frac_diff_fixed(df['Adj Close'].apply(np.log), d=0.4, window=50)
        df['OBV'] = (np.sign(df['Close'].diff()) * df['Volume']).fillna(0).cumsum()
        df['OBV_Trend'] = df['OBV'].rolling(50).mean()
        df['RSI'] = FeatureLab.compute_rsi(df['Adj Close'])
        df.dropna(inplace=True)
        
        df['Signal'] = 0
        # Trend
        df.loc[(df['FracDiff'] > 0) & (df['OBV'] > df['OBV_Trend']), 'Signal'] = 1
        # Reversion
        df.loc[(df['RSI'] < 30), 'Signal'] = 1
        
        target_vol = 0.15 / np.sqrt(252)
        df['Signal'] = df['Signal'] * (target_vol / df['Volatility']).clip(upper=1.5)
        self.data = df

In [41]:
class StrategyV5_KalmanState(BaseStrategy):
    """
    V5 (Formerly V10): Kalman Filter + Macro Filter + Volatility Burst Control.
    Uses Kalman Filter for noise-free slope estimation[cite: 151].
    """
    def __init__(self, ticker, start_date, end_date):
        super().__init__(ticker, start_date, end_date)
        self.spy_data = None

    def fetch_data(self, warmup_years=2):
        super().fetch_data(warmup_years)
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d") - timedelta(days=warmup_years*365)
        try:
            spy = yf.download("SPY", start=start_dt.strftime("%Y-%m-%d"), end=self.end_date, progress=False, auto_adjust=False)
            if isinstance(spy.columns, pd.MultiIndex): spy.columns = spy.columns.get_level_values(0)
            spy['Macro_Trend'] = (spy['Adj Close'] > spy['Adj Close'].rolling(200).mean()).astype(int)
            self.spy_data = spy[['Macro_Trend']]
        except: pass

    def _apply_kalman_filter(self, prices):
        x = prices.values
        n = len(x)
        state = np.zeros(n)
        slope = np.zeros(n)
        state[0] = x[0]
        P, Q, R = 1.0, 0.001, 0.1
        
        for t in range(1, n):
            pred_state = state[t-1] + slope[t-1]
            pred_P = P + Q
            measurement = x[t]
            residual = measurement - pred_state
            K = pred_P / (pred_P + R)
            state[t] = pred_state + K * residual
            slope[t] = 0.9 * slope[t-1] + 0.1 * (state[t] - state[t-1])
            P = (1 - K) * pred_P
        return pd.Series(slope, index=prices.index)

    def generate_signals(self):
        if self.data is None or self.data.empty: return
        df = self.data.copy()
        
        if self.spy_data is not None:
            df = df.join(self.spy_data, how='left').fillna(method='ffill')
        else:
            df['Macro_Trend'] = 1 
            
        log_prices = np.log(df['Adj Close'])
        df['Kalman_Slope'] = self._apply_kalman_filter(log_prices)
        df['Volatility'] = FeatureLab.yang_zhang_volatility(df)
        df['Vol_Change'] = df['Volatility'].diff()
        
        df.dropna(inplace=True)
        
        # Primary Logic
        df['Signal'] = 0.0
        long_condition = (df['Kalman_Slope'] > 0) & (df['Macro_Trend'] == 1)
        df.loc[long_condition, 'Signal'] = 1
        
        # Vol Targeting & Burst Protection
        target_vol = 0.15 / np.sqrt(252)
        df['Vol_Scaler'] = (target_vol / df['Volatility']).clip(upper=1.5)
        
        vol_spike = df['Vol_Change'] > df['Vol_Change'].rolling(20).std() * 2
        df.loc[vol_spike, 'Vol_Scaler'] *= 0.5
        df.loc[df['Macro_Trend'] == 0, 'Vol_Scaler'] *= 0.5
        
        df['Signal'] = df['Signal'] * df['Vol_Scaler']
        self.data = df

## 4. NEW MODEL: STRATEGY V6

In [42]:
class StrategyV6_MetaLabeling(BaseStrategy):
    """
    V6.1 (Hybrid): The 'Regime-Adaptive' Institutional Model.
    
    Architecture:
    1. Primary Signal (Hybrid): 
       - TREND: Kalman Slope > 0 (Catch the run)
       - VALUE: RSI < 30 (Catch the dip)
       This ensures we have candidates in both trending and chopping markets.
       
    2. Meta-Labeling (Random Forest): 
       - Learns WHICH of the above signals works for the current asset/regime.
       
    3. Soft-Sizing: Scales leverage based on ML confidence.
    """
    def __init__(self, ticker, start_date, end_date):
        super().__init__(ticker, start_date, end_date)
        self.spy_data = None

    def fetch_data(self, warmup_years=2):
        super().fetch_data(warmup_years)
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d") - timedelta(days=warmup_years*365)
        try:
            spy = yf.download("SPY", start=start_dt.strftime("%Y-%m-%d"), end=self.end_date, progress=False, auto_adjust=False)
            if isinstance(spy.columns, pd.MultiIndex): spy.columns = spy.columns.get_level_values(0)
            spy['Macro_Trend'] = (spy['Adj Close'] > spy['Adj Close'].rolling(200).mean()).astype(int)
            self.spy_data = spy[['Macro_Trend']]
        except: pass

    def _apply_kalman_filter(self, prices):
        x = prices.values
        n = len(x)
        state = np.zeros(n)
        slope = np.zeros(n)
        state[0] = x[0]
        # Kalman Params
        P, Q, R = 1.0, 0.001, 0.1 
        
        for t in range(1, n):
            pred_state = state[t-1] + slope[t-1]
            pred_P = P + Q
            measurement = x[t]
            residual = measurement - pred_state
            
            K = pred_P / (pred_P + R)
            state[t] = pred_state + K * residual
            slope[t] = 0.9 * slope[t-1] + 0.1 * (state[t] - state[t-1])
            P = (1 - K) * pred_P
            
        return pd.Series(slope, index=prices.index)

    def generate_signals(self):
        if self.data is None or self.data.empty: return
        df = self.data.copy()
        
        # --- 1. Features ---
        if self.spy_data is not None:
            df = df.join(self.spy_data, how='left').fillna(method='ffill')
        else: df['Macro_Trend'] = 1
            
        df['Volatility'] = FeatureLab.yang_zhang_volatility(df)
        df['Kalman_Slope'] = self._apply_kalman_filter(np.log(df['Adj Close']))
        df['RSI'] = FeatureLab.compute_rsi(df['Adj Close'])
        df['Spread'] = df['Adj Close'] - df['Adj Close'].rolling(20).mean()
        df.dropna(inplace=True)
        
        # --- 2. Primary Signal (The Hybrid Generator) ---
        df['Primary_Signal'] = 0
        
        # A. MOMENTUM LEG (For NVDA/SPY)
        # Catch the trend when slope is positive
        trend_signal = (df['Kalman_Slope'] > 0)
        
        # B. MEAN REVERSION LEG (For JPM/Chop)
        # Catch the knife when oversold (Value)
        value_signal = (df['RSI'] < 30)
        
        # Combine: We are interested if EITHER is true
        df.loc[trend_signal | value_signal, 'Primary_Signal'] = 1
        
        # --- 3. Meta-Labeling (The Validator) ---
        # Label: Did buying here result in profit?
        labels = FeatureLab.triple_barrier_labels(df['Adj Close'], df['Volatility'], pt=1.0, sl=1.0, barrier_window=10)
        
        df['Meta_Prob'] = 0.5
        train_window = 252 * 2
        update_freq = 63 
        
        clf = RandomForestClassifier(n_estimators=50, max_depth=3, random_state=42)
        feature_cols = ['Volatility', 'RSI', 'Spread', 'Kalman_Slope']
        
        indices = df.index
        if len(df) > train_window:
            for t in range(train_window, len(df), update_freq):
                train_start = indices[t - train_window]
                train_end = indices[t]
                test_end_idx = min(t + update_freq, len(df))
                test_end = indices[test_end_idx - 1]
                
                X_train = df.loc[train_start:train_end, feature_cols]
                y_train = labels.loc[train_start:train_end]
                
                # Training on all data allows the model to learn "High RSI = Good" for NVDA
                # and "Low RSI = Good" for JPM automatically based on recent history.
                clf.fit(X_train, y_train)
                
                X_test = df.loc[train_end:test_end, feature_cols]
                probs = clf.predict_proba(X_test)
                
                if probs.shape[1] == 2:
                    pos_probs = probs[:, 1]
                else:
                    pos_probs = probs[:, 0] if clf.classes_[0] == 1 else 0.0
                    
                df.loc[train_end:test_end, 'Meta_Prob'] = pos_probs
        
        # --- 4. Signal Construction ---
        df['Signal'] = 0.0
        
        # Confidence Floor: 
        # If the ML confirms the hybrid signal (Prob > 0.45), we execute.
        # This allows RSI Dips to pass IF the ML thinks they are profitable.
        active_trade = (df['Primary_Signal'] == 1) & (df['Meta_Prob'] > 0.45)
        df.loc[active_trade, 'Signal'] = 1
        
        # Sizing (Volatility + Confidence)
        target_vol = 0.15 / np.sqrt(252)
        vol_scaler = (target_vol / df['Volatility']).clip(upper=2.0)
        ml_scaler = (df['Meta_Prob'] / 0.5).clip(0.5, 2.0)
        
        # Macro Override
        # If Bear Market, we are defensive, BUT we allow Deep Value (RSI < 30) 
        # to have slightly more room if the ML loves it.
        macro_scaler = df['Macro_Trend'].map({1: 1.0, 0: 0.5})
        
        df['Signal'] = df['Signal'] * vol_scaler * ml_scaler * macro_scaler
        
        self.data = df

In [43]:
class StrategyV7_AdaptiveOptim(BaseStrategy):
    """
    V7 (WFO): Walk-Forward Optimized Strategy.
    
    Instead of static rules, this strategy runs a 'Tournament' every quarter.
    It tests 4 distinct parameter sets (Profiles) on the past 252 days:
    
    1. Trend_Aggro: Kalman Slope > 0 (No Macro Filter)
    2. Trend_Defense: Kalman Slope > 0 AND Macro_Bull (Like V3)
    3. Reversion_Deep: RSI < 30 (Buying Crashes)
    4. Reversion_Active: RSI < 45 (Buying Dips)
    
    It selects the Profile with the highest Sharpe Ratio in the lookback window
    and uses it for the next execution window.
    """
    def __init__(self, ticker, start_date, end_date):
        super().__init__(ticker, start_date, end_date)
        self.spy_data = None

    def fetch_data(self, warmup_years=2):
        super().fetch_data(warmup_years)
        start_dt = datetime.strptime(self.start_date, "%Y-%m-%d") - timedelta(days=warmup_years*365)
        try:
            spy = yf.download("SPY", start=start_dt.strftime("%Y-%m-%d"), end=self.end_date, progress=False, auto_adjust=False)
            if isinstance(spy.columns, pd.MultiIndex): spy.columns = spy.columns.get_level_values(0)
            spy['Macro_Trend'] = (spy['Adj Close'] > spy['Adj Close'].rolling(200).mean()).astype(int)
            self.spy_data = spy[['Macro_Trend']]
        except: pass

    def _apply_kalman_filter(self, prices):
        x = prices.values
        n = len(x)
        state = np.zeros(n)
        slope = np.zeros(n)
        state[0] = x[0]
        P, Q, R = 1.0, 0.01, 0.1 # Q=0.01 makes it slightly more responsive than V6
        
        for t in range(1, n):
            pred_state = state[t-1] + slope[t-1]
            pred_P = P + Q
            measurement = x[t]
            residual = measurement - pred_state
            
            K = pred_P / (pred_P + R)
            state[t] = pred_state + K * residual
            slope[t] = 0.9 * slope[t-1] + 0.1 * (state[t] - state[t-1])
            P = (1 - K) * pred_P
            
        return pd.Series(slope, index=prices.index)

    def generate_signals(self):
        if self.data is None or self.data.empty: return
        df = self.data.copy()
        
        # --- 1. Global Feature Engineering ---
        if self.spy_data is not None:
            df = df.join(self.spy_data, how='left').fillna(method='ffill')
        else: df['Macro_Trend'] = 1
            
        df['Volatility'] = FeatureLab.yang_zhang_volatility(df)
        df['Kalman_Slope'] = self._apply_kalman_filter(np.log(df['Adj Close']))
        df['RSI'] = FeatureLab.compute_rsi(df['Adj Close'])
        df.dropna(inplace=True)
        
        # --- 2. Pre-Calculate Strategy Candidates (Vectorized) ---
        # We calculate the raw signals for all profiles upfront
        
        # Profile 1: Aggressive Trend (Chase the move)
        sig_trend_aggro = (df['Kalman_Slope'] > 0).astype(int)
        
        # Profile 2: Defensive Trend (V3 Style - Only if Macro agrees)
        sig_trend_def = ((df['Kalman_Slope'] > 0) & (df['Macro_Trend'] == 1)).astype(int)
        
        # Profile 3: Deep Reversion (Catch Falling Knife)
        sig_rev_deep = (df['RSI'] < 30).astype(int)
        
        # Profile 4: Active Reversion (Buy Shallow Dips)
        sig_rev_active = (df['RSI'] < 45).astype(int)
        
        # Store in a dict for easy access
        candidates = {
            'Trend_Aggro': sig_trend_aggro,
            'Trend_Defense': sig_trend_def,
            'Rev_Deep': sig_rev_deep,
            'Rev_Active': sig_rev_active
        }
        
        # --- 3. Walk-Forward Optimization Loop ---
        df['Signal'] = 0.0
        df['Selected_Profile'] = 'None' # For debugging/analysis
        
        lookback = 252       # 1 Year Lookback for Optimization
        rebalance_freq = 63  # Quarterly Re-optimization
        
        indices = df.index
        daily_returns = df['Returns']
        
        if len(df) > lookback:
            for t in range(lookback, len(df), rebalance_freq):
                train_start = indices[t - lookback]
                train_end = indices[t]
                test_end_idx = min(t + rebalance_freq, len(df))
                test_end = indices[test_end_idx - 1]
                
                # The Tournament: Check Sharpe of each candidate in lookback period
                best_score = -999
                best_profile = 'Trend_Defense' # Default safety
                
                lb_returns = daily_returns.loc[train_start:train_end]
                
                for name, sig_series in candidates.items():
                    # Simulate Strategy Return in Lookback
                    # Lag signal by 1 to avoid lookahead in backtest
                    sigs = sig_series.loc[train_start:train_end].shift(1).fillna(0)
                    strat_ret = lb_returns * sigs
                    
                    # Calculate Metric (Sharpe)
                    mean_ret = strat_ret.mean()
                    std_ret = strat_ret.std()
                    
                    if std_ret > 1e-6:
                        score = mean_ret / std_ret # Simple Sharpe
                    else:
                        score = -999 # Flat line is bad
                        
                    if score > best_score:
                        best_score = score
                        best_profile = name
                
                # Apply Best Profile to Next Window (Test Set)
                # We use the signal series for the *future* window based on the *past* winner
                winner_signals = candidates[best_profile].loc[train_end:test_end]
                df.loc[train_end:test_end, 'Signal'] = winner_signals
                df.loc[train_end:test_end, 'Selected_Profile'] = best_profile

        # --- 4. Volatility Targeting (Risk Management) ---
        target_vol = 0.15 / np.sqrt(252)
        vol_scaler = (target_vol / df['Volatility']).clip(upper=1.5)
        
        df['Signal'] = df['Signal'] * vol_scaler
        self.data = df

## 5. ROBUST BENCHMARK INFRASTRUCTURE

In [44]:
class RobustBenchmark:
    """
    Implements Walk-Forward Analysis and Deflated Sharpe Ratio logic.
    Benchmarks multiple strategies without look-ahead bias[cite: 275].
    """
    def __init__(self, tickers, start_date, end_date):
        self.tickers = tickers
        self.start_date = start_date
        self.end_date = end_date
        self.results = []

    def run(self):
        print(f"{'STRATEGY':<10} | {'TICKER':<6} | {'ANN RET':<7} | {'SHARPE':<6} | {'MAX DD':<7} | {'NOTES'}")
        print("-" * 75)
        
        strategies = {
            "V1_Base": StrategyV1_Baseline,
            "V2_GMM": StrategyV2_Advanced,
            "V3_Macro": StrategyV3_Macro,
            "V4_Meta": StrategyV4_Meta,
            "V5_Kalman": StrategyV5_KalmanState,
            "V6_Inst": StrategyV6_MetaLabeling,
            "V7_Optim": StrategyV7_AdaptiveOptim
        }

        for ticker in self.tickers:
            # Capture Buy & Hold first
            bh = StrategyV1_Baseline(ticker, self.start_date, self.end_date)
            bh.fetch_data()
            bh.data['Signal'] = 1 # Force Buy
            bh.run_backtest()
            self._print_row("Buy&Hold", ticker, bh.metrics)
            
            for name, StratClass in strategies.items():
                try:
                    strat = StratClass(ticker, self.start_date, self.end_date)
                    strat.fetch_data(warmup_years=2)
                    strat.generate_signals()
                    strat.run_backtest()
                    
                    self._print_row(name, ticker, strat.metrics)
                    
                    # Store for portfolio level (optional)
                    self.results.append({
                        'Ticker': ticker,
                        'Strategy': name,
                        'Returns': strat.results['Net_Returns']
                    })
                except Exception as e:
                    print(f"Failed {name} {ticker}: {e}")
            print("-" * 75)

    def _print_row(self, name, ticker, metrics):
        if not metrics: return
        ret = metrics['Total Return']
        # Annualize return approx
        ann_ret = (1 + ret) ** (252 / len(metrics.get('Returns', [1]*252))) - 1 if 'Returns' in metrics else ret
        print(f"{name:<10} | {ticker:<6} | {ret:.1%}   | {metrics['Sharpe Ratio']:.2f}   | {metrics['Max Drawdown']:.1%}   |")

## 6. EXECUTION

In [49]:
bench = RobustBenchmark(
    tickers=["NVDA", "SPY", "JPM", "BABA"], 
    start_date="2021-01-01", 
    end_date="2024-12-30"
)
bench.run()

STRATEGY   | TICKER | ANN RET | SHARPE | MAX DD  | NOTES
---------------------------------------------------------------------------
Buy&Hold   | NVDA   | 946.9%   | 1.38   | -66.3%   |
V1_Base    | NVDA   | 50.0%   | 1.19   | -6.9%   |
V2_GMM     | NVDA   | 100.7%   | 1.17   | -10.9%   |
V3_Macro   | NVDA   | 357.9%   | 1.61   | -25.6%   |
V4_Meta    | NVDA   | 354.4%   | 1.63   | -23.8%   |
V5_Kalman  | NVDA   | 149.7%   | 1.18   | -20.5%   |
V6_Inst    | NVDA   | 152.7%   | 1.04   | -25.5%   |
V7_Optim   | NVDA   | 144.0%   | 1.20   | -14.3%   |
---------------------------------------------------------------------------
Buy&Hold   | SPY    | 70.7%   | 0.90   | -24.5%   |
V1_Base    | SPY    | 94.6%   | 1.06   | -20.2%   |
V2_GMM     | SPY    | 14.9%   | 0.37   | -18.9%   |
V3_Macro   | SPY    | 94.7%   | 1.11   | -25.6%   |
V4_Meta    | SPY    | 32.0%   | 0.55   | -17.7%   |
V5_Kalman  | SPY    | 70.4%   | 1.07   | -18.3%   |
V6_Inst    | SPY    | 83.7%   | 0.89   | -28.6%   |
V7_Op