In [None]:
# @title Install Dependencies (Colab-Compatible)
# TA-Lib requires system-level installation on Colab
# Steps: download the TA-Lib 0.4.0 C sources, build and install them under
# /usr, remove the build tree, then pip-install the Python packages.
# NOTE(review): the tarball is fetched over plain HTTP without a checksum, and
# configure/make output is redirected to /dev/null, so a failed native build is
# silent here and would only surface at the pip step below.
!wget -q http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
!tar -xzf ta-lib-0.4.0-src.tar.gz
%cd ta-lib/
!./configure --prefix=/usr > /dev/null 2>&1
!make > /dev/null 2>&1
!make install > /dev/null 2>&1
%cd ..
!rm -rf ta-lib ta-lib-0.4.0-src.tar.gz

# Now install Python packages
# NOTE(review): versions are unpinned; `%pip` (rather than `!pip`) would
# guarantee the packages land in the running kernel's environment.
!pip install -q yfinance pyts deap gymnasium TA-Lib

# Printed unconditionally; it does not verify that the steps above succeeded.
print("‚úì All dependencies installed!")

In [None]:
# @title Import Libraries
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.preprocessing import StandardScaler
from sklearn.cluster import KMeans
from sklearn.ensemble import IsolationForest
from sklearn.decomposition import PCA
import torch
import yfinance as yf
import talib
from pyts.image import GramianAngularField
from deap import base, creator, tools, gp, algorithms
import operator
import gymnasium as gym
from gymnasium import spaces
import warnings

# Suppress every warning for the rest of the session.
# NOTE(review): this also hides NumPy runtime warnings (NaN comparisons,
# divide-by-zero) and library deprecation notices raised by later cells.
warnings.filterwarnings('ignore')
# Global matplotlib theme for all figures in the notebook.
plt.style.use('fivethirtyeight')
print("‚úì All libraries imported!")

In [None]:
# @title Stage 1: HARDCORE Data Preparation (20+ Years, 100+ Features, Human Rules)
# NOTE(review): itertools is not referenced anywhere in the visible notebook.
import itertools

# =============================================
# === GET MAXIMUM HISTORICAL DATA ===
# =============================================
print("üî• HARDCORE MODE: Loading 20+ years of data...")
ticker = "SPY"
# Daily bars for 2000-01-01 .. 2024-12-31 via yfinance; the result becomes the
# global `df` that every later stage reads and extends.
df = yf.download(ticker, start="2000-01-01", end="2024-12-31", progress=False)

# When the columns come back as a MultiIndex, keep only the first level so the
# frame can be addressed with plain names such as df['Close'].
if isinstance(df.columns, pd.MultiIndex):
    df.columns = df.columns.get_level_values(0)

# The "years" figure assumes 252 trading days per year.
print(f"‚úì Loaded {len(df)} days ({len(df)/252:.1f} years) of data")

# =============================================
# === 100+ TECHNICAL INDICATORS ===
# =============================================
if len(df) > 250:
    # Feature engineering runs only when more than 250 rows were loaded; with
    # fewer rows this whole block is skipped and `df` keeps just its raw columns.
    # TA-Lib functions below are fed plain float NumPy arrays extracted here.
    close = df['Close'].values.astype(float)
    high = df['High'].values.astype(float)
    low = df['Low'].values.astype(float)
    open_price = df['Open'].values.astype(float)
    volume = df['Volume'].values.astype(float)
    
    # === EMA RIBBON (Full Fibonacci Sequence) ===
    for period in [5, 8, 13, 21, 34, 55, 89, 144, 200, 233]:
        df[f'EMA{period}'] = talib.EMA(close, timeperiod=period)
    
    # === SMA at key levels ===
    for period in [5, 10, 20, 50, 100, 150, 200]:
        df[f'SMA{period}'] = talib.SMA(close, timeperiod=period)
    
    # === PROVEN HUMAN PATTERNS (Golden Cross, Death Cross, etc) ===
    # *_Cross columns flag only the bar on which SMA50 crosses SMA200;
    # Golden_Cross_Active is 1 on every bar where SMA50 is above SMA200.
    df['Golden_Cross'] = ((df['SMA50'] > df['SMA200']) & (df['SMA50'].shift(1) <= df['SMA200'].shift(1))).astype(int)
    df['Death_Cross'] = ((df['SMA50'] < df['SMA200']) & (df['SMA50'].shift(1) >= df['SMA200'].shift(1))).astype(int)
    df['Golden_Cross_Active'] = (df['SMA50'] > df['SMA200']).astype(int)
    
    # EMA Alignment Score (how many EMAs are in bullish order)
    # Counts adjacent pairs in ema_cols where the faster EMA is above the next
    # slower one: 9 columns give 8 pairs, so the score ranges from 0 to 8.
    ema_cols = ['EMA5', 'EMA8', 'EMA13', 'EMA21', 'EMA34', 'EMA55', 'EMA89', 'EMA144', 'EMA200']
    df['EMA_Bullish_Count'] = sum((df[ema_cols[i]] > df[ema_cols[i+1]]).astype(int) for i in range(len(ema_cols)-1))
    df['EMA_Perfect_Bullish'] = (df['EMA_Bullish_Count'] == 8).astype(int)
    df['EMA_Perfect_Bearish'] = (df['EMA_Bullish_Count'] == 0).astype(int)
    
    # Price vs ALL EMAs
    # Percentage distance of Close from four of the EMAs, plus an above/below flag.
    for period in [8, 21, 55, 200]:
        df[f'Price_vs_EMA{period}'] = (df['Close'] - df[f'EMA{period}']) / df[f'EMA{period}'] * 100
        df[f'Above_EMA{period}'] = (df['Close'] > df[f'EMA{period}']).astype(int)
    
    # === RSI at multiple timeframes ===
    for period in [2, 5, 7, 14, 21]:
        df[f'RSI{period}'] = talib.RSI(close, timeperiod=period)
    
    # RSI Divergence Detection
    # RSI14 was already produced by the loop above (14 is in its period list);
    # this line recomputes the same column.
    df['RSI14'] = talib.RSI(close, timeperiod=14)
    # Bearish: today's High exceeds the prior 5-bar high while RSI14 is below
    # its prior 5-bar maximum.
    df['Price_Higher_High'] = ((df['High'] > df['High'].rolling(5).max().shift(1))).astype(int)
    df['RSI_Lower_High'] = ((df['RSI14'] < df['RSI14'].rolling(5).max().shift(1))).astype(int)
    df['Bearish_Divergence'] = (df['Price_Higher_High'] & df['RSI_Lower_High']).astype(int)
    
    # Bullish: today's Low undercuts the prior 5-bar low while RSI14 is above
    # its prior 5-bar minimum.
    df['Price_Lower_Low'] = ((df['Low'] < df['Low'].rolling(5).min().shift(1))).astype(int)
    df['RSI_Higher_Low'] = ((df['RSI14'] > df['RSI14'].rolling(5).min().shift(1))).astype(int)
    df['Bullish_Divergence'] = (df['Price_Lower_Low'] & df['RSI_Higher_Low']).astype(int)
    
    # RSI Overbought/Oversold Zones
    df['RSI_Oversold'] = (df['RSI14'] < 30).astype(int)
    df['RSI_Overbought'] = (df['RSI14'] > 70).astype(int)
    df['RSI_Extreme_Oversold'] = (df['RSI14'] < 20).astype(int)
    df['RSI_Extreme_Overbought'] = (df['RSI14'] > 80).astype(int)
    
    # === MACD Multiple Timeframes ===
    # Columns are keyed by fast/slow only (e.g. MACD_12_26, MACD_12_26_Hist);
    # the signal line itself is not stored.
    for fast, slow, sig in [(8, 17, 9), (12, 26, 9), (5, 35, 5)]:
        prefix = f'MACD_{fast}_{slow}'
        macd, signal, hist = talib.MACD(close, fastperiod=fast, slowperiod=slow, signalperiod=sig)
        df[f'{prefix}'] = macd
        df[f'{prefix}_Hist'] = hist
        # np.roll(hist, 1) is the previous bar's histogram, but it wraps the
        # last value round to index 0. Presumably harmless because hist[0] is
        # NaN during indicator warm-up, which makes both comparisons False —
        # TODO confirm.
        df[f'{prefix}_Cross_Up'] = ((hist > 0) & (np.roll(hist, 1) <= 0)).astype(int)
        df[f'{prefix}_Cross_Down'] = ((hist < 0) & (np.roll(hist, 1) >= 0)).astype(int)
    
    # === STOCHASTIC Multiple Settings ===
    for k_period in [5, 14, 21]:
        k, d = talib.STOCH(high, low, close, fastk_period=k_period, slowk_period=3, slowd_period=3)
        df[f'STOCH_K{k_period}'] = k
        df[f'STOCH_D{k_period}'] = d
        df[f'STOCH{k_period}_Oversold'] = (k < 20).astype(int)
        df[f'STOCH{k_period}_Overbought'] = (k > 80).astype(int)
    
    # === BOLLINGER BANDS Multiple Settings ===
    for period in [10, 20, 50]:
        upper, middle, lower = talib.BBANDS(close, timeperiod=period, nbdevup=2, nbdevdn=2)
        df[f'BB{period}_Upper'] = upper
        df[f'BB{period}_Lower'] = lower
        # Width: band spread as a percentage of the middle band.
        df[f'BB{period}_Width'] = (upper - lower) / middle * 100
        # Position: 0 at the lower band, 1 at the upper band; 1e-8 avoids 0/0.
        df[f'BB{period}_Position'] = (df['Close'] - lower) / (upper - lower + 1e-8)
        # Squeeze: width is below the 20th percentile of its own last 50 values.
        df[f'BB{period}_Squeeze'] = (df[f'BB{period}_Width'] < df[f'BB{period}_Width'].rolling(50).quantile(0.2)).astype(int)
    
    # === ATR & Volatility ===
    for period in [7, 14, 21]:
        df[f'ATR{period}'] = talib.ATR(high, low, close, timeperiod=period)
        df[f'ATR{period}_Pct'] = df[f'ATR{period}'] / df['Close'] * 100
    
    # Volatility Expansion/Contraction
    # Expanding: ATR14 above its 20-bar mean. Contracting: below 80% of that mean.
    df['Vol_Expanding'] = (df['ATR14'] > df['ATR14'].rolling(20).mean()).astype(int)
    df['Vol_Contracting'] = (df['ATR14'] < df['ATR14'].rolling(20).mean() * 0.8).astype(int)
    
    # === ADX & Trend Strength ===
    df['ADX'] = talib.ADX(high, low, close, timeperiod=14)
    df['PLUS_DI'] = talib.PLUS_DI(high, low, close, timeperiod=14)
    df['MINUS_DI'] = talib.MINUS_DI(high, low, close, timeperiod=14)
    df['DI_Cross_Bullish'] = ((df['PLUS_DI'] > df['MINUS_DI']) & (df['PLUS_DI'].shift(1) <= df['MINUS_DI'].shift(1))).astype(int)
    df['DI_Cross_Bearish'] = ((df['PLUS_DI'] < df['MINUS_DI']) & (df['PLUS_DI'].shift(1) >= df['MINUS_DI'].shift(1))).astype(int)
    df['Strong_Trend'] = (df['ADX'] > 25).astype(int)
    df['Very_Strong_Trend'] = (df['ADX'] > 40).astype(int)
    
    # === VOLUME Analysis ===
    # Volume_Ratio is today's volume relative to its 20-bar mean;
    # Spike / Surge flag ratios above 2x / 3x.
    df['Volume_SMA20'] = df['Volume'].rolling(20).mean()
    df['Volume_Ratio'] = df['Volume'] / df['Volume_SMA20']
    df['Volume_Spike'] = (df['Volume_Ratio'] > 2).astype(int)
    df['Volume_Surge'] = (df['Volume_Ratio'] > 3).astype(int)
    df['OBV'] = talib.OBV(close, volume)
    # OBV_Trend: OBV minus its own 20-period EMA.
    df['OBV_Trend'] = df['OBV'] - talib.EMA(df['OBV'].values, timeperiod=20)
    df['MFI'] = talib.MFI(high, low, close, volume, timeperiod=14)
    df['AD'] = talib.AD(high, low, close, volume)
    
    # === CANDLESTICK PATTERNS (All major ones) ===
    # Each TA-Lib pattern output is divided by 100.
    # presumably to map TA-Lib's +/-100 pattern codes onto +/-1 — TODO confirm
    df['CDL_DOJI'] = talib.CDLDOJI(open_price, high, low, close) / 100
    df['CDL_HAMMER'] = talib.CDLHAMMER(open_price, high, low, close) / 100
    df['CDL_ENGULFING'] = talib.CDLENGULFING(open_price, high, low, close) / 100
    df['CDL_MORNINGSTAR'] = talib.CDLMORNINGSTAR(open_price, high, low, close) / 100
    df['CDL_EVENINGSTAR'] = talib.CDLEVENINGSTAR(open_price, high, low, close) / 100
    df['CDL_PIERCING'] = talib.CDLPIERCING(open_price, high, low, close) / 100
    df['CDL_HARAMI'] = talib.CDLHARAMI(open_price, high, low, close) / 100
    df['CDL_3WHITESOLDIERS'] = talib.CDL3WHITESOLDIERS(open_price, high, low, close) / 100
    df['CDL_3BLACKCROWS'] = talib.CDL3BLACKCROWS(open_price, high, low, close) / 100
    df['CDL_SHOOTINGSTAR'] = talib.CDLSHOOTINGSTAR(open_price, high, low, close) / 100
    df['CDL_INVERTEDHAMMER'] = talib.CDLINVERTEDHAMMER(open_price, high, low, close) / 100
    
    # === SUPPORT/RESISTANCE ===
    for period in [5, 10, 20, 50]:
        # Rolling extremes include the current bar.
        df[f'High_{period}d'] = df['High'].rolling(period).max()
        df[f'Low_{period}d'] = df['Low'].rolling(period).min()
        # 0 at the rolling low, 1 at the rolling high; 1e-8 avoids 0/0.
        df[f'Range_Position_{period}d'] = (df['Close'] - df[f'Low_{period}d']) / (df[f'High_{period}d'] - df[f'Low_{period}d'] + 1e-8)
        # "Near" means within 2% of the rolling extreme.
        df[f'Near_High_{period}d'] = (df['Close'] > df[f'High_{period}d'] * 0.98).astype(int)
        df[f'Near_Low_{period}d'] = (df['Close'] < df[f'Low_{period}d'] * 1.02).astype(int)
        # Breakout / breakdown compare Close with the PREVIOUS bar's rolling
        # extreme, i.e. a window that excludes today.
        df[f'Breakout_{period}d'] = (df['Close'] > df[f'High_{period}d'].shift(1)).astype(int)
        df[f'Breakdown_{period}d'] = (df['Close'] < df[f'Low_{period}d'].shift(1)).astype(int)
    
    # === RETURNS at different horizons ===
    # Trailing (backward-looking) percentage changes of Close.
    for period in [1, 2, 3, 5, 10, 20, 60]:
        df[f'Return_{period}d'] = df['Close'].pct_change(period)
        df[f'Return_{period}d_Positive'] = (df[f'Return_{period}d'] > 0).astype(int)
    
    # Consecutive Up/Down Days
    # Despite the names, these count up-days (Close > Open) and the remainder
    # within the last 5 bars; the days need not be consecutive.
    df['Up_Day'] = (df['Close'] > df['Open']).astype(int)
    df['Consec_Up'] = df['Up_Day'].rolling(5).sum()
    df['Consec_Down'] = 5 - df['Consec_Up']
    
    # === MOMENTUM INDICATORS ===
    df['ROC5'] = talib.ROC(close, timeperiod=5)
    df['ROC10'] = talib.ROC(close, timeperiod=10)
    df['ROC20'] = talib.ROC(close, timeperiod=20)
    df['MOM10'] = talib.MOM(close, timeperiod=10)
    df['WILLR'] = talib.WILLR(high, low, close, timeperiod=14)
    df['CCI'] = talib.CCI(high, low, close, timeperiod=20)
    df['ULTOSC'] = talib.ULTOSC(high, low, close, timeperiod1=7, timeperiod2=14, timeperiod3=28)
    # === AROON ===
    # talib.AROON returns its outputs in the order (aroondown, aroonup), so the
    # tuple must be unpacked Down-first; unpacking Up-first swaps the two
    # columns and flips the sign of the oscillator.
    aroon_down, aroon_up = talib.AROON(high, low, timeperiod=14)
    df['AROON_Up'] = aroon_up
    df['AROON_Down'] = aroon_down
    # Positive when the Up line is above the Down line.
    df['AROON_Osc'] = df['AROON_Up'] - df['AROON_Down']
    
    # === PARABOLIC SAR ===
    df['SAR'] = talib.SAR(high, low, acceleration=0.02, maximum=0.2)
    # Bullish while Close is above the SAR value; the Flip columns mark the
    # single bar on which that state changes.
    df['SAR_Bullish'] = (df['Close'] > df['SAR']).astype(int)
    df['SAR_Flip_Bull'] = ((df['SAR_Bullish'] == 1) & (df['SAR_Bullish'].shift(1) == 0)).astype(int)
    df['SAR_Flip_Bear'] = ((df['SAR_Bullish'] == 0) & (df['SAR_Bullish'].shift(1) == 1)).astype(int)
    
    # === GAP Analysis ===
    # Gap: today's Open versus the previous Close, in percent.
    df['Gap'] = (df['Open'] - df['Close'].shift(1)) / df['Close'].shift(1) * 100
    # Thresholds are in percentage points: 0.5% for a gap, 1.0% for a large gap.
    df['Gap_Up'] = (df['Gap'] > 0.5).astype(int)
    df['Gap_Down'] = (df['Gap'] < -0.5).astype(int)
    df['Large_Gap_Up'] = (df['Gap'] > 1.0).astype(int)
    df['Large_Gap_Down'] = (df['Gap'] < -1.0).astype(int)
    
    # === PRICE ACTION ===
    # Candle anatomy: signed body, absolute body as % of Open, the two shadows,
    # and the High-Low range (absolute and as % of Low).
    df['Body'] = df['Close'] - df['Open']
    df['Body_Pct'] = abs(df['Body']) / df['Open'] * 100
    df['Upper_Shadow'] = df['High'] - df[['Open', 'Close']].max(axis=1)
    df['Lower_Shadow'] = df[['Open', 'Close']].min(axis=1) - df['Low']
    df['Range'] = df['High'] - df['Low']
    df['Range_Pct'] = df['Range'] / df['Low'] * 100
    # === COMPOUND SIGNALS (Human + Machine) ===
    # Triple Screen (Elder)
    # Bull: at least 6 of 8 EMA pairs bullish, with RSI14 and Stochastic %K
    # pulled back. Bear is the mirror image.
    df['Triple_Screen_Bull'] = ((df['EMA_Bullish_Count'] >= 6) & (df['RSI14'] < 45) & (df['STOCH_K14'] < 30)).astype(int)
    df['Triple_Screen_Bear'] = ((df['EMA_Bullish_Count'] <= 2) & (df['RSI14'] > 55) & (df['STOCH_K14'] > 70)).astype(int)
    
    # Squeeze + Breakout
    # shift(1) turns the 0/1 int flag into a float Series with a leading NaN,
    # and `&` between a float Series and an int Series is not a valid boolean
    # operation in pandas. Fill the first row with 0 and cast back to int so the
    # flags combine as plain 0/1 integers.
    prev_squeeze = df['BB20_Squeeze'].shift(1).fillna(0).astype(int)
    df['Squeeze_Breakout_Up'] = (prev_squeeze & df['Breakout_20d']).astype(int)
    df['Squeeze_Breakout_Down'] = (prev_squeeze & df['Breakdown_20d']).astype(int)
    
    # Volume Confirmed Breakout
    # A 20-day breakout/breakdown on a bar whose volume is more than 2x its
    # 20-bar average.
    df['Vol_Confirmed_Breakout'] = (df['Breakout_20d'] & df['Volume_Spike']).astype(int)
    df['Vol_Confirmed_Breakdown'] = (df['Breakdown_20d'] & df['Volume_Spike']).astype(int)
    
    # Mean Reversion Setup
    # All three oscillators must agree on an extreme.
    df['Mean_Reversion_Long'] = ((df['BB20_Position'] < 0.1) & (df['RSI14'] < 30) & (df['STOCH_K14'] < 20)).astype(int)
    df['Mean_Reversion_Short'] = ((df['BB20_Position'] > 0.9) & (df['RSI14'] > 70) & (df['STOCH_K14'] > 80)).astype(int)
    
    # Drop every row that still has a NaN in any column (indicator warm-up).
    df.dropna(inplace=True)

# Count features
# Every column other than the five raw OHLCV columns is counted as a feature.
feature_count = len([c for c in df.columns if c not in ['Open', 'High', 'Low', 'Close', 'Volume']])
print(f"‚úì HARDCORE Features computed: {feature_count} indicators")
print(f"  Remaining rows: {len(df)} ({len(df)/252:.1f} years)")

# === CREATE 1-DAY PREDICTION TARGET (for bigger gains) ===
# The existing index is discarded here; from now on rows are addressed by
# integer position 0..n-1, which the GASF alignment below relies on.
df = df.reset_index(drop=True)
df['Returns'] = df['Close'].pct_change()
df['Future_1d_Return'] = df['Returns'].shift(-1)  # Next day return
# Multi-day targets are the SUM of the next 3 / 5 daily simple returns (an
# additive approximation, not a compounded return). The last rows are NaN
# because their future is not available.
df['Future_3d_Return'] = df['Returns'].rolling(3).sum().shift(-3)
df['Future_5d_Return'] = df['Returns'].rolling(5).sum().shift(-5)

# High conviction targets (big moves only)
# NaN targets compare False, so the final row is labelled 0 for both classes.
df['Big_Move_Up'] = (df['Future_1d_Return'] > 0.01).astype(int)  # > 1% up
df['Big_Move_Down'] = (df['Future_1d_Return'] < -0.01).astype(int)  # more than 1% down

print(f"\nüìä Target Distribution:")
print(f"  Days with >1% up move: {df['Big_Move_Up'].sum()} ({df['Big_Move_Up'].mean()*100:.1f}%)")
print(f"  Days with >1% down move: {df['Big_Move_Down'].sum()} ({df['Big_Move_Down'].mean()*100:.1f}%)")

# GASF Images (keep for visual patterns)
# Each rolling window of 20 daily log returns is min-max scaled to [-1, 1] and
# encoded as a 20x20 Gramian Angular Summation Field image. gasf_indices[k] is
# the df row of the LAST day inside window k, so pairing image k with
# Future_1d_Return at that row uses no future information.
print("\nCreating GASF images...")
close_series = df['Close'].values.astype(float)
returns = np.log(close_series[1:] / close_series[:-1])  # returns[j] belongs to df row j + 1

window_size = 20
image_size = 20
gasf_images = []
gasf_indices = []
gasf_failures = 0  # windows the transformer rejected

gasf = GramianAngularField(image_size=image_size, method='summation', sample_range=(-1, 1))

for i in range(len(returns) - window_size):
    window = returns[i:i+window_size]
    window_min, window_max = window.min(), window.max()
    if window_max - window_min > 1e-8:
        window_norm = 2 * (window - window_min) / (window_max - window_min) - 1
    else:
        # Flat window: avoid dividing by ~0 and encode it as all zeros.
        window_norm = np.zeros_like(window)
    try:
        gasf_img = gasf.fit_transform(window_norm.reshape(1, -1))
    except Exception:
        # Catch Exception rather than using a bare `except:` so Ctrl-C /
        # SystemExit still interrupt the loop, and count the skips instead of
        # hiding them.
        gasf_failures += 1
        continue
    gasf_images.append(gasf_img[0])
    gasf_indices.append(i + window_size)

gasf_images = np.array(gasf_images)
print(f"‚úì Created {len(gasf_images)} GASF images")
if gasf_failures:
    print(f"  Skipped {gasf_failures} windows that could not be transformed")

# Align data
# Select images by an explicit per-image mask instead of assuming the valid
# indices form a prefix of gasf_indices.
in_range = np.array([idx < len(df) for idx in gasf_indices], dtype=bool)
valid_indices = [idx for idx in gasf_indices if idx < len(df)]
aligned_future_returns = df['Future_1d_Return'].iloc[valid_indices].values
aligned_gasf_images = gasf_images[in_range]
# Drop pairs whose target is NaN (the final row has no next-day return).
mask = ~np.isnan(aligned_future_returns)
aligned_gasf_images = aligned_gasf_images[mask]
aligned_future_returns = aligned_future_returns[mask]
aligned_indices = np.array(valid_indices)[mask]

print(f"‚úì Data ready. {len(df)} days, {feature_count} features, 1-day prediction target")

In [None]:
# @title Stage 2: Unsupervised Visual Discovery
# Check if data is ready
# Fail fast with a readable message when the Stage 1 cell has not produced
# `aligned_gasf_images` in this kernel, or produced an empty array.
if 'aligned_gasf_images' not in dir() or len(aligned_gasf_images) == 0:
    raise RuntimeError("‚ö†Ô∏è Run Cell 3 (Data Preparation) first!")

class VisualPatternFinder:
    """Cluster GASF images with k-means and score each cluster by forward returns.

    Attributes:
        n_patterns: number of k-means clusters to fit.
        kmeans: the fitted KMeans model, or None before discovery has run.
        patterns: dict mapping cluster id -> statistics dict with keys
            'avg_return', 'win_rate', 'frequency', 'sharpe' and 'centroid'.
    """

    def __init__(self, n_patterns=10):
        self.n_patterns = n_patterns
        self.kmeans = None
        self.patterns = {}

    def discover_visual_patterns(self, gasf_images, future_returns, horizon_days=1):
        """Cluster the images and report forward-return statistics per cluster.

        Args:
            gasf_images: array of shape (n_samples, height, width).
            future_returns: array of n_samples forward returns, aligned with
                gasf_images row for row.
            horizon_days: holding period, in trading days, that each entry of
                future_returns covers. It drives both the printed label and the
                Sharpe annualization factor sqrt(252 / horizon_days). The
                default of 1 matches a next-day return target.

        Raises:
            ValueError: if gasf_images is not 3-D, the two inputs differ in
                length, or horizon_days is not positive.
        """
        gasf_images = np.asarray(gasf_images)
        future_returns = np.asarray(future_returns, dtype=float)
        if gasf_images.ndim != 3:
            raise ValueError("gasf_images must have shape (n_samples, height, width)")
        n_samples, h, w = gasf_images.shape
        if len(future_returns) != n_samples:
            raise ValueError("gasf_images and future_returns must have the same length")
        if horizon_days <= 0:
            raise ValueError("horizon_days must be positive")

        # Start from a clean slate so a re-run never mixes in stale clusters.
        self.patterns = {}

        # Reshape for clustering (flatten images)
        X = gasf_images.reshape(n_samples, h * w)

        # Cluster
        self.kmeans = KMeans(n_clusters=self.n_patterns, random_state=42, n_init=10)
        clusters = self.kmeans.fit_predict(X)

        # Number of non-overlapping holding periods per 252-day trading year.
        periods_per_year = 252 / horizon_days

        # Analyze each cluster
        print("\nVISUAL PATTERN ANALYSIS")
        print("=" * 60)

        for cluster_id in range(self.n_patterns):
            members = clusters == cluster_id
            cluster_returns = future_returns[members]

            if len(cluster_returns) == 0:
                continue

            # Statistics
            avg_return = np.mean(cluster_returns)
            win_rate = (cluster_returns > 0).mean()
            frequency = members.sum() / len(clusters)
            std_dev = np.std(cluster_returns) + 1e-8  # epsilon guards zero variance
            sharpe = avg_return / std_dev * np.sqrt(periods_per_year)  # Annualized roughly

            print(f"\nPattern {cluster_id}:")
            print(f"  Frequency: {frequency*100:.1f}% of days")
            print(f"  Next-{horizon_days}d return: {avg_return*100:.2f}%")
            print(f"  Win rate: {win_rate*100:.1f}%")
            print(f"  Sharpe: {sharpe:.2f}")

            if avg_return > 0.005:  # Threshold for "profitable"
                print(f"  ‚úì PROFITABLE PATTERN FOUND")

            self.patterns[cluster_id] = {
                'avg_return': avg_return,
                'win_rate': win_rate,
                'frequency': frequency,
                'sharpe': sharpe,
                'centroid': self.kmeans.cluster_centers_[cluster_id].reshape(h, w)
            }

    def plot_patterns(self):
        """Visualize the centroids of discovered patterns.

        Prints a notice and returns without plotting when no patterns exist.
        """
        if not self.patterns:
            print("No patterns discovered yet.")
            return

        fig, axes = plt.subplots(2, max(1, (self.n_patterns + 1) // 2), figsize=(15, 6))
        axes = axes.flatten()

        # Switch every panel off first so slots without a pattern (odd
        # n_patterns, or empty clusters) do not render as blank framed axes.
        for ax in axes:
            ax.axis('off')

        for ax, (cluster_id, stats) in zip(axes, self.patterns.items()):
            ax.imshow(stats['centroid'], cmap='rainbow', origin='lower')
            ax.set_title(f"P{cluster_id}: WR {stats['win_rate']:.2f}")
        plt.tight_layout()
        plt.show()

# Discover patterns
# Fit 8 clusters on the aligned GASF images, print per-cluster next-day
# return statistics, then draw the cluster centroids.
finder = VisualPatternFinder(n_patterns=8)
finder.discover_visual_patterns(aligned_gasf_images, aligned_future_returns)
finder.plot_patterns()

print("\n‚úì DISCOVERED: Which price shapes predict returns")

In [None]:
# @title Stage 3: BASELINE - Test Proven Human Trading Strategies

class HumanStrategyTester:
    """Backtest a set of rule-based strategies to establish a baseline.

    The tester works on a private copy of the frame it is given; signal columns
    are added to that copy only. Metrics for every tested strategy are stored in
    ``self.results`` keyed by strategy name.
    """

    def __init__(self, df):
        self.df = df.copy()
        self.results = {}

    @staticmethod
    def _long_short(long_mask, short_mask):
        """Return +1 where long_mask holds, -1 where short_mask holds, else 0.

        When both masks are true on the same row the short side wins, matching
        an assignment order of "set longs first, then overwrite with shorts".
        """
        return np.where(short_mask, -1, np.where(long_mask, 1, 0))

    def backtest_strategy(self, name, signal_col, target_col='Future_1d_Return'):
        """Backtest a single strategy.

        Args:
            name: key under which the metrics are stored in ``self.results``.
            signal_col: column holding the position per row (+1 long, -1 short,
                0 flat).
            target_col: column holding the return earned by a +1 position.

        Returns:
            dict with 'total_return' (compounded), 'win_rate' (winning rows /
            rows with a non-zero signal), 'profit_factor' (gross profit / gross
            loss; 999.0 when there are profits but no losses), 'sharpe'
            (annualized with sqrt(252)), 'max_drawdown' (largest peak-to-trough
            fall of the compounded equity curve, as a fraction) and 'trades'.
        """
        data = self.df[[signal_col, target_col]].dropna()
        signals = data[signal_col].to_numpy(dtype=float)
        returns = data[target_col].to_numpy(dtype=float)

        # Strategy returns
        strat_returns = signals * returns
        trades = int((signals != 0).sum())

        if strat_returns.size == 0:
            # Nothing to evaluate (e.g. every row had a NaN): report neutral
            # metrics instead of failing on an empty reduction.
            metrics = {
                'total_return': 0.0,
                'win_rate': 0,
                'profit_factor': 0.0,
                'sharpe': 0.0,
                'max_drawdown': 0.0,
                'trades': trades
            }
            self.results[name] = metrics
            return metrics

        # Metrics
        total_return = (1 + strat_returns).prod() - 1
        wins = (strat_returns > 0).sum()
        win_rate = wins / trades if trades > 0 else 0

        # Profit factor = gross profit / gross loss.
        gross_profit = strat_returns[strat_returns > 0].sum()
        gross_loss = abs(strat_returns[strat_returns < 0].sum())
        if gross_loss > 0:
            profit_factor = gross_profit / gross_loss
        else:
            profit_factor = 999.0 if gross_profit > 0 else 0.0

        sharpe = np.mean(strat_returns) / (np.std(strat_returns) + 1e-8) * np.sqrt(252)

        # Drawdown on the compounded equity curve, consistent with
        # total_return. The starting capital (1.0) counts as the first peak so
        # a loss on the very first bar is measured too.
        equity = np.cumprod(1 + strat_returns)
        running_peak = np.maximum.accumulate(np.concatenate(([1.0], equity)))[1:]
        max_dd = (1 - equity / running_peak).max()

        self.results[name] = {
            'total_return': total_return,
            'win_rate': win_rate,
            'profit_factor': profit_factor,
            'sharpe': sharpe,
            'max_drawdown': max_dd,
            'trades': trades
        }

        return self.results[name]

    def test_all_strategies(self):
        """Build the signal column for each known strategy, backtest it, and
        print a table sorted by Sharpe ratio.

        A strategy is skipped when any column it needs is missing from the
        frame. Returns ``self.results``.
        """

        print("\nüìä HUMAN STRATEGY BASELINE TEST")
        print("=" * 70)
        print("Testing proven trading strategies on 20+ years of data...")
        print()

        df = self.df
        ls = self._long_short

        # (display name, signal column, required input columns, signal builder)
        strategies = [
            # 1. Buy & Hold
            ('Buy & Hold', 'BuyHold_Signal', [],
             lambda d: 1),
            # 2. Golden Cross (SMA 50/200): long while active, short otherwise
            ('Golden Cross (50/200)', 'GoldenCross_Signal', ['Golden_Cross_Active'],
             lambda d: np.where(d['Golden_Cross_Active'] != 0, 1, -1)),
            # 3. RSI Mean Reversion
            ('RSI Mean Reversion', 'RSI_MeanRev_Signal', ['RSI14'],
             lambda d: ls(d['RSI14'] < 30, d['RSI14'] > 70)),
            # 4. MACD Crossover: follow the sign of the histogram
            ('MACD Crossover', 'MACD_Signal', ['MACD_12_26_Hist'],
             lambda d: np.sign(d['MACD_12_26_Hist'])),
            # 5. Bollinger Band Mean Reversion
            ('Bollinger Band MR', 'BB_MeanRev_Signal', ['BB20_Position'],
             lambda d: ls(d['BB20_Position'] < 0.1, d['BB20_Position'] > 0.9)),
            # 6. Triple Screen (Elder)
            ('Triple Screen (Elder)', 'TripleScreen_Signal',
             ['Triple_Screen_Bull', 'Triple_Screen_Bear'],
             lambda d: ls(d['Triple_Screen_Bull'] == 1, d['Triple_Screen_Bear'] == 1)),
            # 7. Breakout Strategy
            ('20-Day Breakout', 'Breakout_Signal', ['Breakout_20d', 'Breakdown_20d'],
             lambda d: ls(d['Breakout_20d'] == 1, d['Breakdown_20d'] == 1)),
            # 8. Volume Confirmed Breakout
            ('Volume Breakout', 'VolBreakout_Signal',
             ['Vol_Confirmed_Breakout', 'Vol_Confirmed_Breakdown'],
             lambda d: ls(d['Vol_Confirmed_Breakout'] == 1, d['Vol_Confirmed_Breakdown'] == 1)),
            # 9. ADX Trend Following
            ('ADX Trend Follow', 'ADX_Signal', ['ADX', 'PLUS_DI', 'MINUS_DI'],
             lambda d: ls((d['ADX'] > 25) & (d['PLUS_DI'] > d['MINUS_DI']),
                          (d['ADX'] > 25) & (d['PLUS_DI'] < d['MINUS_DI']))),
            # 10. Squeeze Breakout
            ('Squeeze Breakout', 'Squeeze_Signal',
             ['Squeeze_Breakout_Up', 'Squeeze_Breakout_Down'],
             lambda d: ls(d['Squeeze_Breakout_Up'] == 1, d['Squeeze_Breakout_Down'] == 1)),
            # 11. EMA Ribbon Alignment
            ('EMA Ribbon', 'EMA_Ribbon_Signal', ['EMA_Bullish_Count'],
             lambda d: ls(d['EMA_Bullish_Count'] >= 7, d['EMA_Bullish_Count'] <= 1)),
            # 12. RSI Divergence
            ('RSI Divergence', 'Divergence_Signal',
             ['Bullish_Divergence', 'Bearish_Divergence'],
             lambda d: ls(d['Bullish_Divergence'] == 1, d['Bearish_Divergence'] == 1)),
            # 13. Stochastic Overbought/Oversold
            ('Stochastic OS/OB', 'Stoch_Signal', ['STOCH_K14'],
             lambda d: ls(d['STOCH_K14'] < 20, d['STOCH_K14'] > 80)),
            # 14. Combined Mean Reversion
            ('Combined Mean Rev', 'CombinedMR_Signal',
             ['Mean_Reversion_Long', 'Mean_Reversion_Short'],
             lambda d: ls(d['Mean_Reversion_Long'] == 1, d['Mean_Reversion_Short'] == 1)),
        ]

        for name, signal_col, required, build_signal in strategies:
            # Skip a strategy unless EVERY column it reads is present.
            if not all(col in df.columns for col in required):
                continue
            df[signal_col] = build_signal(df)
            self.backtest_strategy(name, signal_col)

        # Print results
        print(f"{'Strategy':<25} {'Return':>10} {'Win Rate':>10} {'Sharpe':>8} {'PF':>8} {'MaxDD':>8}")
        print("-" * 70)

        sorted_results = sorted(self.results.items(), key=lambda x: x[1]['sharpe'], reverse=True)

        for name, r in sorted_results:
            print(f"{name:<25} {r['total_return']*100:>9.1f}% {r['win_rate']*100:>9.1f}% "
                  f"{r['sharpe']:>8.2f} {r['profit_factor']:>7.2f} {r['max_drawdown']*100:>7.1f}%")

        # Best strategy
        best = sorted_results[0]
        print(f"\nüèÜ Best Human Strategy: {best[0]}")
        print(f"   Sharpe: {best[1]['sharpe']:.2f}, Return: {best[1]['total_return']*100:.1f}%")

        return self.results

# Test human strategies
print("\n" + "=" * 70)
tester = HumanStrategyTester(df)
human_results = tester.test_all_strategies()

# Store best human strategy for comparison
best_human = max(human_results.items(), key=lambda x: x[1]['sharpe'])
print(f"\n‚ö° AI MUST BEAT: {best_human[0]} with Sharpe {best_human[1]['sharpe']:.2f}")

# Also run rare state detection
# UltimateRareStateDetector is not defined in any cell above this one, so on a
# fresh kernel (Restart & Run All) an unconditional call stops the notebook
# here with a NameError. Run the detector only when the class already exists in
# the kernel namespace, and keep both result names defined either way.
print("\n" + "=" * 70)
if 'UltimateRareStateDetector' in globals():
    detector = UltimateRareStateDetector()
    rare_profiles = detector.find_rare_states(df)
else:
    detector = None
    rare_profiles = None
    print("Skipping rare state detection: UltimateRareStateDetector is not defined yet.")

In [None]:
# @title Stage 4: HARDCORE Formula Evolution (50 Features, 500 Pop, 100 Gens)

class HardcoreFormulaEvolver:
    """Genetic-programming search for a trading formula over named features.

    Each individual is a DEAP primitive tree whose inputs are the feature
    columns (renamed from ARG0..ARGn to the given feature names). The sign of a
    formula's output is read as the position: +1 long, -1 short, 0 flat.
    """

    def __init__(self, feature_names):
        """Build the primitive set and the DEAP toolbox.

        Args:
            feature_names: one name per input column, in column order. Names
                are used as argument identifiers inside the evolved formulas.
        """
        # Reset creators: DEAP's `creator` registers classes globally, so drop
        # earlier definitions before re-creating them when the cell is re-run.
        if hasattr(creator, "FitnessMax"):
            del creator.FitnessMax
        if hasattr(creator, "Individual"):
            del creator.Individual

        creator.create("FitnessMax", base.Fitness, weights=(1.0,))
        creator.create("Individual", gp.PrimitiveTree, fitness=creator.FitnessMax)

        self.feature_names = feature_names
        self.n_features = len(feature_names)
        self.toolbox = base.Toolbox()
        self.pset = gp.PrimitiveSet("MAIN", self.n_features)

        # Rename arguments
        for i, name in enumerate(self.feature_names):
            self.pset.renameArguments(**{f'ARG{i}': name})

        # === EXTENSIVE OPERATIONS ===
        self.pset.addPrimitive(operator.add, 2, name='add')
        self.pset.addPrimitive(operator.sub, 2, name='sub')
        self.pset.addPrimitive(operator.mul, 2, name='mul')

        # Protected variants keep every primitive defined for all real inputs.
        def pdiv(x, y): return x / (y + 1e-8)
        def psqrt(x): return np.sqrt(np.abs(x))
        def plog(x): return np.log(np.abs(x) + 1e-8)
        def pexp(x): return np.clip(np.exp(np.clip(x, -10, 10)), -1e10, 1e10)
        def pmax(x, y): return np.maximum(x, y)
        def pmin(x, y): return np.minimum(x, y)
        def pif(c, t, f): return np.where(c > 0, t, f)
        def pand(x, y): return np.where((x > 0) & (y > 0), 1.0, 0.0)
        def por(x, y): return np.where((x > 0) | (y > 0), 1.0, 0.0)
        def pgt(x, y): return np.where(x > y, 1.0, -1.0)
        # Named `pless` rather than `plt` so it does not shadow the matplotlib
        # alias inside this method; the primitive is still registered as 'LT'.
        def pless(x, y): return np.where(x < y, 1.0, -1.0)

        self.pset.addPrimitive(pdiv, 2, name='div')
        self.pset.addPrimitive(psqrt, 1, name='sqrt')
        self.pset.addPrimitive(plog, 1, name='log')
        self.pset.addPrimitive(pexp, 1, name='exp')
        self.pset.addPrimitive(np.sin, 1, name='sin')
        self.pset.addPrimitive(np.cos, 1, name='cos')
        self.pset.addPrimitive(np.tanh, 1, name='tanh')
        self.pset.addPrimitive(np.abs, 1, name='abs')
        self.pset.addPrimitive(operator.neg, 1, name='neg')
        self.pset.addPrimitive(pmax, 2, name='max')
        self.pset.addPrimitive(pmin, 2, name='min')
        self.pset.addPrimitive(pif, 3, name='ifgt0')
        self.pset.addPrimitive(pand, 2, name='AND')
        self.pset.addPrimitive(por, 2, name='OR')
        self.pset.addPrimitive(pgt, 2, name='GT')
        self.pset.addPrimitive(pless, 2, name='LT')

        # Constants (trading thresholds)
        # Terminal names must be identifiers: 0.5 -> c0_5, -1.0 -> cneg1_0.
        for val in [0.0, 0.5, 1.0, 2.0, -1.0, 20.0, 30.0, 50.0, 70.0, 80.0]:
            name = f'c{str(val).replace(".", "_").replace("-", "neg")}'
            self.pset.addTerminal(val, name=name)

        self.toolbox.register("expr", gp.genHalfAndHalf, pset=self.pset, min_=2, max_=6)
        self.toolbox.register("individual", tools.initIterate, creator.Individual, self.toolbox.expr)
        self.toolbox.register("population", tools.initRepeat, list, self.toolbox.individual)
        self.toolbox.register("mate", gp.cxOnePoint)
        self.toolbox.register("mutate", gp.mutUniform, expr=self.toolbox.expr, pset=self.pset)
        self.toolbox.register("select", tools.selTournament, tournsize=7)

        # Cap tree height at 15 for both variation operators to limit bloat.
        self.toolbox.decorate("mate", gp.staticLimit(key=operator.attrgetter("height"), max_value=15))
        self.toolbox.decorate("mutate", gp.staticLimit(key=operator.attrgetter("height"), max_value=15))

    def _predict(self, formula, X):
        """Compile a tree and evaluate it on the columns of X."""
        func = gp.compile(formula, self.pset)
        return func(*[X[:, i] for i in range(self.n_features)])

    def _evaluate_invalid(self, individuals):
        """Assign a fitness to every individual that does not have a valid one."""
        for ind in individuals:
            if not ind.fitness.valid:
                ind.fitness.values = self.toolbox.evaluate(ind)

    def eval_formula(self, individual, X, y):
        """Multi-metric fitness: correlation + sharpe + win rate + profit factor.

        Args:
            individual: the primitive tree to score.
            X: 2-D feature array, one column per feature name.
            y: 1-D array of forward returns aligned with the rows of X.

        Returns:
            A 1-tuple holding the weighted fitness, or (-100.0,) when the
            formula yields NaN/inf, has undefined correlation with y, or raises
            during evaluation.
        """
        try:
            pred = self._predict(individual, X)

            if np.isnan(pred).any() or np.isinf(pred).any():
                return (-100.0,)

            pred = np.array(pred).flatten()

            # Correlation
            corr = np.corrcoef(pred, y)[0, 1]
            if np.isnan(corr):
                return (-100.0,)

            # Strategy performance
            signals = np.sign(pred)
            strat_ret = signals * y

            # Win rate
            wins = (strat_ret > 0).sum()
            trades = (signals != 0).sum()
            win_rate = wins / trades if trades > 0 else 0.5

            # Sharpe
            sharpe = np.mean(strat_ret) / (np.std(strat_ret) + 1e-8) * np.sqrt(252)

            # Profit factor
            gross_profit = strat_ret[strat_ret > 0].sum()
            gross_loss = abs(strat_ret[strat_ret < 0].sum())
            pf = gross_profit / (gross_loss + 1e-8)

            # Combined fitness (weighted); tanh squashes the unbounded terms.
            fitness = (
                corr * 0.3 +                    # Correlation
                np.tanh(sharpe) * 0.3 +         # Risk-adjusted return
                (win_rate - 0.5) * 2 * 0.2 +    # Win rate above 50%
                np.tanh(pf - 1) * 0.2           # Profit factor above 1
            )

            return (fitness,)

        except Exception:
            # Any failure while compiling or evaluating the tree marks the
            # individual as unfit instead of aborting the run.
            return (-100.0,)

    def evolve(self, X, y, pop_size=500, generations=100, checkpoint_every=25, seed=None):
        """Evolve with progress checkpoints.

        Args:
            X: 2-D feature array, one column per feature name.
            y: 1-D array of forward returns aligned with the rows of X.
            pop_size: number of individuals per generation.
            generations: number of generations to run.
            checkpoint_every: print the current best formula every this many
                generations; 0 or None disables checkpoints.
            seed: optional integer. When given, seeds both the stdlib `random`
                module (used by DEAP's operators) and NumPy's global generator
                (used for the crossover/mutation draws) so runs are repeatable.

        Returns:
            A DEAP HallOfFame holding up to the 20 best individuals seen.
        """
        if seed is not None:
            import random
            random.seed(seed)
            np.random.seed(seed)

        self.toolbox.register("evaluate", self.eval_formula, X=X, y=y)

        pop = self.toolbox.population(n=pop_size)
        hof = tools.HallOfFame(20)  # Keep top 20

        print(f"üß¨ Starting HARDCORE Evolution...")
        print(f"   Population: {pop_size}")
        print(f"   Generations: {generations}")
        print(f"   Features: {self.n_features}")
        print()

        # Score the initial population first, so the very first tournament
        # selects on real fitness values rather than on unevaluated individuals.
        self._evaluate_invalid(pop)
        hof.update(pop)

        # Evolve with checkpoints
        for gen in range(generations):
            # Select and clone
            offspring = self.toolbox.select(pop, len(pop))
            offspring = list(map(self.toolbox.clone, offspring))

            # Crossover (adjacent pairs, probability 0.7)
            for child1, child2 in zip(offspring[::2], offspring[1::2]):
                if np.random.random() < 0.7:
                    self.toolbox.mate(child1, child2)
                    del child1.fitness.values
                    del child2.fitness.values

            # Mutation (probability 0.3 per individual)
            for mutant in offspring:
                if np.random.random() < 0.3:
                    self.toolbox.mutate(mutant)
                    del mutant.fitness.values

            # Evaluate only the individuals changed by crossover or mutation
            self._evaluate_invalid(offspring)

            pop[:] = offspring
            hof.update(pop)

            # Progress: averages skip the -100 penalty assigned to broken formulas
            fits = [ind.fitness.values[0] for ind in pop if ind.fitness.values[0] > -50]
            if fits:
                avg_fit = np.mean(fits)
                max_fit = np.max(fits)

                if (gen + 1) % 10 == 0 or gen == 0:
                    print(f"   Gen {gen+1:3d}: avg={avg_fit:.4f}, max={max_fit:.4f}")

                # Guard the modulo so checkpoint_every=0 disables checkpoints
                # instead of raising ZeroDivisionError.
                if checkpoint_every and (gen + 1) % checkpoint_every == 0:
                    print(f"\n   üìç Checkpoint at Gen {gen+1}:")
                    best = hof[0]
                    self._show_formula_stats(best, X, y)
                    print()

        return hof

    def _show_formula_stats(self, formula, X, y):
        """Show formula performance stats.

        Prints the first 80 characters of the formula, its compounded total
        return, win rate and annualized Sharpe when traded as sign(prediction).
        """
        pred = self._predict(formula, X)
        signals = np.sign(pred)
        strat_ret = signals * y

        n_trades = (signals != 0).sum()
        total_ret = (1 + strat_ret).prod() - 1
        win_rate = (strat_ret > 0).sum() / n_trades if n_trades > 0 else 0
        sharpe = np.mean(strat_ret) / (np.std(strat_ret) + 1e-8) * np.sqrt(252)

        print(f"      Formula: {str(formula)[:80]}...")
        print(f"      Total Return: {total_ret*100:.1f}%")
        print(f"      Win Rate: {win_rate*100:.1f}%")
        print(f"      Sharpe: {sharpe:.2f}")

# === SELECT KEY FEATURES (the list below holds 49 names) ===
print("\nüî• HARDCORE FORMULA EVOLUTION")
print("=" * 70)

# Key features for formula evolution, grouped by theme. The per-group counts
# are the number of names actually listed in each group.
hardcore_features = [
    # Core Momentum (10)
    'RSI14', 'RSI2', 'RSI7', 'STOCH_K14', 'STOCH_D14', 'MFI', 'CCI', 'WILLR', 'ULTOSC', 'MOM10',
    # Trend (10)
    'ADX', 'PLUS_DI', 'MINUS_DI', 'AROON_Osc', 'EMA_Bullish_Count', 'SAR_Bullish', 
    'Golden_Cross_Active', 'Strong_Trend', 'Above_EMA21', 'Above_EMA200',
    # Volatility (8)
    'ATR14_Pct', 'BB20_Position', 'BB20_Width', 'BB20_Squeeze', 'Vol_Expanding', 'Vol_Contracting',
    'Range_Pct', 'Gap',
    # Volume (4)
    'Volume_Ratio', 'Volume_Spike', 'OBV_Trend', 'AD',
    # Price Position (7)
    'Price_vs_EMA21', 'Price_vs_EMA200', 'Range_Position_20d', 'Near_High_20d', 'Near_Low_20d',
    'Breakout_20d', 'Breakdown_20d',
    # Returns (5)
    'Return_1d', 'Return_3d', 'Return_5d', 'Consec_Up', 'Consec_Down',
    # Human Patterns (5)
    'Bullish_Divergence', 'Bearish_Divergence', 'Mean_Reversion_Long', 'Mean_Reversion_Short',
    'Triple_Screen_Bull'
]

# Filter to available features
# Names missing from df are dropped silently; the printed count shows how many
# survived.
available_features = [f for f in hardcore_features if f in df.columns]
print(f"Using {len(available_features)} features for evolution")

# Prepare data
# Rows with a NaN in any selected feature or in the target are removed.
data_subset = df[available_features + ['Future_1d_Return']].dropna()
X_features = data_subset[available_features].values
y_returns = data_subset['Future_1d_Return'].values

# Normalize
# NOTE(review): the scaler is fitted on every row and the evolution below is
# scored on the same rows, so the reported statistics are in-sample.
from sklearn.preprocessing import RobustScaler
scaler = RobustScaler()  # More robust to outliers
X_features = scaler.fit_transform(X_features)

print(f"Training data: {len(X_features)} samples")
print(f"Target: 1-day forward return")

# === RUN HARDCORE EVOLUTION ===
# 500 individuals x 100 generations; the return value is the hall of fame.
evolver = HardcoreFormulaEvolver(available_features)
best_formulas = evolver.evolve(X_features, y_returns, pop_size=500, generations=100)

# === SHOW TOP RESULTS ===
print("\n" + "=" * 70)
print("üèÜ TOP 10 DISCOVERED FORMULAS:")
print("=" * 70)

for i, formula in enumerate(best_formulas[:10]):
    print(f"\n{i+1}. Fitness: {formula.fitness.values[0]:.4f}")
    evolver._show_formula_stats(formula, X_features, y_returns)

# Best individual overall, or None when the hall of fame is empty.
best_formula = best_formulas[0] if best_formulas else None

In [None]:
# @title Stage 5: HARDCORE RL Training (2000 Episodes, Deep Q-Table)

class DeepQAgent:
    """Tabular Q-learning agent over a percentile-binned feature grid.

    The continuous state is bucketed per feature and the bucket indices are
    packed into a single row index of a dense
    ``(n_bins ** n_features, n_actions)`` Q-table. No neural network is
    involved despite the class name.
    """

    def __init__(self, n_features, n_actions=3, n_bins=10):
        """Allocate a zero-initialised Q-table and set the learning constants.

        Args:
            n_features: Number of state features offered; capped at 6 so the
                table stays at most ``n_bins ** 6`` rows.
            n_actions: Number of discrete actions (columns of the Q-table).
            n_bins: Number of buckets each feature is split into.
        """
        self.n_actions = n_actions
        self.n_bins = n_bins
        # Only the first six entries of a state vector are ever read.
        self.n_features = min(n_features, 6)

        # One row per combination of per-feature buckets.
        self.n_states = self.n_bins ** self.n_features
        self.q_table = np.zeros((self.n_states, self.n_actions))

        # Learning rate and discount factor.
        self.lr, self.gamma = 0.1, 0.95
        # Epsilon-greedy exploration schedule (decayed once per update call).
        self.epsilon, self.epsilon_min, self.epsilon_decay = 0.3, 0.01, 0.9995

        # Bucket edges; populated by setup_bins().
        self.bins = None

    def setup_bins(self, data):
        """Derive per-feature bucket edges from the percentiles of ``data``.

        Args:
            data: 2-D array indexed as ``data[:, feature]``; only the first
                ``self.n_features`` columns are used.
        """
        # Interior percentiles only: n_bins buckets need n_bins - 1 edges.
        inner_quantiles = np.linspace(0, 100, self.n_bins + 1)[1:-1]
        self.bins = [
            np.percentile(data[:, col], inner_quantiles)
            for col in range(self.n_features)
        ]

    def discretize(self, state):
        """Map a continuous state vector to its Q-table row index.

        Returns 0 when setup_bins() has not been called yet.
        """
        if self.bins is None:
            return 0

        # Treat the bucket indices as digits of a base-n_bins number, with
        # feature 0 as the least significant digit.
        flat_index = 0
        place_value = 1
        for col in range(self.n_features):
            bucket = np.digitize(state[col], self.bins[col])
            bucket = np.clip(bucket, 0, self.n_bins - 1)
            flat_index += bucket * place_value
            place_value *= self.n_bins

        return min(flat_index, self.n_states - 1)

    def act(self, state, training=True):
        """Choose an action: epsilon-greedy when training, greedy otherwise.

        Ties in the greedy branch resolve to the lowest action index because
        np.argmax returns the first maximum.
        """
        explore = training and np.random.random() < self.epsilon
        if explore:
            return np.random.randint(self.n_actions)

        return np.argmax(self.q_table[self.discretize(state)])

    def update(self, state, action, reward, next_state, done):
        """Apply one Q-learning update, then decay epsilon.

        Args:
            state: State the action was taken in.
            action: Column index of the action taken.
            reward: Reward received for the transition.
            next_state: State reached after the action.
            done: When true, no bootstrapped future value is added.
        """
        row = self.discretize(state)
        next_row = self.discretize(next_state)

        target = reward if done else reward + self.gamma * np.max(self.q_table[next_row])

        td_error = target - self.q_table[row, action]
        self.q_table[row, action] += self.lr * td_error

        # Exploration shrinks on every update, floored at epsilon_min.
        self.epsilon = max(self.epsilon_min, self.epsilon * self.epsilon_decay)


class HardcoreTradingEnv(gym.Env):
    """Trading environment optimized for 1-day returns.

    Each step the agent picks SHORT / HOLD / LONG (actions 0 / 1 / 2) and is
    rewarded with ``position * Future_1d_Return`` of the current row, expressed
    in percent, minus a flat cost whenever the position changes. Observations
    are the standardised values of up to six feature columns.
    """

    # Rows skipped at the start of the frame before an episode may begin
    # (presumably to let long-lookback indicators populate — TODO confirm).
    WARMUP_ROWS = 250
    # Minimum number of rows kept between a random start and max_idx.
    MIN_EPISODE_ROOM = 100

    def __init__(self, df, feature_cols):
        """Build the environment.

        Args:
            df: DataFrame holding the feature columns and 'Future_1d_Return'.
            feature_cols: Candidate state columns; those missing from ``df``
                are dropped and only the first six survivors are used.
        """
        super().__init__()
        self.df = df.reset_index(drop=True)
        self.feature_cols = [f for f in feature_cols if f in df.columns][:6]  # Top 6 features

        self.current_idx = self.WARMUP_ROWS
        self.max_idx = len(df) - 2

        self.action_space = spaces.Discrete(3)  # SHORT, HOLD, LONG
        self.observation_space = spaces.Box(
            low=-np.inf, high=np.inf,
            shape=(len(self.feature_cols),),
            dtype=np.float32
        )

        # Standardise features. Fit on plain arrays because _get_state also
        # transforms plain arrays (avoids a feature-name mismatch warning on
        # every step).
        self.scaler = StandardScaler()
        valid_data = df[self.feature_cols].dropna()
        self.scaler.fit(valid_data.to_numpy(dtype=float))

        # Raw feature matrix cached once so _get_state does not re-slice the
        # whole DataFrame on every step. Scaling is still applied lazily so a
        # scaler swapped in after construction is honoured.
        self._feature_matrix = self.df[self.feature_cols].to_numpy(dtype=float)

        self.position = 0
        self.total_pnl = 0

    def reset(self, seed=None, options=None):
        """Start a new episode at a random row.

        The start row is drawn from the environment's own seeded generator so
        that ``reset(seed=...)`` is reproducible.

        Raises:
            ValueError: If the frame is too short to place an episode.
        """
        super().reset(seed=seed)
        upper = self.max_idx - self.MIN_EPISODE_ROOM
        if upper <= self.WARMUP_ROWS:
            raise ValueError(
                f"Need more than {self.WARMUP_ROWS + self.MIN_EPISODE_ROOM + 2} rows "
                f"to start an episode, got {len(self.df)}"
            )
        self.current_idx = int(self.np_random.integers(self.WARMUP_ROWS, upper))
        self.position = 0
        self.total_pnl = 0
        return self._get_state(), {}

    def step(self, action):
        """Apply ``action`` for one row and advance.

        Returns:
            (observation, reward, terminated, truncated, info) where reward is
            in percent points and ``info['pnl']`` is the running reward total.
        """
        new_position = int(action) - 1  # -1, 0, 1

        # Forward return attached to the current row; missing values earn nothing.
        next_return = self.df['Future_1d_Return'].iloc[self.current_idx]
        if np.isnan(next_return):
            next_return = 0

        # Reward is the position-signed return scaled to percent.
        reward = float(new_position * next_return * 100)

        # Transaction cost: 0.02 percent points = 2 bps, charged on any change.
        if new_position != self.position:
            reward -= 0.02

        self.position = new_position
        self.total_pnl += reward
        self.current_idx += 1

        done = bool(self.current_idx >= self.max_idx)

        return self._get_state(), reward, done, False, {'pnl': self.total_pnl}

    def _get_state(self):
        """Return the scaled float32 feature vector for the current row."""
        idx = min(self.current_idx, len(self.df) - 1)
        # nan must be passed by keyword: the second positional argument of
        # np.nan_to_num is `copy`, not the fill value.
        state = np.nan_to_num(self._feature_matrix[idx], nan=0.0)
        state = self.scaler.transform(state.reshape(1, -1)).flatten()
        return state.astype(np.float32)


# === TRAIN RL AGENT ===
print("\nü§ñ HARDCORE RL TRAINING")
print("=" * 70)

# Key features for RL state
rl_features = ['RSI14', 'ADX', 'BB20_Position', 'Volume_Ratio', 'EMA_Bullish_Count', 'Return_1d']
rl_features = [f for f in rl_features if f in df.columns]

print(f"State features: {rl_features}")

# The last ~2 years (504 rows) are reserved for evaluation. Training episodes,
# bin edges and the feature scaler must only see rows before test_start,
# otherwise the "walk-forward" numbers below are in-sample.
test_start = len(df) - 504  # ~2 years

# `env` spans the full history: it is used for evaluation here and is handed to
# later cells. `train_env` is restricted to the pre-test rows.
env = HardcoreTradingEnv(df, rl_features)
if test_start > 400:
    train_env = HardcoreTradingEnv(df.iloc[:test_start], rl_features)
    # Evaluate with the scaler fitted on training rows only, so test states are
    # scaled exactly like the states the agent learned on.
    env.scaler = train_env.scaler
else:
    print("   WARNING: not enough history to hold out a test period; "
          "training on all rows (evaluation will be in-sample).")
    train_env = env

agent = DeepQAgent(n_features=len(rl_features), n_bins=8)

# Collect states for bin setup
sample_states = []
for _ in range(500):
    state, _ = train_env.reset()
    sample_states.append(state)
sample_states = np.array(sample_states)
agent.setup_bins(sample_states)

print(f"Q-table size: {agent.n_states} states x {agent.n_actions} actions")
print(f"\nTraining for 2000 episodes...")

episode_rewards = []
best_avg = -np.inf

for episode in range(2000):
    state, _ = train_env.reset()
    total_reward = 0

    for step in range(200):  # ~200 trading days per episode
        action = agent.act(state)
        next_state, reward, done, _, _ = train_env.step(action)
        agent.update(state, action, reward, next_state, done)

        state = next_state
        total_reward += reward

        if done:
            break

    episode_rewards.append(total_reward)

    # Progress report every 200 episodes, averaged over that window.
    if (episode + 1) % 200 == 0:
        avg = np.mean(episode_rewards[-200:])
        if avg > best_avg:
            best_avg = avg
            print(f"   Episode {episode+1}: Avg Reward = {avg:.2f} (NEW BEST!) Œµ={agent.epsilon:.3f}")
        else:
            print(f"   Episode {episode+1}: Avg Reward = {avg:.2f}, Œµ={agent.epsilon:.3f}")

print(f"\n‚úì Training complete. Best avg reward: {best_avg:.2f}")

# === EVALUATE RL AGENT ===
print("\nüìä RL Agent Evaluation (Walk-Forward):")

# Test on last 2 years (rows from test_start onward)
test_signals = []
test_returns = []

env.current_idx = test_start
agent.epsilon = 0  # No exploration

while env.current_idx < len(df) - 2:
    state = env._get_state()
    action = agent.act(state, training=False)
    signal = action - 1

    actual_return = df['Future_1d_Return'].iloc[env.current_idx]
    if not np.isnan(actual_return):
        test_signals.append(signal)
        test_returns.append(actual_return)

    env.current_idx += 1

test_signals = np.array(test_signals)
test_returns = np.array(test_returns)
strat_returns = test_signals * test_returns

rl_total_return = (1 + strat_returns).prod() - 1
# Win rate is measured over days with an open position only.
rl_win_rate = (strat_returns > 0).sum() / (test_signals != 0).sum() if (test_signals != 0).sum() > 0 else 0
# An empty test set would make np.mean return NaN; report 0 instead.
rl_sharpe = (
    np.mean(strat_returns) / (np.std(strat_returns) + 1e-8) * np.sqrt(252)
    if len(strat_returns) > 0 else 0.0
)

print(f"Test Period: Last 2 years ({len(test_signals)} days)")
print(f"Total Return: {rl_total_return*100:.1f}%")
print(f"Win Rate: {rl_win_rate*100:.1f}%")
print(f"Sharpe Ratio: {rl_sharpe:.2f}")

In [None]:
# @title Stage 6: ULTIMATE Multimodal Fusion
class UltimateEnsemble:
    """Combine all discovery methods with confidence weighting.

    Four modalities each return a directional vote and a confidence in
    [0, 1]; the ensemble signal is the confidence-weighted mean of the votes.
    A modality that fails contributes (0, 0), and the first failure per
    modality is printed and recorded in ``vote_errors`` rather than silently
    discarded.
    """

    def __init__(self, visual_finder, rare_detector, best_formula, evolver, rl_agent, env):
        """Store the components and compile the genetic formula if present.

        Args:
            visual_finder: Object exposing ``kmeans`` and ``patterns``.
            rare_detector: Stored as ``self.rare``; not consulted by the votes.
            best_formula: GP individual, or None to disable the genetic vote.
            evolver: Object exposing ``pset`` and ``feature_names``.
            rl_agent: Q-learning agent, or None to disable the RL vote.
            env: Environment that names the RL state columns and holds the
                scaler the agent was trained with.
        """
        self.visual = visual_finder
        self.rare = rare_detector
        self.formula = best_formula
        self.evolver = evolver
        self.agent = rl_agent
        self.env = env

        if self.formula is not None:
            self.compiled_formula = gp.compile(self.formula, self.evolver.pset)
        else:
            self.compiled_formula = None

        # Track performance for confidence weighting
        self.method_accuracy = {
            'visual': 0.5,
            'genetic': 0.5,
            'rare': 0.5,
            'rl': 0.5
        }

        # Last error seen per modality, keyed by modality name.
        self.vote_errors = {}

    def _note_failure(self, modality, exc):
        """Record a vote failure; print it only the first time per modality."""
        if modality not in self.vote_errors:
            print(f"   [ensemble] {modality} vote failed: {exc!r} "
                  f"(further {modality} errors are recorded in vote_errors only)")
        self.vote_errors[modality] = repr(exc)

    @staticmethod
    def _row_values(row_dict, names):
        """Read ``names`` from ``row_dict``, mapping missing or NaN values to 0."""
        values = []
        for name in names:
            val = row_dict.get(name, 0)
            if np.isnan(val):
                val = 0
            values.append(val)
        return values

    def _visual_vote(self, gasf_img):
        """Vote with the sign of the average return of the image's cluster."""
        kmeans = getattr(self.visual, 'kmeans', None)
        if gasf_img is None or kmeans is None:
            return 0, 0
        try:
            cluster = kmeans.predict(gasf_img.reshape(1, -1))[0]
            pattern_stats = self.visual.patterns.get(cluster, {})
            avg_ret = pattern_stats.get('avg_return', 0)
            return np.sign(avg_ret), min(abs(avg_ret) * 100, 1.0)  # Scale confidence
        except Exception as exc:
            self._note_failure('visual', exc)
            return 0, 0

    def _genetic_vote(self, row_dict):
        """Vote with the sign of the compiled formula evaluated on this row."""
        if self.compiled_formula is None:
            return 0, 0
        try:
            # NOTE(review): the formula is fed raw row values here; if it was
            # evolved on scaled inputs, the same scaling should be applied
            # first — confirm against the training cell.
            features = self._row_values(row_dict, self.evolver.feature_names)
            f_val = float(self.compiled_formula(*features))
            # A NaN/inf output would turn the whole weighted signal into NaN.
            if not np.isfinite(f_val):
                return 0, 0
            return float(np.sign(f_val)), min(abs(f_val), 1.0)
        except Exception as exc:
            self._note_failure('genetic', exc)
            return 0, 0

    def _rare_vote(self, row_dict):
        """Bullish vote on a squeeze: narrow bands together with high volume."""
        try:
            # NOTE(review): defaults are used when a key is absent, so a frame
            # without a 'BB_Width' column can never register a squeeze — verify
            # the column name and its scale against the feature cell.
            bb_width = row_dict.get('BB_Width', 10)
            vol_ratio = row_dict.get('Volume_Ratio', 1)

            is_squeeze = bb_width < 5 and vol_ratio > 1.5
            if is_squeeze:
                return 1, 0.7  # Squeeze = bullish bias
            return 0, 0.3
        except Exception as exc:
            self._note_failure('rare', exc)
            return 0, 0

    def _rl_vote(self, row_dict):
        """Greedy action of the RL agent mapped to -1 / 0 / 1."""
        if self.agent is None:
            return 0, 0
        try:
            # Accept either attribute name for the state column list.
            names = getattr(self.env, 'state_features', None)
            if names is None:
                names = self.env.feature_cols
            state = np.array(self._row_values(row_dict, names), dtype=float)

            # Apply the environment's scaler when it has one, so the state
            # matches what the agent's bins were built from.
            scaler = getattr(self.env, 'scaler', None)
            if scaler is not None:
                state = scaler.transform(state.reshape(1, -1)).flatten()
            state = state.astype(np.float32)

            # Inference must be greedy; fall back for agents whose act() has
            # no `training` switch.
            try:
                action = self.agent.act(state, training=False)
            except TypeError:
                action = self.agent.act(state)

            # Confidence from Q-value spread
            discretize = getattr(self.agent, 'discretize', None)
            if discretize is None:
                discretize = self.agent.discretize_state
            q_vals = self.agent.q_table[discretize(state)]
            q_spread = np.max(q_vals) - np.mean(q_vals)
            return int(action) - 1, min(q_spread / 10, 1.0)  # Convert to -1, 0, 1
        except Exception as exc:
            self._note_failure('rl', exc)
            return 0, 0

    def get_ensemble_signal(self, row_dict, gasf_img=None):
        """Query all modalities and combine signals.

        Args:
            row_dict: Mapping of column name to value for one row.
            gasf_img: Optional GASF image for the visual vote.

        Returns:
            Dict with 'signal' (weighted mean vote), 'direction' (LONG / SHORT /
            NEUTRAL using a +/-0.2 threshold), 'votes', 'confidences' and
            'consensus_strength' (absolute signal).
        """
        votes = {}
        confidences = {}

        ballots = (
            ('visual', self._visual_vote(gasf_img)),
            ('genetic', self._genetic_vote(row_dict)),
            ('rare', self._rare_vote(row_dict)),
            ('rl', self._rl_vote(row_dict)),
        )
        for name, (vote, confidence) in ballots:
            votes[name] = vote
            confidences[name] = confidence

        # Weighted consensus
        total_weight = sum(confidences.values())
        if total_weight > 0:
            weighted_signal = sum(v * confidences[k] for k, v in votes.items()) / total_weight
        else:
            weighted_signal = 0

        return {
            'signal': weighted_signal,
            'direction': 'LONG' if weighted_signal > 0.2 else 'SHORT' if weighted_signal < -0.2 else 'NEUTRAL',
            'votes': votes,
            'confidences': confidences,
            'consensus_strength': abs(weighted_signal)
        }

    def backtest_ensemble(self, df, gasf_images=None, aligned_indices=None):
        """Backtest the full ensemble on historical data.

        Every 5th row from row 200 onward is scored against that row's
        'Future_5d_Return', so consecutive samples do not share a return window.

        Args:
            df: DataFrame with the feature columns and 'Future_5d_Return'.
            gasf_images: Optional sequence of GASF images.
            aligned_indices: Row positions in ``df`` matching ``gasf_images``.

        Returns:
            Dict with 'total_return', 'buy_hold', 'alpha', 'win_rate', 'sharpe'.
        """
        print("\nüìà ENSEMBLE BACKTEST")
        print("=" * 60)

        signals = []
        returns = []

        image_rows = None
        if gasf_images is not None and aligned_indices is not None:
            image_rows = np.asarray(aligned_indices)

        future_returns = df['Future_5d_Return']

        for idx in range(200, len(df) - 5, 5):  # Every 5th day
            future_ret = future_returns.iloc[idx]
            if np.isnan(future_ret):
                continue

            row = df.iloc[idx].to_dict()

            # Get GASF if available
            gasf_img = None
            if image_rows is not None:
                matching = np.where(image_rows == idx)[0]
                if len(matching) > 0:
                    gasf_img = gasf_images[matching[0]]

            result = self.get_ensemble_signal(row, gasf_img)

            signals.append(result['signal'])
            returns.append(future_ret)

        if not signals:
            print("No rows with a valid Future_5d_Return to backtest.")
            return {
                'total_return': 0.0,
                'buy_hold': 0.0,
                'alpha': 0.0,
                'win_rate': 0,
                'sharpe': 0.0
            }

        signals = np.array(signals)
        returns = np.array(returns)

        # Calculate strategy returns
        positions = np.sign(signals)
        strategy_returns = positions * returns

        # Metrics
        total_return = (1 + strategy_returns).prod() - 1
        buy_hold_return = (1 + returns).prod() - 1
        win_rate = (strategy_returns > 0).sum() / (strategy_returns != 0).sum() if (strategy_returns != 0).sum() > 0 else 0

        # Profit factor = gross profit / gross loss (not average win / average
        # loss, which is the payoff ratio). 999 marks "no losing trades".
        gross_profit = strategy_returns[strategy_returns > 0].sum()
        gross_loss = -strategy_returns[strategy_returns < 0].sum()
        profit_factor = gross_profit / gross_loss if gross_loss > 0 else 999

        # 252/5 periods per year because each sample is a 5-day return.
        sharpe = np.mean(strategy_returns) / (np.std(strategy_returns) + 1e-8) * np.sqrt(252/5)

        print(f"Strategy Return: {total_return*100:.2f}%")
        print(f"Buy & Hold Return: {buy_hold_return*100:.2f}%")
        print(f"Alpha Generated: {(total_return - buy_hold_return)*100:.2f}%")
        print(f"Win Rate: {win_rate*100:.1f}%")
        print(f"Profit Factor: {profit_factor:.2f}")
        print(f"Sharpe Ratio: {sharpe:.2f}")
        print(f"Trades: {(positions != 0).sum()}")

        return {
            'total_return': total_return,
            'buy_hold': buy_hold_return,
            'alpha': total_return - buy_hold_return,
            'win_rate': win_rate,
            'sharpe': sharpe
        }

# Create ensemble with all components
print("\nüéØ BUILDING ULTIMATE ENSEMBLE")
print("=" * 60)

# finder / detector come from the earlier visual and rare-state stages;
# best_formula, evolver, agent and env come from the evolution and RL stages.
ensemble = UltimateEnsemble(
    visual_finder=finder,
    rare_detector=detector,
    best_formula=best_formula,
    evolver=evolver,
    rl_agent=agent,
    env=env
)

print("‚úì Ensemble integrates:")
print("  1. Visual Pattern Recognition (GASF + KMeans)")
# Report the real number of inputs offered to the evolver instead of a
# hard-coded count.
print(f"  2. Genetic Formula Discovery ({len(available_features)} features)")
print("  3. Rare State Detection (Multi-feature anomaly)")
print("  4. RL Agent (Q-Learning policy)")

# Run backtest
backtest_results = ensemble.backtest_ensemble(df, aligned_gasf_images, aligned_indices)

In [None]:
# @title Stage 7: FINAL COMPARISON - AI vs Human Strategies

print("\n" + "=" * 70)
print("üéØ FINAL SHOWDOWN: AI vs HUMAN STRATEGIES")
print("=" * 70)

# === Compile All Results ===
# NOTE(review): the rows below are not measured on the same sample. The genetic
# formula is scored on the X_features / y_returns it was evolved on, the RL
# figures come from the last ~2 years only, and Buy & Hold spans every row.
# Treat the ranking as indicative, not as a like-for-like comparison.
final_comparison = {}

# 1. Best Human Strategy
best_human_name, best_human_stats = best_human
final_comparison['Best Human Strategy'] = {
    'name': best_human_name,
    'sharpe': best_human_stats['sharpe'],
    'return': best_human_stats['total_return'],
    'win_rate': best_human_stats['win_rate']
}

# 2. Best Genetic Formula
if best_formula is not None:
    func = gp.compile(best_formula, evolver.pset)
    pred = func(*[X_features[:,i] for i in range(len(available_features))])
    signals = np.sign(pred)
    strat_returns = signals * y_returns

    gp_return = (1 + strat_returns).prod() - 1
    # Win rate over days with an open position; a formula that never trades
    # scores 0 instead of dividing by zero.
    gp_active_days = (signals != 0).sum()
    gp_win_rate = (strat_returns > 0).sum() / gp_active_days if gp_active_days > 0 else 0
    gp_sharpe = np.mean(strat_returns) / (np.std(strat_returns) + 1e-8) * np.sqrt(252)

    final_comparison['Best Genetic Formula'] = {
        'formula': str(best_formula)[:60] + '...',
        'sharpe': gp_sharpe,
        'return': gp_return,
        'win_rate': gp_win_rate
    }

# 3. RL Agent
final_comparison['RL Agent'] = {
    'sharpe': rl_sharpe,
    'return': rl_total_return,
    'win_rate': rl_win_rate
}

# 4. Buy & Hold Baseline
bh_return = (1 + df['Future_1d_Return'].dropna()).prod() - 1
bh_sharpe = df['Future_1d_Return'].mean() / df['Future_1d_Return'].std() * np.sqrt(252)
final_comparison['Buy & Hold'] = {
    'sharpe': bh_sharpe,
    'return': bh_return,
    'win_rate': (df['Future_1d_Return'] > 0).mean()
}

# === Print Comparison ===
print(f"\n{'Strategy':<30} {'Sharpe':>10} {'Return':>12} {'Win Rate':>10}")
print("-" * 65)

# Rank by Sharpe ratio, best first.
sorted_comparison = sorted(final_comparison.items(), key=lambda x: x[1]['sharpe'], reverse=True)

for name, stats in sorted_comparison:
    sharpe = stats['sharpe']
    ret = stats['return'] * 100
    wr = stats['win_rate'] * 100
    marker = "üèÜ" if name == sorted_comparison[0][0] else "  "
    print(f"{marker} {name:<28} {sharpe:>10.2f} {ret:>11.1f}% {wr:>9.1f}%")

# === Winner Declaration ===
winner = sorted_comparison[0]
print("\n" + "=" * 70)
print(f"üèÜ WINNER: {winner[0]}")
print(f"   Sharpe: {winner[1]['sharpe']:.2f}")
print(f"   Return: {winner[1]['return']*100:.1f}%")
print(f"   Win Rate: {winner[1]['win_rate']*100:.1f}%")

if 'Genetic' in winner[0] or 'RL' in winner[0]:
    print("\nüéâ AI BEATS HUMAN STRATEGIES!")
elif winner[0] == 'Buy & Hold':
    # The passive baseline is neither an AI nor a human strategy.
    print("\nBuy & Hold baseline wins. Neither AI nor human strategies beat it.")
else:
    print("\n‚ö†Ô∏è Human strategies still win. Need more training or features.")

# === SAVE EVERYTHING ===
print("\n" + "=" * 70)
print("üíæ SAVING ALL DISCOVERIES")
print("=" * 70)

import json
import pickle


def _json_safe(value):
    """Convert numpy scalars/arrays (recursively) into plain JSON-serialisable types.

    json.dump rejects np.integer, np.bool_ and np.ndarray values; np.generic
    covers every numpy scalar type, including the floats handled before.
    """
    if isinstance(value, np.generic):
        return value.item()
    if isinstance(value, np.ndarray):
        return value.tolist()
    if isinstance(value, dict):
        return {key: _json_safe(val) for key, val in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(val) for val in value]
    return value


discoveries = {
    'ticker': ticker,
    'data_years': len(df) / 252,
    'features_count': _json_safe(feature_count),
    'comparison': _json_safe(final_comparison),
    'best_formula': str(best_formula) if best_formula else None,
    'human_strategies': _json_safe(human_results)
}

with open('hardcore_discoveries.json', 'w') as f:
    json.dump(discoveries, f, indent=2)
print("‚úì Saved: hardcore_discoveries.json")

# Save models: everything needed to rebuild the RL policy's state mapping.
models = {
    'q_table': agent.q_table,
    'bins': agent.bins,
    'scaler': env.scaler,
    'features': rl_features
}
with open('hardcore_models.pkl', 'wb') as f:
    pickle.dump(models, f)
print("‚úì Saved: hardcore_models.pkl")

# === RECOMMENDATIONS ===
# Static closing advice; nothing below depends on the results computed above.
recommendations_text = """
1. INCREASE EVOLUTION PARAMETERS:
   - Change pop_size=1000, generations=500
   - This will take longer but find better formulas
   
2. FOCUS ON BIG MOVES ONLY:
   - Filter to only trade when Big_Move_Up or Big_Move_Down predicted
   - These are >1% moves (like your 4.73% day)

3. ADD INTRADAY DATA:
   - 1-hour or 15-minute bars give more patterns
   - Use yfinance with interval='1h'

4. MULTI-ASSET:
   - Train on QQQ, IWM, individual stocks
   - Some assets have more predictable patterns

5. COMBINE SIGNALS:
   - Only trade when 3+ methods agree
   - (Genetic + RL + Human strategy)

6. REAL-TIME FEATURES:
   - VIX level and change
   - Sector rotation
   - Market breadth (A/D ratio)
   - Pre-market gaps

7. REGIME DETECTION:
   - Bull market vs Bear market
   - High vol vs Low vol
   - Train different models for each

8. WALK-FORWARD VALIDATION:
   - Train on 2000-2020, test on 2021-2024
   - Prevents overfitting
"""

section_rule = "=" * 70
print("\n" + section_rule)
print("üöÄ TO GET BIGGER GAINS (like your 4.73%):")
print(section_rule)
print(recommendations_text)
print(section_rule)