In [None]:
# ==============================================================================
# PHASE 0: SETUP & DATA LOADING
# ==============================================================================

# Install TA-Lib (requires system library first)
!wget -q http://prdownloads.sourceforge.net/ta-lib/ta-lib-0.4.0-src.tar.gz
!tar -xzf ta-lib-0.4.0-src.tar.gz
%cd ta-lib/
!./configure --prefix=/usr > /dev/null 2>&1
!make > /dev/null 2>&1
!make install > /dev/null 2>&1
%cd ..
!pip install -q TA-Lib

# Install other dependencies
!pip install -q yfinance scikit-learn lightgbm optuna pandas numpy

import numpy as np
import pandas as pd
import yfinance as yf
import talib
from datetime import datetime, timedelta
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
from sklearn.ensemble import HistGradientBoostingClassifier, RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, precision_score, recall_score
import warnings
warnings.filterwarnings('ignore')

# GPU check
# Purely informational: nothing else in this notebook uses torch, so any
# failure here is reported and the cell carries on. Specific exception classes
# are caught so that KeyboardInterrupt / SystemExit are no longer swallowed.
try:
    import torch
    print(f"üñ•Ô∏è GPU Available: {torch.cuda.is_available()}")
    if torch.cuda.is_available():
        print(f"   GPU: {torch.cuda.get_device_name(0)}")
except ImportError:
    print("üñ•Ô∏è PyTorch not installed (not required for this notebook)")
except Exception as exc:
    # torch imported but the CUDA query failed; say so instead of claiming
    # that PyTorch is missing.
    print(f"   GPU check skipped: {exc!r}")

# Download training data
TICKERS = ['SPY', 'QQQ', 'AAPL', 'MSFT', 'NVDA', 'TSLA', 'AMD', 'META', 'GOOGL', 'AMZN']
START_DATE = '2020-01-01'
END_DATE = datetime.now().strftime('%Y-%m-%d')

print(f"\nüì• Downloading data for {len(TICKERS)} tickers...")
all_data = {}
for ticker in TICKERS:
    df = yf.download(ticker, start=START_DATE, end=END_DATE, progress=False, auto_adjust=True)

    # Every later cell indexes all_data[ticker] for the tickers in TICKERS and
    # feeds the price columns straight into indicator functions, so an empty
    # download is reported here, with the ticker name, instead of surfacing
    # later as an unrelated-looking error.
    if df is None or df.empty:
        raise RuntimeError(
            f"No price data returned for {ticker} ({START_DATE} to {END_DATE}); "
            "re-run this cell or remove the ticker from TICKERS."
        )

    # Flatten two-level column labels down to the first level
    # (e.g. 'Close', 'High', ...), which is what the later cells index by.
    if isinstance(df.columns, pd.MultiIndex):
        df.columns = df.columns.get_level_values(0)

    all_data[ticker] = df
    print(f"  ‚úì {ticker}: {len(df)} rows")

print(f"\n‚úÖ Data loaded: {sum(len(d) for d in all_data.values())} total rows")

---
# 📈 PHASE 1: FORECAST ENGINE OPTIMIZATION

The `forecast_engine.py` uses a model to predict price direction and generates 24-day forecasts.

**What we'll optimize:**
1. Best model type (HistGB vs RandomForest vs LogisticRegression)
2. Optimal forecast horizon (5d, 7d, 10d, 14d or 21d) and label threshold
3. Decay parameters (when to start fading forecast)
4. ATR scaling factors

In [None]:
# ==============================================================================
# PHASE 1A: FEATURE ENGINEERING FOR FORECASTER
# ==============================================================================
def engineer_forecast_features(df):
    """Build the technical-indicator feature table used by the forecaster.

    Parameters
    ----------
    df : pandas.DataFrame
        Price history with 'Close', 'High', 'Low' and 'Volume' columns.

    Returns
    -------
    pandas.DataFrame
        Indexed like ``df`` with one column per feature. Infinities are
        converted to NaN and every NaN is then filled (backward, then forward,
        then with 0), so the result contains no missing values.
    """
    def _as_float(column):
        # TA-Lib is fed plain float64 arrays.
        return np.asarray(df[column].values, dtype='float64')

    close, high, low, volume = (_as_float(name) for name in ('Close', 'High', 'Low', 'Volume'))

    features = pd.DataFrame(index=df.index)

    # RSI at three lookbacks
    for period in (7, 14, 21):
        features[f'rsi_{period}'] = talib.RSI(close, timeperiod=period)

    # MACD line, signal line and histogram (library default periods)
    for name, values in zip(('macd', 'macd_signal', 'macd_hist'), talib.MACD(close)):
        features[name] = values

    # Trend / oscillator indicators, all on a 14-row window
    for name, indicator in (('adx', talib.ADX), ('cci', talib.CCI), ('willr', talib.WILLR)):
        features[name] = indicator(high, low, close, timeperiod=14)

    # Volatility: ATR (absolute and as % of price) and Bollinger-band geometry
    features['atr_14'] = talib.ATR(high, low, close, timeperiod=14)
    features['atr_pct'] = features['atr_14'] / close * 100
    band_upper, band_middle, band_lower = talib.BBANDS(close, timeperiod=20)
    features['bb_width'] = (band_upper - band_lower) / band_middle
    # The 1e-9 keeps the ratio finite when the bands collapse onto each other.
    features['bb_position'] = (close - band_lower) / (band_upper - band_lower + 1e-9)

    # EMA levels first, then price distance from each EMA in percent
    ema_periods = {'short': 8, 'med': 21, 'long': 50}
    for period in ema_periods.values():
        features[f'ema_{period}'] = talib.EMA(close, timeperiod=period)
    for label, period in ema_periods.items():
        ema = features[f'ema_{period}']
        features[f'trend_{label}'] = (close - ema) / ema * 100

    # Momentum (absolute change) and rate of change over 5 and 10 rows
    for prefix, indicator in (('mom', talib.MOM), ('roc', talib.ROC)):
        for period in (5, 10):
            features[f'{prefix}_{period}'] = indicator(close, timeperiod=period)

    # Volume-based features
    features['obv'] = talib.OBV(close, volume)
    features['vol_sma'] = talib.SMA(volume, timeperiod=20)
    features['vol_ratio'] = volume / (features['vol_sma'] + 1e-9)
    features['mfi'] = talib.MFI(high, low, close, volume, timeperiod=14)

    # Trailing simple returns
    close_series = pd.Series(close, index=df.index)
    for period in (1, 5, 10, 21):
        features[f'ret_{period}d'] = close_series.pct_change(period).values

    # Clean up: inf -> NaN, then fill every gap.
    # NOTE(review): the backward fill runs first, so leading NaN rows receive
    # values taken from later rows. Worth confirming this is acceptable for
    # the train/test evaluation built on top of these features.
    features = features.replace([np.inf, -np.inf], np.nan)
    return features.bfill().ffill().fillna(0)

def create_labels(df, horizon, threshold=0.02):
    """Label every row by its forward return: 0=BEARISH, 1=NEUTRAL, 2=BULLISH.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain a 'Close' column.
    horizon : int
        Number of rows to look ahead when computing the forward return.
    threshold : float, default 0.02
        A forward return above ``threshold`` is BULLISH, below ``-threshold``
        is BEARISH, anything else NEUTRAL.

    Returns
    -------
    pandas.Series
        Integer labels indexed like ``df``. Rows whose forward return cannot
        be computed (the last ``horizon`` rows) are labelled NEUTRAL, not NaN.
    """
    prices = df['Close'].values

    # Forward return over `horizon` rows; stays NaN where no future price exists.
    forward = np.full(len(prices), np.nan)
    forward[:-horizon] = (prices[horizon:] - prices[:-horizon]) / prices[:-horizon]

    # NaN compares False on both sides, so unknown rows fall through to NEUTRAL.
    # The bearish test is the outer one so that it takes precedence if both
    # conditions ever hold at once.
    codes = np.where(forward < -threshold, 0, np.where(forward > threshold, 2, 1))

    return pd.Series(codes.astype(int), index=df.index)

def create_adaptive_labels(df, horizon, atr_multiplier):
    """Label rows like ``create_labels`` but with a per-row, ATR-scaled threshold.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Close', 'High' and 'Low' columns.
    horizon : int
        Number of rows to look ahead when computing the forward return.
    atr_multiplier : float
        Scales ATR(14)/Close to obtain each row's threshold; the result is
        clipped to the range [0.005, 0.10].

    Returns
    -------
    pandas.Series
        Integer labels (0=BEARISH, 1=NEUTRAL, 2=BULLISH) indexed like ``df``.
        Rows where either the forward return or the threshold is NaN are
        labelled NEUTRAL.
    """
    def _column(name):
        return np.asarray(df[name].values, dtype='float64')

    close, high, low = _column('Close'), _column('High'), _column('Low')

    # Row-wise threshold: ATR as a fraction of price, scaled and bounded.
    band = np.clip(
        talib.ATR(high, low, close, timeperiod=14) / close * atr_multiplier,
        0.005,
        0.10,
    )

    # Forward return over `horizon` rows; NaN where no future price exists.
    forward = np.full(len(close), np.nan)
    forward[:-horizon] = (close[horizon:] - close[:-horizon]) / close[:-horizon]

    # Comparisons involving NaN are False, so such rows default to NEUTRAL (1).
    codes = np.where(forward < -band, 0, np.where(forward > band, 2, 1))

    return pd.Series(codes.astype(int), index=df.index)

print("‚úÖ Feature engineering functions defined")

In [None]:
# ==============================================================================
# PHASE 1B: FIND OPTIMAL FORECAST MODEL & HORIZON
# ==============================================================================
from itertools import product

print("üî¨ FORECASTER OPTIMIZATION")
print("="*70)

# Test configurations
HORIZONS = [5, 7, 10, 14, 21]
THRESHOLDS = [0.015, 0.02, 0.025, 0.03]

model_configs = {
    'HistGB': HistGradientBoostingClassifier(max_iter=200, max_depth=8, random_state=42),
    'RandomForest': RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42),
    'LogisticReg': LogisticRegression(max_iter=1000, class_weight='balanced')
}

results = []
# (ticker, model, horizon, threshold, error) for every evaluation that raised.
# Failures no longer vanish silently: they are summarised after the sweep.
failed_fits = []

for ticker in TICKERS[:5]:  # Test on subset first
    df = all_data[ticker]
    features = engineer_forecast_features(df)

    for horizon, threshold in product(HORIZONS, THRESHOLDS):
        labels = create_labels(df, horizon, threshold)

        # Align and clean; the last `horizon` rows have no realised forward
        # return, so they are dropped from both X and y.
        valid_idx = features.dropna().index.intersection(labels.dropna().index)
        X = features.loc[valid_idx].iloc[:-horizon]
        y = labels.loc[valid_idx].iloc[:-horizon]

        if len(X) < 200:
            continue

        # Chronological 80/20 split (no shuffling)
        split_idx = int(len(X) * 0.8)
        X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
        y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]

        # Scale using statistics from the training part only
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)

        for model_name, model in model_configs.items():
            try:
                model.fit(X_train_scaled, y_train)
                y_pred = model.predict(X_test_scaled)

                acc = accuracy_score(y_test, y_pred)

                # Calculate actual trading performance: forward `horizon`-row
                # return for each test row that still has a future price
                # inside the test window.
                test_df = df.iloc[split_idx:split_idx + len(y_test)]
                actual_returns = test_df['Close'].pct_change(horizon).shift(-horizon).iloc[:-horizon]

                # Only trade when model predicts BULLISH (2)
                buy_signals = y_pred[:-horizon] == 2
                if buy_signals.sum() > 0:
                    trade_returns = actual_returns.iloc[:len(buy_signals)][buy_signals]
                    win_rate = (trade_returns > 0).mean() * 100
                    avg_return = trade_returns.mean() * 100
                else:
                    win_rate = 0
                    avg_return = 0

                results.append({
                    'ticker': ticker,
                    'model': model_name,
                    'horizon': horizon,
                    'threshold': threshold,
                    'accuracy': acc,
                    'win_rate': win_rate,
                    'avg_return': avg_return,
                    'n_trades': buy_signals.sum()
                })
            except Exception as e:
                # Keep the sweep going, but remember what failed and why.
                failed_fits.append((ticker, model_name, horizon, threshold, repr(e)))

if failed_fits:
    print(f"\n{len(failed_fits)} configuration(s) raised and were skipped; first few:")
    for failure in failed_fits[:5]:
        print("  ", failure)

if not results:
    # Without this guard the groupby below fails with an opaque KeyError.
    raise RuntimeError(
        "No forecaster configuration produced a result "
        "(too little data per ticker, or every model fit failed)."
    )

# Analyze results
results_df = pd.DataFrame(results)

# NOTE(review): configurations are ranked by mean win rate alone, on the same
# hold-out window used to report performance, with no minimum trade count.
# A configuration with very few trades can therefore rank first.
print("\nüìä TOP 10 CONFIGURATIONS (by Win Rate):")
top_configs = results_df.groupby(['model', 'horizon', 'threshold']).agg({
    'win_rate': 'mean',
    'avg_return': 'mean',
    'n_trades': 'sum'
}).sort_values('win_rate', ascending=False).head(10)
print(top_configs)

# Best config
best_row = top_configs.iloc[0]
best_model, best_horizon, best_threshold = top_configs.index[0]
print(f"\nüèÜ BEST CONFIG: {best_model}, Horizon={best_horizon}d, Threshold={best_threshold}")
print(f"   Win Rate: {best_row['win_rate']:.1f}%")
print(f"   Avg Return: {best_row['avg_return']:.2f}%")

In [None]:
# ==============================================================================
# PHASE 1C: OPTIMIZE FORECAST DECAY PARAMETERS
# ==============================================================================
print("\nüî¨ OPTIMIZING FORECAST DECAY PARAMETERS")
print("="*70)

# Test different decay configurations
DECAY_STARTS = [5, 7, 10, 12, 15]
DECAY_RATES = [0.05, 0.1, 0.15, 0.2]  # How fast to decay per day

decay_results = []

for ticker in TICKERS[:3]:
    df = all_data[ticker]
    features = engineer_forecast_features(df)
    labels = create_labels(df, best_horizon, best_threshold)
    
    valid_idx = features.dropna().index.intersection(labels.dropna().index)
    X = features.loc[valid_idx].iloc[:-best_horizon]
    y = labels.loc[valid_idx].iloc[:-best_horizon]
    
    split_idx = int(len(X) * 0.8)
    X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
    y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]
    
    scaler = StandardScaler()
    X_train_scaled = scaler.fit_transform(X_train)
    X_test_scaled = scaler.transform(X_test)
    
    model = HistGradientBoostingClassifier(max_iter=200, max_depth=8, random_state=42)
    model.fit(X_train_scaled, y_train)
    
    # Get probabilities
    proba = model.predict_proba(X_test_scaled)
    
    test_df = df.iloc[split_idx:split_idx + len(y_test)]
    
    for decay_start, decay_rate in product(DECAY_STARTS, DECAY_RATES):
        # Simulate forecast accuracy with decay
        forecast_errors = []
        
        for i in range(len(proba) - 24):
            confidence = proba[i].max()
            predicted_direction = 1 if proba[i][2] > proba[i][0] else -1
            
            # Generate 24-day forecast with decay
            cumulative_error = 0
            for day in range(1, 25):
                # Apply decay
                if day > decay_start:
                    decay_factor = 1 - decay_rate * (day - decay_start)
                    decay_factor = max(0, decay_factor)
                else:
                    decay_factor = 1.0
                
                effective_conf = confidence * decay_factor
                
                if i + day < len(test_df):
                    actual_move = (test_df['Close'].iloc[i + day] / test_df['Close'].iloc[i] - 1) * 100
                    predicted_move = predicted_direction * effective_conf * 2  # Scaled prediction
                    cumulative_error += abs(actual_move - predicted_move)
            
            forecast_errors.append(cumulative_error / 24)
        
        decay_results.append({
            'ticker': ticker,
            'decay_start': decay_start,
            'decay_rate': decay_rate,
            'mean_error': np.mean(forecast_errors)
        })

decay_df = pd.DataFrame(decay_results)
best_decay = decay_df.groupby(['decay_start', 'decay_rate'])['mean_error'].mean().idxmin()
print(f"\nüèÜ BEST DECAY CONFIG: Start at day {best_decay[0]}, Rate={best_decay[1]}")

---
# 🤖 PHASE 2: AI RECOMMENDER OPTIMIZATION

The `ai_recommender.py` predicts BUY/HOLD/SELL signals.

**What we'll optimize:**
1. Best features (feature importance ranking)
2. Optimal label threshold (ATR-adaptive vs fixed)
3. Model hyperparameters
4. Confidence calibration

In [None]:
# ==============================================================================
# PHASE 2A: FEATURE IMPORTANCE ANALYSIS
# ==============================================================================
from sklearn.feature_selection import mutual_info_classif, f_classif

print("üî¨ AI RECOMMENDER FEATURE IMPORTANCE")
print("="*70)

# Pool every ticker into one sample: per-ticker (features, labels) pairs are
# collected here and stacked below.
all_features = []
all_labels = []

for ticker in TICKERS:
    df = all_data[ticker]
    features = engineer_forecast_features(df)
    labels = create_labels(df, 7, 0.02)  # 7-row horizon, fixed 2% threshold

    valid_idx = features.dropna().index.intersection(labels.dropna().index)
    # The last 7 rows have no realised forward return, so they are cut off.
    X, y = (frame.loc[valid_idx].iloc[:-7] for frame in (features, labels))

    all_features.append(X)
    all_labels.append(y)

X_combined = pd.concat(all_features)
y_combined = pd.concat(all_labels)

# Two univariate relevance scores per feature: mutual information and the
# ANOVA F-statistic.
mi_scores = mutual_info_classif(X_combined, y_combined, random_state=42)
f_scores, _ = f_classif(X_combined, y_combined)

feature_importance = pd.DataFrame({
    'feature': X_combined.columns,
    'mi_score': mi_scores,
    'f_score': f_scores
})

# Rescale each score by its own maximum, then average the two rescaled scores.
for raw_col, norm_col in (('mi_score', 'mi_norm'), ('f_score', 'f_norm')):
    feature_importance[norm_col] = feature_importance[raw_col] / feature_importance[raw_col].max()
feature_importance['combined'] = (feature_importance['mi_norm'] + feature_importance['f_norm']) / 2
feature_importance = feature_importance.sort_values('combined', ascending=False)

print("\nüìä TOP 15 FEATURES FOR AI RECOMMENDER:")
top_15 = feature_importance.head(15)
print(top_15[['feature', 'combined']].to_string())

TOP_FEATURES = top_15['feature'].tolist()
print(f"\n‚úÖ Selected {len(TOP_FEATURES)} top features")

In [None]:
# ==============================================================================
# PHASE 2B: ADAPTIVE THRESHOLD OPTIMIZATION
# ==============================================================================
print("\nüî¨ OPTIMIZING ADAPTIVE LABEL THRESHOLDS")
print("="*70)

# Note: create_adaptive_labels is already defined in Phase 1A cell
# No need to redefine it here

# Test different ATR multipliers
ATR_MULTIPLIERS = [0.5, 0.75, 1.0, 1.25, 1.5, 2.0]

threshold_results = []

for atr_mult in ATR_MULTIPLIERS:
    all_wr = []
    all_returns = []
    
    for ticker in TICKERS[:5]:
        df = all_data[ticker]
        features = engineer_forecast_features(df)[TOP_FEATURES]
        labels = create_adaptive_labels(df, 7, atr_mult)
        
        valid_idx = features.dropna().index.intersection(labels.dropna().index)
        X = features.loc[valid_idx].iloc[:-7]
        y = labels.loc[valid_idx].iloc[:-7]
        
        if len(X) < 200:
            continue
        
        split_idx = int(len(X) * 0.8)
        X_train, X_test = X.iloc[:split_idx], X.iloc[split_idx:]
        y_train, y_test = y.iloc[:split_idx], y.iloc[split_idx:]
        
        scaler = StandardScaler()
        X_train_scaled = scaler.fit_transform(X_train)
        X_test_scaled = scaler.transform(X_test)
        
        model = HistGradientBoostingClassifier(max_iter=200, random_state=42)
        model.fit(X_train_scaled, y_train)
        y_pred = model.predict(X_test_scaled)
        
        test_df = df.iloc[split_idx:split_idx + len(y_test)]
        actual_returns = test_df['Close'].pct_change(7).shift(-7).iloc[:-7]
        
        buy_signals = y_pred[:-7] == 2
        if buy_signals.sum() > 0:
            trade_returns = actual_returns.iloc[:len(buy_signals)][buy_signals]
            all_wr.append((trade_returns > 0).mean() * 100)
            all_returns.append(trade_returns.mean() * 100)
    
    if all_wr:
        threshold_results.append({
            'atr_multiplier': atr_mult,
            'avg_win_rate': np.mean(all_wr),
            'avg_return': np.mean(all_returns)
        })

threshold_df = pd.DataFrame(threshold_results)
print("\nüìä ATR MULTIPLIER COMPARISON:")
print(threshold_df.to_string(index=False))

best_atr_mult = threshold_df.loc[threshold_df['avg_win_rate'].idxmax(), 'atr_multiplier']
print(f"\nüèÜ BEST ATR MULTIPLIER: {best_atr_mult}")

In [None]:
# ==============================================================================
# PHASE 2C: HYPERPARAMETER OPTIMIZATION WITH OPTUNA
# ==============================================================================
import optuna
from optuna.samplers import TPESampler

print("\nüî¨ HYPERPARAMETER OPTIMIZATION (Optuna)")
print("="*70)

# Prepare combined dataset
all_X = []
all_y = []
for ticker in TICKERS:
    df = all_data[ticker]
    features = engineer_forecast_features(df)[TOP_FEATURES]
    labels = create_adaptive_labels(df, 7, best_atr_mult)

    valid_idx = features.dropna().index.intersection(labels.dropna().index)
    X = features.loc[valid_idx].iloc[:-7]
    y = labels.loc[valid_idx].iloc[:-7]
    all_X.append(X)
    all_y.append(y)

X_full = pd.concat(all_X)
y_full = pd.concat(all_y)

# TimeSeriesSplit cuts folds by row position and assumes rows are in time
# order. The concatenation above is ordered ticker by ticker, so without
# re-sorting each "later" fold would mostly be a different ticker rather than
# a later period. Sort the pooled sample by date; the stable sort and the
# shared permutation keep X and y aligned.
chrono_order = np.argsort(X_full.index.values, kind='stable')
X_full = X_full.iloc[chrono_order]
y_full = y_full.iloc[chrono_order]

# Labels look 7 rows ahead within each ticker. With all tickers interleaved by
# date that is roughly 7 * len(TICKERS) pooled rows, which are left out
# between each training fold and its validation fold.
CV_GAP = 7 * len(TICKERS)

# Scale
# NOTE(review): the scaler is fitted on the full sample before
# cross-validation, so validation rows contribute to the scaling statistics.
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X_full)

def objective(trial):
    """Optuna objective: mean time-ordered CV accuracy for one parameter draw."""
    params = {
        'max_iter': trial.suggest_int('max_iter', 100, 500),
        'max_depth': trial.suggest_int('max_depth', 3, 15),
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3),
        'min_samples_leaf': trial.suggest_int('min_samples_leaf', 5, 50),
        'l2_regularization': trial.suggest_float('l2_regularization', 0.0, 1.0),
    }

    model = HistGradientBoostingClassifier(**params, random_state=42)

    # Time series cross-validation on the chronologically ordered sample
    tscv = TimeSeriesSplit(n_splits=5, gap=CV_GAP)
    scores = cross_val_score(model, X_scaled, y_full, cv=tscv, scoring='accuracy')

    return scores.mean()

# Run optimization
study = optuna.create_study(direction='maximize', sampler=TPESampler(seed=42))
study.optimize(objective, n_trials=50, show_progress_bar=True)

print(f"\nüèÜ BEST HYPERPARAMETERS:")
for key, value in study.best_params.items():
    print(f"   {key}: {value}")
print(f"   Best CV Accuracy: {study.best_value:.4f}")

BEST_AI_PARAMS = study.best_params

---
# 💰 PHASE 3: RISK MANAGER OPTIMIZATION

Optimize position sizing and risk parameters.

In [None]:
# ==============================================================================
# PHASE 3: RISK MANAGER OPTIMIZATION
# ==============================================================================
print("\nüî¨ RISK MANAGER OPTIMIZATION")
print("="*70)

# Test different risk parameters on backtest
RISK_PER_TRADE = [0.005, 0.01, 0.015, 0.02, 0.025]  # 0.5% to 2.5%
MAX_DAILY_LOSS = [0.01, 0.02, 0.03, 0.05]  # 1% to 5%
MAX_POSITIONS = [3, 5, 7, 10]

risk_results = []

for risk_per_trade, max_daily, max_pos in product(RISK_PER_TRADE[:3], MAX_DAILY_LOSS[:3], MAX_POSITIONS[:3]):
    # Simulate portfolio with these risk params
    account = 10000
    daily_pnl = 0
    trades = 0
    wins = 0
    total_pnl = 0
    max_drawdown = 0
    peak = account
    
    for ticker in TICKERS[:5]:
        df = all_data[ticker]
        
        for i in range(100, len(df) - 10, 5):  # Every 5 days
            # Check daily loss limit
            if daily_pnl < -max_daily * account:
                continue
            
            # Position size based on risk
            position_size = account * risk_per_trade / 0.05  # Assume 5% stop
            position_size = min(position_size, account / max_pos)
            
            # Simulate trade (random for now, just testing risk limits)
            returns = (df['Close'].iloc[i+5] / df['Close'].iloc[i] - 1)
            trade_pnl = position_size * returns
            
            account += trade_pnl
            daily_pnl += trade_pnl
            total_pnl += trade_pnl
            trades += 1
            if trade_pnl > 0:
                wins += 1
            
            # Track drawdown
            if account > peak:
                peak = account
            drawdown = (peak - account) / peak
            max_drawdown = max(max_drawdown, drawdown)
    
    if trades > 0:
        risk_results.append({
            'risk_per_trade': risk_per_trade,
            'max_daily_loss': max_daily,
            'max_positions': max_pos,
            'total_return': (account - 10000) / 10000 * 100,
            'max_drawdown': max_drawdown * 100,
            'win_rate': wins / trades * 100,
            'sharpe': total_pnl / (np.std([total_pnl]) + 0.001)
        })

risk_df = pd.DataFrame(risk_results)
risk_df['risk_adj_return'] = risk_df['total_return'] / (risk_df['max_drawdown'] + 1)

print("\nüìä TOP 5 RISK CONFIGURATIONS (by Risk-Adjusted Return):")
print(risk_df.nlargest(5, 'risk_adj_return').to_string(index=False))

best_risk = risk_df.loc[risk_df['risk_adj_return'].idxmax()]
print(f"\nüèÜ OPTIMAL RISK SETTINGS:")
print(f"   Risk per Trade: {best_risk['risk_per_trade']*100:.1f}%")
print(f"   Max Daily Loss: {best_risk['max_daily_loss']*100:.1f}%")
print(f"   Max Positions: {int(best_risk['max_positions'])}")

---
# 🌍 PHASE 4: MARKET REGIME OPTIMIZATION

In [None]:
# ==============================================================================
# PHASE 4: MARKET REGIME THRESHOLDS
# ==============================================================================
print("\nüî¨ MARKET REGIME THRESHOLD OPTIMIZATION")
print("="*70)

# Test different regime classification thresholds
BULL_THRESHOLDS = [3, 5, 7, 10]  # 21d return > X% = bull
BEAR_THRESHOLDS = [-3, -5, -7, -10]
ADX_TREND_THRESHOLDS = [20, 25, 30, 35]

def classify_regime(df, bull_thr, bear_thr, adx_thr):
    """Tag every row of ``df`` as 'bull', 'bear' or 'sideways'.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain 'Close', 'High' and 'Low' columns.
    bull_thr : float
        Trailing 21-row return (in percent) above which a row can be 'bull'.
    bear_thr : float
        Trailing 21-row return (in percent) below which a row is 'bear'.
    adx_thr : float
        ADX(14) level that must also be exceeded for a 'bull' row.

    Returns
    -------
    pandas.Series
        Regime names indexed like ``df``. Rows meeting neither rule, including
        rows where the return or ADX is NaN, are 'sideways'.
    """
    close, high, low = (
        np.asarray(df[name].values, dtype='float64') for name in ('Close', 'High', 'Low')
    )
    n_rows = len(close)

    # Trailing 21-row return in percent; undefined (NaN) for the first 21 rows.
    trailing_ret = np.full(n_rows, np.nan)
    trailing_ret[21:] = (close[21:] - close[:-21]) / close[:-21] * 100

    trend_strength = talib.ADX(high, low, close, timeperiod=14)

    # Bull needs both a strong return and a strong trend; bear only the return.
    is_bull = (trailing_ret > bull_thr) & (trend_strength > adx_thr)
    is_bear = trailing_ret < bear_thr

    regimes = np.full(n_rows, 'sideways', dtype=object)
    regimes[is_bull] = 'bull'
    regimes[is_bear] = 'bear'  # written last, so bear wins if both masks match

    return pd.Series(regimes, index=df.index)

regime_results = []

# Forward 5-row returns (in percent) depend only on the ticker, not on the
# thresholds being swept, so they are computed once here instead of once per
# threshold combination.
forward_returns = {
    symbol: all_data[symbol]['Close'].pct_change(5).shift(-5) * 100
    for symbol in TICKERS[:5]
}

for bull_thr, bear_thr, adx_thr in product(BULL_THRESHOLDS, BEAR_THRESHOLDS, ADX_TREND_THRESHOLDS):
    regime_performance = {'bull': [], 'bear': [], 'sideways': []}

    for ticker in TICKERS[:5]:
        df = all_data[ticker]
        regimes = classify_regime(df, bull_thr, bear_thr, adx_thr)

        # Calculate forward returns by regime
        fwd_ret = forward_returns[ticker]

        for regime in ['bull', 'bear', 'sideways']:
            mask = regimes == regime
            # Ignore a regime for this ticker unless it has more than 10 rows
            if mask.sum() > 10:
                regime_returns = fwd_ret[mask].dropna()
                regime_performance[regime].extend(regime_returns.tolist())

    # We want bull regime to have positive returns, bear negative
    # (a regime with no observations contributes 0)
    bull_ret = np.mean(regime_performance['bull']) if regime_performance['bull'] else 0
    bear_ret = np.mean(regime_performance['bear']) if regime_performance['bear'] else 0

    # Score: bull should be positive, bear should be negative (good classification)
    score = bull_ret - bear_ret  # Higher is better

    regime_results.append({
        'bull_thr': bull_thr,
        'bear_thr': bear_thr,
        'adx_thr': adx_thr,
        'bull_ret': bull_ret,
        'bear_ret': bear_ret,
        'score': score
    })

regime_df = pd.DataFrame(regime_results)
print("\nüìä TOP 5 REGIME CONFIGURATIONS:")
print(regime_df.nlargest(5, 'score').to_string(index=False))

best_regime = regime_df.loc[regime_df['score'].idxmax()]
print(f"\nüèÜ OPTIMAL REGIME THRESHOLDS:")
print(f"   Bull: 21d return > {best_regime['bull_thr']}%")
print(f"   Bear: 21d return < {best_regime['bear_thr']}%")
print(f"   ADX Trend: > {best_regime['adx_thr']}")

---
# 📦 PHASE 5: SAVE OPTIMIZED CONFIGURATION

In [None]:
# ==============================================================================
# PHASE 5: GENERATE OPTIMIZED CONFIGURATION FILE
# ==============================================================================
import json

print("\nüì¶ SAVING OPTIMIZED CONFIGURATION")
print("="*70)

# Timestamp first, then one section per production module. The sections are
# assembled below in the order they appear in the output file.
generated_at = datetime.now().isoformat()

training_data_section = dict(tickers=TICKERS, start_date=START_DATE, end_date=END_DATE)

# Values taken from pandas/NumPy results are cast to built-in int/float before
# they are handed to json.
forecast_section = dict(
    best_model=best_model,
    horizon_days=int(best_horizon),
    threshold=float(best_threshold),
    decay_start_day=int(best_decay[0]),
    decay_rate=float(best_decay[1]),
)

recommender_section = dict(
    top_features=TOP_FEATURES,
    atr_multiplier=float(best_atr_mult),
    model_params=BEST_AI_PARAMS,
)

risk_section = dict(
    risk_per_trade=float(best_risk['risk_per_trade']),
    max_daily_loss=float(best_risk['max_daily_loss']),
    max_positions=int(best_risk['max_positions']),
)

regime_section = dict(
    bull_threshold=float(best_regime['bull_thr']),
    bear_threshold=float(best_regime['bear_thr']),
    adx_trend_threshold=float(best_regime['adx_thr']),
)

optimized_config = {
    'generated_at': generated_at,
    'training_data': training_data_section,
    'forecast_engine': forecast_section,
    'ai_recommender': recommender_section,
    'risk_manager': risk_section,
    'market_regime': regime_section,
}

# Save as JSON (written to the current working directory), then echo it
with open('OPTIMIZED_FULL_STACK_CONFIG.json', 'w') as f:
    json.dump(optimized_config, f, indent=2)

print(json.dumps(optimized_config, indent=2))
print("\n‚úÖ Configuration saved to OPTIMIZED_FULL_STACK_CONFIG.json")

In [None]:
# ==============================================================================
# PHASE 5B: GENERATE PYTHON CONFIG FILE
# ==============================================================================
# Render the tuned parameters as the source text of an importable Python module.
# Values are interpolated with plain f-string formatting, so the generated file
# is only valid Python if each interpolated value formats as a Python literal.
# Doubled braces ({{ and }}) are literal braces in the output.
# NOTE(review): this cell reads best_model, best_horizon, best_threshold,
# best_decay, TOP_FEATURES, best_atr_mult, BEST_AI_PARAMS, best_risk and
# best_regime, all of which are set by the earlier optimisation cells.
config_code = f'''"""
OPTIMIZED FULL STACK CONFIGURATION
===================================
Generated by COLAB_FULL_STACK_OPTIMIZER.ipynb
Date: {datetime.now().isoformat()}

This file contains optimized parameters for all production modules.
"""

# ==============================================================================
# FORECAST ENGINE SETTINGS
# ==============================================================================
FORECAST_CONFIG = {{
    'model_type': '{best_model}',
    'horizon_days': {best_horizon},
    'prediction_threshold': {best_threshold},
    'decay_start_day': {best_decay[0]},
    'decay_rate': {best_decay[1]},
    'max_forecast_days': 24
}}

# ==============================================================================
# AI RECOMMENDER SETTINGS
# ==============================================================================
AI_RECOMMENDER_CONFIG = {{
    'top_features': {TOP_FEATURES},
    'atr_multiplier': {best_atr_mult},
    'model_params': {BEST_AI_PARAMS}
}}

# ==============================================================================
# RISK MANAGER SETTINGS
# ==============================================================================
RISK_CONFIG = {{
    'risk_per_trade': {best_risk['risk_per_trade']},  # {best_risk['risk_per_trade']*100:.1f}%
    'max_daily_loss': {best_risk['max_daily_loss']},  # {best_risk['max_daily_loss']*100:.1f}%
    'max_positions': {int(best_risk['max_positions'])},
    'default_stop_loss_pct': 0.05,  # 5%
    'default_take_profit_pct': 0.10  # 10%
}}

# ==============================================================================
# MARKET REGIME SETTINGS
# ==============================================================================
REGIME_CONFIG = {{
    'bull_threshold': {best_regime['bull_thr']},  # 21d return > X%
    'bear_threshold': {best_regime['bear_thr']},  # 21d return < X%
    'adx_trend_threshold': {best_regime['adx_thr']},
    'lookback_days': 21
}}

# ==============================================================================
# COMBINED PRODUCTION STACK CONFIG
# ==============================================================================
PRODUCTION_CONFIG = {{
    'forecast': FORECAST_CONFIG,
    'ai_recommender': AI_RECOMMENDER_CONFIG,
    'risk': RISK_CONFIG,
    'regime': REGIME_CONFIG
}}
'''

# Write the rendered module to the current working directory, then echo it so
# the generated values can be checked in the cell output.
with open('optimized_stack_config.py', 'w') as f:
    f.write(config_code)

print(config_code)
print("\n‚úÖ Python config saved to optimized_stack_config.py")

---
# 🎯 SUMMARY

Run this notebook in Colab. Every model trained here is a scikit-learn estimator, which runs on the CPU, so a GPU runtime is optional.

After running, copy these files back to your project:
1. `OPTIMIZED_FULL_STACK_CONFIG.json`
2. `optimized_stack_config.py`

Then update your modules to use these optimized parameters!