# Load Data

In [1]:
import os
import pandas as pd
import numpy as np
from ta.volatility import AverageTrueRange
from ta.momentum    import RSIIndicator
from ta.trend       import MACD, ADXIndicator
from ta.volatility  import AverageTrueRange
from itertools import product
from sklearn.model_selection import cross_val_score, TimeSeriesSplit
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
from catboost import CatBoostRegressor
from sklearn.metrics import classification_report, root_mean_squared_error, mean_squared_error, mean_absolute_error, r2_score
import matplotlib.pyplot as plt
from datetime import timedelta
from collections import defaultdict
from sklearn.ensemble import RandomForestRegressor, StackingRegressor
import optuna
import logging
import joblib
import json
import seaborn as sns
from sklearn.isotonic import IsotonicRegression
import shap
import warnings

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", message=".*There are no meaningful features.*", category=UserWarning)
optuna.logging.set_verbosity(optuna.logging.INFO)

# === Load Data ===
#folder_path = "/Users/francopapalardo-aleo/Desktop/repos/TradingAI 2/data/"
folder_path = "./../data/"
column_names = ['datetime', 'open', 'high', 'low', 'close', 'volume']
df_list = []
plt.rcParams['font.family'] = 'Segoe UI Emoji'

for filename in os.listdir(folder_path):
    if filename.endswith(('.csv', '.txt')):
        file_path = os.path.join(folder_path, filename)
        df = pd.read_csv(file_path, sep=';', header=None, names=column_names)
        df['source_file'] = filename
        df_list.append(df)

df = pd.concat(df_list, ignore_index=True)
df['datetime'] = pd.to_datetime(df['datetime'], utc=True).dt.tz_convert('America/New_York')

df = df.drop_duplicates(subset='datetime', keep='first').reset_index(drop=True)
df = df.sort_values('datetime').reset_index(drop=True)
df[['open', 'high', 'low', 'close', 'volume']] = df[['open', 'high', 'low', 'close', 'volume']].astype(float)

# Base time features
df['hour'] = df['datetime'].dt.hour + df['datetime'].dt.minute / 60
df['minute'] = df['datetime'].dt.minute
df['day_of_week'] = df['datetime'].dt.dayofweek  # 0 = Monday

# Custom session flags (adjust if needed)       # Regular Trading Hours
df['is_premarket'] = df['hour'].between(7, 9.5)
df['is_lunch'] = df['hour'].between(11.5, 13.5)
df['is_postmarket'] = df['hour'].between(15.5, 20)
df['is_after_hours'] = df['hour'].between(20, 23.5)


# Initialize features or indicators

In [2]:
df['atr_5']         = AverageTrueRange(df['high'], df['low'], df['close'], window=5).average_true_range().shift(1)
df['atr_pct']       = df['atr_5'] / df['close']
df['candle_range']  = (df['high'] - df['low']).shift(1)
df['return_1']      = df['close'].pct_change(1).shift(1)
df['rsi_6']         = RSIIndicator(df['close'], window=6).rsi().shift(1)

macd = MACD(df['close'], window_fast=6, window_slow=13, window_sign=5)
df['macd_fast']     = macd.macd().shift(1)
df['macd_fast_diff']= macd.macd_diff().shift(1)

def choppiness_index(high, low, close, length=14):
    tr = AverageTrueRange(high=high, low=low, close=close, window=length).average_true_range().shift(1)
    atr_sum = tr.rolling(length).sum()
    high_max = high.rolling(length).max()
    low_min = low.rolling(length).min()
    return 100 * np.log10(atr_sum / (high_max - low_min)) / np.log10(length)

df['chop_index'] = choppiness_index(df['high'], df['low'], df['close']).shift(1)
# EMAs
df['ema_9'] = df['close'].ewm(span=9, adjust=False).mean().shift(1)
df['ema_21'] = df['close'].ewm(span=21, adjust=False).mean().shift(1)
df['ema_dist'] = df['close'] - df['ema_9']

# Wick and body metrics
df['upper_wick'] = (df['high'] - df[['open', 'close']].max(axis=1)).clip(lower=0)
df['lower_wick'] = (df[['open', 'close']].min(axis=1) - df['low']).clip(lower=0)
df['body_pct'] = (df['close'] - df['open']).abs() / (df['high'] - df['low'] + 1e-9)

# VWAP diff (basic form, for minute-level data)
df['vwap'] = (df['close'] * df['volume']).cumsum() / df['volume'].cumsum()
df['vwap_diff'] = (df['close'] - df['vwap']).shift(1)

# Time-based
df['hour'] = df['datetime'].dt.hour
df['is_afternoon'] = (df['hour'] >= 12).astype(int)
df['is_morning'] = (df['hour'] < 12).astype(int)
df['is_trending'] = (df['chop_index'] < 38).astype(int)    # strong trend
df['is_choppy']   = (df['chop_index'] > 61).astype(int)    # high noise

adx = ADXIndicator(high=df['high'], low=df['low'], close=df['close'], window=14)
df['adx_14'] = adx.adx().shift(1)
df['is_strong_trend'] = (df['adx_14'] > 25).astype(int)

# Rolling historical volatility
df['volatility'] = df['close'].pct_change().rolling(20).std().shift(1)

# Quantile thresholds (adaptive to market)
vol_q25 = df['volatility'].quantile(0.25)
vol_q75 = df['volatility'].quantile(0.75)

# Regime flags
df['is_low_vol'] = (df['volatility'] <= vol_q25).astype(int)
df['is_high_vol'] = (df['volatility'] >= vol_q75).astype(int)

df['trend_x_vol'] = df['is_trending'] * df['volatility']
df['chop_x_body'] = df['is_choppy'] * df['body_pct']

# # === Strategy Setup ===
param_grid_strategy = {
    'SL_ATR_MULT': [1.0, 1.5, 0.5],
    'TP_ATR_MULT': [2.0, 3.0, 4.0, 5.0, 6.0],
    'TRAIL_START_MULT': [0.5, 1.0],
    'TRAIL_STOP_MULT': [0.5, 1.0],
    'TICK_VALUE': [5], 
}

keys, values = zip(*param_grid_strategy.items())
combinations = [dict(zip(keys, v)) for v in product(*values)]

features = [
    'atr_5',
    'atr_pct',
    'candle_range',
    'return_1',
    'rsi_6',
    'macd_fast_diff',
    'macd_fast',
    'chop_index',
    'ema_9',
    'ema_21',
    'ema_dist',
    'upper_wick',
    'lower_wick',
    'body_pct',
    'vwap_diff',
    'hour',
    'is_afternoon',
    'is_trending',
    'is_choppy',
    'volatility',
    'is_low_vol',
    'is_high_vol',
    'trend_x_vol',
    'chop_x_body'
]

avoid_funcs = {}

def session_key(ts: pd.Timestamp) -> pd.Timestamp:
    # shift back 18 h, then floor to midnight to get a unique session “date”
    return (ts - timedelta(hours=18)).normalize()

def is_same_session(start_time: pd.Timestamp, end_time: pd.Timestamp) -> bool:
    return session_key(start_time) == session_key(end_time)

# Declare Combo function for serialization

In [3]:
def evaluate_regression_combo(
    X_test, preds, labeled, df,
    avoid_funcs,
    SL_ATR_MULT, TP_ATR_MULT, TRAIL_START_MULT, TRAIL_STOP_MULT, TICK_VALUE,
    is_same_session,
    long_thresh=0.003,
    short_thresh=-0.003,
    confidence_scores=None
):
    temp_trades_data = []
    skipped_trades = 0
    avoid_hits = defaultdict(int)
    long_trades = 0
    short_trades = 0

    i = 0
    X_test_idx = X_test.index.to_list()

    while i < len(X_test_idx):
        idx = X_test_idx[i]
        row = labeled.loc[idx]
        pred_return = preds[i]
        confidence = confidence_scores[i] if confidence_scores is not None else 1.0

        if confidence < 0.6:
            skipped_trades += 1
            i += 1
            continue

        if pred_return >= long_thresh:
            side = 'long'
            long_trades += 1
        elif pred_return <= short_thresh:
            side = 'short'
            short_trades += 1
        else:
            i += 1
            continue  # skip neutral signals

        # Trade filters
        skip_trade = False
        for name, f in avoid_funcs.items():
            try:
                if f(row):
                    avoid_hits[name] += 1
                    skip_trade = True
            except:
                continue
        if skip_trade or idx >= len(df) - 6:
            skipped_trades += 1
            i += 1
            continue

        # --- Trade Simulation ---
        entry_price = row['close']
        entry_time = row['datetime']
        atr = row['atr_5']

        # Stop Loss (fixed volatility-based)
        sl_price = entry_price - SL_ATR_MULT * atr if side == 'long' else entry_price + SL_ATR_MULT * atr

        # Take Profit (dynamic, from model prediction, clipped)
        expected_move = abs(pred_return) * entry_price
        min_tp = 0.001 * entry_price  # minimum 0.1% move
        max_tp = TP_ATR_MULT * atr
        tp_move = np.clip(expected_move, min_tp, max_tp)
        tp_price = entry_price + tp_move if side == 'long' else entry_price - tp_move

        # Trailing logic
        trail_trigger = entry_price + TRAIL_START_MULT * atr if side == 'long' else entry_price - TRAIL_START_MULT * atr
        trail_stop = None

        max_price, min_price = entry_price, entry_price
        exit_price, exit_time = None, None

        fwd_idx = idx + 1
        while fwd_idx < len(df):
            fwd_row = df.loc[fwd_idx]
            max_price = max(max_price, fwd_row['high'])
            min_price = min(min_price, fwd_row['low'])

            if (side == 'long' and fwd_row['low'] <= sl_price) or (side == 'short' and fwd_row['high'] >= sl_price):
                exit_price = sl_price
                exit_time = fwd_row['datetime']
                break

            if (side == 'long' and fwd_row['high'] >= tp_price) or (side == 'short' and fwd_row['low'] <= tp_price):
                exit_price = tp_price
                exit_time = fwd_row['datetime']
                break

            if side == 'long' and fwd_row['high'] >= trail_trigger:
                trail_stop = fwd_row['close'] - TRAIL_STOP_MULT * atr
            if side == 'short' and fwd_row['low'] <= trail_trigger:
                trail_stop = fwd_row['close'] + TRAIL_STOP_MULT * atr

            if trail_stop:
                if (side == 'long' and fwd_row['low'] <= trail_stop) or (side == 'short' and fwd_row['high'] >= trail_stop):
                    exit_price = trail_stop
                    exit_time = fwd_row['datetime']
                    break

            fwd_idx += 1

        if exit_price is None:
            exit_price = df.loc[len(df) - 1, 'close']
            exit_time = df.loc[len(df) - 1, 'datetime']

        if not is_same_session(entry_time, exit_time):
            i += 1
            continue

        GROSS_PNL = (exit_price - entry_price) * TICK_VALUE if side == 'long' else (entry_price - exit_price) * TICK_VALUE
        COMMISSION = 3.98
        pnl = GROSS_PNL - COMMISSION

        mfe = max_price - entry_price if side == 'long' else entry_price - min_price
        mae = entry_price - min_price if side == 'long' else max_price - entry_price

        temp_trades_data.append({
            'entry_time': entry_time,
            'exit_time': exit_time,
            'side': side,
            'entry_price': entry_price,
            'exit_price': exit_price,
            'pnl': pnl,
            'mfe': mfe,
            'mae': mae,
            'gross_pnl': GROSS_PNL,
            'confidence': confidence,
            'pred_return': pred_return,
        })

        while i < len(X_test_idx) and labeled.loc[X_test_idx[i]]['datetime'] <= exit_time:
            i += 1
        continue

    # === Metrics ===
    results = pd.DataFrame(temp_trades_data)
    pnl_total = results['pnl'].sum() if not results.empty else 0
    trades = len(results)
    win_rate = (results['pnl'] > 0).mean() if not results.empty else 0
    expectancy = results['pnl'].mean() if not results.empty else 0
    profit_factor = results[results['pnl'] > 0]['pnl'].sum() / abs(results[results['pnl'] < 0]['pnl'].sum()) if not results.empty and (results['pnl'] < 0).any() else np.nan
    sharpe = results['pnl'].mean() / (results['pnl'].std() + 1e-9) * np.sqrt(trades) if trades > 1 else 0

    return {
        'pnl': pnl_total,
        'trades': trades,
        'win_rate': win_rate,
        'expectancy': expectancy,
        'profit_factor': profit_factor,
        'sharpe': sharpe,
        'long_trades': long_trades,
        'short_trades': short_trades,
        'avoid_hits': dict(avoid_hits),
        'threshold': long_thresh,
        'results': results
    }

# Cleanup

In [4]:
# def compute_future_return_labels(df: pd.DataFrame, lookahead: int, is_same_session_fn) -> pd.DataFrame:
#     """
#     Computes future return (regression label) and trade direction for a given lookahead period.

#     Parameters:
#     - df: DataFrame with at least ['datetime', 'close']
#     - lookahead: How many bars ahead to evaluate performance
#     - is_same_session_fn: Function that checks if two datetimes are in the same session

#     Returns:
#     - df_labeled: DataFrame with ['future_return', 'trade_dir'] added
#     """
#     future_returns = []
#     trade_dirs = []

#     for idx in range(len(df) - lookahead):
#         entry_price = df['close'].iloc[idx]
#         future_price = df['close'].iloc[idx + lookahead]

#         if pd.isna(entry_price) or pd.isna(future_price):
#             future_returns.append(np.nan)
#             trade_dirs.append(None)
#             continue

#         future_return = (future_price / entry_price) - 1
#         future_returns.append(future_return)
#         trade_dirs.append('long' if future_return > 0 else 'short')

#     # Align output with original df
#     df_labeled = df.iloc[:len(future_returns)].copy()
#     df_labeled['future_return'] = future_returns
#     df_labeled['trade_dir'] = trade_dirs

#     # Drop NaNs
#     df_labeled = df_labeled.dropna(subset=['future_return'])

#     return df_labeled 

In [5]:
def compute_execution_labels(
    df: pd.DataFrame,
    lookahead: int,
    is_same_session_fn,
    sl_atr_mult: float = 1.5,
    tp_atr_mult: float = 3.0,
    tick_value: float = 5.0,
) -> pd.DataFrame:
    """
    Labels trades using realistic SL/TP execution with session-aware filtering.

    Returns:
    - 'execution_label': -1 (SL), 0 (no hit), +1 (TP)
    - 'realistic_return': the simulated PnL%
    """
    labels = []
    returns = []
    valid_idxs = []

    for i in range(len(df) - lookahead):
        entry_time = df['datetime'].iloc[i]
        end_time = df['datetime'].iloc[i + lookahead]

        if not is_same_session_fn(entry_time, end_time):
            continue  # skip if trade would exit in another session

        entry = df.iloc[i]
        atr = entry['atr_5']
        entry_price = entry['close']
        sl_price = entry_price - sl_atr_mult * atr
        tp_price = entry_price + tp_atr_mult * atr

        # Forward price range
        future_slice = df.iloc[i + 1:i + 1 + lookahead]
        highs = future_slice['high']
        lows = future_slice['low']

        hit_sl = (lows <= sl_price).any()
        hit_tp = (highs >= tp_price).any()

        if hit_sl and hit_tp:
            label = -1
            ret = (sl_price - entry_price) / entry_price
        elif hit_tp:
            label = 1
            ret = (tp_price - entry_price) / entry_price
        elif hit_sl:
            label = -1
            ret = (sl_price - entry_price) / entry_price
        else:
            label = 0
            ret = (df.iloc[i + lookahead]['close'] - entry_price) / entry_price

        labels.append(label)
        returns.append(ret)
        valid_idxs.append(i)

    df_out = df.iloc[valid_idxs].copy()
    df_out['execution_label'] = labels
    df_out['realistic_return'] = returns
    return df_out


In [6]:
lookahead_values = [5]

def label_and_save(lookahead):
    df_session = df.copy()
    df_labeled = compute_execution_labels(
        df_session,
        lookahead=lookahead,
        is_same_session_fn=is_same_session,
        sl_atr_mult=1.5,
        tp_atr_mult=3.0
    )
    df_labeled.to_parquet(f"labeled_data_{lookahead}_session_less.parquet")

for lookahead in lookahead_values:
    fname = f"labeled_data_{lookahead}_session_less.parquet"
    if os.path.exists(fname):
        print(f"⏭️ File {fname} already exists. Skipping...")
        continue
    print(f"📦 Labeling {fname}...")
    label_and_save(lookahead)


⏭️ File labeled_data_5_session_less.parquet already exists. Skipping...


# Train

##### Real Training

In [7]:
def check_overfit(model, X_tr, X_te, y_tr, y_te):
    train_preds = model.predict(X_tr)
    test_preds = model.predict(X_te)
    train_mse = mean_squared_error(y_tr, train_preds)
    test_mse = mean_squared_error(y_te, test_preds)
    ratio = test_mse / train_mse if train_mse != 0 else float('inf')

    print(f"\n📉 Overfitting check:")
    print(f"Train MSE: {train_mse:.8f}")
    print(f"Test MSE: {test_mse:.8f}")
    print(f"Overfit ratio (Test / Train): {ratio:.2f}")
    if ratio > 1.5:
        print("⚠️ Potential overfitting detected.")
    elif ratio < 0.7:
        print("⚠️ Possibly underfitting (too simple).")
    else:
        print("✅ Generalization looks reasonable.")

In [8]:
def generate_oof_predictions(models, X, y, n_splits=5):
    oof_preds = np.zeros((X.shape[0], len(models)))
    tscv = TimeSeriesSplit(n_splits=n_splits)

    for i, model in enumerate(models):
        for train_idx, val_idx in tscv.split(X):
            model.fit(X.iloc[train_idx], y.iloc[train_idx])
            oof_preds[val_idx, i] = model.predict(X.iloc[val_idx])
    
    return pd.DataFrame(oof_preds, index=y.index)  # ✅ FIX is here


In [None]:
def run_lookahead_for_session(LOOKAHEAD):
    labeled = pd.read_parquet(f"labeled_data_{LOOKAHEAD}_session_less.parquet")
    labeled = labeled.replace([np.inf, -np.inf], np.nan)
    labeled = labeled.dropna(subset=features + ['realistic_return'])

    cutoff_date = pd.Timestamp("2025-01-01", tz="America/New_York")
    train = labeled[labeled['datetime'] < cutoff_date]
    test = labeled[labeled['datetime'] >= cutoff_date]

    X_train_full, y_train = train[features], train['realistic_return']
    X_test_full, y_test = test[features], test['realistic_return']

    print(f"Train range: {train['datetime'].min()} to {train['datetime'].max()} | Rows: {len(train)}")
    print(f"Test range: {test['datetime'].min()} to {test['datetime'].max()} | Rows: {len(test)}")

    # === Step 7: Train individual models on combined features ===
    print("\n⚙️ Training individual models...")

    def tune_rf(X_train, y_train):
        def objective(trial):
            try:
                params = {
                    'n_estimators': 1000,  # Use static high value; no early stopping for RF
                    'max_depth': trial.suggest_int('max_depth', 4, 20),
                    'min_samples_split': trial.suggest_int('min_samples_split', 2, 20),
                    'min_samples_leaf': trial.suggest_int('min_samples_leaf', 1, 20),
                    'max_leaf_nodes': trial.suggest_int('max_leaf_nodes', 32, 256),
                    'max_features': trial.suggest_categorical('max_features', ['sqrt', 'log2', None]),
                    'bootstrap': True,
                }

                model = RandomForestRegressor(**params, random_state=42, n_jobs=-1)
                tscv = TimeSeriesSplit(n_splits=5)
                scores = []

                for train_idx, val_idx in tscv.split(X_train):
                    X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
                    y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

                    model.fit(X_tr, y_tr)
                    preds = model.predict(X_val)
                    rmse = root_mean_squared_error(y_val, preds)
                    scores.append(rmse)

                return np.mean(scores)
            except Exception as e:
                logging.warning(f"[RF Trial Failed] {e}")
                return float('inf')

        study = optuna.create_study(
            direction='minimize',
            study_name='rf_opt',
            sampler=optuna.samplers.TPESampler(seed=42, n_startup_trials=10),
            pruner=optuna.pruners.SuccessiveHalvingPruner(min_resource=10, reduction_factor=3),
            storage=f'sqlite:///rf_opt_study{LOOKAHEAD}_session_less.db',
            load_if_exists=True
        )
        study.optimize(objective, n_trials=1)

        return study.best_params

    def tune_xgb(X_train, y_train):
        def objective(trial):
            params = {
                "n_estimators": 2000,
                "learning_rate": trial.suggest_float("learning_rate", 0.005, 0.2, log=True),
                "max_depth": trial.suggest_int("max_depth", 3, 12),
                "subsample": trial.suggest_float("subsample", 0.6, 1.0),
                "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
                "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
                "gamma": trial.suggest_float("gamma", 0.0, 5.0),
                "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 5.0),
                "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 5.0),
                "verbosity": 0,
                "early_stopping_rounds": 30
            }
            tscv = TimeSeriesSplit(n_splits=5)
            scores = []
            model = XGBRegressor(**params, eval_metric="rmse", random_state=42, n_jobs=-1)
            for train_idx, val_idx in tscv.split(X_train):
                X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
                y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

                model.fit(
                    X_tr, y_tr,
                    eval_set=[(X_val, y_val)],
                    verbose=False
                )
                preds = model.predict(X_val)
                rmse = root_mean_squared_error(y_val, preds)
                scores.append(rmse)

            return np.mean(scores)

        study = optuna.create_study(
            direction='minimize',
            study_name='xgb_opt',
            sampler=optuna.samplers.TPESampler(n_startup_trials=10, seed=42),
            pruner=optuna.pruners.SuccessiveHalvingPruner(min_resource=50, reduction_factor=4),
            storage=f'sqlite:///xgb_opt_study{LOOKAHEAD}_session_less.db',
            load_if_exists=True
        )
        study.optimize(objective, n_trials=1)
        return study.best_params
    
    def tune_lightgbm(X_train, y_train):
        def objective(trial):
            params = {
                "n_estimators": 2000,
                "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
                "max_depth": trial.suggest_int("max_depth", 3, 8),
                "num_leaves": trial.suggest_int("num_leaves", 31, 256),
                "min_child_samples": trial.suggest_int("min_child_samples", 1, 30),
                "subsample": trial.suggest_float("subsample", 0.7, 1.0),
                "colsample_bytree": trial.suggest_float("colsample_bytree", 0.7, 1.0),
                "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 1.0),
                "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 1.0),
                "min_split_gain": trial.suggest_float("min_split_gain", 0.0, 0.01),
                "force_col_wise": trial.suggest_categorical("force_col_wise", [True, False])
            }
            tscv = TimeSeriesSplit(n_splits=5)
            scores = []
            for train_idx, val_idx in tscv.split(X_train):
                X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
                y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

                model = LGBMRegressor(**params, random_state=42, n_jobs=-1)
                model.fit(
                    X_tr, y_tr,
                    eval_set=[(X_val, y_val)],
                    eval_metric="rmse"
                )
                preds = model.predict(X_val)
                rmse = root_mean_squared_error(y_val, preds)
                scores.append(rmse)
            return np.mean(scores)

        study = optuna.create_study(
            direction="minimize",
            study_name="lgbm_opt",
            sampler=optuna.samplers.TPESampler(seed=42),
            pruner=optuna.pruners.SuccessiveHalvingPruner(min_resource=50, reduction_factor=4),
            storage=f"sqlite:///lgbm_opt_study{LOOKAHEAD}_session_less.db",
            load_if_exists=True
        )
        study.optimize(objective, n_trials=1)
        return study.best_params

    def tune_catboost(X_train, y_train):
        def objective(trial):
            params = {
                'iterations': 2000,
                'depth': trial.suggest_int('depth', 4, 8),
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
                'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 1.0, 5.0),
                'bootstrap_type': trial.suggest_categorical('bootstrap_type', ['Bayesian', 'Bernoulli']),
                'random_strength': trial.suggest_float('random_strength', 0.5, 2.0)
            }

            tscv = TimeSeriesSplit(n_splits=5)
            scores = []

            for train_idx, val_idx in tscv.split(X_train):
                X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
                y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

                model = CatBoostRegressor(**params, random_state=42)
                model.fit(
                    X_tr, y_tr,
                    eval_set=(X_val, y_val),
                    use_best_model=True,
                    verbose=False,
                    early_stopping_rounds=30
                )
                preds = model.predict(X_val)
                rmse = root_mean_squared_error(y_val, preds)
                scores.append(rmse)

            return np.mean(scores)

        study = optuna.create_study(
            direction='minimize',
            study_name='catboost_opt',
            sampler=optuna.samplers.TPESampler(seed=42),
            pruner=optuna.pruners.SuccessiveHalvingPruner(min_resource=50, reduction_factor=4),
            storage=f'sqlite:///catboost_opt_study{LOOKAHEAD}_session_less.db',
            load_if_exists=True
        )
        study.optimize(objective, n_trials=1)
        return study.best_params

    def get_shap_top_features(model, X, top_n=25):
        explainer = shap.Explainer(model, X)
        shap_values = explainer(X)
        shap_importance = pd.DataFrame({
            'feature': X.columns,
            'shap_importance': np.abs(shap_values.values).mean(axis=0)
        }).sort_values(by='shap_importance', ascending=False)
        return shap_importance.head(top_n)['feature'].tolist()
    
    xgb_temp = XGBRegressor(n_estimators=100, max_depth=3, learning_rate=0.1, random_state=42)
    xgb_temp.fit(X_train_full, y_train)
    
    combined_features = get_shap_top_features(xgb_temp, X_train_full)

    X_train_combined = X_train_full[combined_features]
    X_test_combined = X_test_full[combined_features]

    # Ensure index consistency
    y_train = y_train.loc[X_train_combined.index]
    y_test = y_test.loc[X_test_combined.index]

    xgb_params = tune_xgb(X_train_combined, y_train)
    lgbm_params = tune_lightgbm(X_train_combined, y_train)
    catboost_params = tune_catboost(X_train_combined, y_train)
    rf_params = tune_rf(X_train_combined, y_train)

    xgb = XGBRegressor(**xgb_params, eval_metric='rmse', random_state=42, n_jobs=-1)
    lgbm = LGBMRegressor(**lgbm_params, random_state=42, n_jobs=-1)
    catboost = CatBoostRegressor(**catboost_params, random_state=42, verbose=0)
    rf_best_combined = RandomForestRegressor(**rf_params, random_state=42, n_jobs=-1)

    xgb.fit(X_train_combined, y_train)
    lgbm.fit(X_train_combined, y_train)
    catboost.fit(X_train_combined, y_train)

    rf_best_combined.fit(X_train_combined, y_train)
    
    base_models = [
        rf_best_combined,
        xgb,
        lgbm,
        catboost
    ]

    base_models_preds_train = generate_oof_predictions(base_models, X_train_combined, y_train)

    def tune_meta_xgb(X_meta, y):
        """
        Optuna-tune XGBoost meta-learner for stacking.
        Operates on low-dimensional OOF prediction features.
        """

        def objective(trial):
            params = {
                "n_estimators": trial.suggest_int("n_estimators", 100, 300),
                "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.2, log=True),
                "max_depth": trial.suggest_int("max_depth", 2, 5),
                "subsample": trial.suggest_float("subsample", 0.7, 1.0),
                "colsample_bytree": trial.suggest_float("colsample_bytree", 0.5, 1.0),
                "min_child_weight": trial.suggest_int("min_child_weight", 1, 10),
                "gamma": trial.suggest_float("gamma", 0.0, 2.0),
                "reg_alpha": trial.suggest_float("reg_alpha", 0.0, 1.0),
                "reg_lambda": trial.suggest_float("reg_lambda", 0.0, 1.0),
                "booster": "gbtree"
            }

            tscv = TimeSeriesSplit(n_splits=3)
            scores = []

            for train_idx, val_idx in tscv.split(X_meta):
                X_tr, X_val = X_meta.iloc[train_idx], X_meta.iloc[val_idx]
                y_tr, y_val = y.iloc[train_idx], y.iloc[val_idx]

                model = XGBRegressor(
                    **params,
                    eval_metric="rmse",
                    random_state=42,
                    n_jobs=-1
                )

                model.fit(
                    X_tr,
                    y_tr,
                    eval_set=[(X_val, y_val)],
                    verbose=False
                )

                preds = model.predict(X_val)
                rmse = root_mean_squared_error(y_val, preds)
                scores.append(rmse)

            return np.mean(scores)

        study = optuna.create_study(
            direction="minimize",
            study_name=f"metamodel_stacking",
            sampler=optuna.samplers.TPESampler(seed=42),
            pruner=optuna.pruners.MedianPruner(n_startup_trials=5),
            storage=f"sqlite:///meta_xgb_stack_{LOOKAHEAD}_session_less.db",
            load_if_exists=True
        )

        study.optimize(objective, n_trials=100, show_progress_bar=True)
        return study.best_params

    meta_params = tune_meta_xgb(base_models_preds_train, y_train)
    meta_model = XGBRegressor(**meta_params, random_state=42, n_jobs=-1)

    # # === Step 8: Ensemble  Regressor ===
    stack = StackingRegressor(
        estimators=[('rf', rf_best_combined), ('xgb', xgb), ('lgbm', lgbm), ('catboost', catboost)],
        final_estimator=meta_model,
        n_jobs=-1
    )
    stack.fit(X_train_combined, y_train)

    # # === Step 9: Evaluate all models ===
    def evaluate_model(name, model, Xtr, Xte, ytr, yte, scaled=False):
        train_preds = model.predict(Xtr)
        test_preds = model.predict(Xte)
        train_mse = mean_squared_error(ytr, train_preds)
        test_mse = mean_squared_error(yte, test_preds)
        overfit_ratio = test_mse / train_mse if train_mse != 0 else float('inf')

        print(f"\n📊 {name} Performance:")
        print(f"Train MSE: {train_mse:.8f}")
        print(f"Test MSE: {test_mse:.8f}")
        print(f"Overfit ratio (Test / Train): {overfit_ratio:.2f}")
        if overfit_ratio > 1.5:
            print("⚠️ Potential overfitting detected.")
        elif overfit_ratio < 0.7:
            print("⚠️ Possibly underfitting.")
        else:
            print("✅ Generalization looks reasonable.")
        return test_preds

    X_test_rf = pd.DataFrame(X_train_combined, columns=X_train_combined.columns, index=X_test_combined.index)
    X_train_rf = pd.DataFrame(X_train_combined, columns=X_train_combined.columns, index=X_train_combined.index)
    preds_rf = evaluate_model("RandomForest", rf_best_combined, X_train_rf, X_test_rf, y_train, y_test)
    preds_xgb = evaluate_model("XGBoost", xgb, X_train_combined, X_test_combined, y_train, y_test)
    preds_lgbm = evaluate_model("LightGBM", lgbm, X_train_combined, X_test_combined, y_train, y_test)
    preds_catboost = evaluate_model("CatBoostRegressor", catboost, X_train_combined, X_test_combined, y_train, y_test)
    preds_stack = evaluate_model("Stacking Ensemble", stack, X_train_combined, X_test_combined, y_train, y_test)

    # # === Step 9.5: Isotopic Regression ===
    raw_test_preds = stack.predict(X_test_combined)

    # === Step 10: Choose the final model to backtest ===
    preds = raw_test_preds

    # === Step 11: Backtest Strategy ===
    X_test_df = pd.DataFrame(X_test_combined, columns=combined_features, index=X_test_combined.index)
    y_pred = stack.predict(X_test_df)

    y_train_binary = (y_train > 0).astype(int)  # or: (train['execution_label'] == 1)

    # Train isotonic regression on training set
    raw_train_preds = stack.predict(X_train_combined)
    iso = IsotonicRegression(out_of_bounds='clip')
    iso.fit(np.abs(raw_train_preds), y_train_binary)

    # Calibrate the confidence for test predictions
    confidence_scores = iso.predict(np.abs(raw_test_preds))

    all_results = []

    mae = mean_absolute_error(y_test, y_pred)
    rmse = np.sqrt(mean_squared_error(y_test, y_pred))
    r2 = r2_score(y_test, y_pred)

    print(f"MAE: {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R²: {r2:.4f}")
    ###

    thresholds = [0.0005, 0.001, 0.005]
    for params in combinations:
        for thresh in thresholds:
            results = evaluate_regression_combo(
                X_test=X_test_df,
                preds=preds,
                labeled=labeled,
                df=df,
                avoid_funcs=avoid_funcs,
                SL_ATR_MULT=params['SL_ATR_MULT'],
                TP_ATR_MULT=params['TP_ATR_MULT'],
                TRAIL_START_MULT=params['TRAIL_START_MULT'],
                TRAIL_STOP_MULT=params['TRAIL_STOP_MULT'],
                TICK_VALUE=params['TICK_VALUE'],
                is_same_session=is_same_session,
                long_thresh=thresh,
                short_thresh=-thresh,
                confidence_scores=confidence_scores
            )

            results['params'] = params
            all_results.append(results)

            print(f"\n\n🔍 Evaluating with params: {params}")

            print(
                f"\n✅ LOOKAHEAD={LOOKAHEAD} | Threshold={thresh}"
                f"\nPnL: ${results['pnl']:.2f}"
                f"\nTrades: {results['trades']}"
                f"\nWin Rate: {results['win_rate']:.2%}"
                f"\nExpectancy: {results['expectancy']:.2f}"
                f"\nProfit Factor: {results['profit_factor']:.2f}"
                f"\nSharpe Ratio: {results['sharpe']:.2f}"
                f"\nLong Trades: {results['long_trades']} | Short Trades: {results['short_trades']}"
            )

            print("Avoid Hits:")
            for name, count in results['avoid_hits'].items():
                print(f" - {name}: {count}")

            print("\n🔢 Top 10 PnL trades:")
            print(results['results'].sort_values(by='pnl', ascending=False).head(10))

            print("\n🔻 Bottom 10 PnL trades:")
            print(results['results'].sort_values(by='pnl', ascending=True).head(10))

    summary_df = pd.DataFrame([{
        'pnl': r['pnl'],
        'sharpe': r['sharpe'],
        'expectancy': r['expectancy'],
        'profit_factor': r['profit_factor'],
        'win_rate': r['win_rate'],
        'trades': r['trades'],
        **r['params']
    } for r in all_results])
    top = summary_df.sort_values(by='sharpe', ascending=False).head(5)
    print(top)

    metadata = {
        "lookahead": LOOKAHEAD,
        "train_range": [str(train["datetime"].min()), str(train["datetime"].max())],
        "test_range": [str(test["datetime"].min()), str(test["datetime"].max())],
        "features_used": combined_features,
        "rf_params": rf_params,
        "xgb_params": xgb_params,
        "lgbm_params": lgbm_params,
        "catboost_params": catboost_params,
        "meta_params": meta_params
    }
    with open(f"model_metadata_{LOOKAHEAD}.json", "w") as f:
        json.dump(metadata, f, indent=2)
        
    joblib.dump(stack, f"stack_model_LOOKAHEAD_{LOOKAHEAD}_session_less.pkl")
    joblib.dump(iso, f"isotonic_calibrator_{LOOKAHEAD}_session_less.pkl")
    with open("stack_features.json", "w") as f:
        json.dump(combined_features, f)

    return {
        'lookahead': LOOKAHEAD,
        'pnl': results['pnl'],
        'win_rate': results['win_rate'],
        'expectancy': results['expectancy'],
        'profit_factor': results['profit_factor'],
        'sharpe': results['sharpe'],
        'trades': results['trades'],
        'preds_rf': preds_rf,
        'preds_xgb': preds_xgb,
        'preds_lgbm': preds_lgbm,
        'preds_stack': preds_stack,
        'pres_catboost': preds_catboost,
        'results': all_results,
    }

In [10]:
def run_lookahead(LOOKAHEAD):
    try:
        result = run_lookahead_for_session(LOOKAHEAD)
        if result is None:
            print(f"No valid run for session_less, skipping CSV.")
            return
        return result
    except Exception as e:
        print(f"⚠️ Skipping session session_less due to error: {e}")
        return

##### Running Train

In [11]:
lookahead_values = [5]
lookahead_results = []

for val in lookahead_values:
    result = run_lookahead(val)
    lookahead_results.append(result)

Train range: 2023-12-10 17:20:00-05:00 to 2024-12-31 16:55:00-05:00 | Rows: 374685
Test range: 2025-01-01 18:01:00-05:00 to 2025-05-19 08:00:00-04:00 | Rows: 131831

⚙️ Training individual models...


[I 2025-05-19 17:42:37,872] Trial 1105 finished with value: 0.0006207567487901578 and parameters: {'learning_rate': 0.013149209573816396, 'max_depth': 8, 'subsample': 0.7464135734093312, 'colsample_bytree': 0.7548317657016187, 'min_child_weight': 6, 'gamma': 1.326252112172378, 'reg_alpha': 0.6540823862387894, 'reg_lambda': 0.3684803942662142}. Best is trial 910 with value: 0.0006207567497196118.
[I 2025-05-19 17:42:37,913] Using an existing study with name 'lgbm_opt' instead of creating a new one.


[LightGBM] [Info] Total Bins 4146
[LightGBM] [Info] Number of data points in the train set: 62450, number of used features: 24
[LightGBM] [Info] Start training from score 0.000006
[LightGBM] [Info] Total Bins 4217
[LightGBM] [Info] Number of data points in the train set: 124897, number of used features: 24
[LightGBM] [Info] Start training from score 0.000003
[LightGBM] [Info] Total Bins 4301
[LightGBM] [Info] Number of data points in the train set: 187344, number of used features: 24
[LightGBM] [Info] Start training from score 0.000003
[LightGBM] [Info] Total Bins 4353
[LightGBM] [Info] Number of data points in the train set: 249791, number of used features: 24
[LightGBM] [Info] Start training from score 0.000003
[LightGBM] [Info] Total Bins 4356
[LightGBM] [Info] Number of data points in the train set: 312238, number of used features: 24
[LightGBM] [Info] Start training from score 0.000002


[I 2025-05-19 17:43:00,225] Trial 223 finished with value: 0.0006207567487905067 and parameters: {'learning_rate': 0.06877554552960749, 'max_depth': 6, 'num_leaves': 34, 'min_child_samples': 30, 'subsample': 0.7509925788627763, 'colsample_bytree': 0.8684723156991941, 'reg_alpha': 0.5503641655400531, 'reg_lambda': 0.7425755314481074, 'min_split_gain': 0.009915084085725376, 'force_col_wise': True}. Best is trial 4 with value: 0.0006207567487905067.
[I 2025-05-19 17:43:00,267] Using an existing study with name 'catboost_opt' instead of creating a new one.




[I 2025-05-19 17:43:04,778] Trial 184 finished with value: 0.0006207352360698574 and parameters: {'depth': 4, 'learning_rate': 0.04296608257215008, 'l2_leaf_reg': 3.4112325689616894, 'bootstrap_type': 'Bayesian', 'random_strength': 0.6376322747303684}. Best is trial 182 with value: 0.0006207371250954646.
[I 2025-05-19 17:43:04,807] Using an existing study with name 'rf_opt' instead of creating a new one.
[I 2025-05-19 17:46:16,486] Trial 13 finished with value: 0.0006238987801059471 and parameters: {'max_depth': 14, 'min_samples_split': 2, 'min_samples_leaf': 20, 'max_leaf_nodes': 241, 'max_features': 'log2'}. Best is trial 9 with value: 0.0006244997477140212.


[LightGBM] [Info] Total Bins 4354
[LightGBM] [Info] Number of data points in the train set: 374685, number of used features: 24
[LightGBM] [Info] Start training from score 0.000003
[LightGBM] [Info] Total Bins 4146
[LightGBM] [Info] Number of data points in the train set: 62450, number of used features: 24
[LightGBM] [Info] Start training from score 0.000006
[LightGBM] [Info] Total Bins 4217
[LightGBM] [Info] Number of data points in the train set: 124897, number of used features: 24
[LightGBM] [Info] Start training from score 0.000003
[LightGBM] [Info] Total Bins 4301
[LightGBM] [Info] Number of data points in the train set: 187344, number of used features: 24
[LightGBM] [Info] Start training from score 0.000003
[LightGBM] [Info] Total Bins 4353
[LightGBM] [Info] Number of data points in the train set: 249791, number of used features: 24
[LightGBM] [Info] Start training from score 0.000003
[LightGBM] [Info] Total Bins 4356
[LightGBM] [Info] Number of data points in the train set: 3122

[I 2025-05-19 17:49:55,075] A new study created in RDB with name: metamodel_stacking


  0%|          | 0/100 [00:00<?, ?it/s]

[W 2025-05-19 17:49:55,197] Trial 0 failed with parameters: {'n_estimators': 175, 'learning_rate': 0.17254716573280354, 'max_depth': 4, 'subsample': 0.8795975452591109, 'colsample_bytree': 0.5780093202212182, 'min_child_weight': 2, 'gamma': 0.11616722433639892, 'reg_alpha': 0.8661761457749352, 'reg_lambda': 0.6011150117432088} because of the following error: TypeError("XGBModel.fit() got an unexpected keyword argument 'early_stopping_rounds'").
Traceback (most recent call last):
  File "c:\Users\Franc\.pyenv\pyenv-win\versions\3.12.10\Lib\site-packages\optuna\study\_optimize.py", line 197, in _run_trial
    value_or_values = func(trial)
                      ^^^^^^^^^^^
  File "C:\Users\Franc\AppData\Local\Temp\ipykernel_32692\2702916969.py", line 272, in objective
    model.fit(
  File "c:\Users\Franc\.pyenv\pyenv-win\versions\3.12.10\Lib\site-packages\xgboost\core.py", line 729, in inner_f
    return func(**kwargs)
           ^^^^^^^^^^^^^^
TypeError: XGBModel.fit() got an unexpected

# Visualize

In [12]:
# for result in lookahead_results:
#     stack_preds = result['stack'].predict(X_test_scaled)
#     rf_preds = result['models']['rf'].predict(X_test_scaled)
#     xgb_preds = result['models']['xgb'].predict(X_test_scaled)
#     enet_preds = result['models']['elasticnet'].predict(X_test_scaled)
    
#     plt.figure(figsize=(12, 4))
#     plt.plot(rf_preds[:100], label='RF')
#     plt.plot(xgb_preds[:100], label='XGB')
#     plt.plot(enet_preds[:100], label='ElasticNet')
#     plt.plot(stack_preds[:100], label='Stack', linewidth=2)

In [13]:
# for run in lookahead_results:
#     for r in run['results']:
#         print(r)
#         df = r['results'].copy()
#         df = df.sort_values(by='entry_time')
#         df['cumulative_pnl'] = df['pnl'].cumsum()

#         if df['cumulative_pnl'].iloc[-1] > 0 and r['sharpe'] > 10 and r['trades'] > 150 and r['win_rate'] > 0.55 and r['profit_factor'] > 1.5 and r['expectancy'] > 0.5 and r['pnl'] > 50000:
#                 plt.figure(figsize=(12, 4))
#                 plt.plot(df['entry_time'], df['cumulative_pnl'], label='Cumulative PnL', color='green')
#                 plt.title(f"PnL | Lookahead={run['lookahead']} | Sharpe={r['sharpe']:.2f}")
#                 plt.xlabel("Datetime")
#                 plt.ylabel("PnL")
#                 plt.grid(True)
#                 plt.legend()
#                 plt.tight_layout()
#                 plt.show()

In [14]:
best_by_lookahead = {
    5: {'win_rate': float('-inf'), 'result': None},
    15: {'win_rate': float('-inf'), 'result': None}
}

for run in lookahead_results:
    lookahead = run['lookahead']
    if lookahead in best_by_lookahead:
        for r in run['results']:
            if r['win_rate'] > best_by_lookahead[lookahead]['win_rate']:
                best_by_lookahead[lookahead] = {
                    'win_rate': r['win_rate'],
                    'result': r,
                    'lookahead': lookahead
                }

# === Display results ===
for lookahead in [5]:
    best = best_by_lookahead[lookahead]
    if best['result']:
        df = best['result']['results'].copy()
        df = df.sort_values(by='entry_time')
        df['cumulative_pnl'] = df['pnl'].cumsum()

        print(f"\n🏆 Best Win Rate Result for Lookahead={lookahead}")
        print(f"Win Rate: {best['win_rate']:.2%}")
        print(f"PnL: {best['result']['pnl']:.2f}")
        print(f"Trades: {best['result']['trades']}")
        print(f"Sharpe: {best['result']['sharpe']:.2f}")
        print(f"Expectancy: {best['result']['expectancy']:.2f}")
        print(f"Profit Factor: {best['result']['profit_factor']:.2f}")
        print(f"Params: {best['result']['params']}")
        print("\n🧾 All Trades from Best Win Rate Result:")
        print(df[['entry_time', 'exit_time', 'side', 'entry_price', 'exit_price', 'pnl', 'mfe', 'mae', 'cumulative_pnl']].to_string(index=False))

        # Plot cumulative PnL
        plt.figure(figsize=(12, 4))
        plt.plot(df['entry_time'], df['cumulative_pnl'], label='Cumulative PnL', color='blue')
        plt.title(f"Best Win Rate Run | Lookahead={lookahead} | Win Rate={best['win_rate']:.2%}")
        plt.xlabel("Datetime")
        plt.ylabel("Cumulative PnL")
        plt.grid(True)
        plt.legend()
        plt.tight_layout()
        plt.show()
    else:
        print(f"No valid result found for Lookahead={lookahead}.")


TypeError: 'NoneType' object is not subscriptable

In [None]:
np.corrcoef([lookahead_results['preds_rf'], lookahead_results['preds_xgb'], lookahead_results['preds_elasticnet']])
preds_matrix = np.vstack([lookahead_results['preds_rf'], lookahead_results['preds_xgb'], lookahead_results['preds_elasticnet']])
corr_matrix = np.corrcoef(preds_matrix)

plt.figure(figsize=(6, 4))
sns.heatmap(corr_matrix, annot=True, xticklabels=['RF', 'XGB', 'ENet'], yticklabels=['RF', 'XGB', 'ENet'], cmap='coolwarm', fmt=".2f")
plt.title("Correlation Between Base Model Predictions")
plt.show()

# Test Model

# Sort and Plot

In [None]:
# Predictions
# y_pred = best_lookahead.predict(X_test)
best_lookahead = max(lookahead_results, key=lambda x: max(r['pnl'] for r in x['results']))
y_pred = best_lookahead['stack'].predict(X_test_scaled)

# Confusion Matrix
labels = sorted(class_mapping)  # Make sure the order matches
cm = confusion_matrix(y_test, y_pred, labels=labels)

# Display Confusion Matrix
plt.figure(figsize=(8, 6))
sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()

# Classification Report
print("Classification Report:")
print(classification_report(y_test, y_pred, labels=labels, digits=2))