# Import Libraries

In [60]:
import os
import pandas as pd
import platform
import numpy as np
from itertools import product
import matplotlib.pyplot as plt
from datetime import timedelta
from collections import defaultdict
import joblib
import json
import warnings
import time

# TA Indicators
from ta.momentum import RSIIndicator
from ta.volatility import BollingerBands, AverageTrueRange
from ta.trend import EMAIndicator, MACD
from ta.volume import VolumeWeightedAveragePrice, OnBalanceVolumeIndicator
#

# Tensorflow
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, LSTM, Conv1D, Flatten, Dropout, GlobalAveragePooling1D
from tensorflow.keras.optimizers import Adam
#

# Scikit-learn
from sklearn.base import clone, BaseEstimator, RegressorMixin
from sklearn.model_selection import TimeSeriesSplit, train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import classification_report, root_mean_squared_error, mean_squared_error, mean_absolute_error, r2_score, roc_auc_score, accuracy_score, f1_score, precision_score, recall_score
from sklearn.ensemble import StackingRegressor
from sklearn.preprocessing import label_binarize, StandardScaler
from sklearn.inspection import permutation_importance
#

# Models and Training
from catboost import CatBoostRegressor, CatBoostClassifier
from xgboost import XGBRegressor
from lightgbm import LGBMRegressor
import optuna
import seaborn as sns
import shap
#

warnings.filterwarnings("ignore", category=UserWarning)
warnings.filterwarnings("ignore", message=".*There are no meaningful features.*", category=UserWarning)
optuna.logging.set_verbosity(optuna.logging.INFO)

In [61]:
folder_path = "./../data/"
column_names = ['datetime', 'open', 'high', 'low', 'close', 'volume']
df_list = []

system = platform.system()
# Set emoji-compatible font based on OS
if system == 'Windows':
    plt.rcParams['font.family'] = 'Segoe UI Emoji'
elif system == 'Linux':
    plt.rcParams['font.family'] = 'Noto Color Emoji'  # if installed

for filename in os.listdir(folder_path):
    if filename.endswith(('.csv', '.txt')):
        file_path = os.path.join(folder_path, filename)
        df = pd.read_csv(file_path, sep=';', header=None, names=column_names)
        df['source_file'] = filename
        df_list.append(df)

df = pd.concat(df_list, ignore_index=True)
df['datetime'] = pd.to_datetime(df['datetime'], utc=True).dt.tz_convert('America/New_York')

df = df.drop_duplicates(subset='datetime', keep='first').reset_index(drop=True)
df = df.sort_values('datetime').reset_index(drop=True)
df[['open', 'high', 'low', 'close', 'volume']] = df[['open', 'high', 'low', 'close', 'volume']].astype(float)

# Base time features
df['hour'] = df['datetime'].dt.hour + df['datetime'].dt.minute / 60
df['minute'] = df['datetime'].dt.minute
df['day_of_week'] = df['datetime'].dt.dayofweek  # 0 = Monday

# Custom session flags (adjust if needed)       # Regular Trading Hours
df['is_premarket'] = df['hour'].between(7, 9.5)
df['is_lunch'] = df['hour'].between(11.5, 13.5)
df['is_postmarket'] = df['hour'].between(15.5, 20)
df['is_after_hours'] = df['hour'].between(20, 23.5)

# Initialize features or indicators

In [62]:
# === EMA / Bollinger / RSI / ATR / VWAP / OBV ===
df['ema_9'] = EMAIndicator(df['close'], window=9).ema_indicator()
df['ema_21'] = EMAIndicator(df['close'], window=21).ema_indicator()

bb = BollingerBands(df['close'], window=20, window_dev=2)
df['boll_upper'] = bb.bollinger_hband()
df['boll_lower'] = bb.bollinger_lband()
df['boll_width'] = df['boll_upper'] - df['boll_lower']

df['rsi_14'] = RSIIndicator(df['close'], window=14).rsi()
df['atr_14'] = AverageTrueRange(df['high'], df['low'], df['close'], window=14).average_true_range()

df['vwap'] = VolumeWeightedAveragePrice(
    high=df['high'], low=df['low'], close=df['close'], volume=df['volume'], window=14
).volume_weighted_average_price()

df['obv'] = OnBalanceVolumeIndicator(df['close'], df['volume']).on_balance_volume()

# === MACD and histogram ===
macd = MACD(df['close'])
df['macd'] = macd.macd()
df['macd_diff'] = macd.macd_diff()

# === Momentum flags ===
df['momentum_up'] = (df['macd_diff'] > 0).astype(int)
df['momentum_down'] = (df['macd_diff'] < 0).astype(int)

# === RSI level flags ===
df['rsi_overbought'] = (df['rsi_14'] > 70).astype(int)
df['rsi_oversold'] = (df['rsi_14'] < 30).astype(int)
df['rsi_midrange'] = ((df['rsi_14'] >= 30) & (df['rsi_14'] <= 70)).astype(int)
df['rsi_neutral_around_50'] = ((df['rsi_14'] > 45) & (df['rsi_14'] < 55)).astype(int)

# === Bollinger breakout ===
df['boll_breakout_up'] = (df['close'] > df['boll_upper']).astype(int)
df['boll_breakout_down'] = (df['close'] < df['boll_lower']).astype(int)

# === VWAP crossover ===
df['vwap_cross_above'] = (df['close'] > df['vwap']).astype(int)
df['vwap_cross_below'] = (df['close'] < df['vwap']).astype(int)

# === RSI short-term ===
df['rsi_5'] = RSIIndicator(df['close'], window=5).rsi()
for i in range(1, 6):
    df[f'rsi_5_tminus{i}'] = df['rsi_5'].shift(i)

# === EMA slope ===
df['ema_9_slope'] = df['ema_9'].diff()
for i in range(1, 6):
    df[f'ema_9_slope_tminus{i}'] = df['ema_9_slope'].shift(i)

# === Return series ===
df['return_1'] = df['close'].pct_change(1)
for i in range(1, 6):
    df[f'return_1_tminus{i}'] = df['return_1'].shift(i)

# === MACD histogram series ===
for i in range(1, 6):
    df[f'macd_diff_tminus{i}'] = df['macd_diff'].shift(i)

# === Support/Resistance via rolling extremes ===
df['resistance_lookback'] = df['high'].rolling(20).max()
df['support_lookback'] = df['low'].rolling(20).min()
df['dist_to_resistance'] = df['resistance_lookback'] - df['close']
df['dist_to_support'] = df['close'] - df['support_lookback']

# === Volume delta (approximate placeholder) ===
df['volume_delta_ema'] = df['volume'].diff().ewm(span=14).mean()

# === RSI z-score and normalization ===
df['rsi_14_zscore'] = (df['rsi_14'] - df['rsi_14'].rolling(50).mean()) / df['rsi_14'].rolling(50).std()
df['rsi_14_norm'] = df['rsi_14'] / 100.0

# === MACD z-score ===
df['macd_z'] = (df['macd'] - df['macd'].rolling(50).mean()) / df['macd'].rolling(50).std()

# === Return demeaned ===
df['return_1_demeaned'] = df['return_1'] - df['return_1'].rolling(50).mean()

tree_based_features = [
    'ema_9', 'ema_21',
    'boll_upper', 'boll_lower', 'boll_width',
    'rsi_14', 'macd', 'macd_diff',
    'vwap', 'obv', 'atr_14',
    'momentum_up', 'momentum_down',
    'rsi_overbought', 'rsi_oversold', 'rsi_midrange', 'rsi_neutral_around_50',
    'boll_breakout_up', 'boll_breakout_down',
    'vwap_cross_above', 'vwap_cross_below'
]

sequential_features = [
    # RSI over time
    'rsi_5_tminus1', 'rsi_5_tminus2', 'rsi_5_tminus3', 'rsi_5_tminus4', 'rsi_5_tminus5',
    
    # EMA slope over time
    'ema_9_slope_tminus1', 'ema_9_slope_tminus2', 'ema_9_slope_tminus3', 'ema_9_slope_tminus4', 'ema_9_slope_tminus5',
    
    # Raw returns over time
    'return_1_tminus1', 'return_1_tminus2', 'return_1_tminus3', 'return_1_tminus4', 'return_1_tminus5',

    # MACD histogram series
    'macd_diff_tminus1', 'macd_diff_tminus2', 'macd_diff_tminus3', 'macd_diff_tminus4', 'macd_diff_tminus5',

    # Support/Resistance proximity
    'dist_to_resistance', 'dist_to_support',

    # Volume delta placeholder (e.g. volume_diff or custom calc)
    'volume_delta_ema'
]

linear_features = [
    'rsi_14_zscore', 'rsi_14_norm',
    'macd_z',  # z-score of MACD
    'boll_width',  # could also use boll_pct (price position in bands)
    'return_1_demeaned',
    
    # One-hot style binary flags
    'momentum_up', 'momentum_down',
    'rsi_overbought', 'rsi_oversold', 'rsi_neutral_around_50'
]
all_features = []
all_features += linear_features
all_features += sequential_features
all_features += tree_based_features

In [63]:
def session_key(ts: pd.Timestamp) -> pd.Timestamp:
    # shift back 18 h, then floor to midnight to get a unique session “date”
    return (ts - timedelta(hours=18)).normalize()

def is_same_session(start_time: pd.Timestamp, end_time: pd.Timestamp) -> bool:
    return session_key(start_time) == session_key(end_time)

param_grid_strategy = {
    'SL_ATR_MULT': [1.0, 1.5, 0.5],
    'TP_ATR_MULT': [2.0, 3.0, 4.0],
    'TRAIL_START_MULT': [0.5, 1.0],
    'TRAIL_STOP_MULT': [0.5, 1.0],
    'TICK_VALUE': [5], 
}

keys, values = zip(*param_grid_strategy.items())
combinations = [dict(zip(keys, v)) for v in product(*values)]

##### Avoid functions

In [64]:
def avoid_news(row):
    ts = row["datetime"]
    return any(start <= ts <= end for (start, end) in news_windows)

def avoid_hour_18_19(row):
    """
    Avoid trading in the first hour of the session (18:00 to 19:00 inclusive).
    """
    if not pd.api.types.is_datetime64_any_dtype(row['datetime']):
        return False
    hour = row['datetime'].hour
    return hour == 18

avoid_funcs = {
    #'avoid_hour_18_19': avoid_hour_18_19
    #'news_window': avoid_news,
}

# Backtesting function

##### Regression Backtesting

In [65]:
def evaluate_regression(
    X_test, preds_stack, preds_cnn, preds_lgbm, labeled, df,
    avoid_funcs,
    SL_ATR_MULT, TP_ATR_MULT, TRAIL_START_MULT, TRAIL_STOP_MULT, TICK_VALUE,
    is_same_session,
    long_thresh,
    short_thresh,
    base_contracts=1,
    max_contracts=5,
    skip_weak_conf=False,
    weak_conf_zscore=0.2,
    stack_weight=0.5,
    cnn_weight=0.5
):
    temp_trades_data = []
    skipped_trades = 0
    avoid_hits = defaultdict(int)
    long_trades = 0
    short_trades = 0

    i = 0
    X_test_idx = X_test.index.to_list()
    combined_preds = stack_weight * np.array(preds_stack) + cnn_weight * np.array(preds_cnn)
    preds_array = combined_preds

    # === Calculate z-score confidence ===
    zscores = (preds_array - preds_array.mean()) / (preds_array.std() + 1e-9)
    zscores = np.clip(zscores, -3.0, 3.0)
    conf_scores = np.clip(np.abs(zscores), 0, 2.0)
    position_sizes = base_contracts + (max_contracts - base_contracts) * (conf_scores / 2.0)
    position_sizes = np.round(position_sizes, 2)


    for i, idx in enumerate(X_test_idx):
        #idx = X_test_idx[i]
        row = labeled.loc[idx]

        if idx + 1 >= len(df):
            skipped_trades += 1
            continue

        vol_adj_pred = preds_array[i]
        conf = conf_scores[i]
        size = position_sizes[i]

        # Skip weak confidence signals if enabled
        if skip_weak_conf and conf < weak_conf_zscore:
            skipped_trades += 1
            continue

        if vol_adj_pred >= long_thresh:  # TP or Strong TP
            side = 'long'
            long_trades += 1
        elif vol_adj_pred <= short_thresh:  # SL side match
            side = 'short'
            short_trades += 1
        else:
            skipped_trades += 1
            continue  # classifier disagrees

        # Trade filters
        skip_trade = False
        for name, f in avoid_funcs.items():
            try:
                if f(row):
                    avoid_hits[name] += 1
                    skip_trade = True
            except:
                continue
        if skip_trade:
            skipped_trades += 1
            i += 1
            continue

        # --- Trade Simulation ---
        entry_price = df.loc[idx + 1, 'open']
        entry_time = df.loc[idx + 1, 'datetime']
        atr = row['atr_5']

        # Stop Loss (fixed volatility-based)
        sl_price = entry_price - SL_ATR_MULT * atr if side == 'long' else entry_price + SL_ATR_MULT * atr

        # Take Profit (dynamic, from model prediction, clipped)
        expected_move = abs(vol_adj_pred) * entry_price
        min_tp = 0.001 * entry_price  # minimum 0.1% move
        max_tp = TP_ATR_MULT * atr
        tp_move = np.clip(expected_move, min_tp, max_tp)
        tp_price = entry_price + tp_move if side == 'long' else entry_price - tp_move

        # Trailing logic
        trail_trigger = entry_price + TRAIL_START_MULT * atr if side == 'long' else entry_price - TRAIL_START_MULT * atr
        trail_stop = None

        max_price, min_price = entry_price, entry_price
        exit_price, exit_time = None, None

        fwd_idx = idx + 1
        while fwd_idx < len(df):
            fwd_row = df.loc[fwd_idx]
            max_price = max(max_price, fwd_row['high'])
            min_price = min(min_price, fwd_row['low'])

            if (side == 'long' and fwd_row['low'] <= sl_price) or (side == 'short' and fwd_row['high'] >= sl_price):
                exit_price = sl_price
                exit_time = fwd_row['datetime']
                break

            if (side == 'long' and fwd_row['high'] >= tp_price) or (side == 'short' and fwd_row['low'] <= tp_price):
                exit_price = tp_price
                exit_time = fwd_row['datetime']
                break

            if side == 'long' and fwd_row['high'] >= trail_trigger:
                trail_stop = fwd_row['close'] - TRAIL_STOP_MULT * atr
            if side == 'short' and fwd_row['low'] <= trail_trigger:
                trail_stop = fwd_row['close'] + TRAIL_STOP_MULT * atr

            if trail_stop:
                if (side == 'long' and fwd_row['low'] <= trail_stop) or (side == 'short' and fwd_row['high'] >= trail_stop):
                    exit_price = trail_stop
                    exit_time = fwd_row['datetime']
                    break

            fwd_idx += 1

        if exit_price is None:
            exit_price = df.loc[len(df) - 1, 'close']
            exit_time = df.loc[len(df) - 1, 'datetime']

        if not is_same_session(entry_time, exit_time):
            i += 1
            continue

        GROSS_PNL = (exit_price - entry_price) * TICK_VALUE * size if side == 'long' else (entry_price - exit_price) * TICK_VALUE * size
        COMMISSION = 3.98 * size
        pnl = GROSS_PNL - COMMISSION

        mfe = max_price - entry_price if side == 'long' else entry_price - min_price
        mae = entry_price - min_price if side == 'long' else max_price - entry_price

        temp_trades_data.append({
            'entry_time': entry_time,
            'exit_time': exit_time,
            'side': side,
            'entry_price': entry_price,
            'exit_price': exit_price,
            'pnl': pnl,
            'mfe': mfe,
            'mae': mae,
            'gross_pnl': GROSS_PNL,
            'vol_adj_pred': vol_adj_pred,
            'pred_class': pred_class,
            'confidence': conf,
            'position_size': size,
        })

        while i < len(X_test_idx) and labeled.loc[X_test_idx[i]]['datetime'] <= exit_time:
            i += 1
        continue

    # === Metrics ===
    results = pd.DataFrame(temp_trades_data)
    pnl_total = results['pnl'].sum() if not results.empty else 0
    trades = len(results)
    win_rate = (results['pnl'] > 0).mean() if not results.empty else 0
    expectancy = results['pnl'].mean() if not results.empty else 0
    profit_factor = results[results['pnl'] > 0]['pnl'].sum() / abs(results[results['pnl'] < 0]['pnl'].sum()) if not results.empty and (results['pnl'] < 0).any() else np.nan
    sharpe = results['pnl'].mean() / (results['pnl'].std() + 1e-9) * np.sqrt(trades) if trades > 1 else 0

    return {
        'pnl': pnl_total,
        'trades': trades,
        'win_rate': win_rate,
        'expectancy': expectancy,
        'profit_factor': profit_factor,
        'sharpe': sharpe,
        'long_trades': long_trades,
        'short_trades': short_trades,
        'avoid_hits': dict(avoid_hits),
        'threshold': long_thresh,
        'results': results
    }

In [66]:
# Not done yet
def evaluate_classification(
    X_test, preds_stack, preds_cnn, preds_lgbm, labeled, df,
    avoid_funcs,
    SL_ATR_MULT, TP_ATR_MULT, TRAIL_START_MULT, TRAIL_STOP_MULT, TICK_VALUE,
    is_same_session,
    long_thresh,
    short_thresh,
    base_contracts=1,
    max_contracts=5,
    skip_weak_conf=False,
    weak_conf_zscore=0.2,
    stack_weight=0.5,
    cnn_weight=0.5
):
    temp_trades_data = []
    skipped_trades = 0
    avoid_hits = defaultdict(int)
    long_trades = 0
    short_trades = 0

    i = 0
    X_test_idx = X_test.index.to_list()
    combined_preds = stack_weight * np.array(preds_stack) + cnn_weight * np.array(preds_cnn)
    preds_array = combined_preds

    # === Calculate z-score confidence ===
    zscores = (preds_array - preds_array.mean()) / (preds_array.std() + 1e-9)
    zscores = np.clip(zscores, -3.0, 3.0)
    conf_scores = np.clip(np.abs(zscores), 0, 2.0)
    position_sizes = base_contracts + (max_contracts - base_contracts) * (conf_scores / 2.0)
    position_sizes = np.round(position_sizes, 2)


    for i, idx in enumerate(X_test_idx):
        #idx = X_test_idx[i]
        row = labeled.loc[idx]

        if idx + 1 >= len(df):
            skipped_trades += 1
            continue

        vol_adj_pred = preds_array[i]
        conf = conf_scores[i]
        size = position_sizes[i]

        # Skip weak confidence signals if enabled
        if skip_weak_conf and conf < weak_conf_zscore:
            skipped_trades += 1
            continue

        if vol_adj_pred >= long_thresh:  # TP or Strong TP
            side = 'long'
            long_trades += 1
        elif vol_adj_pred <= short_thresh:  # SL side match
            side = 'short'
            short_trades += 1
        else:
            skipped_trades += 1
            continue  # classifier disagrees

        # Trade filters
        skip_trade = False
        for name, f in avoid_funcs.items():
            try:
                if f(row):
                    avoid_hits[name] += 1
                    skip_trade = True
            except:
                continue
        if skip_trade:
            skipped_trades += 1
            i += 1
            continue

        # --- Trade Simulation ---
        entry_price = df.loc[idx + 1, 'open']
        entry_time = df.loc[idx + 1, 'datetime']
        atr = row['atr_5']

        # Stop Loss (fixed volatility-based)
        sl_price = entry_price - SL_ATR_MULT * atr if side == 'long' else entry_price + SL_ATR_MULT * atr

        # Take Profit (dynamic, from model prediction, clipped)
        expected_move = abs(vol_adj_pred) * entry_price
        min_tp = 0.001 * entry_price  # minimum 0.1% move
        max_tp = TP_ATR_MULT * atr
        tp_move = np.clip(expected_move, min_tp, max_tp)
        tp_price = entry_price + tp_move if side == 'long' else entry_price - tp_move

        # Trailing logic
        trail_trigger = entry_price + TRAIL_START_MULT * atr if side == 'long' else entry_price - TRAIL_START_MULT * atr
        trail_stop = None

        max_price, min_price = entry_price, entry_price
        exit_price, exit_time = None, None

        fwd_idx = idx + 1
        while fwd_idx < len(df):
            fwd_row = df.loc[fwd_idx]
            max_price = max(max_price, fwd_row['high'])
            min_price = min(min_price, fwd_row['low'])

            if (side == 'long' and fwd_row['low'] <= sl_price) or (side == 'short' and fwd_row['high'] >= sl_price):
                exit_price = sl_price
                exit_time = fwd_row['datetime']
                break

            if (side == 'long' and fwd_row['high'] >= tp_price) or (side == 'short' and fwd_row['low'] <= tp_price):
                exit_price = tp_price
                exit_time = fwd_row['datetime']
                break

            if side == 'long' and fwd_row['high'] >= trail_trigger:
                trail_stop = fwd_row['close'] - TRAIL_STOP_MULT * atr
            if side == 'short' and fwd_row['low'] <= trail_trigger:
                trail_stop = fwd_row['close'] + TRAIL_STOP_MULT * atr

            if trail_stop:
                if (side == 'long' and fwd_row['low'] <= trail_stop) or (side == 'short' and fwd_row['high'] >= trail_stop):
                    exit_price = trail_stop
                    exit_time = fwd_row['datetime']
                    break

            fwd_idx += 1

        if exit_price is None:
            exit_price = df.loc[len(df) - 1, 'close']
            exit_time = df.loc[len(df) - 1, 'datetime']

        if not is_same_session(entry_time, exit_time):
            i += 1
            continue

        GROSS_PNL = (exit_price - entry_price) * TICK_VALUE * size if side == 'long' else (entry_price - exit_price) * TICK_VALUE * size
        COMMISSION = 3.98 * size
        pnl = GROSS_PNL - COMMISSION

        mfe = max_price - entry_price if side == 'long' else entry_price - min_price
        mae = entry_price - min_price if side == 'long' else max_price - entry_price

        temp_trades_data.append({
            'entry_time': entry_time,
            'exit_time': exit_time,
            'side': side,
            'entry_price': entry_price,
            'exit_price': exit_price,
            'pnl': pnl,
            'mfe': mfe,
            'mae': mae,
            'gross_pnl': GROSS_PNL,
            'vol_adj_pred': vol_adj_pred,
            'pred_class': pred_class,
            'confidence': conf,
            'position_size': size,
        })

        while i < len(X_test_idx) and labeled.loc[X_test_idx[i]]['datetime'] <= exit_time:
            i += 1
        continue

    # === Metrics ===
    results = pd.DataFrame(temp_trades_data)
    pnl_total = results['pnl'].sum() if not results.empty else 0
    trades = len(results)
    win_rate = (results['pnl'] > 0).mean() if not results.empty else 0
    expectancy = results['pnl'].mean() if not results.empty else 0
    profit_factor = results[results['pnl'] > 0]['pnl'].sum() / abs(results[results['pnl'] < 0]['pnl'].sum()) if not results.empty and (results['pnl'] < 0).any() else np.nan
    sharpe = results['pnl'].mean() / (results['pnl'].std() + 1e-9) * np.sqrt(trades) if trades > 1 else 0

    return {
        'pnl': pnl_total,
        'trades': trades,
        'win_rate': win_rate,
        'expectancy': expectancy,
        'profit_factor': profit_factor,
        'sharpe': sharpe,
        'long_trades': long_trades,
        'short_trades': short_trades,
        'avoid_hits': dict(avoid_hits),
        'threshold': long_thresh,
        'results': results
    }

In [67]:
# Not done yet
def evaluate_combo(
    X_test, preds_stack, preds_cnn, preds_lgbm, labeled, df,
    avoid_funcs,
    SL_ATR_MULT, TP_ATR_MULT, TRAIL_START_MULT, TRAIL_STOP_MULT, TICK_VALUE,
    is_same_session,
    long_thresh,
    short_thresh,
    base_contracts=1,
    max_contracts=5,
    skip_weak_conf=False,
    weak_conf_zscore=0.2,
    stack_weight=0.5,
    cnn_weight=0.5
):
    temp_trades_data = []
    skipped_trades = 0
    avoid_hits = defaultdict(int)
    long_trades = 0
    short_trades = 0

    i = 0
    X_test_idx = X_test.index.to_list()
    combined_preds = stack_weight * np.array(preds_stack) + cnn_weight * np.array(preds_cnn)
    preds_array = combined_preds

    # === Calculate z-score confidence ===
    zscores = (preds_array - preds_array.mean()) / (preds_array.std() + 1e-9)
    zscores = np.clip(zscores, -3.0, 3.0)
    conf_scores = np.clip(np.abs(zscores), 0, 2.0)
    position_sizes = base_contracts + (max_contracts - base_contracts) * (conf_scores / 2.0)
    position_sizes = np.round(position_sizes, 2)


    for i, idx in enumerate(X_test_idx):
        #idx = X_test_idx[i]
        row = labeled.loc[idx]

        if idx + 1 >= len(df):
            skipped_trades += 1
            continue

        vol_adj_pred = preds_array[i]
        conf = conf_scores[i]
        size = position_sizes[i]

        # Skip weak confidence signals if enabled
        if skip_weak_conf and conf < weak_conf_zscore:
            skipped_trades += 1
            continue

        if vol_adj_pred >= long_thresh:  # TP or Strong TP
            side = 'long'
            long_trades += 1
        elif vol_adj_pred <= short_thresh:  # SL side match
            side = 'short'
            short_trades += 1
        else:
            skipped_trades += 1
            continue  # classifier disagrees

        # Trade filters
        skip_trade = False
        for name, f in avoid_funcs.items():
            try:
                if f(row):
                    avoid_hits[name] += 1
                    skip_trade = True
            except:
                continue
        if skip_trade:
            skipped_trades += 1
            i += 1
            continue

        # --- Trade Simulation ---
        entry_price = df.loc[idx + 1, 'open']
        entry_time = df.loc[idx + 1, 'datetime']
        atr = row['atr_5']

        # Stop Loss (fixed volatility-based)
        sl_price = entry_price - SL_ATR_MULT * atr if side == 'long' else entry_price + SL_ATR_MULT * atr

        # Take Profit (dynamic, from model prediction, clipped)
        expected_move = abs(vol_adj_pred) * entry_price
        min_tp = 0.001 * entry_price  # minimum 0.1% move
        max_tp = TP_ATR_MULT * atr
        tp_move = np.clip(expected_move, min_tp, max_tp)
        tp_price = entry_price + tp_move if side == 'long' else entry_price - tp_move

        # Trailing logic
        trail_trigger = entry_price + TRAIL_START_MULT * atr if side == 'long' else entry_price - TRAIL_START_MULT * atr
        trail_stop = None

        max_price, min_price = entry_price, entry_price
        exit_price, exit_time = None, None

        fwd_idx = idx + 1
        while fwd_idx < len(df):
            fwd_row = df.loc[fwd_idx]
            max_price = max(max_price, fwd_row['high'])
            min_price = min(min_price, fwd_row['low'])

            if (side == 'long' and fwd_row['low'] <= sl_price) or (side == 'short' and fwd_row['high'] >= sl_price):
                exit_price = sl_price
                exit_time = fwd_row['datetime']
                break

            if (side == 'long' and fwd_row['high'] >= tp_price) or (side == 'short' and fwd_row['low'] <= tp_price):
                exit_price = tp_price
                exit_time = fwd_row['datetime']
                break

            if side == 'long' and fwd_row['high'] >= trail_trigger:
                trail_stop = fwd_row['close'] - TRAIL_STOP_MULT * atr
            if side == 'short' and fwd_row['low'] <= trail_trigger:
                trail_stop = fwd_row['close'] + TRAIL_STOP_MULT * atr

            if trail_stop:
                if (side == 'long' and fwd_row['low'] <= trail_stop) or (side == 'short' and fwd_row['high'] >= trail_stop):
                    exit_price = trail_stop
                    exit_time = fwd_row['datetime']
                    break

            fwd_idx += 1

        if exit_price is None:
            exit_price = df.loc[len(df) - 1, 'close']
            exit_time = df.loc[len(df) - 1, 'datetime']

        if not is_same_session(entry_time, exit_time):
            i += 1
            continue

        GROSS_PNL = (exit_price - entry_price) * TICK_VALUE * size if side == 'long' else (entry_price - exit_price) * TICK_VALUE * size
        COMMISSION = 3.98 * size
        pnl = GROSS_PNL - COMMISSION

        mfe = max_price - entry_price if side == 'long' else entry_price - min_price
        mae = entry_price - min_price if side == 'long' else max_price - entry_price

        temp_trades_data.append({
            'entry_time': entry_time,
            'exit_time': exit_time,
            'side': side,
            'entry_price': entry_price,
            'exit_price': exit_price,
            'pnl': pnl,
            'mfe': mfe,
            'mae': mae,
            'gross_pnl': GROSS_PNL,
            'vol_adj_pred': vol_adj_pred,
            'pred_class': pred_class,
            'confidence': conf,
            'position_size': size,
        })

        while i < len(X_test_idx) and labeled.loc[X_test_idx[i]]['datetime'] <= exit_time:
            i += 1
        continue

    # === Metrics ===
    results = pd.DataFrame(temp_trades_data)
    pnl_total = results['pnl'].sum() if not results.empty else 0
    trades = len(results)
    win_rate = (results['pnl'] > 0).mean() if not results.empty else 0
    expectancy = results['pnl'].mean() if not results.empty else 0
    profit_factor = results[results['pnl'] > 0]['pnl'].sum() / abs(results[results['pnl'] < 0]['pnl'].sum()) if not results.empty and (results['pnl'] < 0).any() else np.nan
    sharpe = results['pnl'].mean() / (results['pnl'].std() + 1e-9) * np.sqrt(trades) if trades > 1 else 0

    return {
        'pnl': pnl_total,
        'trades': trades,
        'win_rate': win_rate,
        'expectancy': expectancy,
        'profit_factor': profit_factor,
        'sharpe': sharpe,
        'long_trades': long_trades,
        'short_trades': short_trades,
        'avoid_hits': dict(avoid_hits),
        'threshold': long_thresh,
        'results': results
    }

# Cleanup

In [68]:
def compute_log_return_labels(
    df: pd.DataFrame,
    lookahead: int,
    is_same_session_fn,
    use_vol_norm: bool = True,
    vol_col: str = 'atr_14',
    cap_outliers: bool = True,
    cap_percentile: float = 99.9
) -> pd.DataFrame:
    """
    Computes log-returns and optionally volatility-adjusted returns for regression modeling.

    Parameters:
    - df: DataFrame with at least ['datetime', 'close', vol_col]
    - lookahead: Number of bars to look ahead
    - is_same_session_fn: function that returns True if two timestamps are in the same session
    - use_vol_norm: whether to normalize return by volatility (ATR or std)
    - vol_col: column to use for volatility adjustment
    - cap_outliers: whether to cap large return outliers (Winsorize)
    - cap_percentile: percentile threshold for capping

    Returns:
    - df_labeled: DataFrame with columns: ['log_return', 'vol_adj_return']
    """
    log_returns = []
    vol_adj_returns = []
    valid_idxs = []

    for i in range(len(df) - lookahead):
        t0 = df['datetime'].iloc[i]
        t1 = df['datetime'].iloc[i + lookahead]

        if not is_same_session_fn(t0, t1):
            continue

        entry_price = df['close'].iloc[i]
        future_price = df['close'].iloc[i + lookahead]
        vol = df[vol_col].iloc[i] if use_vol_norm else 1.0

        if entry_price <= 0 or pd.isna(future_price) or pd.isna(vol) or vol <= 0:
            continue

        log_ret = np.log(future_price / entry_price)
        vol_adj_ret = log_ret / vol

        log_returns.append(log_ret)
        vol_adj_returns.append(vol_adj_ret)
        valid_idxs.append(i)

    df_labeled = df.iloc[valid_idxs].copy()
    df_labeled['log_return'] = log_returns
    df_labeled['vol_adj_return'] = vol_adj_returns

    # Winsorize if needed
    if cap_outliers:
        upper = np.percentile(df_labeled['log_return'], cap_percentile)
        lower = np.percentile(df_labeled['log_return'], 100 - cap_percentile)
        df_labeled['log_return'] = df_labeled['log_return'].clip(lower, upper)

        upper_vol = np.percentile(df_labeled['vol_adj_return'], cap_percentile)
        lower_vol = np.percentile(df_labeled['vol_adj_return'], 100 - cap_percentile)
        df_labeled['vol_adj_return'] = df_labeled['vol_adj_return'].clip(lower_vol, upper_vol)

    return df_labeled


In [69]:
def compute_triple_barrier_labels(
    df: pd.DataFrame,
    lookahead: int,
    is_same_session_fn,
    atr_col: str = 'atr_14',
    sl_atr_mult: float = 1.0,
    tp_atr_mult: float = 1.0,
    strong_tp_mult: float = 2.0,
    strong_sl_mult: float = 2.0,
    min_atr_threshold: float = 0.01  # optional filter to skip low-volatility bars
) -> pd.DataFrame:
    """
    Assigns classification labels using the triple-barrier method:
    - +2: Strong take-profit hit (e.g., 2x ATR)
    - +1: Normal take-profit hit (1x ATR)
    -  0: Neither barrier hit within lookahead window
    - -1: Normal stop-loss hit (1x ATR)
    - -2: Strong SL hit (2x ATR)

    Parameters:
    - df: DataFrame with ['datetime', 'close', 'high', 'low', atr_col]
    - lookahead: number of bars to look ahead (the vertical barrier)
    - is_same_session_fn: function that validates two datetime values are in same trading session
    - atr_col: column name to use for ATR values
    - *_mult: multipliers to define barrier thresholds
    - min_atr_threshold: skip labeling if ATR is too low (prevents noise from low-vol zones)

    Returns:
    - df_out: DataFrame with an additional column: 'triple_barrier_label'
    """
    labels = []
    valid_idxs = []

    for i in range(len(df) - lookahead):
        entry_time = df['datetime'].iloc[i]
        exit_time = df['datetime'].iloc[i + lookahead]

        if not is_same_session_fn(entry_time, exit_time):
            continue

        entry_price = df['close'].iloc[i]
        atr = df[atr_col].iloc[i]

        # Filter out low volatility regimes
        if pd.isna(entry_price) or pd.isna(atr) or atr < min_atr_threshold:
            continue

        # Define price barriers
        tp = entry_price + tp_atr_mult * atr
        sl = entry_price - sl_atr_mult * atr
        strong_tp = entry_price + strong_tp_mult * atr
        strong_sl = entry_price - strong_sl_mult * atr

        future = df.iloc[i+1 : i+1+lookahead]
        label = 0  # default: vertical barrier hit first

        for _, row in future.iterrows():
            high = row['high']
            low = row['low']

            if low <= strong_sl:
                label = -2
                break
            elif high >= strong_tp:
                label = +2
                break
            elif low <= sl:
                label = -1
                break
            elif high >= tp:
                label = +1
                break

        labels.append(label)
        valid_idxs.append(i)

    df_out = df.iloc[valid_idxs].copy()
    df_out['triple_barrier_label'] = labels

    return df_out

In [70]:
lookahead_values = [5, 10, 20]

def label_and_save(lookahead):
    df_session = df.copy()
    print(f"Initial rows: {len(df_session)}")

    labeled_regression = compute_log_return_labels(
        df=df_session,
        lookahead=lookahead,
        is_same_session_fn=is_same_session,
        use_vol_norm= True,
        vol_col='atr_14',
        cap_outliers=True,
        cap_percentile=99.9
    )
    print(f"➤ Rows after future_return: {len(labeled_regression)} | Dropped: {len(df_session) - len(labeled_regression)}")


    labeled_class = compute_triple_barrier_labels(
        df=df_session,
        lookahead=lookahead,
        is_same_session_fn=is_same_session,
        atr_col='atr_14',
        sl_atr_mult=1.0,
        tp_atr_mult=1.0,
        strong_tp_mult=2.0,
        strong_sl_mult=2.0,
        min_atr_threshold=0.01
    )
    print(f"➤ Rows after triple_barrier_label: {len(labeled_class)} | Dropped: {len(df_session) - len(labeled_class)}")


    df_combined = labeled_regression.merge(labeled_class[['datetime', 'triple_barrier_label']], on='datetime', how='left')
    print(f"➤ Rows after merging: {len(df_combined)}")
    print(f"➤ triple_barrier_label NaNs after merge: {df_combined['triple_barrier_label'].isna().sum()}")

    rows_before_final = len(df_combined)
    df_final = df_combined.dropna(subset=['vol_adj_return', 'log_return', 'triple_barrier_label'] + all_features)
    print(f"➤ Rows after final drop: {len(df_final)} | Dropped: {rows_before_final - len(df_final)}")

    # Step 5: Save parquet
    df_final.to_parquet(f"labeled_data_{lookahead}_session_less.parquet")
    print(f"✅ Saved labeled_data_{lookahead}_session_less.parquet with {len(df_final)} rows")

for lookahead in lookahead_values:
    fname = f"labeled_data_{lookahead}_session_less.parquet"
    if os.path.exists(fname):
        print(f"⏭️ File {fname} already exists. Skipping...")
        continue
    print(f"📦 Labeling {fname}...")
    label_and_save(lookahead)


⏭️ File labeled_data_5_session_less.parquet already exists. Skipping...
⏭️ File labeled_data_10_session_less.parquet already exists. Skipping...
⏭️ File labeled_data_20_session_less.parquet already exists. Skipping...


# Train

##### Wrappers

In [71]:
class LSTMWrapper(BaseEstimator, RegressorMixin):
    def __init__(self, input_shape, epochs=10, batch_size=32, verbose=0):
        self.input_shape = input_shape
        self.epochs = epochs
        self.batch_size = batch_size
        self.verbose = verbose
        self.model = None

    def build_model(self):
        model = Sequential()
        model.add(LSTM(32, input_shape=(self.input_shape, 1)))
        model.add(Dense(1))
        model.compile(loss='mse', optimizer=Adam(learning_rate=0.001))
        return model

    def fit(self, X, y):
        print(f"🧠 [LSTMWrapper] Starting training with {X.shape[0]} samples, {X.shape[1]} features")
        X = np.array(X).reshape((len(X), self.input_shape, 1))
        self.model = self.build_model()
        self.model.fit(X, y, epochs=self.epochs, batch_size=self.batch_size, verbose=self.verbose)
        print(f"✅ [LSTMWrapper] Finished training.")
        return self

    def predict(self, X):
        print(f"🔮 [LSTMWrapper] Predicting on {X.shape[0]} samples...")
        X = np.array(X).reshape((len(X), self.input_shape, 1))
        print(f"✅ [LSTMWrapper] Prediction complete.")
        return self.model.predict(X).flatten()

In [72]:
class CNN1DWrapper:
    def __init__(self, input_shape):
        self.input_shape = input_shape
        self.model = self.build_model()
        self.scaler = StandardScaler()

    def build_model(self):
        model = Sequential()
        model.add(Conv1D(64, kernel_size=3, activation='relu', input_shape=self.input_shape))
        model.add(Conv1D(32, kernel_size=3, activation='relu'))
        model.add(GlobalAveragePooling1D())
        model.add(Dropout(0.3))
        model.add(Dense(32, activation='relu'))
        model.add(Dense(1, activation='tanh'))  # outputs in [-1, 1]
        model.compile(optimizer=Adam(learning_rate=0.001), loss='mae')
        return model

    def fit(self, X, y, epochs=20, batch_size=128, verbose=1):
        print(f"\n🔧 [CNN1DWrapper] Scaling target and starting training...")
        y_scaled = self.scaler.fit_transform(y.reshape(-1, 1)).ravel()
        start = time.time()
        self.model.fit(X, y_scaled, epochs=epochs, batch_size=batch_size, verbose=verbose)
        print(f"✅ [CNN1DWrapper] Training complete in {time.time() - start:.2f} seconds.")

    def predict(self, X):
        print(f"\n🔮 [CNN1DWrapper] Predicting on {X.shape}...")
        start = time.time()
        y_scaled_pred = self.model.predict(X).ravel()
        y_pred = self.scaler.inverse_transform(y_scaled_pred.reshape(-1, 1)).ravel()
        print(f"✅ [CNN1DWrapper] Prediction done in {time.time() - start:.2f} seconds.")

        print("🔍 Prediction Stats:")
        print(f"Min: {y_pred.min():.6f} | Max: {y_pred.max():.6f}")
        print(f"Mean: {y_pred.mean():.6f} | Std: {y_pred.std():.6f}")

        # Safety check for wild predictions
        if abs(y_pred).max() > 1:
            print("⚠️ Warning: Some predictions exceed ±1 — consider checking target scaling or model output activation.")

        return y_pred

##### Training Helpers

In [73]:
def check_overfit(model, X_tr, X_te, y_tr, y_te):
    train_preds = model.predict(X_tr)
    test_preds = model.predict(X_te)
    train_mse = mean_squared_error(y_tr, train_preds)
    test_mse = mean_squared_error(y_te, test_preds)
    ratio = test_mse / train_mse if train_mse != 0 else float('inf')

    print(f"\n📉 Overfitting check:")
    print(f"Train MSE: {train_mse:.8f}")
    print(f"Test MSE:  {test_mse:.8f}")
    print(f"Overfit ratio (Test / Train): {ratio:.2f}")

    if ratio > 2.0:
        print("🚨 Overfitting: Model performs poorly on unseen data.")
    elif ratio > 1.2:
        print("⚠️ Mild overfitting: Model may be too complex.")
    elif ratio < 0.8:
        print("⚠️ Possible underfitting: Model may be too simple.")
    else:
        print("✅ Good generalization between train and test.")

In [74]:
def generate_oof_predictions(models, X, y, n_splits=5):
    """
    Generates out-of-fold predictions for a list of models using TimeSeriesSplit.

    Parameters:
    - models: list of sklearn-style models (will be cloned per fold)
    - X: feature DataFrame
    - y: target Series
    - n_splits: number of TSCV splits

    Returns:
    - oof_df: DataFrame of shape (len(X), len(models)) with OOF predictions
    """
    oof_preds = np.zeros((len(X), len(models)))
    tscv = TimeSeriesSplit(n_splits=n_splits)

    for i, model in enumerate(models):
        for train_idx, val_idx in tscv.split(X):
            X_train, y_train = X.iloc[train_idx], y.iloc[train_idx]
            X_val = X.iloc[val_idx]

            fold_model = clone(model)
            fold_model.fit(X_train, y_train)
            oof_preds[val_idx, i] = fold_model.predict(X_val)

    return pd.DataFrame(oof_preds, index=X.index, columns=[f'model_{i}' for i in range(len(models))])


In [75]:
def generate_oof_cnn(model_class, X_seq, y, n_splits=5):
    """
    Generate OOF predictions for a CNN1DWrapper-style model.
    
    Parameters:
    - model_class: class (not instance) of your CNN model
    - X_seq: DataFrame or ndarray to be reshaped to 3D
    - y: Series or array
    - n_splits: number of TSCV folds
    
    Returns:
    - oof_preds: np.array of predictions (same length as X_seq)
    """
    X_array = X_seq.values.reshape((len(X_seq), X_seq.shape[1], 1))
    y_array = y.values if hasattr(y, 'values') else y

    oof_preds = np.zeros(len(X_seq))
    tscv = TimeSeriesSplit(n_splits=n_splits)

    for train_idx, val_idx in tscv.split(X_array):
        X_tr, X_val = X_array[train_idx], X_array[val_idx]
        y_tr = y_array[train_idx]

        model = model_class(input_shape=(X_tr.shape[1], 1))
        model.fit(X_tr, y_tr)

        oof_preds[val_idx] = model.predict(X_val)

    return oof_preds

In [76]:
def generate_oof_lstm(model_class, X_seq, y, n_splits=5):
    """
    Generate OOF predictions for an LSTMWrapper-style model.

    Parameters:
    - model_class: class (not instance) of your LSTM wrapper (e.g. LSTMWrapper)
    - X_seq: numpy array or DataFrame (2D: samples x features)
    - y: numpy array or Series
    - n_splits: number of TSCV folds

    Returns:
    - oof_preds: 1D numpy array of out-of-fold predictions
    """
    if hasattr(X_seq, "values"):
        X_seq = X_seq.values
    if hasattr(y, "values"):
        y = y.values

    oof_preds = np.zeros(len(X_seq))
    tscv = TimeSeriesSplit(n_splits=n_splits)

    for train_idx, val_idx in tscv.split(X_seq):
        X_tr, X_val = X_seq[train_idx], X_seq[val_idx]
        y_tr = y[train_idx]

        model = model_class(input_shape=X_tr.shape[1])
        model.fit(X_tr, y_tr)

        oof_preds[val_idx] = model.predict(X_val)

    return oof_preds

##### Real Training

Tree-Based Models (XGBoost, LightGBM, CatBoost, RF)
Sequential Models (LSTM, 1D CNN)
Linear Models (ElasticNet, Logistic Regression)

Regression:
[XGBRegressor
LSTMWrapper
CatBoostRegressor]
MetaRegressor: XGBoostRegressor
+1: 1D CNN

Classifier:
[CatBoostClassifier
RandomForest
LSTM]
MetaClassifier:
+1: XGBoost Classifier

In [77]:
def run_lookahead_for_session_regression(LOOKAHEAD):
    labeled = pd.read_parquet(f"labeled_data_{LOOKAHEAD}_session_less.parquet")

    cutoff_date = pd.Timestamp("2025-01-01", tz="America/New_York")
    train = labeled[labeled['datetime'] < cutoff_date]
    test = labeled[labeled['datetime'] >= cutoff_date]

    X_train_tree = train[tree_based_features]
    X_test_tree = test[tree_based_features]

    X_train_seq  = train[sequential_features]
    X_test_seq = test[sequential_features]

    y_train_tree = train['log_return']
    y_test_tree = test['log_return']

    y_train_seq = train['vol_adj_return']
    y_test_seq = test['vol_adj_return']

    print(f"Train range: {train['datetime'].min()} to {train['datetime'].max()} | Rows: {len(train)}")
    print(f"Test range: {test['datetime'].min()} to {test['datetime'].max()} | Rows: {len(test)}")

    ###########################
    ########## Models #########
    ###########################

    def tune_xgboost(X_train, y_train):
        def objective(trial):
            params = {
                'n_estimators': 2000,
                'learning_rate': trial.suggest_float('learning_rate', 0.05, 0.3, log=True),  # tighten low end
                'max_depth': trial.suggest_int('max_depth', 4, 10),  # more complex trees
                'subsample': trial.suggest_float('subsample', 0.7, 1.0),  # prevent underfitting
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),  # prevent weak splits
                'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 0.3),  # reduce L1 regularization
                'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 0.3),  # reduce L2 regularization
                'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
                'gamma': trial.suggest_float('gamma', 0.0, 1.0),
            }


            tscv = TimeSeriesSplit(n_splits=5)
            scores = []

            for train_idx, val_idx in tscv.split(X_train):
                X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
                y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

                model = XGBRegressor(**params, random_state=42)
                model.fit(X_tr, y_tr)
                preds = model.predict(X_val)
                rmse = root_mean_squared_error(y_val, preds)
                scores.append(rmse)

            return np.mean(scores)

        study = optuna.create_study(
            direction='minimize',
            study_name='xgb_opt',
            sampler=optuna.samplers.TPESampler(seed=42),
            pruner=optuna.pruners.SuccessiveHalvingPruner(min_resource=50, reduction_factor=4),
            storage=f'sqlite:///xgb_opt_study{LOOKAHEAD}_session_less.db',
            load_if_exists=True
        )
        study.optimize(objective, n_trials=1)
        return study.best_params

    def tune_lightgbm(X_train, y_train):
        def objective(trial):
            params = {
                "n_estimators": 2000,
                "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
                "max_depth": trial.suggest_int("max_depth", 5, 12),
                "num_leaves": trial.suggest_int("num_leaves", 64, 512),
                "min_child_samples": trial.suggest_int("min_child_samples", 1, 30),
                "subsample": trial.suggest_float("subsample", 0.7, 1.0),
                "colsample_bytree": trial.suggest_float("colsample_bytree", 0.7, 1.0),
                'reg_alpha': trial.suggest_float("reg_alpha", 0.0, 0.1),
                'reg_lambda': trial.suggest_float("reg_lambda", 0.0, 0.1),
                "min_split_gain": trial.suggest_float("min_split_gain", 0.0, 0.01),
                "force_col_wise": trial.suggest_categorical("force_col_wise", [True, False])
            }
            tscv = TimeSeriesSplit(n_splits=5)
            scores = []
            for train_idx, val_idx in tscv.split(X_train):
                X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
                y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

                model = LGBMRegressor(**params, random_state=42, n_jobs=-5)
                model.fit(
                    X_tr, y_tr,
                    eval_set=[(X_val, y_val)],
                    eval_metric="rmse"
                )
                preds = model.predict(X_val)
                rmse = root_mean_squared_error(y_val, preds)
                scores.append(rmse)
            return np.mean(scores)

        study = optuna.create_study(
            direction="minimize",
            study_name="lgbm_opt",
            sampler=optuna.samplers.TPESampler(seed=42),
            pruner=optuna.pruners.SuccessiveHalvingPruner(min_resource=50, reduction_factor=4),
            storage=f"sqlite:///lgbm_opt_study{LOOKAHEAD}_session_less.db",
            load_if_exists=True
        )
        study.optimize(objective, n_trials=1)
        return study.best_params

    def tune_catboost(X_train, y_train):
        def objective(trial):
            params = {
                'iterations': 2000,
                'depth': trial.suggest_int('depth', 4, 8),
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.05, log=True),
                'loss_function': 'RMSE',
                'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 3.0, 10.0),
                'random_strength': trial.suggest_float('random_strength', 1.0, 5.0),
                'bootstrap_type': 'Bayesian',
                'bagging_temperature': trial.suggest_float('bagging_temperature', 0.1, 1.0),
                'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
            }

            tscv = TimeSeriesSplit(n_splits=5)
            scores = []

            for train_idx, val_idx in tscv.split(X_train):
                X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
                y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

                model = CatBoostRegressor(**params, random_state=42)
                model.fit(
                    X_tr, y_tr,
                    eval_set=(X_val, y_val),
                    use_best_model=True,
                    verbose=False,
                    early_stopping_rounds=30
                )
                preds = model.predict(X_val)
                rmse = root_mean_squared_error(y_val, preds)
                scores.append(rmse)

            return np.mean(scores)

        study = optuna.create_study(
            direction='minimize',
            study_name='catboost_opt',
            sampler=optuna.samplers.TPESampler(seed=42),
            pruner=optuna.pruners.SuccessiveHalvingPruner(min_resource=50, reduction_factor=4),
            storage=f'sqlite:///catboost_opt_study{LOOKAHEAD}_session_less.db',
            load_if_exists=True
        )
        study.optimize(objective, n_trials=1)
        return study.best_params

    def tune_meta_xgb(X_meta, y_meta):
        def objective(trial):
            params = {
                'n_estimators': 2000,
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
                'max_depth': trial.suggest_int('max_depth', 2, 6),
                'subsample': trial.suggest_float('subsample', 0.7, 1.0),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
                'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 1.0),
                'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 1.0),
            }

            tscv = TimeSeriesSplit(n_splits=5)
            scores = []

            for train_idx, val_idx in tscv.split(X_meta):
                X_tr, X_val = X_meta.iloc[train_idx], X_meta.iloc[val_idx]
                y_tr, y_val = y_meta.iloc[train_idx], y_meta.iloc[val_idx]

                model = XGBRegressor(**params, random_state=42)
                model.fit(X_tr, y_tr)
                preds = model.predict(X_val)
                rmse = root_mean_squared_error(y_val, preds)
                scores.append(rmse)

            return np.mean(scores)

        study = optuna.create_study(
            direction='minimize',
            study_name='meta_xgb_stack',
            sampler=optuna.samplers.TPESampler(seed=42),
            pruner=optuna.pruners.MedianPruner(n_startup_trials=5),
            storage=f'sqlite:///meta_xgb_stack_{LOOKAHEAD}_session_less.db',
            load_if_exists=True
        )
        study.optimize(objective, n_trials=25)
        return study.best_params

    ################################################
    ####### Ensure index consistency
    ####### Sequential #######
    y_train_seq = y_train_seq.loc[X_train_seq.index]
    y_test_seq = y_test_seq.loc[X_test_seq.index]

    ################################################
    ####### Tune models
    ####### Tree Based #######
    catboost_params     = tune_catboost(X_train_tree, y_train_seq)
    #xgboost_params      = tune_xgboost(X_train_tree, y_train_seq)
    lgbm_params         = tune_lightgbm(X_train_tree, y_train_tree)
    ####### Sequential #######
    # N/A

    ################################################
    ####### Train models
    ####### Tree Based #######
    catboost    = CatBoostRegressor(**catboost_params, random_state=42, verbose=0)
    #xgboost     = XGBRegressor(**xgboost_params, random_state=42)
    lgbm        = LGBMRegressor(**lgbm_params, random_state=42)
    catboost.fit(X_train_tree, y_train_seq)
    #xgboost.fit(X_train_tree, y_train_seq)
    lgbm.fit(X_train_tree, y_train_tree)
    ####### Sequential #######
    X_lstm = X_train_seq.values
    y_lstm = y_train_seq.values
    lstm_model = LSTMWrapper(input_shape=X_lstm.shape[1])
    lstm_model.fit(X_lstm, y_lstm)  # wrapper does the reshaping
    X_lstm_test = X_test_seq.values
    lstm_preds = lstm_model.predict(X_lstm_test)


    X_cnn = X_train_seq.values.reshape((len(X_train_seq), X_train_seq.shape[1], 1))
    y_cnn = y_train_seq.values
    cnn_model = CNN1DWrapper(input_shape=(X_cnn.shape[1], 1))
    cnn_model.fit(X_cnn, y_cnn)
    X_cnn_test = X_test_seq.values.reshape((len(X_test_seq), X_test_seq.shape[1], 1))
    cnn_preds = cnn_model.predict(X_cnn_test)

    ################################################
    ####### OOF Predicition
    ####### Tree Based #######
    oof_tree = generate_oof_predictions([catboost], X_train_tree, y_train_seq)

    print("\n🔍 Checking variance in OOF base model predictions:")
    print(oof_tree.describe())
    print("Std per model:\n", oof_tree.std())
    ####### Sequential #######
    oof_preds_cnn = generate_oof_cnn(CNN1DWrapper, X_train_seq, y_train_seq)

    print("\n🔍 Checking variance in OOF base model predictions:")
    print(pd.Series(oof_preds_cnn).describe())
    print("Std:", np.std(oof_preds_cnn))

    ################################################
    ####### Meta Params and Training
    ####### Tree Based #######
    X_seq_np = X_train_seq.values
    lstm_oof = generate_oof_lstm(LSTMWrapper, X_seq_np, y_train_seq)  # <- I can give you this

    X_meta_train = pd.DataFrame({
        'cat': oof_tree.iloc[:, 0],
        #'xgb': oof_tree.iloc[:, 1],
        'lstm': lstm_oof
    })

    X_test_meta = pd.DataFrame({
        'xgb': xgboost.predict(X_test_tree),
        'cat': catboost.predict(X_test_tree),
        'lstm': lstm_model.predict(X_test_seq.values)
    })

    meta_params = tune_meta_xgb(X_meta_train, y_train_seq)
    meta_model = XGBRegressor(**meta_params, random_state=42)
    meta_model.fit(X_meta_train, y_train_seq)

    ################################################
    ####### Evaluate Model
    def evaluate_model(name, model, Xtr, Xte, ytr, yte, scaled=False):
        train_preds = model.predict(Xtr)
        test_preds = model.predict(Xte)
        train_mse = mean_squared_error(ytr, train_preds)
        test_mse = mean_squared_error(yte, test_preds)
        overfit_ratio = test_mse / train_mse if train_mse != 0 else float('inf')

        print(f"\n📊 {name} Performance:")
        print(f"Train MSE: {train_mse:.8f}")
        print(f"Test MSE: {test_mse:.8f}")
        print(f"Overfit ratio (Test / Train): {overfit_ratio:.2f}")
        if overfit_ratio > 1.5:
            print("⚠️ Potential overfitting detected.")
        elif overfit_ratio < 0.7:
            print("⚠️ Possibly underfitting.")
        else:
            print("✅ Generalization looks reasonable.")
        return test_preds
    
    ####### Tree Based #######
    #preds_xgboost   = evaluate_model("XGBoostRegressor", xgboost, X_train_tree, X_test_tree, y_train_seq, y_test_seq)
    preds_catboost  = evaluate_model("CatBoostRegressor", catboost, X_train_tree, X_test_tree, y_train_seq, y_test_seq)
    preds_stack     = evaluate_model("StackingRegressor", meta_model, X_meta_train, X_test_meta, y_train_seq.values, y_test_seq.values)
    preds_lgbm      = evaluate_model("LightGBM", lgbm, X_train_tree, X_test_tree, y_train_tree, y_test_tree)
    ####### Sequential #######
    X_cnn_train = X_train_seq.values.reshape((len(X_train_seq), X_train_seq.shape[1], 1))
    X_cnn_test = X_test_seq.values.reshape((len(X_test_seq), X_test_seq.shape[1], 1))

    preds_lstm       = evaluate_model("LSTM", lstm_model, X_train_seq.values, X_test_seq.values, y_train_seq.values, y_test_seq.values)
    preds_cnn      = evaluate_model("CNN", cnn_model, X_cnn_train, X_cnn_test, y_train_seq.values, y_test_seq.values)

    ################################################
    ####### Target Distribution
    ####### Tree based #######
    print("\n🔍 Target distribution:")
    print(y_train_tree.describe())
    ####### Sequential #######
    print("\n🔍 Target distribution:")
    print(y_train_seq.describe())
    
    ################################################
    ####### Choose final model
    ####### Tree Based #######
    preds_lgbm = lgbm.predict(X_test_tree)
    print("\n🔍 Checking prediction variance from stack model:")
    print(f"Min: {preds_lgbm.min():.8f}")
    print(f"Max: {preds_lgbm.max():.8f}")
    print(f"Mean: {preds_lgbm.mean():.8f}")
    print(f"Std Dev: {preds_lgbm.std():.8f}")
    print(f"First 5 Predictions: {preds_lgbm[:5]}")

    mae_lgbm = mean_absolute_error(y_test_tree, preds_lgbm)
    rmse_lgbm = np.sqrt(mean_squared_error(y_test_tree, preds_lgbm))
    r2_lgbm = r2_score(y_test_tree, preds_lgbm)

    print(f"MAE: {mae_lgbm:.4f}")
    print(f"RMSE: {rmse_lgbm:.4f}")
    print(f"R²: {r2_lgbm:.4f}")
    ####### Stacked Model #######
    preds = meta_model.predict(X_test_meta)
    print("\n🔍 Checking prediction variance from stack model:")
    print(f"Min: {preds.min():.8f}")
    print(f"Max: {preds.max():.8f}")
    print(f"Mean: {preds.mean():.8f}")
    print(f"Std Dev: {preds.std():.8f}")
    print(f"First 5 Predictions: {preds[:5]}")

    mae = mean_absolute_error(y_test_seq, preds)
    rmse = np.sqrt(mean_squared_error(y_test_seq, preds))
    r2 = r2_score(y_test_seq, preds)

    print(f"MAE: {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R²: {r2:.4f}")
    ####### Sequential Solo #######
    X_test_cnn = X_test_seq.values.reshape((len(X_test_seq), X_test_seq.shape[1], 1))
    preds_cnn = cnn_model.predict(X_test_cnn)
    print("\n🔍 Checking prediction variance from stack model:")
    print(f"Min: {preds_cnn.min():.8f}")
    print(f"Max: {preds_cnn.max():.8f}")
    print(f"Mean: {preds_cnn.mean():.8f}")
    print(f"Std Dev: {preds_cnn.std():.8f}")
    print(f"First 5 Predictions: {preds_cnn[:5]}")

    mae_cnn = mean_absolute_error(y_test_seq, preds_cnn)
    rmse_cnn = np.sqrt(mean_squared_error(y_test_seq, preds_cnn))
    r2_cnn = r2_score(y_test_seq, preds_cnn)

    print(f"MAE: {mae_cnn:.4f}")
    print(f"RMSE: {rmse_cnn:.4f}")
    print(f"R²: {r2_cnn:.4f}")

    metadata = {
        "lookahead": LOOKAHEAD,
        #"xgboost_params": xgboost_params,
        "catboost_params": catboost_params,
        "meta_params": meta_params,
        "lgbm_params": lgbm_params
    }
    with open(f"model_metadata_{LOOKAHEAD}.json", "w") as f:
        json.dump(metadata, f, indent=2)
        
    joblib.dump(meta_model, f"stack_model_LOOKAHEAD_{LOOKAHEAD}_session_less.pkl")
    joblib.dump(cnn_model, f"cnn_model_LOOKAHEAD_{LOOKAHEAD}_session_less.pkl")
    joblib.dump(lgbm, f"lgbm_model_LOOKAHEAD_{LOOKAHEAD}_session_less.pkl")

    return {
        'lookahead': LOOKAHEAD,
        'preds_stack': preds,
        'preds_cnn': preds_cnn,
        'preds_lgbm': preds_lgbm,
        'X_test_seq': X_test_seq,
        'X_test_meta': X_test_meta,
        'true_values': y_test_seq.values
    }

In [78]:
def run_lookahead_for_session_classification(LOOKAHEAD):
    labeled = pd.read_parquet(f"labeled_data_{LOOKAHEAD}_session_less.parquet")

    cutoff_date = pd.Timestamp("2025-01-01", tz="America/New_York")
    train = labeled[labeled['datetime'] < cutoff_date]
    test = labeled[labeled['datetime'] >= cutoff_date]

    X_train_tree = train[tree_based_features]
    X_test_tree = test[tree_based_features]

    X_train_seq  = train[sequential_features]
    X_test_seq = test[sequential_features]

    y_train_tree = train['log_return']
    y_test_tree = test['log_return']

    y_train_seq = train['vol_adj_return']
    y_test_seq = test['vol_adj_return']

    print(f"Train range: {train['datetime'].min()} to {train['datetime'].max()} | Rows: {len(train)}")
    print(f"Test range: {test['datetime'].min()} to {test['datetime'].max()} | Rows: {len(test)}")

    ###########################
    ########## Models #########
    ###########################

    def tune_xgboost(X_train, y_train):
        def objective(trial):
            params = {
                'n_estimators': 2000,
                'learning_rate': trial.suggest_float('learning_rate', 0.05, 0.3, log=True),  # tighten low end
                'max_depth': trial.suggest_int('max_depth', 4, 10),  # more complex trees
                'subsample': trial.suggest_float('subsample', 0.7, 1.0),  # prevent underfitting
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.7, 1.0),  # prevent weak splits
                'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 0.3),  # reduce L1 regularization
                'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 0.3),  # reduce L2 regularization
                'min_child_weight': trial.suggest_int('min_child_weight', 1, 10),
                'gamma': trial.suggest_float('gamma', 0.0, 1.0),
            }


            tscv = TimeSeriesSplit(n_splits=5)
            scores = []

            for train_idx, val_idx in tscv.split(X_train):
                X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
                y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

                model = XGBRegressor(**params, random_state=42)
                model.fit(X_tr, y_tr)
                preds = model.predict(X_val)
                rmse = root_mean_squared_error(y_val, preds)
                scores.append(rmse)

            return np.mean(scores)

        study = optuna.create_study(
            direction='minimize',
            study_name='xgb_opt',
            sampler=optuna.samplers.TPESampler(seed=42),
            pruner=optuna.pruners.SuccessiveHalvingPruner(min_resource=50, reduction_factor=4),
            storage=f'sqlite:///xgb_opt_study{LOOKAHEAD}_session_less.db',
            load_if_exists=True
        )
        study.optimize(objective, n_trials=1)
        return study.best_params

    def tune_lightgbm(X_train, y_train):
        def objective(trial):
            params = {
                "n_estimators": 2000,
                "learning_rate": trial.suggest_float("learning_rate", 0.01, 0.1, log=True),
                "max_depth": trial.suggest_int("max_depth", 5, 12),
                "num_leaves": trial.suggest_int("num_leaves", 64, 512),
                "min_child_samples": trial.suggest_int("min_child_samples", 1, 30),
                "subsample": trial.suggest_float("subsample", 0.7, 1.0),
                "colsample_bytree": trial.suggest_float("colsample_bytree", 0.7, 1.0),
                'reg_alpha': trial.suggest_float("reg_alpha", 0.0, 0.1),
                'reg_lambda': trial.suggest_float("reg_lambda", 0.0, 0.1),
                "min_split_gain": trial.suggest_float("min_split_gain", 0.0, 0.01),
                "force_col_wise": trial.suggest_categorical("force_col_wise", [True, False])
            }
            tscv = TimeSeriesSplit(n_splits=5)
            scores = []
            for train_idx, val_idx in tscv.split(X_train):
                X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
                y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

                model = LGBMRegressor(**params, random_state=42, n_jobs=-5)
                model.fit(
                    X_tr, y_tr,
                    eval_set=[(X_val, y_val)],
                    eval_metric="rmse"
                )
                preds = model.predict(X_val)
                rmse = root_mean_squared_error(y_val, preds)
                scores.append(rmse)
            return np.mean(scores)

        study = optuna.create_study(
            direction="minimize",
            study_name="lgbm_opt",
            sampler=optuna.samplers.TPESampler(seed=42),
            pruner=optuna.pruners.SuccessiveHalvingPruner(min_resource=50, reduction_factor=4),
            storage=f"sqlite:///lgbm_opt_study{LOOKAHEAD}_session_less.db",
            load_if_exists=True
        )
        study.optimize(objective, n_trials=1)
        return study.best_params

    def tune_catboost(X_train, y_train):
        def objective(trial):
            params = {
                'iterations': 2000,
                'depth': trial.suggest_int('depth', 4, 8),
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.05, log=True),
                'loss_function': 'RMSE',
                'l2_leaf_reg': trial.suggest_float('l2_leaf_reg', 3.0, 10.0),
                'random_strength': trial.suggest_float('random_strength', 1.0, 5.0),
                'bootstrap_type': 'Bayesian',
                'bagging_temperature': trial.suggest_float('bagging_temperature', 0.1, 1.0),
                'min_data_in_leaf': trial.suggest_int('min_data_in_leaf', 10, 100),
            }

            tscv = TimeSeriesSplit(n_splits=5)
            scores = []

            for train_idx, val_idx in tscv.split(X_train):
                X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
                y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

                model = CatBoostRegressor(**params, random_state=42)
                model.fit(
                    X_tr, y_tr,
                    eval_set=(X_val, y_val),
                    use_best_model=True,
                    verbose=False,
                    early_stopping_rounds=30
                )
                preds = model.predict(X_val)
                rmse = root_mean_squared_error(y_val, preds)
                scores.append(rmse)

            return np.mean(scores)

        study = optuna.create_study(
            direction='minimize',
            study_name='catboost_opt',
            sampler=optuna.samplers.TPESampler(seed=42),
            pruner=optuna.pruners.SuccessiveHalvingPruner(min_resource=50, reduction_factor=4),
            storage=f'sqlite:///catboost_opt_study{LOOKAHEAD}_session_less.db',
            load_if_exists=True
        )
        study.optimize(objective, n_trials=1)
        return study.best_params

    def tune_meta_xgb(X_meta, y_meta):
        def objective(trial):
            params = {
                'n_estimators': 2000,
                'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.3, log=True),
                'max_depth': trial.suggest_int('max_depth', 2, 6),
                'subsample': trial.suggest_float('subsample', 0.7, 1.0),
                'colsample_bytree': trial.suggest_float('colsample_bytree', 0.5, 1.0),
                'reg_alpha': trial.suggest_float('reg_alpha', 0.0, 1.0),
                'reg_lambda': trial.suggest_float('reg_lambda', 0.0, 1.0),
            }

            tscv = TimeSeriesSplit(n_splits=5)
            scores = []

            for train_idx, val_idx in tscv.split(X_meta):
                X_tr, X_val = X_meta.iloc[train_idx], X_meta.iloc[val_idx]
                y_tr, y_val = y_meta.iloc[train_idx], y_meta.iloc[val_idx]

                model = XGBRegressor(**params, random_state=42)
                model.fit(X_tr, y_tr)
                preds = model.predict(X_val)
                rmse = root_mean_squared_error(y_val, preds)
                scores.append(rmse)

            return np.mean(scores)

        study = optuna.create_study(
            direction='minimize',
            study_name='meta_xgb_stack',
            sampler=optuna.samplers.TPESampler(seed=42),
            pruner=optuna.pruners.MedianPruner(n_startup_trials=5),
            storage=f'sqlite:///meta_xgb_stack_{LOOKAHEAD}_session_less.db',
            load_if_exists=True
        )
        study.optimize(objective, n_trials=1)
        return study.best_params

    ################################################
    ####### Ensure index consistency
    ####### Sequential #######
    y_train_seq = y_train_seq.loc[X_train_seq.index]
    y_test_seq = y_test_seq.loc[X_test_seq.index]

    ################################################
    ####### Tune models
    ####### Tree Based #######
    catboost_params     = tune_catboost(X_train_tree, y_train_seq)
    xgboost_params      = tune_xgboost(X_train_tree, y_train_seq)
    lgbm_params         = tune_lightgbm(X_train_tree, y_train_tree)
    ####### Sequential #######
    # N/A

    ################################################
    ####### Train models
    ####### Tree Based #######
    catboost    = CatBoostRegressor(**catboost_params, random_state=42, verbose=0)
    xgboost     = XGBRegressor(**xgboost_params, random_state=42)
    lgbm        = LGBMRegressor(**lgbm_params, random_state=42)
    catboost.fit(X_train_tree, y_train_seq)
    xgboost.fit(X_train_tree, y_train_seq)
    lgbm.fit(X_train_tree, y_train_tree)
    ####### Sequential #######
    X_lstm = X_train_seq.values
    y_lstm = y_train_seq.values
    lstm_model = LSTMWrapper(input_shape=X_lstm.shape[1])
    lstm_model.fit(X_lstm, y_lstm)  # wrapper does the reshaping
    X_lstm_test = X_test_seq.values
    lstm_preds = lstm_model.predict(X_lstm_test)


    X_cnn = X_train_seq.values.reshape((len(X_train_seq), X_train_seq.shape[1], 1))
    y_cnn = y_train_seq.values
    cnn_model = CNN1DWrapper(input_shape=(X_cnn.shape[1], 1))
    cnn_model.fit(X_cnn, y_cnn)
    X_cnn_test = X_test_seq.values.reshape((len(X_test_seq), X_test_seq.shape[1], 1))
    cnn_preds = cnn_model.predict(X_cnn_test)

    ################################################
    ####### OOF Predicition
    ####### Tree Based #######
    oof_tree = generate_oof_predictions([xgboost, catboost], X_train_tree, y_train_seq)

    print("\n🔍 Checking variance in OOF base model predictions:")
    print(oof_tree.describe())
    print("Std per model:\n", oof_tree.std())
    ####### Sequential #######
    oof_preds_cnn = generate_oof_cnn(CNN1DWrapper, X_train_seq, y_train_seq)

    print("\n🔍 Checking variance in OOF base model predictions:")
    print(pd.Series(oof_preds_cnn).describe())
    print("Std:", np.std(oof_preds_cnn))

    ################################################
    ####### Meta Params and Training
    ####### Tree Based #######
    X_seq_np = X_train_seq.values
    lstm_oof = generate_oof_lstm(LSTMWrapper, X_seq_np, y_train_seq)  # <- I can give you this

    X_meta_train = pd.DataFrame({
        'xgb': oof_tree.iloc[:, 0],
        'cat': oof_tree.iloc[:, 1],
        'lstm': lstm_oof
    })

    X_test_meta = pd.DataFrame({
        'xgb': xgboost.predict(X_test_tree),
        'cat': catboost.predict(X_test_tree),
        'lstm': lstm_model.predict(X_test_seq.values)
    })

    meta_params = tune_meta_xgb(X_meta_train, y_train_seq)
    meta_model = XGBRegressor(**meta_params, random_state=42)
    meta_model.fit(X_meta_train, y_train_seq)

    ################################################
    ####### Evaluate Model
    def evaluate_model(name, model, Xtr, Xte, ytr, yte, scaled=False):
        train_preds = model.predict(Xtr)
        test_preds = model.predict(Xte)
        train_mse = mean_squared_error(ytr, train_preds)
        test_mse = mean_squared_error(yte, test_preds)
        overfit_ratio = test_mse / train_mse if train_mse != 0 else float('inf')

        print(f"\n📊 {name} Performance:")
        print(f"Train MSE: {train_mse:.8f}")
        print(f"Test MSE: {test_mse:.8f}")
        print(f"Overfit ratio (Test / Train): {overfit_ratio:.2f}")
        if overfit_ratio > 1.5:
            print("⚠️ Potential overfitting detected.")
        elif overfit_ratio < 0.7:
            print("⚠️ Possibly underfitting.")
        else:
            print("✅ Generalization looks reasonable.")
        return test_preds
    
    ####### Tree Based #######
    preds_xgboost   = evaluate_model("XGBoostRegressor", xgboost, X_train_tree, X_test_tree, y_train_seq, y_test_seq)
    preds_catboost  = evaluate_model("CatBoostRegressor", catboost, X_train_tree, X_test_tree, y_train_seq, y_test_seq)
    preds_stack     = evaluate_model("StackingRegressor", meta_model, X_meta_train, X_test_meta, y_train_seq.values, y_test_seq.values)
    preds_lgbm      = evaluate_model("LightGBM", lgbm, X_train_tree, X_test_tree, y_train_tree, y_test_tree)
    ####### Sequential #######
    X_cnn_train = X_train_seq.values.reshape((len(X_train_seq), X_train_seq.shape[1], 1))
    X_cnn_test = X_test_seq.values.reshape((len(X_test_seq), X_test_seq.shape[1], 1))

    preds_lstm       = evaluate_model("LSTM", lstm_model, X_train_seq.values, X_test_seq.values, y_train_seq.values, y_test_seq.values)
    preds_cnn      = evaluate_model("CNN", cnn_model, X_cnn_train, X_cnn_test, y_train_seq.values, y_test_seq.values)

    ################################################
    ####### Target Distribution
    ####### Tree based #######
    print("\n🔍 Target distribution:")
    print(y_train_tree.describe())
    ####### Sequential #######
    print("\n🔍 Target distribution:")
    print(y_train_seq.describe())
    
    ################################################
    ####### Choose final model
    ####### Tree Based #######
    preds_lgbm = lgbm.predict(X_test_tree)
    print("\n🔍 Checking prediction variance from stack model:")
    print(f"Min: {preds_lgbm.min():.8f}")
    print(f"Max: {preds_lgbm.max():.8f}")
    print(f"Mean: {preds_lgbm.mean():.8f}")
    print(f"Std Dev: {preds_lgbm.std():.8f}")
    print(f"First 5 Predictions: {preds_lgbm[:5]}")

    mae_lgbm = mean_absolute_error(y_test_tree, preds_lgbm)
    rmse_lgbm = np.sqrt(mean_squared_error(y_test_tree, preds_lgbm))
    r2_lgbm = r2_score(y_test_tree, preds_lgbm)

    print(f"MAE: {mae_lgbm:.4f}")
    print(f"RMSE: {rmse_lgbm:.4f}")
    print(f"R²: {r2_lgbm:.4f}")
    ####### Stacked Model #######
    preds = meta_model.predict(X_test_meta)
    print("\n🔍 Checking prediction variance from stack model:")
    print(f"Min: {preds.min():.8f}")
    print(f"Max: {preds.max():.8f}")
    print(f"Mean: {preds.mean():.8f}")
    print(f"Std Dev: {preds.std():.8f}")
    print(f"First 5 Predictions: {preds[:5]}")

    mae = mean_absolute_error(y_test_seq, preds)
    rmse = np.sqrt(mean_squared_error(y_test_seq, preds))
    r2 = r2_score(y_test_seq, preds)

    print(f"MAE: {mae:.4f}")
    print(f"RMSE: {rmse:.4f}")
    print(f"R²: {r2:.4f}")
    ####### Sequential Solo #######
    X_test_cnn = X_test_seq.values.reshape((len(X_test_seq), X_test_seq.shape[1], 1))
    preds_cnn = cnn_model.predict(X_test_cnn)
    print("\n🔍 Checking prediction variance from stack model:")
    print(f"Min: {preds_cnn.min():.8f}")
    print(f"Max: {preds_cnn.max():.8f}")
    print(f"Mean: {preds_cnn.mean():.8f}")
    print(f"Std Dev: {preds_cnn.std():.8f}")
    print(f"First 5 Predictions: {preds_cnn[:5]}")

    mae_cnn = mean_absolute_error(y_test_seq, preds_cnn)
    rmse_cnn = np.sqrt(mean_squared_error(y_test_seq, preds_cnn))
    r2_cnn = r2_score(y_test_seq, preds_cnn)

    print(f"MAE: {mae_cnn:.4f}")
    print(f"RMSE: {rmse_cnn:.4f}")
    print(f"R²: {r2_cnn:.4f}")

    metadata = {
        "lookahead": LOOKAHEAD,
        "xgboost_params": xgboost_params,
        "catboost_params": catboost_params,
        "meta_params": meta_params,
        "lgbm_params": lgbm_params
    }
    with open(f"model_metadata_{LOOKAHEAD}.json", "w") as f:
        json.dump(metadata, f, indent=2)
        
    joblib.dump(meta_model, f"stack_model_LOOKAHEAD_{LOOKAHEAD}_session_less.pkl")
    joblib.dump(cnn_model, f"cnn_model_LOOKAHEAD_{LOOKAHEAD}_session_less.pkl")
    joblib.dump(lgbm, f"lgbm_model_LOOKAHEAD_{LOOKAHEAD}_session_less.pkl")

    return {
        'lookahead': LOOKAHEAD,
        'preds_stack': preds,
        'preds_cnn': preds_cnn,
        'X_test_seq': X_test_seq,
        'X_test_meta': X_test_meta,
        'true_values': y_test_seq.values
    }

In [79]:
def run_lookahead(LOOKAHEAD):
    try:
        result = run_lookahead_for_session(LOOKAHEAD)
        if result is None:
            print(f"No valid run for session_less, skipping CSV.")
            return
        return result
    except Exception as e:
        print(f"⚠️ Skipping session session_less due to error: {e}")
        return

##### Running Train

In [80]:
# Regression Training
lookahead_values = [5]
reg_results = []

for val in lookahead_values:
    regression_models = run_lookahead_for_session_regression(val)
    reg_results.append(regression_models)

[I 2025-05-23 07:50:34,801] Using an existing study with name 'catboost_opt' instead of creating a new one.


Train range: 2022-12-08 01:23:00-05:00 to 2024-12-31 16:55:00-05:00 | Rows: 729268
Test range: 2025-01-01 18:01:00-05:00 to 2025-05-20 19:35:00-04:00 | Rows: 133836


[I 2025-05-23 07:50:38,283] Trial 207 finished with value: 9.080671924963066e-05 and parameters: {'depth': 5, 'learning_rate': 0.049374699955863036, 'l2_leaf_reg': 6.347124555456442, 'random_strength': 3.8660446570216767, 'bagging_temperature': 0.5046321699518339, 'min_data_in_leaf': 19}. Best is trial 173 with value: 9.080568833517627e-05.
[I 2025-05-23 07:50:38,300] Using an existing study with name 'lgbm_opt' instead of creating a new one.


[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.002270 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2825
[LightGBM] [Info] Number of data points in the train set: 121548, number of used features: 21
[LightGBM] [Info] Start training from score 0.000005
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.003835 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not enough, you can set `force_col_wise=true`.
[LightGBM] [Info] Total Bins 2825
[LightGBM] [Info] Number of data points in the train set: 243092, number of used features: 21
[LightGBM] [Info] Start training from score 0.000005
[LightGBM] [Info] Auto-choosing row-wise multi-threading, the overhead of testing was 0.005832 seconds.
You can set `force_row_wise=true` to remove the overhead.
And if memory is not eno

[I 2025-05-23 07:50:53,509] Trial 15 finished with value: 0.0006280662511730274 and parameters: {'learning_rate': 0.06316495803486866, 'max_depth': 7, 'num_leaves': 173, 'min_child_samples': 1, 'subsample': 0.7994852543342729, 'colsample_bytree': 0.814948154954825, 'reg_alpha': 0.083375073769244, 'reg_lambda': 0.09949205445694187, 'min_split_gain': 0.007836296211490169, 'force_col_wise': False}. Best is trial 0 with value: 0.0006280662511730274.


[LightGBM] [Info] Total Bins 2825
[LightGBM] [Info] Number of data points in the train set: 729268, number of used features: 21
[LightGBM] [Info] Start training from score 0.000004
🧠 [LSTMWrapper] Starting training with 729268 samples, 23 features
✅ [LSTMWrapper] Finished training.
🔮 [LSTMWrapper] Predicting on 133836 samples...
✅ [LSTMWrapper] Prediction complete.
[1m4183/4183[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 820us/step

🔧 [CNN1DWrapper] Scaling target and starting training...
Epoch 1/20
[1m5698/5698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - loss: 0.7366
Epoch 2/20
[1m5698/5698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - loss: 0.7321
Epoch 3/20
[1m5698/5698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - loss: 0.7326
Epoch 4/20
[1m5698/5698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 2ms/step - loss: 0.7326
Epoch 5/20
[1m5698/5698[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m

# Backtesting

##### Regression StandAlone Backtesting

In [None]:
all_results = []
thresholds = [0.0005, 0.005]

for result in reg_results:
    lookahead = result['lookahead']
    preds_stack = result['preds_stack']  # or 'preds_cnn'
    preds_cnn = result['preds_cnn']
    preds_lgbm = result['preds_lgbm']
    X_test_combined = result['X_test_meta']  # or 'X_test_seq'
    y_test = result['true_values']
    labeled = pd.read_parquet(f"labeled_data_{lookahead}_session_less.parquet")
    df_backtest = labeled.copy()

    print(f"\n🔎 Predicted return range for LOOKAHEAD={lookahead}: STACK: min={preds_stack.min():.6f}, max={preds_stack.max():.6f} 
            CNN: min={preds_cnn.min():.6f}, max={preds_cnn.max():.6f}")
    for params in combinations:
        for thresh in thresholds:
            results = evaluate_regression(
                X_test=X_test_combined,
                preds_stack=preds_stack,
                preds_cnn=preds_cnn,
                labeled=labeled,
                df=df_backtest,
                avoid_funcs=avoid_funcs,
                SL_ATR_MULT=params['SL_ATR_MULT'],
                TP_ATR_MULT=params['TP_ATR_MULT'],
                TRAIL_START_MULT=params['TRAIL_START_MULT'],
                TRAIL_STOP_MULT=params['TRAIL_STOP_MULT'],
                TICK_VALUE=params['TICK_VALUE'],
                is_same_session=is_same_session,
                long_thresh=thresh,
                short_thresh=-thresh,
                base_contracts=1,
                max_contracts=5,
                skip_weak_conf=True,
                weak_conf_zscore=0.2
            )

            results['params'] = params
            results['threshold'] = thresh
            all_results.append(results)

            print(f"\n\n🔍 Evaluating with params: {params}")

            print(
                f"\n✅ LOOKAHEAD={lookahead} | Threshold={thresh}"
                f"\nPnL: ${results['pnl']:.2f}"
                f"\nTrades: {results['trades']}"
                f"\nWin Rate: {results['win_rate']:.2%}"
                f"\nExpectancy: {results['expectancy']:.2f}"
                f"\nProfit Factor: {results['profit_factor']:.2f}"
                f"\nSharpe Ratio: {results['sharpe']:.2f}"
                f"\nLong Trades: {results['long_trades']} | Short Trades: {results['short_trades']}"
            )

            print("Avoid Hits:")
            for name, count in results['avoid_hits'].items():
                print(f" - {name}: {count}")

            if not results['results'].empty and 'pnl' in results['results'].columns:
                print("\n🔢 Top 5 PnL trades:")
                print(results['results'].sort_values(by='pnl', ascending=False).head(5))

                print("\n🔻 Bottom 5 PnL trades:")
                print(results['results'].sort_values(by='pnl', ascending=True).head(5))
            else:
                print("\n⚠️ No trades executed, skipping PnL trade breakdown.")


summary_df = pd.DataFrame([{
    'pnl': r['pnl'],
    'sharpe': r['sharpe'],
    'expectancy': r['expectancy'],
    'profit_factor': r['profit_factor'],
    'win_rate': r['win_rate'],
    'trades': r['trades'],
    **r['params']
} for r in all_results])
top = summary_df.sort_values(by='sharpe', ascending=False).head(10)
print("\n🏁 Top 10 Configurations Across All Lookaheads:")
print(top)

##### Classification StandAlone Backtesting

In [None]:
all_results = []
thresholds = [0.0005, 0.005]

for result in class_results:
    lookahead = result['lookahead']
    preds_stack = result['preds_stack']  # or 'preds_cnn'
    preds_cnn = result['preds_cnn']
    preds_lgbm = result['preds_lgbm']
    X_test_combined = result['X_test_meta']  # or 'X_test_seq'
    y_test = result['true_values']
    labeled = pd.read_parquet(f"labeled_data_{lookahead}_session_less.parquet")
    df_backtest = labeled.copy()

    print(f"\n🔎 Predicted return range for LOOKAHEAD={lookahead}: STACK: min={preds_stack.min():.6f}, max={preds_stack.max():.6f} 
            CNN: min={preds_cnn.min():.6f}, max={preds_cnn.max():.6f}")
    for params in combinations:
        for thresh in thresholds:
            results = evaluate_regression(
                X_test=X_test_combined,
                preds_stack=preds_stack,
                preds_cnn=preds_cnn,
                labeled=labeled,
                df=df_backtest,
                avoid_funcs=avoid_funcs,
                SL_ATR_MULT=params['SL_ATR_MULT'],
                TP_ATR_MULT=params['TP_ATR_MULT'],
                TRAIL_START_MULT=params['TRAIL_START_MULT'],
                TRAIL_STOP_MULT=params['TRAIL_STOP_MULT'],
                TICK_VALUE=params['TICK_VALUE'],
                is_same_session=is_same_session,
                long_thresh=thresh,
                short_thresh=-thresh,
                base_contracts=1,
                max_contracts=5,
                skip_weak_conf=True,
                weak_conf_zscore=0.2
            )

            results['params'] = params
            results['threshold'] = thresh
            all_results.append(results)

            print(f"\n\n🔍 Evaluating with params: {params}")

            print(
                f"\n✅ LOOKAHEAD={lookahead} | Threshold={thresh}"
                f"\nPnL: ${results['pnl']:.2f}"
                f"\nTrades: {results['trades']}"
                f"\nWin Rate: {results['win_rate']:.2%}"
                f"\nExpectancy: {results['expectancy']:.2f}"
                f"\nProfit Factor: {results['profit_factor']:.2f}"
                f"\nSharpe Ratio: {results['sharpe']:.2f}"
                f"\nLong Trades: {results['long_trades']} | Short Trades: {results['short_trades']}"
            )

            print("Avoid Hits:")
            for name, count in results['avoid_hits'].items():
                print(f" - {name}: {count}")

            if not results['results'].empty and 'pnl' in results['results'].columns:
                print("\n🔢 Top 5 PnL trades:")
                print(results['results'].sort_values(by='pnl', ascending=False).head(5))

                print("\n🔻 Bottom 5 PnL trades:")
                print(results['results'].sort_values(by='pnl', ascending=True).head(5))
            else:
                print("\n⚠️ No trades executed, skipping PnL trade breakdown.")


summary_df = pd.DataFrame([{
    'pnl': r['pnl'],
    'sharpe': r['sharpe'],
    'expectancy': r['expectancy'],
    'profit_factor': r['profit_factor'],
    'win_rate': r['win_rate'],
    'trades': r['trades'],
    **r['params']
} for r in all_results])
top = summary_df.sort_values(by='sharpe', ascending=False).head(10)
print("\n🏁 Top 10 Configurations Across All Lookaheads:")
print(top)

Combo Backtesting

# Visualize

In [None]:
# for result in lookahead_results:
#     stack_preds = result['stack'].predict(X_test_scaled)
#     rf_preds = result['models']['rf'].predict(X_test_scaled)
#     xgb_preds = result['models']['xgb'].predict(X_test_scaled)
#     enet_preds = result['models']['elasticnet'].predict(X_test_scaled)
    
#     plt.figure(figsize=(12, 4))
#     plt.plot(rf_preds[:100], label='RF')
#     plt.plot(xgb_preds[:100], label='XGB')
#     plt.plot(enet_preds[:100], label='ElasticNet')
#     plt.plot(stack_preds[:100], label='Stack', linewidth=2)

In [None]:
# for run in lookahead_results:
#     for r in run['results']:
#         print(r)
#         df = r['results'].copy()
#         df = df.sort_values(by='entry_time')
#         df['cumulative_pnl'] = df['pnl'].cumsum()

#         if df['cumulative_pnl'].iloc[-1] > 0 and r['sharpe'] > 10 and r['trades'] > 150 and r['win_rate'] > 0.55 and r['profit_factor'] > 1.5 and r['expectancy'] > 0.5 and r['pnl'] > 50000:
#                 plt.figure(figsize=(12, 4))
#                 plt.plot(df['entry_time'], df['cumulative_pnl'], label='Cumulative PnL', color='green')
#                 plt.title(f"PnL | Lookahead={run['lookahead']} | Sharpe={r['sharpe']:.2f}")
#                 plt.xlabel("Datetime")
#                 plt.ylabel("PnL")
#                 plt.grid(True)
#                 plt.legend()
#                 plt.tight_layout()
#                 plt.show()

In [None]:
# # Best result holder by lookahead value
# best_by_lookahead = {
#     5: {'win_rate': float('-inf'), 'result': None},
#     15: {'win_rate': float('-inf'), 'result': None}
# }

# # Fill best_by_lookahead from results
# for run in lookahead_results:
#     lookahead = run['lookahead']
#     if lookahead in best_by_lookahead:
#         for r in run['results']:
#             if r['win_rate'] > best_by_lookahead[lookahead]['win_rate']:
#                 best_by_lookahead[lookahead] = {
#                     'win_rate': r['win_rate'],
#                     'result': r,
#                     'lookahead': lookahead
#                 }

# # Display results nicely
# for lookahead in [5]:
#     best = best_by_lookahead[lookahead]
#     if best['result']:
#         df = best['result']['results'].copy()
#         df = df.sort_values(by='entry_time')
#         df['cumulative_pnl'] = df['pnl'].cumsum()

#         # Set float format for readable output
#         pd.options.display.float_format = '{:,.2f}'.format

#         print(f"\n🏆 Best Win Rate Result for Lookahead={lookahead}")
#         print(f"Win Rate: {best['win_rate']:.2%}")
#         print(f"PnL: {best['result']['pnl']:.2f}")
#         print(f"Trades: {best['result']['trades']}")
#         print(f"Sharpe: {best['result']['sharpe']:.2f}")
#         print(f"Expectancy: {best['result']['expectancy']:.2f}")
#         print(f"Profit Factor: {best['result']['profit_factor']:.2f}")
#         print(f"Params: {best['result']['params']}")

#         print("\n🧾 All Trades from Best Win Rate Result:")
#         print(df[['entry_time', 'exit_time', 'side', 'entry_price', 'exit_price',
#                   'pnl', 'mfe', 'mae', 'cumulative_pnl']].to_string(index=False))

#         # Plot cumulative PnL
#         plt.figure(figsize=(12, 4))
#         plt.plot(df['entry_time'], df['cumulative_pnl'], label='Cumulative PnL', color='blue')
#         plt.title(f"Best Win Rate Run | Lookahead={lookahead} | Win Rate={best['win_rate']:.2%}")
#         plt.xlabel("Datetime")
#         plt.ylabel("Cumulative PnL")
#         plt.grid(True)
#         plt.legend()
#         plt.tight_layout()
#         plt.show()
#     else:
#         print(f"No valid result found for Lookahead={lookahead}.")

In [None]:
# np.corrcoef([lookahead_results['preds_rf'], lookahead_results['preds_xgb'], lookahead_results['preds_elasticnet']])
# preds_matrix = np.vstack([lookahead_results['preds_rf'], lookahead_results['preds_xgb'], lookahead_results['preds_elasticnet']])
# corr_matrix = np.corrcoef(preds_matrix)

# plt.figure(figsize=(6, 4))
# sns.heatmap(corr_matrix, annot=True, xticklabels=['RF', 'XGB', 'ENet'], yticklabels=['RF', 'XGB', 'ENet'], cmap='coolwarm', fmt=".2f")
# plt.title("Correlation Between Base Model Predictions")
# plt.show()

# Sort and Plot

In [None]:
# # Predictions
# # y_pred = best_lookahead.predict(X_test)
# best_lookahead = max(lookahead_results, key=lambda x: max(r['pnl'] for r in x['results']))
# y_pred = best_lookahead['stack'].predict(X_test_scaled)

# # Confusion Matrix
# labels = sorted(class_mapping)  # Make sure the order matches
# cm = confusion_matrix(y_test, y_pred, labels=labels)

# # Display Confusion Matrix
# plt.figure(figsize=(8, 6))
# sns.heatmap(cm, annot=True, fmt="d", cmap="Blues", xticklabels=labels, yticklabels=labels)
# plt.xlabel("Predicted")
# plt.ylabel("True")
# plt.title("Confusion Matrix")
# plt.show()

# # Classification Report
# print("Classification Report:")
# print(classification_report(y_test, y_pred, labels=labels, digits=2))