In [1]:
import polars as pl
import polars.selectors as cs
import numpy as np
import torch
import itertools
from loguru import logger
from typing import Tuple, Callable, Optional, Dict, List
from dataclasses import dataclass, field
from numba import njit
# from CONFIG import CONFIG
# from FEATURE_ENGINEERING import FEATURE_ENGINEERING

import xgboost as xgb
import lightgbm as lgb
from sklearn.model_selection import KFold, TimeSeriesSplit
from cuml.explainer import TreeExplainer

In [2]:
class CONFIG:
    WSL = True
    LOCAL = True
    TRAIN = True

    RANDOM_STATE = 42

    DATE_COL = "date_id"

    MIN_INVESTMENT, MAX_INVESTMENT = 0, 2

    START_DATE = 1006
    END_DATE = 8989 - 180
    BINARY_FEATURE = "D"

    if LOCAL:
        TRAIN_X_PATH = "data/train.csv"
        TRAIN_Y_PATH = "data/train_labels.csv"
        TEST_PATH = "data/test.csv"
        TARGET_PAIRS_PATH = "data/target_pairs.csv"
    else:
        BASE_PATH = "/kaggle/input/hull-tactical-market-prediction"
        TRAIN_X_PATH = f"{BASE_PATH}/train.csv"
        TRAIN_Y_PATH = f"{BASE_PATH}/train_labels.csv"
        TEST_PATH = f"{BASE_PATH}/test.csv"
        TARGET_PAIRS_PATH = f"{BASE_PATH}/target_pairs.csv"
        MODEL_PATH = "/kaggle/input/mitsui-nn/pytorch/test/1/sample.pth"  ### TO CHANGE

    if WSL:
        mount = "/mnt/c/Users/Admin/Desktop/Personal-Projects/Kaggle/Hull Tactical - Market Prediction/"
        TRAIN_X_PATH = f"{mount}{TRAIN_X_PATH}"
        TRAIN_Y_PATH = f"{mount}{TRAIN_Y_PATH}"
        TEST_PATH = f"{mount}{TEST_PATH}"

    N_FOLDS = 5
    SEQ_LEN = 16

    BATCH_SIZE = 100

    VERBOSE_EVAL = 10
    NUM_BOOST_ROUND = 100
    EARLY_STOPPING_ROUNDS = 100
    XGB_PARAMS = {
        "tree_method": "hist",
        "device": "gpu",
        "colsample_bylevel": 0.4778,
        "colsample_bynode": 0.3628,
        "colsample_bytree": 0.7107,
        "gamma": 1.7095,
        "learning_rate": 0.02213,
        "max_depth": 20,
        "max_leaves": 12,
        "min_child_weight": 16,
        "n_estimators": 1667,
        "subsample": 0.06567,
        "reg_alpha": 39.3524,
        "reg_lambda": 75.4484,
        "verbosity": 0,
        "random_state": RANDOM_STATE,
        "early_stopping_rounds": 100,
        "n_jobs": -1,
    }


In [3]:
# M* - Market Dynamics/Technical features.
# E* - Macro Economic features.
# I* - Interest Rate features.
# P* - Price/Valuation features.
# V* - Volatility features.
# S* - Sentiment features.
# MOM* - Momentum features.
# D* - Dummy/Binary features.

@dataclass(slots=True)
class FEATURE_ENGINEERING:
    df: pl.LazyFrame

    # ----------------------
    # Autocorrelation Features
    # ----------------------
    def _compute_autocorr_torch(self, df: pl.LazyFrame) -> pl.LazyFrame:
        """
        Compute rolling autocorrelations for each asset using Torch and GPU acceleration.

        Parameters
        ----------
        df : pl.LazyFrame
            Input dataframe containing log returns

        Returns
        -------
        pl.LazyFrame
            Dataframe with autocorrelation features for multiple windows (10, 90, 252)
        """
        names = [i for i in df.collect_schema().names() if i != CONFIG.DATE_COL]
        windows = [10, 90, 252]
        device = "cuda" if torch.cuda.is_available() else "cpu"

        data_np = df.collect().select(names).to_numpy().astype(np.float32)
        dates = df.collect().select(CONFIG.DATE_COL).to_series().to_list()
        data = torch.tensor(data_np, device=device)

        autocorrs = []
        for window in windows:
            rolling = data.unfold(0, window, 1).transpose(1, 2)
            mean = rolling.mean(dim=1, keepdim=True)
            centered = rolling - mean
            var = (centered**2).mean(dim=1)
            autocorr_num = (centered[:, 1:, :] * centered[:, :-1, :]).mean(dim=1)
            autocorr = autocorr_num / var
            output_dates = dates[window - 1 :]
            schema = [f"{name}_auto_corr_{window}" for name in names]
            autocorr_df = (
                pl.DataFrame(autocorr.detach().cpu().numpy(), schema=schema)
                .with_columns(pl.Series(CONFIG.DATE_COL, output_dates))
                .select([CONFIG.DATE_COL] + schema)
            )
            autocorrs.append(autocorr_df)

        all_auto_corrs = pl.DataFrame().with_columns(pl.Series(CONFIG.DATE_COL, dates))
        for autocorr in autocorrs:
            all_auto_corrs = all_auto_corrs.join(autocorr, how="left", on=CONFIG.DATE_COL)

        return all_auto_corrs.fill_null(strategy="backward").lazy()

    # ----------------------
    # Return Skew Features
    # ----------------------
    def _compute_return_skew(self, df: pl.LazyFrame) -> pl.LazyFrame:
        """
        Compute rolling skewness of returns for each asset over windows [5, 10, 90, 252].

        Parameters
        ----------
        df : pl.LazyFrame
            Input dataframe with return features

        Returns
        -------
        pl.LazyFrame
            Dataframe with rolling skew features
        """
        names = [i for i in df.collect_schema().names() if i != CONFIG.DATE_COL and CONFIG.BINARY_FEATURE not in i]
        return (
            df.with_columns(
                [pl.col(col).rolling_skew(window_size=window).alias(f"{col}_return_skew_{window}") for col in names for window in [5, 10, 90, 252]]
            )
            .drop([i for i in df.collect_schema().names() if i != CONFIG.DATE_COL])
            .fill_null(0)
        )

    # ----------------------
    # Rolling Statistics Features
    # ----------------------
    def _compute_rolling(self, df: pl.LazyFrame) -> pl.LazyFrame:
        """
        Compute rolling mean, std, and SMA ratio for return features over windows [5, 10, 90, 252].

        Parameters
        ----------
        df : pl.LazyFrame
            Input dataframe with return features

        Returns
        -------
        pl.LazyFrame
            Dataframe with rolling statistics and SMA ratio features
        """
        df = df.select(
            [CONFIG.DATE_COL] + [col for col in self.df.collect_schema().names() if col != CONFIG.DATE_COL and CONFIG.BINARY_FEATURE not in col]
        )
        names = df.collect_schema().names()
        names = [i for i in names if i != CONFIG.DATE_COL]
        windows = [5, 10, 90, 252]
        return (
            df.with_columns(
                [
                    pl.col(col).rolling_mean(window_size=window, min_periods=2).alias(f"{col}_sma_{window}")
                    for col in names
                    if col
                    for window in windows
                ]
                + [
                    pl.col(col).rolling_std(window_size=window, min_periods=2).alias(f"{col}_vol_{window}")
                    for col in names
                    if col
                    for window in windows
                ]
                + [
                    (
                        pl.col(col).rolling_mean(window_size=window, min_periods=2)
                        / (pl.when(pl.col(col) < 0).then(pl.col(col)).otherwise(0.0).rolling_std(window_size=window, min_periods=2) + 1e-8)
                    ).alias(f"{col}_{window}_sortino")
                    for col in names
                    if col
                    for window in windows
                ]
            )
            .with_columns([(pl.col(col) / pl.col(f"{col}_sma_{window}")).alias(f"{col}_{window}_sharpe") for col in names for window in windows])
            .drop(names)
        )

    # ----------------------
    # Beta Features
    # ----------------------
    def _compute_betas(self, df: pl.LazyFrame) -> pl.LazyFrame:
        """
        Compute rolling betas for all asset pairs using Polars rolling covariance.

        Beta of asset i with respect to asset j at time t is computed as:
            beta[i,j] = cov(i,j) / var(j)

        This method uses Polars rolling_cov and rolling_var without GPU acceleration.

        Parameters
        ----------
        df : pl.LazyFrame
            Input dataframe containing asset returns. Must include a date column defined in CONFIG.DATE_COL.

        Returns
        -------
        pl.LazyFrame
            Dataframe with rolling beta features for all asset pairs over a 90-day window.
            The output columns are named as "beta_{asset_i}_{asset_j}".
        """
        names = [i for i in df.collect_schema().names() if i != CONFIG.DATE_COL]

        # Generate all unique asset pairs
        pairs = [(names[i], names[j]) for i in range(len(names)) for j in range(i + 1, len(names))]

        return (
            df.with_columns(
                [
                    pl.rolling_cov(a=pl.col(p1), b=pl.col(p2), window_size=90, min_periods=2).alias(f"beta_{p1}_{p2}")
                    / pl.col(p1).rolling_var(window_size=90, min_periods=2)
                    for p1, p2 in pairs
                    if p1 != CONFIG.DATE_COL or p2 != CONFIG.DATE_COL
                ]
            )
            .drop(names)
            .fill_null(0)
        )

    # ----------------------
    # Pair Features
    # ----------------------
    def _compute_pairs_features(self, df: pl.LazyFrame) -> pl.LazyFrame:
        names = df.collect_schema().names()
        names = [i for i in names if i != CONFIG.DATE_COL]
        err = 1e-6

        exprs = []
        for pair1, pair2 in itertools.combinations(names, 2):
            p1 = pl.col(f"{pair1}")
            p2 = pl.col(f"{pair2}")
            exprs.extend(
                [
                    # Polynomial
                    ((p1 + p2) ** 2).alias(f"{pair1}_{pair2}_poly2"),
                    ((p1 + p2) ** 3).alias(f"{pair1}_{pair2}_poly3"),
                    ((p1 - p2) ** 2).alias(f"{pair1}_{pair2}_diff_squared"),
                    # Nonlinear transforms
                    (p1 * p2).sqrt().alias(f"{pair1}_{pair2}_sqrt_mul"),
                    (1 + (p1 * p2)).log().alias(f"{pair1}_{pair2}_log_mul"),
                    (p1 - p2).exp().alias(f"{pair1}_{pair2}_exp_diff"),
                    # Statistical / Comparative
                    pl.when((p1 + p2) != 0).then(2 * p1 * p2 / (p1 + p2)).otherwise(None).alias(f"{pair1}_{pair2}_harmonic_mean"),
                    pl.when(p2 != 0).then((p1 / p2).arctan()).otherwise(None).alias(f"{pair1}_{pair2}_atan_ratio"),
                    pl.when(((p1 + err) / (p2 + err)) > 0).then(((p1 + err) / (p2 + err)).log()).otherwise(None).alias(f"{pair1}_{pair2}_log_ratio"),
                    # Logistic / sigmoid
                    (1 / (1 + (p1 - p2).neg().exp())).alias(f"{pair1}_{pair2}_sigmoid_diff"),
                ]
            )
        return df.with_columns(exprs).fill_null(0).fill_nan(0).drop(names)

    def create_market_features(self) -> pl.DataFrame:
        """
        Create all engineered market features including temporal, returns, lags,
        autocorrelation, OBV, skewness, volume z-score, market stats, ATR, and rolling stats.

        Returns
        -------
        pl.LazyFrame
            Fully feature-engineered dataframe ready for modeling
        """
        autocorr_df = self._compute_autocorr_torch(df=self.df)
        skew_df = self._compute_return_skew(df=self.df)
        rolling_stats_df = self._compute_rolling(df=self.df)
        beta_df = self._compute_betas(df=self.df)
        interactions_df = self._compute_pairs_features(df=self.df)

        final_df = (
            self.df.join(autocorr_df, on=CONFIG.DATE_COL)
            .join(skew_df, on=CONFIG.DATE_COL)
            .join(rolling_stats_df, on=CONFIG.DATE_COL)
            .join(beta_df, on=CONFIG.DATE_COL)
            .join(interactions_df, on=CONFIG.DATE_COL)
            .collect()
        )

        non_binary_cols = [col for col in final_df.columns if final_df[col].n_unique() > 3 and col != CONFIG.DATE_COL]

        windows = [10, 90, 252]

        final_df = (
            final_df.with_columns(
                *[
                    self.zscore(col=col, mean_window=mean_w, std_window=std_w)
                    for col in non_binary_cols
                    for (mean_w, std_w) in itertools.product(windows, repeat=2)
                ],
            )
            .drop(non_binary_cols)
            .fill_null(0)
            .fill_nan(0)
            .drop_nulls()
        )

        cols_to_drop = [col for col in final_df.select(cs.numeric()).columns if final_df[col].is_infinite().any()]
        return final_df.drop(cols_to_drop)

    def zscore(self, col: str, mean_window: int, std_window: int) -> pl.Expr:
        return (
            (pl.col(col) - pl.col(col).rolling_mean(window_size=mean_window, min_periods=2))
            / pl.col(col).rolling_std(window_size=std_window, min_periods=2)
        ).alias(f"{col}_std_{mean_window}_{std_window}")


In [4]:
from dataclasses import dataclass


class CustomMetrics:
    """Factory class for creating custom evaluation metrics"""

    @staticmethod
    def comp_metric(
        predt: np.ndarray,
        rfr_data: np.ndarray,
        fwd_data: np.ndarray,
    ) -> Tuple[str, float]:
        position = np.clip(predt, CONFIG.MIN_INVESTMENT, CONFIG.MAX_INVESTMENT)

        N = len(rfr_data)
        strat_ret = rfr_data * (1 - position) + position * fwd_data
        excess_ret = strat_ret - rfr_data
        mean_excess = (1 + excess_ret).prod() ** (1 / N) - 1
        std = strat_ret.std()

        if std == 0:
            return "adj_sharpe", 0.0

        sharpe = mean_excess / std * np.sqrt(252)
        strat_vol = std * np.sqrt(252) * 100
        market_vol = fwd_data.std() * np.sqrt(252) * 100
        market_mean = (1 + fwd_data - rfr_data).prod() ** (1 / N) - 1

        vol_penalty = 1 + max(0, strat_vol / market_vol - 1.2) if market_vol > 0 else 0
        return_penalty = 1 + ((max(0, (market_mean - mean_excess) * 100 * 252)) ** 2) / 100

        return "adj_sharpe", min(sharpe / (vol_penalty * return_penalty), 1e6)

    @staticmethod
    def create_volatility_adjusted_sharpe_xgb(
        rfr_data: np.ndarray,
        fwd_data: np.ndarray,
    ) -> Callable:
        """
        Create XGBoost custom metric with enclosed data.

        Args:
            rfr_data: Risk-free rate array
            fwd_data: Forward returns array

        Returns:
            Custom metric function for XGBoost
        """

        def metric(predt: np.ndarray, dtrain) -> Tuple[str, float]:
            position = np.clip(predt, CONFIG.MIN_INVESTMENT, CONFIG.MAX_INVESTMENT)

            N = len(rfr_data)
            strat_ret = rfr_data * (1 - position) + position * fwd_data
            excess_ret = strat_ret - rfr_data
            mean_excess = (1 + excess_ret).prod() ** (1 / N) - 1
            std = strat_ret.std()

            if std == 0:
                return "adj_sharpe", 0.0

            sharpe = mean_excess / std * np.sqrt(252)
            strat_vol = std * np.sqrt(252) * 100
            market_vol = fwd_data.std() * np.sqrt(252) * 100
            market_mean = (1 + fwd_data - rfr_data).prod() ** (1 / N) - 1

            vol_penalty = 1 + max(0, strat_vol / market_vol - 1.2) if market_vol > 0 else 0
            return_penalty = 1 + ((max(0, (market_mean - mean_excess) * 100 * 252)) ** 2) / 100

            return "adj_sharpe", min(sharpe / (vol_penalty * return_penalty), 1e6)

        return metric

    @staticmethod
    def create_volatility_adjusted_sharpe_lgb(
        rfr_data: np.ndarray,
        fwd_data: np.ndarray,
    ) -> Callable:
        """
        Create LightGBM custom metric with enclosed data.

        Args:
            rfr_data: Risk-free rate array
            fwd_data: Forward returns array

        Returns:
            Custom metric function for LightGBM
        """

        def metric(preds: np.ndarray, train_data) -> Tuple[str, float, bool]:
            position = np.clip(preds, CONFIG.MIN_INVESTMENT, CONFIG.MAX_INVESTMENT)

            N = len(rfr_data)
            strat_ret = rfr_data * (1 - position) + position * fwd_data
            excess_ret = strat_ret - rfr_data
            mean_excess = (1 + excess_ret).prod() ** (1 / N) - 1
            std = strat_ret.std()

            if std == 0:
                return "adj_sharpe", 0.0, True

            sharpe = mean_excess / std * np.sqrt(252)
            strat_vol = std * np.sqrt(252) * 100
            market_vol = fwd_data.std() * np.sqrt(252) * 100
            market_mean = (1 + fwd_data - rfr_data).prod() ** (1 / N) - 1

            vol_penalty = 1 + max(0, strat_vol / market_vol - 1.2) if market_vol > 0 else 0
            return_penalty = 1 + ((max(0, (market_mean - mean_excess) * 100 * 252)) ** 2) / 100

            adj_sharpe = min(sharpe / (vol_penalty * return_penalty), 1e6)
            return "adj_sharpe", adj_sharpe, True

        return metric


@dataclass
class TimeSeriesModelTrainer:
    """Base class for time series model training with cross-validation"""

    cv_results: List[Dict] = field(default_factory=list)
    models: List = field(default_factory=list)
    shap_df: Optional[pl.DataFrame] = None
    feature_names: Optional[List[str]] = None

    def train_cv(self, X: np.ndarray, y: np.ndarray, risk_free_rate: np.ndarray, forward_returns: np.ndarray) -> pl.DataFrame:
        """
        Perform time series cross-validation training.

        Args:
            X: Feature matrix
            y: Target variable
            risk_free_rate: Risk-free rate array
            forward_returns: Forward returns array
            dates: Optional datetime index for logging

        Returns:
            DataFrame with cross-validation results
        """
        raise NotImplementedError("Subclasses must implement train_cv method")

    def get_results_summary(self) -> pl.DataFrame:
        """Get summary of cross-validation results"""
        df = pl.DataFrame(self.cv_results)
        logger.info(f"Mean Test Sharpe: {df['final_score'].mean():.4f} (+/- {df['final_score'].std():.4f})")

        return df

    def compute_shap(self, model, x: np.ndarray, feature_names: Optional[List[str]] = None) -> pl.DataFrame:
        """
        Compute SHAP values for feature importance using cuML TreeExplainer.

        Args:
            model: Trained XGBoost or LightGBM model
            x: Feature matrix (numpy array or cupy array)
            feature_names: List of feature names (optional)

        Returns:
            Polars DataFrame with mean absolute SHAP values per feature
        """
        try:
            # Create TreeExplainer
            explainer = TreeExplainer(model=model)

            # Compute SHAP values
            shap_values = explainer.shap_values(x)

            # Convert to numpy if cupy array
            if hasattr(shap_values, "get"):
                shap_values = shap_values.get()

            # Create DataFrame with absolute SHAP values
            if feature_names is None:
                feature_names = [f"feature_{i}" for i in range(x.shape[1])]

            shap_df = pl.DataFrame(np.abs(shap_values), schema=feature_names).mean()

            # Initialize or concatenate SHAP DataFrame
            if self.shap_df is None:
                self.shap_df = shap_df
            else:
                self.shap_df = pl.concat([self.shap_df, shap_df], how="vertical").with_columns(pl.all().shrink_dtype())

            return shap_df

        except Exception as e:
            logger.error(f"Error computing SHAP values: {e}")
            return None

    def get_aggregated_shap_importance(self) -> pl.DataFrame:
        """
        Get aggregated SHAP importance across all folds.

        Returns:
            Polars DataFrame with mean SHAP values and ranking
        """
        if self.shap_df is None:
            logger.warning("No SHAP values computed yet")
            return None

        # Calculate mean SHAP importance across all folds
        mean_shap = self.shap_df.mean()

        # Create importance DataFrame
        importance_df = (
            pl.DataFrame({"feature": mean_shap.columns, "shap_importance": mean_shap.to_numpy()[0]})
            .sort("shap_importance", descending=True)
            .with_columns(pl.col("shap_importance").rank(descending=True).alias("rank"))
        )

        return importance_df

    def plot_shap_importance(self, top_n: int = 20):
        """
        Plot top N features by SHAP importance.

        Args:
            top_n: Number of top features to display
        """
        if self.shap_df is None:
            logger.warning("No SHAP values computed yet")
            return

        importance_df = self.get_aggregated_shap_importance()

        import matplotlib.pyplot as plt

        # Get top N features
        top_features = importance_df.head(top_n)

        plt.figure(figsize=(10, max(6, top_n * 0.3)))
        plt.barh(range(len(top_features)), top_features["shap_importance"].to_list(), tick_label=top_features["feature"].to_list())
        plt.xlabel("Mean |SHAP value|")
        plt.ylabel("Feature")
        plt.title(f"Top {top_n} Features by SHAP Importance")
        plt.gca().invert_yaxis()
        plt.tight_layout()
        plt.show()


class XGBoostTrainer(TimeSeriesModelTrainer):
    """XGBoost training pipeline with time series cross-validation"""

    def __init__(self, params: Optional[Dict] = None):
        super().__init__()
        self.params = params or self._default_params()

    def _default_params(self) -> Dict:
        """Default XGBoost parameters"""
        return {"max_depth": 6, "eta": 0.1, "device": "cuda", "tree_method": "hist", "seed": CONFIG.RANDOM_STATE, "disable_default_eval_metric": 1}

    def train_cv(
        self,
        X: np.ndarray,
        y: np.ndarray,
        risk_free_rate: np.ndarray,
        forward_returns: np.ndarray,
        batch_size: int,
        NUM_BOOST_ROUND: int,
        compute_shap: bool,
        feature_names: list,
    ) -> Tuple[pl.DataFrame, pl.DataFrame]:
        """Train XGBoost with time series cross-validation"""

        tscv = TimeSeriesSplit(n_splits=CONFIG.N_FOLDS)

        for fold, (train_index, test_index) in enumerate(tscv.split(X), 1):
            # Split data
            X_train, X_test = X[train_index], X[max(train_index) :]
            y_train, y_test = y[train_index], y[max(train_index) :]

            rfr_train, rfr_test = risk_free_rate[train_index], risk_free_rate[max(train_index) :]
            fwd_train, fwd_test = forward_returns[train_index], forward_returns[max(train_index) :]

            split_idx = int(len(X_train) * 0.8)
            X_train, X_val = X_train[:split_idx], X_train[split_idx:]
            y_train, y_val = y_train[:split_idx], y_train[split_idx:]
            rfr_train, rfr_val = rfr_train[:split_idx], rfr_train[split_idx:]
            fwd_train, fwd_val = fwd_train[:split_idx], fwd_train[split_idx:]

            # Create DMatrix
            dtrain = xgb.DMatrix(X_train, label=y_train)
            dval = xgb.DMatrix(X_val, label=y_val)

            # Create custom metrics
            train_metric = CustomMetrics.create_volatility_adjusted_sharpe_xgb(rfr_train, fwd_train)
            val_metric = CustomMetrics.create_volatility_adjusted_sharpe_xgb(rfr_val, fwd_val)

            # Train
            evals_result = {}
            bst = xgb.train(
                self.params,
                dtrain,
                num_boost_round=NUM_BOOST_ROUND,
                evals=[(dval, "val")],
                custom_metric=val_metric,
                evals_result=evals_result,
                early_stopping_rounds=CONFIG.EARLY_STOPPING_ROUNDS,
                maximize=True,
                verbose_eval=CONFIG.VERBOSE_EVAL,
            )

            BATCH_SIZE = batch_size  # Configure based on your timestep requirements
            bst_incremental = bst
            test_predictions = []

            for batch_start in range(0, len(X_test), BATCH_SIZE):
                batch_end = min(batch_start + BATCH_SIZE, len(X_test))

                # Extract batch data
                X_batch = X_test[batch_start:batch_end]
                y_batch = y_test[batch_start:batch_end]
                rfr_batch = rfr_test[batch_start:batch_end]
                fwd_batch = fwd_test[batch_start:batch_end]

                # Make predictions with current model before updating
                d_batch_pred = xgb.DMatrix(X_batch)
                batch_preds = bst_incremental.predict(d_batch_pred)
                test_predictions.extend(batch_preds)

                # Create DMatrix for incremental training
                d_batch_train = xgb.DMatrix(X_batch, label=y_batch)
                batch_metric = CustomMetrics.create_volatility_adjusted_sharpe_xgb(rfr_batch, fwd_batch)

                # Update model with new batch
                bst_incremental = xgb.train(
                    {
                        **self.params,
                        "process_type": "update",
                        "updater": "refresh",
                        "refresh_leaf": True,
                    },
                    d_batch_train,
                    num_boost_round=10,  # Can reduce this for faster updates
                    xgb_model=bst_incremental,
                    custom_metric=batch_metric,
                    maximize=True,
                    verbose_eval=False,
                )

            final_score = CustomMetrics.comp_metric(predt=np.array(test_predictions), rfr_data=rfr_test, fwd_data=fwd_test)

            # Store results
            self.models.append(bst)
            self.cv_results.append(
                {
                    "fold": fold,
                    "model": "XGBoost",
                    "train_size": len(train_index),
                    "test_size": len(test_index),
                    "best_iteration": bst.best_iteration,
                    "best_score": bst.best_score,
                    "train_start": train_index[0],
                    "train_end": train_index[-1],
                    "test_start": test_index[0],
                    "test_end": test_index[-1],
                }
            )

            logger.info(f"Best iteration: {bst.best_iteration}, Best Sharpe: {bst.best_score:.4f}")

            if compute_shap:
                logger.info(f"  Computing SHAP values for fold {fold}...")
                X_test_float32 = X_test.astype(np.float32)
                fold_shap = self.compute_shap(bst, X_test_float32, feature_names)
                if fold_shap is not None:
                    logger.info(
                        f"  Top 5 features: {fold_shap.transpose(include_header=True).sort(by='column_0', descending=True).head(5)['column'].to_list()}"
                    )
        shap_importance = self.get_aggregated_shap_importance() if compute_shap else None

        return shap_importance


class LightGBMTrainer(TimeSeriesModelTrainer):
    """LightGBM training pipeline with time series cross-validation"""

    def __init__(self, params: Optional[Dict] = None):
        super().__init__()
        self.params = params or self._default_params()

    def _default_params(self) -> Dict:
        """Default LightGBM parameters"""
        return {
            "max_depth": 6,
            # 'device': 'gpu',
            "learning_rate": 0.1,
            "seed": CONFIG.RANDOM_STATE,
            "verbose": -1,
            "metric": "None",
        }

    def train_cv(
        self,
        X: np.ndarray,
        y: np.ndarray,
        risk_free_rate: np.ndarray,
        forward_returns: np.ndarray,
        batch_size: int,
        NUM_BOOST_ROUND: int,
        compute_shap: bool,
        feature_names: list,
    ) -> Tuple[pl.DataFrame, pl.DataFrame]:
        """Train LightGBM with time series cross-validation"""

        tscv = TimeSeriesSplit(n_splits=CONFIG.N_FOLDS)

        for fold, (train_index, test_index) in enumerate(tscv.split(X), 1):
            # Split data
            X_train, X_test = X[train_index], X[max(train_index) :]
            y_train, y_test = y[train_index], y[max(train_index) :]

            # Split risk_free_rate and forward_returns for train/test
            rfr_train, rfr_test = risk_free_rate[train_index], risk_free_rate[max(train_index) :]
            fwd_train, fwd_test = forward_returns[train_index], forward_returns[max(train_index) :]

            # Further split train into train/val (including rfr and fwd)
            split_idx = int(len(X_train) * 0.8)
            X_train, X_val = X_train[:split_idx], X_train[split_idx:]
            y_train, y_val = y_train[:split_idx], y_train[split_idx:]
            rfr_train, rfr_val = rfr_train[:split_idx], rfr_train[split_idx:]
            fwd_train, fwd_val = fwd_train[:split_idx], fwd_train[split_idx:]

            # Create Datasets
            train_data = lgb.Dataset(X_train, label=y_train)
            val_data = lgb.Dataset(X_val, label=y_val, reference=train_data)

            # Create custom metric for validation
            val_metric = CustomMetrics.create_volatility_adjusted_sharpe_lgb(rfr_val, fwd_val)

            # Initial training with validation set
            evals_result = {}
            bst = lgb.train(
                self.params,
                train_data,
                num_boost_round=NUM_BOOST_ROUND,
                valid_sets=[val_data],
                valid_names=["validation"],
                feval=val_metric,
                callbacks=[
                    lgb.early_stopping(stopping_rounds=CONFIG.EARLY_STOPPING_ROUNDS, verbose=True),
                    lgb.log_evaluation(period=CONFIG.VERBOSE_EVAL),
                    lgb.record_evaluation(evals_result),
                ],
            )

            # Get best iteration from initial training
            best_iteration = bst.best_iteration
            best_score = bst.best_score["validation"]["adj_sharpe"]

            # Incremental learning on test set with batch updates
            BATCH_SIZE = batch_size  # Configure based on your timestep requirements
            bst_incremental = bst
            test_predictions = []

            for batch_start in range(0, len(X_test), BATCH_SIZE):
                batch_end = min(batch_start + BATCH_SIZE, len(X_test))

                # Extract batch data
                X_batch = X_test[batch_start:batch_end]
                y_batch = y_test[batch_start:batch_end]
                rfr_batch = rfr_test[batch_start:batch_end]
                fwd_batch = fwd_test[batch_start:batch_end]

                # Make predictions with current model before updating
                batch_preds = bst_incremental.predict(X_batch)
                test_predictions.extend(batch_preds)  # type:ignore

                # Create Dataset for incremental training
                batch_data = lgb.Dataset(X_batch, label=y_batch, reference=train_data)
                batch_metric = CustomMetrics.create_volatility_adjusted_sharpe_lgb(rfr_batch, fwd_batch)

                # Update model with new batch using init_model
                bst_incremental = lgb.train(
                    self.params,
                    batch_data,
                    num_boost_round=10,  # Add new trees per batch (adjust as needed)
                    init_model=bst_incremental,  # Continue from previous model
                    valid_sets=[batch_data],
                    valid_names=["batch"],
                    feval=batch_metric,
                    callbacks=[
                        lgb.log_evaluation(period=0),  # Silent during batch updates
                    ],
                )

            final_score = CustomMetrics.comp_metric(predt=np.array(test_predictions), rfr_data=rfr_test, fwd_data=fwd_test)

            if compute_shap:
                logger.info(f"  Computing SHAP values for fold {fold}...")
                X_test_float32 = X_test.astype(np.float32)
                fold_shap = self.compute_shap(bst, X_test_float32, feature_names)
                if fold_shap is not None:
                    logger.info(
                        f"  Top 5 features: {fold_shap.transpose(include_header=True).sort(by='column_0', descending=True).head(5)['column'].to_list()}"
                    )
        shap_importance = self.get_aggregated_shap_importance() if compute_shap else None

        return shap_importance


In [5]:
class FeatureSelector:
    def __init__(self, train_x, train_y):
        self.train_x = train_x  # drop date
        self.train_y = train_y
        self.keep_features = None
        self.total_features = train_x.columns.__len__()
        self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    def basic_filters(self):
        train_x_filter_1 = self.train_x.select([col for col in self.train_x.columns if self.train_x[col].var() > 1e-3])
        train_x_filter_2 = train_x_filter_1.select(
            [col for col in train_x_filter_1.columns if train_x_filter_1[col].value_counts()["count"].max() / len(train_x_filter_1) < 0.80]
        )

        print(f"After Basic Filter: {train_x_filter_2.columns.__len__()} / {self.total_features}")

        return train_x_filter_2

    def run_correlation(self, x: torch.Tensor, y: torch.Tensor, names: list) -> list:
        """
        Memory-optimized version using chunked processing
        Best for when you have enough GPU memory

        Args:
            x: Tensor
            y: Tensor

        Returns:
            correlations: Tensor
        """
        N, D1 = x.shape
        N2, D2 = y.shape
        assert N == N2

        device = x.device

        # Handle NaNs by masking
        x_valid = ~torch.isnan(x)
        y_valid = ~torch.isnan(y)

        # Convert NaNs to 0 for computation
        x_clean = torch.where(x_valid, x, 0.0)
        y_clean = torch.where(y_valid, y, 0.0)

        # Compute valid sample counts for each pair efficiently
        # This is the memory bottleneck, so we chunk it
        chunk_size = 500  # Adjust based on GPU memory
        correlations = torch.zeros(D1, D2, device=device)

        for i in range(0, D1, chunk_size):
            end_i = min(i + chunk_size, D1)

            # Get chunk
            x_chunk = x_clean[:, i:end_i]  # (N, chunk_size)
            x_valid_chunk = x_valid[:, i:end_i]  # (N, chunk_size)

            # Compute valid sample matrix for this chunk
            valid_matrix = x_valid_chunk.unsqueeze(2) & y_valid.unsqueeze(1)  # (N, chunk_size, D2)
            n_valid = valid_matrix.sum(dim=0).float()  # (chunk_size, D2)

            # Sufficient samples mask
            sufficient = n_valid >= 10

            if sufficient.any():
                # Compute means over valid samples
                x_sum = (x_chunk.unsqueeze(2) * valid_matrix).sum(dim=0)  # (chunk_size, D2)
                y_sum = (y_clean.unsqueeze(1) * valid_matrix).sum(dim=0)  # (chunk_size, D2)

                x_mean = x_sum / (n_valid + 1e-10)
                y_mean = y_sum / (n_valid + 1e-10)

                # Center data
                x_centered = (x_chunk.unsqueeze(2) - x_mean.unsqueeze(0)) * valid_matrix  # (N, chunk_size, D2)
                y_centered = (y_clean.unsqueeze(1) - y_mean.unsqueeze(0)) * valid_matrix  # (N, chunk_size, D2)

                # Compute correlation
                numerator = (x_centered * y_centered).sum(dim=0)
                x_var = (x_centered**2).sum(dim=0)
                y_var = (y_centered**2).sum(dim=0)

                denominator = torch.sqrt(x_var * y_var) + 1e-10
                chunk_corr = numerator / denominator

                # Apply sufficient samples mask
                chunk_corr = torch.where(sufficient, chunk_corr, 0.0)
                correlations[i:end_i] = torch.abs(chunk_corr)

        self.correlations = correlations.cpu().numpy()

    def run_selection(self):
        # filtered = self.basic_filters()
        train_x_arr = self.train_x.to_numpy()
        train_y_arr = self.train_y.to_numpy()
        x = torch.tensor(train_x_arr, device="cuda")
        y = torch.tensor(train_y_arr, device="cuda")
        self.run_correlation(x=x, y=y, names=self.train_x.columns)

        corrs_df = pl.DataFrame(self.correlations, schema=["corr"]).with_columns(pl.Series(name="feature", values=self.train_x.columns))

        keep_features = corrs_df.drop_nans().filter(pl.col("corr") > 0.08)

        print(f"After correlation, keeping: {keep_features.__len__()} features")

        # MI_scores = self.run_MI(x=np.nan_to_num(self.train_x.select(keep_features).to_numpy()), y=np.nan_to_num(train_y_arr).T)
        # return MI_scores, keep_features
        return self.correlations, keep_features

In [6]:
feat_engineered = pl.scan_parquet(
    "/mnt/c/Users/Admin/Desktop/Personal-Projects/Kaggle/Hull Tactical - Market Prediction/data/wsl_feature_impt_train.parquet"
)

train_x = feat_engineered.filter(pl.col(CONFIG.DATE_COL).is_between(CONFIG.START_DATE, CONFIG.END_DATE)).collect()
train_y = (
    pl.scan_csv(CONFIG.TRAIN_Y_PATH, infer_schema_length=10_000)
    .filter(pl.col(CONFIG.DATE_COL).is_between(CONFIG.START_DATE, CONFIG.END_DATE))
    .collect()
)

risk_free_rate = (
    pl.scan_csv(CONFIG.TRAIN_X_PATH, infer_schema_length=10_000)
    .filter(pl.col(CONFIG.DATE_COL).is_between(CONFIG.START_DATE, CONFIG.END_DATE))
    .select(["risk_free_rate"])
    .collect()
    .to_numpy()
    .flatten()
)
forward_returns = (
    pl.scan_csv(CONFIG.TRAIN_X_PATH, infer_schema_length=10_000)
    .filter(pl.col(CONFIG.DATE_COL).is_between(CONFIG.START_DATE, CONFIG.END_DATE))
    .select(["forward_returns"])
    .collect()
    .to_numpy()
    .flatten()
)

In [7]:
feature_selector = FeatureSelector(train_x=train_x.drop(CONFIG.DATE_COL), train_y=train_y.drop(CONFIG.DATE_COL))
corr, keep_features = feature_selector.run_selection()

After correlation, keeping: 7530 features


In [8]:
keep_features_list = keep_features.select("feature").to_series().to_list()

In [9]:
X = train_x.drop(CONFIG.DATE_COL).select(keep_features_list).to_numpy()
y = train_y.drop(CONFIG.DATE_COL).to_numpy().flatten()
feature_names = train_x.drop(CONFIG.DATE_COL).select(keep_features_list).columns

In [10]:
xgb_trainer = XGBoostTrainer()
xgb_res = xgb_trainer.train_cv(
    X=X,
    y=y,
    risk_free_rate=risk_free_rate,
    forward_returns=forward_returns,
    batch_size=CONFIG.BATCH_SIZE,
    NUM_BOOST_ROUND=CONFIG.NUM_BOOST_ROUND,
    compute_shap=True,
    feature_names=feature_names,
)

[0]	val-adj_sharpe:0.35977
[10]	val-adj_sharpe:0.40164
[20]	val-adj_sharpe:0.47632
[30]	val-adj_sharpe:0.39702
[40]	val-adj_sharpe:0.38688
[50]	val-adj_sharpe:0.46130
[60]	val-adj_sharpe:0.47974
[70]	val-adj_sharpe:0.57512
[80]	val-adj_sharpe:0.61050
[90]	val-adj_sharpe:0.66004
[99]	val-adj_sharpe:0.69459


  bst.update(dtrain, iteration=i, fobj=obj)
[32m2025-11-15 23:53:50.585[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m359[0m - [1mBest iteration: 98, Best Sharpe: 0.6955[0m
[32m2025-11-15 23:53:50.585[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m362[0m - [1m  Computing SHAP values for fold 1...[0m
[32m2025-11-15 23:53:51.903[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m366[0m - [1m  Top 5 features: ['E2_M3_exp_diff_std_252_90', 'P2_std_252_10', 'E9_S10_sigmoid_diff_std_252_252', 'V3_V7_exp_diff_std_90_90', 'P11_std_252_10'][0m


[0]	val-adj_sharpe:-0.04696
[10]	val-adj_sharpe:-0.09389
[20]	val-adj_sharpe:-0.13515
[30]	val-adj_sharpe:-0.07095
[40]	val-adj_sharpe:-0.07054
[50]	val-adj_sharpe:0.01726
[60]	val-adj_sharpe:-0.00023
[70]	val-adj_sharpe:0.04890
[80]	val-adj_sharpe:0.06450
[90]	val-adj_sharpe:0.08589
[99]	val-adj_sharpe:0.11354


[32m2025-11-15 23:57:50.033[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m359[0m - [1mBest iteration: 99, Best Sharpe: 0.1135[0m
[32m2025-11-15 23:57:50.034[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m362[0m - [1m  Computing SHAP values for fold 2...[0m
[32m2025-11-15 23:57:50.492[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m366[0m - [1m  Top 5 features: ['M16_M8_log_ratio_std_252_10', 'P10_sma_5_std_90_10', 'M16_M8_log_ratio_std_90_10', 'P11_P7_poly3_std_252_10', 'E9_S10_sigmoid_diff_std_252_252'][0m


[0]	val-adj_sharpe:-0.25919
[10]	val-adj_sharpe:-0.19550
[20]	val-adj_sharpe:0.02488
[30]	val-adj_sharpe:-0.01675
[40]	val-adj_sharpe:-0.02537
[50]	val-adj_sharpe:-0.14632
[60]	val-adj_sharpe:-0.12184
[70]	val-adj_sharpe:-0.11006
[80]	val-adj_sharpe:-0.14058
[90]	val-adj_sharpe:-0.06828
[99]	val-adj_sharpe:-0.06690


[32m2025-11-16 00:01:37.192[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m359[0m - [1mBest iteration: 36, Best Sharpe: 0.0557[0m
[32m2025-11-16 00:01:37.193[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m362[0m - [1m  Computing SHAP values for fold 3...[0m
[32m2025-11-16 00:01:35.135[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m366[0m - [1m  Top 5 features: ['M17_P10_poly3_std_252_90', 'M8_V6_log_mul_std_10_10', 'M16_S11_log_ratio_std_90_10', 'E2_E8_exp_diff_std_252_252', 'E3_V3_exp_diff_std_252_10'][0m


[0]	val-adj_sharpe:0.43123
[10]	val-adj_sharpe:0.41291
[20]	val-adj_sharpe:0.32984
[30]	val-adj_sharpe:0.32541
[40]	val-adj_sharpe:0.32861
[50]	val-adj_sharpe:0.29537
[60]	val-adj_sharpe:0.29125
[70]	val-adj_sharpe:0.31327
[80]	val-adj_sharpe:0.31349
[90]	val-adj_sharpe:0.29739
[99]	val-adj_sharpe:0.28623


[32m2025-11-16 00:04:47.445[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m359[0m - [1mBest iteration: 1, Best Sharpe: 0.4397[0m
[32m2025-11-16 00:04:47.446[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m362[0m - [1m  Computing SHAP values for fold 4...[0m
[32m2025-11-16 00:04:47.695[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m366[0m - [1m  Top 5 features: ['M17_P10_poly2_std_252_252', 'E3_P3_poly3_std_252_252', 'M8_V12_atan_ratio_std_90_90', 'P8_V1_log_mul_std_252_90', 'P8_V13_diff_squared_std_90_10'][0m


[0]	val-adj_sharpe:0.49826
[10]	val-adj_sharpe:0.53899
[20]	val-adj_sharpe:0.51261
[30]	val-adj_sharpe:0.58617
[40]	val-adj_sharpe:0.58922
[50]	val-adj_sharpe:0.61618
[60]	val-adj_sharpe:0.64969
[70]	val-adj_sharpe:0.67923
[80]	val-adj_sharpe:0.69013
[90]	val-adj_sharpe:0.68959
[99]	val-adj_sharpe:0.68291


[32m2025-11-16 00:07:53.483[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m359[0m - [1mBest iteration: 85, Best Sharpe: 0.6956[0m
[32m2025-11-16 00:07:53.484[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m362[0m - [1m  Computing SHAP values for fold 5...[0m
[32m2025-11-16 00:07:53.698[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m366[0m - [1m  Top 5 features: ['P9_V10_poly3_std_252_252', 'I2_M1_poly3_std_252_252', 'E20_M1_diff_squared_std_252_90', 'M2_P1_poly3_std_252_10', 'M1_P11_diff_squared_std_252_252'][0m


In [11]:
xgb_res.write_csv("/mnt/c/Users/Admin/Desktop/Personal-Projects/Kaggle/Hull Tactical - Market Prediction/XGB_feature_importance.csv")

In [12]:
lgb_trainer = LightGBMTrainer()
lgb_res = lgb_trainer.train_cv(
    X=X,
    y=y,
    risk_free_rate=risk_free_rate,
    forward_returns=forward_returns,
    batch_size=CONFIG.BATCH_SIZE,
    NUM_BOOST_ROUND=CONFIG.NUM_BOOST_ROUND,
    compute_shap=True,
    feature_names=feature_names,
)

Training until validation scores don't improve for 100 rounds
[10]	validation's adj_sharpe: 0.480006
[20]	validation's adj_sharpe: 0.310033
[30]	validation's adj_sharpe: 0.281605
[40]	validation's adj_sharpe: 0.282918
[50]	validation's adj_sharpe: 0.277143
[60]	validation's adj_sharpe: 0.241533
[70]	validation's adj_sharpe: 0.29549
[80]	validation's adj_sharpe: 0.28066
[90]	validation's adj_sharpe: 0.337667
[100]	validation's adj_sharpe: 0.37017
Did not meet early stopping. Best iteration is:
[6]	validation's adj_sharpe: 0.576674


[32m2025-11-16 00:08:14.290[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m489[0m - [1m  Computing SHAP values for fold 1...[0m
[32m2025-11-16 00:08:14.803[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m493[0m - [1m  Top 5 features: ['P8_vol_5_std_252_10', 'P2_std_252_10', 'P2_V3_sigmoid_diff_std_90_252', 'P2_sma_10_std_10_10', 'M16_P8_poly3_std_252_252'][0m


Training until validation scores don't improve for 100 rounds
[10]	validation's adj_sharpe: 0.106539
[20]	validation's adj_sharpe: 0.200434
[30]	validation's adj_sharpe: 0.118707
[40]	validation's adj_sharpe: 0.0281534
[50]	validation's adj_sharpe: -0.0579819
[60]	validation's adj_sharpe: -0.0910764
[70]	validation's adj_sharpe: -0.0865236
[80]	validation's adj_sharpe: -0.00854346
[90]	validation's adj_sharpe: -0.0105604
[100]	validation's adj_sharpe: 0.0187411
Did not meet early stopping. Best iteration is:
[20]	validation's adj_sharpe: 0.200434


[32m2025-11-16 00:08:34.788[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m489[0m - [1m  Computing SHAP values for fold 2...[0m
[32m2025-11-16 00:08:35.098[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m493[0m - [1m  Top 5 features: ['M17_P8_poly3_std_252_90', 'E19_V13_poly3_std_90_90', 'M8_V12_sigmoid_diff_std_10_10', 'M8_forward_returns_poly2_std_252_90', 'E3_P11_harmonic_mean_std_252_90'][0m


Training until validation scores don't improve for 100 rounds
[10]	validation's adj_sharpe: -0.435346
[20]	validation's adj_sharpe: -0.476897
[30]	validation's adj_sharpe: -0.40484
[40]	validation's adj_sharpe: -0.400977
[50]	validation's adj_sharpe: -0.389881
[60]	validation's adj_sharpe: -0.358356
[70]	validation's adj_sharpe: -0.363988
[80]	validation's adj_sharpe: -0.377634
[90]	validation's adj_sharpe: -0.394803
[100]	validation's adj_sharpe: -0.359085
Did not meet early stopping. Best iteration is:
[1]	validation's adj_sharpe: -0.297938


[32m2025-11-16 00:08:54.950[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m489[0m - [1m  Computing SHAP values for fold 3...[0m
[32m2025-11-16 00:08:55.196[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m493[0m - [1m  Top 5 features: ['M17_P11_log_mul_std_252_252', 'M17_P10_log_mul_std_90_252', 'M2_P8_sigmoid_diff_std_252_90', 'M8_risk_free_rate_log_ratio_std_252_90', 'E8_P2_poly3_std_90_90'][0m


Training until validation scores don't improve for 100 rounds
[10]	validation's adj_sharpe: 0.390929
[20]	validation's adj_sharpe: 0.357827
[30]	validation's adj_sharpe: 0.335994
[40]	validation's adj_sharpe: 0.352491
[50]	validation's adj_sharpe: 0.350694
[60]	validation's adj_sharpe: 0.340364
[70]	validation's adj_sharpe: 0.350884
[80]	validation's adj_sharpe: 0.333479
[90]	validation's adj_sharpe: 0.314479
[100]	validation's adj_sharpe: 0.321898
Did not meet early stopping. Best iteration is:
[1]	validation's adj_sharpe: 0.440672


[32m2025-11-16 00:09:08.821[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m489[0m - [1m  Computing SHAP values for fold 4...[0m
[32m2025-11-16 00:09:08.981[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m493[0m - [1m  Top 5 features: ['P8_S4_atan_ratio_std_252_252', 'E19_M16_atan_ratio_std_252_10', 'V13_V5_sigmoid_diff_std_252_90', 'E3_M7_exp_diff_std_252_90', 'P10_S7_sigmoid_diff_std_252_252'][0m


Training until validation scores don't improve for 100 rounds
[10]	validation's adj_sharpe: 0.47694
[20]	validation's adj_sharpe: 0.552748
[30]	validation's adj_sharpe: 0.551818
[40]	validation's adj_sharpe: 0.559648
[50]	validation's adj_sharpe: 0.560258
[60]	validation's adj_sharpe: 0.540424
[70]	validation's adj_sharpe: 0.564244
[80]	validation's adj_sharpe: 0.563864
[90]	validation's adj_sharpe: 0.555988
[100]	validation's adj_sharpe: 0.544453
Did not meet early stopping. Best iteration is:
[27]	validation's adj_sharpe: 0.589649


[32m2025-11-16 00:09:21.447[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m489[0m - [1m  Computing SHAP values for fold 5...[0m
[32m2025-11-16 00:09:21.561[0m | [1mINFO    [0m | [36m__main__[0m:[36mtrain_cv[0m:[36m493[0m - [1m  Top 5 features: ['P10_S9_atan_ratio_std_252_252', 'M16_P11_log_mul_std_252_252', 'M14_P2_sigmoid_diff_std_90_252', 'P2_P4_sigmoid_diff_std_252_90', 'E19_E9_log_mul_std_252_252'][0m


In [13]:
lgb_res.write_csv("/mnt/c/Users/Admin/Desktop/Personal-Projects/Kaggle/Hull Tactical - Market Prediction/LGBM_feature_importance.csv")