In [1]:
from dataclasses import dataclass
from typing import Dict, List, Optional
import copy
import re
from tqdm.auto import tqdm
from abc import ABC, abstractmethod
import math
import os
import json

import polars as pl
import numpy as np
from numba import njit, prange
from scipy.stats import spearmanr, rankdata, norm

from sklearn.model_selection import TimeSeriesSplit, KFold
from sklearn.base import clone

import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

# from torch.profiler import profile, ProfilerActivity, record_function

from CONFIG import CONFIG
from PREPROCESSOR_V2 import PREPROCESSOR
from FEATURE_ENGINEERING_V2 import FEATURE_ENGINEERING
from DATASET import SequentialDataset
from SEQUENTIAL_NN_MODEL import CNNTransformerModel, GRUModel, LSTMModel, PureTransformerModel
from CROSS_SECTIONAL_NN_MODEL import DeepMLPModel, LinearModel, ResidualMLPModel
from ENSEMBLE_NN import ENSEMBLE_NN
from NN import NN
from LOSS import CombinedICIRLoss

import time


def timer(func):
    def wrapper(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        end = time.time()
        print(f"{func.__name__} took {end - start:.4f} seconds")
        return result

    return wrapper


In [2]:
folder = "nn_models/v3_gaussian_rank"
os.makedirs(folder, exist_ok=True)
with open(f"{folder}/features.json", "w") as f:
    json.dump(CONFIG.IMPT_COL, f)

with open(f"{folder}/features.json", "r") as f:
    json.load(f)

In [3]:
def gaussian_rank_transform(arr: np.array):
    # n_samples, n_targets = arr.shape
    transformed_targets = np.full_like(arr, np.nan)

    for i, row in enumerate(arr):
        # Find valid (non-NaN) assets for this timestep
        valid_mask = ~np.isnan(row)
        valid_arr = row[valid_mask]
        ranks = rankdata(valid_arr, method="average")
        percentile_ranks = (ranks - 0.5) / (len(ranks))
        percentile_ranks = np.clip(percentile_ranks, 1e-8, 1 - 1e-8)
        gaussian_values = norm.ppf(percentile_ranks)
        transformed_targets[i, valid_mask] = gaussian_values
    return transformed_targets

In [4]:
# # --- Prepare DataLoader ---
# # Create the dataset

# train_x = pl.scan_csv(CONFIG.TRAIN_X_PATH).filter(pl.col("date_id") <= CONFIG.MAX_TRAIN_DATE)
# train_y = pl.scan_csv(CONFIG.TRAIN_Y_PATH).filter(pl.col("date_id") <= CONFIG.MAX_TRAIN_DATE).fill_null(0).collect()

# train_x = PREPROCESSOR(df=train_x)
# train_x.clean()
# train_x = train_x.transform().lazy()

# train_x = FEATURE_ENGINEERING(df=train_x)
# train_x = train_x.create_all_features().collect().pivot(index=CONFIG.DATE_COL, on=["type", "instr"])
# train_x = train_x.rename({col: re.sub(r'[{",}]', "", col).replace(" ", "_").replace(",", "_") for col in train_x.columns})
# train_x = train_x.select(set(CONFIG.IMPT_COLS + [CONFIG.DATE_COL]))

In [5]:
def rank_correlation_sharpe(targets, predictions) -> float:
    """
    Calculates the rank correlation between predictions and target values,
    and returns its Sharpe ratio (mean / standard deviation).

    :param merged_df: DataFrame containing prediction columns (starting with 'prediction_')
                    and target columns (starting with 'target_')
    :return: Sharpe ratio of the rank correlation
    :raises ZeroDivisionError: If the standard deviation is zero
    """
    correlations = []

    for i, (pred_row, target_row) in enumerate(zip(predictions, targets)):
        # Find valid (non-NaN) assets for this timestep
        valid_mask = ~np.isnan(target_row)
        valid_pred = pred_row[valid_mask]
        valid_target = target_row[valid_mask]

        if np.std(pred_row) == 0 or np.std(target_row) == 0:
            raise ZeroDivisionError("Zero standard deviation in a row.")

        rho = np.corrcoef(rankdata(valid_pred, method="average"), rankdata(valid_target, method="average"))[0, 1]
        correlations.append(rho)

    daily_rank_corrs = np.array(correlations)
    std_dev = daily_rank_corrs.std(ddof=0)
    if std_dev == 0:
        raise ZeroDivisionError("Denominator is zero, unable to compute Sharpe ratio.")

    sharpe_ratio = daily_rank_corrs.mean() / std_dev
    return float(sharpe_ratio)

In [6]:
# --- Prepare DataLoader ---
# Create the dataset

train_x = pl.scan_csv(CONFIG.TRAIN_X_PATH)
train_x = PREPROCESSOR(df=train_x)
train_x = train_x.clean()

features = FEATURE_ENGINEERING(df=train_x)
train_x: pl.DataFrame = features.create_market_features()

train_y = pl.scan_csv(CONFIG.TRAIN_Y_PATH)

curr_y = (
    train_y.with_columns([pl.col(CONFIG.LAGS[f"lag{i}"]).exclude(CONFIG.DATE_COL).shift(i + 1) for i in range(1, 5)])
    .with_columns(pl.all().exclude(CONFIG.DATE_COL).shift())
    .filter((pl.col(CONFIG.DATE_COL).is_in(train_x.select(CONFIG.DATE_COL).to_series())))
    .collect()
    .fill_null(0)
    .lazy()
)

y_feat = FEATURE_ENGINEERING(df=curr_y)
ys = y_feat.create_Y_market_features()

train_x = train_x.join(ys, on=CONFIG.DATE_COL).fill_nan(0)


train_y = train_y.filter((pl.col(CONFIG.DATE_COL).is_in(train_x.select(CONFIG.DATE_COL).to_series()))).collect()
train_x = (
    train_x.with_columns([pl.when(pl.col(col).is_infinite()).then(0.0).otherwise(pl.col(col)).alias(col) for col in train_x.columns])
    .with_columns(pl.all().shrink_dtype())
    .filter(pl.col(CONFIG.DATE_COL).is_in(train_y.select(CONFIG.DATE_COL).to_series()))
    .with_columns(pl.col(CONFIG.DATE_COL).cast(pl.Int64))
    .select([CONFIG.DATE_COL] + CONFIG.IMPT_COL)
)

retrain_x = train_x.with_columns(pl.all().exclude(CONFIG.DATE_COL).shift(5))
retrain_y = train_y.filter((pl.col(CONFIG.DATE_COL).is_in(train_x.select(CONFIG.DATE_COL).to_series()))).with_columns(
    pl.all().exclude(CONFIG.DATE_COL).shift(5)
)

train_y_arr = train_y.drop(CONFIG.DATE_COL).to_numpy()

train_y = pl.DataFrame(gaussian_rank_transform(train_y_arr), schema=train_y.drop(CONFIG.DATE_COL).columns).insert_column(
    0, train_y.select(CONFIG.DATE_COL).to_series()
)


# pl.DataFrame(
#     (train_y_arr - np.nanmean(train_y_arr, axis=1).reshape(train_y_arr.shape[0], -1))
#     / np.nanstd(train_y_arr, axis=1).reshape(train_y_arr.shape[0], -1),
#     schema=train_y.drop(CONFIG.DATE_COL).columns,
# ).insert_column(0, train_y.select(CONFIG.DATE_COL).to_series())

retrain_y_arr = retrain_y.drop(CONFIG.DATE_COL).to_numpy()
retrain_y = pl.DataFrame(gaussian_rank_transform(retrain_y_arr), schema=train_y.drop(CONFIG.DATE_COL).columns).insert_column(
    0, train_y.select(CONFIG.DATE_COL).to_series()
)


# pl.DataFrame(
#     (retrain_y_arr - np.nanmean(retrain_y_arr, axis=1).reshape(retrain_y_arr.shape[0], -1))
#     / np.nanstd(retrain_y_arr, axis=1).reshape(retrain_y_arr.shape[0], -1),
#     schema=train_y.drop(CONFIG.DATE_COL).columns,
# ).insert_column(0, train_y.select(CONFIG.DATE_COL).to_series())


create_market_features took 6.6569 seconds


In [7]:
NN_model = NN(
    model=ENSEMBLE_NN(input_dim=len(train_x.columns) - 1, hidden_dim=64, output_dim=CONFIG.NUM_TARGET_COLUMNS, RNN="GRU"),
    seq_len=CONFIG.SEQ_LEN,
    batch_size=CONFIG.BATCH_SIZE,
    lr=0.0005,
    lr_refit=0.0005,
    epochs=200,
    early_stopping_patience=10,
    huber_weight=1.0,
    MSE_weight=0.5,
    ranking_weight=1.0,
)

In [8]:
if CONFIG.RUN_CV:
    dates_unique = (
        train_x.filter(pl.col(CONFIG.DATE_COL) <= CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
    )
    real_dates_unique = (
        train_x.filter(pl.col(CONFIG.DATE_COL) > CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
    )

    cv = TimeSeriesSplit(n_splits=CONFIG.N_FOLDS)
    cv_split = cv.split(dates_unique)

    scores = []
    models = []
    for fold, (train_idx, valid_idx) in enumerate(cv_split):
        if fold <= 3:
            continue
        if CONFIG.VERBOSE:
            print("-" * 20 + f"Fold {fold}" + "-" * 20)
            print(f"Train dates from {dates_unique[train_idx].min()} to {dates_unique[train_idx].max()}")
            print(f"Valid dates from {dates_unique[valid_idx].min()} to {dates_unique[valid_idx].max()}")

        dates_train = dates_unique[train_idx]
        dates_valid = dates_unique[valid_idx]

        df_train = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))
        true_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))

        valid_period = range(min(dates_valid) - CONFIG.SEQ_LEN + 1, max(dates_valid) + 1)
        df_valid = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))
        df_valid_current_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))

        df_valid_retrain = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))
        df_valid_current_y_retrain = retrain_y.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))

        model_fold = copy.deepcopy(NN_model)

        model_fold.fit(
            train_set=(df_train, true_y),
            val_set=(df_valid, df_valid_current_y),
            retrain_set=(df_valid_retrain, df_valid_current_y_retrain),
            verbose=CONFIG.VERBOSE,
        )

        models.append(model_fold)

        torch.save(
            model_fold.model.state_dict(),
            f"{folder}/ensemble_{fold}.pth",
        )

        preds = []
        cnt_dates = 0
        model_save = copy.deepcopy(model_fold)

        model_fold.model.load_state_dict(
            torch.load(
                f"{folder}/ensemble_{fold}.pth",
                map_location=torch.device("cuda"),
            )
        )

        for date_id in tqdm(dates_valid):
            period = range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)

            df_valid_date = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(period)).drop(CONFIG.DATE_COL)
            valid_lags = df_valid_date.to_numpy()[-CONFIG.SEQ_LEN :].astype(np.float64)

            if model_fold.refit and (cnt_dates > 0):
                upd_period = range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)
                df_upd = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(upd_period)).drop(CONFIG.DATE_COL).to_numpy()
                df_upd_current_y = retrain_y.filter(pl.col(CONFIG.DATE_COL) == date_id).drop(CONFIG.DATE_COL).to_numpy()

                if len(df_upd) > 0:
                    model_fold.update(df_upd, df_upd_current_y)

            preds_i = model_fold.predict(valid_lags)

            preds += list(preds_i[-1].reshape(-1, CONFIG.NUM_TARGET_COLUMNS))

            cnt_dates += 1

        preds = np.array(preds)

        score = rank_correlation_sharpe(
            df_valid_current_y.drop(CONFIG.DATE_COL).to_numpy().astype(np.float64),
            preds,
        )
        scores.append(score)

        print(f"LAST VALIDIDATION Sharpe: {score:.5f}")

        model_real = copy.deepcopy(model_fold)
        preds = []
        cnt_dates = 0
        for date_id in tqdm(real_dates_unique):
            period = range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)

            df_valid_date = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(period)).drop(CONFIG.DATE_COL)
            valid_lags = df_valid_date.to_numpy()[-CONFIG.SEQ_LEN :].astype(np.float64)

            if model_fold.refit and (cnt_dates > 0):
                upd_period = range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)
                df_upd = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(upd_period)).drop(CONFIG.DATE_COL).to_numpy()
                df_upd_current_y = retrain_y.filter(pl.col(CONFIG.DATE_COL) == date_id).drop(CONFIG.DATE_COL).to_numpy()

                if len(df_upd) > 0:
                    model_real.update(df_upd, df_upd_current_y)

            preds_i = model_real.predict(valid_lags)

            preds += list(preds_i[-1].reshape(-1, CONFIG.NUM_TARGET_COLUMNS))

            cnt_dates += 1

        preds = np.array(preds)

        score = rank_correlation_sharpe(
            train_y.filter(pl.col(CONFIG.DATE_COL).is_in(real_dates_unique)).drop(CONFIG.DATE_COL).to_numpy().astype(np.float64),
            preds,
        )
        scores.append(score)
        print(f"REAL Sharpe: {score:.5f}")
else:
    dates_unique = (
        train_x.filter(pl.col(CONFIG.DATE_COL) <= CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
    )
    real_dates_unique = (
        train_x.filter(pl.col(CONFIG.DATE_COL) > CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
    )

    if CONFIG.VERBOSE:
        print(f"Train dates from {dates_unique.min()} to {dates_unique.max()}")
        print(f"Valid dates from {real_dates_unique.min()} to {real_dates_unique.max()}")

    dates_train = dates_unique
    dates_valid = real_dates_unique

    df_train = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))
    true_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))

    valid_period = range(min(dates_valid) - CONFIG.SEQ_LEN + 1, max(dates_valid) + 1)
    df_valid = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))
    df_valid_current_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))

    df_valid_retrain = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))
    df_valid_current_y_retrain = retrain_y.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))

    model_fold = copy.deepcopy(NN_model)

    model_fold.fit(
        train_set=(df_train, true_y),
        val_set=(df_valid, df_valid_current_y),
        retrain_set=(df_valid_retrain, df_valid_current_y_retrain),
        verbose=CONFIG.VERBOSE,
    )

    torch.save(
        model_fold.model.state_dict(),
        f"{folder}/ensemble_full.pth",
    )

    preds = []
    cnt_dates = 0
    model_save = copy.deepcopy(model_fold)

    model_fold.model.load_state_dict(
        torch.load(
            f"{folder}/ensemble_full.pth",
            map_location=torch.device("cuda"),
        )
    )

    model_real = copy.deepcopy(model_save)
    preds = []
    cnt_dates = 0
    for date_id in tqdm(real_dates_unique):
        period = range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)

        df_valid_date = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(period)).drop(CONFIG.DATE_COL)
        valid_lags = df_valid_date.to_numpy()[-CONFIG.SEQ_LEN :].astype(np.float64)

        if model_fold.refit and (cnt_dates > 0):
            upd_period = range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)
            df_upd = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(upd_period)).drop(CONFIG.DATE_COL).to_numpy()
            df_upd_current_y = retrain_y.filter(pl.col(CONFIG.DATE_COL) == date_id).drop(CONFIG.DATE_COL).to_numpy()

            if len(df_upd) > 0:
                model_real.update(df_upd, df_upd_current_y)

        preds_i = model_real.predict(valid_lags)

        preds += list(preds_i[-1].reshape(-1, CONFIG.NUM_TARGET_COLUMNS))

        cnt_dates += 1

    preds = np.array(preds)

    score = rank_correlation_sharpe(
        train_y.filter(pl.col(CONFIG.DATE_COL).is_in(real_dates_unique)).drop(CONFIG.DATE_COL).to_numpy().astype(np.float64),
        preds,
    )
    print(f"REAL Sharpe: {score:.5f}")

--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
Epoch | Train Loss | Train ICIR Loss | Train MSE Loss | Train Huber Loss  | Train Ranking Loss | Val Loss | Val ICIR Loss | Val MSE Loss |  Val Huber Loss  | Val Ranking Loss | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 3.7251 seconds
  1   |   2.2818   |     -0.0088     |     1.9915     |      0.6965       |      0.5983       |  2.3070  |    0.0303     |    1.9868    |      0.7202      |      0.5631      |  0.0185   | 0.0091  | 0.00045
validate_one_epoch took 3.6657 seconds
  2   |   2.1928   |     -0.0854     |     1.9800     |      0.6927       |      0.5955       |  2.1899  |    -0.0672    |    1.9694    |      0.7129      |      0.5595      |  0.0997   | 0.1122  | 0.00033
validate_one_epoch took 3.4927 seconds
  3   |   2.0659   |     -0.1965     |     1.9649     |      0.6878       |   

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: -0.09149


  0%|          | 0/90 [00:00<?, ?it/s]

REAL Sharpe: 0.38897


In [9]:
import optuna
from optuna.samplers import TPESampler
import optuna.study.study

In [None]:
def objective(trial: optuna.trial.Trial) -> float:
    huber_weight = trial.suggest_float(name="huber_weight", low=0.5, high=2.0, step=5.0)
    MSE_weight = trial.suggest_float(name="MSE_weight", low=0.5, high=2.0, step=5.0)
    ranking_weight = trial.suggest_float(name="ranking_weight", low=0.5, high=2.0, step=5.0)
    batchsize = trial.suggest_categorical(name="batch_size", choices=[32, 64, 128])
    seq_len = trial.suggest_categorical(name="seq_len", choices=[16, 32, 64, 128])
    hidden_dim = trial.suggest_categorical(name="hidden_dim", choices=[64, 128, 256, 512])
    lr = trial.suggest_float(name="lr", low=0.0001, high=0.001, step=0.0001)
    refit_lr = trial.suggest_float(name="refit_lr", low=0.0001, high=0.001, step=0.0001)

    NN_model = NN(
        model=ENSEMBLE_NN(input_dim=len(train_x.columns) - 1, hidden_dim=hidden_dim, output_dim=CONFIG.NUM_TARGET_COLUMNS, RNN="GRU"),
        batch_size=batchsize,
        lr=lr,
        seq_len=seq_len,
        lr_refit=refit_lr,
        epochs=200,
        early_stopping_patience=10,
        huber_weight=huber_weight,
        mse_weight=MSE_weight,
        ranking_weight=ranking_weight,
    )

    dates_unique = (
        train_x.filter(pl.col(CONFIG.DATE_COL) <= CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
    )
    real_dates_unique = (
        train_x.filter(pl.col(CONFIG.DATE_COL) > CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
    )

    cv = TimeSeriesSplit(n_splits=CONFIG.N_FOLDS)
    cv_split = cv.split(dates_unique)

    for fold, (train_idx, valid_idx) in enumerate(cv_split):
        if fold <= 3:
            continue
        if CONFIG.VERBOSE:
            print("-" * 20 + f"Fold {fold}" + "-" * 20)
            print(f"Train dates from {dates_unique[train_idx].min()} to {dates_unique[train_idx].max()}")
            print(f"Valid dates from {dates_unique[valid_idx].min()} to {dates_unique[valid_idx].max()}")

        dates_train = dates_unique[train_idx]
        dates_valid = dates_unique[valid_idx]

        df_train = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))
        true_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))

        valid_period = range(min(dates_valid) - seq_len + 1, max(dates_valid) + 1)
        df_valid = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))
        df_valid_current_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))

        df_valid_retrain = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))
        df_valid_current_y_retrain = retrain_y.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))

        model_fold = copy.deepcopy(NN_model)

        model_fold.fit(
            train_set=(df_train, true_y),
            val_set=(df_valid, df_valid_current_y),
            retrain_set=(df_valid_retrain, df_valid_current_y_retrain),
            verbose=CONFIG.VERBOSE,
        )

        preds = []
        cnt_dates = 0

        for date_id in tqdm(dates_valid):
            period = range(date_id - seq_len + 1, date_id + 1)

            df_valid_date = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(period)).drop(CONFIG.DATE_COL)
            valid_lags = df_valid_date.to_numpy()[-seq_len:].astype(np.float64)

            if model_fold.refit and (cnt_dates > 0):
                upd_period = range(date_id - seq_len + 1, date_id + 1)
                df_upd = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(upd_period)).drop(CONFIG.DATE_COL).to_numpy()
                df_upd_current_y = retrain_y.filter(pl.col(CONFIG.DATE_COL) == date_id).drop(CONFIG.DATE_COL).to_numpy()

                if len(df_upd) > 0:
                    model_fold.update(df_upd, df_upd_current_y)

            preds_i = model_fold.predict(valid_lags)

            preds += list(preds_i[-1].reshape(-1, CONFIG.NUM_TARGET_COLUMNS))

            cnt_dates += 1

        preds = np.array(preds)

        val_score = rank_correlation_sharpe(
            df_valid_current_y.drop(CONFIG.DATE_COL).to_numpy().astype(np.float64),
            preds,
        )
        print(f"LAST VALIDIDATION Sharpe: {val_score:.5f}")

        model_real = copy.deepcopy(model_fold)
        preds = []
        cnt_dates = 0
        for date_id in tqdm(real_dates_unique):
            period = range(date_id - seq_len + 1, date_id + 1)

            df_valid_date = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(period)).drop(CONFIG.DATE_COL)
            valid_lags = df_valid_date.to_numpy()[-seq_len:].astype(np.float64)

            if model_real.refit and (cnt_dates > 0):
                upd_period = range(date_id - seq_len + 1, date_id + 1)
                df_upd = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(upd_period)).drop(CONFIG.DATE_COL).to_numpy()
                df_upd_current_y = retrain_y.filter(pl.col(CONFIG.DATE_COL) == date_id).drop(CONFIG.DATE_COL).to_numpy()

                if len(df_upd) > 0:
                    model_real.update(df_upd, df_upd_current_y)

            preds_i = model_real.predict(valid_lags)

            preds += list(preds_i[-1].reshape(-1, CONFIG.NUM_TARGET_COLUMNS))

            cnt_dates += 1

        preds = np.array(preds)

        final_score = rank_correlation_sharpe(
            train_y.filter(pl.col(CONFIG.DATE_COL).is_in(real_dates_unique)).drop(CONFIG.DATE_COL).to_numpy().astype(np.float64),
            preds,
        )
        print(f"REAL Sharpe: {final_score:.5f}")

    return val_score * 0.2 + final_score * 0.8


In [11]:
def optimize(study_name, n_trials: int = 200) -> optuna.study.study:
    """
    Runs Optuna optimization over the defined search space.

    Parameters
    ----------
    n_trials : int, optional
        Number of trials to run, by default 200.

    Returns
    -------
    optuna.study.study
        study object
    """
    sampler = TPESampler(seed=CONFIG.RANDOM_STATE)
    study = optuna.create_study(
        study_name=study_name,
        direction="maximize",
        sampler=sampler,
    )
    study.optimize(
        objective,
        n_trials=20,
        show_progress_bar=True,
    )

    return study

In [None]:
optimize("go")

[I 2025-08-31 00:07:47,220] A new study created in memory with name: go


  0%|          | 0/20 [00:00<?, ?it/s]

--------------------Fold 4--------------------
Train dates from 2 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
Epoch | Train Loss | Train ICIR Loss | Train MSE Loss | Train Ranking Loss | Val Loss | Val ICIR Loss | Val MSE Loss | Val Ranking Loss | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 13.3431 seconds
  1   |  48.7846   |     -0.1080     |     1.9425     |      0.5915       | 43.7670  |    -0.3793    |    1.8068    |      0.5292      |  0.1311   | 0.3625  | 0.00081
validate_one_epoch took 13.3076 seconds
  2   |  47.4762   |     -0.2077     |     1.8825     |      0.5813       | 42.8287  |    -0.3762    |    1.7757    |      0.5165      |  0.2088   | 0.3282  | 0.00059
validate_one_epoch took 12.5709 seconds
  3   |  46.4736   |     -0.2770     |     1.8470     |      0.5712       | 43.1944  |    -0.3198    |    1.7796    |      0.5208      |  0.2955   | 0.2395  | 0.00031
validate_one_epoch took 1

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.23466


  0%|          | 0/90 [00:00<?, ?it/s]

REAL Sharpe: 0.17835
[I 2025-08-31 00:11:41,246] Trial 0 finished with value: 0.18961284667681624 and parameters: {'ranking_weight': 50.0, 'batch_size': 32, 'seq_len': 128, 'hidden_dim': 512, 'lr': 0.0009000000000000001, 'refit_lr': 0.00030000000000000003}. Best is trial 0 with value: 0.18961284667681624.
--------------------Fold 4--------------------
Train dates from 2 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
Epoch | Train Loss | Train ICIR Loss | Train MSE Loss | Train Ranking Loss | Val Loss | Val ICIR Loss | Val MSE Loss | Val Ranking Loss | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 8.9766 seconds
  1   |  40.1233   |     -0.1842     |     1.9709     |      0.5938       | 37.4490  |    -0.3051    |    1.9005    |      0.5444      |  0.1524   | 0.3244  | 0.00018
validate_one_epoch took 8.4981 seconds
  2   |  38.9505   |     -0.3559     |     1.9234     |      0.5836       | 36.5319  |    -0.3

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.17143


  0%|          | 0/90 [00:00<?, ?it/s]

REAL Sharpe: 0.32921
[I 2025-08-31 00:19:31,102] Trial 1 finished with value: 0.2976511604287139 and parameters: {'ranking_weight': 35.0, 'batch_size': 128, 'seq_len': 64, 'hidden_dim': 512, 'lr': 0.0002, 'refit_lr': 0.0006000000000000001}. Best is trial 1 with value: 0.2976511604287139.
--------------------Fold 4--------------------
Train dates from 2 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
Epoch | Train Loss | Train ICIR Loss | Train MSE Loss | Train Ranking Loss | Val Loss | Val ICIR Loss | Val MSE Loss | Val Ranking Loss | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 8.9460 seconds
  1   |  61.1536   |     -0.1076     |     1.9752     |      0.5945       | 57.2230  |    -0.2619    |    1.9262    |      0.5498      |  0.1256   | 0.3016  | 0.00018
validate_one_epoch took 9.0130 seconds
  2   |  59.8847   |     -0.3012     |     1.9402     |      0.5869       | 55.8963  |    -0.3352    |    1.8775

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.31633


  0%|          | 0/90 [00:00<?, ?it/s]

REAL Sharpe: 0.39920
[I 2025-08-31 00:23:16,446] Trial 2 finished with value: 0.38263071150391775 and parameters: {'ranking_weight': 70.0, 'batch_size': 64, 'seq_len': 64, 'hidden_dim': 256, 'lr': 0.0002, 'refit_lr': 0.0005}. Best is trial 2 with value: 0.38263071150391775.
--------------------Fold 4--------------------
Train dates from 2 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
Epoch | Train Loss | Train ICIR Loss | Train MSE Loss | Train Ranking Loss | Val Loss | Val ICIR Loss | Val MSE Loss | Val Ranking Loss | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 4.0464 seconds
  1   |  31.4924   |     -0.1094     |     1.9799     |      0.5956       | 29.8783  |    -0.2201    |    1.9252    |      0.5533      |  0.1108   | 0.2459  | 0.00054
validate_one_epoch took 4.1106 seconds
  2   |  30.7949   |     -0.2362     |     1.9470     |      0.5898       | 29.1274  |    -0.2857    |    1.8829    |      0.5

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.20030


  0%|          | 0/90 [00:00<?, ?it/s]

REAL Sharpe: 0.17219
[I 2025-08-31 00:25:33,816] Trial 3 finished with value: 0.17781193861540304 and parameters: {'ranking_weight': 20.0, 'batch_size': 32, 'seq_len': 64, 'hidden_dim': 64, 'lr': 0.0006000000000000001, 'refit_lr': 0.001}. Best is trial 2 with value: 0.38263071150391775.
--------------------Fold 4--------------------
Train dates from 2 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
Epoch | Train Loss | Train ICIR Loss | Train MSE Loss | Train Ranking Loss | Val Loss | Val ICIR Loss | Val MSE Loss | Val Ranking Loss | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 10.0280 seconds
  1   |  34.4097   |     -0.1375     |     1.9795     |      0.5956       | 32.4778  |    -0.2551    |    1.9263    |      0.5490      |  0.1049   | 0.2964  | 0.00009
validate_one_epoch took 10.0239 seconds
  2   |  33.7096   |     -0.2653     |     1.9500     |      0.5896       | 31.7825  |    -0.3217    |    1.890

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.30422


  0%|          | 0/90 [00:00<?, ?it/s]

REAL Sharpe: 0.21958
[I 2025-08-31 00:30:16,471] Trial 4 finished with value: 0.23650879768907185 and parameters: {'ranking_weight': 25.0, 'batch_size': 128, 'seq_len': 64, 'hidden_dim': 512, 'lr': 0.0001, 'refit_lr': 0.001}. Best is trial 2 with value: 0.38263071150391775.
--------------------Fold 4--------------------
Train dates from 2 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
Epoch | Train Loss | Train ICIR Loss | Train MSE Loss | Train Ranking Loss | Val Loss | Val ICIR Loss | Val MSE Loss | Val Ranking Loss | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 10.1847 seconds
  1   |  70.2540   |     -0.0847     |     1.9764     |      0.5960       | 64.8364  |    -0.3608    |    1.9076    |      0.5468      |  0.0948   | 0.3590  | 0.00036
validate_one_epoch took 10.1138 seconds
  2   |  68.4825   |     -0.3334     |     1.9304     |      0.5864       | 63.4242  |    -0.3636    |    1.8602    |      0