In [1]:
from dataclasses import dataclass
from typing import Dict, List, Optional
import copy
import re
from tqdm.auto import tqdm
from abc import ABC, abstractmethod
import math
import os
import json
import random

import polars as pl
import numpy as np
from numba import njit, prange
from scipy.stats import spearmanr, rankdata, norm

from sklearn.model_selection import TimeSeriesSplit, KFold
from sklearn.base import clone

import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

# from torch.profiler import profile, ProfilerActivity, record_function

from CONFIG import CONFIG
from PREPROCESSOR_V2 import PREPROCESSOR
from FEATURE_ENGINEERING_FOR_SUBMISSION import FEATURE_ENGINEERING
from DATASET import SequentialDataset
from SEQUENTIAL_NN_MODEL import CNNTransformerModel, GRUModel, LSTMModel, PureTransformerModel
from CROSS_SECTIONAL_NN_MODEL import DeepMLPModel, LinearModel, ResidualMLPModel
from ENSEMBLE_NN import ENSEMBLE_NN
from NN_V2 import NN
from LOSS_V2 import CombinedICIRLoss

import time


def timer(func):
    def wrapper(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        end = time.time()
        print(f"{func.__name__} took {end - start:.4f} seconds")
        return result

    return wrapper


In [2]:
torch.manual_seed(CONFIG.RANDOM_STATE)
random.seed(CONFIG.RANDOM_STATE)
np.random.seed(CONFIG.RANDOM_STATE)

In [3]:
folder = "nn_models/vf_submission_2"
os.makedirs(folder, exist_ok=True)
with open(f"{folder}/features.json", "w") as f:
    json.dump(CONFIG.IMPT_COL, f)

# with open(f"{folder}/features.json", "r") as f:
#     CONFIG.IMPT_COL = json.load(f)

In [4]:
def gaussian_rank_transform(arr: np.array):
    # n_samples, n_targets = arr.shape
    transformed_targets = np.full_like(arr, np.nan)

    for i, row in enumerate(arr):
        # Find valid (non-NaN) assets for this timestep
        valid_mask = ~np.isnan(row)
        valid_arr = row[valid_mask]
        ranks = rankdata(valid_arr, method="average")
        percentile_ranks = (ranks - 0.5) / (len(ranks))
        percentile_ranks = np.clip(percentile_ranks, 1e-8, 1 - 1e-8)
        gaussian_values = norm.ppf(percentile_ranks)
        transformed_targets[i, valid_mask] = gaussian_values
    return transformed_targets

In [5]:
# # --- Prepare DataLoader ---
# # Create the dataset

# train_x = pl.scan_csv(CONFIG.TRAIN_X_PATH).filter(pl.col("date_id") <= CONFIG.MAX_TRAIN_DATE)
# train_y = pl.scan_csv(CONFIG.TRAIN_Y_PATH).filter(pl.col("date_id") <= CONFIG.MAX_TRAIN_DATE).fill_null(0).collect()

# train_x = PREPROCESSOR(df=train_x)
# train_x.clean()
# train_x = train_x.transform().lazy()

# train_x = FEATURE_ENGINEERING(df=train_x)
# train_x = train_x.create_all_features().collect().pivot(index=CONFIG.DATE_COL, on=["type", "instr"])
# train_x = train_x.rename({col: re.sub(r'[{",}]', "", col).replace(" ", "_").replace(",", "_") for col in train_x.columns})
# train_x = train_x.select(set(CONFIG.IMPT_COLS + [CONFIG.DATE_COL]))

In [6]:
def rank_correlation_sharpe(targets, predictions) -> float:
    """
    Calculates the rank correlation between predictions and target values,
    and returns its Sharpe ratio (mean / standard deviation).

    :param merged_df: DataFrame containing prediction columns (starting with 'prediction_')
                    and target columns (starting with 'target_')
    :return: Sharpe ratio of the rank correlation
    :raises ZeroDivisionError: If the standard deviation is zero
    """
    correlations = []

    for i, (pred_row, target_row) in enumerate(zip(predictions, targets)):
        # Find valid (non-NaN) assets for this timestep
        valid_mask = ~np.isnan(target_row)
        valid_pred = pred_row[valid_mask]
        valid_target = target_row[valid_mask]

        if np.std(pred_row) == 0 or np.std(target_row) == 0:
            raise ZeroDivisionError("Zero standard deviation in a row.")

        rho = np.corrcoef(rankdata(valid_pred, method="average"), rankdata(valid_target, method="average"))[0, 1]
        correlations.append(rho)

    daily_rank_corrs = np.array(correlations)
    std_dev = daily_rank_corrs.std(ddof=0)
    if std_dev == 0:
        raise ZeroDivisionError("Denominator is zero, unable to compute Sharpe ratio.")

    sharpe_ratio = daily_rank_corrs.mean() / std_dev
    return float(sharpe_ratio)

In [7]:
# --- Prepare DataLoader ---
# Create the dataset

train_x = pl.scan_csv(CONFIG.TRAIN_X_PATH).drop(CONFIG.DROP_COL)
train_x = PREPROCESSOR(df=train_x)
train_x = train_x.clean()

features = FEATURE_ENGINEERING(df=train_x)
train_x: pl.DataFrame = features.create_market_features()

train_y = pl.scan_csv(CONFIG.TRAIN_Y_PATH)

curr_y = (
    train_y.with_columns([pl.col(CONFIG.LAGS[f"lag{i}"]).exclude(CONFIG.DATE_COL).shift(i + 1) for i in range(1, 5)])
    .with_columns(pl.all().exclude(CONFIG.DATE_COL).shift())
    .filter((pl.col(CONFIG.DATE_COL).is_in(train_x.select(CONFIG.DATE_COL).to_series())))
    .collect()
    .fill_null(0)
    .lazy()
)

y_feat = FEATURE_ENGINEERING(df=curr_y)
ys = y_feat.create_Y_market_features()

train_x = train_x.join(ys, on=CONFIG.DATE_COL).fill_nan(0)


train_y = train_y.filter((pl.col(CONFIG.DATE_COL).is_in(train_x.select(CONFIG.DATE_COL).to_series()))).collect()
train_x = (
    train_x.with_columns([pl.when(pl.col(col).is_infinite()).then(0.0).otherwise(pl.col(col)).alias(col) for col in train_x.columns])
    .with_columns(pl.all().shrink_dtype())
    .filter(pl.col(CONFIG.DATE_COL).is_in(train_y.select(CONFIG.DATE_COL).to_series()))
    .with_columns(pl.col(CONFIG.DATE_COL).cast(pl.Int64))
    .select([CONFIG.DATE_COL] + CONFIG.IMPT_COL)
)

retrain_x = train_x.with_columns(pl.all().exclude(CONFIG.DATE_COL).shift(5))
retrain_y = train_y.filter((pl.col(CONFIG.DATE_COL).is_in(train_x.select(CONFIG.DATE_COL).to_series()))).with_columns(
    pl.all().exclude(CONFIG.DATE_COL).shift(5)
)

train_y_arr = train_y.drop(CONFIG.DATE_COL).to_numpy()

train_y = pl.DataFrame(gaussian_rank_transform(train_y_arr), schema=train_y.drop(CONFIG.DATE_COL).columns).insert_column(
    0, train_y.select(CONFIG.DATE_COL).to_series()
)


# pl.DataFrame(
#     (train_y_arr - np.nanmean(train_y_arr, axis=1).reshape(train_y_arr.shape[0], -1))
#     / np.nanstd(train_y_arr, axis=1).reshape(train_y_arr.shape[0], -1),
#     schema=train_y.drop(CONFIG.DATE_COL).columns,
# ).insert_column(0, train_y.select(CONFIG.DATE_COL).to_series())

retrain_y_arr = retrain_y.drop(CONFIG.DATE_COL).to_numpy()
retrain_y = pl.DataFrame(gaussian_rank_transform(retrain_y_arr), schema=train_y.drop(CONFIG.DATE_COL).columns).insert_column(
    0, train_y.select(CONFIG.DATE_COL).to_series()
)


# pl.DataFrame(
#     (retrain_y_arr - np.nanmean(retrain_y_arr, axis=1).reshape(retrain_y_arr.shape[0], -1))
#     / np.nanstd(retrain_y_arr, axis=1).reshape(retrain_y_arr.shape[0], -1),
#     schema=train_y.drop(CONFIG.DATE_COL).columns,
# ).insert_column(0, train_y.select(CONFIG.DATE_COL).to_series())

create_market_features took 5.9711 seconds


In [11]:
NN_model = NN(
        model=ENSEMBLE_NN(input_dim=len(train_x.columns) - 1, hidden_dim=16, output_dim=CONFIG.NUM_TARGET_COLUMNS, RNN="GRU"),
        seq_len=8,
        batch_size=32,
        lr=0.005,
        lr_refit=0.008,
        epochs=200,
        early_stopping_patience=10,
        spearman_weight=0.2,
        listnet_weight=0.35,
        kendall_weight=0.05,
        pairwise_weight=0.45,
        topk_weight=0.3,
        mse_weight=0.1,  # Small MSE for stability
        listnet_temp=1.0,
        kendall_temp=0.1,
    )

In [12]:
# {
#     "batch_size": 32,
#     "seq_len": 8,
#     "hidden_dim": 16,
#     "lr": 0.005,
#     "refit_lr": 0.008,
#     "spearman_weight": 0.2,
#     "listnet_weight": 0.35000000000000003,
#     "kendall_weight": 0.05,
#     "pairwise_weight": 0.45,
#     "topk_weight": 0.3,
#     "mse_weight": 0.1,
# }
# {
#     "batch_size": 32,
#     "seq_len": 8,
#     "hidden_dim": 16,
#     "lr": 0.005,
#     "refit_lr": 0.008,
#     "spearman_weight": 0.15000000000000002,
#     "listnet_weight": 0.30000000000000004,
#     "kendall_weight": 0.05,
#     "pairwise_weight": 0.5,
#     "topk_weight": 0.25,
#     "mse_weight": 0.1,
# }

# {
#     "batch_size": 128,
#     "seq_len": 16,
#     "hidden_dim": 16,
#     "lr": 0.009000000000000001,
#     "refit_lr": 0.005,
#     "spearman_weight": 0.1,
#     "listnet_weight": 0.2,
#     "kendall_weight": 0.25,
#     "pairwise_weight": 0.45,
#     "topk_weight": 0.2,
#     "mse_weight": 0.05,
# }

In [13]:
if CONFIG.RUN_CV:
    dates_unique = (
        train_x.filter(pl.col(CONFIG.DATE_COL) <= CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
    )
    real_dates_unique = (
        train_x.filter(pl.col(CONFIG.DATE_COL) > CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
    )

    cv = TimeSeriesSplit(n_splits=CONFIG.N_FOLDS)
    cv_split = cv.split(dates_unique)

    scores = []
    models = []
    for fold, (train_idx, valid_idx) in enumerate(cv_split):
        if fold <= 3:
            continue
        if CONFIG.VERBOSE:
            print("-" * 20 + f"Fold {fold}" + "-" * 20)
            print(f"Train dates from {dates_unique[train_idx].min()} to {dates_unique[train_idx].max()}")
            print(f"Valid dates from {dates_unique[valid_idx].min()} to {dates_unique[valid_idx].max()}")

        dates_train = dates_unique[train_idx]
        dates_valid = dates_unique[valid_idx]

        df_train = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))
        true_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))

        valid_period = range(min(dates_valid) - CONFIG.SEQ_LEN + 1, max(dates_valid) + 1)
        df_valid = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))
        df_valid_current_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))

        df_valid_retrain = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))
        df_valid_current_y_retrain = retrain_y.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))

        model_fold = copy.deepcopy(NN_model)

        model_fold.fit(
            train_set=(df_train, true_y),
            val_set=(df_valid, df_valid_current_y),
            retrain_set=(df_valid_retrain, df_valid_current_y_retrain),
            verbose=CONFIG.VERBOSE,
        )

        models.append(model_fold)

        torch.save(
            model_fold.model.state_dict(),
            f"{folder}/ensemble_{fold}.pth",
        )

        preds = []
        cnt_dates = 0
        model_save = copy.deepcopy(model_fold)

        model_fold.model.load_state_dict(
            torch.load(
                f"{folder}/ensemble_{fold}.pth",
                map_location=torch.device("cuda"),
            )
        )

        for date_id in tqdm(dates_valid):
            period = range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)

            df_valid_date = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(period)).drop(CONFIG.DATE_COL)
            valid_lags = df_valid_date.to_numpy()[-CONFIG.SEQ_LEN :].astype(np.float64)

            if model_fold.refit and (cnt_dates > 0):
                upd_period = range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)
                df_upd = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(upd_period)).drop(CONFIG.DATE_COL).to_numpy()
                df_upd_current_y = retrain_y.filter(pl.col(CONFIG.DATE_COL) == date_id).drop(CONFIG.DATE_COL).to_numpy()

                if len(df_upd) > 0:
                    model_fold.update(df_upd, df_upd_current_y)

            preds_i = model_fold.predict(valid_lags)

            preds += list(preds_i[-1].reshape(-1, CONFIG.NUM_TARGET_COLUMNS))

            cnt_dates += 1

        preds = np.array(preds)

        score = rank_correlation_sharpe(
            df_valid_current_y.drop(CONFIG.DATE_COL).to_numpy().astype(np.float64),
            preds,
        )
        scores.append(score)

        print(f"LAST VALIDIDATION Sharpe: {score:.5f}")

        model_real = copy.deepcopy(model_fold)
        preds = []
        cnt_dates = 0
        for date_id in tqdm(real_dates_unique):
            period = range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)

            df_valid_date = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(period)).drop(CONFIG.DATE_COL)
            valid_lags = df_valid_date.to_numpy()[-CONFIG.SEQ_LEN :].astype(np.float64)

            if model_fold.refit and (cnt_dates > 0):
                upd_period = range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)
                df_upd = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(upd_period)).drop(CONFIG.DATE_COL).to_numpy()
                df_upd_current_y = retrain_y.filter(pl.col(CONFIG.DATE_COL) == date_id).drop(CONFIG.DATE_COL).to_numpy()

                if len(df_upd) > 0:
                    model_real.update(df_upd, df_upd_current_y)

            preds_i = model_real.predict(valid_lags)

            preds += list(preds_i[-1].reshape(-1, CONFIG.NUM_TARGET_COLUMNS))

            cnt_dates += 1

        preds = np.array(preds)

        score = rank_correlation_sharpe(
            train_y.filter(pl.col(CONFIG.DATE_COL).is_in(real_dates_unique)).drop(CONFIG.DATE_COL).to_numpy().astype(np.float64),
            preds,
        )
        scores.append(score)
        print(f"REAL Sharpe: {score:.5f}")
else:
    dates_unique = (
        train_x.filter(pl.col(CONFIG.DATE_COL) <= CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
    )
    real_dates_unique = (
        train_x.filter(pl.col(CONFIG.DATE_COL) > CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
    )

    if CONFIG.VERBOSE:
        print(f"Train dates from {dates_unique.min()} to {dates_unique.max()}")
        print(f"Valid dates from {real_dates_unique.min()} to {real_dates_unique.max()}")

    dates_train = dates_unique
    dates_valid = real_dates_unique

    df_train = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))
    true_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))

    valid_period = range(min(dates_valid) - CONFIG.SEQ_LEN + 1, max(dates_valid) + 1)
    df_valid = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))
    df_valid_current_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))

    df_valid_retrain = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))
    df_valid_current_y_retrain = retrain_y.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))

    model_fold = copy.deepcopy(NN_model)

    model_fold.fit(
        train_set=(df_train, true_y),
        val_set=(df_valid, df_valid_current_y),
        retrain_set=(df_valid_retrain, df_valid_current_y_retrain),
        verbose=CONFIG.VERBOSE,
    )

    torch.save(
        model_fold.model.state_dict(),
        f"{folder}/ensemble_full.pth",
    )

    preds = []
    cnt_dates = 0
    model_save = copy.deepcopy(model_fold)

    model_fold.model.load_state_dict(
        torch.load(
            f"{folder}/ensemble_full.pth",
            map_location=torch.device("cuda"),
        )
    )

    model_real = copy.deepcopy(model_save)
    preds = []
    cnt_dates = 0
    for date_id in tqdm(real_dates_unique):
        period = range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)

        df_valid_date = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(period)).drop(CONFIG.DATE_COL)
        valid_lags = df_valid_date.to_numpy()[-CONFIG.SEQ_LEN :].astype(np.float64)

        if model_fold.refit and (cnt_dates > 0):
            upd_period = range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)
            df_upd = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(upd_period)).drop(CONFIG.DATE_COL).to_numpy()
            df_upd_current_y = retrain_y.filter(pl.col(CONFIG.DATE_COL) == date_id).drop(CONFIG.DATE_COL).to_numpy()

            if len(df_upd) > 0:
                model_real.update(df_upd, df_upd_current_y)

        preds_i = model_real.predict(valid_lags)

        preds += list(preds_i[-1].reshape(-1, CONFIG.NUM_TARGET_COLUMNS))

        cnt_dates += 1

    preds = np.array(preds)

    score = rank_correlation_sharpe(
        train_y.filter(pl.col(CONFIG.DATE_COL).is_in(real_dates_unique)).drop(CONFIG.DATE_COL).to_numpy().astype(np.float64),
        preds,
    )
    print(f"REAL Sharpe: {score:.5f}")

--------------------Fold 4--------------------
Train dates from 3 to 1559
Valid dates from 1560 to 1870
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 6.0312 seconds
   1    |   0.8809   |   0.7717   |  -0.0642   |  -0.3205   |    0.8822     |    0.7215     |    -0.0039    |    -0.0302    |     1.1875     |     1.1556     |   0.9779    |   0.8670    |   1.9779   |   1.9050   |    0.0832    |   0.3821   | 0.00500
validate_one_epoch took 6.0015 seconds
   2    |   0.7543   |   0.7315   |  -0.4017   |  -0.3860   |    0.7114     |    0.6635     |    -0.0274    |    -0.0493    |     1.1452     |     1.1269     |   0.8578    |   0.8335    |   1.9146   |   1.8757   | 

  0%|          | 0/311 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.69012


  0%|          | 0/90 [00:00<?, ?it/s]

REAL Sharpe: 0.63147


In [8]:
import optuna
from optuna.samplers import TPESampler
import optuna.study.study

In [9]:
def objective(trial: optuna.trial.Trial) -> float:
    batchsize = trial.suggest_categorical(name="batch_size", choices=[32, 64, 128, 256])
    seq_len = trial.suggest_categorical(name="seq_len", choices=[4, 8, 16, 32, 64, 128])
    hidden_dim = trial.suggest_categorical(name="hidden_dim", choices=[16, 32, 64])
    lr = trial.suggest_float(name="lr", low=0.005, high=0.01, step=0.001)
    refit_lr = trial.suggest_float(name="refit_lr", low=0.005, high=0.01, step=0.001)
    spearman_weight = trial.suggest_float("spearman_weight", 0.05, 0.2, step=0.05)
    listnet_weight = trial.suggest_float("listnet_weight", 0.2, 0.4, step=0.05)
    kendall_weight = trial.suggest_float("kendall_weight", 0.05, 0.25, step=0.05)
    pairwise_weight = trial.suggest_float("pairwise_weight", 0.2, 0.6, step=0.05)
    topk_weight = trial.suggest_float("topk_weight", 0.2, 0.3, step=0.05)
    mse_weight = trial.suggest_float("mse_weight", 0.05, 0.2, step=0.05)

    NN_model = NN(
        model=ENSEMBLE_NN(input_dim=len(train_x.columns) - 1, hidden_dim=hidden_dim, output_dim=CONFIG.NUM_TARGET_COLUMNS, RNN="GRU"),
        batch_size=batchsize,
        lr=lr,
        seq_len=seq_len,
        lr_refit=refit_lr,
        epochs=200,
        early_stopping_patience=10,
        spearman_weight=spearman_weight,
        listnet_weight=listnet_weight,
        kendall_weight=kendall_weight,
        pairwise_weight=pairwise_weight,
        topk_weight=topk_weight,
        mse_weight=mse_weight,  # Small MSE for stability
        listnet_temp=1.0,
        kendall_temp=0.1,
    )

    dates_unique = (
        train_x.filter(pl.col(CONFIG.DATE_COL) <= CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
    )
    real_dates_unique = (
        train_x.filter(pl.col(CONFIG.DATE_COL) > CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
    )

    cv = TimeSeriesSplit(n_splits=CONFIG.N_FOLDS)
    cv_split = cv.split(dates_unique)

    for fold, (train_idx, valid_idx) in enumerate(cv_split):
        if fold <= 3:
            continue
        if CONFIG.VERBOSE:
            print("-" * 20 + f"Fold {fold}" + "-" * 20)
            print(f"Train dates from {dates_unique[train_idx].min()} to {dates_unique[train_idx].max()}")
            print(f"Valid dates from {dates_unique[valid_idx].min()} to {dates_unique[valid_idx].max()}")

        dates_train = dates_unique[train_idx]
        dates_valid = dates_unique[valid_idx]

        df_train = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))
        true_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))

        valid_period = range(min(dates_valid) - seq_len + 1, max(dates_valid) + 1)
        df_valid = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))
        df_valid_current_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))

        df_valid_retrain = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))
        df_valid_current_y_retrain = retrain_y.filter(pl.col(CONFIG.DATE_COL).is_in(valid_period))

        model_fold = copy.deepcopy(NN_model)

        model_fold.fit(
            train_set=(df_train, true_y),
            val_set=(df_valid, df_valid_current_y),
            retrain_set=(df_valid_retrain, df_valid_current_y_retrain),
            verbose=CONFIG.VERBOSE,
        )

        preds = []
        cnt_dates = 0

        for date_id in tqdm(dates_valid):
            period = range(date_id - seq_len + 1, date_id + 1)

            df_valid_date = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(period)).drop(CONFIG.DATE_COL)
            valid_lags = df_valid_date.to_numpy()[-seq_len:].astype(np.float64)

            if model_fold.refit and (cnt_dates > 0):
                upd_period = range(date_id - seq_len + 1, date_id + 1)
                df_upd = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(upd_period)).drop(CONFIG.DATE_COL).to_numpy()
                df_upd_current_y = retrain_y.filter(pl.col(CONFIG.DATE_COL) == date_id).drop(CONFIG.DATE_COL).to_numpy()

                if len(df_upd) > 0:
                    model_fold.update(df_upd, df_upd_current_y)

            preds_i = model_fold.predict(valid_lags)

            preds += list(preds_i[-1].reshape(-1, CONFIG.NUM_TARGET_COLUMNS))

            cnt_dates += 1

        preds = np.array(preds)

        val_score = rank_correlation_sharpe(
            df_valid_current_y.drop(CONFIG.DATE_COL).to_numpy().astype(np.float64),
            preds,
        )
        print(f"LAST VALIDIDATION Sharpe: {val_score:.5f}")

        model_real = copy.deepcopy(model_fold)
        preds = []
        cnt_dates = 0
        for date_id in tqdm(real_dates_unique):
            period = range(date_id - seq_len + 1, date_id + 1)

            df_valid_date = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(period)).drop(CONFIG.DATE_COL)
            valid_lags = df_valid_date.to_numpy()[-seq_len:].astype(np.float64)

            if model_real.refit and (cnt_dates > 0):
                upd_period = range(date_id - seq_len + 1, date_id + 1)
                df_upd = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(upd_period)).drop(CONFIG.DATE_COL).to_numpy()
                df_upd_current_y = retrain_y.filter(pl.col(CONFIG.DATE_COL) == date_id).drop(CONFIG.DATE_COL).to_numpy()

                if len(df_upd) > 0:
                    model_real.update(df_upd, df_upd_current_y)

            preds_i = model_real.predict(valid_lags)

            preds += list(preds_i[-1].reshape(-1, CONFIG.NUM_TARGET_COLUMNS))

            cnt_dates += 1

        preds = np.array(preds)

        final_score = rank_correlation_sharpe(
            train_y.filter(pl.col(CONFIG.DATE_COL).is_in(real_dates_unique)).drop(CONFIG.DATE_COL).to_numpy().astype(np.float64),
            preds,
        )
        print(f"REAL Sharpe: {final_score:.5f}")

    return val_score * 0.2 + final_score * 0.8


In [10]:
def optimize(study_name, n_trials: int = 200) -> optuna.study.study:
    """
    Runs Optuna optimization over the defined search space.

    Parameters
    ----------
    n_trials : int, optional
        Number of trials to run, by default 200.

    Returns
    -------
    optuna.study.study
        study object
    """
    sampler = TPESampler(seed=CONFIG.RANDOM_STATE)
    study = optuna.create_study(
        study_name=study_name,
        storage=f"sqlite:///{folder}/{study_name}.db",
        direction="maximize",
        sampler=sampler,
        load_if_exists=True,
    )
    study.optimize(objective, n_trials=20, show_progress_bar=True, catch=(ZeroDivisionError,))

    return study

In [None]:
optimize("trials_vary_loss_v1")

[I 2025-09-21 14:30:49,456] Using an existing study with name 'trials_vary_loss_v1' instead of creating a new one.


  0%|          | 0/20 [00:00<?, ?it/s]

--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.4784 seconds
   1    |   0.9348   |   0.8529   |  -0.0856   |  -0.3426   |    0.8824     |    0.7456     |    -0.0050    |    -0.0315    |     1.1875     |     1.1542     |   0.9705    |   0.8567    |   1.9760   |   1.8907   |    0.1007    |   0.3442   | 0.00600
validate_one_epoch took 5.5948 seconds
   2    |   0.8552   |   0.8312   |  -0.3726   |  -0.3788   |    0.7298     |    0.7161     |    -0.0254    |    -0.0429    |     1.1548     |     1.1383     |   0.8693    |   0.8280    |   1.9210   |   1.8647   | 

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.11892


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.60336
[I 2025-09-21 14:37:40,069] Trial 4 finished with value: 0.5064740967023307 and parameters: {'batch_size': 64, 'seq_len': 32, 'hidden_dim': 32, 'lr': 0.006, 'refit_lr': 0.006, 'spearman_weight': 0.05, 'listnet_weight': 0.25, 'kendall_weight': 0.15000000000000002, 'pairwise_weight': 0.35000000000000003, 'topk_weight': 0.2, 'mse_weight': 0.15000000000000002}. Best is trial 2 with value: 0.5234361253757166.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.7067 seconds
   1    |   0.8327   |   0.7391   |   0.0218   |  -0.3301   |    0.9149   

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.24781


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.09993
[I 2025-09-21 14:44:57,027] Trial 5 finished with value: 0.1295054486809513 and parameters: {'batch_size': 256, 'seq_len': 4, 'hidden_dim': 64, 'lr': 0.01, 'refit_lr': 0.009000000000000001, 'spearman_weight': 0.1, 'listnet_weight': 0.2, 'kendall_weight': 0.2, 'pairwise_weight': 0.35000000000000003, 'topk_weight': 0.2, 'mse_weight': 0.1}. Best is trial 2 with value: 0.5234361253757166.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.5756 seconds
   1    |   1.0258   |   0.9340   |  -0.0782   |  -0.3747   |    0.9131     |    0.7784     | 

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: -0.02896


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.66153
[I 2025-09-21 14:50:02,857] Trial 6 finished with value: 0.5234361253757166 and parameters: {'batch_size': 64, 'seq_len': 64, 'hidden_dim': 16, 'lr': 0.01, 'refit_lr': 0.005, 'spearman_weight': 0.05, 'listnet_weight': 0.2, 'kendall_weight': 0.1, 'pairwise_weight': 0.35000000000000003, 'topk_weight': 0.2, 'mse_weight': 0.2}. Best is trial 2 with value: 0.5234361253757166.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.4553 seconds
   1    |   0.7802   |   0.6988   |  -0.0581   |  -0.3147   |    0.9447     |    0.7985     |    -0.0023    

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.24201


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.71636
[I 2025-09-21 14:59:36,646] Trial 7 finished with value: 0.6214911851181973 and parameters: {'batch_size': 128, 'seq_len': 16, 'hidden_dim': 16, 'lr': 0.009000000000000001, 'refit_lr': 0.005, 'spearman_weight': 0.1, 'listnet_weight': 0.2, 'kendall_weight': 0.25, 'pairwise_weight': 0.45, 'topk_weight': 0.2, 'mse_weight': 0.05}. Best is trial 7 with value: 0.6214911851181973.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.8634 seconds
   1    |   0.9488   |   0.8681   |  -0.0409   |  -0.3408   |    0.9568     |    0.8114     |    -0.0031 

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.38165


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.63844
[I 2025-09-21 15:08:02,724] Trial 8 finished with value: 0.5870780885069999 and parameters: {'batch_size': 128, 'seq_len': 4, 'hidden_dim': 16, 'lr': 0.007, 'refit_lr': 0.005, 'spearman_weight': 0.05, 'listnet_weight': 0.2, 'kendall_weight': 0.2, 'pairwise_weight': 0.30000000000000004, 'topk_weight': 0.25, 'mse_weight': 0.2}. Best is trial 7 with value: 0.6214911851181973.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 6.3805 seconds
   1    |   0.7796   |   0.6481   |  -0.1073   |  -0.3847   |    0.9104     |    0.7564     |    -0.0041  

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.13693


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.59383
[I 2025-09-21 15:14:05,729] Trial 9 finished with value: 0.502451446997356 and parameters: {'batch_size': 128, 'seq_len': 32, 'hidden_dim': 16, 'lr': 0.01, 'refit_lr': 0.008, 'spearman_weight': 0.2, 'listnet_weight': 0.4, 'kendall_weight': 0.1, 'pairwise_weight': 0.2, 'topk_weight': 0.2, 'mse_weight': 0.1}. Best is trial 7 with value: 0.6214911851181973.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.7362 seconds
   1    |   0.9119   |   0.8204   |  -0.0559   |  -0.3599   |    0.8447     |    0.7167     |    -0.0056    |    -0.0285    |

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.00993


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.53479
[I 2025-09-21 15:19:10,579] Trial 10 finished with value: 0.429819800452744 and parameters: {'batch_size': 64, 'seq_len': 64, 'hidden_dim': 32, 'lr': 0.01, 'refit_lr': 0.01, 'spearman_weight': 0.1, 'listnet_weight': 0.30000000000000004, 'kendall_weight': 0.1, 'pairwise_weight': 0.30000000000000004, 'topk_weight': 0.2, 'mse_weight': 0.15000000000000002}. Best is trial 7 with value: 0.6214911851181973.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.5319 seconds
   1    |   0.7442   |   0.6472   |  -0.1376   |  -0.3718   |    0.8430     | 

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.27216


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.64874
[I 2025-09-21 15:28:09,095] Trial 11 finished with value: 0.5734219827380287 and parameters: {'batch_size': 32, 'seq_len': 16, 'hidden_dim': 16, 'lr': 0.008, 'refit_lr': 0.007, 'spearman_weight': 0.2, 'listnet_weight': 0.4, 'kendall_weight': 0.25, 'pairwise_weight': 0.55, 'topk_weight': 0.3, 'mse_weight': 0.05}. Best is trial 7 with value: 0.6214911851181973.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.3867 seconds
   1    |   0.7835   |   0.6983   |  -0.0460   |  -0.2912   |    0.9432     |    0.7875     |    -0.0032    |    -0.0241

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.60196


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.66037
[I 2025-09-21 15:36:46,219] Trial 12 finished with value: 0.6486853108662701 and parameters: {'batch_size': 128, 'seq_len': 8, 'hidden_dim': 16, 'lr': 0.008, 'refit_lr': 0.005, 'spearman_weight': 0.15000000000000002, 'listnet_weight': 0.30000000000000004, 'kendall_weight': 0.25, 'pairwise_weight': 0.5, 'topk_weight': 0.25, 'mse_weight': 0.05}. Best is trial 12 with value: 0.6486853108662701.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.4648 seconds
   1    |   0.7872   |   0.7021   |  -0.0505   |  -0.2861   |    0.9389     |    0.7863

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.68664


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.61487
[I 2025-09-21 15:44:26,413] Trial 13 finished with value: 0.6292244337820478 and parameters: {'batch_size': 128, 'seq_len': 8, 'hidden_dim': 16, 'lr': 0.008, 'refit_lr': 0.006, 'spearman_weight': 0.15000000000000002, 'listnet_weight': 0.35000000000000003, 'kendall_weight': 0.25, 'pairwise_weight': 0.5, 'topk_weight': 0.25, 'mse_weight': 0.05}. Best is trial 12 with value: 0.6486853108662701.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 6.1072 seconds
   1    |   0.8000   |   0.7156   |  -0.0488   |  -0.2886   |    0.9398     |    0.7868

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.70871


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.64529
[I 2025-09-21 15:54:18,665] Trial 14 finished with value: 0.6579716529913925 and parameters: {'batch_size': 128, 'seq_len': 8, 'hidden_dim': 16, 'lr': 0.008, 'refit_lr': 0.007, 'spearman_weight': 0.15000000000000002, 'listnet_weight': 0.35000000000000003, 'kendall_weight': 0.25, 'pairwise_weight': 0.55, 'topk_weight': 0.25, 'mse_weight': 0.05}. Best is trial 14 with value: 0.6579716529913925.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 6.3788 seconds
   1    |   0.8043   |   0.7202   |  -0.0399   |  -0.2930   |    0.8709     |    0.737

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 1.22375


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.14754
[I 2025-09-21 16:02:37,025] Trial 15 finished with value: 0.36278425099967976 and parameters: {'batch_size': 128, 'seq_len': 8, 'hidden_dim': 64, 'lr': 0.007, 'refit_lr': 0.007, 'spearman_weight': 0.15000000000000002, 'listnet_weight': 0.35000000000000003, 'kendall_weight': 0.25, 'pairwise_weight': 0.6, 'topk_weight': 0.3, 'mse_weight': 0.05}. Best is trial 14 with value: 0.6579716529913925.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.4046 seconds
   1    |   0.9221   |   0.8092   |  -0.0758   |  -0.3800   |    0.8853     |    0.7243

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.75229


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.65408
[I 2025-09-21 16:08:45,389] Trial 16 finished with value: 0.6737196135778993 and parameters: {'batch_size': 32, 'seq_len': 8, 'hidden_dim': 16, 'lr': 0.005, 'refit_lr': 0.008, 'spearman_weight': 0.15000000000000002, 'listnet_weight': 0.30000000000000004, 'kendall_weight': 0.05, 'pairwise_weight': 0.5, 'topk_weight': 0.25, 'mse_weight': 0.1}. Best is trial 16 with value: 0.6737196135778993.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.3391 seconds
   1    |   0.9346   |   0.8288   |  -0.1114   |  -0.3886   |    0.8930     |    0.7265  

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.20354


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.60920
[I 2025-09-21 16:22:32,214] Trial 17 finished with value: 0.5280658729960108 and parameters: {'batch_size': 32, 'seq_len': 128, 'hidden_dim': 16, 'lr': 0.005, 'refit_lr': 0.008, 'spearman_weight': 0.15000000000000002, 'listnet_weight': 0.35000000000000003, 'kendall_weight': 0.05, 'pairwise_weight': 0.6, 'topk_weight': 0.25, 'mse_weight': 0.1}. Best is trial 16 with value: 0.6737196135778993.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.4418 seconds
   1    |   0.8826   |   0.7593   |  -0.0626   |  -0.3682   |    0.8843     |    0.7248

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.89177


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.61737
[I 2025-09-21 16:28:37,569] Trial 18 finished with value: 0.6722514047363801 and parameters: {'batch_size': 32, 'seq_len': 8, 'hidden_dim': 16, 'lr': 0.005, 'refit_lr': 0.008, 'spearman_weight': 0.2, 'listnet_weight': 0.35000000000000003, 'kendall_weight': 0.05, 'pairwise_weight': 0.45, 'topk_weight': 0.3, 'mse_weight': 0.1}. Best is trial 16 with value: 0.6737196135778993.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.5545 seconds
   1    |   0.8716   |   0.7683   |  -0.0915   |  -0.3173   |    0.8539     |    0.7258     |    -0.0042 

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.67860


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.57516
[I 2025-09-21 16:38:26,066] Trial 19 finished with value: 0.5958454523002383 and parameters: {'batch_size': 32, 'seq_len': 8, 'hidden_dim': 32, 'lr': 0.005, 'refit_lr': 0.009000000000000001, 'spearman_weight': 0.2, 'listnet_weight': 0.30000000000000004, 'kendall_weight': 0.05, 'pairwise_weight': 0.45, 'topk_weight': 0.3, 'mse_weight': 0.1}. Best is trial 16 with value: 0.6737196135778993.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.9069 seconds
   1    |   0.9089   |   0.7975   |  -0.0627   |  -0.3614   |    0.8223     |    0.6887   

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.15736


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.53000
[I 2025-09-21 16:44:30,871] Trial 20 finished with value: 0.45547289873090185 and parameters: {'batch_size': 32, 'seq_len': 128, 'hidden_dim': 64, 'lr': 0.006, 'refit_lr': 0.008, 'spearman_weight': 0.2, 'listnet_weight': 0.25, 'kendall_weight': 0.05, 'pairwise_weight': 0.45, 'topk_weight': 0.3, 'mse_weight': 0.15000000000000002}. Best is trial 16 with value: 0.6737196135778993.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.2707 seconds
   1    |   0.8871   |   0.7676   |  -0.0830   |  -0.3789   |    0.8769     |    0.7200     |    -0.0

  0%|          | 0/304 [00:00<?, ?it/s]

LAST VALIDIDATION Sharpe: 0.82775


  0%|          | 0/134 [00:00<?, ?it/s]

REAL Sharpe: 0.64636
[I 2025-09-21 16:50:19,755] Trial 21 finished with value: 0.6826407263762049 and parameters: {'batch_size': 32, 'seq_len': 8, 'hidden_dim': 16, 'lr': 0.005, 'refit_lr': 0.01, 'spearman_weight': 0.2, 'listnet_weight': 0.4, 'kendall_weight': 0.05, 'pairwise_weight': 0.5, 'topk_weight': 0.3, 'mse_weight': 0.1}. Best is trial 21 with value: 0.6826407263762049.
--------------------Fold 4--------------------
Train dates from 3 to 1522
Valid dates from 1523 to 1826
Device: cuda:0
 Epoch  | TrainLoss  |  ValLoss   | TrainSharpe  | ValSharpe  | TrainICIR  |  ValICIR   | TrainListNet  |  ValListNet   | TrainKendall  |  ValKendall   | TrainPairwise  |  ValPairwise   |  TrainTopK  |   ValTopK   |  TrainMSE  |   ValMSE   | Train sharpe | Val sharpe |   LR   
------------------------------------------------------------
validate_one_epoch took 5.0747 seconds
   1    |   0.8871   |   0.7676   |  -0.0830   |  -0.3789   |    0.8769     |    0.7200     |    -0.0041    |    -0.0343   