In [1]:
from dataclasses import dataclass
from typing import Dict, List, Optional
import copy
import re
from tqdm.auto import tqdm
from abc import ABC, abstractmethod
import math

import polars as pl
import numpy as np
from numba import njit, prange
from scipy.stats import spearmanr, rankdata

from sklearn.model_selection import TimeSeriesSplit, KFold
from sklearn.base import clone

import torch
import torch.nn.functional as F
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import torch.optim as optim

from torch.profiler import profile, ProfilerActivity, record_function

from CONFIG import CONFIG
from PREPROCESSOR_V2 import PREPROCESSOR
from FEATURE_ENGINEERING_V2 import FEATURE_ENGINEERING
from SEQUENTIAL_NN_MODEL import CNNTransformerModel, GRUModel, LSTMModel, PureTransformerModel
from CROSS_SECTIONAL_NN_MODEL import DeepMLPModel, LinearModel, ResidualMLPModel
from LOSS import CombinedICIRLoss

import time


def timer(func):
    def wrapper(*args, **kwargs):
        start = time.time()
        result = func(*args, **kwargs)
        end = time.time()
        print(f"{func.__name__} took {end - start:.4f} seconds")
        return result

    return wrapper

In [2]:
# # --- Prepare DataLoader ---
# # Create the dataset

# train_x = pl.scan_csv(CONFIG.TRAIN_X_PATH).filter(pl.col("date_id") <= CONFIG.MAX_TRAIN_DATE)
# train_y = pl.scan_csv(CONFIG.TRAIN_Y_PATH).filter(pl.col("date_id") <= CONFIG.MAX_TRAIN_DATE).fill_null(0).collect()

# train_x = PREPROCESSOR(df=train_x)
# train_x.clean()
# train_x = train_x.transform().lazy()

# train_x = FEATURE_ENGINEERING(df=train_x)
# train_x = train_x.create_all_features().collect().pivot(index=CONFIG.DATE_COL, on=["type", "instr"])
# train_x = train_x.rename({col: re.sub(r'[{",}]', "", col).replace(" ", "_").replace(",", "_") for col in train_x.columns})
# train_x = train_x.select(set(CONFIG.IMPT_COLS + [CONFIG.DATE_COL]))

In [3]:
def rank_correlation_sharpe(targets, predictions) -> float:
    """
    Calculates the rank correlation between predictions and target values,
    and returns its Sharpe ratio (mean / standard deviation).

    :param merged_df: DataFrame containing prediction columns (starting with 'prediction_')
                      and target columns (starting with 'target_')
    :return: Sharpe ratio of the rank correlation
    :raises ZeroDivisionError: If the standard deviation is zero
    """
    correlations = []

    for i, (pred_row, target_row) in enumerate(zip(predictions, targets)):
        # Find valid (non-NaN) assets for this timestep
        valid_mask = ~np.isnan(target_row)
        valid_pred = pred_row[valid_mask]
        valid_target = target_row[valid_mask]

        if np.std(pred_row) == 0 or np.std(target_row) == 0:
            raise ZeroDivisionError("Zero standard deviation in a row.")

        rho = np.corrcoef(rankdata(valid_pred, method="average"), rankdata(valid_target, method="average"))[0, 1]
        correlations.append(rho)

    daily_rank_corrs = np.array(correlations)
    std_dev = daily_rank_corrs.std(ddof=0)
    if std_dev == 0:
        raise ZeroDivisionError("Denominator is zero, unable to compute Sharpe ratio.")

    sharpe_ratio = daily_rank_corrs.mean() / std_dev
    return float(sharpe_ratio)

In [4]:
class BaseFinancialDataset(Dataset, ABC):
    """Base class for financial datasets"""

    def __init__(self, X: pl.DataFrame, y: pl.DataFrame, date_column: str = CONFIG.DATE_COL):
        """
        Base initialization

        Args:
            data: Preprocessed DataFrame (scaling already done)
            target_columns: List of target column names (424 targets)
            feature_columns: List of feature column names
            date_column: Name of date identifier column
        """
        self.X = X.clone()
        self.y = y.clone()
        self.date_column = date_column

        # Sort by date
        self.X = self.X.sort(by=CONFIG.DATE_COL)
        self.y = self.y.sort(by=CONFIG.DATE_COL)
        self.unique_dates = sorted(self.X[self.date_column].unique())
        self.device = torch.device("cpu")

        self._prepare_samples()

    def _prepare_samples(self):
        self.X = self.X.drop(CONFIG.DATE_COL).to_numpy()

        self.num_features = self.X.shape[-1]
        # Split continuous and categorical features
        self.dates = torch.tensor(self.unique_dates, dtype=torch.int16)
        self.continuous_data = torch.tensor(self.X, dtype=torch.float32)
        self.continuous_data = torch.nan_to_num(self.continuous_data, 0)

        if torch.isnan(self.continuous_data).any() or torch.isinf(self.continuous_data).any():
            print("Input contains NaN or Inf values!")

        self.y = torch.tensor(self.y.drop(CONFIG.DATE_COL).to_numpy(), dtype=torch.float32)

        self.unique_date, self.inverse_indices, self.counts = torch.unique(self.dates, return_inverse=True, return_counts=True)

        self.n_unique_dates = len(self.unique_date)

    @abstractmethod
    def __getitem__(self, idx):
        """Get item - implemented by subclasses"""
        pass

In [5]:
class SequentialDataset(BaseFinancialDataset):
    """Dataset for sequential models (LSTM, Transformers, CNN)"""

    def __init__(self, X: pl.DataFrame, Y: pl.DataFrame, date_column: str = CONFIG.DATE_COL, prediction_horizon: int = 1):
        """
        Sequential dataset for temporal models

        Args:
            data: Preprocessed DataFrame
            target_columns: Target column names
            feature_columns: Feature column names
            date_column: Date identifier column
            sequence_length: Number of time steps in sequence
            prediction_horizon: Steps ahead to predict (usually 1)
        """
        self.sequence_length = CONFIG.SEQ_LEN
        self.prediction_horizon = prediction_horizon

        super().__init__(X, Y)

        self._generate_sequence()

    def _generate_sequence(self):
        self.sequence_x = []
        self.sequence_y = []

        for date in range(self.sequence_length, self.n_unique_dates):
            self.sequence_x.append(self.continuous_data[date - self.sequence_length : date])
            self.sequence_y.append(self.y[date - 1])
        self.sequence_x = torch.stack(self.sequence_x)
        self.sequence_y = torch.stack(self.sequence_y)

    def __len__(self):
        return self.n_unique_dates - self.sequence_length

    def __getitem__(self, idx):
        """Get sequence, target, and date_id"""
        continuous_seq = self.sequence_x[idx]  # (seq_len, N_FEATURES)
        target = self.sequence_y[idx]  # (424,)

        return (
            continuous_seq,
            target,
        )

In [6]:
class CrossSectionalDataset(BaseFinancialDataset):
    """Dataset for cross-sectional models (MLP, XGBoost, Linear)"""

    def __init__(self, X: pl.DataFrame, Y: pl.DataFrame, date_column: str = CONFIG.DATE_COL):
        """
        Cross-sectional dataset for non-temporal models

        Args:
            data: Preprocessed DataFrame
            target_columns: Target column names
            feature_columns: Feature column names
            date_column: Date identifier column
        """

        super().__init__(X, Y)

    def __len__(self):
        return self.n_unique_dates - CONFIG.SEQ_LEN

    def __getitem__(self, idx):
        """Get features, target, and date_id"""
        features = self.continuous_data[idx].unsqueeze(0)  # (n_features,)
        target = self.y[idx]  # (n_targets,)

        return (
            features,
            target,
        )

In [7]:
def flatten_collate_fn(batch: list) -> dict[str, torch.Tensor]:
    """
    Collate function for DataLoader to flatten the batch.

    Args:
        batch (list): List of tuples containing tensors.

        tuple[torch.Tensor]: Flattened tensors (type, instr, X, y).
    """
    X, curr_y = zip(*batch)
    continuous_batch = torch.stack(X)
    curr_y = torch.stack(curr_y)

    continuous_batch = torch.nan_to_num(continuous_batch)

    return {"continuous": continuous_batch, "current": curr_y}


In [8]:
# seq_val_dataset = SequentialDataset(df_valid, df_valid_current_y, df_valid_true_delta)
# seq_val_dataloader = DataLoader(
#     seq_val_dataset,
#     batch_size=1,
#     shuffle=False,
#     collate_fn=flatten_collate_fn,
#     pin_memory=True,
#     # num_workers=6,
#     # persistent_workers=True,
#     # prefetch_factor=2,
#     drop_last=True,
# )

In [9]:
class ENSEMBLE_NN(nn.Module):
    """Ensemble of multiple architectures"""

    def __init__(self, input_dim: int, hidden_dim: int, output_dim: int):
        super().__init__()

        # seq models
        self.cnn_transformer = CNNTransformerModel(input_dim, hidden_dim, output_dim, CONFIG.SEQ_LEN)
        self.gru_model = GRUModel(input_dim, hidden_dim, output_dim)
        self.lstm_model = LSTMModel(input_dim, hidden_dim, output_dim)
        self.pure_transformer = PureTransformerModel(input_dim, hidden_dim, output_dim)

        # cross sectional models
        # self.mlp = DeepMLPModel(input_dim, [64, 32], output_dim)
        # self.linear = LinearModel(input_dim, output_dim)
        # self.residual = ResidualMLPModel(input_dim, hidden_dim, output_dim)

        # Ensemble weights (learnable)
        self.ensemble_weights = nn.Parameter(torch.ones(4) / 4)

        self.ensemble_dropout = nn.Dropout(0.1)
        self.prediction_dropout = nn.Dropout(0.1)

    def forward(
        self,
        x_seq,
    ):
        # Get predictions from all models
        x_cs = x_seq[:, -1, :]
        out1 = self.cnn_transformer(x_seq)
        out2 = self.gru_model(x_seq)
        out3 = self.lstm_model(x_seq)
        out4 = self.pure_transformer(x_seq)
        # out5 = self.mlp(x_cs)
        # out6 = self.linear(x_cs)
        # out7 = self.residual(x_cs)

        individual_outputs = [out1, out2, out3, out4]  # out5, out6, out7
        individual_outputs = [self.prediction_dropout(out) for out in individual_outputs]

        dropped_weights = self.ensemble_dropout(self.ensemble_weights)
        weights = F.softmax(dropped_weights, dim=0)

        ensemble_output = torch.zeros_like(individual_outputs[0])  # (batch_size, 424)
        for w, out in zip(weights, individual_outputs):
            ensemble_output += w * out

        return ensemble_output
        # return out2


In [10]:
class NN:
    def __init__(
        self,
        model,
        lr: float = 0.001,
        batch_size: int = 1,
        epochs: int = 100,
        early_stopping_patience: int = 10,
        early_stopping: bool = True,
        lr_patience: int = 2,
        lr_factor: float = 0.5,
        lr_refit: float = 0.001,
        random_seed: int = CONFIG.RANDOM_STATE,
        **kwargs,
    ) -> None:
        self.lr = lr
        self.batch_size = batch_size
        self.epochs = epochs
        self.early_stopping_patience = early_stopping_patience
        self.early_stopping = early_stopping
        self.lr_patience = lr_patience
        self.lr_factor = lr_factor
        self.lr_refit = lr_refit
        self.random_seed = random_seed

        self.criterion = CombinedICIRLoss()

        self.device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
        self.model = model.to(self.device)
        self.optimizer = None
        self.refit_optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr_refit, weight_decay=0.01)
        self.best_epoch = None
        self.features = None
        self.kwargs = kwargs

    def fit(self, train_set: tuple, val_set: tuple, retrain_set: tuple, verbose: bool = False) -> None:
        """Fit the model on the training set and validate on the validation set.

        Args:
            train_set (tuple): A tuple containing input data, targets for training.
            val_set (tuple): A tuple containing input data, targets for validation.
            verbose (bool, optional): If True, prints training progress. Defaults to False.
        """
        torch.manual_seed(self.random_seed)

        seq_train_dataset = SequentialDataset(*train_set)
        seq_train_dataloader = DataLoader(
            seq_train_dataset,
            batch_size=self.batch_size,
            shuffle=True,
            collate_fn=flatten_collate_fn,
            pin_memory=True,
            # num_workers=2,
            # persistent_workers=True,
            # prefetch_factor=2,
            drop_last=True,
        )

        seq_val_dataset = SequentialDataset(*val_set)
        seq_val_dataloader = DataLoader(
            seq_val_dataset,
            batch_size=1,
            shuffle=False,
            collate_fn=flatten_collate_fn,
            pin_memory=True,
            # num_workers=6,
            # persistent_workers=True,
            # prefetch_factor=2,
            drop_last=True,
        )

        retrain_val_dataset = SequentialDataset(*retrain_set)
        retrain_val_dataloader = DataLoader(
            retrain_val_dataset,
            batch_size=1,
            shuffle=False,
            collate_fn=flatten_collate_fn,
            pin_memory=True,
            # num_workers=6,
            # persistent_workers=True,
            # prefetch_factor=2,
            drop_last=True,
        )

        self.optimizer = torch.optim.AdamW(self.model.parameters(), lr=self.lr, weight_decay=0.01)
        self.scheduler = torch.optim.lr_scheduler.CosineAnnealingWarmRestarts(self.optimizer, T_0=5, T_mult=2, eta_min=1e-6)

        train_sharpes, val_sharpes = [], []
        if verbose:
            print(f"Device: {self.device}")
            print(
                f"{'Epoch':^5} | {'Train Loss':^10} | {'Train ICIR Loss':^15} | {'Train MSE Loss':^14} | {'Train Ranking Loss':^17} | {'Val Loss':^8} | {'Val ICIR Loss':^13} | {'Val MSE Loss':^12} | {'Val Ranking Loss':^16} | {'Train sharpe':^9} | {'Val sharpe':^7} | {'LR':^7}"
            )
            print("-" * 60)

        min_val_sharpe = -np.inf
        best_epoch = 0
        no_improvement = 0
        best_model = None
        for epoch in range(self.epochs):
            train_loss, train_sharpe, train_icir_loss, train_mse_loss, train_ranking_loss = self.train_one_epoch(seq_train_dataloader, verbose)
            val_loss, val_sharpe, val_icir_loss, val_mse_loss, val_ranking_loss = self.validate_one_epoch(
                seq_val_dataloader, retrain_val_dataloader, verbose
            )

            self.scheduler.step()
            lr_last = self.optimizer.param_groups[0]["lr"]

            train_sharpes.append(train_sharpe)
            val_sharpes.append(val_sharpe)

            if verbose:
                print(
                    f"{epoch + 1:^5} | {train_loss:^10.4f} | {train_icir_loss:^15.4f} | {train_mse_loss:^14.4f} | {train_ranking_loss:^17.4f} | {val_loss:^8.4f} | {val_icir_loss:^13.4f} | {val_mse_loss:^12.4f} | {val_ranking_loss:^16.4f} | {train_sharpe:^9.4f} | {val_sharpe:^7.4f} | {lr_last:^7.5f}"
                )

            if val_sharpe > min_val_sharpe:
                min_val_sharpe = val_sharpe
                best_model = copy.deepcopy(self.model.state_dict())
                no_improvement = 0
                best_epoch = epoch
            else:
                no_improvement += 1

            if self.early_stopping:
                if no_improvement >= self.early_stopping_patience + 1:
                    self.best_epoch = best_epoch + 1
                    if verbose:
                        print(f"Early stopping on epoch {best_epoch + 1}. Best score: {min_val_sharpe:.4f}")
                    break

        # Load the best model
        if self.early_stopping:
            self.model.load_state_dict(best_model)

    def train_one_epoch(self, seq_train_dataloader: DataLoader, verbose: bool) -> tuple:
        """Train the model for one epoch.

        Args:
            train_dataloader (DataLoader): DataLoader for the training set.
            verbose (bool): If True, shows progress using tqdm.

        Returns:
            tuple[float, float]: A tuple containing:
                - Train loss (float).
                - Spearman Sharpe for the training set (float).
        """
        self.model.train()
        total_loss = 0.0
        total_icir_loss = 0.0
        total_mse_loss = 0.0
        total_ranking_loss = 0.0

        y_total, preds_total = [], []

        for seq_batch in seq_train_dataloader:
            seq_batch = {key: value.to(self.device) for key, value in seq_batch.items()}
            seq_x_batch = seq_batch["continuous"]

            true_y = seq_batch["current"]

            self.optimizer.zero_grad()
            with torch.autocast(device_type="cuda"):
                pred_y = self.model(seq_x_batch)
                loss, icir_loss, mse_loss, ranking_loss, _ = self.criterion(pred_y, true_y).values()

            loss.backward()
            torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)

            self.optimizer.step()

            total_loss += loss.item()
            total_icir_loss += icir_loss.item()
            total_mse_loss += mse_loss.item()
            total_ranking_loss += ranking_loss.item()

            y_total.append(true_y)
            preds_total.append((pred_y).detach())

            torch.cuda.empty_cache()

        y_total = torch.cat(y_total).cpu().numpy().astype(np.float64)
        preds_total = torch.cat(preds_total).cpu().numpy().astype(np.float64)

        train_sharpe = rank_correlation_sharpe(y_total, preds_total)
        train_loss = total_loss / len(seq_train_dataloader)
        train_icir_loss = total_icir_loss / len(seq_train_dataloader)
        train_mse_loss = total_mse_loss / len(seq_train_dataloader)
        train_ranking_loss = total_ranking_loss / len(seq_train_dataloader)

        return train_loss, train_sharpe, train_icir_loss, train_mse_loss, train_ranking_loss

    @timer
    def validate_one_epoch(self, seq_val_dataloader: DataLoader, retrain_val_dataloader: DataLoader, verbose=False) -> tuple:
        """Validate the model on the validation set.

        Args:
            val_dataloader (DataLoader): DataLoader for the validation set.
            verbose (bool, optional): If True, shows progress using tqdm. Defaults to False.

        Returns:
            tuple[float, float]: A tuple containing:
                - Validation loss (float).
                - Spearman Sharpe for the validation set (float).
        """
        model = copy.deepcopy(self.model).to("cpu")

        losses, icir_losses, mse_losses, ranking_losses, all_y, all_preds = [], [], [], [], [], []

        for seq_batch in seq_val_dataloader:
            # seq_batch = {key: value.to(self.device) for key, value in seq_batch.items()}
            # cs_batch = {key: value.to(self.device) for key, value in cs_batch.items()}
            seq_x_batch = seq_batch["continuous"]

            true_y = seq_batch["current"]

            # Predict
            model.eval()
            with torch.inference_mode():
                pred_y = model(seq_x_batch)
                pred_y = torch.nan_to_num(pred_y)

                loss, icir_loss, mse_loss, ranking_loss, _ = self.criterion(pred_y, true_y).values()
                losses.append(loss.cpu().numpy())
                icir_losses.append(icir_loss.cpu().numpy())
                mse_losses.append(mse_loss.cpu().numpy())
                ranking_losses.append(ranking_loss.cpu().numpy())

                all_y.append(true_y)
                all_preds.append(pred_y)

            # Update weights
            if self.lr_refit > 0:
                for retrain_batch in retrain_val_dataloader:
                    retrain_seq_x_batch = retrain_batch["continuous"]
                    retrain_true_y = retrain_batch["current"]
                optimizer = torch.optim.AdamW(model.parameters(), lr=self.lr_refit, weight_decay=0.01)
                optimizer.zero_grad()
                model.train()
                pred_y = model(retrain_seq_x_batch)
                loss = self.criterion(pred_y, retrain_true_y)["total_loss"]
                loss.backward()
                torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1.0)
                optimizer.step()

        all_y = torch.cat(all_y).numpy().astype(np.float64)
        all_preds = torch.cat(all_preds).numpy().astype(np.float64)
        loss = np.mean(losses)
        val_icir_loss = np.mean(icir_losses)
        val_mse_loss = np.mean(mse_losses)
        val_ranking_loss = np.mean(ranking_losses)

        sharpe = rank_correlation_sharpe(all_y, all_preds)

        return loss, sharpe, val_icir_loss, val_mse_loss, val_ranking_loss

    def update(self, seq_X: np.array, true_y: np.array):
        """Update the model with new data.

        Args:
            X (np.array): Input data.
            y (np.array): Target variable.
            n_times (int): Number of time steps.
        """
        torch.manual_seed(self.random_seed)
        if self.lr_refit == 0.0:
            return

        seq_continuous_data = torch.tensor(np.nan_to_num(seq_X, nan=0.0), dtype=torch.float32, device=self.device)

        true_y = torch.tensor(np.nan_to_num(true_y, nan=0.0), dtype=torch.float32, device=self.device)
        self.model.train()

        self.refit_optimizer.zero_grad()
        with torch.autocast(device_type="cuda"):
            pred_y = self.model(seq_continuous_data.unsqueeze(0))
            loss = self.criterion(pred_y, true_y)["total_loss"]

        loss.backward()
        torch.nn.utils.clip_grad_norm_(self.model.parameters(), max_norm=1.0)
        self.refit_optimizer.step()

    def predict(self, seq_X: np.array) -> tuple[np.array, torch.Tensor | list]:
        """Predict the target variable for the given input data.

        Args:
            X (np.array): Input data.

        Returns:
            tuple[np.array, torch.Tensor or list]: A tuple containing:
                - Predictions (np.array).
                - Hidden state (torch.Tensor or list).
        """
        torch.manual_seed(self.random_seed)
        seq_continuous_data = torch.tensor(np.nan_to_num(seq_X, nan=0.0), dtype=torch.float32, device=self.device)

        self.model.eval()
        with torch.inference_mode():
            preds = self.model(seq_continuous_data.unsqueeze(0))
            preds = torch.nan_to_num(preds)

        return preds.cpu().numpy().astype(np.float64)

In [11]:
# --- Prepare DataLoader ---
# Create the dataset

train_x = pl.scan_csv(CONFIG.TRAIN_X_PATH)
train_x = PREPROCESSOR(df=train_x)
train_x = train_x.clean()

features = FEATURE_ENGINEERING(df=train_x)
train_x: pl.DataFrame = features.create_market_features()

train_y = pl.scan_csv(CONFIG.TRAIN_Y_PATH)

# retrain_y = (
#     train_y.with_columns([pl.col(CONFIG.LAGS[f"lag{i}"]).exclude(CONFIG.DATE_COL).shift(i + 1) for i in range(1, 5)])
#     # .with_columns(pl.all().exclude(CONFIG.DATE_COL).shift())
#     .filter((pl.col(CONFIG.DATE_COL).is_in(train_x.select(CONFIG.DATE_COL).to_series())))
#     .collect()
# )
train_y = train_y.filter((pl.col(CONFIG.DATE_COL).is_in(train_x.select(CONFIG.DATE_COL).to_series()))).collect()
train_x = (
    train_x.with_columns([pl.when(pl.col(col).is_infinite()).then(0.0).otherwise(pl.col(col)).alias(col) for col in train_x.columns])
    .with_columns(pl.all().shrink_dtype())
    .filter(pl.col(CONFIG.DATE_COL).is_in(train_y.select(CONFIG.DATE_COL).to_series()))
    .with_columns(pl.col(CONFIG.DATE_COL).cast(pl.Int64))
    .select(([CONFIG.DATE_COL] + CONFIG.IMPT_COLS))
)

retrain_x = train_x.with_columns(pl.all().exclude(CONFIG.DATE_COL).shift(5))
retrain_y = train_y.filter((pl.col(CONFIG.DATE_COL).is_in(train_x.select(CONFIG.DATE_COL).to_series()))).with_columns(
    pl.all().exclude(CONFIG.DATE_COL).shift(5)
)

In [12]:
NN_model = NN(
    model=ENSEMBLE_NN(input_dim=len(train_x.columns) - 1, hidden_dim=128, output_dim=CONFIG.NUM_TARGET_COLUMNS),
    batch_size=CONFIG.BATCH_SIZE,
    lr=0.0005,
    lr_refit=0.0001,
    epochs=200,
    early_stopping_patience=10,
)

In [13]:
# test_size = (
#     TEST_SIZE
#     if len(dates_unique) > TEST_SIZE * (n_splits + 1)
#     else len(dates_unique) // (n_splits + 1)
# )  # For testing purposes on small samples

dates_unique = train_x.filter(pl.col(CONFIG.DATE_COL) <= CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
real_dates_unique = (
    train_x.filter(pl.col(CONFIG.DATE_COL) > CONFIG.MAX_TRAIN_DATE).select(pl.col(CONFIG.DATE_COL).unique().sort()).to_series().to_numpy()
)

cv = TimeSeriesSplit(n_splits=CONFIG.N_FOLDS)
cv_split = cv.split(dates_unique)

scores = []
models = []
for fold, (train_idx, valid_idx) in enumerate(cv_split):
    if fold <= 3:
        continue
    if CONFIG.VERBOSE:
        print("-" * 20 + f"Fold {fold}" + "-" * 20)
        print(f"Train dates from {dates_unique[train_idx].min()} to {dates_unique[train_idx].max()}")
        print(f"Valid dates from {dates_unique[valid_idx].min()} to {dates_unique[valid_idx].max()}")

    dates_train = dates_unique[train_idx]
    dates_valid = dates_unique[valid_idx]

    df_train = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))
    true_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(dates_train))

    df_valid = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(dates_valid))
    df_valid_current_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(dates_valid))

    df_valid_retrain = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(dates_valid))
    df_valid_current_y_retrain = retrain_y.filter(pl.col(CONFIG.DATE_COL).is_in(dates_valid))

    model_fold = copy.deepcopy(NN_model)

    # model_fold.fit(
    #     train_set=(df_train, true_y),
    #     val_set=(df_valid, df_valid_current_y),
    #     retrain_set=(df_valid_retrain, df_valid_current_y_retrain),
    #     verbose=CONFIG.VERBOSE,
    # )

    # models.append(model_fold)

    # torch.save(
    #     model_fold.model.state_dict(),
    #     f"C:/Users/Admin/Desktop/Personal-Projects/Kaggle/MITSUI&CO. Commodity Prediction Challenge/ensemble_{fold}.pth",
    # )

    # preds = []
    # cnt_dates = 0
    # model_save = copy.deepcopy(model_fold)

    model_fold.model.load_state_dict(
        torch.load(
            f"C:/Users/Admin/Desktop/Personal-Projects/Kaggle/MITSUI&CO. Commodity Prediction Challenge/ensemble_{fold}.pth",
            map_location=torch.device("cuda"),
        )
    )

    # for date_id in tqdm(dates_valid):
    #     df_valid_date = (
    #         train_x.filter(pl.col(CONFIG.DATE_COL).is_in(range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)))
    #         .drop(CONFIG.DATE_COL)
    #         .to_numpy()
    #         .astype(np.float64)
    #     )

    #     if model_fold.lr_refit and (cnt_dates > 0):
    #         seq_period = range((date_id - CONFIG.SEQ_LEN), date_id)

    #         df_upd = train_x.filter(pl.col(CONFIG.DATE_COL).is_in(seq_period)).drop(CONFIG.DATE_COL).to_numpy()

    #         df_upd_current_y = train_y.filter(pl.col(CONFIG.DATE_COL).is_in(date_id)).drop(CONFIG.DATE_COL).to_numpy()

    #         if len(df_upd) > 0:
    #             model_save.update(df_upd, df_upd_current_y)

    #     preds_i = model_save.predict(df_valid_date)

    #     preds += list(preds_i[-1].reshape(-1, CONFIG.NUM_TARGET_COLUMNS))

    #     cnt_dates += 1

    # preds = np.array(preds)

    # score = rank_correlation_sharpe(
    #     df_valid_current_y.drop(CONFIG.DATE_COL).to_numpy().astype(np.float64),
    #     preds,
    # )
    # scores.append(score)

    # print(f"LAST VALIDIDATION Sharpe: {score:.5f}")

    model_real = copy.deepcopy(model_fold)
    preds = []
    cnt_dates = 0
    for date_id in tqdm(real_dates_unique):
        # print(date_id)
        df_valid_date = (
            train_x.filter(pl.col(CONFIG.DATE_COL).is_in(range(date_id - CONFIG.SEQ_LEN + 1, date_id + 1)))
            .drop(CONFIG.DATE_COL)
            .to_numpy()
            .astype(np.float64)
        )
        break

        if model_fold.lr_refit and (cnt_dates > 0):
            seq_period = range((date_id - CONFIG.SEQ_LEN), date_id)

            df_upd = retrain_x.filter(pl.col(CONFIG.DATE_COL).is_in(seq_period)).drop(CONFIG.DATE_COL).to_numpy()
            df_upd_current_y = retrain_y.filter(pl.col(CONFIG.DATE_COL).is_in(date_id)).drop(CONFIG.DATE_COL).to_numpy()

            if len(df_upd) > 0:
                model_real.update(df_upd, df_upd_current_y)

            # print(df_upd[:, 0])
            # print(df_upd_current_y[:, -1])
            # print(df_upd_true_delta[:, -1])
        preds_i = model_real.predict(df_valid_date)

        preds += list(preds_i[-1].reshape(-1, CONFIG.NUM_TARGET_COLUMNS))
        # print(preds_i[-1].reshape(-1, CONFIG.NUM_TARGET_COLUMNS))

        cnt_dates += 1
        # if cnt_dates == 2:
        #     break

    preds = np.array(preds)

    score = rank_correlation_sharpe(
        df_valid_current_y.drop(CONFIG.DATE_COL).to_numpy().astype(np.float64),
        preds,
    )
    scores.append(score)
    print(f"REAL Sharpe: {score:.5f}")

--------------------Fold 4--------------------
Train dates from 1 to 1522
Valid dates from 1523 to 1826


  0%|          | 0/90 [00:00<?, ?it/s]

REAL Sharpe: nan


  ret = _var(a, axis=axis, dtype=dtype, out=out, ddof=ddof,
  arrmean = um.true_divide(arrmean, div, out=arrmean,
  ret = ret.dtype.type(ret / rcount)
  sharpe_ratio = daily_rank_corrs.mean() / std_dev
  ret = ret.dtype.type(ret / rcount)


In [None]:
X_sample = X_sample.clone().detach().requires_grad_(True).to(device)
output = model(X_sample)
output.mean().backward()

grads = X_sample.grad.detach().cpu().numpy()
importance = np.abs(grads).mean(axis=0)  # Average across samples


In [None]:
shap.DeepExplainer(model_real,df_valid_date)    



ValueError: <class '__main__.NN'> is not currently a supported model type!