In [16]:
import pandas as pd
import polars as pl

class CONFIG:
    """Static settings shared across the notebook: seed, target, input and weight columns."""

    seed = 42
    target_col = "responder_6"
    # Model inputs, in order: symbol_id, time_id, feature_00..feature_78,
    # then responder_0_lag_1..responder_8_lag_1 (90 columns in total).
    feature_cols = [
        "symbol_id",
        "time_id",
        *(f"feature_{num:02d}" for num in range(79)),
        *(f"responder_{num}_lag_1" for num in range(9)),
    ]
    weight_col = "weight"
    
# Import polars under its full module name: a later cell rebinds the alias
# `pl` to pytorch_lightning, so `pl.scan_parquet` would fail if this cell
# were re-run after that import.
import polars

# Read both parquet files eagerly and hand them to pandas.
train = polars.scan_parquet("/kaggle/input/js24-preprocessing-create-lags/training.parquet").collect().to_pandas()
valid = polars.scan_parquet("/kaggle/input/js24-preprocessing-create-lags/validation.parquet").collect().to_pandas()
# From here on `train` holds the training rows followed by the validation rows,
# re-indexed 0..n-1.
# NOTE(review): the validation rows are therefore also part of `train` —
# confirm this overlap is intended wherever `valid` is used for model selection.
train = pd.concat([train, valid], ignore_index=True)

In [17]:
# A `!` line runs in a separate, short-lived shell, so `export` there never
# reaches this kernel process. Set the variable on the kernel's own
# environment instead, where this process and any processes it starts see it.
# NOTE(review): presumably Polars reads this flag when worker processes are
# forked — confirm whether it must be set before `import polars`.
import os

os.environ["POLARS_ALLOW_FORKING_THREAD"] = "1"

In [18]:
# Feature matrix, target and sample weights for the training and validation
# frames. Column names all come from CONFIG so they cannot drift from the
# ones the dataset class uses.
# Note: these slices are taken before the missing-value filling done in a
# later cell, so they still contain any NaNs present in the parquet files.
X_train = train[CONFIG.feature_cols]
y_train = train[CONFIG.target_col]
w_train = train[CONFIG.weight_col]
X_valid = valid[CONFIG.feature_cols]
y_valid = valid[CONFIG.target_col]
w_valid = valid[CONFIG.weight_col]

In [19]:
import os
import warnings
import torch
import torch.nn as nn
import torch.nn.functional as F
import pytorch_lightning as pl
from pytorch_lightning import (LightningDataModule, LightningModule, Trainer)
from pytorch_lightning.callbacks import EarlyStopping, ModelCheckpoint, Timer
from pytorch_lightning.loggers import WandbLogger
import wandb
import pandas as pd
import numpy as np
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split
from torch.utils.data import Dataset, DataLoader


class custom_args():
    """Plain attribute container holding the run's hyper-parameters and switches."""

    def __init__(self):
        # Built inside __init__ so every instance gets its own list objects.
        defaults = {
            'usegpu': True,           # use CUDA when it is available
            'gpuid': 0,               # index of the CUDA device to use
            'seed': 42,
            'model': 'nn',
            'use_wandb': False,       # log to Weights & Biases when True
            'project': 'js-xs-nn-with-lags',
            'dname': "./input_df/",
            'loader_workers': 5,      # DataLoader worker processes
            'bs': 8192,               # batch size
            'lr': 1e-3,
            'weight_decay': 5e-4,
            'dropouts': [0.1, 0.1],   # dropout rate per hidden block, in order
            'n_hidden': [512, 512, 256],
            'patience': 25,           # early-stopping patience, in epochs
            'max_epochs': 2000,
            'N_fold': 5,
        }
        for name, value in defaults.items():
            setattr(self, name, value)


# One settings object for the whole notebook; `args` is a second name for it.
my_args = custom_args()
args = my_args

# Pick the configured CUDA device only when CUDA is present and enabled.
if torch.cuda.is_available() and args.usegpu:
    device = torch.device(f'cuda:{args.gpuid}')
else:
    device = torch.device('cpu')

In [20]:
from torch.utils.data import DataLoader, Dataset

class CustomDataset(Dataset):
    """Map-style dataset yielding (features, label, weight) for one row.

    The feature, target and weight columns named in CONFIG are converted to
    float tensors once, at construction, and moved to `accelerator`.
    """

    def __init__(self, df, accelerator):
        def as_float_tensor(columns):
            # Column(s) of `df` -> float tensor on the requested device.
            return torch.FloatTensor(df[columns].values).to(accelerator)

        self.features = as_float_tensor(CONFIG.feature_cols)
        self.labels = as_float_tensor(CONFIG.target_col)
        self.weights = as_float_tensor(CONFIG.weight_col)

    def __len__(self):
        # One label per row, so the label count is the dataset size.
        return len(self.labels)

    def __getitem__(self, idx):
        return self.features[idx], self.labels[idx], self.weights[idx]

In [21]:
class DataModule(LightningDataModule):
    """Builds date-based training folds plus an optional fixed validation set.

    Args:
        train_df: frame with a ``date_id`` column and the columns that
            CustomDataset reads.
        batch_size: batch size used by both dataloaders.
        valid_df: optional validation frame; the same frame is used for
            every fold.
        accelerator: where the dataset tensors are placed. Accepts a torch
            device string, or Lightning's accelerator name 'gpu', which is
            translated to 'cuda'.

    NOTE(review): tensors that live on a CUDA device are generally not usable
    from DataLoader worker processes — confirm ``num_workers=0`` is used
    whenever ``accelerator`` resolves to a CUDA device.
    """

    # Lightning calls the CUDA accelerator 'gpu', but Tensor.to() only accepts
    # torch device strings, so the default value would otherwise raise.
    _DEVICE_ALIASES = {'gpu': 'cuda'}

    def __init__(self, train_df, batch_size, valid_df=None, accelerator='gpu'):
        super().__init__()
        self.train_df = train_df
        self.valid_df = valid_df
        self.batch_size = batch_size
        self.accelerator = accelerator

        # Distinct dates in order of first appearance; folds are cut from this.
        self.dates = self.train_df['date_id'].unique()

        self.train_dataset = None
        self.val_dataset = None

    def _tensor_device(self):
        """Return `accelerator` as something Tensor.to() accepts."""
        if isinstance(self.accelerator, str):
            return self._DEVICE_ALIASES.get(self.accelerator, self.accelerator)
        return self.accelerator

    def _make_loader(self, dataset, shuffle, num_workers):
        """Build a DataLoader over `dataset` with this module's batch size."""
        return DataLoader(
            dataset,
            batch_size=self.batch_size,
            shuffle=shuffle,
            num_workers=num_workers,
        )

    def setup(self, fold=0, n_fold=5, stage=None):
        """Materialise the datasets for one fold.

        Every date whose position in ``self.dates`` satisfies
        ``position % n_fold == fold`` is left out of the training set.
        """
        target_device = self._tensor_device()
        selected_dates = [date for ii, date in enumerate(self.dates) if ii % n_fold != fold]
        df_train = self.train_df.loc[self.train_df['date_id'].isin(selected_dates)]
        self.train_dataset = CustomDataset(df_train, target_device)
        if self.valid_df is not None:
            self.val_dataset = CustomDataset(self.valid_df, target_device)

    def train_dataloader(self, num_workers=4):
        """Shuffled loader over the current fold's training rows."""
        return self._make_loader(self.train_dataset, shuffle=True, num_workers=num_workers)

    def val_dataloader(self, num_workers=4):
        """Ordered loader over the validation rows."""
        return self._make_loader(self.val_dataset, shuffle=False, num_workers=num_workers)

In [22]:
class NN(LightningModule):
    """Feed-forward regressor trained with a sample-weighted MSE loss.

    Args:
        input_dim: number of input features.
        hidden_dims: width of each hidden Linear layer, in order.
        dropouts: dropout rates; entry ``i`` is applied in hidden block ``i``,
            blocks beyond the end of the list get no dropout.
        lr: Adam learning rate.
        weight_decay: Adam weight decay.
    """

    def __init__(self, input_dim, hidden_dims, dropouts, lr, weight_decay):
        super().__init__()
        self.save_hyperparameters()

        layers = []
        in_dim = input_dim
        for i, hidden_dim in enumerate(hidden_dims):
            # Block layout: BatchNorm -> SiLU (skipped for the first block,
            # which sees raw inputs) -> optional Dropout -> Linear.
            layers.append(nn.BatchNorm1d(in_dim))
            if i > 0:
                layers.append(nn.SiLU())
            if i < len(dropouts):
                layers.append(nn.Dropout(dropouts[i]))
            layers.append(nn.Linear(in_dim, hidden_dim))
            in_dim = hidden_dim
        # Single-output head squashed by tanh; forward() rescales it.
        layers.append(nn.Linear(in_dim, 1))
        layers.append(nn.Tanh())
        self.model = nn.Sequential(*layers)
        self.lr = lr
        self.weight_decay = weight_decay
        # (prediction, target, weight) per validation batch, consumed and
        # cleared in on_validation_epoch_end.
        self.validation_step_outputs = []

    def forward(self, x):
        """Return one prediction per row, bounded to (-5, 5) by the scaled tanh."""
        return 5 * self.model(x).squeeze(-1)

    def _weighted_mse(self, batch):
        """Return (loss, y_hat, y, w) where loss is mean(w * (y_hat - y)^2)."""
        x, y, w = batch
        y_hat = self(x)
        loss = (F.mse_loss(y_hat, y, reduction='none') * w).mean()
        return loss, y_hat, y, w

    def training_step(self, batch):
        """Compute and log the weighted MSE for one training batch."""
        loss, y_hat, _, _ = self._weighted_mse(batch)
        self.log('train_loss', loss, on_step=False, on_epoch=True, prog_bar=True, batch_size=y_hat.size(0))
        return loss

    def validation_step(self, batch):
        """Compute and log the weighted MSE; keep the outputs for the epoch metric."""
        loss, y_hat, y, w = self._weighted_mse(batch)
        self.log('val_loss', loss, on_step=False, on_epoch=True, prog_bar=True, batch_size=y_hat.size(0))
        self.validation_step_outputs.append((y_hat, y, w))
        return loss

    def on_train_epoch_end(self):
        """Print every logged metric for the finished epoch."""
        if self.trainer.sanity_checking:
            return
        epoch = self.trainer.current_epoch
        metrics = {k: v.item() if isinstance(v, torch.Tensor) else v for k, v in self.trainer.logged_metrics.items()}
        formatted_metrics = {k: f"{v:.5f}" for k, v in metrics.items()}
        print(f"Epoch {epoch}: {formatted_metrics}")

    def on_validation_epoch_end(self):
        """Log the sample-weighted R^2 over the whole validation epoch.

        Nothing is logged during Lightning's sanity check or when no
        validation batch was collected; the buffer is cleared either way.
        """
        outputs = self.validation_step_outputs
        if self.trainer.sanity_checking or not outputs:
            outputs.clear()
            return
        prob = torch.cat([out[0] for out in outputs]).cpu().numpy()
        y = torch.cat([out[1] for out in outputs]).cpu().numpy()
        weights = torch.cat([out[2] for out in outputs]).cpu().numpy()
        val_r_square = r2_score(y, prob, sample_weight=weights)
        self.log("val_r_square", val_r_square, prog_bar=True, on_step=False, on_epoch=True)
        outputs.clear()

    def configure_optimizers(self):
        """Adam plus a plateau scheduler that halves the LR when val_loss stalls."""
        optimizer = torch.optim.Adam(
            self.parameters(),
            lr=self.lr,
            weight_decay=self.weight_decay
        )
        scheduler = torch.optim.lr_scheduler.ReduceLROnPlateau(
            optimizer,
            mode='min',
            factor=0.5,
            patience=5,
        )
        return {
            'optimizer': optimizer,
            'lr_scheduler': {
                'scheduler': scheduler,
                'monitor': 'val_loss',
            }
        }
    

In [23]:
args = my_args
# Lightning accelerator name; the torch device below follows the same decision.
accelerator = 'gpu' if torch.cuda.is_available() and args.usegpu else 'cpu'
device = torch.device(f'cuda:{args.gpuid}' if accelerator == 'gpu' else 'cpu')
# Dataset tensors stay on the CPU; batches are moved by the trainer.
loader_device = 'cpu'

# Fill gaps with the previous row's value, then zero whatever is still missing
# (e.g. leading NaNs). One frame at a time, so each old frame can be released
# before the next is processed.
# NOTE(review): the forward-fill runs over the frame in row order and is not
# grouped by symbol — confirm carrying values across symbols is intended.
train = train.ffill().fillna(value=0)
valid = valid.ffill().fillna(value=0)


In [24]:
# Wrap the filled frames; dataset tensors are kept on `loader_device`.
data_module = DataModule(train, args.bs, valid_df=valid, accelerator=loader_device)

In [25]:
import gc
# Drop the notebook-level names; data_module keeps its own references to the
# frames it was constructed with.
del train, valid
gc.collect()

from pytorch_lightning import (LightningDataModule, LightningModule, Trainer)

# Process-wide setting, so it is applied once rather than once per fold.
torch.set_float32_matmul_precision('medium')

for fold in range(args.N_fold):
    # Rebuild the datasets with this fold's dates held out of training.
    data_module.setup(fold, args.N_fold)
    input_dim = data_module.train_dataset.features.shape[1]

    model = NN(
        input_dim=input_dim,
        hidden_dims=args.n_hidden,
        dropouts=args.dropouts,
        lr=args.lr,
        weight_decay=args.weight_decay
    )

    if args.use_wandb:
        wandb_run = wandb.init(project=args.project, config=vars(args), reinit=True)
        logger = WandbLogger(experiment=wandb_run)
    else:
        logger = None

    early_stopping = EarlyStopping('val_loss', patience=args.patience, mode='min', verbose=False)
    # The directory belongs in `dirpath`; `filename` is only the file-name
    # template (Lightning appends the ".ckpt" extension).
    checkpoint_callback = ModelCheckpoint(
        monitor='val_loss',
        mode='min',
        save_top_k=1,
        verbose=False,
        dirpath="./models",
        filename=f"nn_{fold}.model",
    )
    timer = Timer()

    trainer = Trainer(
        max_epochs=args.max_epochs,
        accelerator=accelerator,
        # Follow the accelerator actually selected: a GPU index list only
        # makes sense for 'gpu'; otherwise let Lightning choose.
        devices=[args.gpuid] if accelerator == 'gpu' else 'auto',
        logger=logger,
        callbacks=[early_stopping, checkpoint_callback, timer],
        enable_progress_bar=True
    )
    trainer.fit(
        model=model,
        train_dataloaders=data_module.train_dataloader(args.loader_workers),
        val_dataloaders=data_module.val_dataloader(args.loader_workers)
    )
    print(f'Fold-{fold} Training completed in {timer.time_elapsed("train"):.2f}s')

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params | Mode 
---------------------------------------------
0 | model | Sequential | 443 K  | train
---------------------------------------------
443 K     Trainable params
0         Non-trainable params
443 K     Total params
1.772     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Epoch 2:   8%|▊         | 370/4600 [04:10<47:42,  1.48it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [04:10<47:42,  1.48it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [04:10<47:42,  1.48it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [04:10<47:42,  1.48it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [04:10<47:42,  1.48it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [07:11<1:22:14,  0.86it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [07:11<1:22:14,  0.86it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [07:11<1:22:14,  0.86it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params | Mode 
---------------------------------------------
0 | model | Sequential | 443 K  | train
---------------------------------------------
443 K     Trainable params
0         Non-trainable params
443 K     Total params
1.772     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Epoch 2:   8%|▊         | 370/4600 [2:07:11<24:14:09,  0.05it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [2:07:12<24:14:14,  0.05it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [2:07:12<24:14:14,  0.05it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [2:07:12<24:14:14,  0.05it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [2:07:12<24:14:14,  0.05it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [2:10:09<24:48:00,  0.05it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [2:10:09<24:48:00,  0.05it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [2:10:09<24:48:00,  0.05it/s, v_num=9, val_loss=1.040, 

GPU available: True (cuda), used: True
TPU available: False, using: 0 TPU cores
HPU available: False, using: 0 HPUs
LOCAL_RANK: 0 - CUDA_VISIBLE_DEVICES: [0]

  | Name  | Type       | Params | Mode 
---------------------------------------------
0 | model | Sequential | 443 K  | train
---------------------------------------------
443 K     Trainable params
0         Non-trainable params
443 K     Total params
1.772     Total estimated model params size (MB)
13        Modules in train mode
0         Modules in eval mode


Epoch 2:   8%|▊         | 370/4600 [3:54:33<44:41:34,  0.03it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [3:54:33<44:41:35,  0.03it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [3:54:33<44:41:35,  0.03it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [3:54:33<44:41:37,  0.03it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [3:54:33<44:41:37,  0.03it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [3:57:43<45:17:48,  0.03it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [3:57:43<45:17:48,  0.03it/s, v_num=9, val_loss=1.040, val_r_square=0.00529, train_loss=1.420]
Epoch 2:   8%|▊         | 370/4600 [3:57:43<45:17:48,  0.03it/s, v_num=9, val_loss=1.040, 