In [7]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
import os
import polars as pl
import time
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch

In [8]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import pandas as pd
import numpy as np
import statistics as stat
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import roc_auc_score
import copy
from torch.utils.data import TensorDataset, DataLoader
from sklearn.model_selection import train_test_split
import torch.optim as optim
import copy
import polars as pl

In [9]:
class LSTM(nn.Module):
    """Conv1d + LSTM regressor that consumes one flat feature vector per row.

    Pipeline (see ``forward``):
      1. ``expand``: LayerNorm -> Dropout -> Linear(num_features, hidden_size) -> ReLU.
      2. The ``hidden_size`` vector is viewed as ``(hidden_size // 16, seq_length)``
         and passed through two Conv1d/ReLU/MaxPool1d(2) stages (64 then 128 channels).
      3. The pooled sequence is fed to an LSTM; the last time step goes through
         Dropout -> Linear(hidden_size, output_size) -> Tanh.

    Args:
        num_features: Width of the input feature vector.
        hidden_size: Width of the expanded vector and of the LSTM state. It must
            be at least 16 and divisible by ``hidden_size // 16`` so that the
            reshape in ``forward`` is exact.
        num_layers: Number of stacked LSTM layers.
        output_size: Width of the final linear layer.
        dropout_rate: Dropout probability used in ``expand``, ``fc`` and
            (only when ``num_layers > 1``) between LSTM layers.

    Raises:
        ValueError: If ``hidden_size`` cannot be reshaped into
            ``hidden_size // 16`` channels.
    """

    def __init__(self,
                 num_features: int,
                 hidden_size: int = 128,
                 num_layers: int = 1,
                 output_size: int = 1,
                 dropout_rate: float = 0.5):
        super(LSTM, self).__init__()

        # forward() views the expanded vector as (conv_channels, seq_length);
        # fail early with a clear message instead of a view/ZeroDivision error later.
        conv_channels = hidden_size // 16
        if conv_channels < 1 or hidden_size % conv_channels != 0:
            raise ValueError(
                f"hidden_size={hidden_size} must be >= 16 and divisible by "
                f"hidden_size // 16 (= {conv_channels})"
            )

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.conv_channels = conv_channels
        self.expand = nn.Sequential(
            nn.LayerNorm(num_features),
            nn.Dropout(dropout_rate),
            nn.Linear(num_features, hidden_size),
            nn.ReLU()
        )

        # Convolutional layers; each MaxPool1d(2) halves the sequence length.
        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels=conv_channels, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )

        self.conv2 = nn.Sequential(
            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )

        # LSTM layer; input_size matches the 128 output channels of conv2.
        self.lstm = nn.LSTM(
            input_size=128,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout_rate if num_layers > 1 else 0
        )

        # Fully connected head; Tanh bounds every output to [-1, 1].
        self.fc = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_size, output_size),
            nn.Tanh()
        )

        self._init_weights()

    def _init_weights(self):
        """Initialise the LSTM: Xavier (input weights), orthogonal (recurrent weights), zero biases."""
        for name, param in self.lstm.named_parameters():
            if 'weight_ih' in name:
                nn.init.xavier_uniform_(param.data)
            elif 'weight_hh' in name:
                nn.init.orthogonal_(param.data)
            elif 'bias' in name:
                param.data.fill_(0)

    def forward(self, x):
        """Run the network.

        Args:
            x: Tensor of shape ``(batch_size, num_features)``.

        Returns:
            Tensor of shape ``(batch_size,)`` when ``output_size == 1``,
            otherwise ``(batch_size, output_size)``. The batch axis is always
            kept, including for a single-row batch.
        """
        x = self.expand(x)
        batch_size = x.size(0)
        # (batch, hidden_size) -> (batch, channels, seq_length)
        x = x.view(batch_size, self.conv_channels, -1)

        x = self.conv1(x)
        x = self.conv2(x)

        # Prepare for LSTM: (batch, channels, seq) -> (batch, seq, channels)
        x = x.permute(0, 2, 1)

        # Zero initial hidden/cell states, created directly on the input's device.
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size,
                         device=x.device, dtype=x.dtype)
        c0 = torch.zeros_like(h0)

        self.lstm.flatten_parameters()

        out, _ = self.lstm(x, (h0, c0))

        # Only the last time step feeds the output head.
        out = self.fc(out[:, -1, :])
        # Drop only the trailing output axis; a bare squeeze() would also remove
        # the batch axis when batch_size == 1 and return a 0-d tensor.
        return out.squeeze(-1)

In [12]:
class LSTM_RES(nn.Module):
    """Conv1d + LSTM regressor with a residual shortcut around the conv stack.

    Pipeline (see ``forward``):
      1. ``expand``: LayerNorm -> Dropout -> Linear(num_features, hidden_size) -> ReLU.
      2. The ``hidden_size`` vector is viewed as ``(hidden_size // 16, seq_length)``.
      3. Main path: two Conv1d/ReLU/MaxPool1d(2) stages (64 then 128 channels).
         Shortcut: 1x1 Conv1d to 128 channels followed by MaxPool1d(4), so both
         paths end with 128 channels and a quarter of the sequence length.
      4. The sum of both paths is fed to an LSTM; the last time step goes through
         Dropout -> Linear(hidden_size, output_size) -> Tanh.

    Args:
        num_features: Width of the input feature vector.
        hidden_size: Width of the expanded vector and of the LSTM state. It must
            be at least 16 and divisible by ``hidden_size // 16`` so that the
            reshape in ``forward`` is exact.
        num_layers: Number of stacked LSTM layers.
        output_size: Width of the final linear layer.
        dropout_rate: Dropout probability used in ``expand``, ``fc`` and
            (only when ``num_layers > 1``) between LSTM layers.

    Raises:
        ValueError: If ``hidden_size`` cannot be reshaped into
            ``hidden_size // 16`` channels.
    """

    def __init__(self,
                 num_features: int,
                 hidden_size: int = 128,
                 num_layers: int = 1,
                 output_size: int = 1,
                 dropout_rate: float = 0.5):
        super(LSTM_RES, self).__init__()

        # forward() views the expanded vector as (conv_channels, seq_length);
        # fail early with a clear message instead of a view/ZeroDivision error later.
        conv_channels = hidden_size // 16
        if conv_channels < 1 or hidden_size % conv_channels != 0:
            raise ValueError(
                f"hidden_size={hidden_size} must be >= 16 and divisible by "
                f"hidden_size // 16 (= {conv_channels})"
            )

        self.hidden_size = hidden_size
        self.num_layers = num_layers
        self.conv_channels = conv_channels
        self.expand = nn.Sequential(
            nn.LayerNorm(num_features),
            nn.Dropout(dropout_rate),
            nn.Linear(num_features, hidden_size),
            nn.ReLU()
        )

        # Convolutional layers; each MaxPool1d(2) halves the sequence length.
        self.conv1 = nn.Sequential(
            nn.Conv1d(in_channels=conv_channels, out_channels=64, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )

        self.conv2 = nn.Sequential(
            nn.Conv1d(in_channels=64, out_channels=128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool1d(kernel_size=2)
        )

        # Residual shortcut: project to 128 channels and pool by 4 so its shape
        # matches the output of conv1 -> conv2.
        self.residual_conv = nn.Sequential(
            nn.Conv1d(in_channels=conv_channels, out_channels=128, kernel_size=1),
            nn.MaxPool1d(kernel_size=4)
        )

        # LSTM layer; input_size matches the 128 channels produced above.
        self.lstm = nn.LSTM(
            input_size=128,
            hidden_size=hidden_size,
            num_layers=num_layers,
            batch_first=True,
            dropout=dropout_rate if num_layers > 1 else 0
        )

        # Fully connected head; Tanh bounds every output to [-1, 1].
        self.fc = nn.Sequential(
            nn.Dropout(dropout_rate),
            nn.Linear(hidden_size, output_size),
            nn.Tanh()
        )

        self._init_weights()

    def _init_weights(self):
        """Initialise the LSTM: Xavier (input weights), orthogonal (recurrent weights), zero biases."""
        for name, param in self.lstm.named_parameters():
            if 'weight_ih' in name:
                nn.init.xavier_uniform_(param.data)
            elif 'weight_hh' in name:
                nn.init.orthogonal_(param.data)
            elif 'bias' in name:
                param.data.fill_(0)

    def forward(self, x):
        """Run the network.

        Args:
            x: Tensor of shape ``(batch_size, num_features)``.

        Returns:
            Tensor of shape ``(batch_size,)`` when ``output_size == 1``,
            otherwise ``(batch_size, output_size)``. The batch axis is always
            kept, including for a single-row batch.
        """
        x = self.expand(x)
        batch_size = x.size(0)
        # (batch, hidden_size) -> (batch, channels, seq_length)
        x = x.view(batch_size, self.conv_channels, -1)

        # Main conv path and shortcut both read the same reshaped tensor; the
        # conv layers do not modify their input, so no copy is needed.
        conv_out = self.conv2(self.conv1(x))
        shortcut = self.residual_conv(x)
        x = conv_out + shortcut

        # Prepare for LSTM: (batch, channels, seq) -> (batch, seq, channels)
        x = x.permute(0, 2, 1)

        # Zero initial hidden/cell states, created directly on the input's device.
        h0 = torch.zeros(self.num_layers, batch_size, self.hidden_size,
                         device=x.device, dtype=x.dtype)
        c0 = torch.zeros_like(h0)

        self.lstm.flatten_parameters()

        out, _ = self.lstm(x, (h0, c0))

        # Only the last time step feeds the output head.
        out = self.fc(out[:, -1, :])
        # Drop only the trailing output axis; a bare squeeze() would also remove
        # the batch axis when batch_size == 1 and return a 0-d tensor.
        return out.squeeze(-1)

In [13]:
# Use the GPU when one is visible to PyTorch, otherwise stay on the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

# Both ensemble members share the same hyper-parameters (79 input features,
# 512-wide hidden state, a single LSTM layer, one output).
model_original = LSTM(
    num_features=79,
    hidden_size=512,
    num_layers=1,
    output_size=1,
).to(device)
model_res = LSTM_RES(
    num_features=79,
    hidden_size=512,
    num_layers=1,
    output_size=1,
).to(device)


In [30]:
# Per-column statistics keyed by feature name ('feature_00' .. 'feature_78').
# predict() passes both dicts to normalize_dataframe(), which computes
# (value - mean) / std for every column present in both.
# NOTE(review): presumably computed on the training data — confirm provenance.
means = {'feature_00': 0.640198826789856, 'feature_01': 0.03755598142743111, 'feature_02': 0.6368075609207153, 'feature_03': 0.6365063786506653, 'feature_04': 0.013741530478000641, 'feature_05': -0.02173694409430027, 'feature_06': -0.006415014620870352, 'feature_07': -0.010971736162900925, 'feature_08': -0.04653771221637726, 'feature_09': 32.596106194690265, 'feature_10': 4.95929203539823, 'feature_11': 167.6541592920354, 'feature_12': -0.13415881991386414, 'feature_13': -0.07573335617780685, 'feature_14': -0.12015637010335922, 'feature_15': -0.7470195889472961, 'feature_16': -0.6257441639900208, 'feature_17': -0.7294047474861145, 'feature_18': -0.042215555906295776, 'feature_19': -0.08798160403966904, 'feature_20': -0.15741558372974396, 'feature_21': 0.10528526455163956, 'feature_22': 0.018054703250527382, 'feature_23': 0.03165541961789131, 'feature_24': 2.733017921447754, 'feature_25': 0.39958420395851135, 'feature_26': -0.11045943945646286, 'feature_27': -0.5332594513893127, 'feature_28': -0.4522790312767029, 'feature_29': -0.5739678144454956, 'feature_30': -0.7905704975128174, 'feature_31': 0.10600688308477402, 'feature_32': 0.40044134855270386, 'feature_33': -0.021725023165345192, 'feature_34': 0.4226262867450714, 'feature_35': 0.42143046855926514, 'feature_36': -0.00023802756913937628, 'feature_37': 0.027961043640971184, 'feature_38': 0.010258913040161133, 'feature_39': 0.005768273025751114, 'feature_40': 0.017485467717051506, 'feature_41': 0.038347117602825165, 'feature_42': -0.06123563274741173, 'feature_43': -0.11644423753023148, 'feature_44': -0.12342483550310135, 'feature_45': -0.028769943863153458, 'feature_46': -0.015200662426650524, 'feature_47': 0.015717582777142525, 'feature_48': -0.0033910537604242563, 'feature_49': -0.0052393232472240925, 'feature_50': -0.2285808026790619, 'feature_51': -0.3548349440097809, 'feature_52': -0.358092725276947, 'feature_53': 0.2607136368751526, 'feature_54': 0.18796788156032562, 'feature_55': 0.3154229521751404, 
'feature_56': -0.1471923440694809, 'feature_57': 0.15730056166648865, 'feature_58': -0.021774644032120705, 'feature_59': -0.0037768862675875425, 'feature_60': -0.010220836848020554, 'feature_61': -0.03178725391626358, 'feature_62': -0.3769100308418274, 'feature_63': -0.3229374587535858, 'feature_64': -0.3718394339084625, 'feature_65': -0.10233989357948303, 'feature_66': -0.13688170909881592, 'feature_67': -0.14402112364768982, 'feature_68': -0.06875362992286682, 'feature_69': -0.11862917989492416, 'feature_70': -0.11789549142122269, 'feature_71': -0.06013699993491173, 'feature_72': -0.10766122490167618, 'feature_73': -0.09921672940254211, 'feature_74': -0.10233042389154434, 'feature_75': -0.05991339311003685, 'feature_76': -0.06349952518939972, 'feature_77': -0.07424316555261612, 'feature_78': -0.07759837061166763}
stds = {'feature_00': 1.027751088142395, 'feature_01': 1.0967519283294678, 'feature_02': 1.0156300067901611, 'feature_03': 1.0170334577560425, 'feature_04': 1.0726385116577148, 'feature_05': 0.9639211297035217, 'feature_06': 1.0963259935379028, 'feature_07': 1.0789952278137207, 'feature_08': 0.7962697148323059, 'feature_09': 23.72976726545254, 'feature_10': 3.1867162933797224, 'feature_11': 163.44513161352285, 'feature_12': 0.6700984835624695, 'feature_13': 0.5805172920227051, 'feature_14': 0.664044201374054, 'feature_15': 0.37517768144607544, 'feature_16': 0.3393096327781677, 'feature_17': 0.3603287935256958, 'feature_18': 0.9911752939224243, 'feature_19': 1.0550744533538818, 'feature_20': 0.6643751263618469, 'feature_21': 0.38239365816116333, 'feature_22': 0.950261116027832, 'feature_23': 0.8119344711303711, 'feature_24': 1.4362775087356567, 'feature_25': 1.0947270393371582, 'feature_26': 1.077124834060669, 'feature_27': 1.0645726919174194, 'feature_28': 1.0676648616790771, 'feature_29': 0.2640742361545563, 'feature_30': 0.19689509272575378, 'feature_31': 0.3815343976020813, 'feature_32': 1.2996565103530884, 'feature_33': 0.9989405870437622, 'feature_34': 1.3409572839736938, 'feature_35': 1.3365675210952759, 'feature_36': 0.8695492148399353, 'feature_37': 0.7334080934524536, 'feature_38': 0.698810338973999, 'feature_39': 0.7965824604034424, 'feature_40': 0.518515944480896, 'feature_41': 0.6384949088096619, 'feature_42': 0.8168442249298096, 'feature_43': 0.5228385925292969, 'feature_44': 0.6521403193473816, 'feature_45': 0.8666537404060364, 'feature_46': 0.9039222002029419, 'feature_47': 3.2711963653564453, 'feature_48': 0.6570901274681091, 'feature_49': 0.7083076238632202, 'feature_50': 1.0132617950439453, 'feature_51': 0.6081287860870361, 'feature_52': 0.9250587224960327, 'feature_53': 1.0421689748764038, 'feature_54': 0.5859629511833191, 'feature_55': 0.9191848039627075, 'feature_56': 0.9549097418785095, 'feature_57': 1.0204777717590332, 'feature_58': 
0.8327276110649109, 'feature_59': 0.8309783339500427, 'feature_60': 0.8389413356781006, 'feature_61': 1.192766547203064, 'feature_62': 1.388945460319519, 'feature_63': 0.09957146644592285, 'feature_64': 0.3396177291870117, 'feature_65': 1.01683509349823, 'feature_66': 1.0824761390686035, 'feature_67': 0.642227828502655, 'feature_68': 0.5312599539756775, 'feature_69': 0.6208390593528748, 'feature_70': 0.6724499464035034, 'feature_71': 0.5356909036636353, 'feature_72': 0.6534596681594849, 'feature_73': 1.0855497121810913, 'feature_74': 1.0880277156829834, 'feature_75': 1.2321789264678955, 'feature_76': 1.2345560789108276, 'feature_77': 1.0921478271484375, 'feature_78': 1.0924347639083862}
def normalize_dataframe(df: pl.DataFrame, means: dict, stds: dict) -> pl.DataFrame:
    """
    Standardise the columns of a Polars DataFrame with precomputed statistics.

    Each column that has an entry in both ``means`` and ``stds`` is replaced by
    ``(value - mean) / std``. When the stored std is exactly 0 the column is
    only centred. Columns missing from either dict are passed through as-is.
    Column order and names are preserved.

    Args:
    df (pl.DataFrame): The frame to normalize
    means (dict): Column name -> mean
    stds (dict): Column name -> standard deviation

    Returns:
    pl.DataFrame: A new frame with the same columns, normalized where possible
    """

    def _column_expr(name):
        # No statistics available for this column: keep it untouched.
        if name not in means or name not in stds:
            return pl.col(name)

        centred = pl.col(name) - means[name]
        scale = stds[name]
        # A zero std would divide by zero, so fall back to centring only.
        if scale == 0:
            return centred.alias(name)
        return (centred / scale).alias(name)

    return df.select([_column_expr(name) for name in df.columns])
    
def _load_checkpoint_once(model: nn.Module, checkpoint_path: str) -> None:
    """Load ``checkpoint_path`` into ``model`` (only the first time) and put it in eval mode."""
    if getattr(model, "_loaded_checkpoint", None) != checkpoint_path:
        state_dict = torch.load(
            checkpoint_path,
            map_location=device,
            weights_only=True
        )
        model.load_state_dict(state_dict)
        # The marker lives on the model instance, so a freshly constructed
        # model (e.g. after re-running the model cell) is loaded again.
        model._loaded_checkpoint = checkpoint_path
    # Disable dropout; must be done for every model that is used for inference.
    model.eval()


def predict(test: pl.DataFrame, lags: pl.DataFrame | None) -> pl.DataFrame | pd.DataFrame:
    """Make a prediction.

    Blends the outputs of ``model_original`` (weight 0.4) and ``model_res``
    (weight 0.6) for every row of ``test``.

    Args:
        test: Frame containing ``row_id`` and the columns ``feature_00`` ..
            ``feature_78``.
        lags: Accepted for interface compatibility; not used by this function.

    Returns:
        A Polars DataFrame with exactly the columns ``row_id`` and
        ``responder_6`` and one row per row of ``test``.

    Raises:
        ValueError: If any of the 79 feature columns is missing from ``test``.
    """
    # Checkpoints are read from disk once per model instead of on every call,
    # and both models are switched to eval mode (previously only
    # model_original was, leaving dropout active in model_res).
    _load_checkpoint_once(model_original, '/kaggle/input/js-cnn-lstm/cnn-lstm-adamw.pth')
    _load_checkpoint_once(model_res, "/kaggle/input/js-cnn-lstm-res/cnn-lstm-adamw-res.pth")

    sel_cols = [f"feature_{i:02d}" for i in range(79)]
    missing_cols = set(sel_cols) - set(test.columns)
    if missing_cols:
        raise ValueError(f"Missing columns in test data: {missing_cols}")

    # Forward-fill nulls, then replace any remaining (leading) nulls with 0.
    test_features = test.select(sel_cols).fill_null(strategy='forward').fill_null(0)
    test_features = normalize_dataframe(test_features, means, stds)
    X_test_tensor = torch.tensor(test_features.to_numpy(), dtype=torch.float32).to(device)

    with torch.no_grad():
        outputs_original = model_original(X_test_tensor)
        outputs_main = model_res(X_test_tensor)

    # reshape(-1) always yields a 1-D array with one value per row; squeeze()
    # would collapse a single-row batch into a 0-d scalar.
    predictions_original = outputs_original.reshape(-1).cpu().numpy() * 0.4
    predictions_main = outputs_main.reshape(-1).cpu().numpy() * 0.6

    predictions_df = pl.DataFrame({
        'row_id': test['row_id'],
        'responder_6': predictions_original + predictions_main
    })
    assert isinstance(predictions_df, (pl.DataFrame, pd.DataFrame))
    assert predictions_df.columns == ['row_id', 'responder_6']
    assert len(predictions_df) == len(test)
    return predictions_df

In [31]:
import kaggle_evaluation.jane_street_inference_server

In [1]:
%%time

# This cell relies on names from earlier cells (os, kaggle_evaluation, predict),
# so those cells must have been run in the current kernel first.
inference_server = kaggle_evaluation.jane_street_inference_server.JSInferenceServer(predict)
test_dir = '/kaggle/input/jane-street-realtime-marketdata-forecasting/test.parquet'
lags_dir = '/kaggle/input/jane-street-realtime-marketdata-forecasting/lags.parquet'

if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    # Environment variable unset or empty: drive predict() through the local
    # gateway with the two parquet paths above.
    inference_server.run_local_gateway((test_dir, lags_dir))
else:
    # Environment variable set: start serving.
    inference_server.serve()

NameError: name 'kaggle_evaluation' is not defined