In [1]:
import pandas as pd
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch

In [7]:
class CNN1DModel(nn.Module):
    def __init__(self,
                 num_numerical_features: int,
                 hidden_size: int = 1024,
                 n_target: int = 1,
                 channel_1: int = 64,
                 channel_2: int = 128,
                 kernel_size: int = 5,
                 dropout_rate: float = 0.2):
        super().__init__()

        self.hidden_size = hidden_size  # Store hidden_size as an instance variable

        # 1. Expand 단계: Dense 레이어
        self.expand = nn.Sequential(
            nn.LayerNorm(num_numerical_features),
            nn.Dropout(dropout_rate),
            nn.Linear(num_numerical_features, hidden_size),
            nn.ReLU()
        )

        self.conv1 = nn.Sequential(
            nn.Conv1d(
                in_channels=hidden_size // 16, 
                out_channels=channel_1, 
                kernel_size=kernel_size, 
                stride=1, 
                padding=kernel_size // 2
            ),
            nn.ReLU(),
            nn.Dropout(dropout_rate)
        )

        # 3. Conv 블록 2
        self.conv2 = nn.Sequential(
            nn.Conv1d(
                in_channels=channel_1, 
                out_channels=channel_2, 
                kernel_size=kernel_size, 
                stride=1, 
                padding=kernel_size // 2
            ),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.AdaptiveAvgPool1d(output_size=16)
        )

        # 4. Flatten and Dense
        self.flatten = nn.Flatten()
        self.dense = nn.Sequential(
            nn.Linear(channel_2 * 16, 640),
            nn.ReLU(),
            nn.Dropout(dropout_rate),
            nn.Linear(640, n_target)
        )

        # 추가된 Tanh
        self.tanh = nn.Tanh()

    # Start of Selection
    def forward(self, x):
        x = self.expand(x)
        batch_size = x.size(0)
        seq_length = x.size(1) // (self.hidden_size // 16)
        x = x.view(batch_size, self.hidden_size // 16, seq_length)

        # Conv block 1 with residual connection
        residual = x
        x = self.conv1(x)
        if x.size() == residual.size():
            x = x + residual

        # Conv block 2 with residual connection
        residual = x
        x = self.conv2(x)
        if x.size() == residual.size():
            x = x + residual

        # Flatten and Dense
        x = self.flatten(x)
        x = self.dense(x)
        x = 5 * self.tanh(x)

        return x

In [8]:
def weighted_mse_loss(y_true, y_pred, weights):
    """
    Multi-target weighted MSE loss

    Args:
        y_true: target values (batch_size, n_targets)
        y_pred: predicted values (batch_size, n_targets)
        weights: weights for each target (batch_size, n_targets)
    """
    return torch.mean(weights * (y_true - y_pred)**2)

def weighted_r2_score(y_true, y_pred, weights):
    """
    Multi-target weighted R2 score

    Args:
        y_true: target values (batch_size, n_targets)
        y_pred: predicted values (batch_size, n_targets)
        weights: weights for each target (batch_size, n_targets)

    Returns:
        weighted R2 score (scalar)
    """
    # Ensure inputs are on CPU and converted to numpy
    y_true = y_true.detach().cpu().numpy()
    y_pred = y_pred.detach().cpu().numpy()
    weights = weights.detach().cpu().numpy()

    weights = np.repeat(weights, y_true.shape[1], axis=1)

    # print(y_true.shape, y_pred.shape, weights.shape)
    # Calculate weighted means for each target
    weighted_mean = np.average(y_true, weights=weights, axis=0)

    # Calculate total sum of squares
    total_ss = np.sum(weights * (y_true - weighted_mean) ** 2, axis=0)

    # Calculate residual sum of squares
    residual_ss = np.sum(weights * (y_true - y_pred) ** 2, axis=0)

    # Calculate R2 score for each target
    r2_scores = 1 - (residual_ss / total_ss)

    # Return mean R2 score across all targets
    return np.mean(r2_scores)

In [9]:
import torch
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm

class CustomDataset(Dataset):
    def __init__(self, data, numerical_columns, target_columns, weight_columns=None):
        """
        Args:
            data: pandas DataFrame containing all features
            numerical_columns: list of column names for numerical features
            target_columns: list of target column names
            weight_columns: list of weight column names (optional)
        """
        self.numerical_features = torch.FloatTensor(data[numerical_columns].values)
        self.symbol = torch.LongTensor(data['symbol_id'].values)
        self.feature_09 = torch.LongTensor(data['feature_09'].values)
        self.feature_10 = torch.LongTensor(data['feature_10'].values)
        self.feature_11 = torch.LongTensor(data['feature_11'].values)
        self.time = torch.LongTensor(data['time_id'].values)

        # Multi-target 처리
        self.targets = torch.FloatTensor(data[target_columns].values)

        # 가중치 처리 (옵션)
        if weight_columns:
            self.weights = torch.FloatTensor(data[weight_columns].values)
        else:
            self.weights = torch.ones_like(self.targets)

    def __len__(self):
        return len(self.targets)

    def __getitem__(self, idx):
        return {
            'numerical_features': self.numerical_features[idx],
            'symbol_id': self.symbol[idx],
            'feature_09': self.feature_09[idx],
            'feature_10': self.feature_10[idx],
            'feature_11': self.feature_11[idx],
            'time_id': self.time[idx],
            'targets': self.targets[idx],
            'weights': self.weights[idx]
        }

def create_data_loaders(train_data, valid_data, numerical_columns,
                        target_columns, weight_columns=None,
                        batch_size=256, num_workers=1):
    """
    데이터로더를 생성하는 함수

    Args:
        train_data: 학습 데이터가 담긴 DataFrame
        valid_data: 검증 데이터가 담긴 DataFrame
        numerical_columns: 수치형 특성들의 컬럼명 리스트
        target_columns: 타겟 변수들의 컬럼명 리스트
        weight_columns: 가중치 컬럼명 리스트 (옵션)
        batch_size: 배치 크기
        num_workers: 데이터 로딩에 사용할 워커 수
    """

    # Dataset 객체 생성
    train_dataset = CustomDataset(train_data, numerical_columns, target_columns, weight_columns)
    valid_dataset = CustomDataset(valid_data, numerical_columns, target_columns, weight_columns)

    # DataLoader 생성
    train_loader = DataLoader(
        train_dataset,
        batch_size=batch_size,
        shuffle=True,
        num_workers=num_workers,
        pin_memory=True
    )

    valid_loader = DataLoader(
        valid_dataset,
        batch_size=batch_size,
        shuffle=False,
        num_workers=num_workers,
        pin_memory=True
    )

    return train_loader, valid_loader

def train_epoch(model, train_loader, optimizer, device):
    model.train()
    total_loss = 0
    total_r2 = 0
    num_batches = len(train_loader)


    for batch_idx, batch in enumerate(tqdm(train_loader, desc="Training Batches")):
        numerical_features = batch['numerical_features'].to(device)
        targets = batch['targets'].to(device)
        weights = batch['weights'].to(device)
        outputs = model(numerical_features)
        loss = weighted_mse_loss(targets, outputs, weights)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        total_loss += loss.item()
        
        if (batch_idx + 1) % 50 == 0:
            r2 = weighted_r2_score(targets, outputs, weights)
            total_r2 += r2

        # 배치별 진행상황 출력 (10배치마다)
        if (batch_idx + 1) % 100 == 0:
            avg_loss = total_loss / (batch_idx + 1)
            avg_r2 = total_r2 / (batch_idx + 1)
            print(f'Batch [{batch_idx+1}/{num_batches}] Loss: {avg_loss:.4f}, R2: {avg_r2:.4f}')

    return total_loss / num_batches, total_r2 / num_batches

def validate(model, valid_loader, device):
    model.eval()
    total_loss = 0
    total_r2 = 0
    num_batches = len(valid_loader)

    with torch.no_grad():
        for batch_idx, batch in enumerate(valid_loader):
            numerical_features = batch['numerical_features'].to(device)
            feature_09 = batch['feature_09'].to(device)
            feature_10 = batch['feature_10'].to(device)
            feature_11 = batch['feature_11'].to(device)
            targets = batch['targets'].to(device)
            weights = batch['weights'].to(device)

            outputs = model(numerical_features)

            loss = weighted_mse_loss(targets, outputs, weights)
            r2 = weighted_r2_score(targets, outputs, weights)

            total_loss += loss.item()
            total_r2 += r2

        if (batch_idx + 1) % 1000 == 0:
            avg_loss = total_loss / (batch_idx + 1)
            avg_r2 = total_r2 / (batch_idx + 1)
            print(f'Batch [{batch_idx+1}/{num_batches}] Loss: {avg_loss:.4f}, R2: {avg_r2:.4f}')
        
    return total_loss / num_batches, total_r2 / num_batches

In [10]:
import polars as pl
train = pl.scan_parquet("/kaggle/input/js24-preprocessing-create-lags/training.parquet").collect()
valid = pl.scan_parquet("/kaggle/input/js24-preprocessing-create-lags/validation.parquet").collect()

In [11]:
import joblib

device = torch.device('cuda:0' if torch.cuda.is_available() else 'cpu')
feature_train_list = [f"feature_{idx:02d}" for idx in range(79)] 
target_col = "responder_6"
feature_train = feature_train_list + [f"responder_{idx}_lag_1" for idx in range(9)] 

feature_cat = ["feature_09", "feature_10", "feature_11"]
feature_cont = [item for item in feature_train if item not in feature_cat]
std_feature = [i for i in feature_train_list if i not in feature_cat] + [f"responder_{idx}_lag_1" for idx in range(9)]

data_stats = joblib.load("/kaggle/input/jane-street-data-preprocessing/data_stats.pkl")
means = data_stats['mean']
stds = data_stats['std']

def standardize(df, feature_cols, means, stds):
    return df.with_columns([
        ((pl.col(col) - means[col]) / stds[col]).alias(col) for col in feature_cols
    ])

numerical_columns = feature_train + ['date_id', 'symbol_id', 'time_id']
target_columns = ['responder_6'] # 예측할 타겟들
weight_columns = ['weight']  # 각 타겟에 대한 가중치 (옵션)

train = standardize(train, numerical_columns, means, stds).to_pandas()
valid = standardize(valid, numerical_columns, means, stds).to_pandas()


In [12]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
device

device(type='cuda')

In [13]:
train = train.dropna()
valid = valid.dropna()

In [14]:
# 컬럼 정의
# 데이터로더 생성
train_loader, valid_loader = create_data_loaders(
    train_data=train,
    valid_data=valid,
    numerical_columns=numerical_columns,
    target_columns=target_columns,
    weight_columns=weight_columns,
    batch_size=2048 * 4,
    num_workers=0
)

In [15]:
# 모델 초기화
model = CNN1DModel(
    num_numerical_features=len(numerical_columns),
    n_target=len(target_columns),
    dropout_rate=0.4
).to(device)

In [20]:
 # 학습 루프 수정
num_epochs = 100
import os
os.makedirs('/kaggle/working', exist_ok=True)
optimizer = torch.optim.AdamW(model.parameters(), lr=1e-4, weight_decay = 5e-4)

for epoch in range(num_epochs):
    torch.save(model.state_dict(), f'/kaggle/working/exported_1dcnn_model_{epoch+1}.pth')
    train_loss, train_r2 = train_epoch(model, train_loader, optimizer, device)
    valid_loss, valid_r2 = validate(model, valid_loader, device)

    print(f'Epoch {epoch+1}/{num_epochs}:')
    print(f'Train Loss: {train_loss:.4f}, Train R2: {train_r2:.4f}')
    print(f'Valid Loss: {valid_loss:.4f}, Valid R2: {valid_r2:.4f}')
    print('-' * 50)

Training Batches:   0%|          | 0/4212 [00:00<?, ?it/s]

Training Batches:   2%|▏         | 99/4212 [00:22<15:57,  4.30it/s] 

Batch [100/4212] Loss: 1.2434, R2: 0.0001


Training Batches:   5%|▍         | 200/4212 [00:44<14:28,  4.62it/s]

Batch [200/4212] Loss: 1.2391, R2: 0.0001


Training Batches:   7%|▋         | 299/4212 [01:06<14:04,  4.63it/s]

Batch [300/4212] Loss: 1.2387, R2: 0.0001


Training Batches:   9%|▉         | 399/4212 [01:28<14:20,  4.43it/s]

Batch [400/4212] Loss: 1.2378, R2: 0.0001


Training Batches:  12%|█▏        | 499/4212 [01:49<13:28,  4.59it/s]

Batch [500/4212] Loss: 1.2388, R2: 0.0001


Training Batches:  14%|█▍        | 600/4212 [02:11<13:06,  4.59it/s]

Batch [600/4212] Loss: 1.2388, R2: 0.0001


Training Batches:  17%|█▋        | 699/4212 [02:33<12:47,  4.58it/s]

Batch [700/4212] Loss: 1.2391, R2: 0.0001


Training Batches:  19%|█▉        | 801/4212 [02:55<11:42,  4.85it/s]

Batch [800/4212] Loss: 1.2392, R2: 0.0001


Training Batches:  21%|██▏       | 900/4212 [03:19<12:54,  4.28it/s]

Batch [900/4212] Loss: 1.2393, R2: 0.0001


Training Batches:  24%|██▎       | 1000/4212 [03:41<10:54,  4.91it/s]

Batch [1000/4212] Loss: 1.2395, R2: 0.0001


Training Batches:  26%|██▌       | 1100/4212 [04:04<11:21,  4.57it/s]

Batch [1100/4212] Loss: 1.2393, R2: 0.0001


Training Batches:  28%|██▊       | 1200/4212 [04:27<11:02,  4.54it/s]

Batch [1200/4212] Loss: 1.2394, R2: 0.0001


Training Batches:  31%|███       | 1299/4212 [04:50<11:04,  4.38it/s]

Batch [1300/4212] Loss: 1.2380, R2: 0.0001


Training Batches:  33%|███▎      | 1400/4212 [05:13<11:26,  4.10it/s]

Batch [1400/4212] Loss: 1.2376, R2: 0.0001


Training Batches:  36%|███▌      | 1500/4212 [05:36<09:34,  4.72it/s]

Batch [1500/4212] Loss: 1.2378, R2: 0.0001


Training Batches:  38%|███▊      | 1600/4212 [06:00<10:20,  4.21it/s]

Batch [1600/4212] Loss: 1.2376, R2: 0.0001


Training Batches:  40%|████      | 1700/4212 [06:22<09:07,  4.59it/s]

Batch [1700/4212] Loss: 1.2374, R2: 0.0001


Training Batches:  43%|████▎     | 1800/4212 [06:44<09:01,  4.45it/s]

Batch [1800/4212] Loss: 1.2372, R2: 0.0001


Training Batches:  45%|████▌     | 1900/4212 [07:05<08:02,  4.80it/s]

Batch [1900/4212] Loss: 1.2376, R2: 0.0002


Training Batches:  47%|████▋     | 2000/4212 [07:28<08:32,  4.32it/s]

Batch [2000/4212] Loss: 1.2374, R2: 0.0001


Training Batches:  50%|████▉     | 2100/4212 [07:51<08:18,  4.23it/s]

Batch [2100/4212] Loss: 1.2372, R2: 0.0002


Training Batches:  52%|█████▏    | 2200/4212 [08:14<08:01,  4.18it/s]

Batch [2200/4212] Loss: 1.2372, R2: 0.0001


Training Batches:  55%|█████▍    | 2301/4212 [08:37<06:24,  4.97it/s]

Batch [2300/4212] Loss: 1.2373, R2: 0.0001


Training Batches:  57%|█████▋    | 2400/4212 [09:00<07:39,  3.94it/s]

Batch [2400/4212] Loss: 1.2375, R2: 0.0001


Training Batches:  59%|█████▉    | 2500/4212 [09:23<06:18,  4.52it/s]

Batch [2500/4212] Loss: 1.2373, R2: 0.0001


Training Batches:  62%|██████▏   | 2599/4212 [09:45<06:18,  4.26it/s]

Batch [2600/4212] Loss: 1.2371, R2: 0.0001


Training Batches:  64%|██████▍   | 2700/4212 [10:08<05:25,  4.64it/s]

Batch [2700/4212] Loss: 1.2372, R2: 0.0001


Training Batches:  66%|██████▋   | 2799/4212 [10:32<06:08,  3.84it/s]

Batch [2800/4212] Loss: 1.2372, R2: 0.0001


Training Batches:  69%|██████▉   | 2900/4212 [10:55<04:57,  4.41it/s]

Batch [2900/4212] Loss: 1.2372, R2: 0.0002


Training Batches:  71%|███████   | 2999/4212 [11:17<04:54,  4.11it/s]

Batch [3000/4212] Loss: 1.2373, R2: 0.0002


Training Batches:  74%|███████▎  | 3100/4212 [11:40<04:30,  4.11it/s]

Batch [3100/4212] Loss: 1.2371, R2: 0.0002


Training Batches:  76%|███████▌  | 3200/4212 [12:03<03:48,  4.43it/s]

Batch [3200/4212] Loss: 1.2373, R2: 0.0002


Training Batches:  78%|███████▊  | 3300/4212 [12:27<03:27,  4.39it/s]

Batch [3300/4212] Loss: 1.2373, R2: 0.0002


Training Batches:  81%|████████  | 3400/4212 [12:50<02:44,  4.93it/s]

Batch [3400/4212] Loss: 1.2374, R2: 0.0002


Training Batches:  83%|████████▎ | 3499/4212 [13:13<02:49,  4.21it/s]

Batch [3500/4212] Loss: 1.2374, R2: 0.0002


Training Batches:  85%|████████▌ | 3600/4212 [13:37<02:16,  4.48it/s]

Batch [3600/4212] Loss: 1.2373, R2: 0.0002


Training Batches:  88%|████████▊ | 3700/4212 [13:59<02:02,  4.16it/s]

Batch [3700/4212] Loss: 1.2374, R2: 0.0002


Training Batches:  90%|█████████ | 3799/4212 [14:22<01:40,  4.11it/s]

Batch [3800/4212] Loss: 1.2375, R2: 0.0002


Training Batches:  93%|█████████▎| 3901/4212 [14:45<01:02,  4.96it/s]

Batch [3900/4212] Loss: 1.2375, R2: 0.0002


Training Batches:  95%|█████████▍| 3999/4212 [15:17<00:51,  4.10it/s]

Batch [4000/4212] Loss: 1.2372, R2: 0.0002


Training Batches:  97%|█████████▋| 4101/4212 [15:40<00:23,  4.72it/s]

Batch [4100/4212] Loss: 1.2371, R2: 0.0002


Training Batches: 100%|█████████▉| 4200/4212 [16:03<00:02,  4.49it/s]

Batch [4200/4212] Loss: 1.2372, R2: 0.0002


Training Batches: 100%|██████████| 4212/4212 [16:07<00:00,  4.35it/s]


Epoch 1/100:
Train Loss: 1.2372, Train R2: 0.0002
Valid Loss: 0.9191, Valid R2: 0.0011
--------------------------------------------------


Training Batches:   2%|▏         | 101/4212 [00:24<15:04,  4.54it/s]

Batch [100/4212] Loss: 1.2332, R2: 0.0002


Training Batches:   5%|▍         | 201/4212 [00:48<15:41,  4.26it/s]

Batch [200/4212] Loss: 1.2365, R2: 0.0002


Training Batches:   7%|▋         | 299/4212 [01:11<16:28,  3.96it/s]

Batch [300/4212] Loss: 1.2397, R2: 0.0001


Training Batches:   9%|▉         | 400/4212 [01:35<13:54,  4.57it/s]

Batch [400/4212] Loss: 1.2375, R2: 0.0001


Training Batches:  12%|█▏        | 500/4212 [01:59<14:34,  4.25it/s]

Batch [500/4212] Loss: 1.2360, R2: 0.0001


Training Batches:  14%|█▍        | 600/4212 [02:22<15:00,  4.01it/s]

Batch [600/4212] Loss: 1.2353, R2: 0.0001


Training Batches:  17%|█▋        | 701/4212 [02:46<13:16,  4.41it/s]

Batch [700/4212] Loss: 1.2353, R2: 0.0001


Training Batches:  19%|█▉        | 800/4212 [03:10<13:39,  4.16it/s]

Batch [800/4212] Loss: 1.2358, R2: 0.0001


Training Batches:  21%|██▏       | 901/4212 [03:33<11:51,  4.66it/s]

Batch [900/4212] Loss: 1.2363, R2: 0.0002


Training Batches:  24%|██▍       | 1001/4212 [03:57<12:14,  4.37it/s]

Batch [1000/4212] Loss: 1.2361, R2: 0.0002


Training Batches:  26%|██▌       | 1100/4212 [04:20<12:44,  4.07it/s]

Batch [1100/4212] Loss: 1.2365, R2: 0.0002


Training Batches:  28%|██▊       | 1199/4212 [04:44<12:21,  4.06it/s]

Batch [1200/4212] Loss: 1.2363, R2: 0.0002


Training Batches:  31%|███       | 1299/4212 [05:07<12:17,  3.95it/s]

Batch [1300/4212] Loss: 1.2362, R2: 0.0002


Training Batches:  33%|███▎      | 1401/4212 [05:32<10:28,  4.47it/s]

Batch [1400/4212] Loss: 1.2362, R2: 0.0002


Training Batches:  36%|███▌      | 1501/4212 [05:57<10:13,  4.42it/s]

Batch [1500/4212] Loss: 1.2357, R2: 0.0002


Training Batches:  38%|███▊      | 1600/4212 [06:24<10:29,  4.15it/s]

Batch [1600/4212] Loss: 1.2356, R2: 0.0002


Training Batches:  40%|████      | 1700/4212 [06:49<09:42,  4.31it/s]

Batch [1700/4212] Loss: 1.2353, R2: 0.0002


Training Batches:  43%|████▎     | 1800/4212 [07:14<09:17,  4.32it/s]

Batch [1800/4212] Loss: 1.2352, R2: 0.0002


Training Batches:  45%|████▌     | 1900/4212 [07:40<09:42,  3.97it/s]

Batch [1900/4212] Loss: 1.2355, R2: 0.0002


Training Batches:  47%|████▋     | 2000/4212 [08:05<09:55,  3.72it/s]

Batch [2000/4212] Loss: 1.2358, R2: 0.0002


Training Batches:  50%|████▉     | 2101/4212 [08:31<07:50,  4.49it/s]

Batch [2100/4212] Loss: 1.2359, R2: 0.0002


Training Batches:  52%|█████▏    | 2201/4212 [08:56<07:49,  4.28it/s]

Batch [2200/4212] Loss: 1.2357, R2: 0.0002


Training Batches:  55%|█████▍    | 2300/4212 [09:21<07:35,  4.20it/s]

Batch [2300/4212] Loss: 1.2356, R2: 0.0002


Training Batches:  57%|█████▋    | 2400/4212 [09:46<07:02,  4.29it/s]

Batch [2400/4212] Loss: 1.2357, R2: 0.0002


Training Batches:  59%|█████▉    | 2501/4212 [10:12<06:35,  4.32it/s]

Batch [2500/4212] Loss: 1.2357, R2: 0.0002


Training Batches:  62%|██████▏   | 2600/4212 [10:37<06:18,  4.26it/s]

Batch [2600/4212] Loss: 1.2357, R2: 0.0002


Training Batches:  64%|██████▍   | 2701/4212 [11:03<05:57,  4.22it/s]

Batch [2700/4212] Loss: 1.2356, R2: 0.0002


Training Batches:  66%|██████▋   | 2800/4212 [11:29<06:08,  3.83it/s]

Batch [2800/4212] Loss: 1.2353, R2: 0.0002


Training Batches:  69%|██████▉   | 2900/4212 [11:54<05:29,  3.98it/s]

Batch [2900/4212] Loss: 1.2352, R2: 0.0002


Training Batches:  71%|███████   | 3000/4212 [12:19<05:17,  3.81it/s]

Batch [3000/4212] Loss: 1.2351, R2: 0.0002


Training Batches:  74%|███████▎  | 3099/4212 [12:45<04:55,  3.77it/s]

Batch [3100/4212] Loss: 1.2354, R2: 0.0002


Training Batches:  76%|███████▌  | 3200/4212 [13:10<03:38,  4.64it/s]

Batch [3200/4212] Loss: 1.2354, R2: 0.0002


Training Batches:  78%|███████▊  | 3301/4212 [13:35<03:14,  4.68it/s]

Batch [3300/4212] Loss: 1.2355, R2: 0.0002


Training Batches:  81%|████████  | 3400/4212 [14:00<03:13,  4.20it/s]

Batch [3400/4212] Loss: 1.2354, R2: 0.0002


Training Batches:  83%|████████▎ | 3500/4212 [14:26<03:01,  3.92it/s]

Batch [3500/4212] Loss: 1.2356, R2: 0.0002


Training Batches:  85%|████████▌ | 3599/4212 [14:51<02:52,  3.55it/s]

Batch [3600/4212] Loss: 1.2356, R2: 0.0002


Training Batches:  88%|████████▊ | 3700/4212 [15:17<02:13,  3.84it/s]

Batch [3700/4212] Loss: 1.2354, R2: 0.0002


Training Batches:  90%|█████████ | 3800/4212 [15:42<01:39,  4.15it/s]

Batch [3800/4212] Loss: 1.2355, R2: 0.0002


Training Batches:  93%|█████████▎| 3900/4212 [16:07<01:22,  3.77it/s]

Batch [3900/4212] Loss: 1.2354, R2: 0.0002


Training Batches:  95%|█████████▍| 4000/4212 [16:32<00:45,  4.64it/s]

Batch [4000/4212] Loss: 1.2354, R2: 0.0002


Training Batches:  97%|█████████▋| 4100/4212 [16:58<00:25,  4.45it/s]

Batch [4100/4212] Loss: 1.2353, R2: 0.0002


Training Batches: 100%|█████████▉| 4200/4212 [17:23<00:03,  3.90it/s]

Batch [4200/4212] Loss: 1.2352, R2: 0.0002


Training Batches: 100%|██████████| 4212/4212 [17:27<00:00,  4.02it/s]


Epoch 2/100:
Train Loss: 1.2352, Train R2: 0.0002
Valid Loss: 0.9194, Valid R2: 0.0002
--------------------------------------------------


Training Batches:   2%|▏         | 100/4212 [00:26<15:13,  4.50it/s]

Batch [100/4212] Loss: 1.2334, R2: 0.0002


Training Batches:   5%|▍         | 200/4212 [00:52<17:29,  3.82it/s]

Batch [200/4212] Loss: 1.2363, R2: 0.0002


Training Batches:   7%|▋         | 300/4212 [01:17<14:11,  4.60it/s]

Batch [300/4212] Loss: 1.2313, R2: 0.0002


Training Batches:   9%|▉         | 400/4212 [01:42<16:15,  3.91it/s]

Batch [400/4212] Loss: 1.2315, R2: 0.0002


Training Batches:  12%|█▏        | 500/4212 [02:08<16:26,  3.76it/s]

Batch [500/4212] Loss: 1.2340, R2: 0.0002


Training Batches:  14%|█▍        | 600/4212 [02:33<13:01,  4.62it/s]

Batch [600/4212] Loss: 1.2350, R2: 0.0002


Training Batches:  17%|█▋        | 700/4212 [02:58<13:12,  4.43it/s]

Batch [700/4212] Loss: 1.2350, R2: 0.0002


Training Batches:  19%|█▉        | 800/4212 [03:21<13:59,  4.06it/s]

Batch [800/4212] Loss: 1.2350, R2: 0.0002


Training Batches:  21%|██▏       | 900/4212 [03:44<13:14,  4.17it/s]

Batch [900/4212] Loss: 1.2350, R2: 0.0002


Training Batches:  24%|██▎       | 999/4212 [04:06<12:39,  4.23it/s]

Batch [1000/4212] Loss: 1.2341, R2: 0.0002


Training Batches:  26%|██▌       | 1101/4212 [04:29<10:29,  4.95it/s]

Batch [1100/4212] Loss: 1.2340, R2: 0.0002


Training Batches:  28%|██▊       | 1200/4212 [04:51<11:35,  4.33it/s]

Batch [1200/4212] Loss: 1.2340, R2: 0.0002


Training Batches:  31%|███       | 1300/4212 [05:15<10:10,  4.77it/s]

Batch [1300/4212] Loss: 1.2340, R2: 0.0002


Training Batches:  33%|███▎      | 1400/4212 [05:38<11:17,  4.15it/s]

Batch [1400/4212] Loss: 1.2335, R2: 0.0002


Training Batches:  36%|███▌      | 1500/4212 [06:02<10:48,  4.18it/s]

Batch [1500/4212] Loss: 1.2331, R2: 0.0002


Training Batches:  38%|███▊      | 1601/4212 [06:28<09:15,  4.70it/s]

Batch [1600/4212] Loss: 1.2323, R2: 0.0002


Training Batches:  40%|████      | 1700/4212 [06:53<09:35,  4.37it/s]

Batch [1700/4212] Loss: 1.2326, R2: 0.0002


Training Batches:  43%|████▎     | 1800/4212 [07:17<09:30,  4.22it/s]

Batch [1800/4212] Loss: 1.2326, R2: 0.0002


Training Batches:  45%|████▌     | 1900/4212 [07:43<08:29,  4.54it/s]

Batch [1900/4212] Loss: 1.2328, R2: 0.0002


Training Batches:  47%|████▋     | 1959/4212 [07:57<09:14,  4.06it/s]

In [None]:
torch.save(model.state_dict(), '/kaggle/working/exported_1dcnn_model.pth')

In [None]:
def predict(test: pl.DataFrame, lags: pl.DataFrame | None) -> pl.DataFrame | pd.DataFrame:
    """Make a prediction."""
    global model
    global device
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    model.load_state_dict(torch.load('/kaggle/working/exported_1dcnn_model.pth', map_location=device,weights_only= True))
    model.eval() 

    global numerical_columns
    sel_cols = numerical_columns
    missing_cols = set(sel_cols) - set(test.columns)
    if missing_cols:
        raise ValueError(f"Missing columns in test data: {missing_cols}")

    test_features = test.select(sel_cols)
    test_features = test_features.fill_null(strategy='forward').fill_null(0)
    test_features = standardize(test_features,sel_cols, means,stds)
    X_test = test_features.to_numpy()
    X_test_tensor = torch.tensor(X_test, dtype=torch.float32).to(device)
    with torch.no_grad():
        outputs = model(X_test_tensor)
        predictions = outputs.squeeze().cpu().numpy()
    predictions_df = pl.DataFrame({
        'row_id': test['row_id'],
        'responder_6': predictions
    })
    assert isinstance(predictions_df, (pl.DataFrame, pd.DataFrame))
    assert predictions_df.columns == ['row_id', 'responder_6']
    assert len(predictions_df) == len(test)
    return predictions_df

In [None]:
import os

import pandas as pd
import polars as pl

import kaggle_evaluation.jane_street_inference_server


inference_server = \
kaggle_evaluation.jane_street_inference_server.JSInferenceServer(predict)

if os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.serve()
else:
    inference_server.run_local_gateway(
        (
            '/kaggle/input/jane-street-realtime-marketdata-forecasting/test.parquet',
            '/kaggle/input/jane-street-realtime-marketdata-forecasting/lags.parquet',
        )
    )
