In [None]:
import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)
from joblib import load
import polars as pl
import kaggle_evaluation.jane_street_inference_server
import gc
import lightgbm as lgb
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, WeightedRandomSampler, TensorDataset
from tqdm.auto import tqdm
# import tensorflow as tf
# from tensorflow.keras import layers, models

import os
# os.environ['CUDA_LAUNCH_BLOCKING'] = '1'

In [None]:
class AE_MLP(nn.Module):
    """Bottleneck MLP regressor with dropout after every hidden activation.

    The encoder narrows ``input_dim -> hidden_dim -> hidden_dim // 2`` and the
    decoder widens ``hidden_dim // 2 -> hidden_dim`` before a final linear
    projection to ``output_dim``. Despite the "AE" in the name, ``forward``
    returns only that projection; no reconstruction of the input is produced.

    No L2 penalty is applied inside this module; presumably it was added
    through the optimizer's weight decay during training (confirm against the
    training code).

    Args:
        input_dim: Number of input features per row.
        hidden_dim: Width of the wide hidden layers; the bottleneck is half of it.
        output_dim: Number of values predicted per row.
        dropout_rate: Drop probability shared by every dropout layer.
    """

    def __init__(self, input_dim, hidden_dim=128, output_dim=1, dropout_rate=0.3):
        super().__init__()
        bottleneck_dim = hidden_dim // 2

        # Layer positions inside each Sequential determine the state_dict keys
        # (encoder.0, encoder.3, decoder.0, decoder.3), so the order is fixed.
        encoder_layers = [
            nn.Linear(input_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout_rate),  # dropout follows the activation to curb overfitting
            nn.Linear(hidden_dim, bottleneck_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout_rate),
        ]
        decoder_layers = [
            nn.Linear(bottleneck_dim, hidden_dim),
            nn.ReLU(),
            nn.Dropout(p=dropout_rate),
            nn.Linear(hidden_dim, output_dim),
        ]

        self.encoder = nn.Sequential(*encoder_layers)
        self.decoder = nn.Sequential(*decoder_layers)

    def forward(self, x):
        """Return the decoder output for a batch ``x`` of shape (rows, input_dim)."""
        return self.decoder(self.encoder(x))

# Weighted Huber loss
def weighted_loss(predictions, targets, weights, delta=1.0):
    """Mean of the per-sample Smooth-L1 (Huber-style) loss scaled by per-sample weights.

    Shapes are aligned before any arithmetic. Without that, a column of
    predictions ``(N, 1)`` combined with flat targets or weights ``(N,)``
    broadcasts to an ``(N, N)`` matrix, which silently turns the result into
    ``mean(loss) * mean(weights)`` instead of a per-sample weighted mean.

    Args:
        predictions: Tensor of model outputs.
        targets: Tensor of ground-truth values with the same number of elements
            as ``predictions``.
        weights: Tensor with one weight per element of ``predictions`` (any
            shape with a matching element count), or a plain scalar.
        delta: Passed as ``beta`` to ``nn.SmoothL1Loss``: the absolute error at
            which the loss switches from quadratic to linear. Note that
            Smooth-L1 equals the Huber loss divided by ``beta``, so the two
            coincide only for ``delta == 1``.

    Returns:
        Scalar tensor: the mean over all elements of ``loss_i * weight_i``.
    """
    # Same number of elements but different layout: view targets like predictions.
    if (
        torch.is_tensor(targets)
        and targets.shape != predictions.shape
        and targets.numel() == predictions.numel()
    ):
        targets = targets.reshape(predictions.shape)

    criterion = nn.SmoothL1Loss(beta=delta, reduction='none')
    per_sample_loss = criterion(predictions, targets)

    # Pair each weight with its own sample; weights with a different element
    # count (for example a single value) keep ordinary broadcasting.
    if (
        torch.is_tensor(weights)
        and weights.shape != per_sample_loss.shape
        and weights.numel() == per_sample_loss.numel()
    ):
        weights = weights.reshape(per_sample_loss.shape)

    return (per_sample_loss * weights).mean()

    
# Build the network that the saved weights are loaded into in the next cell.
# 91 inputs = 79 features + 9 lagged responders + symbol_id, time_id, date_id,
# i.e. the length of `ae_mlp_cols` defined further down.
ae_model = AE_MLP(
    input_dim=91,
    hidden_dim=128,
    output_dim=1,
    dropout_rate=0.3,
)

In [None]:
# Restore the trained weights. map_location='cpu' lets a checkpoint whose
# tensors were saved from a GPU load on a machine without one; the model is
# moved to its final device afterwards.
ae_model.load_state_dict(
    torch.load(
        '/kaggle/input/ae_mlp_js24_v2/pytorch/ae_mlp_js24_v2/1/ae_mlp_model_06_01_2025.pth',
        map_location='cpu',
        weights_only=True,
    )
)
# This notebook only runs inference, so disable dropout once up front.
ae_model.eval()
# Use the GPU when one is present, otherwise stay on the CPU instead of failing.
ae_model.to('cuda:0' if torch.cuda.is_available() else 'cpu')

# NOTE(review): joblib.load unpickles the file, which can execute arbitrary
# code; only point this at scaler files you produced yourself.
scaler = load('/kaggle/input/scaler_new/other/scaler_new/1/robust_scaler_07_01.pkl')

In [None]:
class CONFIG:
    """Static settings for the notebook: seed, target name and feature column lists."""

    seed = 42
    target_col = "responder_6"

    # ids, then feature_00..feature_78, then the lag-1 value of all nine responders
    feature_cols_ae_mlp = [
        "symbol_id",
        "time_id",
        *(f"feature_{idx:02d}" for idx in range(79)),
        *(f"responder_{idx}_lag_1" for idx in range(9)),
    ]

    # feature_00..feature_78 plus only the lag of responder_6; the variant with
    # all nine responder lags is left disabled:
    #   [f"responder_{idx}_lag_1" for idx in range(9)]
    feature_cols = [
        *(f"feature_{idx:02d}" for idx in range(79)),
        "responder_6_lag_1",
    ]
    
# Id columns followed by CONFIG.feature_cols (not referenced by the other visible cells).
xgb_feature_cols = ["date_id", "time_id", "symbol_id", *CONFIG.feature_cols]

# Input columns of the AE-MLP, in the order they are selected inside predict():
# 79 features, 9 lagged responders, then the three id columns (91 in total).
ae_mlp_cols = [
    *(f"feature_{idx:02d}" for idx in range(79)),
    *(f"responder_{idx}_lag_1" for idx in range(9)),
    'symbol_id',
    'time_id',
    'date_id',
]

In [None]:
# lags_ : pl.DataFrame | None = None
    
# def predict(test: pl.DataFrame, lags: pl.DataFrame | None) -> pl.DataFrame | pd.DataFrame:
#     global lags_
#     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#     ae_model.to(device)
    
#     if lags is not None:
#         lags_ = lags

#     predictions = test.select(
#         'row_id',
#         pl.lit(0.0).alias('responder_6'),
#     )
#     symbol_ids = test.select('symbol_id').to_numpy()[:, 0]

#     if not lags is None:
#         lags = lags.group_by(["date_id", "symbol_id"], maintain_order=True).last() # pick up last record of previous date
#         features_lags = ["date_id", "symbol_id"]+[f"responder_{idx}_lag_1" for idx in range(9)]
#         lags = lags[features_lags]
#         test = test.join(lags, on=["date_id", "symbol_id"],  how="left")
#     else:
#         test = test.with_columns(
#             ( pl.lit(0.0).alias(f'responder_6_lag_1'))
#         )

#     preds = np.zeros((test.shape[0],))
#     test_input = test[ae_mlp_cols].to_pandas()
#     test_input = test_input.ffill().fillna(0)
#     test_input = torch.FloatTensor(test_input.values).to(device)
    
#     with torch.no_grad():
#         ae_model.eval()
#         # Move the input data to the correct device
#         test_input = test_input.to(device)
#         preds = ae_model(test_input).to("cpu").numpy().flatten()  # Move the result back to the CPU
        
#     print(f"predict> preds.shape =", preds.shape)
#     # print(f"preds.shape: {preds.shape}, test.shape: {test.shape}")
    
#     predictions = test.select('row_id').with_columns(
#     pl.Series(
#         name='responder_6',
#         values=np.clip(preds, a_min=-5, a_max=5).astype(np.float64),
#         dtype=pl.Float64,
#     )
# )


#     # The predict function must return a DataFrame
#     assert isinstance(predictions, pl.DataFrame | pd.DataFrame)
#     # with columns 'row_id', 'responder_6'
#     assert list(predictions.columns) == ['row_id', 'responder_6']
#     # and as many rows as the test data.
#     assert len(predictions) == len(test)
#     assert len(preds) == len(test)

#     return predictions

In [None]:
# def predict(test: pl.DataFrame, lags: pl.DataFrame | None) -> pl.DataFrame | pd.DataFrame:
#     global lags_
#     device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
#     ae_model.to(device)
    
#     if lags is not None:
#         lags_ = lags

#     predictions = test.select(
#         'row_id',
#         pl.lit(0.0).alias('responder_6'),
#     )

#     if not lags is None:
#         lags = lags.group_by(["date_id", "symbol_id"], maintain_order=True).last()
#         # lags = lags_.clone().group_by(["date_id", "symbol_id"], maintain_order=True).last()
#         features_lags = ["date_id", "symbol_id"] + [f"responder_{idx}_lag_1" for idx in range(9)]
#         lags = lags[features_lags]
#         test = test.join(lags, on=["date_id", "symbol_id"], how="left")
#     else:
#         test = test.with_columns(
#             pl.lit(0.0).alias(f'responder_6_lag_1')
#         )

#     # Apply the RobustScaler to the feature columns (X)
#     test_input = test[ae_mlp_cols].to_pandas()
#     test_input = test_input.ffill().fillna(0)  # Fill missing values (if any)

#     # Apply the robust scaler to the input features
#     test_input_scaled = scaler.transform(test_input.values)  # Apply scaling to the input features

#     # Convert scaled data to a tensor for model input
#     test_input_tensor = torch.FloatTensor(test_input_scaled).to(device)
    
#     # Predict using the model
#     with torch.no_grad():
#         ae_model.eval()
#         preds = ae_model(test_input_tensor).to("cpu").numpy().flatten()

#     print(f"predict> preds.shape = {preds.shape}")
    
#     predictions = test.select('row_id').with_columns(
#         pl.Series(
#             name='responder_6',
#             values=np.clip(preds, a_min=-5, a_max=5).astype(np.float64),
#             dtype=pl.Float64,
#         )
#     )

#     # Check assertions for correct output
#     assert isinstance(predictions, pl.DataFrame | pd.DataFrame)
#     assert list(predictions.columns) == ['row_id', 'responder_6']
#     assert len(predictions) == len(test)
#     assert len(preds) == len(test)

#     return predictions


In [None]:
# Most recent lags frame handed to predict(); calls that arrive without lags
# reuse it.
lags_: pl.DataFrame | None = None

def predict(test: pl.DataFrame, lags: pl.DataFrame | None) -> pl.DataFrame | pd.DataFrame:
    """
    Predict `responder_6` for one batch of test rows with the AE-MLP network.

    Args:
        test: DataFrame containing the test rows. It must provide `row_id`,
            `date_id`, `symbol_id` and the non-lag columns of `ae_mlp_cols`.
        lags: DataFrame containing lagged responders, or None. When given, it
            replaces the frame cached in the global `lags_`.

    Returns:
        Polars DataFrame with the columns `row_id` and `responder_6` (Float64),
        the predictions clipped to [-5, 5].
    """
    global lags_

    # Remember the newest lags so later calls without lags can still use them
    if lags is not None:
        lags_ = lags

    if lags_ is not None:
        # Keep the last record for each (date_id, symbol_id) pair and attach it
        last_lags = lags_.group_by(["date_id", "symbol_id"], maintain_order=True).last()
        test = test.join(last_lags, on=["date_id", "symbol_id"], how="left")

    # No lags seen yet (or a lag column absent from them): use 0.0, the same
    # value missing entries are filled with below, instead of failing.
    missing_lag_cols = [
        col for col in ae_mlp_cols
        if col.endswith("_lag_1") and col not in test.columns
    ]
    if missing_lag_cols:
        test = test.with_columns([pl.lit(0.0).alias(col) for col in missing_lag_cols])

    # Prepare the model input
    test_input = test[ae_mlp_cols].to_pandas()
    # Forward fill, then zero whatever is still missing.
    # NOTE(review): ffill runs down the rows of the batch, so a gap is filled
    # from the preceding row whatever its symbol_id; confirm this matches the
    # preprocessing used in training.
    test_input = test_input.ffill().fillna(0)
    test_input = scaler.transform(test_input.values)

    # Place the batch on whichever device the model currently lives on
    device = next(ae_model.parameters()).device
    test_input = torch.FloatTensor(test_input).to(device)

    with torch.no_grad():  # no gradients are needed for inference
        ae_model.eval()  # disables dropout
        preds_nn = ae_model(test_input).cpu().numpy().flatten()

    # Pair every row_id with its clipped prediction
    predictions_nn = test.select('row_id').with_columns(
        pl.Series(
            name='responder_6',
            values=np.clip(preds_nn, a_min=-5, a_max=5).astype(np.float64),
            dtype=pl.Float64,
        )
    )

    return predictions_nn

In [None]:
# Hand predict() to the competition's inference server.
inference_server = kaggle_evaluation.jane_street_inference_server.JSInferenceServer(predict)

# With KAGGLE_IS_COMPETITION_RERUN set (non-empty) the server is started with
# serve(); otherwise a local gateway is run against the test and lags parquet files.
is_competition_rerun = os.getenv('KAGGLE_IS_COMPETITION_RERUN')
if not is_competition_rerun:
    local_data_paths = (
        '/kaggle/input/jane-street-real-time-market-data-forecasting/test.parquet',
        '/kaggle/input/jane-street-real-time-market-data-forecasting/lags.parquet',
    )
    inference_server.run_local_gateway(local_data_paths)
else:
    inference_server.serve()