In [2]:
# Download the files of the jane-street-real-time-market-data-forecasting
# competition into ~/kaggle_data using the Kaggle command-line client.
# NOTE(review): later cells read ~/kaggle_data/*.parquet directly; if the CLI
# delivers a compressed archive it has to be extracted first — confirm.
!kaggle competitions download -c jane-street-real-time-market-data-forecasting -p ~/kaggle_data

### Imports

In [None]:
import os

import polars as pl

import kaggle_evaluation.jane_street_inference_server

### Load the training data

In [None]:
# Read the training set from the local download directory into a polars frame.
train_data = pl.read_parquet(source='~/kaggle_data/train.parquet')

# Preview the first five rows (head()'s default) as the cell output.
train_data.head(n=5)

### Load the lags data

In [None]:
# Read the lags file from the local download directory into a polars frame.
lags_data = pl.read_parquet(source='~/kaggle_data/lags.parquet')

# Preview the first five rows (head()'s default) as the cell output.
lags_data.head(n=5)

### Load the test data

In [None]:
# Read the test file from the local download directory into a polars frame.
test_data = pl.read_parquet(source='~/kaggle_data/test.parquet')

# Preview the first five rows (head()'s default) as the cell output.
test_data.head(n=5)

In [None]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Model inputs are the 79 columns feature_00 ... feature_78; the regression
# target is the responder_6 column.
features = [f'feature_{i:02}' for i in range(79)]
target = 'responder_6'

# Separate the design matrix (a DataFrame) from the target (a Series).
X = train_data.select(features)
y = train_data.get_column(target)

# Hold out 20% of the rows for validation. The fixed random_state makes the
# split reproducible across runs.
# NOTE(review): train_test_split shuffles rows by default; if the rows are
# time-ordered a chronological hold-out may be more appropriate — confirm.
X_train, X_val, y_train, y_val = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Ordinary least-squares baseline fitted on the training portion.
# NOTE(review): scikit-learn linear models reject NaN inputs; presumably the
# feature columns contain no nulls — verify against the data.
model = LinearRegression()
model.fit(X_train, y_train)

# Score the held-out rows and report the mean squared error.
y_pred = model.predict(X_val)
mse = mean_squared_error(y_true=y_val, y_pred=y_pred)
print(f'Mean Squared Error: {mse}')

In [None]:
from sklearn.linear_model import LinearRegression
import numpy as np

# `model` is the estimator fitted in the training cell earlier in this
# notebook. It is deliberately NOT re-created here: a fresh LinearRegression()
# is unfitted, and calling predict() on it raises NotFittedError.

# Columns the model was fitted on, in the same order as during training.
FEATURE_COLUMNS = [f'feature_{i:02}' for i in range(79)]

# Most recent lags frame passed to predict(); stays None until one is
# received. Initialised here so the first call cannot hit an undefined global.
lags_ = None


def predict(test: pl.DataFrame, lags: pl.DataFrame | None) -> pl.DataFrame:
    """Predict ``responder_6`` for every row of ``test``.

    Args:
        test: Batch to score. Must contain ``row_id`` and the columns listed
            in ``FEATURE_COLUMNS``.
        lags: Lags frame for the batch, or ``None``. A non-``None`` value is
            cached in the module-level ``lags_``. It is not fed to the model:
            the model was fitted on the 79 feature columns only, so appending
            lag columns would change the input width and make ``predict``
            fail.

    Returns:
        A frame with exactly the columns ``row_id`` and ``responder_6`` and
        one row per row of ``test``.
    """
    global lags_
    if lags is not None:
        lags_ = lags

    # Build the (n_rows, 79) input matrix in training column order.
    feature_matrix = test.select(FEATURE_COLUMNS).to_numpy()

    # scikit-learn linear models raise on NaN, so missing values are replaced
    # with 0.0 rather than failing the whole batch.
    # NOTE(review): zero-imputation is an assumption — keep it aligned with
    # whatever preprocessing is applied at training time.
    feature_matrix = np.where(np.isnan(feature_matrix), 0.0, feature_matrix)

    predictions = model.predict(feature_matrix)

    predictions_df = pl.DataFrame({
        'row_id': test['row_id'],
        'responder_6': predictions,
    })

    # Sanity checks on the returned frame: type, column order, row count.
    assert isinstance(predictions_df, pl.DataFrame)
    assert predictions_df.columns == ['row_id', 'responder_6']
    assert len(predictions_df) == len(test)

    return predictions_df

In [None]:
# Wrap predict() in the competition's inference server object.
inference_server = kaggle_evaluation.jane_street_inference_server.JSInferenceServer(predict)

# When KAGGLE_IS_COMPETITION_RERUN is unset or empty, run the local gateway
# against the test and lags parquet files; otherwise start serving.
# NOTE(review): these paths point at /kaggle/input, while the earlier cells
# read from ~/kaggle_data — confirm which location applies when run locally.
if not os.getenv('KAGGLE_IS_COMPETITION_RERUN'):
    inference_server.run_local_gateway(
        (
            '/kaggle/input/jane-street-real-time-market-data-forecasting/test.parquet',
            '/kaggle/input/jane-street-real-time-market-data-forecasting/lags.parquet',
        )
    )
else:
    inference_server.serve()