In [2]:
!pip install xgboost

Collecting xgboost
  Downloading xgboost-3.0.5-py3-none-win_amd64.whl (56.8 MB)
     ---------------------------------------- 0.0/56.8 MB ? eta -:--:--
     ---------------------------------------- 0.2/56.8 MB 7.6 MB/s eta 0:00:08
     ---------------------------------------- 0.7/56.8 MB 11.1 MB/s eta 0:00:06
     - -------------------------------------- 1.5/56.8 MB 11.7 MB/s eta 0:00:05
     - -------------------------------------- 2.0/56.8 MB 12.9 MB/s eta 0:00:05
     - -------------------------------------- 2.7/56.8 MB 13.0 MB/s eta 0:00:05
     -- ------------------------------------- 3.2/56.8 MB 12.7 MB/s eta 0:00:05
     -- ------------------------------------- 3.7/56.8 MB 13.2 MB/s eta 0:00:05
     -- ------------------------------------- 4.1/56.8 MB 12.4 MB/s eta 0:00:05
     --- ------------------------------------ 4.4/56.8 MB 12.4 MB/s eta 0:00:05
     --- ------------------------------------ 4.9/56.8 MB 12.1 MB/s eta 0:00:05
     --- ------------------------------------ 5.5


[notice] A new release of pip is available: 23.0.1 -> 25.2
[notice] To update, run: C:\Users\Aryan\AppData\Local\Microsoft\WindowsApps\PythonSoftwareFoundation.Python.3.10_qbz5n2kfra8p0\python.exe -m pip install --upgrade pip


In [3]:
import pandas as pd
import numpy as np
import lightgbm as lgb
import xgboost as xgb
import warnings

# Silence every Python warning for the rest of the session.
# NOTE(review): this is a global filter, so library deprecation and data-quality
# warnings are hidden too — consider narrowing it to specific categories.
warnings.filterwarnings('ignore')

In [4]:
# Load the competition train/test CSVs into DataFrames.
# NOTE(review): DATA_DIR is a machine-specific absolute path; point it at your
# local copy of the data before running.
DATA_DIR = "C:/Users/Aryan/Downloads/Quant_Challenge_2025"

try:
    print ("\nLoading the data")
    train_df = pd.read_csv(f"{DATA_DIR}/train.csv")
    test_df = pd.read_csv(f"{DATA_DIR}/test.csv")
    print("data loaded")
except FileNotFoundError as e:
    print(f"Error: {e}")
    # Re-raise instead of calling exit(): exit() tears down the interpreter
    # (in a notebook, the kernel) and discards the traceback, whereas a raised
    # exception simply stops execution at this cell with the full error shown.
    raise


Loading the data
data loaded


In [5]:
#Feature Engineering

# Stack train (targets removed) on top of test so that shift/rolling windows
# run across the train/test boundary.
# NOTE(review): assumes rows are already in chronological order and that test
# follows train in time — confirm.
base_df = pd.concat([train_df.drop(columns=['Y1', 'Y2']), test_df], ignore_index=True)

features = [col for col in train_df.columns if col not in ('time', 'Y1', 'Y2')]
WINDOW_SIZE = 10

# Collect all derived columns first and attach them in a single concat; the
# insertion order (lag, rolling mean, rolling std per feature) fixes the
# resulting column order.
derived_columns = {}
for name in features:
    series = base_df[name]
    window = series.rolling(window=WINDOW_SIZE)

    # previous-row value
    derived_columns[f'{name}_lag1'] = series.shift(1)
    # trailing mean over WINDOW_SIZE rows
    derived_columns[f'{name}_rolling_mean{WINDOW_SIZE}'] = window.mean()
    # trailing standard deviation (volatility) over WINDOW_SIZE rows
    derived_columns[f'{name}_rolling_std{WINDOW_SIZE}'] = window.std()

combined_df = pd.concat([base_df, pd.DataFrame(derived_columns)], axis=1)

# Split back into the train and test portions by position and re-attach the
# targets to the train part (aligned on the default integer index).
n_train_rows = len(train_df)
train_processed_df = combined_df.iloc[:n_train_rows].assign(
    Y1=train_df['Y1'],
    Y2=train_df['Y2'],
)
test_processed_df = combined_df.iloc[n_train_rows:].copy()

# The leading rows have NaN lag/rolling values (incomplete windows); fill them
# backward first, then forward for anything still missing.
print(f"Shape before handling NaNs: {train_processed_df.shape}")
train_processed_df = train_processed_df.bfill().ffill()
print(f"Shape after filling NaNs: {train_processed_df.shape}")

Shape before handling NaNs: (80000, 60)
Shape after filling NaNs: (80000, 60)


In [6]:
#Models training

def _fit_and_predict(model_cls, params, X, y, X_new):
    """Fit a fresh ``model_cls(**params)`` on (X, y) and predict on X_new.

    Returns:
        tuple: ``(fitted_model, predictions)`` so callers can keep both.
    """
    model = model_cls(**params)
    model.fit(X, y)
    return model, model.predict(X_new)


# Every column except the identifier, the timestamp and the two targets is a
# model input.
features_to_use = [col for col in train_processed_df.columns if col not in ['id', 'time', 'Y1', 'Y2']]
X_train = train_processed_df[features_to_use]
y1_train = train_processed_df['Y1']
y2_train = train_processed_df['Y2']

# Build the filled test matrix with chained calls that return a new frame,
# rather than mutating a column-subset of test_processed_df in place.
X_test = test_processed_df[features_to_use].bfill().ffill()

#LightGBM
# NOTE(review): the objective is L1 while the configured metric is RMSE and the
# XGBoost models below optimise squared error — confirm this mix is intended.
lgb_params = {
    'objective': 'regression_l1', 'metric': 'rmse', 'n_estimators': 1500,
    'learning_rate': 0.03, 'feature_fraction': 0.8, 'bagging_fraction': 0.8,
    'bagging_freq': 1, 'lambda_l1': 0.1, 'lambda_l2': 0.1,
    'num_leaves': 40, 'verbose': -1, 'n_jobs': -1
}
# One independent model per target, trained on the same feature matrix.
model_y1_lgb, predictions_y1_lgb = _fit_and_predict(
    lgb.LGBMRegressor, lgb_params, X_train, y1_train, X_test
)
model_y2_lgb, predictions_y2_lgb = _fit_and_predict(
    lgb.LGBMRegressor, lgb_params, X_train, y2_train, X_test
)
print("LightGBM model trained.")

#XGBoost
xgb_params = {
    'objective': 'reg:squarederror', 'eval_metric': 'rmse', 'n_estimators': 1000,
    'learning_rate': 0.04, 'max_depth': 4, 'subsample': 0.8,
    'colsample_bytree': 0.8, 'n_jobs': -1
}
model_y1_xgb, predictions_y1_xgb = _fit_and_predict(
    xgb.XGBRegressor, xgb_params, X_train, y1_train, X_test
)
model_y2_xgb, predictions_y2_xgb = _fit_and_predict(
    xgb.XGBRegressor, xgb_params, X_train, y2_train, X_test
)
print("XGBoost model trained.")

LightGBM model trained.
XGBoost model trained.


In [7]:
def _equal_blend(first, second):
    """Return the element-wise equal-weight average of two prediction arrays."""
    return (first + second) / 2.0


# Blend the LightGBM and XGBoost predictions separately for each target.
ensemble_preds_y1 = _equal_blend(predictions_y1_lgb, predictions_y1_xgb)
ensemble_preds_y2 = _equal_blend(predictions_y2_lgb, predictions_y2_xgb)

# Assemble the submission table (test ids plus both blended targets) and write
# it to the working directory without the index column.
submission_columns = {
    'id': test_df['id'],
    'Y1': ensemble_preds_y1,
    'Y2': ensemble_preds_y2,
}
submission_ensemble = pd.DataFrame(submission_columns)
submission_ensemble.to_csv('submission_ensemble.csv', index=False)

print("\n✅ Success! Submission file 'submission_ensemble.csv' created.")


✅ Success! Submission file 'submission_ensemble.csv' created.
