# Ensemble All the Lag_Reg Models.

In this notebook, we use Ridge regression to ensemble the 11 models trained in the notebook "Training different lag models based on features sampled by clusters".

In [3]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.linear_model import Ridge, SGDRegressor, LinearRegression
from sklearn.preprocessing import StandardScaler
import os
import gc
import warnings
from tqdm.notebook import tqdm
import time
from collections import defaultdict
warnings.filterwarnings('ignore')

In [3]:
# Training-side predictions of the first three base models, one frame per file.
train_pred0, train_pred1, train_pred2 = map(
    pd.read_parquet,
    ('train_pred0.parquet', 'train_pred1.parquet', 'train_pred2.parquet'),
)

# Matching validation-side predictions, loaded in the same order.
val_pred0, val_pred1, val_pred2 = map(
    pd.read_parquet,
    ('val_pred0.parquet', 'val_pred1.parquet', 'val_pred2.parquet'),
)

# Only the target column is used from this file, so ask the parquet reader for
# just that column instead of loading every column of the training set.
Y = pd.read_parquet('data/train.parquet', columns=['label'])['label']

## Training The Ridge Model

We train several ridge models with different alphas on the outputs of our 11 models. The best alpha is 2100, with a validation score (correlation between predictions and labels) of 0.98, which is a large improvement over the X_clustered model's validation score of 0.92.

In [4]:
# Hold out the last N_VAL entries of the label series for validation;
# everything before them is the training target.
N_VAL = 100000
y_train, y_val = Y[:-N_VAL], Y[-N_VAL:]

In [24]:
# Fit a ridge regressor on train_pred0 and predict the held-out rows.
ridge0 = Ridge(alpha=1000).fit(train_pred0, y_train)
y_pred_0 = ridge0.predict(val_pred0)

# Correlation matrix between the held-out labels and the predictions;
# the off-diagonal entry is the validation score.
np.corrcoef(y_val, y_pred_0)

array([[1.       , 0.9725614],
       [0.9725614, 1.       ]])

In [28]:
# Same procedure for the second prediction frame (train_pred1 / val_pred1).
ridge1 = Ridge(alpha=1000).fit(train_pred1, y_train)
y_pred_1 = ridge1.predict(val_pred1)

# Off-diagonal entry = correlation of predictions with the held-out labels.
np.corrcoef(y_val, y_pred_1)

array([[1.        , 0.95865278],
       [0.95865278, 1.        ]])

In [40]:
# Third prediction frame (train_pred2 / val_pred2), with a smaller penalty.
ridge2 = Ridge(alpha=500).fit(train_pred2, y_train)
y_pred_2 = ridge2.predict(val_pred2)

# Off-diagonal entry = correlation of predictions with the held-out labels.
np.corrcoef(y_val, y_pred_2)

array([[1.        , 0.96961475],
       [0.96961475, 1.        ]])

In [44]:
# Put the three prediction frames side by side so a single ridge model can
# weight all of their columns at once.
train_pred = pd.concat((train_pred0, train_pred1, train_pred2), axis='columns')
val_pred = pd.concat((val_pred0, val_pred1, val_pred2), axis='columns')

ridge = Ridge(alpha=2500).fit(train_pred, y_train)
y_pred = ridge.predict(val_pred)

# Off-diagonal entry = correlation of predictions with the held-out labels.
np.corrcoef(y_val, y_pred)

array([[1.        , 0.97843364],
       [0.97843364, 1.        ]])

In [96]:
# Refit on the combined frame with a lighter penalty and no intercept term.
ridge = Ridge(alpha=150, fit_intercept=False).fit(train_pred, y_train)
y_pred = ridge.predict(val_pred)

# Off-diagonal entry = correlation of predictions with the held-out labels.
np.corrcoef(y_val, y_pred)

array([[1.        , 0.97922451],
       [0.97922451, 1.        ]])

In [66]:
# Total of the fitted ridge weights.
np.sum(ridge.coef_)

1.3576026888471366

In [7]:
# Rebuild the combined frames from the three per-file prediction frames.
train_pred = pd.concat((train_pred0, train_pred1, train_pred2), axis='columns')
val_pred = pd.concat((val_pred0, val_pred1, val_pred2), axis='columns')

In [9]:
# NOTE: this rebinds train_pred1 / train_pred2, which earlier held the frames
# read from 'train_pred1.parquet' and 'train_pred2.parquet'.
train_pred1, train_pred2 = (
    pd.read_parquet(path)
    for path in ('train_pred_[3,4,5,7]', 'train_pred_[6,8,..]')
)

In [11]:
# NOTE: this rebinds val_pred1 / val_pred2, which earlier held the frames
# read from 'val_pred1.parquet' and 'val_pred2.parquet'.
val_pred1, val_pred2 = (
    pd.read_parquet(path)
    for path in ('val_pred_[3,4,5,7]', 'val_pred_[6,8,..]')
)

In [13]:
# Append the newly loaded frames to the already-combined ones, column-wise.
train = pd.concat((train_pred, train_pred1, train_pred2), axis='columns')
val = pd.concat((val_pred, val_pred1, val_pred2), axis='columns')

In [45]:
# Ridge over the full set of stacked prediction columns, without an intercept.
ridge = Ridge(alpha=2100, fit_intercept=False).fit(train, y_train)
y_pred = ridge.predict(val)

# Off-diagonal entry = correlation of predictions with the held-out labels.
np.corrcoef(y_val, y_pred)

array([[1.        , 0.98319669],
       [0.98319669, 1.        ]])

In [5]:
# Load the four train-prediction files, one frame per file.
train1, train2, train3, train4 = map(
    pd.read_parquet,
    (
        'train_preds_[0,1,2].parquet',
        'train_preds_[3,4,5,7].parquet',
        'train_preds_[6,8].parquet',
        'train_preds_[9,10,11].parquet',
    ),
)

In [6]:
# Combine the four frames column-wise into the final training matrix.
train_pred = pd.concat((train1, train2, train3, train4), axis='columns')

# The per-file frames are not needed once combined; release them.
del train1, train2, train3, train4
gc.collect()

0

In [7]:
# Load the four test-prediction files, one frame per file.
test1, test2, test3, test4 = map(
    pd.read_parquet,
    (
        'test_preds_[0,1,2].parquet',
        'test_preds_[3,4,5,7].parquet',
        'test_preds_[6,8].parquet',
        'test_preds_[9,10,11].parquet',
    ),
)

In [8]:
# Combine the four frames column-wise, in the same order as the training matrix.
test_pred = pd.concat((test1, test2, test3, test4), axis='columns')

# The per-file frames are not needed once combined; release them.
del test1, test2, test3, test4
gc.collect()

0

In [9]:
# Full label series for the final fit. Only the target column is used, so
# request just that column rather than loading the whole training file.
Y = pd.read_parquet('data/train.parquet', columns=['label'])['label']

In [29]:
# Final ensemble: fit on every training row (no hold-out), no intercept.
ridge = Ridge(alpha=4.2e4, fit_intercept=False).fit(train_pred, Y)

# Predictions for the rows of test_pred, in test_pred's row order.
y_pred_ridge = ridge.predict(test_pred)

In [53]:
# Total of the fitted ridge weights.
np.sum(ridge.coef_)

1.3175969729014754

In [30]:
# Same quantity as the preceding cell; the two recorded outputs differ because
# the cells were executed at different times (execution counts 53 and 30).
np.sum(ridge.coef_)

1.266998222587056

In [27]:
# Only collect garbage here. The previous `del y_pred_ridge, A, ridge, new, test`
# raised NameError on a top-to-bottom run (A, new and test are created in later
# cells) and discarded y_pred_ridge, which the submission cells below still need.
gc.collect()

0

## Evaluation on the Test Set

Generate predictions for the test set with the final ensemble and save them as a submission file.

In [33]:
test = pd.read_parquet('data/test.parquet')
row_id = pd.read_csv('closest_rows.csv', index_col=0)

# Move the test index into a regular column, then attach column '0' of the
# CSV as `row_id` (aligned on the index).
new = test.reset_index().assign(row_id=row_id['0'])
new.head()

Unnamed: 0,ID,bid_qty,ask_qty,buy_qty,sell_qty,volume,X1,X2,X3,X4,...,X773,X774,X775,X776,X777,X778,X779,X780,label,row_id
0,1,0.317,8.102,13.164,10.272,23.436,-0.341229,0.041851,-0.020094,-0.206221,...,-0.043417,1.521787,1.548965,1.495735,1.16673,0.281056,-0.187831,-0.599553,0,112334
1,2,2.608,2.111,123.562,40.163,163.725,-1.029564,-1.382505,-1.214935,-1.020241,...,-0.07709,-0.703054,-0.716951,-0.721292,-0.674619,-0.639318,-0.736268,-0.86222,0,69300
2,3,2.768,10.787,126.137,118.266,244.403,-2.59409,-5.486158,-4.744466,-3.930152,...,-0.030627,-0.703514,-0.717525,-0.731701,-0.750998,-0.789366,-0.850941,-1.033131,0,152075
3,4,0.948,12.157,16.069,31.723,47.792,0.240745,0.997585,1.028965,1.081052,...,-0.03338,1.521167,1.551771,1.582833,1.62583,1.762155,1.911924,1.962445,0,255828
4,5,1.084,3.493,32.679,37.327,70.006,0.067189,0.772852,0.772152,0.714846,...,-0.004915,-0.703161,-0.7169,-0.714699,-0.652209,-0.623165,-0.699887,-0.640094,0,390226


In [34]:
# Reorder the test rows by row_id.
# NOTE(review): presumably this is the row order test_pred was generated in;
# confirm against the notebook that produced the test_preds_* files.
new = new.sort_values(by='row_id')
new.head()

Unnamed: 0,ID,bid_qty,ask_qty,buy_qty,sell_qty,volume,X1,X2,X3,X4,...,X773,X774,X775,X776,X777,X778,X779,X780,label,row_id
192617,192618,1.037,20.607,20.377,18.854,39.231,1.90712,0.428308,0.981918,1.342953,...,-0.046392,1.523177,1.547193,1.46273,1.084867,0.196849,-0.24948,-0.644863,0,-1
188498,188499,24.169,2.624,7.885,9.266,17.151,0.555521,0.46112,0.72813,0.766869,...,-0.012611,1.52299,1.553631,1.584729,1.626858,1.580276,1.221571,0.683121,0,-1
424484,424485,9.5,10.256,27.151,12.778,39.929,0.696916,-0.095659,-0.216179,-0.155187,...,-0.017567,-0.703437,-0.717389,-0.731427,-0.747933,-0.755011,-0.796735,-0.778168,0,-1
424482,424483,1.91,4.205,121.387,90.664,212.051,1.207702,0.99058,1.02987,1.112571,...,-0.046124,-0.703374,-0.669391,-0.506669,-0.38612,-0.519117,-0.647202,-0.578245,0,-1
268120,268121,4.567,4.285,37.114,25.157,62.271,-1.007777,-0.636524,-0.791924,-0.824496,...,-0.02553,1.521299,1.55117,1.54263,1.314757,0.461959,-0.050952,-0.498569,0,-1


In [37]:
# Attach the ensemble predictions to a copy of the row_id-ordered frame,
# then restore ID order for the submission.
A = new.assign(pred=y_pred_ridge).sort_values('ID')
A.head()

Unnamed: 0,ID,bid_qty,ask_qty,buy_qty,sell_qty,volume,X1,X2,X3,X4,...,X774,X775,X776,X777,X778,X779,X780,label,row_id,pred
0,1,0.317,8.102,13.164,10.272,23.436,-0.341229,0.041851,-0.020094,-0.206221,...,1.521787,1.548965,1.495735,1.16673,0.281056,-0.187831,-0.599553,0,112334,-0.217626
1,2,2.608,2.111,123.562,40.163,163.725,-1.029564,-1.382505,-1.214935,-1.020241,...,-0.703054,-0.716951,-0.721292,-0.674619,-0.639318,-0.736268,-0.86222,0,69300,0.321515
2,3,2.768,10.787,126.137,118.266,244.403,-2.59409,-5.486158,-4.744466,-3.930152,...,-0.703514,-0.717525,-0.731701,-0.750998,-0.789366,-0.850941,-1.033131,0,152075,-3.167622
3,4,0.948,12.157,16.069,31.723,47.792,0.240745,0.997585,1.028965,1.081052,...,1.521167,1.551771,1.582833,1.62583,1.762155,1.911924,1.962445,0,255828,-0.294751
4,5,1.084,3.493,32.679,37.327,70.006,0.067189,0.772852,0.772152,0.714846,...,-0.703161,-0.7169,-0.714699,-0.652209,-0.623165,-0.699887,-0.640094,0,390226,0.292119


In [39]:
y_pred_test = A['pred']

# Fill the sample submission's `prediction` column; the predictions get a fresh
# 0..n-1 index so they line up with the rows of the sample file by index.
submission = pd.read_csv('data/sample_submission.csv').assign(
    prediction=y_pred_test.reset_index(drop=True)
)
submission.to_csv('submission.csv', index=False)

# Read the file back as a quick check of what was written.
out = pd.read_csv('submission.csv')
out.head()

Unnamed: 0,ID,prediction
0,1,-0.217626
1,2,0.321515
2,3,-3.167622
3,4,-0.294751
4,5,0.292119
