In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.model_selection import RandomizedSearchCV, StratifiedKFold

import warnings
warnings.filterwarnings('ignore')

from sklearn.metrics import mean_squared_error

SEED = 15

This data is from my last notebook of Feature Engineering and Preprocessing. Here I have used various models and Stacked them to get a good score

In [4]:
train = pd.read_csv("../input/ai-hack1-preprocessed-data/train_pre1.csv")
test_data = pd.read_csv("../input/ai-hack1-preprocessed-data/test_pre1.csv")

In [7]:
test_cate = test_data.copy()
test = test_data.copy()

In [8]:
test = test.drop(columns = ['continent', 'major', 'country'])

In [9]:
X = train.drop(columns = ['continent', 'nerdy', 'major', 'country'])
y = train['nerdy']

#### Splitting the dataset

In [10]:
from sklearn.model_selection import train_test_split

In [11]:
X_train, X_val, y_train, y_val = train_test_split(X, y , test_size = 0.25, random_state = 15)
X_val, X_test, y_val, y_test = train_test_split(X_val, y_val, test_size = 0.4, random_state = 15)

#### Min- Max Scaler

In [13]:
from sklearn.preprocessing import MinMaxScaler
col2 = ['que_score', 'tipi_score', 'total_time_min', 'introelapse', 'surveyelapse', 'testelapse', 'screenh', 'screenw']
mx = MinMaxScaler()
for col in col2:
    X_train[col] = mx.fit_transform(np.array(X_train[col]).reshape(-1,1))
    X_val[col] = mx.transform(np.array(X_val[col]).reshape(-1,1))
    X_test[col] = mx.transform(np.array(X_test[col]).reshape(-1,1))
    test[col] = mx.transform(np.array(test[col]).reshape(-1,1))
    test_cate[col] = mx.transform(np.array(test_cate[col]).reshape(-1,1))

First we will train several models like XGB, Random Forest, LightGBM, CatBoost, etc. and save their predictions

## Base Models

### XGB

In [14]:
import xgboost as xgb

In [15]:
# params = {
 #   'learning_rate' :[0.01, 0.05, 0.1, 0.2, 0.3],
 #   'min_child_weight': [1 , 3, 5, 7],
 #   'max_depth' : [5, 7, 9, 11, 13],
 #   'n_estimators': [100, 300, 500],}

# reg = xgb.XGBRegressor()

# random_search = RandomizedSearchCV(reg, params, scoring = 'neg_root_mean_squared_error', cv = 5, verbose = 3, n_iter = 5, n_jobs = -1)
# random_search.fit(X_train, y_train)
# random_search.best_params_

In [16]:
xgbr = xgb.XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.01, max_delta_step=0, max_depth=5,
             min_child_weight=7,monotone_constraints='()',
             n_estimators=500, n_jobs=8, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

xgbr.fit(X_train, y_train)

y_xgb_train = xgbr.predict(X_train)
y_xgb_val = xgbr.predict(X_val)
y_xgb_test = xgbr.predict(X_test)
test_xgb = xgbr.predict(test)

print(mean_squared_error(y_train,y_xgb_train, squared = False))
print(mean_squared_error(y_val,y_xgb_val, squared = False))
print(mean_squared_error(y_test,y_xgb_test, squared = False))

1.022224536971489
1.1273035716502202
1.152636996052547


In [17]:
test_xgb

array([6.1032157, 5.6441846, 4.8173757, ..., 5.655197 , 5.1178102,
       5.9032516], dtype=float32)

### RF

In [18]:
from sklearn.ensemble import RandomForestRegressor, RandomForestClassifier

In [19]:
# rf = RandomForestRegressor()
# n_estimators = [int(x) for x in np.linspace(start = 200, stop = 2000, num = 10)]
# max_features = ['auto', 'sqrt']
# max_depth = [int(x) for x in np.linspace(10, 110, num = 11)]
# max_depth.append(None)
# min_samples_split = [2, 5, 10]
# min_samples_leaf = [1, 2, 4]
# bootstrap = [True, False]

# random_grid = {'n_estimators': n_estimators,
#               'max_features': max_features,
#               'max_depth': max_depth,
#               'min_samples_split': min_samples_split,
#               'min_samples_leaf': min_samples_leaf,
#               'bootstrap': bootstrap}

# random_search_rf = RandomizedSearchCV(rf, random_grid, cv = 10, scoring = 'neg_root_mean_squared_error', verbose = 3, n_jobs = -1)

# random_search_rf.fit(X_train, y_train)
# random_search_rf.best_params_

In [20]:
rf = RandomForestRegressor(max_depth=90, min_samples_leaf=2, min_samples_split=10,
                      n_estimators=400, bootstrap = True, max_features = 'auto')

rf.fit(X_train, y_train)

y_rf_train = rf.predict(X_train)
y_rf_val = rf.predict(X_val)
y_rf_test = rf.predict(X_test)
test_rf = rf.predict(test)

print(mean_squared_error(y_train,y_rf_train, squared = False))
print(mean_squared_error(y_val,y_rf_val, squared = False))
print(mean_squared_error(y_test,y_rf_test, squared = False))

0.6067353847710544
1.1367851429779217
1.1583475571445108


In [21]:
test_rf

array([6.2394901 , 5.40555132, 4.69916291, ..., 5.61183499, 5.10699819,
       5.78767199])

### LGBM

In [22]:
from lightgbm import LGBMRegressor, LGBMClassifier

In [23]:
# params_lgbm = {
#    'num_leaves' : [5 , 10, 15, 20, 30, 40],
#    'min_data_in_leaf' : [10, 15, 20, 25, 30],
#    'learning_rate' : [0.01, 0.1, 0.3, 0.05],
#    'num_iterations' : [100, 300, 500, 700],
# }

# lgbm = LGBMRegressor(is_unbalance = True)

# random_search_lgbm = RandomizedSearchCV(lgbm, params_lgbm, scoring = 'neg_root_mean_squared_error', cv = 10, verbose = 3,n_jobs = -1)
# random_search_lgbm.fit(X_train, y_train)
# random_search_lgbm.best_params_

In [24]:
lgbm = LGBMRegressor(learning_rate=0.05, min_data_in_leaf=5, num_iterations=700,
              num_leaves=5)

lgbm.fit(X_train, y_train)

y_lgbm_train = lgbm.predict(X_train)
y_lgbm_val = lgbm.predict(X_val)
y_lgbm_test = lgbm.predict(X_test)
test_lgbm = lgbm.predict(test)

print(mean_squared_error(y_train,y_lgbm_train, squared = False))
print(mean_squared_error(y_val,y_lgbm_val, squared = False))
print(mean_squared_error(y_test,y_lgbm_test, squared = False))

1.0019660197622624
1.1194132895521387
1.147373785746555


In [25]:
test_lgbm

array([6.26275572, 5.69849148, 4.64703571, ..., 5.63703714, 5.13998284,
       5.74199647])

### CatBoost

In [26]:
from catboost import CatBoostRegressor

In [27]:
cat_x = train.drop(columns = ['nerdy', 'continent'])
cat_y = train['nerdy']

cat_X_train, cat_X_val, cat_y_train, cat_y_val = train_test_split(cat_x, cat_y , test_size = 0.25, random_state = 15)
cat_X_val, cat_X_test, cat_y_val, cat_y_test = train_test_split(cat_X_val, cat_y_val, test_size = 0.4, random_state = 15)

cat_feat = np.where(cat_X_train.dtypes == np.object)[0]

In [28]:
cat = CatBoostRegressor(learning_rate=0.01 , depth=7, iterations = 1500)

cat.fit(cat_X_train, cat_y_train, cat_features = cat_feat, eval_set = (cat_X_val, cat_y_val), plot = True)

y_cat_train = cat.predict(cat_X_train)
y_cat_val = cat.predict(cat_X_val)
y_cat_test = cat.predict(cat_X_test)
test_cat = cat.predict(test_cate.drop(columns = ['continent']))

print(mean_squared_error(cat_y_train,y_cat_train, squared = False))
print(mean_squared_error(cat_y_val,y_cat_val, squared = False))
print(mean_squared_error(cat_y_test,y_cat_test, squared = False))

MetricVisualizer(layout=Layout(align_self='stretch', height='500px'))

0:	learn: 1.5322813	test: 1.6340403	best: 1.6340403 (0)	total: 73.8ms	remaining: 1m 50s
1:	learn: 1.5264976	test: 1.6274683	best: 1.6274683 (1)	total: 91ms	remaining: 1m 8s
2:	learn: 1.5207519	test: 1.6209709	best: 1.6209709 (2)	total: 107ms	remaining: 53.2s
3:	learn: 1.5152897	test: 1.6147070	best: 1.6147070 (3)	total: 122ms	remaining: 45.6s
4:	learn: 1.5102646	test: 1.6090853	best: 1.6090853 (4)	total: 136ms	remaining: 40.7s
5:	learn: 1.5047742	test: 1.6030729	best: 1.6030729 (5)	total: 149ms	remaining: 37.1s
6:	learn: 1.4992555	test: 1.5967977	best: 1.5967977 (6)	total: 162ms	remaining: 34.6s
7:	learn: 1.4942231	test: 1.5908869	best: 1.5908869 (7)	total: 175ms	remaining: 32.6s
8:	learn: 1.4888508	test: 1.5846705	best: 1.5846705 (8)	total: 188ms	remaining: 31.1s
9:	learn: 1.4835240	test: 1.5784408	best: 1.5784408 (9)	total: 199ms	remaining: 29.6s
10:	learn: 1.4785084	test: 1.5726361	best: 1.5726361 (10)	total: 216ms	remaining: 29.2s
11:	learn: 1.4732638	test: 1.5666014	best: 1.566601

In [29]:
test_cat

array([5.0537179 , 4.69344492, 3.69811198, ..., 4.35899545, 4.16382239,
       4.60387232])

It's a good practice to feed a bit different data to our models while stacking so that different models exploit different relations and we get a better result. Hence , uptil now, I was using One-Hot encoded Continent column, but below I will train some model with that column to be Label Encoded.

### LabelEnconded DF

In [30]:
train_le = pd.read_csv("../input/ai-hack1-preprocessed-data/train_prele.csv")
test_le = pd.read_csv("../input/ai-hack1-preprocessed-data/test_prele.csv")

In [31]:
X_le = train_le.drop(columns = ['nerdy', 'major', 'country'])
y_le = train_le['nerdy']

test_le = test_le.drop(columns = ['major', 'country'])

In [32]:
X_train_le, X_val_le, y_train_le, y_val_le = train_test_split(X_le, y_le , test_size = 0.25, random_state = 15)
X_val_le, X_test_le, y_val_le, y_test_le = train_test_split(X_val_le, y_val_le, test_size = 0.4, random_state = 15)

#### XGB

In [33]:
xgbr = xgb.XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=1, gamma=0, gpu_id=-1,
             importance_type='gain', interaction_constraints='',
             learning_rate=0.01, max_delta_step=0, max_depth=5,
             min_child_weight=7,monotone_constraints='()',
             n_estimators=500, n_jobs=8, num_parallel_tree=1, random_state=0,
             reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
             tree_method='exact', validate_parameters=1, verbosity=None)

xgbr.fit(X_train_le, y_train_le)

y_xgb_train_le = xgbr.predict(X_train_le)
y_xgb_val_le = xgbr.predict(X_val_le)
y_xgb_test_le = xgbr.predict(X_test_le)
test_xgb_le = xgbr.predict(test_le)

print(mean_squared_error(y_train_le,y_xgb_train_le, squared = False))
print(mean_squared_error(y_val_le,y_xgb_val_le, squared = False))
print(mean_squared_error(y_test_le,y_xgb_test_le, squared = False))

1.0222591070273443
1.1267596997832374
1.1502478413340187


In [34]:
test_xgb_le

array([6.096798, 5.572791, 4.765979, ..., 5.640714, 5.116461, 5.913208],
      dtype=float32)

### RF

In [35]:
rf = RandomForestRegressor(max_depth=90, min_samples_leaf=2, min_samples_split=10,
                      n_estimators=400, bootstrap = True, max_features = 'auto')

rf.fit(X_train_le, y_train_le)

y_rf_train_le = rf.predict(X_train_le)
y_rf_val_le = rf.predict(X_val_le)
y_rf_test_le = rf.predict(X_test_le)
test_rf_le = rf.predict(test_le)

print(mean_squared_error(y_train_le,y_rf_train_le, squared = False))
print(mean_squared_error(y_val_le,y_rf_val_le, squared = False))
print(mean_squared_error(y_test_le,y_rf_test_le, squared = False))

0.6037620894139676
1.138280857198874
1.1552365597667484


As we know that our target value has only integer values from 0-7. Hence this dataset can also be treated as Multi-Class Classification and I would train some classifiers to get a better and more diverse stacked output.

### Classfiiers

In [36]:
xgbclf = xgb.XGBClassifier()

xgbclf.fit(X_train, y_train)

y_xgbclf_train = xgbclf.predict(X_train)
y_xgbclf_val = xgbclf.predict(X_val)
y_xgbclf_test = xgbclf.predict(X_test)
test_xgbclf = xgbclf.predict(test)

print(mean_squared_error(y_train,y_xgbclf_train, squared = False))
print(mean_squared_error(y_val,y_xgbclf_val, squared = False))
print(mean_squared_error(y_test,y_xgbclf_test, squared = False))

0.27187164725684687
1.3329110932012138
1.369848944029373


In [37]:
lgbmclf = LGBMClassifier()

lgbmclf.fit(X_train, y_train)

y_lgbmclf_train = lgbmclf.predict(X_train)
y_lgbmclf_val = lgbmclf.predict(X_val)
y_lgbmclf_test = lgbmclf.predict(X_test)
test_lgbmclf = lgbmclf.predict(test)

print(mean_squared_error(y_train,y_lgbmclf_train, squared = False))
print(mean_squared_error(y_val,y_lgbmclf_val, squared = False))
print(mean_squared_error(y_test,y_lgbmclf_test, squared = False))

0.47173664202697585
1.3231253882197038
1.328232205829379


In [38]:
rfclf = RandomForestClassifier(max_depth = 9)

rfclf.fit(X_train, y_train)

y_rfclf_train = rfclf.predict(X_train)
y_rfclf_val = rfclf.predict(X_val)
y_rfclf_test = rfclf.predict(X_test)
test_rfclf = rfclf.predict(test)

print(mean_squared_error(y_train,y_rfclf_train, squared = False))
print(mean_squared_error(y_val,y_rfclf_val, squared = False))
print(mean_squared_error(y_test,y_rfclf_test, squared = False))

1.2010112322179438
1.3501509134118246
1.3734604687361691


Now I have trained all the models, and now I will stack each of there outputs and make a new array, whose columns would be the predictions of the Base Models. All the Base models would have a very correlated predications, and hence we would train shallow networks to overcome overfitting.

### Stack - Level1

In [39]:
stack_val = np.column_stack((y_xgb_val, y_cat_val, y_lgbm_val, y_rf_val, y_xgb_val_le, y_rf_val_le, y_xgbclf_val, y_lgbmclf_val, y_rfclf_val))
stack_test = np.column_stack((y_xgb_test, y_cat_test, y_lgbm_test, y_rf_test, y_xgb_test_le, y_rf_test_le, y_xgbclf_test, y_lgbmclf_test, y_rfclf_test))
test_stack = np.column_stack((test_xgb, test_cat, test_lgbm, test_rf, test_xgb_le, test_rf_le, test_xgbclf, test_lgbmclf, test_rfclf))

In [40]:
stack_train, stack_val, y_stack_train, y_stack_val = train_test_split(stack_val, y_val, test_size = 0.15, random_state = SEED)

#### XGB

Bagging XGB Models

In [41]:
xgb_stack1 = xgb.XGBRegressor(n_estimators = 25, max_depth = 3, learning_rate = 0.3)

xgb_stack1.fit(stack_train, y_stack_train)

y_stack1_xgb_train = xgb_stack1.predict(stack_train)
y_stack1_xgb_val = xgb_stack1.predict(stack_val)
y_stack1_xgb_test = xgb_stack1.predict(stack_test)
test_stack1_Xgb = xgb_stack1.predict(test_stack)

print(mean_squared_error(y_stack_train,y_stack1_xgb_train, squared = False))
print(mean_squared_error(y_stack_val,y_stack1_xgb_val, squared = False))
print(mean_squared_error(y_test,y_stack1_xgb_test, squared = False))

1.0046284724330674
1.1007243927890298
1.1706212855976794


In [42]:
xgb2_stack1 = xgb.XGBRegressor(n_estimators = 100, max_depth = 4, learning_rate = 0.05)

xgb2_stack1.fit(stack_train, y_stack_train)

y_stack1_xgb2_train = xgb2_stack1.predict(stack_train)
y_stack1_xgb2_val = xgb2_stack1.predict(stack_val)
y_stack1_xgb2_test = xgb2_stack1.predict(stack_test)
test_stack1_Xgb2 = xgb2_stack1.predict(test_stack)

print(mean_squared_error(y_stack_train,y_stack1_xgb2_train, squared = False))
print(mean_squared_error(y_stack_val,y_stack1_xgb2_val, squared = False))
print(mean_squared_error(y_test,y_stack1_xgb2_test, squared = False))

0.975788028533355
1.0974508007454027
1.165601215358065


In [43]:
xgb3_stack1 = xgb.XGBRegressor(n_estimators = 200, max_depth = 3, learning_rate = 0.06)

xgb3_stack1.fit(stack_train, y_stack_train)

y_stack1_xgb3_train = xgb3_stack1.predict(stack_train)
y_stack1_xgb3_val = xgb3_stack1.predict(stack_val)
y_stack1_xgb3_test = xgb3_stack1.predict(stack_test)
test_stack1_Xgb3 = xgb3_stack1.predict(test_stack)

print(mean_squared_error(y_stack_train,y_stack1_xgb3_train, squared = False))
print(mean_squared_error(y_stack_val,y_stack1_xgb3_val, squared = False))
print(mean_squared_error(y_test,y_stack1_xgb3_test, squared = False))

0.9455448429824107
1.10170493342366
1.173057928117463


In [44]:
y_stack1_xgbf_train = (y_stack1_xgb_train + y_stack1_xgb2_train + y_stack1_xgb3_train)/3
y_stack1_xgbf_val = (y_stack1_xgb_val + y_stack1_xgb2_val + y_stack1_xgb3_val)/3
y_stack1_xgbf_test = (y_stack1_xgb_test + y_stack1_xgb2_test + y_stack1_xgb3_test)/3

In [45]:
print(mean_squared_error(y_stack_train,y_stack1_xgbf_train, squared = False))
print(mean_squared_error(y_stack_val,y_stack1_xgbf_val, squared = False))
print(mean_squared_error(y_test,y_stack1_xgbf_test, squared = False))

0.9719152028694601
1.0961339588064087
1.1660124663124027


In [46]:
test_stack1_xgbf = (test_stack1_Xgb + test_stack1_Xgb2 + test_stack1_Xgb3)/3

In [47]:
test_stack1_xgbf

array([4.5204606, 4.2139096, 4.57003  , ..., 3.7538388, 4.427202 ,
       3.7898266], dtype=float32)

#### RF

Bagging Random Forest Models

In [48]:
rf_stack1 = RandomForestRegressor(max_depth = 3, n_estimators = 25, random_state = 15, min_samples_split = 4)

rf_stack1.fit(stack_train, y_stack_train)

y_stack1_rf_train = rf_stack1.predict(stack_train)
y_stack1_rf_val = rf_stack1.predict(stack_val)
y_stack1_rf_test = rf_stack1.predict(stack_test)
test_stack1_rf = rf_stack1.predict(test_stack)

print(mean_squared_error(y_stack_train,y_stack1_rf_train, squared = False))
print(mean_squared_error(y_stack_val,y_stack1_rf_val, squared = False))
print(mean_squared_error(y_test,y_stack1_rf_test, squared = False))

1.0939400005232347
1.122062211169213
1.1536835051850982


In [49]:
rf2_stack1 = RandomForestRegressor(max_depth = 4, n_estimators = 25, random_state = 18, min_samples_split = 4)

rf2_stack1.fit(stack_train, y_stack_train)

y_stack1_rf2_train = rf2_stack1.predict(stack_train)
y_stack1_rf2_val = rf2_stack1.predict(stack_val)
y_stack1_rf2_test = rf2_stack1.predict(stack_test)
test_stack1_rf2 = rf2_stack1.predict(test_stack)

print(mean_squared_error(y_stack_train,y_stack1_rf2_train, squared = False))
print(mean_squared_error(y_stack_val,y_stack1_rf2_val, squared = False))
print(mean_squared_error(y_test,y_stack1_rf2_test, squared = False))

1.0747282899054422
1.115485193985559
1.153902148180315


In [50]:
rf3_stack1 = RandomForestRegressor(max_depth = 4, n_estimators = 50, random_state = 2021)

rf3_stack1.fit(stack_train, y_stack_train)

y_stack1_rf3_train = rf3_stack1.predict(stack_train)
y_stack1_rf3_val = rf3_stack1.predict(stack_val)
y_stack1_rf3_test = rf3_stack1.predict(stack_test)
test_stack1_rf3 = rf3_stack1.predict(test_stack)

print(mean_squared_error(y_stack_train,y_stack1_rf3_train, squared = False))
print(mean_squared_error(y_stack_val,y_stack1_rf3_val, squared = False))
print(mean_squared_error(y_test,y_stack1_rf3_test, squared = False))

1.071449092016098
1.1144930621375864
1.149000967224518


In [51]:
rf4_stack1 = RandomForestRegressor(max_depth = 4, n_estimators = 100, random_state = 2025)

rf4_stack1.fit(stack_train, y_stack_train)

y_stack1_rf4_train = rf4_stack1.predict(stack_train)
y_stack1_rf4_val = rf4_stack1.predict(stack_val)
y_stack1_rf4_test = rf4_stack1.predict(stack_test)
test_stack1_rf4 = rf4_stack1.predict(test_stack)

print(mean_squared_error(y_stack_train,y_stack1_rf4_train, squared = False))
print(mean_squared_error(y_stack_val,y_stack1_rf4_val, squared = False))
print(mean_squared_error(y_test,y_stack1_rf4_test, squared = False))

1.0713164346580044
1.1197832105671652
1.1504377031448125


In [52]:
y_stack1_rff_train = (y_stack1_rf_train + y_stack1_rf2_train + y_stack1_rf3_train + y_stack1_rf4_train)/4
y_stack1_rff_val = (y_stack1_rf_val + y_stack1_rf2_val + y_stack1_rf3_val + y_stack1_rf4_val)/4
y_stack1_rff_test = (y_stack1_rf_test + y_stack1_rf2_test + y_stack1_rf3_test + y_stack1_rf4_test)/4

In [53]:
print(mean_squared_error(y_stack_train,y_stack1_rff_train, squared = False))
print(mean_squared_error(y_stack_val,y_stack1_rff_val, squared = False))
print(mean_squared_error(y_test,y_stack1_rff_test, squared = False))

1.0763102571568544
1.1165491655568758
1.1503965686533144


In [54]:
test_stack1_rff = (test_stack1_rf + test_stack1_rf2 + test_stack1_rf3 + test_stack1_rf4)/4
test_stack1_rff

array([4.86436426, 4.69424586, 4.4059597 , ..., 4.44535058, 4.46771271,
       4.55909548])

#### CATBOOST

In [55]:
cat_stack1 = CatBoostRegressor()

cat_stack1.fit(stack_train, y_stack_train)

y_stack1_cat_train = cat_stack1.predict(stack_train)
y_stack1_cat_val = cat_stack1.predict(stack_val)
y_stack1_cat_test = cat_stack1.predict(stack_test)
test_stack1_cat = cat_stack1.predict(test_stack)

print(mean_squared_error(y_stack_train,y_stack1_cat_train, squared = False))
print(mean_squared_error(y_stack_val,y_stack1_cat_val, squared = False))
print(mean_squared_error(y_test,y_stack1_cat_test, squared = False))

Learning rate set to 0.045422
0:	learn: 1.5860704	total: 2.38ms	remaining: 2.38s
1:	learn: 1.5540583	total: 9.33ms	remaining: 4.65s
2:	learn: 1.5234050	total: 11.1ms	remaining: 3.7s
3:	learn: 1.4948083	total: 12.9ms	remaining: 3.21s
4:	learn: 1.4680490	total: 14.4ms	remaining: 2.86s
5:	learn: 1.4426889	total: 16ms	remaining: 2.66s
6:	learn: 1.4190846	total: 17.8ms	remaining: 2.52s
7:	learn: 1.3978261	total: 19.4ms	remaining: 2.4s
8:	learn: 1.3769052	total: 21.1ms	remaining: 2.32s
9:	learn: 1.3572612	total: 22.7ms	remaining: 2.25s
10:	learn: 1.3399364	total: 24.1ms	remaining: 2.17s
11:	learn: 1.3229907	total: 25.8ms	remaining: 2.12s
12:	learn: 1.3076112	total: 27.3ms	remaining: 2.07s
13:	learn: 1.2933895	total: 28.8ms	remaining: 2.03s
14:	learn: 1.2800136	total: 30.3ms	remaining: 1.99s
15:	learn: 1.2674320	total: 31.8ms	remaining: 1.96s
16:	learn: 1.2561222	total: 33.4ms	remaining: 1.93s
17:	learn: 1.2451002	total: 34.8ms	remaining: 1.9s
18:	learn: 1.2348006	total: 36.4ms	remaining: 1.8

Now we will stack predictions of the 1st level models and also the predictions of Base Models to make our 2nd Layer of Stacking

### 2nd Level

In [56]:
stack2_val = np.column_stack((y_stack1_xgbf_val, y_stack1_rff_val, y_stack1_cat_val, stack_val)) 
stack2_test = np.column_stack((y_stack1_xgbf_test, y_stack1_rff_test, y_stack1_cat_test, stack_test)) 
test_stack2 = np.column_stack((test_stack1_xgbf, test_stack1_rff, test_stack1_cat, test_stack))

In [57]:
lgbm_stack2 = LGBMRegressor(num_leaves = 7, max_depth = 3, n_estimators = 25)

lgbm_stack2.fit(stack2_val , y_stack_val)

y_stack2_lgbm_val = lgbm_stack2.predict(stack2_val)
y_stack2_lgbm_test = lgbm_stack2.predict(stack2_test)
test_stack2_lgbm = lgbm_stack2.predict(test_stack2)

print(mean_squared_error(y_stack_val,y_stack2_lgbm_val, squared = False))
print(mean_squared_error(y_test,y_stack2_lgbm_test, squared = False))

1.024394387821257
1.1940220295097201


In [58]:
from sklearn.linear_model import LinearRegression
from sklearn.neighbors import KNeighborsRegressor

In [59]:
lr_stack2 = LinearRegression()

lr_stack2.fit(stack2_val , y_stack_val)

y_stack2_lr_val = lr_stack2.predict(stack2_val)
y_stack2_lr_test = lr_stack2.predict(stack2_test)
test_stack2_lr = lr_stack2.predict(test_stack2)

print(mean_squared_error(y_stack_val,y_stack2_lr_val, squared = False))
print(mean_squared_error(y_test,y_stack2_lr_test, squared = False))

1.0565218618867551
1.1833132600344325


In [60]:
kn_stack2 = KNeighborsRegressor()

kn_stack2.fit(stack2_val , y_stack_val)

y_stack2_kn_val = kn_stack2.predict(stack2_val)
y_stack2_kn_test = kn_stack2.predict(stack2_test)
test_stack2_kn = kn_stack2.predict(test_stack2)

print(mean_squared_error(y_stack_val,y_stack2_kn_val, squared = False))
print(mean_squared_error(y_test,y_stack2_kn_test, squared = False))

0.9882001768077231
1.2534443958024466


In [62]:
y_test_2level = (y_stack2_kn_test + y_stack2_lgbm_test + y_stack2_lr_test)/3

In [63]:
test_2level = (test_stack2_kn + test_stack2_lgbm + test_stack2_lr)/3

Finally completing our 3rd Level Stacking

### 3rd Level

In [64]:
stack_test_3 = np.column_stack((stack2_test, y_stack2_kn_test, y_stack2_lgbm_test, y_stack2_lr_test, y_test_2level))
test_stack3 = np.column_stack((test_stack2, test_stack2_kn , test_stack2_lgbm, test_stack2_lr, test_2level))

In [65]:
stack_test_3.shape

(1514, 16)

In [66]:
from tensorflow.keras.layers import Dense, Activation, Dropout
from tensorflow.keras.models import Sequential
from tensorflow.keras.callbacks import ModelCheckpoint
import tensorflow as tf

In [81]:
def nn1():
    
    model = Sequential()
    
    model.add(Dense(16, input_dim = 16, activation = 'relu', kernel_initializer = 'normal'))
    
    model.add(Dense(1, kernel_initializer = 'normal', activation = 'linear'))
    
    model.summary()
    
    return model

In [82]:
nn1 = nn1()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_2 (Dense)              (None, 16)                272       
_________________________________________________________________
dense_3 (Dense)              (None, 1)                 17        
Total params: 289
Trainable params: 289
Non-trainable params: 0
_________________________________________________________________


In [69]:
nn1.compile(loss='mean_squared_error', optimizer='adam', metrics=['mean_squared_error'])
callback = tf.keras.callbacks.EarlyStopping(monitor='mean_squared_error', patience=7)
nn1.fit(stack_test_3, y_test, epochs=20, validation_split = 0.2, callbacks=callback, shuffle = True)

2021-10-30 07:32:02.111747: I tensorflow/compiler/mlir/mlir_graph_optimization_pass.cc:185] None of the MLIR Optimization Passes are enabled (registered 2)


Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20


<keras.callbacks.History at 0x7f0f66066910>

In [70]:
pred_test_stack3 = nn1.predict(test_stack3)

In [71]:
pred_test_stack3 = pred_test_stack3.reshape(-1)

In [72]:
lr_3 = LinearRegression()

lr_3.fit(stack_test_3, y_test)

LinearRegression()

In [73]:
mean_squared_error(lr_3.predict(stack_test_3), y_test)

1.2820572732564055

In [74]:
test_stack3.shape

(10091, 16)

In [75]:
test_lr_3 = lr_3.predict(test_stack3)

In [76]:
test_lr_3

array([4.99241847, 4.6783163 , 3.56920069, ..., 4.3199026 , 4.41073817,
       4.53436488])

In [77]:
test_stack3_pred = (pred_test_stack3 + test_lr_3)/2

In [78]:
test_stack3_pred.shape

(10091,)

In [79]:
res8 = pd.DataFrame(test_stack3_pred, columns=['nerdy'])
res8['id'] = test['id']
res8 = res8[['id', 'nerdy']]
res8.to_csv("res8.csv",index=False)
res8

Unnamed: 0,id,nerdy
0,869598,5.330090
1,682098,4.847706
2,278454,3.923966
3,119007,4.304492
4,49950,5.013132
...,...,...
10086,702877,4.981655
10087,183546,5.600043
10088,435694,4.645777
10089,825049,4.659667


This method helped me to get a public leaderboard score of 1.112 while Private Leaderboard score of 1.101. This proves that this method (if performed carefully) does not lead to overfitting and actually is very helpful to get a good score.