# Stacking Ensemble

In [17]:
import numpy as np 
import pandas as pd 
import datetime

from sklearn.model_selection import StratifiedKFold
from sklearn.preprocessing import LabelEncoder
from sklearn.preprocessing import StandardScaler

from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

import lightgbm as lgb

# Notebook display limits: show up to 100 columns and 25 rows of a frame.
pd.options.display.max_columns = 100
pd.options.display.max_rows = 25

import gensim

### Load Pre-Processed Data

In [18]:
# LightGBM base-model predictions for the train and test sets
# ("oof" presumably means out-of-fold -- confirm against the notebook that wrote them).
lgb_oof_train, lgb_oof_test = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('lgb_oof_train.csv', 'lgb_oof_test.csv')
)

In [19]:
# "RF" base-model predictions for the train and test sets
# (presumably a random forest, out-of-fold -- confirm against the notebook that wrote them).
rf_oof_train, rf_oof_test = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('rf_oof_train.csv', 'rf_oof_test.csv')
)

In [20]:
# "cat" base-model predictions for the train and test sets
# (presumably CatBoost, out-of-fold -- confirm against the notebook that wrote them).
cat_oof_train, cat_oof_test = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('cat_oof_train.csv', 'cat_oof_test.csv')
)

In [None]:
# "nn" base-model predictions for the train and test sets
# (presumably a neural network, out-of-fold -- confirm against the notebook that wrote them).
nn_oof_train, nn_oof_test = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('nn_oof_train.csv', 'nn_oof_test.csv')
)

In [None]:
# "xgb" base-model predictions for the train and test sets
# (presumably XGBoost, out-of-fold -- confirm against the notebook that wrote them).
xgb_oof_train, xgb_oof_test = (
    pd.read_csv(csv_path, low_memory=False)
    for csv_path in ('xgb_oof_train.csv', 'xgb_oof_test.csv')
)

### Load Original Training Data for Labels

In [23]:
# Read the original training file and discard exact duplicate rows.
# NOTE(review): the prediction files must have been produced from the same
# de-duplicated rows for the labels to line up -- confirm upstream.
train_df = pd.read_csv('train.csv', low_memory=False).drop_duplicates()

# Target column and its distinct values in sorted order; the sorted names are
# reused later as column labels.
y_cats = train_df['Category']
unique_cats = np.sort(y_cats.unique())

# Map each category name to an integer class id.
le = LabelEncoder()
le.fit(y_cats)
y_train = le.transform(y_cats)

### Reshape Model Output Files

In [24]:
# Coerce the LightGBM predictions to a 2-D array with 39 columns (the same
# count as 'num_classes' in get_model).
# np.asarray accepts both the DataFrame returned by read_csv and an array that
# was already converted, so re-running this cell no longer raises
# AttributeError the way calling `.to_numpy()` on an ndarray does.
lgb_oof_train = np.asarray(lgb_oof_train).reshape(-1, 39)
lgb_oof_test = np.asarray(lgb_oof_test).reshape(-1, 39)

In [None]:
# Coerce the RF predictions to a 2-D array with 39 columns (the same count as
# 'num_classes' in get_model).
# np.asarray accepts both the DataFrame returned by read_csv and an array that
# was already converted, so re-running this cell no longer raises
# AttributeError the way calling `.to_numpy()` on an ndarray does.
rf_oof_train = np.asarray(rf_oof_train).reshape(-1, 39)
rf_oof_test = np.asarray(rf_oof_test).reshape(-1, 39)

In [25]:
# Coerce the "cat" predictions to a 2-D array with 39 columns (the same count
# as 'num_classes' in get_model).
# np.asarray accepts both the DataFrame returned by read_csv and an array that
# was already converted, so re-running this cell no longer raises
# AttributeError the way calling `.to_numpy()` on an ndarray does.
cat_oof_train = np.asarray(cat_oof_train).reshape(-1, 39)
cat_oof_test = np.asarray(cat_oof_test).reshape(-1, 39)

In [26]:
# Coerce the "nn" predictions to a 2-D array with 39 columns (the same count
# as 'num_classes' in get_model).
# np.asarray accepts both the DataFrame returned by read_csv and an array that
# was already converted, so re-running this cell no longer raises
# AttributeError the way calling `.to_numpy()` on an ndarray does.
nn_oof_train = np.asarray(nn_oof_train).reshape(-1, 39)
nn_oof_test = np.asarray(nn_oof_test).reshape(-1, 39)

In [None]:
# Coerce the "xgb" predictions to a 2-D array with 39 columns (the same count
# as 'num_classes' in get_model).
# np.asarray accepts both the DataFrame returned by read_csv and an array that
# was already converted, so re-running this cell no longer raises
# AttributeError the way calling `.to_numpy()` on an ndarray does.
xgb_oof_train = np.asarray(xgb_oof_train).reshape(-1, 39)
xgb_oof_test = np.asarray(xgb_oof_test).reshape(-1, 39)

### Create Column Names for Each Set of Model Predictions

In [27]:
# Name each LightGBM prediction column after its category, prefixed with the
# model tag so column names stay distinct when the models are concatenated.
lgb_cols = ['LGB_' + category for category in unique_cats]
lgb_train, lgb_test = (
    pd.DataFrame(values, columns=lgb_cols)
    for values in (lgb_oof_train, lgb_oof_test)
)

In [None]:
# Name each RF prediction column after its category, prefixed with the model
# tag so column names stay distinct when the models are concatenated.
rf_cols = ['RF_' + category for category in unique_cats]
rf_train, rf_test = (
    pd.DataFrame(values, columns=rf_cols)
    for values in (rf_oof_train, rf_oof_test)
)

In [28]:
# Name each "cat" prediction column after its category, prefixed with the model
# tag so column names stay distinct when the models are concatenated.
cat_cols = ['Cat_' + category for category in unique_cats]
cat_train, cat_test = (
    pd.DataFrame(values, columns=cat_cols)
    for values in (cat_oof_train, cat_oof_test)
)

In [29]:
# Name each "nn" prediction column after its category, prefixed with the model
# tag so column names stay distinct when the models are concatenated.
nn_cols = ['NN_' + category for category in unique_cats]
nn_train, nn_test = (
    pd.DataFrame(values, columns=nn_cols)
    for values in (nn_oof_train, nn_oof_test)
)

In [None]:
# Name each "xgb" prediction column after its category, prefixed with the model
# tag so column names stay distinct when the models are concatenated.
xgb_cols = ['XGB_' + category for category in unique_cats]
xgb_train, xgb_test = (
    pd.DataFrame(values, columns=xgb_cols)
    for values in (xgb_oof_train, xgb_oof_test)
)

### Combine Model Predictions

In [30]:
# Stack the per-model prediction frames side by side to form the meta-learner
# feature matrices (one group of columns per base model).
train_blocks = [lgb_train, rf_train, cat_train, nn_train, xgb_train]
test_blocks = [lgb_test, rf_test, cat_test, nn_test, xgb_test]

# pd.concat(axis=1) aligns on the index and pads missing rows with NaN, so a
# model file with a different row count would otherwise go unnoticed.
assert len({len(block) for block in train_blocks}) == 1, \
    "base-model train predictions have differing row counts"
assert len({len(block) for block in test_blocks}) == 1, \
    "base-model test predictions have differing row counts"

X_train = pd.concat(train_blocks, axis=1, sort=False)
X_test = pd.concat(test_blocks, axis=1, sort=False)

# The labels come from a separately loaded (and de-duplicated) file; make sure
# there is exactly one label per feature row before training.
assert len(X_train) == len(y_train), \
    "X_train and y_train have different numbers of rows"

In [31]:
def get_model(x_tr, y_tr, num_round=1000, params=None):
    """Train the LightGBM multiclass model on the given features and labels.

    The training set is also registered as the only evaluation set (named
    ``'train'``), so the recorded ``multi_logloss`` is a training loss, not a
    held-out estimate.

    Parameters
    ----------
    x_tr
        Feature matrix handed to ``lgb.Dataset``.
    y_tr
        Class labels handed to ``lgb.Dataset`` as ``label``.
    num_round : int, default 1000
        Number of boosting rounds.
    params : dict, optional
        Entries that override or extend the default LightGBM parameters
        (for example ``'num_classes'`` for a different number of classes).
        The dict passed in is not modified.

    Returns
    -------
    tuple
        ``(model, mod_results)``: the value returned by ``lgb.train`` and the
        dict filled with the per-round evaluation history.
    """
    param = {
        'num_classes': 39,
        'learning_rate': 0.01,
        'objective': 'multiclass',
        'boosting': "gbdt",
        'metric': 'multi_logloss',
        'verbosity': 1,
    }
    if params:
        param.update(params)

    train_ds = lgb.Dataset(x_tr, label=y_tr)
    mod_results = {}

    # The `evals_result=` keyword of lgb.train was removed in LightGBM 4.0;
    # the record_evaluation callback fills the same dict on old and new
    # releases alike.
    callbacks = [lgb.record_evaluation(mod_results)]
    # Keep the per-iteration log on releases that only print the metric when
    # asked to through a callback.
    if hasattr(lgb, 'log_evaluation'):
        callbacks.append(lgb.log_evaluation(period=1))

    model = lgb.train(
        param,
        train_ds,
        num_boost_round=num_round,
        valid_sets=[train_ds],
        valid_names=['train'],
        callbacks=callbacks,
    )

    return model, mod_results

In [32]:
# Fit on the combined predictions; `mod` holds the pair returned by get_model.
mod = get_model(x_tr=X_train, y_tr=y_train)

[1]	train's multi_logloss: 2.66618
[2]	train's multi_logloss: 2.65545
[3]	train's multi_logloss: 2.64579
[4]	train's multi_logloss: 2.63687
[5]	train's multi_logloss: 2.62844
[6]	train's multi_logloss: 2.62045
[7]	train's multi_logloss: 2.61282
[8]	train's multi_logloss: 2.6055
[9]	train's multi_logloss: 2.59845
[10]	train's multi_logloss: 2.59165
[11]	train's multi_logloss: 2.58507
[12]	train's multi_logloss: 2.57869
[13]	train's multi_logloss: 2.57249
[14]	train's multi_logloss: 2.56643
[15]	train's multi_logloss: 2.56057
[16]	train's multi_logloss: 2.55485
[17]	train's multi_logloss: 2.54927
[18]	train's multi_logloss: 2.54381
[19]	train's multi_logloss: 2.53851
[20]	train's multi_logloss: 2.5333
[21]	train's multi_logloss: 2.5282
[22]	train's multi_logloss: 2.52321
[23]	train's multi_logloss: 2.51833
[24]	train's multi_logloss: 2.51357
[25]	train's multi_logloss: 2.50886
[26]	train's multi_logloss: 2.50428
[27]	train's multi_logloss: 2.49978
[28]	train's multi_logloss: 2.49536
[29]

[226]	train's multi_logloss: 2.17517
[227]	train's multi_logloss: 2.1746
[228]	train's multi_logloss: 2.17404
[229]	train's multi_logloss: 2.17348
[230]	train's multi_logloss: 2.17293
[231]	train's multi_logloss: 2.17238
[232]	train's multi_logloss: 2.17183
[233]	train's multi_logloss: 2.1713
[234]	train's multi_logloss: 2.17076
[235]	train's multi_logloss: 2.17022
[236]	train's multi_logloss: 2.16969
[237]	train's multi_logloss: 2.16916
[238]	train's multi_logloss: 2.16863
[239]	train's multi_logloss: 2.16812
[240]	train's multi_logloss: 2.16759
[241]	train's multi_logloss: 2.16708
[242]	train's multi_logloss: 2.16656
[243]	train's multi_logloss: 2.16606
[244]	train's multi_logloss: 2.16555
[245]	train's multi_logloss: 2.16505
[246]	train's multi_logloss: 2.16454
[247]	train's multi_logloss: 2.16404
[248]	train's multi_logloss: 2.16356
[249]	train's multi_logloss: 2.16306
[250]	train's multi_logloss: 2.16257
[251]	train's multi_logloss: 2.16208
[252]	train's multi_logloss: 2.1616
[253

[448]	train's multi_logloss: 2.10357
[449]	train's multi_logloss: 2.10337
[450]	train's multi_logloss: 2.10318
[451]	train's multi_logloss: 2.10299
[452]	train's multi_logloss: 2.10279
[453]	train's multi_logloss: 2.10259
[454]	train's multi_logloss: 2.1024
[455]	train's multi_logloss: 2.1022
[456]	train's multi_logloss: 2.10201
[457]	train's multi_logloss: 2.10182
[458]	train's multi_logloss: 2.10163
[459]	train's multi_logloss: 2.10144
[460]	train's multi_logloss: 2.10125
[461]	train's multi_logloss: 2.10105
[462]	train's multi_logloss: 2.10087
[463]	train's multi_logloss: 2.10068
[464]	train's multi_logloss: 2.10049
[465]	train's multi_logloss: 2.1003
[466]	train's multi_logloss: 2.10012
[467]	train's multi_logloss: 2.09993
[468]	train's multi_logloss: 2.09975
[469]	train's multi_logloss: 2.09956
[470]	train's multi_logloss: 2.09938
[471]	train's multi_logloss: 2.0992
[472]	train's multi_logloss: 2.09902
[473]	train's multi_logloss: 2.09884
[474]	train's multi_logloss: 2.09866
[475]

[671]	train's multi_logloss: 2.06903
[672]	train's multi_logloss: 2.0689
[673]	train's multi_logloss: 2.06878
[674]	train's multi_logloss: 2.06865
[675]	train's multi_logloss: 2.06852
[676]	train's multi_logloss: 2.0684
[677]	train's multi_logloss: 2.06827
[678]	train's multi_logloss: 2.06815
[679]	train's multi_logloss: 2.06802
[680]	train's multi_logloss: 2.06789
[681]	train's multi_logloss: 2.06776
[682]	train's multi_logloss: 2.06763
[683]	train's multi_logloss: 2.0675
[684]	train's multi_logloss: 2.06736
[685]	train's multi_logloss: 2.06724
[686]	train's multi_logloss: 2.06712
[687]	train's multi_logloss: 2.06699
[688]	train's multi_logloss: 2.06686
[689]	train's multi_logloss: 2.06673
[690]	train's multi_logloss: 2.06661
[691]	train's multi_logloss: 2.06648
[692]	train's multi_logloss: 2.06636
[693]	train's multi_logloss: 2.06624
[694]	train's multi_logloss: 2.06612
[695]	train's multi_logloss: 2.06599
[696]	train's multi_logloss: 2.06586
[697]	train's multi_logloss: 2.06574
[698

[894]	train's multi_logloss: 2.04322
[895]	train's multi_logloss: 2.04312
[896]	train's multi_logloss: 2.04301
[897]	train's multi_logloss: 2.0429
[898]	train's multi_logloss: 2.0428
[899]	train's multi_logloss: 2.0427
[900]	train's multi_logloss: 2.04259
[901]	train's multi_logloss: 2.0425
[902]	train's multi_logloss: 2.04239
[903]	train's multi_logloss: 2.0423
[904]	train's multi_logloss: 2.04219
[905]	train's multi_logloss: 2.04209
[906]	train's multi_logloss: 2.04199
[907]	train's multi_logloss: 2.04189
[908]	train's multi_logloss: 2.04178
[909]	train's multi_logloss: 2.04168
[910]	train's multi_logloss: 2.04158
[911]	train's multi_logloss: 2.04148
[912]	train's multi_logloss: 2.04138
[913]	train's multi_logloss: 2.04128
[914]	train's multi_logloss: 2.04118
[915]	train's multi_logloss: 2.04108
[916]	train's multi_logloss: 2.04098
[917]	train's multi_logloss: 2.04088
[918]	train's multi_logloss: 2.04078
[919]	train's multi_logloss: 2.04068
[920]	train's multi_logloss: 2.04057
[921]	

In [33]:
# get_model returns (model, eval_history); the first element is the trained model.
stacker = mod[0]
preds = stacker.predict(X_test)

In [34]:
# One column per category, in the same sorted order as `unique_cats`.
sub_df = pd.DataFrame(preds, columns=unique_cats)

# Turn the positional row index into an explicit leading 'Id' column.
sub_df.index.name = 'Id'
sub_df = sub_df.reset_index()

sub_df.to_csv('stack_13.csv', index=False)