In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
%matplotlib inline

data_dir = './data'
from tqdm import tqdm
import optuna

from scipy.stats import loguniform


from sklearn.ensemble import ExtraTreesClassifier, RandomForestClassifier
from sklearn.tree import ExtraTreeClassifier
from lightgbm import LGBMClassifier
from xgboost import XGBClassifier
from catboost import CatBoostClassifier

from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.gaussian_process import GaussianProcessClassifier
from sklearn.naive_bayes import GaussianNB, MultinomialNB, ComplementNB

import keras
import keras.backend as K
from keras.layers import (Dense, Input, Activation, Convolution2D, MaxPooling2D, BatchNormalization, 
                          Dropout, Flatten, SpatialDropout2D, Add, Concatenate , Reshape,
                         GlobalAveragePooling2D)
from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.models import Model
from keras.wrappers.scikit_learn import KerasRegressor
from keras.optimizers import Nadam

def mish(x):
    return x * K.tanh(K.softplus(x))

from sklearn.model_selection import cross_val_score, cross_val_predict
from sklearn.metrics import f1_score, accuracy_score

import optuna
import random

In [2]:
from tensorflow.compat.v1 import ConfigProto, InteractiveSession
config=ConfigProto()
config.gpu_options.allow_growth=True
session=InteractiveSession(config=config)

# Load Data

In [3]:
train = pd.read_csv(data_dir + '/train.csv', index_col = 'id')
test = pd.read_csv(data_dir + '/test.csv', index_col='id')
submission = pd.read_csv(data_dir + '/submission.csv', index_col='id')
print(train.shape, test.shape, submission.shape)

(2048, 786) (20480, 785) (20480, 1)


In [4]:
feature_names = list(test)
feature_names.remove('letter')

train[feature_names] = (train[feature_names] / 255)
test[feature_names] = (test[feature_names] / 255)

train= pd.get_dummies(train, columns=['letter'])
test = pd.get_dummies(test, columns=['letter'])

Xtrain = train[list(test)]
Xtest = test.copy()
Ytrain = np.array(train['digit'])

print(Xtrain.shape, Ytrain.shape, Xtest.shape)

(2048, 810) (2048,) (20480, 810)


# Build Models

In [5]:
def build_fn(filter_size=64, init='glorot_normal', num_conv=3):
    dropout_rate=0.5
    
    
    model_in = Input(shape=Xtrain.shape[1:])

    image_in = model_in[:,:-26]
    image_in = Reshape((28,28,1))(image_in)
    
    letter_in = model_in[:,-26:]
    
    if num_conv == 3:
        x = Convolution2D(filter_size, 3, padding='same',kernel_initializer=init)(image_in)
        x_res = x
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size, 3, padding='same', kernel_initializer=init)(x)
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size, 3, padding='same', kernel_initializer=init)(x)
        x = Add()([x_res, x])
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = MaxPooling2D()(x)
        x = SpatialDropout2D(dropout_rate)(x)


        x = Convolution2D(filter_size*2, 3, padding='same',kernel_initializer=init)(x)
        x_res = x
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size*2, 3, padding='same', kernel_initializer=init)(x)
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size*2, 3, padding='same', kernel_initializer=init)(x)
        x = Add()([x_res, x])
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = MaxPooling2D()(x)
        x = SpatialDropout2D(dropout_rate)(x)



        x = Convolution2D(filter_size*4, 3, padding='same',kernel_initializer=init)(x)
        x_res = x
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size*4, 3, padding='same', kernel_initializer=init)(x)
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size*4, 3, padding='same', kernel_initializer=init)(x)
        x = Add()([x_res, x])
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = MaxPooling2D()(x)
        x = SpatialDropout2D(dropout_rate)(x)


        x = GlobalAveragePooling2D()(x)

        x = Concatenate(axis=1)([x, letter_in])


        model_out = Dense(units=10, activation='softmax')(x)
        model = Model(model_in, model_out)
        model.compile(loss='sparse_categorical_crossentropy', optimizer='Nadam', metrics=['accuracy'])

        return model
    
    elif num_conv == 4:
        x = Convolution2D(filter_size, 3, padding='same',kernel_initializer=init)(image_in)
        x_res = x
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size, 3, padding='same', kernel_initializer=init)(x)
        x_res1 = x
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size, 3, padding='same', kernel_initializer=init)(x)
        x = Add()([x_res1, x])
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size, 3, padding='same', kernel_initializer=init)(x)
        x = Add()([x_res, x])
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = MaxPooling2D()(x)
        x = SpatialDropout2D(dropout_rate)(x)




        x = Convolution2D(filter_size*2, 3, padding='same',kernel_initializer=init)(x)
        x_res = x
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size*2, 3, padding='same', kernel_initializer=init)(x)
        x_res1 = x
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size*2, 3, padding='same', kernel_initializer=init)(x)
        x = Add()([x_res1, x])
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size*2, 3, padding='same', kernel_initializer=init)(x)
        x = Add()([x_res, x])
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = MaxPooling2D()(x)
        x = SpatialDropout2D(dropout_rate)(x)



        x = Convolution2D(filter_size*4, 3, padding='same',kernel_initializer=init)(x)
        x_res = x
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size*4, 3, padding='same', kernel_initializer=init)(x)
        x_res1 = x
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size*4, 3, padding='same', kernel_initializer=init)(x)
        x = Add()([x_res1, x])
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = SpatialDropout2D(dropout_rate)(x)

        x = Convolution2D(filter_size*4, 3, padding='same', kernel_initializer=init)(x)
        x = Add()([x_res, x])
        x = BatchNormalization()(x)
        x = Activation(mish)(x)
        x = MaxPooling2D()(x)
        x = SpatialDropout2D(dropout_rate)(x)


        x = GlobalAveragePooling2D()(x)

        x = Concatenate(axis=1)([x, letter_in])

        model_out = Dense(units=10, activation='softmax')(x)
        model = Model(model_in, model_out)
        model.compile(loss='sparse_categorical_crossentropy', optimizer='Nadam', metrics=['accuracy'])

        return model
    
    else:
        raise(ValueError)

# Stacking

## Ensemble Lv. 0

Lv. 0 에서는 케라스로 만든 24개의 모델과, 다른 알고리즘들을 랜덤한 파라미터로 학습시켜 총 428개의 모델을 만듭니다.  
케라스 모델은 나름 열심히 실험해서 만들었지만 정확도를 높이는 것에 한계를 느껴서, 나머지 모델들의 집단지성으로 케라스 모델이 틀렸던 부분 몇 개를 더 맞추게 하는 것이 목표입니다. 

In [6]:
stack_train_1 = np.array(pd.DataFrame(data=None, index=train.index))
stack_test_1 = np.array(pd.DataFrame(data=None, index=submission.index))

### Keras

In [7]:
for filter_size in [128,64]:
    for init in ['he_normal','he_uniform','glorot_normal','glorot_uniform','lecun_normal','lecun_uniform']:
        for num_conv in [3, 4]:
            params = {'filter_size' : filter_size, 'init' : init, 'num_conv' : num_conv}
            model = KerasRegressor(build_fn, **params, epochs=150, batch_size=32, verbose=0, shuffle=True)
            
            train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5)
            print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
            stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
            
            model.fit(Xtrain, Ytrain)
            stack_test_1 = np.concatenate((stack_test_1, model.predict(Xtest)), axis=1)
            
            
            pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
            pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)
            
            print(stack_train_1.shape, stack_test_1.shape, '\n')
            K.clear_session()

Score : 0.90966796875
(2048, 10) (20480, 10) 

Score : 0.9013671875
(2048, 20) (20480, 20) 

Score : 0.8955078125
(2048, 30) (20480, 30) 

Score : 0.90478515625
(2048, 40) (20480, 40) 

Score : 0.8935546875
(2048, 50) (20480, 50) 

Score : 0.9013671875
(2048, 60) (20480, 60) 

Score : 0.904296875
(2048, 70) (20480, 70) 

Score : 0.900390625
(2048, 80) (20480, 80) 

Score : 0.89697265625
(2048, 90) (20480, 90) 

Score : 0.8916015625
(2048, 100) (20480, 100) 

Score : 0.90380859375
(2048, 110) (20480, 110) 

Score : 0.8955078125
(2048, 120) (20480, 120) 

Score : 0.89453125
(2048, 130) (20480, 130) 

Score : 0.90185546875
(2048, 140) (20480, 140) 

Score : 0.89501953125
(2048, 150) (20480, 150) 

Score : 0.8994140625
(2048, 160) (20480, 160) 

Score : 0.9013671875
(2048, 170) (20480, 170) 

Score : 0.8994140625
(2048, 180) (20480, 180) 

Score : 0.90625
(2048, 190) (20480, 190) 

Score : 0.90771484375
(2048, 200) (20480, 200) 

Score : 0.8984375
(2048, 210) (20480, 210) 

Score : 0.90429

### LightGBM

In [8]:
num_models=50
lgb_params_list=[]

for _ in range(num_models):
    params = {'n_estimators' : np.random.randint(low=100, high=1000), 
             'colsample_bytree' : np.random.uniform(low=0.5, high=1), 
             'subsample' : np.random.uniform(low=0.5, high=1), 
             'reg_alpha' : np.random.uniform(low=0, high=30), 
             'reg_lambda' : np.random.uniform(low=0, high=30),
             'learning_rate' : np.random.uniform(low=0.01, high=0.3), 
             'drop_rate' : np.random.uniform(low=0.3, high=0.8),
             'uniform_drop' : np.random.choice([True, False]),
              'num_leaves' : np.random.randint(low=7, high=128)
             }
    lgb_params_list.append(params)
    
    model =  LGBMClassifier(boosting_type='dart', objective='softmax', tree_learner='feature', num_class=10, subsample_freq=1, 
                       random_state=18, max_drop=-1,
                      **params)
                           
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
    
lgb_params_list = pd.DataFrame.from_dict(lgb_params_list)
lgb_params_list.to_csv('lgb_params_list.csv', index=False)
lgb_params_list.head(10)

Score : 0.49462890625
(2048, 250) (20480, 250) 

Score : 0.4892578125
(2048, 260) (20480, 260) 

Score : 0.5166015625
(2048, 270) (20480, 270) 

Score : 0.45703125
(2048, 280) (20480, 280) 

Score : 0.28125
(2048, 290) (20480, 290) 

Score : 0.44677734375
(2048, 300) (20480, 300) 

Score : 0.466796875
(2048, 310) (20480, 310) 

Score : 0.5244140625
(2048, 320) (20480, 320) 

Score : 0.4453125
(2048, 330) (20480, 330) 

Score : 0.53564453125
(2048, 340) (20480, 340) 

Score : 0.49169921875
(2048, 350) (20480, 350) 

Score : 0.46923828125
(2048, 360) (20480, 360) 

Score : 0.52099609375
(2048, 370) (20480, 370) 

Score : 0.53759765625
(2048, 380) (20480, 380) 

Score : 0.5283203125
(2048, 390) (20480, 390) 

Score : 0.52685546875
(2048, 400) (20480, 400) 

Score : 0.47021484375
(2048, 410) (20480, 410) 

Score : 0.517578125
(2048, 420) (20480, 420) 

Score : 0.5322265625
(2048, 430) (20480, 430) 

Score : 0.26708984375
(2048, 440) (20480, 440) 

Score : 0.54248046875
(2048, 450) (20480, 

Unnamed: 0,n_estimators,colsample_bytree,subsample,reg_alpha,reg_lambda,learning_rate,drop_rate,uniform_drop,num_leaves
0,709,0.507673,0.726647,16.06203,29.047716,0.042097,0.798576,False,38
1,228,0.747998,0.728451,14.765572,29.322917,0.150799,0.699343,False,83
2,276,0.723912,0.614903,6.995573,18.186237,0.248113,0.463669,False,123
3,169,0.625281,0.558786,19.2154,19.231749,0.095519,0.305552,True,91
4,389,0.51969,0.507942,23.742333,28.399527,0.248631,0.745379,False,97
5,758,0.590926,0.853112,26.767077,24.07968,0.044559,0.655872,True,59
6,150,0.825917,0.891805,24.500612,0.255633,0.283326,0.733359,True,35
7,657,0.790547,0.683691,9.065626,5.599624,0.049973,0.760734,True,65
8,159,0.58328,0.802579,25.71442,7.563112,0.124395,0.602382,True,94
9,958,0.63463,0.853613,9.275917,5.036291,0.066675,0.340392,False,25


```python
lgb_params_list = pd.read_csv('lgb_params_list.csv')
other_model_list=[]

for i in lgb_params_list.index:
    params = dict(lgb_params_list.loc[i])

    model =  LGBMClassifier(boosting_type='dart', objective='softmax', tree_learner='feature', num_class=10, subsample_freq=1, 
                       random_state=18, max_drop=-1,
                      **params)
                           
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

```

### XGBoost

In [9]:
num_models=50
xgb_params_list=[]

for _ in range(num_models):
    params = {'n_estimators' : np.random.randint(low=100, high=1000), 
             'colsample_bytree' : np.random.uniform(low=0.5, high=1), 
             'subsample' : np.random.uniform(low=0.5, high=1), 
             'reg_alpha' : np.random.uniform(low=0, high=30), 
             'reg_lambda' : np.random.uniform(low=0, high=30),
             'learning_rate' : np.random.uniform(low=0.01, high=0.3),
             'max_depth' : np.random.randint(low=3, high=8)
             }
    xgb_params_list.append(params)
    
    model = XGBClassifier(n_jobs=-1, tree_method='gpu_hist', objective='multi:softmax', num_class=10, random_state=18,
                              **params)
    
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
    
xgb_params_list = pd.DataFrame.from_dict(xgb_params_list)
xgb_params_list.to_csv('xgb_params_list.csv', index=False)
xgb_params_list.head(10)

Score : 0.5244140625
(2048, 750) (20480, 750) 

Score : 0.53173828125
(2048, 760) (20480, 760) 

Score : 0.5224609375
(2048, 770) (20480, 770) 

Score : 0.42724609375
(2048, 780) (20480, 780) 

Score : 0.49755859375
(2048, 790) (20480, 790) 

Score : 0.52880859375
(2048, 800) (20480, 800) 

Score : 0.5341796875
(2048, 810) (20480, 810) 

Score : 0.48291015625
(2048, 820) (20480, 820) 

Score : 0.4912109375
(2048, 830) (20480, 830) 

Score : 0.48876953125
(2048, 840) (20480, 840) 

Score : 0.51513671875
(2048, 850) (20480, 850) 

Score : 0.47900390625
(2048, 860) (20480, 860) 

Score : 0.521484375
(2048, 870) (20480, 870) 

Score : 0.509765625
(2048, 880) (20480, 880) 

Score : 0.46337890625
(2048, 890) (20480, 890) 

Score : 0.51806640625
(2048, 900) (20480, 900) 

Score : 0.486328125
(2048, 910) (20480, 910) 

Score : 0.4775390625
(2048, 920) (20480, 920) 

Score : 0.53857421875
(2048, 930) (20480, 930) 

Score : 0.462890625
(2048, 940) (20480, 940) 

Score : 0.51123046875
(2048, 950)

Unnamed: 0,n_estimators,colsample_bytree,subsample,reg_alpha,reg_lambda,learning_rate,max_depth
0,942,0.968659,0.97142,9.820418,28.961912,0.278052,5
1,947,0.614533,0.820757,3.889623,12.298325,0.274399,6
2,904,0.689063,0.929035,11.175177,7.44612,0.019145,5
3,790,0.990205,0.859184,28.698659,27.800165,0.023393,3
4,432,0.669152,0.921836,17.255938,28.995191,0.196755,4
5,771,0.76594,0.727291,0.205481,11.291804,0.272442,7
6,334,0.950341,0.821278,1.482066,10.714593,0.139132,4
7,844,0.595454,0.658106,28.299421,25.097476,0.261306,7
8,180,0.518369,0.964693,18.203937,17.106788,0.174357,6
9,267,0.847582,0.634792,14.356008,25.888397,0.136727,4


```python
xgb_params_list = pd.read_csv('xgb_params_list.csv')

for i in xgb_params_list.index:
    params = dict(lgb_params_list.loc[i])

    model = XGBClassifier(n_jobs=-1, tree_method='gpu_hist', objective='multi:softmax', num_class=10, random_state=18,
                              **params)
    
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
```

### CatBoost

In [10]:
num_models=50
cat_params_list=[]

for _ in range(num_models):
    params = {'n_estimators' : np.random.randint(low=100, high=1000), 
             'subsample' : np.random.uniform(low=0.5, high=1),
             'reg_lambda' : np.random.uniform(low=0, high=30),
             'learning_rate' : np.random.uniform(low=0.01, high=0.3),
              'depth' : np.random.randint(low=3, high=8),
             }
    cat_params_list.append(params)
    
    model = CatBoostClassifier(task_type='GPU', bootstrap_type='Bernoulli', loss_function='MultiClass', eval_metric='Accuracy', 
                               verbose=False, random_seed=18,
                                       **params)
    
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
    
cat_params_list = pd.DataFrame.from_dict(cat_params_list)
cat_params_list.to_csv('cat_params_list.csv', index=False)
cat_params_list.head(10)



Score : 0.5390625




(2048, 1250) (20480, 1250) 





Score : 0.56396484375




(2048, 1260) (20480, 1260) 





Score : 0.50732421875




(2048, 1270) (20480, 1270) 





Score : 0.56298828125




(2048, 1280) (20480, 1280) 





Score : 0.5615234375




(2048, 1290) (20480, 1290) 





Score : 0.52978515625




(2048, 1300) (20480, 1300) 





Score : 0.5576171875




(2048, 1310) (20480, 1310) 





Score : 0.5283203125




(2048, 1320) (20480, 1320) 





Score : 0.5546875




(2048, 1330) (20480, 1330) 





Score : 0.53564453125




(2048, 1340) (20480, 1340) 





Score : 0.560546875




(2048, 1350) (20480, 1350) 





Score : 0.509765625




(2048, 1360) (20480, 1360) 





Score : 0.5595703125




(2048, 1370) (20480, 1370) 





Score : 0.56201171875




(2048, 1380) (20480, 1380) 





Score : 0.55908203125




(2048, 1390) (20480, 1390) 





Score : 0.43212890625




(2048, 1400) (20480, 1400) 





Score : 0.55859375




(2048, 1410) (20480, 1410) 





Score : 0.5517578125




(2048, 1420) (20480, 1420) 





Score : 0.54345703125




(2048, 1430) (20480, 1430) 





Score : 0.55029296875




(2048, 1440) (20480, 1440) 





Score : 0.56103515625




(2048, 1450) (20480, 1450) 





Score : 0.56201171875




(2048, 1460) (20480, 1460) 





Score : 0.572265625




(2048, 1470) (20480, 1470) 





Score : 0.544921875




(2048, 1480) (20480, 1480) 





Score : 0.49951171875




(2048, 1490) (20480, 1490) 





Score : 0.55615234375




(2048, 1500) (20480, 1500) 





Score : 0.54931640625




(2048, 1510) (20480, 1510) 





Score : 0.5458984375




(2048, 1520) (20480, 1520) 





Score : 0.50732421875




(2048, 1530) (20480, 1530) 





Score : 0.54736328125




(2048, 1540) (20480, 1540) 





Score : 0.57470703125




(2048, 1550) (20480, 1550) 





Score : 0.552734375




(2048, 1560) (20480, 1560) 





Score : 0.53564453125




(2048, 1570) (20480, 1570) 





Score : 0.5439453125




(2048, 1580) (20480, 1580) 





Score : 0.55126953125




(2048, 1590) (20480, 1590) 





Score : 0.52734375




(2048, 1600) (20480, 1600) 





Score : 0.56591796875




(2048, 1610) (20480, 1610) 





Score : 0.55810546875




(2048, 1620) (20480, 1620) 





Score : 0.54345703125




(2048, 1630) (20480, 1630) 





Score : 0.54638671875




(2048, 1640) (20480, 1640) 





Score : 0.55615234375




(2048, 1650) (20480, 1650) 





Score : 0.5458984375




(2048, 1660) (20480, 1660) 





Score : 0.57421875




(2048, 1670) (20480, 1670) 





Score : 0.5341796875




(2048, 1680) (20480, 1680) 





Score : 0.55908203125




(2048, 1690) (20480, 1690) 





Score : 0.4638671875




(2048, 1700) (20480, 1700) 





Score : 0.55078125




(2048, 1710) (20480, 1710) 





Score : 0.5556640625




(2048, 1720) (20480, 1720) 





Score : 0.5498046875




(2048, 1730) (20480, 1730) 





Score : 0.54931640625




(2048, 1740) (20480, 1740) 



Unnamed: 0,n_estimators,subsample,reg_lambda,learning_rate,depth
0,572,0.516861,19.235388,0.079548,6
1,872,0.666039,14.389545,0.235731,6
2,277,0.787349,4.153752,0.025905,4
3,403,0.513263,28.069161,0.298804,4
4,427,0.921287,6.543541,0.273615,4
5,322,0.760377,26.169278,0.086207,5
6,931,0.759618,17.795291,0.166271,6
7,275,0.563454,19.575724,0.295691,7
8,406,0.515476,17.366784,0.220607,6
9,621,0.530726,22.589044,0.235982,7


```python
cat_params_list = pd.read_csv('cat_params_list.csv')

for i in cat_params_list.index:
    params = dict(cat_params_list.loc[i])

    model = CatBoostClassifier(task_type='GPU', bootstrap_type='Bernoulli', loss_function='MultiClass', eval_metric='Accuracy', 
                               verbose=False, random_seed=18,
                                       **params)
    
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
```

### Random Forest

In [11]:
num_models=50
rf_params_list=[]

for _ in range(num_models):
    params = {'n_estimators' : np.random.randint(low=100, high=500), 
             'max_depth' : np.random.randint(low=3, high=20),
              'criterion' : np.random.choice(['gini','entropy'])
             }
    
    rf_params_list.append(params)
    
    model = RandomForestClassifier(n_jobs=-1, random_state=18, **params)
    
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
    
rf_params_list = pd.DataFrame.from_dict(rf_params_list)
rf_params_list.to_csv('rf_params_list.csv', index=False)
rf_params_list.head(10)

Score : 0.40771484375
(2048, 1750) (20480, 1750) 

Score : 0.46484375
(2048, 1760) (20480, 1760) 

Score : 0.4912109375
(2048, 1770) (20480, 1770) 

Score : 0.47998046875
(2048, 1780) (20480, 1780) 

Score : 0.45703125
(2048, 1790) (20480, 1790) 

Score : 0.4404296875
(2048, 1800) (20480, 1800) 

Score : 0.4794921875
(2048, 1810) (20480, 1810) 

Score : 0.42919921875
(2048, 1820) (20480, 1820) 

Score : 0.466796875
(2048, 1830) (20480, 1830) 

Score : 0.484375
(2048, 1840) (20480, 1840) 

Score : 0.4765625
(2048, 1850) (20480, 1850) 

Score : 0.47412109375
(2048, 1860) (20480, 1860) 

Score : 0.43994140625
(2048, 1870) (20480, 1870) 

Score : 0.478515625
(2048, 1880) (20480, 1880) 

Score : 0.45263671875
(2048, 1890) (20480, 1890) 

Score : 0.47021484375
(2048, 1900) (20480, 1900) 

Score : 0.455078125
(2048, 1910) (20480, 1910) 

Score : 0.48681640625
(2048, 1920) (20480, 1920) 

Score : 0.48388671875
(2048, 1930) (20480, 1930) 

Score : 0.4453125
(2048, 1940) (20480, 1940) 

Score : 

Unnamed: 0,n_estimators,max_depth,criterion
0,428,3,entropy
1,213,9,gini
2,443,16,gini
3,310,12,gini
4,154,9,entropy
5,477,4,gini
6,451,19,entropy
7,499,4,entropy
8,454,7,gini
9,284,11,gini


```python
rf_params_list = pd.read_csv('rf_params_list.csv')

for i in rf_params_list.index:
    params = dict(rf_params_list.loc[i])


    model = RandomForestClassifier(n_jobs=-1, random_state=18, **params)
    
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
```

### ExtraTrees

In [12]:
num_models=50
et_params_list=[]

for _ in range(num_models):
    params = {'n_estimators' : np.random.randint(low=100, high=500), 
             'max_depth' : np.random.randint(low=3, high=20),
              'criterion' : np.random.choice(['gini','entropy'])
             }
    
    et_params_list.append(params)
    
    model = ExtraTreesClassifier(n_jobs=-1, random_state=18, **params)
    
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
        
    
et_params_list = pd.DataFrame.from_dict(et_params_list)
et_params_list.to_csv('et_params_list.csv', index=False)
et_params_list.head(10)

Score : 0.44970703125
(2048, 2250) (20480, 2250) 

Score : 0.4443359375
(2048, 2260) (20480, 2260) 

Score : 0.42724609375
(2048, 2270) (20480, 2270) 

Score : 0.45751953125
(2048, 2280) (20480, 2280) 

Score : 0.4404296875
(2048, 2290) (20480, 2290) 

Score : 0.43310546875
(2048, 2300) (20480, 2300) 

Score : 0.4423828125
(2048, 2310) (20480, 2310) 

Score : 0.44580078125
(2048, 2320) (20480, 2320) 

Score : 0.47314453125
(2048, 2330) (20480, 2330) 

Score : 0.46435546875
(2048, 2340) (20480, 2340) 

Score : 0.4638671875
(2048, 2350) (20480, 2350) 

Score : 0.46337890625
(2048, 2360) (20480, 2360) 

Score : 0.4521484375
(2048, 2370) (20480, 2370) 

Score : 0.43701171875
(2048, 2380) (20480, 2380) 

Score : 0.42333984375
(2048, 2390) (20480, 2390) 

Score : 0.46337890625
(2048, 2400) (20480, 2400) 

Score : 0.4365234375
(2048, 2410) (20480, 2410) 

Score : 0.44580078125
(2048, 2420) (20480, 2420) 

Score : 0.46044921875
(2048, 2430) (20480, 2430) 

Score : 0.45751953125
(2048, 2440) (2

Unnamed: 0,n_estimators,max_depth,criterion
0,367,7,gini
1,448,7,gini
2,125,7,entropy
3,213,15,gini
4,197,8,entropy
5,106,10,gini
6,316,6,gini
7,163,19,gini
8,489,18,gini
9,449,11,entropy


```python
et_params_list = pd.read_csv('et_params_list.csv')

for i in et_params_list.index:
    params = dict(et_params_list.loc[i])

    model = ExtraTreesClassifier(n_jobs=-1, random_state=18, **params)
    
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
```

### LogisticRegression

In [13]:
num_models=50
logistic_params_list=[]

for _ in range(num_models):
    params = {
             'l1_ratio' : np.random.uniform(low=0, high=1),
              'C' : loguniform.rvs(1e-4, 10)
             }
    
    logistic_params_list.append(params)
    
    model = LogisticRegression(solver='saga', penalty='elasticnet', random_state=18, max_iter=10000, multi_class='multinomial', 
                               n_jobs=-1, **params
                              )
    
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba', n_jobs=5)
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
        
    
logistic_params_list = pd.DataFrame.from_dict(logistic_params_list)
logistic_params_list.to_csv('logistic_params_list.csv', index=False)
logistic_params_list.head(10)

Score : 0.28515625
(2048, 2750) (20480, 2750) 

Score : 0.34765625
(2048, 2760) (20480, 2760) 

Score : 0.11083984375
(2048, 2770) (20480, 2770) 

Score : 0.2783203125
(2048, 2780) (20480, 2780) 

Score : 0.2412109375
(2048, 2790) (20480, 2790) 

Score : 0.28662109375
(2048, 2800) (20480, 2800) 

Score : 0.29541015625
(2048, 2810) (20480, 2810) 

Score : 0.11083984375
(2048, 2820) (20480, 2820) 

Score : 0.302734375
(2048, 2830) (20480, 2830) 

Score : 0.3427734375
(2048, 2840) (20480, 2840) 

Score : 0.11083984375
(2048, 2850) (20480, 2850) 

Score : 0.11181640625
(2048, 2860) (20480, 2860) 

Score : 0.17236328125
(2048, 2870) (20480, 2870) 

Score : 0.29052734375
(2048, 2880) (20480, 2880) 

Score : 0.232421875
(2048, 2890) (20480, 2890) 

Score : 0.11083984375
(2048, 2900) (20480, 2900) 

Score : 0.11083984375
(2048, 2910) (20480, 2910) 

Score : 0.33544921875
(2048, 2920) (20480, 2920) 

Score : 0.11083984375
(2048, 2930) (20480, 2930) 

Score : 0.32958984375
(2048, 2940) (20480, 2

Unnamed: 0,l1_ratio,C
0,0.107496,9.288898
1,0.896127,0.571785
2,0.921306,0.000177
3,0.641018,9.709969
4,0.042959,0.0039
5,0.347702,6.418053
6,0.667746,0.099545
7,0.922505,0.000531
8,0.408538,0.079419
9,0.609769,0.658013


```python
logistic_params_list = pd.read_csv('logistic_params_list.csv')

for i in logistic_params_list.index:
    params = dict(logistic_params_list.loc[i])

    model = LogisticRegression(solver='saga', penalty='elasticnet', random_state=18, max_iter=10000, multi_class='multinomial', 
                               n_jobs=-1, **params
                              )
    
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
```

### SVM

In [14]:
num_models=50
svm_params_list=[]

for _ in range(num_models):
    params = {'C' : loguniform.rvs(1e-4, 10),
              'kernel' : np.random.choice(['rbf','linear','poly','sigmoid']),
              'degree' : np.random.uniform(low=0, high=10),
              'coef0' : np.random.uniform(low=0, high=10)
             }
    
    svm_params_list.append(params)
    
    model = SVC(random_state=18, **params, probability=True)

    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba', n_jobs=-1)
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
        
    
svm_params_list = pd.DataFrame.from_dict(svm_params_list)
svm_params_list.to_csv('svm_params_list.csv', index=False)
svm_params_list.head(10)

Score : 0.2294921875
(2048, 3250) (20480, 3250) 

Score : 0.203125
(2048, 3260) (20480, 3260) 

Score : 0.17236328125
(2048, 3270) (20480, 3270) 

Score : 0.17529296875
(2048, 3280) (20480, 3280) 

Score : 0.2958984375
(2048, 3290) (20480, 3290) 

Score : 0.26806640625
(2048, 3300) (20480, 3300) 

Score : 0.2998046875
(2048, 3310) (20480, 3310) 

Score : 0.35888671875
(2048, 3320) (20480, 3320) 

Score : 0.28173828125
(2048, 3330) (20480, 3330) 

Score : 0.23583984375
(2048, 3340) (20480, 3340) 

Score : 0.3564453125
(2048, 3350) (20480, 3350) 

Score : 0.1953125
(2048, 3360) (20480, 3360) 

Score : 0.29638671875
(2048, 3370) (20480, 3370) 

Score : 0.11181640625
(2048, 3380) (20480, 3380) 

Score : 0.2568359375
(2048, 3390) (20480, 3390) 

Score : 0.22265625
(2048, 3400) (20480, 3400) 

Score : 0.17431640625
(2048, 3410) (20480, 3410) 

Score : 0.3330078125
(2048, 3420) (20480, 3420) 

Score : 0.353515625
(2048, 3430) (20480, 3430) 

Score : 0.29638671875
(2048, 3440) (20480, 3440) 



Unnamed: 0,C,kernel,degree,coef0
0,0.000462,rbf,6.500212,5.234756
1,7.750169,sigmoid,8.375497,5.171611
2,0.962193,sigmoid,5.184296,6.581142
3,0.000213,sigmoid,7.99068,2.791058
4,0.007019,linear,6.571852,6.132748
5,0.114085,rbf,3.017537,6.285275
6,1.115022,linear,3.137375,7.23633
7,1.127971,rbf,7.680175,9.140542
8,0.004068,linear,7.188356,0.427105
9,0.036825,rbf,4.795133,5.98218


```python
svm_params_list = pd.read_csv('svm_params_list.csv')

for i in svm_params_list.index:
    params = dict(svm_params_list.loc[i])

    model = SVC(random_state=18, **params, probability=True)

    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba', n_jobs=-1)
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
```

### GaussianProcess

In [15]:
model = GaussianProcessClassifier(random_state=18, n_jobs=-1, max_iter_predict=1000)
train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
model.fit(Xtrain, Ytrain)
stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

print(stack_train_1.shape, stack_test_1.shape, '\n')

Score : 0.20849609375
(2048, 3750) (20480, 3750) 



### NaiveBayes

In [16]:
for model in [GaussianNB(), MultinomialNB(), ComplementNB()]:
    
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba', n_jobs=-1)
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')

Score : 0.27587890625
(2048, 3760) (20480, 3760) 

Score : 0.24853515625
(2048, 3770) (20480, 3770) 

Score : 0.25244140625
(2048, 3780) (20480, 3780) 



### NearestNeighbors

In [17]:
num_models=50
knn_params_list=[]

for _ in range(num_models):
    params = {'n_neighbors' : np.random.randint(low=2, high=512),
              'p' : np.random.uniform(low=1, high=10)
              
             }
    
    knn_params_list.append(params)
    
    model = KNeighborsClassifier(weights='distance', n_jobs=-1, **params)
    
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
        
    
knn_params_list = pd.DataFrame.from_dict(knn_params_list)
knn_params_list.to_csv('knn_params_list.csv', index=False)
knn_params_list.head(10)

Score : 0.16552734375
(2048, 3790) (20480, 3790) 

Score : 0.16455078125
(2048, 3800) (20480, 3800) 

Score : 0.205078125
(2048, 3810) (20480, 3810) 

Score : 0.2255859375
(2048, 3820) (20480, 3820) 

Score : 0.1875
(2048, 3830) (20480, 3830) 

Score : 0.18212890625
(2048, 3840) (20480, 3840) 

Score : 0.19287109375
(2048, 3850) (20480, 3850) 

Score : 0.173828125
(2048, 3860) (20480, 3860) 

Score : 0.21044921875
(2048, 3870) (20480, 3870) 

Score : 0.18017578125
(2048, 3880) (20480, 3880) 

Score : 0.2001953125
(2048, 3890) (20480, 3890) 

Score : 0.19873046875
(2048, 3900) (20480, 3900) 

Score : 0.23046875
(2048, 3910) (20480, 3910) 

Score : 0.1875
(2048, 3920) (20480, 3920) 

Score : 0.177734375
(2048, 3930) (20480, 3930) 

Score : 0.19287109375
(2048, 3940) (20480, 3940) 

Score : 0.16455078125
(2048, 3950) (20480, 3950) 

Score : 0.18896484375
(2048, 3960) (20480, 3960) 

Score : 0.16748046875
(2048, 3970) (20480, 3970) 

Score : 0.22607421875
(2048, 3980) (20480, 3980) 

Score

Unnamed: 0,n_neighbors,p
0,201,7.993459
1,268,8.809664
2,448,2.947875
3,33,2.144354
4,428,5.300591
5,68,6.467453
6,344,3.777199
7,61,7.75255
8,446,2.502033
9,299,4.863291


```python
knn_params_list = pd.read_csv('knn_params_list.csv')

for i in knn_params_list.index:
    params = dict(knn_params_list.loc[i])

    model = KNeighborsClassifier(weights='distance', n_jobs=-1, **params)
    
    train_preds = cross_val_predict(model, Xtrain, Ytrain, cv=5, method='predict_proba')
    print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
    stack_train_1 = np.concatenate((stack_train_1, train_preds), axis=1)
    model.fit(Xtrain, Ytrain)
    stack_test_1 = np.concatenate((stack_test_1, model.predict_proba(Xtest)), axis=1)

    pd.DataFrame(stack_train_1).to_csv('stack_train_1.csv', index=False)
    pd.DataFrame(stack_test_1).to_csv('stack_test_1.csv', index=False)

    print(stack_train_1.shape, stack_test_1.shape, '\n')
```

## Ensemble Lv. 1

Lv. 1 에서는 Lv. 0 모델들의 예측값(확률)들을 통합하는 모델을 만듭니다.  
각 모델들은 통상적으로 괜찮다고 여겨질 수 있는 파라미터를 이용해 종류별로 1개씩 만듭니다. 

```python
stack_train_1 = np.array(pd.read_csv('stack_train_1.csv'))
stack_test_1 = np.array(pd.read_csv('stack_test_1.csv'))
```

In [18]:
stack_train_2 = np.array(pd.DataFrame(data=None, index=train.index))
stack_test_2 = np.array(pd.DataFrame(data=None, index=submission.index))

stack_train_2.shape, stack_test_2.shape

((2048, 0), (20480, 0))

### XGBoost

In [19]:
params = {'n_estimators': 284, 'max_depth': 5, 'reg_lambda': 23.07067677225717, 'reg_alpha': 0.01019264032218297, 
          'subsample': 0.5023461874889559, 'colsample_bytree': 0.8286922539714039}

xgb_model = XGBClassifier(n_jobs=-1, tree_method='gpu_hist', objective='multi:softmax', num_class=10, 
                      random_state=18, learning_rate=0.03, **params)

train_preds = cross_val_predict(xgb_model, stack_train_1, Ytrain, cv=5, method='predict_proba')
print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
stack_train_2 = np.concatenate((stack_train_2, train_preds), axis=1)

xgb_model.fit(stack_train_1, Ytrain)
stack_test_2 = np.concatenate((stack_test_2, xgb_model.predict_proba(stack_test_1)), axis=1)


pd.DataFrame(stack_train_2).to_csv('stack_train_2.csv', index=False)
pd.DataFrame(stack_test_2).to_csv('stack_test_2.csv', index=False)

print(stack_train_2.shape, stack_test_2.shape, '\n')

Score : 0.9150390625
(2048, 10) (20480, 10) 



### Keras (MLP)

In [20]:
def build_fn2(init='he_uniform', learning_rate=0.0001, final_dense_size=64):
    dropout_rate=0.5

    model_in = Input(shape = (stack_train_1.shape[1], ))
    
    x = Dense(units=final_dense_size*8, kernel_initializer=init)(model_in)
    x = BatchNormalization()(x)
    x = Activation(mish)(x)
    x = Dropout(dropout_rate)(x)
    
    x = Dense(units=final_dense_size*4, kernel_initializer=init)(x)
    x = BatchNormalization()(x)
    x = Activation(mish)(x)
    x = Dropout(dropout_rate)(x)
    
    x = Dense(units=final_dense_size*2, kernel_initializer=init)(x)
    x = BatchNormalization()(x)
    x = Activation(mish)(x)
    x = Dropout(dropout_rate)(x)
    
    x = Dense(units=final_dense_size, kernel_initializer=init)(x)
    x = BatchNormalization()(x)
    x = Activation(mish)(x)
    x = Dropout(dropout_rate)(x)
    

    model_out = Dense(units=10, activation='softmax')(x)
    model = Model(model_in, model_out)
    model.compile(loss='sparse_categorical_crossentropy', optimizer=Nadam(learning_rate=learning_rate), metrics=['accuracy'])

    return model

In [21]:
params = {'epochs': 8, 'final_dense_size': 105}
keras_model = KerasRegressor(build_fn2, **params, batch_size=32, shuffle=True, verbose=0, learning_rate=0.0001)

train_preds = cross_val_predict(keras_model, stack_train_1, Ytrain, cv=5)
print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
stack_train_2 = np.concatenate((stack_train_2, train_preds), axis=1)

keras_model.fit(stack_train_1, Ytrain)
stack_test_2 = np.concatenate((stack_test_2, keras_model.predict(stack_test_1)), axis=1)


pd.DataFrame(stack_train_2).to_csv('stack_train_2.csv', index=False)
pd.DataFrame(stack_test_2).to_csv('stack_test_2.csv', index=False)

print(stack_train_2.shape, stack_test_2.shape, '\n')
K.clear_session()

Score : 0.9140625
(2048, 20) (20480, 20) 



### LightGBM

In [22]:
params = {'n_estimators': 211, 'num_leaves': 181, 'reg_lambda': 10.072965139609353, 'reg_alpha': 0.8557393193365549, 
          'subsample': 0.7309764427665186, 'colsample_bytree': 0.9816611006194951}

lgb_model = LGBMClassifier(boosting_type='dart', objective='softmax', tree_learner='feature', num_class=10, 
                       subsample_freq=1, random_state=18, drop_rate=0.5, max_drop=-1, **params)

train_preds = cross_val_predict(lgb_model, stack_train_1, Ytrain, cv=5, method='predict_proba')
print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
stack_train_2 = np.concatenate((stack_train_2, train_preds), axis=1)

lgb_model.fit(stack_train_1, Ytrain)
stack_test_2 = np.concatenate((stack_test_2, lgb_model.predict_proba(stack_test_1)), axis=1)


pd.DataFrame(stack_train_2).to_csv('stack_train_2.csv', index=False)
pd.DataFrame(stack_test_2).to_csv('stack_test_2.csv', index=False)

print(stack_train_2.shape, stack_test_2.shape, '\n')

Score : 0.91162109375
(2048, 30) (20480, 30) 



### RF

In [23]:
params = {'n_estimators': 114, 'max_features': 0.02328004365242331, 'max_samples': 0.2691106016152467, 
          'criterion': 'entropy', 'max_depth': 14}
rf_model = RandomForestClassifier(n_jobs=-1, random_state=18, **params)

train_preds = cross_val_predict(rf_model, stack_train_1, Ytrain, cv=5, method='predict_proba')
print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
stack_train_2 = np.concatenate((stack_train_2, train_preds), axis=1)

rf_model.fit(stack_train_1, Ytrain)
stack_test_2 = np.concatenate((stack_test_2, rf_model.predict_proba(stack_test_1)), axis=1)


pd.DataFrame(stack_train_2).to_csv('stack_train_2.csv', index=False)
pd.DataFrame(stack_test_2).to_csv('stack_test_2.csv', index=False)

print(stack_train_2.shape, stack_test_2.shape, '\n')

Score : 0.91357421875
(2048, 40) (20480, 40) 



### ET

In [24]:
params = {'n_estimators': 58, 'max_features': 0.017429888281159878, 'criterion': 'entropy', 'max_depth': 5}
et_model = ExtraTreesClassifier(n_jobs=-1, random_state=18, **params)

train_preds = cross_val_predict(et_model, stack_train_1, Ytrain, cv=5, method='predict_proba')
print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
stack_train_2 = np.concatenate((stack_train_2, train_preds), axis=1)

et_model.fit(stack_train_1, Ytrain)
stack_test_2 = np.concatenate((stack_test_2, et_model.predict_proba(stack_test_1)), axis=1)


pd.DataFrame(stack_train_2).to_csv('stack_train_2.csv', index=False)
pd.DataFrame(stack_test_2).to_csv('stack_test_2.csv', index=False)

print(stack_train_2.shape, stack_test_2.shape, '\n')

Score : 0.9140625
(2048, 50) (20480, 50) 



### LogisticRegression

In [25]:
params = {'C': 0.014164150051132234, 'l1_ratio': 0.7717991634542027}
logistic_model = LogisticRegression(n_jobs=-1, solver='saga', penalty='elasticnet', random_state=18, max_iter=10000, verbose=True,
                                    multi_class='multinomial', **params)


train_preds = cross_val_predict(logistic_model, stack_train_1, Ytrain, cv=5, method='predict_proba', n_jobs=5)
print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
stack_train_2 = np.concatenate((stack_train_2, train_preds), axis=1)

logistic_model.fit(stack_train_1, Ytrain)
stack_test_2 = np.concatenate((stack_test_2, logistic_model.predict_proba(stack_test_1)), axis=1)


pd.DataFrame(stack_train_2).to_csv('stack_train_2.csv', index=False)
pd.DataFrame(stack_test_2).to_csv('stack_test_2.csv', index=False)

print(stack_train_2.shape, stack_test_2.shape, '\n')

Score : 0.916015625


[Parallel(n_jobs=-1)]: Using backend ThreadingBackend with 6 concurrent workers.


convergence after 49 epochs took 23 seconds


[Parallel(n_jobs=-1)]: Done   1 out of   1 | elapsed:   23.6s finished


(2048, 60) (20480, 60) 



### Support Vector Machine

In [31]:
params = {'kernel': 'linear', 'C': 0.04117756794800978, 'gamma': 'scale', 'coef0': 3.015125901030289}
svc_model = SVC(random_state=18, probability=True, break_ties=True, verbose=True, **params)

train_preds = cross_val_predict(svc_model, stack_train_1, Ytrain, cv=5, method='predict_proba', n_jobs=-1)
print(f"Score : {accuracy_score(Ytrain, np.argmax(train_preds, axis=1))}")
stack_train_2 = np.concatenate((stack_train_2, train_preds), axis=1)

svc_model.fit(stack_train_1, Ytrain)
stack_test_2 = np.concatenate((stack_test_2, svc_model.predict_proba(stack_test_1)), axis=1)


pd.DataFrame(stack_train_2).to_csv('stack_train_2.csv', index=False)
pd.DataFrame(stack_test_2).to_csv('stack_test_2.csv', index=False)

print(stack_train_2.shape, stack_test_2.shape, '\n')

Score : 0.9130859375
[LibSVM](2048, 70) (20480, 70) 



## Ensemble Lv. 2

Lv. 2 에서는 Lv. 1 의 결과를 통합하는 모델을 만듭니다.  
Fold 수를 20으로 늘려서 더 정밀한 교차검증을 진행했습니다. 
시간이 없어서 단순평균이나 가중평균을 구하지는 못했고, LogisticRegression 단일 모델을 만들기로 했습니다. 
똑바로 생각한건지 모르겠지만, 모델을 고른 기준은 
- 못봤던 데이터를 맞추는 능력(교차검증예측)
- 봤던 데이터를 맞추는 능력(학습후 학습한 것을 다시 예측)  
- 위 두개의 점수가 비슷하면서 최대한 점수가 좋은것

이었습니다.

봤던 데이터는 잘 맞추는데, 못봤던 데이터를 못맞춘다면 과적합이 일어날 가능성이 높다고 생각했습니다.  
이 노트북 파일 말고 이전 실험에서 Final Model 로 KNN 을 튜닝할때는 CV 는 0.916 이었지만, 재예측 점수는 1.0 이었습니다.  
XGBoost 는 CV가 0.915 정도에, 재예측 점수가 0.93 정도였습니다.  
  
  재예측 점수가 과연 의미가 있었는지는 잘 모르겠습니다. 다른 파일들의 Private Score 들도 공개가 되면 피드백을 해볼 수 있을 것 같은데... 알 수가 없네요...  
  
Public 이 1%여서 조금이라도 Public 에 대해서 오버피팅이 나면 최종 랭킹에서 장난없을 것 같아서 표시되는 점수가 정확도가 적더라도 교차검증 점수만 생각했습니다.

```python
stack_train_2 = pd.read_csv('stack_train_2.csv')
stack_test_2 = pd.read_csv('stack_test_2.csv')
```

## LogisticRegression

In [32]:
params = {'C': 0.003072518594816285, 'l1_ratio': 0.015667092361463714}
model = LogisticRegression(n_jobs=-1, solver='saga', penalty='elasticnet', random_state=18, max_iter=10000, verbose=False,
                                multi_class='multinomial',**params)

print(f"CV Score : {cross_val_score(model, stack_train_2, Ytrain, cv=20, scoring='accuracy').mean()}")
model.fit(stack_train_2, Ytrain)
print(f"Re-Fit Score : {accuracy_score(Ytrain, model.predict(stack_train_2))}")

CV Score : 0.9180135160860459
Re-Fit Score : 0.91796875


In [33]:
model.fit(stack_train_2, Ytrain)

submission['digit'] = model.predict(stack_test_2)
submission.to_csv('Final_LogisticRegression_20CV_0.91801_Single.csv')
submission.head()

Unnamed: 0_level_0,digit
id,Unnamed: 1_level_1
2049,6
2050,9
2051,8
2052,0
2053,3
