In [2]:
from sklearn.svm import SVC
import numpy as np
import csv
import pandas as pd
import xgboost as xgb
from sklearn.model_selection import GridSearchCV
from sklearn.svm import SVC
from sklearn.ensemble import VotingClassifier
from sklearn.neural_network import MLPClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import KFold

### Preprocessing the data:

In [3]:
def addFeatures(data):
    """Append pairwise comparison features to a two-sided attribute matrix.

    Every row is treated as two equal-width halves laid side by side:
    columns ``[:k]`` and ``[k:]`` with ``k = n_cols // 2``.  For each
    attribute position the absolute difference and the element-wise product
    of the two halves are computed and appended after the original columns.

    Parameters
    ----------
    data : numpy.ndarray
        2-D array with an even number of columns, shape ``(n_rows, 2 * k)``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(n_rows, 4 * k)`` laid out as
        ``[data | abs(left - right) | left * right]``.  The appended columns
        are float64, so the result dtype is at least float64.

    Raises
    ------
    ValueError
        If ``data`` has an odd number of columns and therefore cannot be
        split into two equal halves.
    """
    num_attr, leftover = divmod(data.shape[1], 2)
    if leftover:
        raise ValueError(
            "addFeatures expects an even number of columns, got %d"
            % data.shape[1]
        )

    left, right = data[:, :num_attr], data[:, num_attr:]
    abs_diff = np.abs(left - right)  # named so the builtin abs() is not shadowed
    product = left * right

    # Cast to float64 to keep the dtype the original zero-initialised buffer had.
    pair_features = np.hstack((abs_diff, product)).astype(np.float64)
    return np.concatenate((data, pair_features), axis=1)

In [4]:
# Keep-mask over the 73 attributes of one side: 1 = keep, 0 = drop.
use_cols = np.ones(73)
use_cols[[17, 54]] = 0  # 17 = smile, 54 = pose photo (53 = color photo stays in)

# Repeat the mask for the second side and turn it into a boolean selector.
use_cols = np.tile(use_cols, 2).astype(bool)

# Column numbers of the kept attributes, shifted by one past column 0.
cols = np.arange(1, use_cols.size + 1)[use_cols]

In [5]:
def _load_labeled(path):
    """Read a labelled CSV once and split it into (features, labels).

    Column 0 is returned as a one-column DataFrame of labels; the columns
    listed in ``cols`` are returned as a DataFrame whose column labels are
    reset to 0..n-1 (the values go through a plain ndarray).
    """
    frame = pd.read_csv(path, header=None, usecols=np.concatenate(([0], cols)))
    labels = frame[[0]]
    features = pd.DataFrame(frame.drop(columns=0).values)
    return features, labels


# Each labelled file is parsed a single time instead of once per target.
X_train, y_train = _load_labeled(r'train.csv')
X_validation, y_validation = _load_labeled(r'validation.csv')

# The test file is read for the feature columns only.
test = pd.DataFrame(pd.read_csv(r'test.csv', header=None, usecols=cols).values)


In [7]:
# Fold the validation split into the training data.  pd.concat replaces
# DataFrame.append, which was removed in pandas 2.0.
# NOTE: run this cell once per kernel session; a second run would stack the
# validation rows onto X_train again.
X_train = pd.concat([X_train, X_validation], ignore_index=True)
y_train = pd.concat([y_train, y_validation], ignore_index=True)

# Flatten the one-column label frame into a 1-D vector.
y_train = y_train.values.ravel()


In [44]:
# Normalize the data: divide every column by its maximum over X_train.
max_feature = np.max(X_train, axis=0)
# Bump zero maxima to 1 so such columns are divided by 1 instead of 0
# (division by zero would put NaN/inf into the features); every non-zero
# maximum is left unchanged.
max_feature = max_feature + (max_feature == 0)
X_train = X_train * 1. / max_feature

# The test set is scaled with the training maxima.  X_validation is not
# scaled in this cell.
#X_validation = X_validation*1./max_feature
test = test * 1. / max_feature

In [9]:
# Convert to plain ndarrays for addFeatures.  np.asarray yields the same
# array as DataFrame.values but also accepts an ndarray, so re-running this
# cell no longer fails with AttributeError.
X_train = np.asarray(X_train)
test = np.asarray(test)

In [11]:
# Extend both matrices with the pairwise features built by addFeatures.
# X_validation is not transformed in this cell.
# NOTE: not idempotent; a second run would derive features from the
# already-extended matrices.
X_train, test = (addFeatures(matrix) for matrix in (X_train, test))

### Grid Search

In [17]:
def param_search(model, param, X=None, y=None, cv=5):
    """Run an exhaustive cross-validated grid search and report the winner.

    Parameters
    ----------
    model : estimator
        Unfitted scikit-learn compatible estimator to tune.
    param : dict or list of dict
        Parameter grid handed to ``GridSearchCV``.
    X, y : array-like, optional
        Training features and labels.  When omitted, the notebook-level
        ``X_train`` / ``y_train`` are used, which is the original behaviour.
    cv : int, optional
        Number of cross-validation folds (default 5, as before).

    Returns
    -------
    GridSearchCV
        The fitted search object (``best_score_``, ``best_params_``, ...).
    """
    if X is None:
        X = X_train
    if y is None:
        y = y_train

    grid = GridSearchCV(model, param, cv=cv, verbose=10, n_jobs=-1)
    grid.fit(X, y)

    # This prints the estimator that was passed in, i.e. its constructor
    # settings, not the tuned ones; the winning settings are in best_params_.
    print(model)
    print('Best accuracy score is ', grid.best_score_)
    print('Best parameter is ', grid.best_params_)
    return grid

Thinking: if the person is wearing sunglasses (16), then Narrow Eyes (37), Eyes Open (38), and Brown Eyes (70) do not matter.
If Eyes Open (38), then Brown Eyes (70) does not matter.

In [8]:
# Coarse sweep over tree count, depth and the sampling ratios:
# 3 * 1 * 3 * 2 * 2 * 2 * 1 = 72 candidates, i.e. 360 fits with 5-fold CV.
xgb_params = dict(
    n_estimators=[50, 150, 250],
    learning_rate=[0.1],
    max_depth=[3, 4, 5],
    subsample=[0.7, 1],
    colsample_bytree=[0.6, 0.9],
    colsample_bylevel=[0.6, 0.9],
    seed=[42],
)
grid_xgb = param_search(xgb.XGBClassifier(), xgb_params)

Fitting 5 folds for each of 72 candidates, totalling 360 fits
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=0.7 
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=0.7 
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=0.7 
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=0.7, score=0.782900, total=  42.3s
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=0.7, score=0.784700, total=  43.0s
[CV] colsample_

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  1.4min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=1, score=0.783300, total=  52.6s
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=1, score=0.784400, total=  52.1s
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=1 
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=1, score=0.785800, total=  53.1s
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=1, score=0.784400, total=  59.5s
[CV] colsample_bylevel=

[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  2.5min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7, score=0.802900, total= 1.7min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7, score=0.805900, total= 1.7min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7, score=0.810100, total= 1.5min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7, score=0.801200, total= 1.4min
[CV] colsamp

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed:  6.6min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1, score=0.810000, total= 2.7min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1, score=0.802600, total= 3.0min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1, score=0.809400, total= 3.0min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=0.7, score=0.814000, total= 4.4min
[CV] colsample

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 13.3min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=0.7, score=0.821300, total= 4.6min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=1, score=0.812000, total= 5.3min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=1, score=0.812600, total= 5.3min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=1, score=0.817800, total= 5.2min
[CV] colsample_byle

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 21.0min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=1, score=0.820300, total= 5.1min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=0.7, score=0.796000, total= 1.2min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1, score=0.790900, total= 1.4min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1, score=0.795700, total= 1.4min
[CV] colsample_bylevel=0.6

[Parallel(n_jobs=-1)]: Done  42 tasks      | elapsed: 26.2min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=150, seed=42, subsample=0.7, score=0.817100, total= 3.1min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=150, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=150, seed=42, subsample=0.7, score=0.815100, total= 3.1min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=150, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=150, seed=42, subsample=0.7, score=0.817400, total= 3.0min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=150, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=150, seed=42, subsample=1, score=0.813100, total= 3.2min
[CV] colsample_b

[Parallel(n_jobs=-1)]: Done  53 tasks      | elapsed: 37.8min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=0.7, score=0.829100, total= 5.5min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=0.7, score=0.828400, total= 5.4min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=1, score=0.824200, total= 6.4min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=1, score=0.824500, total= 6.3min
[CV] colsample_byl

[Parallel(n_jobs=-1)]: Done  64 tasks      | elapsed: 48.1min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=0.7, score=0.802500, total= 1.1min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=1, score=0.799500, total= 1.1min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=1, score=0.803400, total=  52.6s
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=150, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=1, score=0.803900, total=  51.7s
[CV] colsample_bylevel=0

[Parallel(n_jobs=-1)]: Done  77 tasks      | elapsed: 53.0min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=150, seed=42, subsample=1, score=0.828400, total= 1.9min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=150, seed=42, subsample=1, score=0.824000, total= 1.9min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=150, seed=42, subsample=1, score=0.826300, total= 1.9min
[CV] colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.832200, total= 2.8min
[CV] colsample

[Parallel(n_jobs=-1)]: Done  90 tasks      | elapsed: 61.2min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=0.7, score=0.790200, total=  29.3s
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=1, score=0.833600, total= 3.0min
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=0.7, score=0.787100, total=  29.5s
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=50, seed=42, subsample=0.7, score=0.788400, total=  29.4s
[CV] colsample_bylev

[Parallel(n_jobs=-1)]: Done 105 tasks      | elapsed: 65.1min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1, score=0.803700, total= 1.6min
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1, score=0.804400, total= 1.6min
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1, score=0.810100, total= 1.6min
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1, score=0.802700, total= 1.6min
[CV] colsample_byl

[Parallel(n_jobs=-1)]: Done 120 tasks      | elapsed: 72.8min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=1, score=0.810000, total= 2.7min
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=0.7, score=0.797400, total=  38.6s
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=0.7, score=0.790800, total=  38.8s
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=250, seed=42, subsample=1, score=0.818200, total= 2.7min
[CV] colsample_byleve

[Parallel(n_jobs=-1)]: Done 137 tasks      | elapsed: 78.6min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=150, seed=42, subsample=1, score=0.819000, total= 2.2min
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=150, seed=42, subsample=1, score=0.812600, total= 2.2min
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=150, seed=42, subsample=1, score=0.819400, total= 2.2min
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=0.7, score=0.824400, total= 3.1min
[CV] colsample

[Parallel(n_jobs=-1)]: Done 154 tasks      | elapsed: 90.0min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=0.7, score=0.806400, total= 1.2min
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=1, score=0.801700, total= 1.2min
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=1, score=0.799300, total=  59.7s
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=150, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=1, score=0.799400, total=  56.1s
[CV] colsample_bylevel=0

[Parallel(n_jobs=-1)]: Done 173 tasks      | elapsed: 102.0min


[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.832700, total= 4.1min
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.837800, total= 4.1min
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=1, score=0.835600, total= 5.0min
[CV] colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=1 
[CV]  colsample_bylevel=0.6, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=1, score=0.832200, total= 5.4min
[CV] colsample_byl

[Parallel(n_jobs=-1)]: Done 192 tasks      | elapsed: 112.6min


[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7, score=0.810300, total= 1.7min
[CV] colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7, score=0.803700, total= 1.7min
[CV] colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7, score=0.808800, total= 1.8min
[CV] colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1, score=0.802000, total= 2.1min
[CV] colsample_b

[Parallel(n_jobs=-1)]: Done 213 tasks      | elapsed: 123.5min


[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=0.7, score=0.789500, total=  38.7s
[CV] colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=0.7, score=0.799000, total=  38.7s
[CV] colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1, score=0.792000, total=  45.2s
[CV] colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1, score=0.792900, total=  45.0s
[CV] colsample_bylevel=0.

[Parallel(n_jobs=-1)]: Done 234 tasks      | elapsed: 584.7min


[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=0.7, score=0.827600, total=120.0min
[CV] colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=1 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=1, score=0.827600, total=129.9min
[CV] colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=1 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=1, score=0.823900, total=85.9min
[CV] colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=1, score=0.827600, total=95.3min
[CV] colsample_by

[Parallel(n_jobs=-1)]: Done 257 tasks      | elapsed: 730.3min


[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=150, seed=42, subsample=1, score=0.826800, total= 3.0min
[CV] colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=150, seed=42, subsample=1, score=0.823200, total=13.6min
[CV] colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=150, seed=42, subsample=1, score=0.826500, total=27.0min
[CV] colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.6, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.830800, total=66.6min
[CV] colsample

[Parallel(n_jobs=-1)]: Done 280 tasks      | elapsed: 811.5min


[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7, score=0.808200, total= 2.7min
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7, score=0.806500, total= 2.7min
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7, score=0.811700, total= 2.7min
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=1 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=3, n_estimators=150, seed=42, subsample=0.7, score=0.803300, total= 2.8min
[CV] colsamp

[Parallel(n_jobs=-1)]: Done 305 tasks      | elapsed: 832.1min


[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1, score=0.793000, total= 1.4min
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1, score=0.796800, total= 1.4min
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=150, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1, score=0.797400, total= 1.3min
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=150, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=50, seed=42, subsample=1, score=0.791100, total= 1.4min
[CV] colsample_bylevel=

[Parallel(n_jobs=-1)]: Done 330 tasks      | elapsed: 857.4min


[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=1, score=0.824800, total= 5.8min
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=0.7, score=0.806600, total= 1.2min
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=0.7, score=0.799800, total= 1.3min
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=50, seed=42, subsample=1 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=4, n_estimators=250, seed=42, subsample=1, score=0.830200, total= 5.9min
[CV] colsample_byleve

[Parallel(n_jobs=-1)]: Done 360 out of 360 | elapsed: 903.6min finished


XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,
       gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=100, nthread=-1,
       objective='binary:logistic', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)
Best accuracy score is  0.83582
Best parameter is  {'colsample_bylevel': 0.9, 'colsample_bytree': 0.9, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 250, 'seed': 42, 'subsample': 0.7}


In [23]:
# Refinement sweep: tree count, learning rate and depth are held fixed and
# only the sampling ratios vary (2 * 2 * 2 = 8 candidates, 40 fits with
# 5-fold CV).
xgb_params = dict(
    n_estimators=[250],
    learning_rate=[0.1],
    max_depth=[5],
    subsample=[0.7, 0.6],
    colsample_bytree=[0.9, 1],
    colsample_bylevel=[0.9, 1],
    seed=[42],
)
grid_xgb = param_search(xgb.XGBClassifier(), xgb_params)

Fitting 5 folds for each of 8 candidates, totalling 40 fits
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.835300, total= 6.9min
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.831700, total= 6.9min
[CV] colsa

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed: 30.7min


[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6, score=0.833000, total=23.8min
[CV] colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6, score=0.839200, total=23.7min
[CV] colsample_bylevel=0.9, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.838000, total=24.4min
[CV] colsample_bylevel=0.9, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6, score=0.832500, total= 6.1min
[CV] colsamp

[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed: 36.9min


[CV]  colsample_bylevel=0.9, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.832600, total= 7.2min
[CV] colsample_bylevel=0.9, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.834300, total= 7.1min
[CV] colsample_bylevel=0.9, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6 
[CV]  colsample_bylevel=0.9, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.834700, total= 7.6min
[CV] colsample_bylevel=0.9, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6 
[CV]  colsample_bylevel=0.9, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.835100, total= 7.6min
[CV] colsample_bylevel

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed: 51.5min


[CV]  colsample_bylevel=0.9, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6, score=0.837100, total= 7.0min
[CV] colsample_bylevel=1, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6, score=0.834600, total= 6.8min
[CV] colsample_bylevel=1, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=0.9, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6, score=0.836000, total= 6.8min
[CV] colsample_bylevel=1, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=1, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.834200, total= 7.7min
[CV] colsample_bylevel

[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 60.6min


[CV]  colsample_bylevel=1, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6, score=0.833600, total= 7.3min
[CV] colsample_bylevel=1, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6 
[CV]  colsample_bylevel=1, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.839000, total= 7.7min
[CV] colsample_bylevel=1, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6 
[CV]  colsample_bylevel=1, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6, score=0.832900, total= 7.2min
[CV] colsample_bylevel=1, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7 
[CV]  colsample_bylevel=1, colsample_bytree=0.9, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6, score=0.836300, total= 7.2min
[CV] colsample_bylevel=1

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 81.4min


[CV]  colsample_bylevel=1, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.833600, total= 8.3min
[CV] colsample_bylevel=1, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6 
[CV]  colsample_bylevel=1, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6, score=0.832800, total= 7.8min
[CV] colsample_bylevel=1, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6 
[CV]  colsample_bylevel=1, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.7, score=0.835800, total= 8.4min
[CV] colsample_bylevel=1, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6 
[CV]  colsample_bylevel=1, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6, score=0.833500, total= 7.4min
[CV]  colsample_bylevel=1, colsample

[Parallel(n_jobs=-1)]: Done  38 out of  40 | elapsed: 89.1min remaining:  4.7min


[CV]  colsample_bylevel=1, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6, score=0.834200, total= 6.5min
[CV]  colsample_bylevel=1, colsample_bytree=1, learning_rate=0.1, max_depth=5, n_estimators=250, seed=42, subsample=0.6, score=0.835400, total= 6.3min


[Parallel(n_jobs=-1)]: Done  40 out of  40 | elapsed: 90.4min finished


XGBClassifier(base_score=0.5, colsample_bylevel=1, colsample_bytree=1,
       gamma=0, learning_rate=0.1, max_delta_step=0, max_depth=3,
       min_child_weight=1, missing=None, n_estimators=100, nthread=-1,
       objective='binary:logistic', reg_alpha=0, reg_lambda=1,
       scale_pos_weight=1, seed=0, silent=True, subsample=1)
Best accuracy score is  0.8368
Best parameter is  {'colsample_bylevel': 1, 'colsample_bytree': 0.9, 'learning_rate': 0.1, 'max_depth': 5, 'n_estimators': 250, 'seed': 42, 'subsample': 0.7}


In [12]:
# NOTE(review): this rebinds the name `xgb` from the xgboost module to a
# classifier instance, so any later `xgb.XGBClassifier(...)` call in the same
# kernel fails.  The name is kept because the scoring cell calls `xgb.score`.
# NOTE(review): learning_rate=0.01 and seed=0 differ from the values used in
# the grid searches (0.1 and 42) -- confirm this is intentional.
xgb = xgb.XGBClassifier(
    # tree shape and boosting schedule
    n_estimators=250,
    max_depth=5,
    learning_rate=0.01,
    min_child_weight=1,
    gamma=0,
    max_delta_step=0,
    # row / column sampling
    subsample=0.7,
    colsample_bytree=0.9,
    colsample_bylevel=1,
    # objective and regularisation
    objective='binary:logistic',
    base_score=0.5,
    scale_pos_weight=1,
    reg_alpha=0,
    reg_lambda=1,
    # runtime settings
    missing=None,
    nthread=-1,
    seed=0,
    silent=True,
)
# Fitting is currently disabled; the classifier must be fitted before scoring.
#xgb.fit(X_train, y_train)

In [9]:
# Report the accuracy of the XGBoost model on the training data, then on the validation data.
# NOTE(review): this needs a fitted `xgb` (the fit call in the cell above is commented out)
# and an `X_validation` prepared the same way as `X_train` -- confirm both before re-running.
trainacc = xgb.score(X_train, y_train)
print("train acc ", trainacc)

validationacc = xgb.score(X_validation, y_validation)
print("validation acc ", validationacc)

train acc  0.80418
validation acc  0.7639


In [8]:
# First MLP search: three hidden-layer widths crossed with three values of alpha.
# `param_search` is defined elsewhere in the notebook; `grid_mlp` holds whatever it returns.
mlp_params = {
    'hidden_layer_sizes': [50, 100, 150],
    'alpha': [0.01, 0.001, 0.0001],
}
grid_mlp = param_search(MLPClassifier(), mlp_params)

Fitting 5 folds for each of 9 candidates, totalling 45 fits
[CV] alpha=0.01, hidden_layer_sizes=50 ...............................
[CV] alpha=0.01, hidden_layer_sizes=50 ...............................
[CV] alpha=0.01, hidden_layer_sizes=50 ...............................
[CV] alpha=0.01, hidden_layer_sizes=50 ...............................
[CV]  alpha=0.01, hidden_layer_sizes=50, score=0.845200, total= 1.6min
[CV] alpha=0.01, hidden_layer_sizes=50 ...............................
[CV]  alpha=0.01, hidden_layer_sizes=50, score=0.837600, total= 1.7min
[CV] alpha=0.01, hidden_layer_sizes=100 ..............................
[CV]  alpha=0.01, hidden_layer_sizes=50, score=0.834300, total= 2.6min
[CV] alpha=0.01, hidden_layer_sizes=100 ..............................
[CV]  alpha=0.01, hidden_layer_sizes=50, score=0.842900, total= 1.4min
[CV] alpha=0.01, hidden_layer_sizes=100 ..............................
[CV]  alpha=0.01, hidden_layer_sizes=50, score=0.838600, total= 3.5min
[CV] alpha=0.01, 

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  3.6min


[CV]  alpha=0.01, hidden_layer_sizes=100, score=0.836500, total= 1.7min
[CV] alpha=0.01, hidden_layer_sizes=100 ..............................
[CV]  alpha=0.01, hidden_layer_sizes=100, score=0.839000, total= 3.7min
[CV] alpha=0.01, hidden_layer_sizes=150 ..............................
[CV]  alpha=0.01, hidden_layer_sizes=100, score=0.838200, total= 2.2min
[CV] alpha=0.01, hidden_layer_sizes=150 ..............................
[CV]  alpha=0.01, hidden_layer_sizes=100, score=0.840600, total= 3.8min
[CV] alpha=0.01, hidden_layer_sizes=150 ..............................
[CV]  alpha=0.01, hidden_layer_sizes=100, score=0.837800, total= 4.0min
[CV] alpha=0.01, hidden_layer_sizes=150 ..............................


[Parallel(n_jobs=-1)]: Done  10 tasks      | elapsed:  8.3min


[CV]  alpha=0.01, hidden_layer_sizes=150, score=0.846900, total= 2.9min
[CV] alpha=0.01, hidden_layer_sizes=150 ..............................
[CV]  alpha=0.01, hidden_layer_sizes=150, score=0.843100, total= 2.9min
[CV] alpha=0.001, hidden_layer_sizes=50 ..............................
[CV]  alpha=0.01, hidden_layer_sizes=150, score=0.846600, total= 4.4min
[CV] alpha=0.001, hidden_layer_sizes=50 ..............................
[CV]  alpha=0.01, hidden_layer_sizes=150, score=0.849300, total= 2.2min
[CV] alpha=0.001, hidden_layer_sizes=50 ..............................
[CV]  alpha=0.001, hidden_layer_sizes=50, score=0.839800, total= 1.9min
[CV] alpha=0.001, hidden_layer_sizes=50 ..............................
[CV]  alpha=0.001, hidden_layer_sizes=50, score=0.833700, total= 2.2min
[CV] alpha=0.001, hidden_layer_sizes=50 ..............................
[CV]  alpha=0.001, hidden_layer_sizes=50, score=0.838500, total= 1.5min
[CV] alpha=0.001, hidden_layer_sizes=100 .............................

[Parallel(n_jobs=-1)]: Done  17 tasks      | elapsed: 13.2min


[CV]  alpha=0.01, hidden_layer_sizes=150, score=0.851100, total= 4.9min
[CV] alpha=0.001, hidden_layer_sizes=100 .............................
[CV]  alpha=0.001, hidden_layer_sizes=50, score=0.838600, total= 1.4min
[CV] alpha=0.001, hidden_layer_sizes=100 .............................
[CV]  alpha=0.001, hidden_layer_sizes=50, score=0.836400, total= 3.8min
[CV] alpha=0.001, hidden_layer_sizes=100 .............................
[CV]  alpha=0.001, hidden_layer_sizes=100, score=0.832400, total= 2.9min
[CV] alpha=0.001, hidden_layer_sizes=100 .............................
[CV]  alpha=0.001, hidden_layer_sizes=100, score=0.841400, total= 3.6min
[CV] alpha=0.001, hidden_layer_sizes=150 .............................
[CV]  alpha=0.001, hidden_layer_sizes=100, score=0.846600, total= 3.7min
[CV] alpha=0.001, hidden_layer_sizes=150 .............................
[CV]  alpha=0.001, hidden_layer_sizes=100, score=0.842600, total= 3.3min


[Parallel(n_jobs=-1)]: Done  24 tasks      | elapsed: 18.2min


[CV] alpha=0.001, hidden_layer_sizes=150 .............................
[CV]  alpha=0.001, hidden_layer_sizes=100, score=0.842700, total= 2.5min
[CV] alpha=0.001, hidden_layer_sizes=150 .............................
[CV]  alpha=0.001, hidden_layer_sizes=150, score=0.843100, total= 3.9min
[CV] alpha=0.001, hidden_layer_sizes=150 .............................
[CV]  alpha=0.001, hidden_layer_sizes=150, score=0.849700, total= 4.1min
[CV] alpha=0.0001, hidden_layer_sizes=50 .............................
[CV]  alpha=0.001, hidden_layer_sizes=150, score=0.855400, total= 3.5min
[CV] alpha=0.0001, hidden_layer_sizes=50 .............................
[CV]  alpha=0.001, hidden_layer_sizes=150, score=0.846500, total= 4.0min
[CV] alpha=0.0001, hidden_layer_sizes=50 .............................
[CV]  alpha=0.0001, hidden_layer_sizes=50, score=0.841600, total= 2.2min
[CV] alpha=0.0001, hidden_layer_sizes=50 .............................
[CV]  alpha=0.0001, hidden_layer_sizes=50, score=0.832100, total=

[Parallel(n_jobs=-1)]: Done  33 tasks      | elapsed: 25.8min


[CV] alpha=0.0001, hidden_layer_sizes=100 ............................
[CV]  alpha=0.0001, hidden_layer_sizes=50, score=0.831900, total= 2.1min
[CV] alpha=0.0001, hidden_layer_sizes=100 ............................
[CV]  alpha=0.0001, hidden_layer_sizes=50, score=0.835700, total= 2.5min
[CV] alpha=0.0001, hidden_layer_sizes=100 ............................
[CV]  alpha=0.0001, hidden_layer_sizes=100, score=0.841700, total= 2.7min
[CV] alpha=0.0001, hidden_layer_sizes=100 ............................
[CV]  alpha=0.0001, hidden_layer_sizes=100, score=0.839100, total= 3.0min
[CV] alpha=0.0001, hidden_layer_sizes=150 ............................
[CV]  alpha=0.0001, hidden_layer_sizes=100, score=0.848000, total= 3.7min
[CV] alpha=0.0001, hidden_layer_sizes=150 ............................
[CV]  alpha=0.0001, hidden_layer_sizes=100, score=0.840300, total= 4.5min
[CV] alpha=0.0001, hidden_layer_sizes=150 ............................
[CV]  alpha=0.0001, hidden_layer_sizes=100, score=0.841900, t

[Parallel(n_jobs=-1)]: Done  43 out of  45 | elapsed: 34.3min remaining:  1.6min


[CV]  alpha=0.0001, hidden_layer_sizes=150, score=0.845400, total= 2.9min
[CV]  alpha=0.0001, hidden_layer_sizes=150, score=0.845300, total= 2.1min


[Parallel(n_jobs=-1)]: Done  45 out of  45 | elapsed: 36.1min finished


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)
Best accuracy score is  0.84848
Best parameter is  {'alpha': 0.001, 'hidden_layer_sizes': 150}


In [13]:
# Second MLP search: alpha is fixed at 0.001 and wider hidden layers (250, 350) are
# compared against the previous width of 150.
mlp_params = {
    'hidden_layer_sizes': [250, 350, 150],
    'alpha': [0.001],
}
grid_mlp = param_search(MLPClassifier(), mlp_params)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] alpha=0.001, hidden_layer_sizes=250 .............................
[CV] alpha=0.001, hidden_layer_sizes=250 .............................
[CV] alpha=0.001, hidden_layer_sizes=250 .............................
[CV] alpha=0.001, hidden_layer_sizes=250 .............................
[CV]  alpha=0.001, hidden_layer_sizes=250, score=0.853500, total= 3.1min
[CV] alpha=0.001, hidden_layer_sizes=250 .............................
[CV]  alpha=0.001, hidden_layer_sizes=250, score=0.854300, total= 3.5min
[CV] alpha=0.001, hidden_layer_sizes=350 .............................
[CV]  alpha=0.001, hidden_layer_sizes=250, score=0.854100, total= 3.5min
[CV] alpha=0.001, hidden_layer_sizes=350 .............................
[CV]  alpha=0.001, hidden_layer_sizes=250, score=0.859000, total= 3.5min
[CV] alpha=0.001, hidden_layer_sizes=350 .............................
[CV]  alpha=0.001, hidden_layer_sizes=250, score=0.853000, total= 4.2min
[CV] al

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  7.3min


[CV]  alpha=0.001, hidden_layer_sizes=350, score=0.857500, total= 4.6min
[CV] alpha=0.001, hidden_layer_sizes=350 .............................
[CV]  alpha=0.001, hidden_layer_sizes=350, score=0.859600, total= 4.6min
[CV] alpha=0.001, hidden_layer_sizes=150 .............................
[CV]  alpha=0.001, hidden_layer_sizes=350, score=0.863700, total= 5.3min
[CV] alpha=0.001, hidden_layer_sizes=150 .............................
[CV]  alpha=0.001, hidden_layer_sizes=150, score=0.845000, total= 4.1min
[CV] alpha=0.001, hidden_layer_sizes=150 .............................
[CV]  alpha=0.001, hidden_layer_sizes=150, score=0.850800, total= 5.0min
[CV] alpha=0.001, hidden_layer_sizes=150 .............................


[Parallel(n_jobs=-1)]: Done  10 out of  15 | elapsed: 13.3min remaining:  6.6min


[CV]  alpha=0.001, hidden_layer_sizes=350, score=0.845700, total= 6.1min
[CV] alpha=0.001, hidden_layer_sizes=150 .............................
[CV]  alpha=0.001, hidden_layer_sizes=350, score=0.861200, total= 6.7min


[Parallel(n_jobs=-1)]: Done  12 out of  15 | elapsed: 14.9min remaining:  3.7min


[CV]  alpha=0.001, hidden_layer_sizes=150, score=0.848100, total= 3.1min
[CV]  alpha=0.001, hidden_layer_sizes=150, score=0.846400, total= 3.7min
[CV]  alpha=0.001, hidden_layer_sizes=150, score=0.844400, total= 3.2min


[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed: 16.8min finished


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)
Best accuracy score is  0.85754
Best parameter is  {'alpha': 0.001, 'hidden_layer_sizes': 350}


In [None]:
# First random-forest search over small forests (10, 20 and 30 trees).
rf_params = {'n_estimators': [10, 20, 30]}
grid_rf = param_search(RandomForestClassifier(), rf_params)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] n_estimators=10 .................................................
[CV] n_estimators=10 .................................................
[CV] n_estimators=10 .................................................
[CV] n_estimators=10 .................................................
[CV] .................. n_estimators=10, score=0.777500, total=  31.0s
[CV] n_estimators=10 .................................................
[CV] .................. n_estimators=10, score=0.777200, total=  31.9s
[CV] n_estimators=20 .................................................
[CV] .................. n_estimators=10, score=0.774100, total=  32.3s
[CV] n_estimators=20 .................................................
[CV] .................. n_estimators=10, score=0.778900, total=  32.9s
[CV] n_estimators=20 .................................................
[CV] .................. n_estimators=10, score=0.779200, total=  34.2s
[CV] n_estimators

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  1.1min


[CV] .................. n_estimators=20, score=0.803100, total= 1.0min
[CV] n_estimators=20 .................................................
[CV] .................. n_estimators=20, score=0.799900, total= 1.1min
[CV] n_estimators=30 .................................................
[CV] .................. n_estimators=20, score=0.799400, total= 1.0min
[CV] n_estimators=30 .................................................
[CV] .................. n_estimators=20, score=0.801300, total= 1.0min
[CV] n_estimators=30 .................................................
[CV] .................. n_estimators=20, score=0.805400, total= 1.2min


[Parallel(n_jobs=-1)]: Done  10 out of  15 | elapsed:  2.9min remaining:  1.4min


[CV] n_estimators=30 .................................................
[CV] .................. n_estimators=30, score=0.805300, total= 1.7min
[CV] n_estimators=30 .................................................
[CV] .................. n_estimators=30, score=0.810700, total= 1.8min


[Parallel(n_jobs=-1)]: Done  12 out of  15 | elapsed:  3.4min remaining:   51.5s


[CV] .................. n_estimators=30, score=0.811100, total= 1.6min
[CV] .................. n_estimators=30, score=0.808000, total= 1.3min
[CV] .................. n_estimators=30, score=0.810000, total= 1.1min


[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  4.6min finished


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)
Best accuracy score is  0.80902
Best parameter is  {'n_estimators': 30}


In [11]:
# Second random-forest search with much larger forests (150 to 350 trees).
rf_params = {'n_estimators': [250, 150, 350]}
grid_rf = param_search(RandomForestClassifier(), rf_params)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] n_estimators=250 ................................................
[CV] n_estimators=250 ................................................
[CV] n_estimators=250 ................................................
[CV] n_estimators=250 ................................................
[CV] ................. n_estimators=250, score=0.825900, total= 6.1min
[CV] n_estimators=250 ................................................
[CV] ................. n_estimators=250, score=0.826000, total= 6.1min
[CV] n_estimators=150 ................................................
[CV] ................. n_estimators=250, score=0.823900, total= 6.2min
[CV] n_estimators=150 ................................................
[CV] ................. n_estimators=250, score=0.821000, total= 6.2min
[CV] n_estimators=150 ................................................
[CV] ................. n_estimators=150, score=0.822000, total= 3.7min
[CV] n_estimators

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed: 10.0min


[CV] ................. n_estimators=150, score=0.824200, total= 3.7min
[CV] n_estimators=150 ................................................
[CV] ................. n_estimators=150, score=0.826300, total= 3.8min
[CV] n_estimators=350 ................................................
[CV] ................. n_estimators=250, score=0.831400, total= 6.5min
[CV] n_estimators=350 ................................................
[CV] ................. n_estimators=150, score=0.825100, total= 4.3min
[CV] n_estimators=350 ................................................
[CV] ................. n_estimators=150, score=0.826200, total= 4.3min
[CV] n_estimators=350 ................................................


[Parallel(n_jobs=-1)]: Done  10 out of  15 | elapsed: 14.4min remaining:  7.2min


[CV] ................. n_estimators=350, score=0.824600, total=10.3min
[CV] n_estimators=350 ................................................
[CV] ................. n_estimators=350, score=0.826700, total= 9.7min


[Parallel(n_jobs=-1)]: Done  12 out of  15 | elapsed: 22.6min remaining:  5.7min


[CV] ................. n_estimators=350, score=0.825900, total= 9.1min
[CV] ................. n_estimators=350, score=0.826300, total= 9.1min
[CV] ................. n_estimators=350, score=0.828100, total= 5.7min


[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed: 26.2min finished


RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_split=1e-07, min_samples_leaf=1,
            min_samples_split=2, min_weight_fraction_leaf=0.0,
            n_estimators=10, n_jobs=1, oob_score=False, random_state=None,
            verbose=0, warm_start=False)
Best accuracy score is  0.82632
Best parameter is  {'n_estimators': 350}


In [9]:
# Logistic-regression search over the regularisation parameter C.
lr_params = {'C': [100, 10, 500]}
grid_lr = param_search(LogisticRegression(), lr_params)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] C=100 ...........................................................
[CV] C=100 ...........................................................
[CV] C=100 ...........................................................
[CV] C=100 ...........................................................
[CV] ............................ C=100, score=0.795300, total= 1.1min
[CV] C=100 ...........................................................
[CV] ............................ C=100, score=0.797900, total= 1.1min
[CV] C=10 ............................................................
[CV] ............................ C=100, score=0.799100, total= 1.1min
[CV] C=10 ............................................................
[CV] ............................ C=100, score=0.799300, total= 1.2min
[CV] C=10 ............................................................
[CV] ............................. C=10, score=0.798100, total= 1.1min
[CV] C=10 .......

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  2.2min


[CV] ............................. C=10, score=0.799100, total= 1.0min
[CV] C=10 ............................................................
[CV] ............................ C=100, score=0.804800, total= 1.2min
[CV] C=500 ...........................................................
[CV] ............................. C=10, score=0.795800, total= 1.2min
[CV] C=500 ...........................................................
[CV] ............................. C=10, score=0.804800, total= 1.0min
[CV] C=500 ...........................................................
[CV] ............................. C=10, score=0.799300, total= 1.2min
[CV] C=500 ...........................................................


[Parallel(n_jobs=-1)]: Done  10 out of  15 | elapsed:  3.4min remaining:  1.7min


[CV] ............................ C=500, score=0.795500, total= 1.2min
[CV] C=500 ...........................................................
[CV] ............................ C=500, score=0.798000, total= 1.3min


[Parallel(n_jobs=-1)]: Done  12 out of  15 | elapsed:  3.7min remaining:   55.0s


[CV] ............................ C=500, score=0.799200, total=  58.3s
[CV] ............................ C=500, score=0.799100, total=  55.3s
[CV] ............................ C=500, score=0.804800, total=  49.7s


[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  4.4min finished


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
Best accuracy score is  0.79942
Best parameter is  {'C': 10}


# Build the custom Ensemble from `xgb` and `base_models`, then call fit_predict with the
# training data and the test set.
# NOTE(review): `Ensemble` and `base_models` are not defined in this part of the notebook;
# the first argument (5) is presumably a fold count -- confirm against the class definition.
# The result of fit_predict is not assigned to a name here.
ensemble = Ensemble(5,xgb,base_models)
ensemble.fit_predict(X_train,y_train,test)

### Voting classifier (a simple ensemble-learning method)

In [19]:
# Base learners for the voting ensemble, each configured with the best value reported
# by its grid search above.
lr = LogisticRegression(C=10)
rf = RandomForestClassifier(n_estimators=350)
mlp = MLPClassifier(hidden_layer_sizes=350, alpha=0.001)

In [23]:
# Hard-voting ensemble over the four base models: the predicted label is the majority
# vote of the individual classifiers.
# voting='hard' matches the variable name; the soft-voting variant is built separately
# as `vc_soft` later in the notebook.
vc_hard = VotingClassifier(
    estimators=[('lr', lr), ('rf', rf), ('mlp', mlp), ('xgb', xgb)],
    voting='hard',
)
vc_hard = vc_hard.fit(X_train, y_train)

In [24]:
# Predict test-set labels with the voting ensemble fitted above.
test_predictions = vc_hard.predict(test)

### A good improvement, but more work is needed
Soft voting classifier: 0.78058.
Hard voting classifier: 0.77710.
I decided to continue the grid search for better parameters :)

In [None]:
# Follow-up logistic-regression search that adds a smaller C (5) to the earlier candidates.
lr_params = {'C': [5, 10, 100]}
grid_lr = param_search(LogisticRegression(), lr_params)

Fitting 5 folds for each of 3 candidates, totalling 15 fits
[CV] C=5 .............................................................
[CV] C=5 .............................................................
[CV] C=5 .............................................................
[CV] C=5 .............................................................
[CV] .............................. C=5, score=0.795300, total=  39.7s
[CV] C=5 .............................................................
[CV] .............................. C=5, score=0.799200, total=  39.8s
[CV] .............................. C=5, score=0.799200, total=  39.2s
[CV] C=10 ............................................................
[CV] C=10 ............................................................
[CV] .............................. C=5, score=0.797500, total=  45.3s
[CV] C=10 ............................................................
[CV] .............................. C=5, score=0.805300, total=  32.1s
[CV] C=10 .......

[Parallel(n_jobs=-1)]: Done   5 tasks      | elapsed:  1.2min


[CV] ............................. C=10, score=0.798100, total=  32.2s
[CV] C=10 ............................................................
[CV] ............................. C=10, score=0.799100, total=  30.6s
[CV] C=100 ...........................................................
[CV] ............................. C=10, score=0.795800, total=  36.7s
[CV] C=100 ...........................................................
[CV] ............................. C=10, score=0.804800, total=  28.8s
[CV] C=100 ...........................................................
[CV] ............................. C=10, score=0.799300, total=  31.9s
[CV] C=100 ...........................................................


[Parallel(n_jobs=-1)]: Done  10 out of  15 | elapsed:  1.7min remaining:   52.3s


[CV] ............................ C=100, score=0.795300, total=  28.3s
[CV] C=100 ...........................................................
[CV] ............................ C=100, score=0.797900, total=  28.6s


[Parallel(n_jobs=-1)]: Done  12 out of  15 | elapsed:  1.8min remaining:   26.6s


[CV] ............................ C=100, score=0.799300, total=  20.3s
[CV] ............................ C=100, score=0.799100, total=  18.6s
[CV] ............................ C=100, score=0.804800, total=  19.8s


[Parallel(n_jobs=-1)]: Done  15 out of  15 | elapsed:  2.1min finished


LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False)
Best accuracy score is  0.79942
Best parameter is  {'C': 10}


In [10]:
# Third random-forest search, extending the forest size to 450 and 550 trees.
rf_params = {'n_estimators': [450, 550]}
grid_rf = param_search(RandomForestClassifier(), rf_params)

Fitting 5 folds for each of 2 candidates, totalling 10 fits
[CV] n_estimators=450 ................................................
[CV] n_estimators=450 ................................................
[CV] n_estimators=450 ................................................
[CV] n_estimators=450 ................................................
[CV] ................. n_estimators=450, score=0.825900, total=95.3min
[CV] n_estimators=450 ................................................
[CV] ................. n_estimators=450, score=0.826100, total=95.4min
[CV] ................. n_estimators=450, score=0.825300, total=95.4min
[CV] n_estimators=550 ................................................
[CV] n_estimators=550 ................................................
[CV] ................. n_estimators=450, score=0.825400, total=95.4min
[CV] n_estimators=550 ................................................
[CV] ................. n_estimators=450, score=0.831000, total= 9.6min
[CV] n_estimators

[Parallel(n_jobs=-1)]: Done   5 out of  10 | elapsed: 105.2min remaining: 105.2min


[CV] ................. n_estimators=550, score=0.826400, total=11.6min
[CV] n_estimators=550 ................................................
[CV] ................. n_estimators=550, score=0.824100, total=11.6min


[Parallel(n_jobs=-1)]: Done   7 out of  10 | elapsed: 107.3min remaining: 46.0min


[CV] ................. n_estimators=550, score=0.826200, total=11.7min
[CV] ................. n_estimators=550, score=0.828400, total= 8.3min
[CV] ................. n_estimators=550, score=0.830700, total= 7.6min


[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed: 115.0min finished


KeyboardInterrupt: 

In [18]:
# Third MLP search: alpha stays at 0.001 while the hidden layer grows to 450 and 550 units.
mlp_params = {
    'hidden_layer_sizes': [450, 550],
    'alpha': [0.001],
}
grid_mlp = param_search(MLPClassifier(), mlp_params)

Fitting 5 folds for each of 2 candidates, totalling 10 fits
[CV] alpha=0.001, hidden_layer_sizes=450 .............................
[CV] alpha=0.001, hidden_layer_sizes=450 .............................
[CV] alpha=0.001, hidden_layer_sizes=450 .............................
[CV] alpha=0.001, hidden_layer_sizes=450 .............................
[CV]  alpha=0.001, hidden_layer_sizes=450, score=0.857300, total= 4.5min
[CV] alpha=0.001, hidden_layer_sizes=450 .............................
[CV]  alpha=0.001, hidden_layer_sizes=450, score=0.859300, total= 4.7min
[CV] alpha=0.001, hidden_layer_sizes=550 .............................
[CV]  alpha=0.001, hidden_layer_sizes=450, score=0.858800, total= 4.9min
[CV] alpha=0.001, hidden_layer_sizes=550 .............................
[CV]  alpha=0.001, hidden_layer_sizes=450, score=0.860400, total= 5.1min
[CV] alpha=0.001, hidden_layer_sizes=550 .............................
[CV]  alpha=0.001, hidden_layer_sizes=450, score=0.858100, total= 4.9min
[CV] al

[Parallel(n_jobs=-1)]: Done   5 out of  10 | elapsed:  9.5min remaining:  9.5min


[CV]  alpha=0.001, hidden_layer_sizes=550, score=0.836600, total= 5.5min
[CV] alpha=0.001, hidden_layer_sizes=550 .............................
[CV]  alpha=0.001, hidden_layer_sizes=550, score=0.862200, total= 5.6min


[Parallel(n_jobs=-1)]: Done   7 out of  10 | elapsed: 10.8min remaining:  4.6min


[CV]  alpha=0.001, hidden_layer_sizes=550, score=0.856800, total= 5.9min
[CV]  alpha=0.001, hidden_layer_sizes=550, score=0.860700, total= 2.6min
[CV]  alpha=0.001, hidden_layer_sizes=550, score=0.858400, total= 3.4min


[Parallel(n_jobs=-1)]: Done  10 out of  10 | elapsed: 12.9min finished


MLPClassifier(activation='relu', alpha=0.0001, batch_size='auto', beta_1=0.9,
       beta_2=0.999, early_stopping=False, epsilon=1e-08,
       hidden_layer_sizes=(100,), learning_rate='constant',
       learning_rate_init=0.001, max_iter=200, momentum=0.9,
       nesterovs_momentum=True, power_t=0.5, random_state=None,
       shuffle=True, solver='adam', tol=0.0001, validation_fraction=0.1,
       verbose=False, warm_start=False)
Best accuracy score is  0.85878
Best parameter is  {'alpha': 0.001, 'hidden_layer_sizes': 450}


In [13]:
# Base learners for the final ensemble, using the values chosen after the extended searches.
# NOTE(review): the 450/550 random-forest search above ended in a KeyboardInterrupt, so
# no best parameter was printed for it -- confirm that n_estimators=550 is the intended choice.
lr = LogisticRegression(C=10)
rf = RandomForestClassifier(n_estimators=550)
mlp = MLPClassifier(hidden_layer_sizes=450, alpha=0.001)

In [14]:
# Soft-voting ensemble over the four base models, fitted on the training data.
# fit() returns the estimator itself, so construction and fitting are chained.
vc_soft = VotingClassifier(
    estimators=[('lr', lr), ('rf', rf), ('mlp', mlp), ('xgb', xgb)],
    voting='soft',
).fit(X_train, y_train)

# Labels for the test set; these are what the output cell writes to disk.
test_predictions = vc_soft.predict(test)

### Output


In [15]:
# Submission table: a running row number as ID and the integer-cast predictions as TARGET.
df_output = pd.DataFrame(
    data={
        'ID': range(len(test)),
        'TARGET': test_predictions.astype(int),
    }
)

# Write the table without the DataFrame index, so the file holds only ID and TARGET.
df_output.to_csv(r'submissions.csv', index=False)