# Setup

In [1]:
import sys
import os

import re
import collections
import itertools
import bcolz
import pickle
sys.path.append('../../lib')

import numpy as np
import pandas as pd
import gc
import random
import smart_open
import h5py
import csv
import json
import functools
import time
import string

import datetime as dt
from tqdm import tqdm_notebook as tqdm

import numpy as np
import pandas as pd

import matplotlib.pyplot as plt
import seaborn as sns

random_state_number = 967898

In [2]:
import tensorflow as tf
from tensorflow.python.client import device_lib
def get_available_gpus():
    local_device_protos = device_lib.list_local_devices()
    return [x.name for x in local_device_protos if x.device_type == 'GPU']

config = tf.ConfigProto()
config.gpu_options.allow_growth=True
sess = tf.Session(config=config)
get_available_gpus()

['/gpu:0', '/gpu:1']

In [3]:
%pylab
%matplotlib inline
%load_ext line_profiler
%load_ext memory_profiler
%load_ext autoreload

Using matplotlib backend: Qt5Agg
Populating the interactive namespace from numpy and matplotlib


`%matplotlib` prevents importing * from pylab and numpy
  "\n`%matplotlib` prevents importing * from pylab and numpy"


In [4]:
pd.options.mode.chained_assignment = None
pd.options.display.max_columns = 999
color = sns.color_palette()

# Data

In [5]:
store = pd.HDFStore('../data_prep/processed/stage1/data_frames.h5')
train_df = store['train_df']
test_df = store['test_df']

In [6]:
display(train_df.head())
display(test_df.head())

Unnamed: 0,ID,Gene,Variation,Class,Sentences
0,0,[fam58a],"[truncating, mutations]",1,"[[cyclin-dependent, kinases, , cdks, , regulat..."
1,1,[cbl],[w802*],2,"[[abstract, background, non-small, cell, lung,..."
2,2,[cbl],[q249e],2,"[[abstract, background, non-small, cell, lung,..."
3,3,[cbl],[n454d],3,"[[recent, evidence, has, demonstrated, that, a..."
4,4,[cbl],[l399v],4,"[[oncogenic, mutations, in, the, monomeric, ca..."


Unnamed: 0,ID,Gene,Variation,Sentences
0,0,[acsl4],[r570s],"[[2, this, mutation, resulted, in, a, myelopro..."
1,1,[naglu],[p521l],"[[abstract, the, large, tumor, suppressor, 1, ..."
2,2,[pah],[l333f],"[[vascular, endothelial, growth, factor, recep..."
3,3,[ing1],[a148d],"[[inflammatory, myofibroblastic, tumor, , imt,..."
4,4,[tmem216],[g77a],"[[abstract, retinoblastoma, is, a, pediatric, ..."


In [7]:
with open('../data_prep/processed/stage1/vocab_words_wordidx.pkl', 'rb') as f:
    (vocab_words, vocab_wordidx) = pickle.load(f)
vocab_size = len(vocab_words)
vocab_size, len(vocab_wordidx)

(352220, 352220)

## Train-Test Split and Data Prep

In [8]:
from sklearn.model_selection import train_test_split, StratifiedShuffleSplit
from sklearn.metrics import make_scorer, f1_score, precision_score, accuracy_score, log_loss
f1_scorer = make_scorer(f1_score, average="macro")
precision_scorer = make_scorer(precision_score, average="macro")
accuracy_scorer = make_scorer(accuracy_score, average="macro")
log_loss_scorer = make_scorer(log_loss)

from sklearn.metrics import classification_report, confusion_matrix
from sklearn.pipeline import Pipeline
from sklearn.model_selection import cross_val_score, GridSearchCV, RandomizedSearchCV

from sklearn.feature_extraction.text import CountVectorizer
from sklearn.feature_extraction.text import TfidfTransformer

In [9]:
train_df.Sentences = train_df.Sentences.apply(lambda ll: list(itertools.chain.from_iterable(ll)))
all_text_train_df = pd.DataFrame()
all_text_train_df["Text"] = train_df.Gene + train_df.Variation + train_df.Sentences
all_text_train_df["Class"] = train_df.Class
display(all_text_train_df.head())

Unnamed: 0,Text,Class
0,"[fam58a, truncating, mutations, cyclin-depende...",1
1,"[cbl, w802*, abstract, background, non-small, ...",2
2,"[cbl, q249e, abstract, background, non-small, ...",2
3,"[cbl, n454d, recent, evidence, has, demonstrat...",3
4,"[cbl, l399v, oncogenic, mutations, in, the, mo...",4


In [10]:
x_train, x_test, y_train, y_test = train_test_split(all_text_train_df.Text,all_text_train_df.Class,
                                                   test_size=0.10, random_state=random_state_number,
                                                   stratify=all_text_train_df.Class)

print(x_train.shape, y_train.shape)
print(x_test.shape, y_test.shape)

(2988,) (2988,)
(333,) (333,)


In [11]:
del all_text_train_df
del train_df
#del test_df

In [12]:
cvec = CountVectorizer(vocabulary=vocab_wordidx)
tfidf = TfidfTransformer()

In [13]:
x_train = x_train.str.join(" ")
x_train_counts = cvec.fit_transform(x_train, y_train)
print(x_train_counts.shape)
x_train_tf = tfidf.fit_transform(x_train_counts)
print(x_train_tf.shape)

(2988, 352220)
(2988, 352220)


In [14]:
x_test = x_test.str.join(" ")
x_test_counts = cvec.fit_transform(x_test, y_test)
print(x_test_counts.shape)
x_test_tf = tfidf.fit_transform(x_test_counts)
print(x_test_tf.shape)

(333, 352220)
(333, 352220)


In [15]:
len(x_test)

333

In [16]:
gc.collect()

197

In [17]:
def evaluate_model(model, sparse=True, predict_proba=True):
    if sparse:
        x_train_loc = x_train_tf
        y_train_loc = y_train
        x_test_loc  = x_test_tf
        y_test_loc  = y_test
    else:
        x_train_loc = x_train_tf.toarray()
        y_train_loc = y_train.values
        x_test_loc  = x_test_tf.toarray()
        y_test_loc  = y_test.values
        
    model.fit(x_train_loc, y_train_loc)        
    if predict_proba and 'predict_proba' in dir(model):
        predicted_prob = model.predict_proba(x_test_loc)
        print("log_loss\n", log_loss(y_test_loc, predicted_prob, labels=range(1,10)))
        
    y_pred = model.predict(x_test_loc)
    print("f1_score\n", f1_score(y_test_loc, y_pred, average="macro"))
    print("accuracy_score\n", accuracy_score(y_test_loc, y_pred))
    print("\nclassification_report\n",classification_report(y_test_loc, y_pred))
    print("\nconfusion_matrix\n",confusion_matrix(y_test_loc, y_pred))

# Models

did not run GaussianProcessClassifier, GaussianMixture since they took more than 64G of ram

## Multinomial NB

In [19]:
from sklearn.naive_bayes import MultinomialNB
nb_model = MultinomialNB(alpha=0.001)

In [20]:
evaluate_model(nb_model)

log_loss
 1.7626723557
f1_score
 0.581177607313
accuracy_score
 0.663663663664

classification_report
              precision    recall  f1-score   support

          1       0.61      0.58      0.59        57
          2       0.64      0.64      0.64        45
          3       0.45      0.56      0.50         9
          4       0.63      0.59      0.61        69
          5       0.50      0.62      0.56        24
          6       0.84      0.57      0.68        28
          7       0.75      0.83      0.79        95
          8       0.00      0.00      0.00         2
          9       1.00      0.75      0.86         4

avg / total       0.67      0.66      0.66       333


confusion_matrix
 [[33  0  2 17  4  0  1  0  0]
 [ 0 29  0  1  1  0 14  0  0]
 [ 0  0  5  1  1  0  2  0  0]
 [16  0  1 41  6  2  3  0  0]
 [ 1  1  0  3 15  1  3  0  0]
 [ 3  4  0  1  2 16  2  0  0]
 [ 1 10  3  1  1  0 79  0  0]
 [ 0  1  0  0  0  0  1  0  0]
 [ 0  0  0  0  0  0  1  0  3]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [24]:
parameters = { 'alpha': [0.0005, 0.001,0.002,0.01,0.02]  }

nb_gs = GridSearchCV(nb_model, parameters)

In [25]:
evaluate_model(nb_gs)

log_loss
 1.7626723557
f1_score
 0.581177607313
accuracy_score
 0.663663663664

classification_report
              precision    recall  f1-score   support

          1       0.61      0.58      0.59        57
          2       0.64      0.64      0.64        45
          3       0.45      0.56      0.50         9
          4       0.63      0.59      0.61        69
          5       0.50      0.62      0.56        24
          6       0.84      0.57      0.68        28
          7       0.75      0.83      0.79        95
          8       0.00      0.00      0.00         2
          9       1.00      0.75      0.86         4

avg / total       0.67      0.66      0.66       333


confusion_matrix
 [[33  0  2 17  4  0  1  0  0]
 [ 0 29  0  1  1  0 14  0  0]
 [ 0  0  5  1  1  0  2  0  0]
 [16  0  1 41  6  2  3  0  0]
 [ 1  1  0  3 15  1  3  0  0]
 [ 3  4  0  1  2 16  2  0  0]
 [ 1 10  3  1  1  0 79  0  0]
 [ 0  1  0  0  0  0  1  0  0]
 [ 0  0  0  0  0  0  1  0  3]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [26]:
pd.DataFrame(nb_gs.cv_results_).transpose()



Unnamed: 0,0,1,2,3,4
mean_fit_time,0.28587,0.23958,0.243783,0.239334,0.241517
mean_score_time,0.0435839,0.039856,0.0404784,0.0392499,0.0399577
mean_test_score,0.610776,0.614123,0.614123,0.602075,0.577309
mean_train_score,0.831156,0.828478,0.817602,0.740794,0.699124
param_alpha,0.0005,0.001,0.002,0.01,0.02
params,{'alpha': 0.0005},{'alpha': 0.001},{'alpha': 0.002},{'alpha': 0.01},{'alpha': 0.02}
rank_test_score,3,1,1,4,5
split0_test_score,0.5996,0.612613,0.602603,0.597598,0.577578
split0_train_score,0.824032,0.822524,0.812469,0.735043,0.689794
split1_test_score,0.601805,0.603811,0.610832,0.600802,0.579739


## Support Vector Machine

In [27]:
from sklearn.svm import SVC
svc_model = SVC(probability=True)

In [28]:
evaluate_model(svc_model)

log_loss
 1.8784669174
f1_score
 0.0493250259605
accuracy_score
 0.285285285285

classification_report
              precision    recall  f1-score   support

          1       0.00      0.00      0.00        57
          2       0.00      0.00      0.00        45
          3       0.00      0.00      0.00         9
          4       0.00      0.00      0.00        69
          5       0.00      0.00      0.00        24
          6       0.00      0.00      0.00        28
          7       0.29      1.00      0.44        95
          8       0.00      0.00      0.00         2
          9       0.00      0.00      0.00         4

avg / total       0.08      0.29      0.13       333


confusion_matrix
 [[ 0  0  0  0  0  0 57  0  0]
 [ 0  0  0  0  0  0 45  0  0]
 [ 0  0  0  0  0  0  9  0  0]
 [ 0  0  0  0  0  0 69  0  0]
 [ 0  0  0  0  0  0 24  0  0]
 [ 0  0  0  0  0  0 28  0  0]
 [ 0  0  0  0  0  0 95  0  0]
 [ 0  0  0  0  0  0  2  0  0]
 [ 0  0  0  0  0  0  4  0  0]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [29]:
parameters = {'kernel':('linear', 'rbf'), 
              'C':[1,2,3,4,5,6,7,8,9,10], 
              'gamma':[0.01,0.02,0.03,0.04,0.1,0.2],
              'degree':[3,4,5,6]}

svc_rs = RandomizedSearchCV(svc_model, parameters, n_iter=20, n_jobs=10)

In [30]:
evaluate_model(svc_rs)

log_loss
 0.989794887126
f1_score
 0.49552989396
accuracy_score
 0.633633633634

classification_report
              precision    recall  f1-score   support

          1       0.55      0.58      0.56        57
          2       0.73      0.49      0.59        45
          3       0.17      0.11      0.13         9
          4       0.64      0.67      0.65        69
          5       0.36      0.21      0.26        24
          6       0.88      0.50      0.64        28
          7       0.66      0.92      0.77        95
          8       0.00      0.00      0.00         2
          9       1.00      0.75      0.86         4

avg / total       0.63      0.63      0.62       333


confusion_matrix
 [[33  0  2 17  2  1  2  0  0]
 [ 0 22  0  1  1  0 21  0  0]
 [ 0  0  1  1  1  0  6  0  0]
 [15  0  1 46  3  0  4  0  0]
 [ 6  1  0  5  5  0  7  0  0]
 [ 6  3  0  1  1 14  3  0  0]
 [ 0  4  2  1  1  0 87  0  0]
 [ 0  0  0  0  0  1  1  0  0]
 [ 0  0  0  0  0  0  1  0  3]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [31]:
pd.DataFrame(svc_rs.cv_results_).transpose()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
mean_fit_time,412.061,436.964,458.363,464.73,444.044,432.007,402.727,440.653,435.493,427.901,418.603,436.425,404.344,410.929,420.259,409.574,419.399,456.733,437.916,416.943
mean_score_time,27.1016,27.3234,27.3088,25.5032,27.5299,25.8271,28.0497,27.8096,25.7136,25.1252,26.663,23.7416,24.2685,25.9255,27.6242,25.7644,24.5527,22.7222,23.9271,25.3354
mean_test_score,0.579652,0.626171,0.465529,0.462517,0.532798,0.623159,0.621486,0.621151,0.626171,0.622155,0.622155,0.535475,0.604418,0.620482,0.621486,0.582999,0.620482,0.52008,0.621151,0.621151
mean_train_score,0.658475,0.828311,0.49181,0.490472,0.586851,0.815427,0.858433,0.855253,0.828311,0.850569,0.850569,0.589361,0.749665,0.796854,0.858433,0.680724,0.796854,0.567278,0.855253,0.855253
param_C,6,4,6,3,3,3,10,9,4,8,8,6,6,2,10,10,2,10,9,9
param_degree,5,4,4,6,5,6,4,4,4,5,3,3,5,5,5,3,4,6,4,4
param_gamma,0.04,0.03,0.01,0.02,0.04,0.2,0.2,0.03,0.1,0.1,0.1,0.02,0.1,0.04,0.1,0.03,0.02,0.01,0.02,0.04
param_kernel,rbf,linear,rbf,rbf,rbf,linear,linear,linear,linear,linear,linear,rbf,rbf,linear,linear,rbf,linear,rbf,linear,linear
params,"{'kernel': 'rbf', 'gamma': 0.04, 'degree': 5, ...","{'kernel': 'linear', 'gamma': 0.03, 'degree': ...","{'kernel': 'rbf', 'gamma': 0.01, 'degree': 4, ...","{'kernel': 'rbf', 'gamma': 0.02, 'degree': 6, ...","{'kernel': 'rbf', 'gamma': 0.04, 'degree': 5, ...","{'kernel': 'linear', 'gamma': 0.2, 'degree': 6...","{'kernel': 'linear', 'gamma': 0.2, 'degree': 4...","{'kernel': 'linear', 'gamma': 0.03, 'degree': ...","{'kernel': 'linear', 'gamma': 0.1, 'degree': 4...","{'kernel': 'linear', 'gamma': 0.1, 'degree': 5...","{'kernel': 'linear', 'gamma': 0.1, 'degree': 3...","{'kernel': 'rbf', 'gamma': 0.02, 'degree': 3, ...","{'kernel': 'rbf', 'gamma': 0.1, 'degree': 5, '...","{'kernel': 'linear', 'gamma': 0.04, 'degree': ...","{'kernel': 'linear', 'gamma': 0.1, 'degree': 5...","{'kernel': 'rbf', 'gamma': 0.03, 'degree': 3, ...","{'kernel': 'linear', 'gamma': 0.02, 'degree': ...","{'kernel': 'rbf', 'gamma': 0.01, 'degree': 6, ...","{'kernel': 'linear', 'gamma': 0.02, 'degree': ...","{'kernel': 'linear', 'gamma': 0.04, 'degree': ..."
rank_test_score,15,1,19,20,17,3,6,8,1,4,4,16,13,11,6,14,11,18,8,8


## Softmax Regression

In [32]:
from sklearn.linear_model import LogisticRegression
smreg_model = LogisticRegression(multi_class="multinomial", solver='lbfgs')

In [33]:
evaluate_model(smreg_model)

log_loss
 1.45620897096
f1_score
 0.264439056896
accuracy_score
 0.507507507508

classification_report
              precision    recall  f1-score   support

          1       0.48      0.56      0.52        57
          2       0.00      0.00      0.00        45
          3       0.00      0.00      0.00         9
          4       0.77      0.43      0.56        69
          5       0.50      0.12      0.20        24
          6       0.77      0.36      0.49        28
          7       0.45      0.99      0.62        95
          8       0.00      0.00      0.00         2
          9       0.00      0.00      0.00         4

avg / total       0.47      0.51      0.44       333


confusion_matrix
 [[32  0  0  3  0  0 22  0  0]
 [ 0  0  0  0  0  0 45  0  0]
 [ 0  0  0  2  1  0  6  0  0]
 [19  0  0 30  2  3 15  0  0]
 [10  0  0  2  3  0  9  0  0]
 [ 6  0  0  1  0 10 11  0  0]
 [ 0  0  0  1  0  0 94  0  0]
 [ 0  0  0  0  0  0  2  0  0]
 [ 0  0  0  0  0  0  4  0  0]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [34]:
parameters ={
#                 'penalty' : ['l1','l2'],
                'tol' : [1e-5, 1e-4, 1e-3],
                'class_weight' : [None, 'balanced'],
                'max_iter' : [100,150,200,250,300],
#                 'solver' : ['newton-cg', 'lbfgs'], 
                'C':[1,2,3,4,5,6,7,8,9,10], 
            }

smreg_rs = RandomizedSearchCV(smreg_model, parameters, n_jobs=10)

In [35]:
evaluate_model(smreg_rs)

log_loss
 1.27620591928
f1_score
 0.529901181
accuracy_score
 0.600600600601

classification_report
              precision    recall  f1-score   support

          1       0.58      0.60      0.59        57
          2       0.65      0.53      0.59        45
          3       0.26      0.67      0.38         9
          4       0.64      0.55      0.59        69
          5       0.48      0.62      0.55        24
          6       0.80      0.57      0.67        28
          7       0.66      0.67      0.67        95
          8       0.00      0.00      0.00         2
          9       0.75      0.75      0.75         4

avg / total       0.63      0.60      0.61       333


confusion_matrix
 [[34  1  2 13  4  0  3  0  0]
 [ 2 24  0  2  0  0 16  1  0]
 [ 0  0  6  1  1  0  1  0  0]
 [15  0  1 38  7  4  2  2  0]
 [ 2  1  1  2 15  0  3  0  0]
 [ 4  1  0  0  2 16  5  0  0]
 [ 2 10 13  3  2  0 64  0  1]
 [ 0  0  0  0  0  0  2  0  0]
 [ 0  0  0  0  0  0  1  0  3]]


In [36]:
pd.DataFrame(smreg_rs.cv_results_).transpose()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9
mean_fit_time,27.5595,198.528,112.677,268.056,182.775,172.518,81.6039,71.7768,142.068,105.899
mean_score_time,0.168866,0.25248,0.213176,0.119718,0.22307,0.213178,0.214585,0.171396,0.154252,0.123574
mean_test_score,0.296854,0.445114,0.455154,0.509705,0.418005,0.469545,0.308902,0.453481,0.409304,0.399264
mean_train_score,0.300342,0.479201,0.521915,0.614491,0.44973,0.555049,0.314801,0.509134,0.42422,0.418052
param_C,10,8,10,9,4,10,1,4,10,9
param_class_weight,,,balanced,balanced,,balanced,,balanced,,
param_max_iter,250,150,200,200,100,250,250,250,200,300
param_tol,1e-05,0.0001,0.0001,0.001,0.001,0.001,0.0001,1e-05,0.001,1e-05
params,"{'tol': 1e-05, 'max_iter': 250, 'class_weight'...","{'tol': 0.0001, 'max_iter': 150, 'class_weight...","{'tol': 0.0001, 'max_iter': 200, 'class_weight...","{'tol': 0.001, 'max_iter': 200, 'class_weight'...","{'tol': 0.001, 'max_iter': 100, 'class_weight'...","{'tol': 0.001, 'max_iter': 250, 'class_weight'...","{'tol': 0.0001, 'max_iter': 250, 'class_weight...","{'tol': 1e-05, 'max_iter': 250, 'class_weight'...","{'tol': 0.001, 'max_iter': 200, 'class_weight'...","{'tol': 1e-05, 'max_iter': 300, 'class_weight'..."
rank_test_score,10,5,3,1,6,2,9,4,7,8


## K Nearest Neighbour

In [37]:
from sklearn.neighbors import KNeighborsClassifier
knn_model = KNeighborsClassifier(n_neighbors=3)

In [38]:
evaluate_model(knn_model)

log_loss
 8.1949303197
f1_score
 0.477345011858
accuracy_score
 0.612612612613

classification_report
              precision    recall  f1-score   support

          1       0.54      0.47      0.50        57
          2       0.54      0.49      0.51        45
          3       0.00      0.00      0.00         9
          4       0.64      0.77      0.70        69
          5       0.36      0.21      0.26        24
          6       0.94      0.61      0.74        28
          7       0.65      0.81      0.72        95
          8       0.00      0.00      0.00         2
          9       1.00      0.75      0.86         4

avg / total       0.60      0.61      0.60       333


confusion_matrix
 [[27  4  1 19  3  0  3  0  0]
 [ 2 22  1  1  1  0 18  0  0]
 [ 0  0  0  1  2  0  6  0  0]
 [11  0  2 53  1  0  2  0  0]
 [ 5  1  0  6  5  0  7  0  0]
 [ 3  2  0  1  2 17  3  0  0]
 [ 2 12  2  2  0  0 77  0  0]
 [ 0  0  0  0  0  1  1  0  0]
 [ 0  0  0  0  0  0  1  0  3]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [39]:
parameters = {'n_neighbors':[3,4,5], 
              'algorithm':['auto', 'ball_tree', 'kd_tree', 'brute'], 
              'leaf_size':[30,35,40,45,50]
             }

knn_rs = RandomizedSearchCV(knn_model, parameters, n_jobs=10, n_iter=30)


In [40]:
evaluate_model(knn_rs)





log_loss
 8.1949303197
f1_score
 0.477345011858
accuracy_score
 0.612612612613

classification_report
              precision    recall  f1-score   support

          1       0.54      0.47      0.50        57
          2       0.54      0.49      0.51        45
          3       0.00      0.00      0.00         9
          4       0.64      0.77      0.70        69
          5       0.36      0.21      0.26        24
          6       0.94      0.61      0.74        28
          7       0.65      0.81      0.72        95
          8       0.00      0.00      0.00         2
          9       1.00      0.75      0.86         4

avg / total       0.60      0.61      0.60       333


confusion_matrix
 [[27  4  1 19  3  0  3  0  0]
 [ 2 22  1  1  1  0 18  0  0]
 [ 0  0  0  1  2  0  6  0  0]
 [11  0  2 53  1  0  2  0  0]
 [ 5  1  0  6  5  0  7  0  0]
 [ 3  2  0  1  2 17  3  0  0]
 [ 2 12  2  2  0  0 77  0  0]
 [ 0  0  0  0  0  1  1  0  0]
 [ 0  0  0  0  0  0  1  0  3]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [41]:
pd.DataFrame(knn_rs.cv_results_).transpose()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29
mean_fit_time,0.206978,0.195477,0.248091,0.243201,0.271506,0.234352,0.220022,0.221808,0.242535,0.204945,0.268333,0.245849,0.218028,0.267725,0.284965,0.265359,0.193832,0.277452,0.25334,0.257153,0.245191,0.211847,0.274466,0.26906,0.267873,0.265113,0.296615,0.216831,0.259145,0.236642
mean_score_time,5.54123,5.96974,6.18311,4.9272,5.90894,6.02457,5.77798,6.16831,5.68161,6.06576,5.6261,5.59572,5.66479,5.92912,6.0613,5.84496,4.84615,6.00699,5.72506,5.45945,5.90998,5.5023,5.55137,6.00434,5.49428,5.62633,6.10382,5.22852,5.82188,6.15362
mean_test_score,0.570616,0.583668,0.57162,0.57162,0.570616,0.57162,0.57162,0.57162,0.583668,0.570616,0.583668,0.583668,0.583668,0.583668,0.57162,0.583668,0.570616,0.583668,0.570616,0.570616,0.583668,0.57162,0.570616,0.570616,0.583668,0.57162,0.57162,0.570616,0.583668,0.57162
mean_train_score,0.692778,0.757202,0.716873,0.716873,0.692778,0.716873,0.716873,0.716873,0.757202,0.692778,0.757202,0.757202,0.757202,0.757202,0.716873,0.757202,0.692778,0.757202,0.692778,0.692778,0.757202,0.716873,0.692778,0.692778,0.757202,0.716873,0.716873,0.692778,0.757202,0.716873
param_algorithm,brute,brute,kd_tree,brute,ball_tree,kd_tree,ball_tree,brute,kd_tree,brute,ball_tree,brute,brute,kd_tree,ball_tree,ball_tree,auto,brute,brute,kd_tree,ball_tree,brute,kd_tree,auto,kd_tree,auto,ball_tree,ball_tree,kd_tree,ball_tree
param_leaf_size,35,50,30,50,30,40,50,30,35,45,45,40,45,45,45,35,40,35,50,30,30,35,35,30,50,30,40,35,40,35
param_n_neighbors,5,3,4,4,5,4,4,4,3,5,3,3,3,3,4,3,5,3,5,5,3,4,5,5,3,4,4,5,3,4
params,"{'n_neighbors': 5, 'leaf_size': 35, 'algorithm...","{'n_neighbors': 3, 'leaf_size': 50, 'algorithm...","{'n_neighbors': 4, 'leaf_size': 30, 'algorithm...","{'n_neighbors': 4, 'leaf_size': 50, 'algorithm...","{'n_neighbors': 5, 'leaf_size': 30, 'algorithm...","{'n_neighbors': 4, 'leaf_size': 40, 'algorithm...","{'n_neighbors': 4, 'leaf_size': 50, 'algorithm...","{'n_neighbors': 4, 'leaf_size': 30, 'algorithm...","{'n_neighbors': 3, 'leaf_size': 35, 'algorithm...","{'n_neighbors': 5, 'leaf_size': 45, 'algorithm...","{'n_neighbors': 3, 'leaf_size': 45, 'algorithm...","{'n_neighbors': 3, 'leaf_size': 40, 'algorithm...","{'n_neighbors': 3, 'leaf_size': 45, 'algorithm...","{'n_neighbors': 3, 'leaf_size': 45, 'algorithm...","{'n_neighbors': 4, 'leaf_size': 45, 'algorithm...","{'n_neighbors': 3, 'leaf_size': 35, 'algorithm...","{'n_neighbors': 5, 'leaf_size': 40, 'algorithm...","{'n_neighbors': 3, 'leaf_size': 35, 'algorithm...","{'n_neighbors': 5, 'leaf_size': 50, 'algorithm...","{'n_neighbors': 5, 'leaf_size': 30, 'algorithm...","{'n_neighbors': 3, 'leaf_size': 30, 'algorithm...","{'n_neighbors': 4, 'leaf_size': 35, 'algorithm...","{'n_neighbors': 5, 'leaf_size': 35, 'algorithm...","{'n_neighbors': 5, 'leaf_size': 30, 'algorithm...","{'n_neighbors': 3, 'leaf_size': 50, 'algorithm...","{'n_neighbors': 4, 'leaf_size': 30, 'algorithm...","{'n_neighbors': 4, 'leaf_size': 40, 'algorithm...","{'n_neighbors': 5, 'leaf_size': 35, 'algorithm...","{'n_neighbors': 3, 'leaf_size': 40, 'algorithm...","{'n_neighbors': 4, 'leaf_size': 35, 'algorithm..."
rank_test_score,22,1,12,12,22,12,12,12,1,22,1,1,1,1,12,1,22,1,22,22,1,12,22,22,1,12,12,22,1,12
split0_test_score,0.562563,0.565566,0.567568,0.567568,0.562563,0.567568,0.567568,0.567568,0.565566,0.562563,0.565566,0.565566,0.565566,0.565566,0.567568,0.565566,0.562563,0.565566,0.562563,0.562563,0.565566,0.567568,0.562563,0.562563,0.565566,0.567568,0.567568,0.562563,0.565566,0.567568


## Passive Aggresive Classifier

In [42]:
from sklearn.linear_model import PassiveAggressiveClassifier
pag_model = PassiveAggressiveClassifier()

In [43]:
evaluate_model(pag_model, predict_proba=False)



f1_score
 0.480985363462
accuracy_score
 0.630630630631

classification_report
              precision    recall  f1-score   support

          1       0.75      0.37      0.49        57
          2       0.66      0.51      0.57        45
          3       0.00      0.00      0.00         9
          4       0.54      0.81      0.65        69
          5       0.41      0.38      0.39        24
          6       0.92      0.43      0.59        28
          7       0.68      0.91      0.77        95
          8       0.00      0.00      0.00         2
          9       1.00      0.75      0.86         4

avg / total       0.64      0.63      0.61       333


confusion_matrix
 [[21  0  0 28  4  1  3  0  0]
 [ 0 23  0  2  2  0 18  0  0]
 [ 0  0  0  2  1  0  6  0  0]
 [ 6  0  0 56  3  0  4  0  0]
 [ 0  1  0 10  9  0  4  0  0]
 [ 1  6  0  4  2 12  3  0  0]
 [ 0  5  2  1  1  0 86  0  0]
 [ 0  0  0  0  0  0  2  0  0]
 [ 0  0  0  0  0  0  1  0  3]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [44]:
parameters = {
              'C':[1,2,3,4,5,6,7,8,9,10], 
              'tol' : [1e-5, 1e-4, 1e-3],
              'max_iter' : [800, 1000,1200],
              'loss':['hinge', 'squared_hinge']}

pag_rs = RandomizedSearchCV(pag_model, parameters, n_iter=100, n_jobs=-1)


In [45]:
evaluate_model(pag_rs, predict_proba=False)

f1_score
 0.509996921376
accuracy_score
 0.633633633634

classification_report
              precision    recall  f1-score   support

          1       0.64      0.53      0.58        57
          2       0.69      0.49      0.57        45
          3       0.00      0.00      0.00         9
          4       0.60      0.61      0.60        69
          5       0.47      0.58      0.52        24
          6       0.78      0.64      0.71        28
          7       0.67      0.86      0.76        95
          8       0.00      0.00      0.00         2
          9       1.00      0.75      0.86         4

avg / total       0.63      0.63      0.62       333


confusion_matrix
 [[30  0  2 19  3  1  2  0  0]
 [ 0 22  1  1  1  1 19  0  0]
 [ 0  0  0  1  2  0  6  0  0]
 [14  0  1 42  6  2  4  0  0]
 [ 1  1  0  4 14  1  3  0  0]
 [ 2  2  0  1  2 18  3  0  0]
 [ 0  7  2  2  2  0 82  0  0]
 [ 0  0  0  0  0  0  2  0  0]
 [ 0  0  0  0  0  0  1  0  3]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [46]:
pd.DataFrame(pag_rs.cv_results_).transpose()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99
mean_fit_time,4.69141,5.17403,5.81754,6.05769,5.64559,5.73351,5.44339,6.20617,5.93692,4.9666,5.2936,5.63633,6.01756,5.52463,5.35597,4.8775,5.98308,5.65369,5.75122,6.04173,4.68882,5.83786,6.65004,5.23889,5.7606,5.81416,4.76406,4.98885,6.78704,5.90837,5.42041,4.90635,6.12552,4.9673,5.03094,5.31631,5.69464,5.26255,5.15273,5.10388,5.27021,5.5899,5.1643,6.16244,4.87604,5.92397,5.27779,5.4448,5.18967,5.70801,5.90522,5.50348,6.03759,5.70091,5.56506,5.81096,5.39011,5.5249,5.50633,5.16198,5.30588,7.0649,5.4077,5.04437,5.84974,5.69728,5.13546,5.6758,4.92367,5.83372,5.30657,5.79851,5.39185,5.20008,5.34125,5.58655,6.09797,4.94426,5.81323,7.15672,4.38186,5.69502,4.51838,5.25462,4.82495,4.80061,5.02607,4.3407,6.51144,6.3211,5.31898,4.71003,5.21117,5.63814,5.77182,5.39516,5.55288,5.03671,4.85112,4.20524
mean_score_time,0.189719,0.240132,0.208435,0.225928,0.212793,0.266837,0.217597,0.134774,0.17759,0.208523,0.210603,0.197531,0.214884,0.225052,0.336249,0.213195,0.180372,0.264629,0.178855,0.18627,0.194383,0.260447,0.258638,0.180881,0.183927,0.175092,0.161024,0.166639,0.283745,0.244208,0.200292,0.182029,0.20899,0.294645,0.172888,0.211955,0.185197,0.228238,0.293007,0.143034,0.227473,0.159958,0.20903,0.249781,0.2119,0.295004,0.164955,0.280722,0.217975,0.185283,0.196856,0.252122,0.24211,0.273185,0.170086,0.18409,0.26958,0.243639,0.308593,0.178258,0.218136,0.211781,0.258549,0.213837,0.163956,0.211572,0.260401,0.200077,0.229689,0.195647,0.199248,0.211747,0.231224,0.213782,0.249848,0.230768,0.250966,0.161769,0.218041,0.218727,0.17284,0.22782,0.18375,0.293927,0.230111,0.268408,0.191907,0.257111,0.235776,0.233858,0.201623,0.199014,0.283054,0.205147,0.258849,0.254384,0.209584,0.219595,0.158811,0.0942084
mean_test_score,0.598728,0.596386,0.600736,0.600736,0.604418,0.604418,0.604418,0.604752,0.582999,0.582999,0.588353,0.588688,0.602075,0.594712,0.592369,0.583333,0.591365,0.591365,0.591365,0.594043,0.608768,0.605422,0.594712,0.607764,0.611111,0.603748,0.603748,0.600402,0.5917,0.59672,0.579987,0.601071,0.600067,0.593708,0.60743,0.600402,0.587349,0.592704,0.603414,0.597724,0.587349,0.60743,0.588019,0.588688,0.584337,0.605756,0.603414,0.580656,0.599732,0.602744,0.607764,0.60174,0.599063,0.58668,0.592035,0.598394,0.602744,0.589357,0.5917,0.590027,0.594043,0.617805,0.587015,0.602744,0.60174,0.609103,0.596051,0.602075,0.576305,0.605422,0.559572,0.581995,0.578313,0.574632,0.599398,0.588019,0.598394,0.602744,0.60174,0.611111,0.596051,0.592704,0.597724,0.594378,0.588019,0.60676,0.592369,0.600402,0.617135,0.610776,0.59672,0.592704,0.598059,0.595047,0.576975,0.59672,0.608099,0.58668,0.597724,0.610107
mean_train_score,0.82076,0.820092,0.820759,0.820759,0.839194,0.839194,0.839194,0.848059,0.829817,0.830341,0.831161,0.836006,0.844535,0.834001,0.830987,0.828475,0.824938,0.824604,0.824604,0.830132,0.833848,0.841879,0.835041,0.834521,0.850734,0.84889,0.838012,0.841364,0.855424,0.847551,0.823617,0.840351,0.837831,0.840347,0.84354,0.845216,0.829809,0.824953,0.840704,0.830138,0.833172,0.834323,0.829654,0.83451,0.83316,0.849404,0.829316,0.83483,0.843196,0.845543,0.837345,0.839508,0.835032,0.834861,0.853911,0.834865,0.838027,0.831518,0.837201,0.832355,0.832836,0.848389,0.822136,0.839013,0.823599,0.829137,0.828804,0.823934,0.817787,0.840687,0.809581,0.823642,0.819761,0.818755,0.832322,0.834155,0.827313,0.838685,0.834509,0.847905,0.825802,0.832342,0.833842,0.831,0.829492,0.834012,0.831665,0.833344,0.854919,0.851409,0.832508,0.831168,0.837184,0.830294,0.82549,0.83918,0.841541,0.832323,0.826961,0.831138
param_C,6,9,10,10,7,10,7,1,5,2,6,6,4,10,6,10,7,9,5,2,6,4,2,6,3,3,7,5,1,2,10,6,1,2,7,7,4,3,4,9,5,10,6,4,3,2,6,3,7,10,3,4,8,3,1,5,10,10,2,10,3,1,8,10,8,3,4,9,4,2,5,8,6,6,2,2,4,8,5,1,5,9,8,5,10,9,7,8,1,1,3,9,5,7,2,3,1,5,4,8
param_loss,hinge,hinge,squared_hinge,squared_hinge,hinge,hinge,hinge,squared_hinge,hinge,hinge,squared_hinge,squared_hinge,squared_hinge,squared_hinge,squared_hinge,squared_hinge,hinge,hinge,hinge,hinge,hinge,squared_hinge,squared_hinge,hinge,squared_hinge,squared_hinge,hinge,squared_hinge,hinge,squared_hinge,hinge,squared_hinge,squared_hinge,squared_hinge,hinge,squared_hinge,squared_hinge,hinge,squared_hinge,squared_hinge,squared_hinge,hinge,squared_hinge,squared_hinge,hinge,squared_hinge,hinge,hinge,hinge,hinge,squared_hinge,squared_hinge,squared_hinge,squared_hinge,hinge,hinge,hinge,hinge,hinge,squared_hinge,squared_hinge,squared_hinge,hinge,hinge,squared_hinge,hinge,hinge,squared_hinge,hinge,squared_hinge,hinge,squared_hinge,squared_hinge,hinge,squared_hinge,hinge,hinge,squared_hinge,squared_hinge,hinge,hinge,squared_hinge,squared_hinge,hinge,hinge,hinge,squared_hinge,hinge,squared_hinge,squared_hinge,hinge,squared_hinge,hinge,squared_hinge,squared_hinge,hinge,squared_hinge,hinge,hinge,squared_hinge
param_max_iter,800,1000,800,800,1000,1000,1200,1200,800,800,1000,1000,1000,1000,1200,1000,800,1200,1000,800,1000,1200,1200,800,1000,800,1200,1200,1200,1000,1200,1200,1200,800,1000,800,1000,800,1000,800,800,1200,1000,800,1000,800,1000,800,1000,800,800,1200,1200,1200,800,1200,800,1000,800,1200,800,1200,800,800,1000,1200,800,1000,1000,1200,1200,800,800,800,1000,1000,1200,1200,1000,800,800,1000,1200,1000,1200,800,1200,800,1000,800,1200,1200,1000,1000,800,800,1000,1200,1200,1000
param_tol,1e-05,0.0001,0.0001,1e-05,1e-05,0.0001,1e-05,0.001,1e-05,0.001,0.001,1e-05,0.0001,1e-05,0.001,0.001,0.0001,1e-05,1e-05,1e-05,1e-05,1e-05,1e-05,0.001,1e-05,0.0001,0.001,0.001,0.0001,0.0001,1e-05,0.0001,0.0001,0.001,0.001,1e-05,1e-05,0.001,0.001,0.001,1e-05,0.0001,0.0001,1e-05,0.0001,0.0001,0.0001,1e-05,0.0001,1e-05,1e-05,0.001,1e-05,0.0001,0.001,1e-05,0.0001,0.001,0.0001,0.001,0.001,1e-05,0.0001,0.001,0.0001,0.0001,0.0001,0.0001,1e-05,0.0001,0.001,0.0001,0.001,0.0001,0.001,1e-05,1e-05,0.0001,1e-05,1e-05,0.001,1e-05,0.001,0.001,0.001,0.001,0.001,0.001,1e-05,0.0001,1e-05,0.001,0.0001,1e-05,1e-05,0.0001,0.001,0.0001,0.0001,0.001
params,"{'tol': 1e-05, 'max_iter': 800, 'loss': 'hinge...","{'tol': 0.0001, 'max_iter': 1000, 'loss': 'hin...","{'tol': 0.0001, 'max_iter': 800, 'loss': 'squa...","{'tol': 1e-05, 'max_iter': 800, 'loss': 'squar...","{'tol': 1e-05, 'max_iter': 1000, 'loss': 'hing...","{'tol': 0.0001, 'max_iter': 1000, 'loss': 'hin...","{'tol': 1e-05, 'max_iter': 1200, 'loss': 'hing...","{'tol': 0.001, 'max_iter': 1200, 'loss': 'squa...","{'tol': 1e-05, 'max_iter': 800, 'loss': 'hinge...","{'tol': 0.001, 'max_iter': 800, 'loss': 'hinge...","{'tol': 0.001, 'max_iter': 1000, 'loss': 'squa...","{'tol': 1e-05, 'max_iter': 1000, 'loss': 'squa...","{'tol': 0.0001, 'max_iter': 1000, 'loss': 'squ...","{'tol': 1e-05, 'max_iter': 1000, 'loss': 'squa...","{'tol': 0.001, 'max_iter': 1200, 'loss': 'squa...","{'tol': 0.001, 'max_iter': 1000, 'loss': 'squa...","{'tol': 0.0001, 'max_iter': 800, 'loss': 'hing...","{'tol': 1e-05, 'max_iter': 1200, 'loss': 'hing...","{'tol': 1e-05, 'max_iter': 1000, 'loss': 'hing...","{'tol': 1e-05, 'max_iter': 800, 'loss': 'hinge...","{'tol': 1e-05, 'max_iter': 1000, 'loss': 'hing...","{'tol': 1e-05, 'max_iter': 1200, 'loss': 'squa...","{'tol': 1e-05, 'max_iter': 1200, 'loss': 'squa...","{'tol': 0.001, 'max_iter': 800, 'loss': 'hinge...","{'tol': 1e-05, 'max_iter': 1000, 'loss': 'squa...","{'tol': 0.0001, 'max_iter': 800, 'loss': 'squa...","{'tol': 0.001, 'max_iter': 1200, 'loss': 'hing...","{'tol': 0.001, 'max_iter': 1200, 'loss': 'squa...","{'tol': 0.0001, 'max_iter': 1200, 'loss': 'hin...","{'tol': 0.0001, 'max_iter': 1000, 'loss': 'squ...","{'tol': 1e-05, 'max_iter': 1200, 'loss': 'hing...","{'tol': 0.0001, 'max_iter': 1200, 'loss': 'squ...","{'tol': 0.0001, 'max_iter': 1200, 'loss': 'squ...","{'tol': 0.001, 'max_iter': 800, 'loss': 'squar...","{'tol': 0.001, 'max_iter': 1000, 'loss': 'hing...","{'tol': 1e-05, 'max_iter': 800, 'loss': 'squar...","{'tol': 1e-05, 'max_iter': 1000, 'loss': 'squa...","{'tol': 0.001, 'max_iter': 800, 'loss': 'hinge...","{'tol': 0.001, 'max_iter': 1000, 'loss': 'squa...","{'tol': 0.001, 'max_iter': 800, 'loss': 'squar...","{'tol': 1e-05, 'max_iter': 800, 'loss': 'squar...","{'tol': 0.0001, 'max_iter': 1200, 'loss': 'hin...","{'tol': 0.0001, 'max_iter': 1000, 'loss': 'squ...","{'tol': 1e-05, 'max_iter': 800, 'loss': 'squar...","{'tol': 0.0001, 'max_iter': 1000, 'loss': 'hin...","{'tol': 0.0001, 'max_iter': 800, 'loss': 'squa...","{'tol': 0.0001, 'max_iter': 1000, 'loss': 'hin...","{'tol': 1e-05, 'max_iter': 800, 'loss': 'hinge...","{'tol': 0.0001, 'max_iter': 1000, 'loss': 'hin...","{'tol': 1e-05, 'max_iter': 800, 'loss': 'hinge...","{'tol': 1e-05, 'max_iter': 800, 'loss': 'squar...","{'tol': 0.001, 'max_iter': 1200, 'loss': 'squa...","{'tol': 1e-05, 'max_iter': 1200, 'loss': 'squa...","{'tol': 0.0001, 'max_iter': 1200, 'loss': 'squ...","{'tol': 0.001, 'max_iter': 800, 'loss': 'hinge...","{'tol': 1e-05, 'max_iter': 1200, 'loss': 'hing...","{'tol': 0.0001, 'max_iter': 800, 'loss': 'hing...","{'tol': 0.001, 'max_iter': 1000, 'loss': 'hing...","{'tol': 0.0001, 'max_iter': 800, 'loss': 'hing...","{'tol': 0.001, 'max_iter': 1200, 'loss': 'squa...","{'tol': 0.001, 'max_iter': 800, 'loss': 'squar...","{'tol': 1e-05, 'max_iter': 1200, 'loss': 'squa...","{'tol': 0.0001, 'max_iter': 800, 'loss': 'hing...","{'tol': 0.001, 'max_iter': 800, 'loss': 'hinge...","{'tol': 0.0001, 'max_iter': 1000, 'loss': 'squ...","{'tol': 0.0001, 'max_iter': 1200, 'loss': 'hin...","{'tol': 0.0001, 'max_iter': 800, 'loss': 'hing...","{'tol': 0.0001, 'max_iter': 1000, 'loss': 'squ...","{'tol': 1e-05, 'max_iter': 1000, 'loss': 'hing...","{'tol': 0.0001, 'max_iter': 1200, 'loss': 'squ...","{'tol': 0.001, 'max_iter': 1200, 'loss': 'hing...","{'tol': 0.0001, 'max_iter': 800, 'loss': 'squa...","{'tol': 0.001, 'max_iter': 800, 'loss': 'squar...","{'tol': 0.0001, 'max_iter': 800, 'loss': 'hing...","{'tol': 0.001, 'max_iter': 1000, 'loss': 'squa...","{'tol': 1e-05, 'max_iter': 1000, 'loss': 'hing...","{'tol': 1e-05, 'max_iter': 1200, 'loss': 'hing...","{'tol': 0.0001, 'max_iter': 1200, 'loss': 'squ...","{'tol': 1e-05, 'max_iter': 1000, 'loss': 'squa...","{'tol': 1e-05, 'max_iter': 800, 'loss': 'hinge...","{'tol': 0.001, 'max_iter': 800, 'loss': 'hinge...","{'tol': 1e-05, 'max_iter': 1000, 'loss': 'squa...","{'tol': 0.001, 'max_iter': 1200, 'loss': 'squa...","{'tol': 0.001, 'max_iter': 1000, 'loss': 'hing...","{'tol': 0.001, 'max_iter': 1200, 'loss': 'hing...","{'tol': 0.001, 'max_iter': 800, 'loss': 'hinge...","{'tol': 0.001, 'max_iter': 1200, 'loss': 'squa...","{'tol': 0.001, 'max_iter': 800, 'loss': 'hinge...","{'tol': 1e-05, 'max_iter': 1000, 'loss': 'squa...","{'tol': 0.0001, 'max_iter': 800, 'loss': 'squa...","{'tol': 1e-05, 'max_iter': 1200, 'loss': 'hing...","{'tol': 0.001, 'max_iter': 1200, 'loss': 'squa...","{'tol': 0.0001, 'max_iter': 1000, 'loss': 'hin...","{'tol': 1e-05, 'max_iter': 1000, 'loss': 'squa...","{'tol': 1e-05, 'max_iter': 800, 'loss': 'squar...","{'tol': 0.0001, 'max_iter': 800, 'loss': 'hing...","{'tol': 0.001, 'max_iter': 1000, 'loss': 'squa...","{'tol': 0.0001, 'max_iter': 1200, 'loss': 'hin...","{'tol': 0.0001, 'max_iter': 1200, 'loss': 'hin...","{'tol': 0.001, 'max_iter': 1000, 'loss': 'squa..."
rank_test_score,45,55,36,36,19,19,19,18,91,91,80,78,30,59,68,90,73,73,73,62,8,16,59,10,3,22,22,38,71,52,95,35,41,64,12,38,84,65,24,49,84,12,81,78,89,15,24,94,42,26,10,32,44,87,70,46,26,77,71,76,62,1,86,26,32,7,56,30,98,16,100,93,96,99,43,81,46,26,32,3,56,65,49,61,81,14,68,38,2,5,52,65,48,58,97,52,9,87,49,6


## Quadratic Discriminant Analysis

In [40]:
from sklearn.discriminant_analysis import QuadraticDiscriminantAnalysis
qda_model = QuadraticDiscriminantAnalysis()

In [41]:
evaluate_model(qda_model, sparse=False)

  S2 = (S ** 2) / (len(Xg) - 1)


LinAlgError: SVD did not converge

## Decision Trees Classifier

In [20]:
from sklearn.tree import DecisionTreeClassifier
dt_model = DecisionTreeClassifier()

In [21]:
evaluate_model(dt_model)

log_loss
 13.8984865974
f1_score
 0.4952080585
accuracy_score
 0.597597597598

classification_report
              precision    recall  f1-score   support

          1       0.66      0.58      0.62        57
          2       0.49      0.38      0.42        45
          3       0.56      0.56      0.56         9
          4       0.63      0.67      0.65        69
          5       0.31      0.21      0.25        24
          6       0.48      0.50      0.49        28
          7       0.66      0.80      0.72        95
          8       0.00      0.00      0.00         2
          9       0.75      0.75      0.75         4

avg / total       0.58      0.60      0.59       333


confusion_matrix
 [[33  1  0 14  3  3  3  0  0]
 [ 3 17  0  2  4  1 17  0  1]
 [ 0  0  5  1  0  1  2  0  0]
 [10  0  1 46  1  3  7  1  0]
 [ 1  1  0  5  5  5  7  0  0]
 [ 1  6  0  2  2 14  3  0  0]
 [ 1  9  3  3  1  2 76  0  0]
 [ 0  1  0  0  0  0  1  0  0]
 [ 1  0  0  0  0  0  0  0  3]]


In [22]:
parameters = {
              'criterion':['gini','entropy'], 
              'splitter':['best','random'], 
              'max_depth':[None, 4, 6, 8, 10], 
              'min_samples_split':[2,4,6,8],
              'min_samples_leaf':[1, 3, 5, 8, 10], 
              'min_weight_fraction_leaf':[0, 0.1, 0.01, 0.5, 0.001], 
              'max_features':['log2','auto'], 
              'max_leaf_nodes':[None, 4, 8, 16, 32],
              'min_impurity_decrease':[0, 1e-3, 1e-4, 1e-5], 
              'class_weight':['balanced',None],
#               'presort':[True, False] 
}
dt_rs = RandomizedSearchCV(dt_model, parameters, n_iter=100, n_jobs=-1)


In [23]:
evaluate_model(dt_rs)

log_loss
 1.96834503006
f1_score
 0.214009018851
accuracy_score
 0.414414414414

classification_report
              precision    recall  f1-score   support

          1       0.00      0.00      0.00        57
          2       0.27      0.07      0.11        45
          3       0.00      0.00      0.00         9
          4       0.42      0.61      0.50        69
          5       0.50      0.29      0.37        24
          6       0.80      0.29      0.42        28
          7       0.39      0.82      0.53        95
          8       0.00      0.00      0.00         2
          9       0.00      0.00      0.00         4

avg / total       0.34      0.41      0.33       333


confusion_matrix
 [[ 0  0  0 26  1  1 29  0  0]
 [ 0  3  0  6  1  0 35  0  0]
 [ 0  0  0  1  1  0  7  0  0]
 [ 0  0  0 42  2  1 24  0  0]
 [ 0  0  0  6  7  0 11  0  0]
 [ 0  1  0  9  2  8  8  0  0]
 [ 0  7  0 10  0  0 78  0  0]
 [ 0  0  0  0  0  0  2  0  0]
 [ 0  0  0  0  0  0  4  0  0]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [24]:
pd.DataFrame(dt_rs.cv_results_).transpose()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99
mean_fit_time,0.313733,0.32039,0.356138,0.31481,0.296806,0.290432,0.264392,0.25365,0.257489,0.26021,0.25733,0.25164,0.254534,0.286518,0.256201,0.28327,0.266835,0.269067,0.253577,0.268765,0.281491,0.26282,0.251076,0.276193,0.255068,0.258375,0.249997,0.250794,0.247413,0.2524,0.280266,0.257586,0.282973,0.253087,0.266651,0.288054,0.26373,0.248901,0.268829,0.30966,0.271328,0.257541,0.261439,0.295585,0.254075,0.283564,0.273906,0.261038,0.251548,0.269953,0.257897,0.258045,0.270042,0.270989,0.284164,0.252578,0.273933,0.261486,0.260678,0.281827,0.316679,0.280729,0.261517,0.240767,0.297596,0.253014,0.268957,0.258091,0.280749,0.27003,0.257051,0.248549,0.266309,0.248819,0.281853,0.276501,0.284845,0.278954,0.268727,0.263831,0.279956,0.295217,0.268141,0.265991,0.273674,0.255482,0.267634,0.289877,0.274471,0.280744,0.263607,0.253787,0.249273,0.267732,0.271647,0.243611,0.258268,0.250615,0.302766,0.247863
mean_score_time,0.0158895,0.015655,0.0161053,0.0155009,0.0196966,0.0153979,0.0172904,0.0146091,0.0147491,0.0168078,0.0147785,0.0148711,0.0149643,0.0146803,0.0180303,0.0155439,0.0183969,0.0148659,0.0144838,0.017343,0.0172402,0.0156358,0.0169936,0.0153302,0.0175722,0.0147572,0.0151584,0.0148299,0.0145361,0.0144521,0.0151419,0.0161815,0.0165791,0.0172835,0.0161424,0.0158324,0.0161065,0.0146058,0.0149357,0.018237,0.0152432,0.014929,0.0148489,0.0172286,0.0146417,0.0150193,0.0195572,0.0177679,0.0160352,0.0149887,0.0177342,0.0145862,0.0146778,0.0146131,0.0148172,0.0151035,0.0156276,0.0157901,0.0144891,0.0146633,0.0164942,0.0171609,0.0148683,0.0143341,0.015564,0.0162158,0.016384,0.0146405,0.0144985,0.0148097,0.0153613,0.0146327,0.0146851,0.0148203,0.0171069,0.0149777,0.0147939,0.0148157,0.0163821,0.0146379,0.0171184,0.0148702,0.017496,0.0172877,0.0148413,0.0147684,0.014669,0.0156697,0.0148378,0.0174088,0.0148775,0.0148722,0.0147392,0.0149827,0.0149117,0.0145961,0.0147193,0.0145505,0.0149163,0.0143854
mean_test_score,0.336345,0.171352,0.287149,0.063253,0.375167,0.0639224,0.171352,0.287149,0.341031,0.0930388,0.174364,0.334003,0.171352,0.0374833,0.171352,0.447456,0.171352,0.186412,0.291165,0.159973,0.171352,0.109103,0.150602,0.387885,0.287149,0.287149,0.287149,0.171352,0.130187,0.171352,0.308902,0.296185,0.371821,0.114123,0.183735,0.269076,0.162651,0.287149,0.287149,0.214859,0.319277,0.287149,0.121486,0.177711,0.287149,0.349063,0.119813,0.290495,0.171352,0.315596,0.287149,0.290161,0.287149,0.308568,0.287149,0.293173,0.170348,0.180388,0.312249,0.287149,0.343373,0.287818,0.313922,0.171352,0.171352,0.287149,0.288153,0.287149,0.302878,0.287149,0.287149,0.298862,0.39257,0.354418,0.287149,0.171352,0.0465194,0.281459,0.0950469,0.0672691,0.143909,0.312584,0.287149,0.217537,0.13822,0.109438,0.346051,0.171352,0.323293,0.171352,0.287149,0.172691,0.0535475,0.337349,0.183066,0.292169,0.193775,0.287149,0.171352,0.297858
mean_train_score,0.346388,0.171214,0.287149,0.0663442,0.37866,0.0764791,0.171214,0.287149,0.348905,0.0953949,0.174897,0.335188,0.171214,0.0503525,0.171214,0.457827,0.171214,0.187578,0.292512,0.174192,0.171214,0.113407,0.17198,0.40746,0.287149,0.287149,0.287149,0.171214,0.128646,0.171214,0.316271,0.302036,0.37933,0.128795,0.198197,0.298678,0.162196,0.287149,0.287149,0.232348,0.322972,0.287149,0.119481,0.186336,0.287149,0.366464,0.136583,0.297004,0.171214,0.31857,0.287149,0.291666,0.287149,0.308583,0.287149,0.296346,0.176263,0.18025,0.321471,0.287149,0.35205,0.289829,0.320781,0.171214,0.171214,0.287149,0.289326,0.287149,0.308239,0.287149,0.287149,0.300885,0.423183,0.360592,0.287149,0.171214,0.052242,0.287149,0.100672,0.0693531,0.144859,0.315928,0.287149,0.225288,0.138984,0.108431,0.356096,0.171214,0.325458,0.171214,0.287149,0.189428,0.0567348,0.342192,0.20265,0.291672,0.196516,0.287149,0.171214,0.30155
param_class_weight,,balanced,,balanced,,balanced,balanced,,,balanced,balanced,,balanced,balanced,balanced,,balanced,balanced,,balanced,balanced,balanced,balanced,,,,,balanced,balanced,balanced,,,,balanced,balanced,balanced,balanced,,,balanced,,,balanced,balanced,,,balanced,,balanced,balanced,,,,,,,balanced,balanced,,,,,,balanced,balanced,,,,,,,,,,,balanced,balanced,,balanced,balanced,balanced,,,balanced,balanced,balanced,,balanced,,balanced,,balanced,balanced,,balanced,,balanced,,balanced,
param_criterion,gini,gini,entropy,entropy,entropy,entropy,entropy,entropy,gini,entropy,gini,gini,gini,entropy,gini,gini,entropy,gini,gini,gini,entropy,entropy,gini,entropy,entropy,gini,gini,entropy,gini,gini,entropy,gini,entropy,gini,gini,entropy,entropy,gini,gini,gini,gini,entropy,gini,entropy,gini,entropy,entropy,gini,gini,gini,entropy,gini,gini,entropy,entropy,gini,gini,gini,gini,entropy,gini,gini,gini,entropy,entropy,gini,entropy,entropy,entropy,entropy,gini,entropy,entropy,gini,entropy,gini,gini,gini,gini,gini,gini,entropy,entropy,gini,gini,gini,entropy,entropy,entropy,entropy,gini,gini,gini,entropy,entropy,gini,entropy,entropy,gini,entropy
param_max_depth,,10,4,10,4,6,4,10,6,10,8,4,4,10,6,,10,8,4,4,10,6,6,4,6,10,6,4,6,8,8,4,6,8,4,,,8,6,10,,10,,10,10,6,6,8,,10,6,6,4,4,,6,4,,6,8,6,10,6,4,8,6,6,,8,8,4,6,8,4,4,10,4,10,4,4,4,4,,,4,6,8,8,8,,4,,4,4,8,4,4,6,4,6
param_max_features,auto,auto,auto,auto,auto,log2,log2,log2,auto,log2,log2,auto,auto,log2,log2,auto,log2,auto,auto,auto,log2,auto,auto,auto,auto,log2,auto,log2,log2,log2,auto,log2,auto,auto,auto,auto,auto,log2,auto,auto,auto,log2,auto,auto,auto,auto,auto,log2,log2,auto,auto,log2,log2,log2,log2,log2,auto,log2,log2,log2,auto,log2,log2,auto,log2,log2,log2,log2,log2,log2,log2,log2,auto,auto,log2,log2,auto,auto,auto,log2,log2,log2,log2,auto,log2,log2,auto,log2,auto,log2,auto,log2,auto,auto,auto,log2,auto,log2,auto,log2
param_max_leaf_nodes,16,4,32,4,,32,16,,32,32,16,,4,8,4,16,32,8,16,32,16,4,,32,4,4,4,16,4,16,4,,16,32,,32,16,,,32,4,4,16,8,16,32,8,32,4,16,16,8,4,32,32,32,,4,32,8,4,4,32,32,32,,4,16,16,4,32,32,16,,32,,4,,4,8,8,16,16,8,,4,8,32,4,8,,32,4,,16,32,4,16,32,32
param_min_impurity_decrease,0.001,0.001,0.0001,0.0001,0,0.001,0,0,0.0001,0.0001,0.001,0.0001,0.001,0,0.001,1e-05,1e-05,0.001,1e-05,0.001,1e-05,1e-05,0.001,0,0.001,0.001,0.0001,0.001,0.0001,0,0,0.0001,0,0.0001,0.0001,0,0.0001,0.0001,0,1e-05,0.0001,0.0001,0.001,1e-05,1e-05,0.001,1e-05,0.001,0,0.0001,0.0001,0,0.001,1e-05,0.001,0.0001,1e-05,0.001,0.0001,1e-05,0.001,1e-05,0,0.001,1e-05,0,0.001,0.0001,0,0.001,0,0.001,1e-05,1e-05,0.001,0.0001,1e-05,1e-05,0.0001,0.001,0.0001,0.001,0.001,0,1e-05,0,0.0001,1e-05,0,0,0,0,0.001,0,0,0.001,0.001,0.0001,0.001,1e-05


## AdaBoost Classifier

In [25]:
from sklearn.ensemble import AdaBoostClassifier
adab_model = AdaBoostClassifier()

In [26]:
evaluate_model(adab_model)

log_loss
 2.03696523324
f1_score
 0.171401646976
accuracy_score
 0.408408408408

classification_report
              precision    recall  f1-score   support

          1       0.00      0.00      0.00        57
          2       0.00      0.00      0.00        45
          3       0.00      0.00      0.00         9
          4       0.31      0.84      0.45        69
          5       0.00      0.00      0.00        24
          6       0.00      0.00      0.00        28
          7       0.54      0.80      0.65        95
          8       0.00      0.00      0.00         2
          9       0.40      0.50      0.44         4

avg / total       0.22      0.41      0.28       333


confusion_matrix
 [[ 0  0  0 52  0  0  5  0  0]
 [ 0  0  0 20  0  0 25  0  0]
 [ 0  0  0  3  0  0  6  0  0]
 [ 0  0  0 58  0  0  8  0  3]
 [ 0  0  0 16  0  0  8  0  0]
 [ 0  0  0 19  0  0  9  0  0]
 [ 0  0  0 19  0  0 76  0  0]
 [ 0  0  0  0  0  0  2  0  0]
 [ 0  0  0  1  0  0  1  0  2]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [27]:
parameters = {
              # adaboost
              'n_estimators' :[50, 100, 150, 200, 500],
              'learning_rate':[1e-4, 1e-3, 1e-2, 1e-1],
              'algorithm' : ['SAMME', 'SAMME.R'],
}
adab_rs = RandomizedSearchCV(adab_model, parameters, n_iter=25, n_jobs=-1)


In [28]:
evaluate_model(adab_rs)

log_loss
 2.19089383327
f1_score
 0.22149712825
accuracy_score
 0.465465465465

classification_report
              precision    recall  f1-score   support

          1       0.29      0.40      0.34        57
          2       0.00      0.00      0.00        45
          3       0.00      0.00      0.00         9
          4       0.46      0.57      0.51        69
          5       0.00      0.00      0.00        24
          6       1.00      0.32      0.49        28
          7       0.53      0.88      0.66        95
          8       0.00      0.00      0.00         2
          9       0.00      0.00      0.00         4

avg / total       0.38      0.47      0.39       333


confusion_matrix
 [[23  0  0 31  0  0  3  0  0]
 [12  0  0  0  0  0 33  0  0]
 [ 1  0  0  2  0  0  6  0  0]
 [18  0  0 39  0  0 12  0  0]
 [13  0  0  4  0  0  7  0  0]
 [ 6  0  0  2  0  9 11  0  0]
 [ 6  1  0  4  0  0 84  0  0]
 [ 0  0  0  0  0  0  2  0  0]
 [ 1  0  0  2  0  0  1  0  0]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [29]:
pd.DataFrame(adab_rs.cv_results_).transpose()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24
mean_fit_time,156.573,38.2216,381.323,38.2644,116.594,75.3731,38.8562,387.191,77.4278,116.787,154.186,40.1432,39.4332,115.648,112.91,77.6359,155.363,154.296,78.8653,154.435,329.653,78.2523,78.8717,106.909,125.501
mean_score_time,3.49791,0.770263,7.3015,0.941093,2.45084,1.922,0.892208,7.01006,1.39271,2.15467,3.16781,0.747203,0.822978,2.22351,2.05617,1.47483,2.94339,3.66996,1.36709,2.90098,5.41284,1.52498,1.76703,1.89943,2.27419
mean_test_score,0.389558,0.39257,0.44344,0.379518,0.424364,0.383869,0.447122,0.393909,0.379518,0.456493,0.379518,0.379518,0.404618,0.379518,0.379518,0.379518,0.396921,0.379518,0.396252,0.45917,0.38253,0.413655,0.423695,0.387216,0.424699
mean_train_score,0.392233,0.396753,0.48713,0.380023,0.425873,0.389231,0.452309,0.394071,0.380023,0.466698,0.380023,0.380023,0.405113,0.380023,0.380023,0.380023,0.397762,0.380023,0.396091,0.467709,0.38538,0.418505,0.454985,0.387724,0.431391
param_algorithm,SAMME,SAMME,SAMME.R,SAMME.R,SAMME.R,SAMME.R,SAMME,SAMME,SAMME,SAMME,SAMME.R,SAMME,SAMME.R,SAMME.R,SAMME,SAMME.R,SAMME.R,SAMME,SAMME,SAMME,SAMME,SAMME.R,SAMME.R,SAMME,SAMME.R
param_learning_rate,0.001,0.01,0.1,0.0001,0.01,0.001,0.1,0.001,0.0001,0.1,0.0001,0.0001,0.01,0.0001,0.0001,0.0001,0.001,0.0001,0.01,0.1,0.0001,0.01,0.1,0.001,0.01
param_n_estimators,200,50,500,50,150,100,50,500,100,150,200,50,50,150,150,100,200,200,100,200,500,100,100,150,200
params,"{'n_estimators': 200, 'learning_rate': 0.001, ...","{'n_estimators': 50, 'learning_rate': 0.01, 'a...","{'n_estimators': 500, 'learning_rate': 0.1, 'a...","{'n_estimators': 50, 'learning_rate': 0.0001, ...","{'n_estimators': 150, 'learning_rate': 0.01, '...","{'n_estimators': 100, 'learning_rate': 0.001, ...","{'n_estimators': 50, 'learning_rate': 0.1, 'al...","{'n_estimators': 500, 'learning_rate': 0.001, ...","{'n_estimators': 100, 'learning_rate': 0.0001,...","{'n_estimators': 150, 'learning_rate': 0.1, 'a...","{'n_estimators': 200, 'learning_rate': 0.0001,...","{'n_estimators': 50, 'learning_rate': 0.0001, ...","{'n_estimators': 50, 'learning_rate': 0.01, 'a...","{'n_estimators': 150, 'learning_rate': 0.0001,...","{'n_estimators': 150, 'learning_rate': 0.0001,...","{'n_estimators': 100, 'learning_rate': 0.0001,...","{'n_estimators': 200, 'learning_rate': 0.001, ...","{'n_estimators': 200, 'learning_rate': 0.0001,...","{'n_estimators': 100, 'learning_rate': 0.01, '...","{'n_estimators': 200, 'learning_rate': 0.1, 'a...","{'n_estimators': 500, 'learning_rate': 0.0001,...","{'n_estimators': 100, 'learning_rate': 0.01, '...","{'n_estimators': 100, 'learning_rate': 0.1, 'a...","{'n_estimators': 150, 'learning_rate': 0.001, ...","{'n_estimators': 200, 'learning_rate': 0.01, '..."
rank_test_score,14,13,4,18,6,16,3,12,18,2,18,18,9,18,18,18,10,18,11,1,17,8,7,15,5
split0_test_score,0.368368,0.361361,0.426426,0.368368,0.424424,0.368368,0.426426,0.364364,0.368368,0.44044,0.368368,0.368368,0.38038,0.368368,0.368368,0.368368,0.387387,0.368368,0.367367,0.445445,0.368368,0.402402,0.424424,0.368368,0.42042


## Random Forest Classifier

In [30]:
from sklearn.ensemble import RandomForestClassifier
rf_model = RandomForestClassifier()

In [31]:
evaluate_model(rf_model)

log_loss
 3.71404558515
f1_score
 0.551773156393
accuracy_score
 0.627627627628

classification_report
              precision    recall  f1-score   support

          1       0.51      0.51      0.51        57
          2       0.59      0.38      0.46        45
          3       0.83      0.56      0.67         9
          4       0.67      0.67      0.67        69
          5       0.55      0.25      0.34        24
          6       0.79      0.68      0.73        28
          7       0.63      0.88      0.73        95
          8       0.00      0.00      0.00         2
          9       1.00      0.75      0.86         4

avg / total       0.62      0.63      0.61       333


confusion_matrix
 [[29  3  0 17  1  3  4  0  0]
 [ 2 17  0  0  2  0 24  0  0]
 [ 0  0  5  1  1  0  2  0  0]
 [16  0  0 46  1  0  6  0  0]
 [ 5  0  0  3  6  2  8  0  0]
 [ 2  2  0  1  0 19  4  0  0]
 [ 3  6  1  1  0  0 84  0  0]
 [ 0  1  0  0  0  0  1  0  0]
 [ 0  0  0  0  0  0  1  0  3]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [32]:
parameters = {
              'n_estimators': [10, 50, 100, 200, 250, 500],
              'criterion':['gini','entropy'], 
              'max_depth':[None, 4, 6, 8, 10], 
              'min_samples_split':[2,4,6,8],
              'min_samples_leaf':[1, 3, 5, 8, 10], 
              'min_weight_fraction_leaf':[0, 0.1, 0.01, 0.5, 0.001], 
              'max_features':['log2','auto'], 
              'max_leaf_nodes':[None, 4, 8, 16, 32],
              'min_impurity_decrease':[0, 1e-3, 1e-4, 1e-5], 
              'class_weight':['balanced',None],
#               'bootstrap':[True, False],
#               'oob_score':[True, False]
#               'presort':[True, False] 
}
rf_rs = RandomizedSearchCV(rf_model, parameters, n_iter=100, n_jobs=-1)


In [33]:
evaluate_model(rf_rs)

log_loss
 1.40210320613
f1_score
 0.615124328168
accuracy_score
 0.675675675676

classification_report
              precision    recall  f1-score   support

          1       0.68      0.63      0.65        57
          2       0.67      0.67      0.67        45
          3       0.33      0.67      0.44         9
          4       0.75      0.62      0.68        69
          5       0.58      0.62      0.60        24
          6       1.00      0.64      0.78        28
          7       0.65      0.77      0.71        95
          8       0.00      0.00      0.00         2
          9       1.00      1.00      1.00         4

avg / total       0.70      0.68      0.68       333


confusion_matrix
 [[36  3  0  8  4  0  6  0  0]
 [ 1 30  0  1  1  0 12  0  0]
 [ 0  0  6  1  1  0  1  0  0]
 [11  0  3 43  2  0 10  0  0]
 [ 2  0  1  2 15  0  4  0  0]
 [ 2  2  0  0  2 18  4  0  0]
 [ 1 10  8  2  1  0 73  0  0]
 [ 0  0  0  0  0  0  2  0  0]
 [ 0  0  0  0  0  0  0  0  4]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [34]:
pd.DataFrame(rf_rs.cv_results_).transpose()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99
mean_fit_time,0.39338,0.719029,0.668831,1.04588,0.920103,1.20702,1.03285,0.895184,1.98646,2.39252,1.44542,2.97444,1.18421,2.35235,2.54754,8.62891,3.59315,1.5915,1.84329,2.13887,10.1458,2.1903,3.80982,10.0488,0.70245,3.55245,4.66416,1.60113,1.51287,2.13996,0.78683,0.715229,2.19737,3.11843,1.96671,2.99936,0.733039,1.60728,1.48242,2.38502,1.63125,0.845662,2.69298,0.83473,1.61643,0.735034,1.58044,1.73993,3.87642,3.88724,5.28502,4.69174,2.22148,1.91898,4.71954,1.22507,3.68826,1.92918,1.16509,2.70608,0.951422,1.73992,2.0105,1.05507,2.05007,0.924112,3.37211,5.81792,2.24061,1.36865,2.88595,0.748825,5.0553,3.00347,0.773172,1.35859,2.54229,0.978359,1.19695,15.2315,1.95714,1.26655,0.689999,0.557382,9.04278,1.47953,4.20174,2.84086,2.17243,0.853171,0.789835,0.856372,0.892008,1.58101,0.690267,2.80274,0.775654,0.87801,1.25208,1.6394
mean_score_time,0.749967,1.46357,1.6006,1.79238,1.54245,7.66856,6.22761,0.29431,16.3674,16.2629,6.03965,16.7785,1.62706,13.6261,7.29983,6.72764,6.07807,6.30628,3.30263,4.54909,13.1811,6.70279,14.2232,16.2912,1.37473,7.89715,4.90675,7.91406,6.28405,1.27517,0.336744,0.334854,14.8614,7.65425,15.8078,7.77242,0.344398,8.54256,8.38503,15.1985,7.55257,0.281319,15.9522,1.29818,8.40998,0.345215,7.98832,1.64444,4.93172,2.17703,8.11694,16.4866,15.9781,7.98408,16.3271,3.20187,8.4118,6.57153,3.36114,15.4186,2.92151,7.89763,8.52025,3.19493,6.69446,1.5238,5.29271,16.8139,15.7738,6.72372,7.43622,0.311078,3.24237,7.46264,0.277863,5.82282,2.94928,1.73741,2.96678,6.34037,9.74813,5.63155,0.29584,0.269392,16.1744,6.52285,6.00086,15.481,1.50552,1.54683,0.265502,1.51736,1.63099,7.50592,1.35562,14.9857,1.68437,0.363816,0.342568,6.63811
mean_test_score,0.290495,0.426372,0.111111,0.287149,0.141901,0.165663,0.232597,0.529451,0.115127,0.287149,0.412985,0.337684,0.377175,0.287149,0.476908,0.587015,0.558902,0.348394,0.453815,0.315596,0.508367,0.372155,0.373494,0.568273,0.287149,0.505355,0.517068,0.287149,0.462517,0.479585,0.303548,0.135542,0.287149,0.370147,0.287149,0.392905,0.308568,0.287149,0.399264,0.460174,0.287149,0.513722,0.0732932,0.40328,0.287149,0.299866,0.287149,0.438086,0.450134,0.53581,0.425033,0.374163,0.465194,0.0331325,0.374833,0.0317938,0.441098,0.403614,0.488621,0.287149,0.32162,0.287149,0.287149,0.287149,0.576975,0.0147256,0.431392,0.390562,0.493641,0.125167,0.402276,0.287149,0.582664,0.468206,0.491299,0.258367,0.465529,0.0562249,0.287149,0.607764,0.511714,0.0742972,0.342704,0.301874,0.509036,0.287149,0.529786,0.287149,0.533467,0.117805,0.103079,0.243976,0.266064,0.287149,0.287483,0.317938,0.245649,0.291165,0.451473,0.287149
mean_train_score,0.338801,0.492814,0.111397,0.287149,0.140771,0.165196,0.247966,0.611107,0.115095,0.287149,0.492487,0.342534,0.45213,0.287149,0.556565,0.815936,0.720064,0.388711,0.480743,0.319109,0.585012,0.384018,0.38469,0.716701,0.287149,0.578988,0.639061,0.287149,0.580304,0.564754,0.307059,0.1546,0.287149,0.383195,0.287149,0.410797,0.310412,0.287149,0.468861,0.544988,0.287149,0.573622,0.0734077,0.423175,0.287149,0.301859,0.287149,0.506193,0.516715,0.605427,0.444094,0.384525,0.564758,0.0330818,0.384859,0.0316576,0.461176,0.421516,0.680036,0.287149,0.391923,0.287149,0.287149,0.287149,0.824129,0.0143908,0.453814,0.412306,0.622006,0.125253,0.421168,0.287149,0.728419,0.555893,0.530451,0.289787,0.544665,0.056257,0.287149,0.882872,0.654291,0.074331,0.410667,0.329165,0.588193,0.287149,0.586686,0.287149,0.592197,0.117921,0.102895,0.290531,0.293768,0.287149,0.288987,0.322272,0.286305,0.293662,0.598758,0.287149
param_class_weight,balanced,balanced,balanced,,balanced,balanced,balanced,,balanced,,balanced,,balanced,,balanced,balanced,balanced,balanced,,,balanced,,,balanced,,balanced,balanced,,balanced,balanced,,balanced,,,,,,,balanced,balanced,,,balanced,,,,,balanced,balanced,,,,balanced,balanced,,balanced,,,balanced,,balanced,,,,balanced,balanced,,,balanced,balanced,,,,balanced,,balanced,balanced,balanced,,balanced,balanced,balanced,balanced,balanced,balanced,,,,,balanced,balanced,balanced,balanced,,,,balanced,,balanced,
param_criterion,gini,gini,entropy,gini,entropy,entropy,gini,gini,gini,entropy,entropy,gini,entropy,gini,gini,entropy,gini,entropy,gini,gini,entropy,gini,gini,gini,gini,gini,entropy,entropy,entropy,entropy,gini,entropy,entropy,entropy,gini,entropy,gini,entropy,entropy,entropy,gini,gini,entropy,gini,gini,gini,gini,entropy,entropy,entropy,entropy,gini,entropy,gini,gini,entropy,gini,gini,gini,gini,entropy,entropy,gini,entropy,entropy,entropy,gini,entropy,gini,gini,gini,entropy,gini,entropy,gini,entropy,entropy,gini,entropy,entropy,entropy,entropy,gini,gini,entropy,entropy,gini,gini,entropy,gini,gini,entropy,gini,entropy,entropy,gini,entropy,gini,entropy,entropy
param_max_depth,4,10,,10,6,4,6,,10,4,,,8,,10,8,6,10,4,4,,8,4,10,,10,4,10,10,6,10,10,4,8,10,10,,,10,8,,8,10,10,8,6,8,,8,10,8,10,10,,,10,10,6,6,10,6,10,8,,10,4,6,6,8,4,,4,10,4,6,8,6,4,,10,4,6,6,,10,8,8,4,8,10,10,,4,6,4,,,4,,10
param_max_features,log2,auto,auto,log2,log2,log2,log2,auto,log2,auto,log2,log2,auto,log2,auto,auto,auto,log2,auto,log2,auto,auto,auto,auto,log2,auto,auto,log2,log2,auto,log2,log2,log2,log2,log2,auto,log2,log2,log2,log2,log2,auto,auto,auto,log2,log2,log2,auto,auto,auto,auto,auto,log2,auto,auto,auto,auto,auto,log2,log2,log2,log2,auto,auto,log2,log2,auto,auto,log2,auto,auto,log2,auto,auto,auto,log2,auto,log2,log2,auto,log2,log2,auto,auto,auto,log2,auto,auto,auto,auto,auto,log2,log2,log2,log2,log2,log2,log2,auto,log2
param_max_leaf_nodes,16,32,16,,16,16,8,32,16,16,32,16,16,32,8,,,4,32,16,8,4,4,,8,8,16,4,16,16,16,32,8,32,4,32,16,,8,16,,32,,16,4,4,16,8,4,32,8,4,8,8,4,16,8,16,,4,32,16,4,4,32,4,8,16,4,32,,4,,32,32,,8,4,4,,8,4,,32,8,32,,4,32,8,4,4,8,4,32,8,16,4,,16
param_min_impurity_decrease,0.0001,1e-05,0.0001,1e-05,0.0001,0.001,1e-05,1e-05,1e-05,0,1e-05,1e-05,1e-05,0.0001,0.001,0,0.001,0,0,0.0001,0.0001,0.001,0.0001,0.0001,0.001,1e-05,0.0001,0.0001,0.001,1e-05,0.0001,0.0001,0.001,0.001,0.0001,0.0001,0.0001,0.001,0.001,0,0.001,0.001,0,0.0001,0.0001,0.001,0.0001,0.0001,0.0001,0.0001,1e-05,0.001,0.0001,0,0.001,0,0.0001,1e-05,1e-05,0.0001,1e-05,0,0.001,0.001,0.0001,0,1e-05,0,1e-05,0.0001,0,0.0001,0.0001,1e-05,0,0,1e-05,0,0.001,0.0001,0.0001,1e-05,0,1e-05,0,0.0001,0.001,0,0,0.001,0,0,1e-05,1e-05,1e-05,0.001,0.001,0.001,1e-05,0


## Extreme Randomization Trees

In [35]:
from sklearn.ensemble import ExtraTreesClassifier
xtr_model = ExtraTreesClassifier()

In [36]:
evaluate_model(xtr_model)

log_loss
 3.86575438533
f1_score
 0.505023576656
accuracy_score
 0.630630630631

classification_report
              precision    recall  f1-score   support

          1       0.62      0.63      0.63        57
          2       0.70      0.47      0.56        45
          3       0.14      0.11      0.12         9
          4       0.68      0.65      0.67        69
          5       0.47      0.29      0.36        24
          6       0.79      0.54      0.64        28
          7       0.61      0.86      0.71        95
          8       0.00      0.00      0.00         2
          9       1.00      0.75      0.86         4

avg / total       0.63      0.63      0.62       333


confusion_matrix
 [[36  1  0 13  1  1  5  0  0]
 [ 1 21  0  0  1  0 22  0  0]
 [ 0  0  1  1  1  1  5  0  0]
 [10  1  1 45  2  1  9  0  0]
 [ 5  0  1  4  7  1  6  0  0]
 [ 2  4  0  1  2 15  4  0  0]
 [ 4  2  4  2  1  0 82  0  0]
 [ 0  1  0  0  0  0  1  0  0]
 [ 0  0  0  0  0  0  1  0  3]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [37]:
parameters = {
              'n_estimators': [10, 50, 100, 200, 250, 500],
              'criterion':['gini','entropy'], 
              'max_depth':[None, 4, 6, 8, 10], 
              'min_samples_split':[2,4,6,8],
              'min_samples_leaf':[1, 3, 5, 8, 10], 
              'min_weight_fraction_leaf':[0, 0.1, 0.01, 0.5, 0.001], 
              'max_features':['log2','auto'], 
              'max_leaf_nodes':[None, 4, 8, 16, 32],
              'min_impurity_decrease':[0, 1e-3, 1e-4, 1e-5], 
              'class_weight':['balanced',None],
#               'bootstrap':[True, False],
#               'oob_score':[True, False]
#               'presort':[True, False] 
}
xtr_rs = RandomizedSearchCV(xtr_model, parameters, n_iter=100, n_jobs=-1)


In [38]:
evaluate_model(xtr_rs)

log_loss
 1.28911404362
f1_score
 0.583032846735
accuracy_score
 0.648648648649

classification_report
              precision    recall  f1-score   support

          1       0.70      0.56      0.62        57
          2       0.64      0.62      0.63        45
          3       0.33      0.67      0.44         9
          4       0.75      0.55      0.63        69
          5       0.55      0.67      0.60        24
          6       0.86      0.64      0.73        28
          7       0.62      0.78      0.69        95
          8       0.00      0.00      0.00         2
          9       0.80      1.00      0.89         4

avg / total       0.67      0.65      0.65       333


confusion_matrix
 [[32  5  0  7  4  1  8  0  0]
 [ 0 28  0  0  1  0 16  0  0]
 [ 0  0  6  1  1  0  1  0  0]
 [13  0  3 38  2  1 12  0  0]
 [ 0  0  1  3 16  0  4  0  0]
 [ 1  1  0  1  3 18  4  0  0]
 [ 0 10  8  1  2  0 74  0  0]
 [ 0  0  0  0  0  1  0  0  1]
 [ 0  0  0  0  0  0  0  0  4]]


  'precision', 'predicted', average, warn_for)
  'precision', 'predicted', average, warn_for)


In [39]:
pd.DataFrame(xtr_rs.cv_results_).transpose()



Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20,21,22,23,24,25,26,27,28,29,30,31,32,33,34,35,36,37,38,39,40,41,42,43,44,45,46,47,48,49,50,51,52,53,54,55,56,57,58,59,60,61,62,63,64,65,66,67,68,69,70,71,72,73,74,75,76,77,78,79,80,81,82,83,84,85,86,87,88,89,90,91,92,93,94,95,96,97,98,99
mean_fit_time,1.63213,0.984,0.713139,1.01601,1.56011,0.9758,1.06277,0.852209,1.01125,2.47341,1.16787,2.22416,2.79621,0.818125,2.40016,0.860864,4.43001,2.56318,2.54628,1.03094,1.10524,2.09506,1.57042,1.41079,2.43427,1.52007,0.845324,2.35796,2.80829,1.08851,1.55394,12.3447,0.648079,1.23502,1.73758,0.693868,2.88976,2.65128,1.03502,0.962736,3.04941,0.767263,2.6043,0.8163,0.887554,1.56712,1.51518,0.784252,1.78871,1.03736,11.793,1.28215,2.12767,0.795517,0.93088,0.759597,11.5292,2.09937,0.804202,1.43919,2.24872,2.58268,0.932033,0.779974,1.4184,2.65768,0.670084,3.84872,1.57837,0.842074,1.67331,0.62991,1.13903,2.61889,1.56054,1.80257,5.15046,3.23209,1.34793,1.46508,3.98982,0.897365,0.858867,0.784859,4.49256,2.17197,0.993402,1.66549,1.12058,2.20154,1.06333,1.44627,1.89894,1.93965,1.41221,0.996976,8.54132,2.213,0.802647,0.688619
mean_score_time,15.7786,1.49318,3.18232,3.50184,6.35336,0.289661,3.16325,1.70929,3.23593,15.3634,3.25636,16.1069,15.771,0.296487,16.9501,0.340873,15.3125,16.6407,16.7929,1.74265,1.7028,8.55551,6.78976,5.98778,16.6863,6.20702,0.356667,15.6951,16.7525,3.30863,6.68218,2.93314,0.230894,5.57633,6.77251,0.295107,8.0839,15.3959,1.73305,0.363043,16.8208,0.300918,16.7651,0.33342,1.63612,6.90886,7.8162,0.284407,8.65305,2.98392,12.809,4.92299,7.37953,0.321466,0.331934,1.59138,17.4334,13.6691,0.299791,6.70743,3.0439,15.2965,0.316866,0.348082,6.39279,15.5265,0.244994,6.08021,8.37989,0.303,7.6091,0.314538,3.34574,15.4456,7.89775,8.16465,8.13334,15.7936,3.27327,1.50847,5.60862,1.64322,0.342686,0.368683,6.57214,16.2356,1.72915,7.69828,1.42215,6.34553,1.70052,6.61482,2.84772,6.4766,5.99724,1.37474,8.4289,2.97091,1.34356,0.283924
mean_test_score,0.171352,0.400268,0.287149,0.287149,0.0776439,0.363788,0.287149,0.171352,0.171352,0.287149,0.287149,0.149598,0.287149,0.308902,0.287149,0.287149,0.518742,0.42905,0.287149,0.287149,0.190763,0.287149,0.287149,0.287149,0.287149,0.171352,0.313253,0.135877,0.291165,0.287149,0.287149,0.600067,0.288487,0.287149,0.287149,0.287149,0.380187,0.287149,0.171352,0.446787,0.312584,0.387216,0.171352,0.325301,0.287149,0.287149,0.257028,0.291834,0.171352,0.287149,0.441432,0.323293,0.352744,0.191098,0.186412,0.287149,0.414993,0.323628,0.297523,0.287149,0.479585,0.150602,0.299531,0.287149,0.287149,0.171352,0.287149,0.407965,0.285475,0.171352,0.287149,0.0756359,0.287149,0.287149,0.0719545,0.171352,0.538153,0.38755,0.293507,0.395248,0.403949,0.287149,0.286479,0.11747,0.517738,0.287149,0.287149,0.105756,0.370147,0.428715,0.292169,0.287149,0.415663,0.35241,0.287149,0.203481,0.409304,0.414324,0.287149,0.17336
mean_train_score,0.171214,0.415994,0.287149,0.287149,0.0800483,0.449795,0.287149,0.171214,0.171214,0.287149,0.287149,0.158842,0.287149,0.356286,0.287149,0.287149,0.627704,0.55773,0.287149,0.287149,0.209003,0.287149,0.287149,0.287149,0.287149,0.171214,0.309233,0.140534,0.290997,0.287149,0.287149,0.873499,0.317455,0.287149,0.287149,0.287149,0.388218,0.287149,0.171214,0.47289,0.313925,0.401091,0.171214,0.329659,0.287149,0.287149,0.288646,0.293509,0.171214,0.287149,0.479084,0.369157,0.353914,0.211803,0.20988,0.287149,0.440598,0.370824,0.324485,0.287149,0.565615,0.166456,0.317919,0.287149,0.287149,0.171214,0.287149,0.420847,0.328647,0.171214,0.287149,0.0843232,0.287149,0.287149,0.0798244,0.171214,0.712698,0.436754,0.338855,0.405788,0.415329,0.287149,0.30513,0.125576,0.663004,0.287149,0.287149,0.109619,0.378168,0.448118,0.291999,0.287149,0.458365,0.354578,0.287149,0.231121,0.422354,0.430892,0.287149,0.177892
param_class_weight,balanced,,,,balanced,balanced,,balanced,balanced,,,balanced,,balanced,,,balanced,balanced,,,balanced,,,,,balanced,,balanced,,,,balanced,balanced,,,,,,balanced,,,,balanced,,,,balanced,,balanced,,,balanced,,balanced,balanced,,,balanced,balanced,,balanced,balanced,balanced,,,balanced,,,balanced,balanced,,balanced,,,balanced,balanced,balanced,balanced,balanced,,,,balanced,balanced,balanced,,,balanced,,balanced,,,balanced,,,balanced,,,,balanced
param_criterion,gini,gini,gini,gini,entropy,entropy,gini,gini,gini,gini,gini,gini,entropy,entropy,entropy,entropy,entropy,gini,entropy,entropy,gini,gini,entropy,entropy,entropy,entropy,entropy,gini,entropy,gini,entropy,entropy,entropy,gini,gini,entropy,gini,gini,entropy,entropy,gini,gini,gini,gini,gini,gini,entropy,gini,entropy,gini,entropy,gini,gini,entropy,entropy,entropy,entropy,entropy,entropy,entropy,entropy,gini,entropy,entropy,entropy,gini,gini,gini,entropy,gini,gini,gini,entropy,entropy,entropy,gini,gini,entropy,entropy,entropy,entropy,entropy,gini,gini,entropy,gini,gini,gini,gini,gini,gini,entropy,gini,entropy,gini,gini,gini,gini,gini,gini
param_max_depth,4,,10,8,,8,,10,,10,4,8,,6,8,6,4,4,8,10,,8,4,8,10,10,8,8,,,10,,10,10,10,,6,8,,10,10,6,8,6,,6,10,,8,10,,6,8,6,8,6,8,6,10,6,8,,8,4,,10,8,10,,,10,8,,6,8,,10,4,,6,8,6,4,4,10,6,10,4,4,8,6,,8,4,,8,,10,10,8
param_max_features,auto,auto,log2,log2,log2,auto,log2,log2,auto,log2,auto,log2,log2,auto,log2,log2,auto,log2,log2,auto,auto,auto,log2,log2,auto,log2,log2,log2,log2,log2,log2,auto,log2,log2,auto,auto,auto,log2,auto,auto,log2,auto,log2,log2,auto,log2,log2,log2,auto,log2,auto,auto,auto,auto,log2,log2,auto,log2,auto,log2,auto,log2,auto,auto,log2,auto,log2,auto,log2,log2,auto,log2,log2,log2,log2,auto,auto,auto,auto,auto,auto,log2,auto,log2,auto,log2,auto,log2,auto,auto,log2,log2,auto,auto,log2,auto,auto,auto,log2,log2
param_max_leaf_nodes,16,16,16,,32,32,8,,32,8,8,8,4,16,4,4,,8,8,,,,32,,4,4,,4,8,8,32,,8,8,4,8,16,16,4,32,32,16,4,,32,4,8,16,32,,,16,4,4,,4,32,,8,16,16,4,8,16,8,32,4,16,4,32,4,,16,16,,16,,4,4,16,16,32,8,,32,4,32,,,4,,4,8,4,4,32,16,32,32,
param_min_impurity_decrease,1e-05,1e-05,0.001,0.0001,0.001,0,0,0.0001,0.001,0.0001,0.001,0,0.001,0.0001,1e-05,0.001,0.001,1e-05,1e-05,0.0001,0,0.001,0,0.0001,0,1e-05,1e-05,0.001,0.001,0.001,1e-05,0.001,0.001,0.0001,0,1e-05,0.001,0.001,0,0,0,0,0.0001,1e-05,0,0.001,1e-05,0,1e-05,0,0.0001,1e-05,0.0001,0.001,0.001,0,0.0001,0.0001,0.001,0.0001,0,0.001,0,0.001,1e-05,1e-05,0.0001,0,0,1e-05,0.001,0,0.001,1e-05,0.0001,1e-05,0.0001,0.001,0.001,0,0,0.001,0.0001,0.001,0,1e-05,0.0001,0,0,0,0,0.001,0.001,1e-05,0.0001,0.0001,0.0001,1e-05,0.0001,0


## Saving models run

### stage1

In [None]:
pickle.dump(nb_model, open('stage1/nb_model', 'wb'))
pickle.dump(svc_model, open('stage1/svc_model', 'wb'))
pickle.dump(smreg_model, open('stage1/smreg_model', 'wb'))
pickle.dump(knn_model, open('stage1/knn_model', 'wb'))
pickle.dump(pag_model, open('stage1/pag_model', 'wb'))
pickle.dump(qda_model, open('stage1/qda_model', 'wb'))
pickle.dump(dt_model, open('stage1/qda_model', 'wb'))
pickle.dump(adab_model, open('stage1/adab_model', 'wb'))
pickle.dump(rf_model, open('stage1/rf_model', 'wb'))
pickle.dump(xtr_model, open('stage1/xtr_model', 'wb'))

In [None]:
from sklearn.ensemble import VotingClassifier

## Load classifiers

In [None]:
nb_model    = pickle.load(open('stage1/nb_model', 'rb'))
svc_model   = pickle.load(open('stage1/svc_model', 'rb'))
smreg_model = pickle.load(open('stage1/smreg_model', 'rb'))
knn_model   = pickle.load(open('stage1/knn_model', 'rb'))
pag_model   = pickle.load(open('stage1/pag_model', 'rb'))
qda_model   = pickle.load(open('stage1/qda_model', 'rb'))
dt_model    = pickle.load(open('stage1/qda_model', 'rb'))
adab_model  = pickle.load(open('stage1/adab_model', 'rb'))
rf_model    = pickle.load(open('stage1/rf_model', 'rb'))
xtr_model   = pickle.load(open('stage1/xtr_model', 'rb'))

## Average on probabilities

In [None]:
estimators_returning_probabilities = [
                    ("NB", nb_model),           
                    ("SVM", svc_model),         
                    ("Softmax", smreg_model),  
                    ("KNN", knn_model),         
#                     ("PasAgg", pag_model),      
#                     ("QDA", qda_model),         
                    ("DecisionTree", dt_model), 
                    ("ADABoost", adab_model),   
                    ("RandomForest", rf_model),
                    ("ExtremeRand", xtr_model)
                 ]

In [None]:
def average_predicted_probabilities(local_data):
    nof_data = local_data.shape[0]
    mean_probs = np.zeros((nof_data, 9))
    for name, estimator in estimators_returning_probabilities:
        mean_probs += estimator.predict_proba(local_data)
    return mean_probs/nof_data

In [None]:
y_pred = average_predicted_probabilities(x_test_tf)
print("log_loss\n", log_loss(y_test, y_pred, labels=range(1,10)))