In [1]:
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import numpy as np
import cv2

from sklearn.model_selection import KFold, cross_val_score



from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier, IsolationForest

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC, SVC

from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_objective, plot_histogram


from sklearn.pipeline import Pipeline

from src.utils.feats import load_gei

import pandas as pd

In [3]:
# Kfold
n_splits = 3
cv = KFold(n_splits=n_splits, random_state=42, shuffle=True)
    
# classifier
model = RandomForestClassifier(n_estimators=150, max_depth=None, random_state=0, criterion='gini')    

In [4]:
datapath = "../data/feats/database24_gei_480x640.pkl"

dim = (64, 48)

crop_person = True

X, y = load_gei(datapath, dim=dim, crop_person=crop_person) 

In [5]:
# pipeline class is used as estimator to enable
# search over different model types

pipe = Pipeline([
    ('model', KNeighborsClassifier())
])

In [6]:
# single categorical value of 'model' parameter is
# sets the model class
# We will get ConvergenceWarnings because the problem is not well-conditioned.
# But that's fine, this is just an example.



# from sklearn.ensemble import AdaBoostClassifier, GradientBoostingClassifier
# from sklearn.ensemble import RandomForestClassifier, IsolationForest

# from sklearn.neighbors import KNeighborsClassifier
# from sklearn.svm import LinearSVC, SVC

# explicit dimension classes can be specified like this

ada_search = {
    'model': Categorical([AdaBoostClassifier(random_state=0)]),    
    'model__n_estimators': Integer(1, 400),     
    'model__learning_rate': Real(1e-6, 1e+0, prior='log-uniform'),
}

# gdb_search = {
#     'model': Categorical([GradientBoostingClassifier(max_depth=None, random_state=0)]),    
#     'model__learning_rate': Real(1e-3, 0.5, prior='uniform'),
#     'model__n_estimators': Integer(1, 400),         
#     'model__max_depth': Integer(1, 400),     
# }


knn_search = {
    'model': Categorical([KNeighborsClassifier()]),    
    'model__n_neighbors': Integer(1,8),    
}

rf_search = {
    'model': Categorical([RandomForestClassifier(max_depth=None, random_state=0, criterion='gini')]),    
    'model__n_estimators': Integer(100, 400),    
}

svc_search = {
    'model': Categorical([SVC()]),
    'model__C': Real(1e-6, 1e+6, prior='log-uniform'),
    'model__gamma': Real(1e-6, 1e+1, prior='log-uniform'),
    'model__degree': Integer(1,8),
    'model__kernel': Categorical(['linear', 'poly', 'rbf']),
}


In [7]:
opt = BayesSearchCV(
    pipe,
    # (parameter space, # of evaluations)    
    [(ada_search, 32), (knn_search, 12), (svc_search, 32), (rf_search, 32)],
    cv=cv,
    scoring='accuracy'
)

In [8]:
opt.fit(X, y)

BayesSearchCV(cv=KFold(n_splits=3, random_state=42, shuffle=True),
              estimator=Pipeline(steps=[('model', KNeighborsClassifier())]),
              scoring='accuracy',
              search_spaces=[({'model': Categorical(categories=(AdaBoostClassifier(random_state=0),), prior=None),
                               'model__learning_rate': Real(low=1e-06, high=1.0, prior='log-uniform', transform='identity'),
                               'model__n_estimator...
                               'model__degree': Integer(low=1, high=8, prior='uniform', transform='identity'),
                               'model__gamma': Real(low=1e-06, high=10.0, prior='log-uniform', transform='identity'),
                               'model__kernel': Categorical(categories=('linear', 'poly', 'rbf'), prior=None)},
                              32),
                             ({'model': Categorical(categories=(RandomForestClassifier(random_state=0),), prior=None),
                               'm

In [9]:
df = pd.DataFrame(opt.cv_results_['params'])
# df.rename(columns = {0:'param_model'}, inplace = True)

df_mean = pd.DataFrame(opt.cv_results_['mean_test_score'])
df_std = pd.DataFrame(opt.cv_results_['std_test_score'])
df_rank = pd.DataFrame(opt.cv_results_['rank_test_score'])

df = df.join(df_mean)
df.rename(columns = {0:'mean_test_score'}, inplace = True)


df = df.join(df_std)
df.rename(columns = {0:'std_test_score'}, inplace = True)

df = df.join(df_rank)
df.rename(columns = {0:'rank'}, inplace = True)

df.sort_values(by='mean_test_score', inplace=True, ascending=False)

df

Unnamed: 0,model,model__learning_rate,model__n_estimators,model__n_neighbors,model__C,model__degree,model__gamma,model__kernel,mean_test_score,std_test_score,rank
75,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,14947.151357,7.0,0.000185,rbf,0.866545,0.009649,1
52,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,882.427292,5.0,0.000208,rbf,0.866545,0.009649,1
60,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,2205.462183,5.0,0.000115,rbf,0.866545,0.009649,1
65,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,1000000.000000,8.0,0.005459,linear,0.862888,0.004786,4
73,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,432.854824,8.0,3.038642,linear,0.862888,0.004786,4
...,...,...,...,...,...,...,...,...,...,...,...
44,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,0.033167,4.0,0.001063,poly,0.038391,0.009038,103
45,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,0.242048,3.0,0.000096,poly,0.038391,0.009038,103
47,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,0.000036,1.0,0.000004,poly,0.038391,0.009038,103
61,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,0.000001,1.0,0.001373,linear,0.038391,0.009038,103


In [10]:
# 5 best knn models
df[df['model__n_neighbors']>0].head(5)

Unnamed: 0,model,model__learning_rate,model__n_estimators,model__n_neighbors,model__C,model__degree,model__gamma,model__kernel,mean_test_score,std_test_score,rank
42,KNeighborsClassifier(),,,1.0,,,,,0.745887,0.007,57
37,KNeighborsClassifier(),,,2.0,,,,,0.685558,0.025417,59
33,KNeighborsClassifier(),,,2.0,,,,,0.685558,0.025417,59
36,KNeighborsClassifier(),,,5.0,,,,,0.681901,0.014677,61
34,KNeighborsClassifier(),,,5.0,,,,,0.681901,0.014677,61


In [11]:
# 5 best RF models
df[df['model__n_estimators']>0].head(5)

Unnamed: 0,model,model__learning_rate,model__n_estimators,model__n_neighbors,model__C,model__degree,model__gamma,model__kernel,mean_test_score,std_test_score,rank
105,RandomForestClassifier(random_state=0),,357.0,,,,,,0.844607,0.020495,20
92,RandomForestClassifier(random_state=0),,317.0,,,,,,0.842779,0.017907,21
91,RandomForestClassifier(random_state=0),,378.0,,,,,,0.842779,0.01855,21
96,RandomForestClassifier(random_state=0),,311.0,,,,,,0.842779,0.017907,21
106,RandomForestClassifier(random_state=0),,189.0,,,,,,0.840951,0.01755,24


In [12]:
# 5 best SVC models
df[df['model__C']>0].head(5)

Unnamed: 0,model,model__learning_rate,model__n_estimators,model__n_neighbors,model__C,model__degree,model__gamma,model__kernel,mean_test_score,std_test_score,rank
75,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,14947.151357,7.0,0.000185,rbf,0.866545,0.009649,1
52,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,882.427292,5.0,0.000208,rbf,0.866545,0.009649,1
60,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,2205.462183,5.0,0.000115,rbf,0.866545,0.009649,1
65,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,1000000.0,8.0,0.005459,linear,0.862888,0.004786,4
73,"SVC(C=882.4272922521141, degree=5, gamma=0.000...",,,,432.854824,8.0,3.038642,linear,0.862888,0.004786,4
