In [1]:
# Enable IPython's autoreload extension in mode 2, so edits to imported
# modules (e.g. the project's src.utils helpers) are picked up without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import pickle
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D

import numpy as np
import cv2

from sklearn.model_selection import KFold, cross_val_score


from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import AdaBoostClassifier, BaggingClassifier, GradientBoostingClassifier
from sklearn.ensemble import RandomForestClassifier, IsolationForest

from sklearn.neighbors import KNeighborsClassifier
from sklearn.svm import LinearSVC, SVC

from skopt import BayesSearchCV
from skopt.space import Real, Categorical, Integer
from skopt.plots import plot_objective, plot_histogram


from sklearn.pipeline import Pipeline

from src.utils.feats import load_gei
from src.utils.results import df_results


import pandas as pd

In [3]:
# Cross-validation scheme: shuffled K-fold with a fixed seed, so every
# candidate is scored on the same splits.
n_splits = 3
cv = KFold(n_splits=n_splits, shuffle=True, random_state=42)

# Stand-alone random-forest classifier.
# NOTE(review): `model` is not referenced by the pipeline / Bayesian-search
# cells visible below, which define their own estimators — confirm whether a
# later cell still needs it.
model = RandomForestClassifier(
    n_estimators=150,
    criterion='gini',
    max_depth=None,
    random_state=0,
)

In [4]:
# Pickled feature file (path is relative to the notebook's working directory)
# and the preprocessing options forwarded to the project loader.
datapath = "../data/feats/database24_gei_480x640.pkl"
# NOTE(review): `dim` is passed to load_gei unchanged; whether it is
# (width, height) or (height, width) is not visible here — confirm in
# src/utils/feats.
dim = (64, 48)
crop_person = True

# load_gei returns a pair, unpacked as the feature matrix X and labels y that
# are later handed to the search's fit().
X, y = load_gei(datapath, crop_person=crop_person, dim=dim)

In [5]:
# A one-step Pipeline is used as the estimator so the search can swap the
# whole model class: each search space supplies its own estimator for the
# 'model' step. The KNeighborsClassifier here is only the initial placeholder.
pipe = Pipeline(steps=[('model', KNeighborsClassifier())])

In [6]:
# Search spaces for the Bayesian search, one per model family.
# In every space the 'model' entry is a Categorical with a single choice: it
# sets which estimator fills the pipeline's 'model' step. The remaining
# 'model__*' entries are that estimator's hyper-parameters, given as explicit
# skopt dimension classes (Integer / Real / Categorical).

# AdaBoost over depth-10 decision trees.
# NOTE(review): newer scikit-learn releases rename `base_estimator` to
# `estimator`; the keyword is kept as-is to match the environment that
# produced the recorded outputs.
ada_search = dict(
    model=Categorical([
        AdaBoostClassifier(
            base_estimator=DecisionTreeClassifier(max_depth=10),
            random_state=0,
        ),
    ]),
    model__n_estimators=Integer(300, 1100),
    model__learning_rate=Real(0.1, 0.5, prior='uniform'),
)

# Gradient boosting space, currently disabled.
# gdb_search = {
#     'model': Categorical([GradientBoostingClassifier(max_depth=None, random_state=0)]),
#     'model__learning_rate': Real(1e-3, 0.5, prior='uniform'),
#     'model__n_estimators': Integer(1, 400),
#     'model__max_depth': Integer(1, 400),
# }

# k-nearest neighbours: only the neighbour count is tuned.
knn_search = dict(
    model=Categorical([KNeighborsClassifier()]),
    model__n_neighbors=Integer(1, 6),
)

# Random forest: only the number of trees is tuned.
rf_search = dict(
    model=Categorical([
        RandomForestClassifier(max_depth=None, random_state=0, criterion='gini'),
    ]),
    model__n_estimators=Integer(250, 400),
)

# Support vector classifier over three kernels.
# Per the scikit-learn SVC documentation, `degree` only affects the 'poly'
# kernel and `gamma` is not used by 'linear', so some sampled combinations
# differ only in a parameter that has no effect on the fitted model.
svc_search = dict(
    model=Categorical([SVC()]),
    model__C=Real(1e-6, 1e+6, prior='log-uniform'),
    model__gamma=Real(1e-6, 1e+1, prior='log-uniform'),
    model__degree=Integer(1, 8),
    model__kernel=Categorical(['linear', 'poly', 'rbf']),
)


In [None]:
# Bayesian hyper-parameter search across all model families at once.
# The second argument is a list of (search space, number of evaluations)
# pairs; each space swaps the pipeline's 'model' step, so every family is
# scored with the same CV splitter and the same accuracy metric.
opt = BayesSearchCV(
    pipe,
    [(ada_search, 32), (knn_search, 8), (svc_search, 128), (rf_search, 128)],
    cv=cv,
    scoring='accuracy',
    # Seed the optimizer: without it the sampled candidates (and therefore
    # the results table and saved CSV) change on every re-run.
    random_state=42,
)

# Runs every evaluation listed above on the loaded features; this is the
# expensive cell of the notebook.
opt.fit(X, y)

In [18]:
# Collect the finished search's results into a table, save it as CSV in the
# current working directory, and show it as the cell output.
# NOTE(review): df_results is a project helper; the recorded output suggests
# the rows come back ordered by rank (best first) — confirm in
# src/utils/results, since the "5 best" cells below rely on that order.
df = df_results(opt)
df.to_csv('results_classifiers_bayes_search.csv')
df

Unnamed: 0,model,model__learning_rate,model__n_estimators,model__n_neighbors,model__C,model__degree,model__gamma,model__kernel,mean_test_score,std_test_score,rank
87,"SVC(C=1000000.0, degree=1, gamma=3.26500109005...",,,,1000000.000000,1.0,0.000001,rbf,0.864717,0.002932,1
51,"SVC(C=1000000.0, degree=1, gamma=3.26500109005...",,,,1000000.000000,1.0,0.000033,rbf,0.864717,0.002932,1
68,"SVC(C=1000000.0, degree=1, gamma=3.26500109005...",,,,1000000.000000,8.0,0.000001,rbf,0.864717,0.002932,1
148,"SVC(C=1000000.0, degree=1, gamma=3.26500109005...",,,,31.373243,1.0,10.000000,poly,0.862888,0.004786,4
98,"SVC(C=1000000.0, degree=1, gamma=3.26500109005...",,,,72.561062,1.0,1.517500,poly,0.862888,0.004786,4
...,...,...,...,...,...,...,...,...,...,...,...
47,"SVC(C=1000000.0, degree=1, gamma=3.26500109005...",,,,417.510639,2.0,0.181862,rbf,0.038391,0.009038,291
54,"SVC(C=1000000.0, degree=1, gamma=3.26500109005...",,,,0.000001,1.0,0.000047,linear,0.038391,0.009038,291
59,"SVC(C=1000000.0, degree=1, gamma=3.26500109005...",,,,0.000001,1.0,5.512171,poly,0.038391,0.009038,291
73,"SVC(C=1000000.0, degree=1, gamma=3.26500109005...",,,,0.005878,8.0,10.000000,rbf,0.038391,0.009038,291


In [14]:
# Top 5 AdaBoost candidates.
# Only the AdaBoost space tunes `learning_rate`, so that column is NaN for
# every other family and the comparison keeps AdaBoost rows only.
# Assumes `df` is already ordered best-first, as the recorded output shows.
df.loc[df['model__learning_rate'].gt(0)].head(5)

Unnamed: 0,model,model__learning_rate,model__n_estimators,model__n_neighbors,model__C,model__degree,model__gamma,model__kernel,mean_test_score,std_test_score,rank
21,AdaBoostClassifier(base_estimator=DecisionTree...,0.273532,721.0,,,,,,0.835466,0.007324,219
16,AdaBoostClassifier(base_estimator=DecisionTree...,0.314346,703.0,,,,,,0.83181,0.015366,231
31,AdaBoostClassifier(base_estimator=DecisionTree...,0.287706,732.0,,,,,,0.826325,0.031918,245
27,AdaBoostClassifier(base_estimator=DecisionTree...,0.364809,1100.0,,,,,,0.826325,0.004717,245
19,AdaBoostClassifier(base_estimator=DecisionTree...,0.298465,681.0,,,,,,0.826325,0.016494,245


In [15]:
# Top 5 k-nearest-neighbour candidates.
# Only the KNN space tunes `n_neighbors`, so that column is NaN for every
# other family and the comparison keeps KNN rows only.
# Assumes `df` is already ordered best-first, as the recorded output shows.
df.loc[df['model__n_neighbors'].gt(0)].head(5)

Unnamed: 0,model,model__learning_rate,model__n_estimators,model__n_neighbors,model__C,model__degree,model__gamma,model__kernel,mean_test_score,std_test_score,rank
36,KNeighborsClassifier(),,,2.0,,,,,0.685558,0.025417,280
32,KNeighborsClassifier(),,,5.0,,,,,0.681901,0.014677,281
34,KNeighborsClassifier(),,,5.0,,,,,0.681901,0.014677,281
39,KNeighborsClassifier(),,,6.0,,,,,0.676417,0.004559,283
33,KNeighborsClassifier(),,,3.0,,,,,0.661792,0.032126,284


In [16]:
# Top 5 random-forest candidates.
# `n_estimators` is tuned by BOTH the random-forest and the AdaBoost spaces,
# so filtering on it alone also matches AdaBoost rows and only shows forests
# when they happen to rank higher. AdaBoost rows are the ones that also carry
# a `learning_rate`, so require that column to be empty as well.
# Assumes `df` is already ordered best-first, as the recorded output shows.
df[(df['model__n_estimators'] > 0) & df['model__learning_rate'].isna()].head(5)

Unnamed: 0,model,model__learning_rate,model__n_estimators,model__n_neighbors,model__C,model__degree,model__gamma,model__kernel,mean_test_score,std_test_score,rank
273,RandomForestClassifier(random_state=0),,358.0,,,,,,0.846435,0.019378,116
275,RandomForestClassifier(random_state=0),,358.0,,,,,,0.846435,0.019378,116
276,RandomForestClassifier(random_state=0),,358.0,,,,,,0.846435,0.019378,116
279,RandomForestClassifier(random_state=0),,358.0,,,,,,0.846435,0.019378,116
281,RandomForestClassifier(random_state=0),,358.0,,,,,,0.846435,0.019378,116
