# Image Classification 

## Libraries

In [1]:
import time
import datetime
import pickle
import numpy as np
from sklearn import svm, metrics
from sklearn.model_selection import StratifiedKFold,GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from pandas import DataFrame

## Loading Dataset from the pickle

In [2]:
# Start point: remember when the run began so the final cell can report how
# long the whole notebook took.
start_time = time.time()
started_at = datetime.datetime.now()
print("Start time: ", started_at)

Start time:  2019-01-16 11:31:07.510567


In [3]:
# Load the pre-computed dataset object from disk. The context manager closes
# the file handle even if unpickling raises (the original left it open).
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only open pickles that come from a trusted source.
with open("Signature_Dataset_HOG_110_2_int.pickle", "rb") as pickle_in:
    image_dataset = pickle.load(pickle_in)

## Split data into Training Set and Testing Set

In [4]:
# The pickled dataset object already carries the train/test partition;
# expose the four arrays under the names used by the rest of the notebook.
X_train, X_test = image_dataset.X_train, image_dataset.X_test
y_train, y_test = image_dataset.y_train, image_dataset.y_test

print("----Dataset Loaded From Pickle----")

----Dataset Loaded From Pickle----


## Train models with hyperparameter optimization

In [5]:
# Dictionary of models and hyperparameters.
# Each key maps to a 2-tuple: (unfitted estimator, param_grid for GridSearchCV).

models_and_hyperparameters = {
    # The grid below searches both 'l1' and 'l2' penalties, so the solver is
    # pinned to liblinear, which supports both. Relying on the library default
    # is fragile: newer scikit-learn releases default to lbfgs, which rejects
    # penalty='l1'.
    'LR': (
        LogisticRegression(solver='liblinear'),
        {
            'C': [0.01, 0.05, 0.1, 0.5, 1, 2],
            'penalty': ['l1', 'l2'],
            'class_weight': ['balanced'],
        },
    ),
    'SVM': (
        svm.SVC(),
        {
            'C': [1, 10, 100, 1000],
            'gamma': [0.001, 0.0001],
            'kernel': ['rbf'],
            'class_weight': ['balanced'],
        },
    ),
    'RFC': (
        RandomForestClassifier(),
        {
            'n_estimators': [50, 100, 200],
            # 'max_features': ['auto', 'sqrt'],   # currently left out of the search
            'max_depth': [50, 60, 70],
            'min_samples_split': [2, 5, 10],
            # 'min_samples_leaf': [1, 2],         # currently left out of the search
            'bootstrap': [True, False],
            'class_weight': ['balanced'],
        },
    ),
}

# Logistic Regression

In [6]:
# LOGISTIC REGRESSION: exhaustive grid search scored by ROC AUC.

print("----Training----")

# Estimator and search grid registered under the 'LR' key.
model, hyperparameters = models_and_hyperparameters['LR']

# 10 stratified, shuffled folds with a fixed seed so the fold assignment is repeatable.
k_fold = StratifiedKFold(n_splits=10, shuffle=True, random_state=31)

gs = GridSearchCV(
    estimator=model,
    param_grid=hyperparameters,
    scoring='roc_auc',
    cv=k_fold,
    return_train_score=True,
    verbose=3,
)
gs.fit(X_train, y_train)

print("----Training Ended----")


----Training----
Fitting 10 folds for each of 12 candidates, totalling 120 fits
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.7s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.8s remaining:    0.0s


[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.6s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    1.5s remaining:    0.0s


[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.6s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.6s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.6s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.6s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.8s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.7s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.7s
[CV] C=0.01, class_weight=balanced, penalty=l1 .........

[CV]  C=0.1, class_weight=balanced, penalty=l2, score=0.7607897153351698, total=   3.2s
[CV] C=0.1, class_weight=balanced, penalty=l2 ........................
[CV]  C=0.1, class_weight=balanced, penalty=l2, score=0.7398530762167127, total=   3.2s
[CV] C=0.1, class_weight=balanced, penalty=l2 ........................
[CV]  C=0.1, class_weight=balanced, penalty=l2, score=0.698714416896235, total=   3.2s
[CV] C=0.1, class_weight=balanced, penalty=l2 ........................
[CV]  C=0.1, class_weight=balanced, penalty=l2, score=0.7101928374655647, total=   3.2s
[CV] C=0.1, class_weight=balanced, penalty=l2 ........................
[CV]  C=0.1, class_weight=balanced, penalty=l2, score=0.7338842975206612, total=   3.2s
[CV] C=0.5, class_weight=balanced, penalty=l1 ........................
[CV]  C=0.5, class_weight=balanced, penalty=l1, score=0.6837465564738293, total=   1.1s
[CV] C=0.5, class_weight=balanced, penalty=l1 ........................
[CV]  C=0.5, class_weight=balanced, penalty=l1,

[CV]  C=2, class_weight=balanced, penalty=l1, score=0.6376492194674013, total=   1.2s
[CV] C=2, class_weight=balanced, penalty=l1 ..........................
[CV]  C=2, class_weight=balanced, penalty=l1, score=0.6510560146923783, total=   1.1s
[CV] C=2, class_weight=balanced, penalty=l1 ..........................
[CV]  C=2, class_weight=balanced, penalty=l1, score=0.7064279155188246, total=   1.3s
[CV] C=2, class_weight=balanced, penalty=l2 ..........................
[CV]  C=2, class_weight=balanced, penalty=l2, score=0.7143250688705235, total=   4.8s
[CV] C=2, class_weight=balanced, penalty=l2 ..........................
[CV]  C=2, class_weight=balanced, penalty=l2, score=0.7312213039485767, total=   4.7s
[CV] C=2, class_weight=balanced, penalty=l2 ..........................
[CV]  C=2, class_weight=balanced, penalty=l2, score=0.6806244260789717, total=   4.8s
[CV] C=2, class_weight=balanced, penalty=l2 ..........................
[CV]  C=2, class_weight=balanced, penalty=l2, score=0.7265

[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed:  4.7min finished


----Training Ended----


In [7]:
# Tabulate the cross-validation results: one row per hyperparameter combination.
DataFrame(data=gs.cv_results_)

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_C,param_class_weight,param_penalty,params,rank_test_score,split0_test_score,...,split7_test_score,split7_train_score,split8_test_score,split8_train_score,split9_test_score,split9_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,0.67782,0.009528,0.5,0.5,0.01,balanced,l1,"{'C': 0.01, 'class_weight': 'balanced', 'penal...",12,0.5,...,0.5,0.5,0.5,0.5,0.5,0.5,0.04803,0.001429,0.0,0.0
1,1.711092,0.008939,0.70584,0.985044,0.01,balanced,l2,"{'C': 0.01, 'class_weight': 'balanced', 'penal...",6,0.709091,...,0.6955,0.98517,0.686318,0.98566,0.721028,0.985814,0.122934,8.6e-05,0.01251,0.0008120014
2,0.822581,0.008945,0.581648,0.612603,0.05,balanced,l1,"{'C': 0.05, 'class_weight': 'balanced', 'penal...",11,0.608815,...,0.603581,0.601626,0.511203,0.617794,0.622498,0.596533,0.028789,8.3e-05,0.046098,0.01033102
3,2.510007,0.009104,0.727245,0.999985,0.05,balanced,l2,"{'C': 0.05, 'class_weight': 'balanced', 'penal...",3,0.721396,...,0.702112,1.0,0.709091,0.999998,0.732415,0.999992,0.063607,0.000652,0.019795,1.855346e-05
4,0.925438,0.008947,0.654279,0.728097,0.1,balanced,l1,"{'C': 0.1, 'class_weight': 'balanced', 'penalt...",10,0.704132,...,0.706428,0.726941,0.621028,0.729782,0.687879,0.725248,0.024421,8.1e-05,0.041721,0.004020678
5,3.219833,0.009413,0.728558,1.0,0.1,balanced,l2,"{'C': 0.1, 'class_weight': 'balanced', 'penalt...",1,0.721488,...,0.698714,1.0,0.710193,1.0,0.733884,1.0,0.080889,0.000949,0.022264,0.0
6,1.090583,0.008933,0.685464,0.996418,0.5,balanced,l1,"{'C': 0.5, 'class_weight': 'balanced', 'penalt...",7,0.683747,...,0.639302,0.996742,0.662902,0.996333,0.699816,0.996322,0.032679,6.1e-05,0.028133,0.000273059
7,3.888121,0.008892,0.72775,1.0,0.5,balanced,l2,"{'C': 0.5, 'class_weight': 'balanced', 'penalt...",2,0.717264,...,0.692929,1.0,0.713131,1.0,0.733792,1.0,0.171257,7.8e-05,0.025996,0.0
8,1.13691,0.008932,0.680606,1.0,1.0,balanced,l1,"{'C': 1, 'class_weight': 'balanced', 'penalty'...",8,0.681543,...,0.633701,1.0,0.652525,1.0,0.69899,1.0,0.058563,5.5e-05,0.032449,6.802027e-07
9,4.715721,0.008895,0.726951,1.0,1.0,balanced,l2,"{'C': 1, 'class_weight': 'balanced', 'penalty'...",4,0.714876,...,0.690542,1.0,0.713039,1.0,0.735629,1.0,0.072102,7.1e-05,0.026764,0.0


In [8]:
# Persist the grid-search results table (gs.cv_results_) for later analysis.
# Only the cross-validation results are stored here, not the fitted model.
# Modify the file name to match the dataset/model you are saving.
# The context manager closes the file even if pickle.dump raises.
with open("LR_roc_auc_HOG_2.pickle", "wb") as pickle_out:
    pickle.dump(gs.cv_results_, pickle_out)

print("----Pickle created----")

----Pickle created----


### Predict

In [9]:
# Predict test-set labels with the estimator selected by the grid search.
y_pred = gs.predict(X=X_test)

In [10]:
# Summarise the winning configuration and its mean cross-validated ROC AUC.
# NOTE: '{:3f}' sets a minimum field width of 3, not a precision, so the score
# is printed with the default six decimals.
print(
    "Best hyperparameters: {}".format(gs.best_params_),
    "Best roc_auc score: {:3f}".format(gs.best_score_),
    sep="\n",
)

Best hyperparameters: {'C': 0.1, 'class_weight': 'balanced', 'penalty': 'l2'}
Best roc_auc score: 0.728558


### Report

In [11]:
# Per-class precision/recall/F1 on the test set, preceded by the repr of the
# search object so the configuration is recorded alongside the scores.
print(
    "Classification report for - \n{}:\n{}\n".format(
        gs,
        metrics.classification_report(y_true=y_test, y_pred=y_pred),
    )
)

Classification report for - 
GridSearchCV(cv=StratifiedKFold(n_splits=10, random_state=31, shuffle=True),
       error_score='raise',
       estimator=LogisticRegression(C=1.0, class_weight=None, dual=False, fit_intercept=True,
          intercept_scaling=1, max_iter=100, multi_class='ovr', n_jobs=1,
          penalty='l2', random_state=None, solver='liblinear', tol=0.0001,
          verbose=0, warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [0.01, 0.05, 0.1, 0.5, 1, 2], 'penalty': ['l1', 'l2'], 'class_weight': ['balanced']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='roc_auc', verbose=3):
             precision    recall  f1-score   support

         -1       0.61      0.59      0.60       480
          1       0.56      0.59      0.57       432

avg / total       0.59      0.59      0.59       912




\\
\\
\\
\\
\\
# SVM

In [12]:
# SVM: exhaustive grid search scored by ROC AUC.

print("----Training----")

# Estimator and search grid registered under the 'SVM' key.
model, hyperparameters = models_and_hyperparameters['SVM']

# 4 stratified, shuffled folds with a fixed seed so the fold assignment is repeatable.
k_fold = StratifiedKFold(n_splits=4, shuffle=True, random_state=31)

gs = GridSearchCV(
    estimator=model,
    param_grid=hyperparameters,
    scoring='roc_auc',
    cv=k_fold,
    return_train_score=True,
    verbose=3,
)
gs.fit(X_train, y_train)

print("----Training Ended----")


----Training----
Fitting 4 folds for each of 8 candidates, totalling 32 fits
[CV] C=1, class_weight=balanced, gamma=0.001, kernel=rbf .............
[CV]  C=1, class_weight=balanced, gamma=0.001, kernel=rbf, score=0.6675806451612903, total= 3.3min
[CV] C=1, class_weight=balanced, gamma=0.001, kernel=rbf .............


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  5.8min remaining:    0.0s


[CV]  C=1, class_weight=balanced, gamma=0.001, kernel=rbf, score=0.65366568914956, total= 3.3min
[CV] C=1, class_weight=balanced, gamma=0.001, kernel=rbf .............


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed: 11.6min remaining:    0.0s


[CV]  C=1, class_weight=balanced, gamma=0.001, kernel=rbf, score=0.6483327199116673, total= 3.4min
[CV] C=1, class_weight=balanced, gamma=0.001, kernel=rbf .............
[CV]  C=1, class_weight=balanced, gamma=0.001, kernel=rbf, score=0.6789547294810453, total= 3.3min
[CV] C=1, class_weight=balanced, gamma=0.0001, kernel=rbf ............
[CV]  C=1, class_weight=balanced, gamma=0.0001, kernel=rbf, score=0.5567741935483872, total= 3.3min
[CV] C=1, class_weight=balanced, gamma=0.0001, kernel=rbf ............
[CV]  C=1, class_weight=balanced, gamma=0.0001, kernel=rbf, score=0.555733137829912, total= 3.3min
[CV] C=1, class_weight=balanced, gamma=0.0001, kernel=rbf ............
[CV]  C=1, class_weight=balanced, gamma=0.0001, kernel=rbf, score=0.5578505704821494, total= 3.3min
[CV] C=1, class_weight=balanced, gamma=0.0001, kernel=rbf ............
[CV]  C=1, class_weight=balanced, gamma=0.0001, kernel=rbf, score=0.5848656606551343, total= 3.3min
[CV] C=10, class_weight=balanced, gamma=0.001, k

[Parallel(n_jobs=1)]: Done  32 out of  32 | elapsed: 179.4min finished


----Training Ended----


In [13]:
# Tabulate the cross-validation results: one row per hyperparameter combination.
DataFrame(data=gs.cv_results_)

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_C,param_class_weight,param_gamma,param_kernel,params,rank_test_score,...,split1_test_score,split1_train_score,split2_test_score,split2_train_score,split3_test_score,split3_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,146.826307,51.460305,0.662132,0.945796,1,balanced,0.001,rbf,"{'C': 1, 'class_weight': 'balanced', 'gamma': ...",6,...,0.653666,0.940606,0.648333,0.941247,0.678955,0.954987,1.1526,1.288901,0.011984,0.00575318
1,146.233172,51.089052,0.563799,0.754103,1,balanced,0.0001,rbf,"{'C': 1, 'class_weight': 'balanced', 'gamma': ...",8,...,0.555733,0.740624,0.557851,0.75399,0.584866,0.756214,0.155792,0.02515,0.012178,0.008915632
2,145.785899,49.023252,0.842665,1.0,10,balanced,0.001,rbf,"{'C': 10, 'class_weight': 'balanced', 'gamma':...",3,...,0.850806,1.0,0.861612,1.0,0.827045,1.0,0.095257,0.134472,0.014145,0.0
3,138.62267,47.962976,0.657566,0.927154,10,balanced,0.0001,rbf,"{'C': 10, 'class_weight': 'balanced', 'gamma':...",7,...,0.663431,0.921055,0.645432,0.92417,0.671667,0.935883,0.718764,0.360785,0.010506,0.005532233
4,145.801156,49.043213,0.842874,1.0,100,balanced,0.001,rbf,"{'C': 100, 'class_weight': 'balanced', 'gamma'...",1,...,0.851422,1.0,0.861259,1.0,0.827619,1.0,0.106188,0.15534,0.013961,0.0
5,142.107442,43.29624,0.741626,0.999999,100,balanced,0.0001,rbf,"{'C': 100, 'class_weight': 'balanced', 'gamma'...",5,...,0.759736,0.999998,0.757968,1.0,0.729407,1.0,0.182213,0.328031,0.017598,8.167933e-07
6,145.783166,49.049437,0.842874,1.0,1000,balanced,0.001,rbf,"{'C': 1000, 'class_weight': 'balanced', 'gamma...",1,...,0.851422,1.0,0.861259,1.0,0.827619,1.0,0.091702,0.148445,0.013961,0.0
7,143.19424,43.286253,0.742371,1.0,1000,balanced,0.0001,rbf,"{'C': 1000, 'class_weight': 'balanced', 'gamma...",4,...,0.762185,1.0,0.758587,1.0,0.729967,1.0,0.6892,0.306349,0.018491,0.0


In [14]:
# Persist the grid-search results table (gs.cv_results_) for later analysis.
# Only the cross-validation results are stored here, not the fitted model.
# Modify the file name to match the dataset/model you are saving.
# The context manager closes the file even if pickle.dump raises.
with open("SVM_roc_auc_HOG_2.pickle", "wb") as pickle_out:
    pickle.dump(gs.cv_results_, pickle_out)

print("----Pickle created----")

----Pickle created----


### Predict

In [15]:
# Predict test-set labels with the estimator selected by the grid search.
y_pred = gs.predict(X=X_test)

In [16]:
# Summarise the winning configuration and its mean cross-validated ROC AUC.
# NOTE: '{:3f}' sets a minimum field width of 3, not a precision, so the score
# is printed with the default six decimals.
print(
    "Best hyperparameters: {}".format(gs.best_params_),
    "Best roc_auc score: {:3f}".format(gs.best_score_),
    sep="\n",
)

Best hyperparameters: {'C': 100, 'class_weight': 'balanced', 'gamma': 0.001, 'kernel': 'rbf'}
Best roc_auc score: 0.842874


### Report

In [17]:
# Per-class precision/recall/F1 on the test set, preceded by the repr of the
# search object so the configuration is recorded alongside the scores.
print(
    "Classification report for - \n{}:\n{}\n".format(
        gs,
        metrics.classification_report(y_true=y_test, y_pred=y_pred),
    )
)

Classification report for - 
GridSearchCV(cv=StratifiedKFold(n_splits=4, random_state=31, shuffle=True),
       error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf'], 'class_weight': ['balanced']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='roc_auc', verbose=3):
             precision    recall  f1-score   support

         -1       0.61      0.65      0.63       480
          1       0.58      0.55      0.56       432

avg / total       0.60      0.60      0.60       912




//
//
//
//
//

# Random Forest

In [18]:
# RANDOM FOREST: exhaustive grid search scored by ROC AUC.

print("----Training----")

# Estimator and search grid registered under the 'RFC' key.
model, hyperparameters = models_and_hyperparameters['RFC']

# 4 stratified, shuffled folds with a fixed seed so the fold assignment is repeatable.
k_fold = StratifiedKFold(n_splits=4, shuffle=True, random_state=31)

gs = GridSearchCV(
    estimator=model,
    param_grid=hyperparameters,
    scoring='roc_auc',
    cv=k_fold,
    return_train_score=True,
    verbose=3,
)
gs.fit(X_train, y_train)

print("----Training Ended----")


----Training----
Fitting 4 folds for each of 54 candidates, totalling 216 fits
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.7981818181818182, total=   4.0s
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.1s remaining:    0.0s


[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.761510263929619, total=   3.9s
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    8.2s remaining:    0.0s


[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.7761280824438719, total=   4.0s
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.7554876702245125, total=   3.9s
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=100 
[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=100, score=0.8247140762463343, total=   7.5s
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=100 
[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=100, score=0.7964662756598241, total=   7.3s
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=100 
[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_

[CV]  bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50, score=0.7808284457478007, total=   3.8s
[CV] bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50, score=0.7623113728376887, total=   4.0s
[CV] bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50, score=0.7559440559440559, total=   3.9s
[CV] bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=100 
[CV]  bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=100, score=0.8129765395894428, total=   7.5s
[CV] bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=100 
[CV]  bootstrap=True, class_weight=balanced, max_depth=60, min_sa

[CV]  bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50, score=0.77933284457478, total=   3.9s
[CV] bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50, score=0.7311656891495601, total=   4.0s
[CV] bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50, score=0.76370997423629, total=   4.0s
[CV] bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50, score=0.7632683106367317, total=   4.0s
[CV] bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=100 
[CV]  bootstrap=True, class_weight=balanced, max_depth=70, min_samples_

[CV]  bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200, score=0.802811924917188, total=  13.8s
[CV] bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.8135850439882698, total=   6.0s
[CV] bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.8239222873900294, total=   6.0s
[CV] bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.7856017666543983, total=   6.1s
[CV] bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=50, 

[CV]  bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=10, n_estimators=200, score=0.8354508649245491, total=  23.1s
[CV] bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=10, n_estimators=200, score=0.8207287449392712, total=  22.1s
[CV] bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50, score=0.8052272727272727, total=   6.1s
[CV] bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50, score=0.7996847507331378, total=   5.9s
[CV] bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_dept

[CV]  bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=10, n_estimators=200, score=0.8511436950146628, total=  22.1s
[CV] bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=10, n_estimators=200, score=0.8313433934486566, total=  22.9s
[CV] bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=10, n_estimators=200, score=0.8217887375782111, total=  21.7s
[CV] bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50, score=0.82316715542522, total=   5.9s
[CV] bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_de

[CV]  bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200, score=0.859208211143695, total=  22.5s
[CV] bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200, score=0.8437976539589442, total=  22.1s
[CV] bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200, score=0.8449024659550975, total=  22.7s
[CV] bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200, score=0.8239087228560913, total=  22.2s


[Parallel(n_jobs=1)]: Done 216 out of 216 | elapsed: 40.2min finished


----Training Ended----


In [19]:
# Tabulate the cross-validation results: one row per hyperparameter combination.
DataFrame(data=gs.cv_results_)

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_bootstrap,param_class_weight,param_max_depth,param_min_samples_split,param_n_estimators,params,...,split1_test_score,split1_train_score,split2_test_score,split2_train_score,split3_test_score,split3_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,3.883076,0.056426,0.772834,1.0,True,balanced,50,2,50,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.76151,1.0,0.776128,1.0,0.755488,1.0,0.059498,0.000235,0.016454,7.850462000000001e-17
1,7.364434,0.065167,0.798992,1.0,True,balanced,50,2,100,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.796466,1.0,0.79046,1.0,0.784284,1.0,0.081002,8.5e-05,0.015472,0.0
2,14.343236,0.083509,0.809856,1.0,True,balanced,50,2,200,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.819135,1.0,0.808414,1.0,0.782252,1.0,0.169241,0.000236,0.017595,5.5511150000000004e-17
3,3.841459,0.056535,0.777295,1.0,True,balanced,50,5,50,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.776327,1.0,0.776474,1.0,0.779426,1.0,0.111655,2.2e-05,0.001251,0.0
4,7.262926,0.065237,0.798921,1.0,True,balanced,50,5,100,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.781349,1.0,0.8077,1.0,0.791115,1.0,0.100838,0.00019,0.013442,0.0
5,14.242902,0.083361,0.811408,1.0,True,balanced,50,5,200,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.808798,1.0,0.813957,1.0,0.807773,1.0,0.143779,8.6e-05,0.003169,0.0
6,3.789865,0.056491,0.765944,1.0,True,balanced,50,10,50,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.753812,1.0,0.796393,1.0,0.743467,1.0,0.066805,0.000156,0.019973,0.0
7,7.157019,0.064827,0.779739,1.0,True,balanced,50,10,100,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.784883,1.0,0.768612,1.0,0.762812,1.0,0.103096,0.000149,0.015487,0.0
8,14.061513,0.082914,0.813909,1.0,True,balanced,50,10,200,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.823534,1.0,0.811248,1.0,0.808333,1.0,0.085921,0.000256,0.005763,0.0
9,3.865893,0.05639,0.767376,1.0,True,balanced,60,2,50,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.780828,1.0,0.762311,1.0,0.755944,1.0,0.071858,0.000254,0.009306,7.850462000000001e-17


In [20]:
# Persist the grid-search results table (gs.cv_results_) for later analysis.
# Only the cross-validation results are stored here, not the fitted model.
# Modify the file name to match the dataset/model you are saving.
# The context manager closes the file even if pickle.dump raises.
with open("RF_roc_auc_HOG_2.pickle", "wb") as pickle_out:
    pickle.dump(gs.cv_results_, pickle_out)

print("----Pickle created----")

----Pickle created----


### Predict

In [21]:
# Predict test-set labels with the estimator selected by the grid search.
y_pred = gs.predict(X=X_test)

In [22]:
# Summarise the winning configuration and its mean cross-validated ROC AUC.
# NOTE: '{:3f}' sets a minimum field width of 3, not a precision, so the score
# is printed with the default six decimals.
print(
    "Best hyperparameters: {}".format(gs.best_params_),
    "Best roc_auc score: {:3f}".format(gs.best_score_),
    sep="\n",
)

Best hyperparameters: {'bootstrap': False, 'class_weight': 'balanced', 'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 200}
Best roc_auc score: 0.848984


### Report

In [23]:
# Per-class precision/recall/F1 on the test set, preceded by the repr of the
# search object so the configuration is recorded alongside the scores.
print(
    "Classification report for - \n{}:\n{}\n".format(
        gs,
        metrics.classification_report(y_true=y_test, y_pred=y_pred),
    )
)

Classification report for - 
GridSearchCV(cv=StratifiedKFold(n_splits=4, random_state=31, shuffle=True),
       error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'n_estimators': [50, 100, 200], 'max_depth': [50, 60, 70], 'min_samples_split': [2, 5, 10], 'bootstrap': [True, False], 'class_weight': ['balanced']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='roc_auc', verbose=3):
             precision    recall  f1-score   support

         -1       0.59      0.84      0.69       480


In [24]:
# End point: measure the wall-clock time elapsed since `start_time` was taken.
end_time = time.time()
uptime = end_time - start_time                      # elapsed seconds (float)
human_uptime = datetime.timedelta(seconds=uptime)   # printed as H:MM:SS.ffffff

finished_at = datetime.datetime.now()
print("End time: ", finished_at)
print("Uptime :", human_uptime)

End time:  2019-01-16 15:23:41.745017
Uptime : 3:52:34.234460
