# Image Classification 

## Libraries

In [1]:
import time
import datetime
import pickle
import numpy as np
from sklearn import svm, metrics
from sklearn.model_selection import StratifiedKFold,GridSearchCV
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from pandas import DataFrame

## Loading Dataset from the pickle

In [2]:
# Start point: capture the wall-clock start so the closing cell can report
# the notebook's total run time as `end_time - start_time`.
start_time = time.time()
print("Start time: ", datetime.datetime.now())

Start time:  2019-01-16 16:39:16.035625


In [3]:
# Load the pre-built dataset object from disk. The context manager guarantees
# the file handle is closed, even if unpickling raises.
# NOTE: pickle.load can execute arbitrary code embedded in the file; only load
# pickles that come from a trusted source.
with open("Signature_Dataset_DAISY_110_3_int.pickle", "rb") as pickle_in:
    image_dataset = pickle.load(pickle_in)

## Split data into Training Set and Testing Set

In [4]:
# Pull the train/test attributes off the loaded dataset object
# (X_* are presumably the features and y_* the labels — confirm against the
# code that built the pickle).
X_train, X_test = image_dataset.X_train, image_dataset.X_test
y_train, y_test = image_dataset.y_train, image_dataset.y_test

print("----Dataset Loaded From Pickle----")

----Dataset Loaded From Pickle----


## Train data with parameter optimization

In [5]:
# Dictionary of models and hyperparameters.
# Maps a short model key to an (estimator, param_grid) tuple; the training
# cells pass the grid to GridSearchCV as `param_grid`.

models_and_hyperparameters = {
    # solver='liblinear' is pinned because the grid searches the 'l1' penalty:
    # liblinear supports both 'l1' and 'l2', whereas the 'lbfgs' default of
    # newer scikit-learn releases rejects 'l1'.
    'LR': (LogisticRegression(solver='liblinear'),
           {'C': [0.01, 0.05, 0.1, 0.5, 1, 2],
            'penalty': ['l1', 'l2'],
            'class_weight': ['balanced']}),
    'SVM': (svm.SVC(),
            {'C': [1, 10, 100, 1000],
             'gamma': [0.001, 0.0001],
             'kernel': ['rbf'],
             'class_weight': ['balanced']}),
    # NOTE(review): no random_state is given, so forest results can differ
    # between runs — consider fixing a seed.
    'RFC': (RandomForestClassifier(),
            {'n_estimators': [50, 100, 200],
             # 'max_features': ['auto', 'sqrt'],
             'max_depth': [50, 60, 70],
             'min_samples_split': [2, 5, 10],
             # 'min_samples_leaf': [1, 2],
             'bootstrap': [True, False],
             'class_weight': ['balanced']}),
}

# Logistic Regression

In [6]:
# LOGISTIC REGRESSION
# Exhaustive grid search over the 'LR' entry, scored by ROC AUC using
# shuffled, seeded 10-fold stratified cross-validation.

print("----Training----")

k_fold = StratifiedKFold(n_splits=10, shuffle=True, random_state=31)

# Each entry is an (estimator, param_grid) pair.
model, hyperparameters = models_and_hyperparameters['LR']

gs = GridSearchCV(
    estimator=model,
    param_grid=hyperparameters,
    scoring='roc_auc',
    cv=k_fold,
    return_train_score=True,
    verbose=3,
)
gs.fit(X_train, y_train)

print("----Training Ended----")


----Training----
Fitting 10 folds for each of 12 candidates, totalling 120 fits
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   1.0s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    1.1s remaining:    0.0s


[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   1.0s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    2.1s remaining:    0.0s


[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   1.0s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.9s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.9s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.9s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.9s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   0.9s
[CV] C=0.01, class_weight=balanced, penalty=l1 .......................
[CV]  C=0.01, class_weight=balanced, penalty=l1, score=0.5, total=   1.0s
[CV] C=0.01, class_weight=balanced, penalty=l1 .........

[CV]  C=0.1, class_weight=balanced, penalty=l2, score=0.6000918273645546, total=   2.3s
[CV] C=0.1, class_weight=balanced, penalty=l2 ........................
[CV]  C=0.1, class_weight=balanced, penalty=l2, score=0.6641873278236915, total=   2.3s
[CV] C=0.1, class_weight=balanced, penalty=l2 ........................
[CV]  C=0.1, class_weight=balanced, penalty=l2, score=0.6189164370982553, total=   1.8s
[CV] C=0.1, class_weight=balanced, penalty=l2 ........................
[CV]  C=0.1, class_weight=balanced, penalty=l2, score=0.6295684113865933, total=   2.2s
[CV] C=0.5, class_weight=balanced, penalty=l1 ........................
[CV]  C=0.5, class_weight=balanced, penalty=l1, score=0.4963269054178145, total=   1.9s
[CV] C=0.5, class_weight=balanced, penalty=l1 ........................
[CV]  C=0.5, class_weight=balanced, penalty=l1, score=0.5254361799816346, total=   1.9s
[CV] C=0.5, class_weight=balanced, penalty=l1 ........................
[CV]  C=0.5, class_weight=balanced, penalty=l1

[CV]  C=2, class_weight=balanced, penalty=l1, score=0.5987144168962351, total=   2.7s
[CV] C=2, class_weight=balanced, penalty=l1 ..........................
[CV]  C=2, class_weight=balanced, penalty=l1, score=0.640771349862259, total=   2.1s
[CV] C=2, class_weight=balanced, penalty=l2 ..........................
[CV]  C=2, class_weight=balanced, penalty=l2, score=0.6934802571166209, total=   5.4s
[CV] C=2, class_weight=balanced, penalty=l2 ..........................
[CV]  C=2, class_weight=balanced, penalty=l2, score=0.6874196510560148, total=   4.7s
[CV] C=2, class_weight=balanced, penalty=l2 ..........................
[CV]  C=2, class_weight=balanced, penalty=l2, score=0.707346189164371, total=   4.7s
[CV] C=2, class_weight=balanced, penalty=l2 ..........................
[CV]  C=2, class_weight=balanced, penalty=l2, score=0.7392102846648302, total=   4.5s
[CV] C=2, class_weight=balanced, penalty=l2 ..........................
[CV]  C=2, class_weight=balanced, penalty=l2, score=0.655922

[Parallel(n_jobs=1)]: Done 120 out of 120 | elapsed:  4.7min finished


----Training Ended----


In [7]:
# Show the grid-search results (gs.cv_results_) as a table, one row per hyperparameter combination.
DataFrame(gs.cv_results_)

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_C,param_class_weight,param_penalty,params,rank_test_score,split0_test_score,...,split7_test_score,split7_train_score,split8_test_score,split8_train_score,split9_test_score,split9_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,0.946181,0.009711,0.5,0.5,0.01,balanced,l1,"{'C': 0.01, 'class_weight': 'balanced', 'penal...",10,0.5,...,0.5,0.5,0.5,0.5,0.5,0.5,0.04007,0.017649,0.0,0.0
1,1.372397,0.0038,0.595684,0.643196,0.01,balanced,l2,"{'C': 0.01, 'class_weight': 'balanced', 'penal...",7,0.602204,...,0.648026,0.638755,0.57438,0.640721,0.610744,0.647974,0.111126,0.000488,0.040628,0.005479
2,1.207217,0.003725,0.5,0.5,0.05,balanced,l1,"{'C': 0.05, 'class_weight': 'balanced', 'penal...",10,0.5,...,0.5,0.5,0.5,0.5,0.5,0.5,0.025878,4.1e-05,0.0,0.0
3,2.078948,0.004346,0.61404,0.661744,0.05,balanced,l2,"{'C': 0.05, 'class_weight': 'balanced', 'penal...",6,0.620569,...,0.657759,0.658226,0.602571,0.660876,0.621763,0.663724,0.140061,0.001627,0.039103,0.003574
4,1.180818,0.003749,0.5,0.5,0.1,balanced,l1,"{'C': 0.1, 'class_weight': 'balanced', 'penalt...",10,0.5,...,0.5,0.5,0.5,0.5,0.5,0.5,0.006263,2.8e-05,0.0,0.0
5,2.190995,0.003585,0.625785,0.674796,0.1,balanced,l2,"{'C': 0.1, 'class_weight': 'balanced', 'penalt...",5,0.634894,...,0.664187,0.671623,0.618916,0.675233,0.629568,0.675327,0.12737,2.3e-05,0.037798,0.002882
6,1.963071,0.003753,0.54169,0.543274,0.5,balanced,l1,"{'C': 0.5, 'class_weight': 'balanced', 'penalt...",9,0.496327,...,0.547107,0.542225,0.519467,0.545555,0.568687,0.540414,0.132424,1.2e-05,0.029943,0.003266
7,3.300158,0.003576,0.658733,0.718044,0.5,balanced,l2,"{'C': 0.5, 'class_weight': 'balanced', 'penalt...",3,0.666758,...,0.694582,0.715023,0.642241,0.721813,0.656474,0.716261,0.337687,1.2e-05,0.030374,0.002559
8,2.444153,0.003769,0.555308,0.582827,1.0,balanced,l1,"{'C': 1, 'class_weight': 'balanced', 'penalty'...",8,0.55528,...,0.567401,0.567555,0.556198,0.596791,0.57989,0.586202,0.358026,2.8e-05,0.029331,0.018901
9,3.655536,0.003576,0.674086,0.74141,1.0,balanced,l2,"{'C': 1, 'class_weight': 'balanced', 'penalty'...",2,0.680716,...,0.711846,0.7381,0.649311,0.745669,0.676217,0.739423,0.241704,1.4e-05,0.027481,0.002591


In [8]:
# Create the pickle that stores the grid-search results table
# (gs.cv_results_); note that this is not the fitted model itself.
# Modify the file name to match the dataset you're saving.
# The context manager closes the file even if pickle.dump raises.
with open("LR_roc_auc_DAISY_3.pickle", "wb") as pickle_out:
    pickle.dump(gs.cv_results_, pickle_out)

print("----Pickle created----")

----Pickle created----


### Predict

In [9]:
# Predict test-set labels through the search object; GridSearchCV delegates to
# the estimator refit on the best parameters (refit defaults to True).
y_pred = gs.predict(X_test)

In [10]:
# Report the winning grid point and its mean cross-validated ROC AUC.
# NOTE: '{:3f}' sets a minimum width of 3 with the default 6-decimal
# precision; it is not the same as '{:.3f}'.
print("\n".join((
    "Best hyperparameters: {}".format(gs.best_params_),
    "Best roc_auc score: {:3f}".format(gs.best_score_),
)))

Best hyperparameters: {'C': 2, 'class_weight': 'balanced', 'penalty': 'l2'}
Best roc_auc score: 0.690661


### Report

\\
\\
\\
\\
\\
# SVM

In [12]:
# SVM
# Exhaustive grid search over the 'SVM' entry, scored by ROC AUC using
# shuffled, seeded 4-fold stratified cross-validation.

print("----Training----")

k_fold = StratifiedKFold(n_splits=4, shuffle=True, random_state=31)

# Each entry is an (estimator, param_grid) pair.
model, hyperparameters = models_and_hyperparameters['SVM']

gs = GridSearchCV(
    estimator=model,
    param_grid=hyperparameters,
    scoring='roc_auc',
    cv=k_fold,
    return_train_score=True,
    verbose=3,
)
gs.fit(X_train, y_train)

print("----Training Ended----")


----Training----
Fitting 4 folds for each of 8 candidates, totalling 32 fits
[CV] C=1, class_weight=balanced, gamma=0.001, kernel=rbf .............
[CV]  C=1, class_weight=balanced, gamma=0.001, kernel=rbf, score=0.5684310850439882, total= 1.2min
[CV] C=1, class_weight=balanced, gamma=0.001, kernel=rbf .............


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:  2.2min remaining:    0.0s


[CV]  C=1, class_weight=balanced, gamma=0.001, kernel=rbf, score=0.6141055718475072, total= 1.2min
[CV] C=1, class_weight=balanced, gamma=0.001, kernel=rbf .............


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:  4.3min remaining:    0.0s


[CV]  C=1, class_weight=balanced, gamma=0.001, kernel=rbf, score=0.5433640044166359, total= 1.2min
[CV] C=1, class_weight=balanced, gamma=0.001, kernel=rbf .............
[CV]  C=1, class_weight=balanced, gamma=0.001, kernel=rbf, score=0.601648877438351, total= 1.3min
[CV] C=1, class_weight=balanced, gamma=0.0001, kernel=rbf ............
[CV]  C=1, class_weight=balanced, gamma=0.0001, kernel=rbf, score=0.5681378299120234, total= 1.2min
[CV] C=1, class_weight=balanced, gamma=0.0001, kernel=rbf ............
[CV]  C=1, class_weight=balanced, gamma=0.0001, kernel=rbf, score=0.6141202346041056, total= 1.2min
[CV] C=1, class_weight=balanced, gamma=0.0001, kernel=rbf ............
[CV]  C=1, class_weight=balanced, gamma=0.0001, kernel=rbf, score=0.5433051159366948, total= 1.3min
[CV] C=1, class_weight=balanced, gamma=0.0001, kernel=rbf ............
[CV]  C=1, class_weight=balanced, gamma=0.0001, kernel=rbf, score=0.601324990798675, total= 1.3min
[CV] C=10, class_weight=balanced, gamma=0.001, ke

[Parallel(n_jobs=1)]: Done  32 out of  32 | elapsed: 66.9min finished


----Training Ended----


In [13]:
# Show the grid-search results (gs.cv_results_) as a table, one row per hyperparameter combination.
DataFrame(gs.cv_results_)

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_C,param_class_weight,param_gamma,param_kernel,params,rank_test_score,...,split1_test_score,split1_train_score,split2_test_score,split2_train_score,split3_test_score,split3_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,55.8318,18.500885,0.581896,0.634364,1,balanced,0.001,rbf,"{'C': 1, 'class_weight': 'balanced', 'gamma': ...",4,...,0.614106,0.637373,0.543364,0.635972,0.601649,0.629794,0.807686,0.296982,0.027807,0.002852
1,56.76869,18.975788,0.581731,0.634216,1,balanced,0.0001,rbf,"{'C': 1, 'class_weight': 'balanced', 'gamma': ...",6,...,0.61412,0.637272,0.543305,0.635755,0.601325,0.62965,0.958382,0.515612,0.027811,0.002853
2,55.044163,18.266336,0.581896,0.634364,10,balanced,0.001,rbf,"{'C': 10, 'class_weight': 'balanced', 'gamma':...",4,...,0.614106,0.637373,0.543364,0.635972,0.601649,0.629794,0.492905,0.046947,0.027807,0.002852
3,55.195572,18.371802,0.581731,0.634216,10,balanced,0.0001,rbf,"{'C': 10, 'class_weight': 'balanced', 'gamma':...",6,...,0.61412,0.637272,0.543305,0.635755,0.601325,0.62965,0.369913,0.256913,0.027811,0.002853
4,53.262908,17.545612,0.624966,0.679156,100,balanced,0.001,rbf,"{'C': 100, 'class_weight': 'balanced', 'gamma'...",2,...,0.642214,0.675539,0.609952,0.69303,0.632698,0.671073,0.129788,0.078148,0.013062,0.008301
5,54.672679,18.174279,0.581731,0.634216,100,balanced,0.0001,rbf,"{'C': 100, 'class_weight': 'balanced', 'gamma'...",6,...,0.61412,0.637272,0.543305,0.635755,0.601325,0.62965,0.027076,0.017069,0.027811,0.002853
6,48.423498,15.470352,0.692465,0.7847,1000,balanced,0.001,rbf,"{'C': 1000, 'class_weight': 'balanced', 'gamma...",1,...,0.702199,0.781575,0.669003,0.793352,0.699551,0.781177,0.077503,0.062799,0.01359,0.005026
7,53.206612,17.495161,0.624742,0.678943,1000,balanced,0.0001,rbf,"{'C': 1000, 'class_weight': 'balanced', 'gamma...",3,...,0.642082,0.675277,0.60979,0.692805,0.632448,0.670822,0.188395,0.047116,0.013095,0.008304


In [14]:
# Create the pickle that stores the grid-search results table
# (gs.cv_results_); note that this is not the fitted model itself.
# Modify the file name to match the dataset you're saving.
# The context manager closes the file even if pickle.dump raises.
with open("SVM_roc_auc_DAISY_3.pickle", "wb") as pickle_out:
    pickle.dump(gs.cv_results_, pickle_out)

print("----Pickle created----")

----Pickle created----


### Predict

In [15]:
# Predict test-set labels through the search object; GridSearchCV delegates to
# the estimator refit on the best parameters (refit defaults to True).
y_pred = gs.predict(X_test)

In [16]:
# Report the winning grid point and its mean cross-validated ROC AUC.
# NOTE: '{:3f}' sets a minimum width of 3 with the default 6-decimal
# precision; it is not the same as '{:.3f}'.
print("\n".join((
    "Best hyperparameters: {}".format(gs.best_params_),
    "Best roc_auc score: {:3f}".format(gs.best_score_),
)))

Best hyperparameters: {'C': 1000, 'class_weight': 'balanced', 'gamma': 0.001, 'kernel': 'rbf'}
Best roc_auc score: 0.692465


### Report

In [17]:
# Print the search object's repr followed by the per-class precision/recall/F1
# report comparing y_pred with y_test.
print("Classification report for - \n{}:\n{}\n".format(gs, metrics.classification_report(y_test, y_pred)))

Classification report for - 
GridSearchCV(cv=StratifiedKFold(n_splits=4, random_state=31, shuffle=True),
       error_score='raise',
       estimator=SVC(C=1.0, cache_size=200, class_weight=None, coef0=0.0,
  decision_function_shape='ovr', degree=3, gamma='auto', kernel='rbf',
  max_iter=-1, probability=False, random_state=None, shrinking=True,
  tol=0.001, verbose=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'C': [1, 10, 100, 1000], 'gamma': [0.001, 0.0001], 'kernel': ['rbf'], 'class_weight': ['balanced']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='roc_auc', verbose=3):
             precision    recall  f1-score   support

         -1       0.60      0.59      0.59       480
          1       0.55      0.57      0.56       432

avg / total       0.58      0.58      0.58       912




//
//
//
//
//

# Random Forest

In [18]:
# RANDOM FOREST
# Exhaustive grid search over the 'RFC' entry, scored by ROC AUC using
# shuffled, seeded 4-fold stratified cross-validation.

print("----Training----")

k_fold = StratifiedKFold(n_splits=4, shuffle=True, random_state=31)

# Each entry is an (estimator, param_grid) pair.
model, hyperparameters = models_and_hyperparameters['RFC']

gs = GridSearchCV(
    estimator=model,
    param_grid=hyperparameters,
    scoring='roc_auc',
    cv=k_fold,
    return_train_score=True,
    verbose=3,
)
gs.fit(X_train, y_train)

print("----Training Ended----")


----Training----
Fitting 4 folds for each of 54 candidates, totalling 216 fits
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.9050586510263929, total=   4.2s
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 


[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    4.3s remaining:    0.0s


[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.9062316715542521, total=   4.2s
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 


[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    8.6s remaining:    0.0s


[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.8953698932646301, total=   4.2s
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.8785645933014354, total=   4.2s
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=100 
[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=100, score=0.913357771260997, total=   8.3s
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=100 
[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=100, score=0.9292595307917888, total=   8.1s
[CV] bootstrap=True, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=100 
[CV]  bootstrap=True, class_weight=balanced, max_depth=50, min_s

[CV]  bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50, score=0.8970234604105571, total=   4.2s
[CV] bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50, score=0.9019801251380198, total=   4.3s
[CV] bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50, score=0.8949429517850571, total=   4.3s
[CV] bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=100 
[CV]  bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=100, score=0.9145454545454546, total=   8.3s
[CV] bootstrap=True, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=100 
[CV]  bootstrap=True, class_weight=balanced, max_depth=60, min_sa

[CV]  bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50, score=0.9049633431085045, total=   4.3s
[CV] bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50, score=0.9143108504398827, total=   4.3s
[CV] bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50, score=0.8877511961722488, total=   4.2s
[CV] bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50, score=0.8893117408906882, total=   4.2s
[CV] bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=100 
[CV]  bootstrap=True, class_weight=balanced, max_depth=70, min_samp

[CV]  bootstrap=True, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200, score=0.902569009937431, total=  16.7s
[CV] bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.9321554252199413, total=   6.6s
[CV] bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.932741935483871, total=   6.6s
[CV] bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50, score=0.925991902834008, total=   6.9s
[CV] bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=50, mi

[CV]  bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=10, n_estimators=200, score=0.9281413323518587, total=  26.1s
[CV] bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=False, class_weight=balanced, max_depth=50, min_samples_split=10, n_estimators=200, score=0.9122414427677585, total=  26.6s
[CV] bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50, score=0.9199193548387096, total=   6.7s
[CV] bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50, score=0.9185483870967741, total=   6.4s
[CV] bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_dept

[CV]  bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=10, n_estimators=200, score=0.9266275659824047, total=  26.4s
[CV] bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=10, n_estimators=200, score=0.9300846521899153, total=  27.6s
[CV] bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=False, class_weight=balanced, max_depth=60, min_samples_split=10, n_estimators=200, score=0.9166580787633419, total=  28.1s
[CV] bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50, score=0.9193181818181818, total=   6.8s
[CV] bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=2, n_estimators=50 
[CV]  bootstrap=False, class_weight=balanced, max_

[CV]  bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200, score=0.9251319648093841, total=  26.0s
[CV] bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200, score=0.9310703812316715, total=  25.4s
[CV] bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200, score=0.9276849466323152, total=  26.3s
[CV] bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200 
[CV]  bootstrap=False, class_weight=balanced, max_depth=70, min_samples_split=10, n_estimators=200, score=0.9118586676481413, total=  26.8s


[Parallel(n_jobs=1)]: Done 216 out of 216 | elapsed: 45.5min finished


----Training Ended----


In [19]:
# Show the grid-search results (gs.cv_results_) as a table, one row per hyperparameter combination.
DataFrame(gs.cv_results_)

Unnamed: 0,mean_fit_time,mean_score_time,mean_test_score,mean_train_score,param_bootstrap,param_class_weight,param_max_depth,param_min_samples_split,param_n_estimators,params,...,split1_test_score,split1_train_score,split2_test_score,split2_train_score,split3_test_score,split3_train_score,std_fit_time,std_score_time,std_test_score,std_train_score
0,4.198904,0.026231,0.896315,1.0,True,balanced,50,2,50,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.906232,1.0,0.89537,1.0,0.878565,1.0,0.024908,4.8e-05,0.011075,0.0
1,8.270871,0.034913,0.912225,1.0,True,balanced,50,2,100,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.92926,1.0,0.907081,1.0,0.899168,1.0,0.150211,9.7e-05,0.011051,5.5511150000000004e-17
2,16.325047,0.053191,0.913725,1.0,True,balanced,50,2,200,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.921122,1.0,0.912256,1.0,0.901524,1.0,0.130817,0.000167,0.007821,0.0
3,4.166206,0.026169,0.90188,1.0,True,balanced,50,5,50,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.914435,1.0,0.900729,1.0,0.890843,1.0,0.06661,0.0001,0.008381,5.5511150000000004e-17
4,8.245427,0.034728,0.913017,1.0,True,balanced,50,5,100,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.924062,1.0,0.908046,1.0,0.90385,1.0,0.110187,4.6e-05,0.007749,0.0
5,16.372784,0.052894,0.914929,1.0,True,balanced,50,5,200,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.926085,1.0,0.91233,1.0,0.905705,1.0,0.14202,0.000179,0.007361,0.0
6,4.163113,0.026148,0.896867,1.0,True,balanced,50,10,50,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.899985,1.0,0.898579,1.0,0.882459,1.0,0.034871,3.9e-05,0.008824,0.0
7,8.134492,0.034383,0.90354,1.0,True,balanced,50,10,100,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.913211,1.0,0.901715,1.0,0.889628,1.0,0.066945,4.2e-05,0.009038,0.0
8,16.166167,0.052281,0.911062,1.0,True,balanced,50,10,200,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.918343,1.0,0.913758,1.0,0.899227,1.0,0.236236,0.0001,0.007135,0.0
9,4.2364,0.026328,0.903317,1.0,True,balanced,60,2,50,"{'bootstrap': True, 'class_weight': 'balanced'...",...,0.897023,1.0,0.90198,1.0,0.894943,1.0,0.025706,4.9e-05,0.009583,5.5511150000000004e-17


In [20]:
# Create the pickle that stores the grid-search results table
# (gs.cv_results_); note that this is not the fitted model itself.
# Modify the file name to match the dataset you're saving.
# The context manager closes the file even if pickle.dump raises.
with open("RF_roc_auc_DAISY_3.pickle", "wb") as pickle_out:
    pickle.dump(gs.cv_results_, pickle_out)

print("----Pickle created----")

----Pickle created----


### Predict

In [21]:
# Predict test-set labels through the search object; GridSearchCV delegates to
# the estimator refit on the best parameters (refit defaults to True).
y_pred = gs.predict(X_test)

In [22]:
# Report the winning grid point and its mean cross-validated ROC AUC.
# NOTE: '{:3f}' sets a minimum width of 3 with the default 6-decimal
# precision; it is not the same as '{:.3f}'.
print("\n".join((
    "Best hyperparameters: {}".format(gs.best_params_),
    "Best roc_auc score: {:3f}".format(gs.best_score_),
)))

Best hyperparameters: {'bootstrap': False, 'class_weight': 'balanced', 'max_depth': 50, 'min_samples_split': 2, 'n_estimators': 200}
Best roc_auc score: 0.930732


### Report

In [23]:
# Print the search object's repr followed by the per-class precision/recall/F1
# report comparing y_pred with y_test.
print("Classification report for - \n{}:\n{}\n".format(gs, metrics.classification_report(y_test, y_pred)))

Classification report for - 
GridSearchCV(cv=StratifiedKFold(n_splits=4, random_state=31, shuffle=True),
       error_score='raise',
       estimator=RandomForestClassifier(bootstrap=True, class_weight=None, criterion='gini',
            max_depth=None, max_features='auto', max_leaf_nodes=None,
            min_impurity_decrease=0.0, min_impurity_split=None,
            min_samples_leaf=1, min_samples_split=2,
            min_weight_fraction_leaf=0.0, n_estimators=10, n_jobs=1,
            oob_score=False, random_state=None, verbose=0,
            warm_start=False),
       fit_params=None, iid=True, n_jobs=1,
       param_grid={'n_estimators': [50, 100, 200], 'max_depth': [50, 60, 70], 'min_samples_split': [2, 5, 10], 'bootstrap': [True, False], 'class_weight': ['balanced']},
       pre_dispatch='2*n_jobs', refit=True, return_train_score=True,
       scoring='roc_auc', verbose=3):
             precision    recall  f1-score   support

         -1       0.60      0.65      0.63       480


In [24]:
# End point: measure the wall-clock time elapsed since `start_time` was
# recorded and print it as a datetime.timedelta (H:MM:SS.ffffff).
end_time = time.time()
uptime = end_time - start_time  # elapsed seconds as a float
human_uptime = datetime.timedelta(seconds=uptime)

print("End time: ", datetime.datetime.now())
print("Uptime :" ,human_uptime)

End time:  2019-01-16 18:39:19.367414
Uptime : 2:00:03.331741
