### All technique of hyperparameter optimization 
    1-GridSearchCV
    2-RandomSearchCV
    3-Bayesian Optimization-Automate hyperparameter tunning(Hyperopt)
    4-Optuna -Automate hypermeter tunning
    5-genetic algorithms(TPOT classifier)

In [1]:
import  pandas as pd
import numpy as np
import warnings
warnings.filterwarnings('ignore')

In [2]:
# read the csv file 
df=pd.read_csv('diabetes.csv')
df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148,72,35,0,33.6,0.627,50,1
1,1,85,66,29,0,26.6,0.351,31,0
2,8,183,64,0,0,23.3,0.672,32,1
3,1,89,66,23,94,28.1,0.167,21,0
4,0,137,40,35,168,43.1,2.288,33,1


In [3]:
df['Glucose']=np.where(df['Glucose']==0,df['Glucose'].median(),df['Glucose'])
df['Insulin']=np.where(df['Insulin']==0,df['Insulin'].median(),df['Insulin'])

df.head()

Unnamed: 0,Pregnancies,Glucose,BloodPressure,SkinThickness,Insulin,BMI,DiabetesPedigreeFunction,Age,Outcome
0,6,148.0,72,35,30.5,33.6,0.627,50,1
1,1,85.0,66,29,30.5,26.6,0.351,31,0
2,8,183.0,64,0,30.5,23.3,0.672,32,1
3,1,89.0,66,23,94.0,28.1,0.167,21,0
4,0,137.0,40,35,168.0,43.1,2.288,33,1


In [4]:
## independent and dependent feature
X=df.iloc[:,:-1]
y=df.iloc[:,-1]

In [27]:
## train and test split
from sklearn.model_selection import train_test_split,cross_val_score
X_train,X_test,y_train,y_test=train_test_split(X,y,test_size=0.2,random_state=33)

In [6]:
## using random classifier withou using any hyperparater tunning
from sklearn.ensemble import RandomForestClassifier
rf=RandomForestClassifier(n_estimators=10).fit(X_train,y_train)
prediction=rf.predict(X_test)

In [7]:
## check the matrix of the prediction
from sklearn.metrics import confusion_matrix,classification_report,accuracy_score
print(confusion_matrix(y_test,prediction))
print(accuracy_score(y_test,prediction))
print(classification_report(y_test,prediction))

[[88 11]
 [28 27]]
0.7467532467532467
              precision    recall  f1-score   support

           0       0.76      0.89      0.82        99
           1       0.71      0.49      0.58        55

    accuracy                           0.75       154
   macro avg       0.73      0.69      0.70       154
weighted avg       0.74      0.75      0.73       154



In [8]:
## manual hyperparameter tunning in Random forest
model=RandomForestClassifier(n_estimators=300,criterion='entropy',max_features='sqrt',
                             min_samples_leaf=10,random_state=100).fit(X_train,y_train)
prediction=model.predict(X_test)
print(confusion_matrix(y_test,prediction))
print(accuracy_score(y_test,prediction))
print(classification_report(y_test,prediction))

[[88 11]
 [26 29]]
0.7597402597402597
              precision    recall  f1-score   support

           0       0.77      0.89      0.83        99
           1       0.72      0.53      0.61        55

    accuracy                           0.76       154
   macro avg       0.75      0.71      0.72       154
weighted avg       0.76      0.76      0.75       154



### RandomSearchCV hyperparameter tunning

In [9]:
from sklearn.model_selection import RandomizedSearchCV
# number of tree in random forest
n_estimators=[int(x) for x in np.linspace(start=200,stop=2000,num=20)]
# number of features to consider at every split
max_features=['auto','sqrt','log2']
# max number of levels in tree
max_depth=[int(x) for x in np.linspace(10,1000,20)]
# min number of samples required to split the node
min_samples_split=[2,3,5,8,10]
# min number of samples required at each leaf node
min_samples_leaf=[1,2,3,4,7,8]
# create the random grid
random_grid={'n_estimators':n_estimators,
            'max_features': max_features,
            'max_depth':max_depth,
            'min_samples_leaf':min_samples_leaf,
            'min_samples_split':min_samples_split,
            'criterion':['entropy','gini']}
print(random_grid)

{'n_estimators': [200, 294, 389, 484, 578, 673, 768, 863, 957, 1052, 1147, 1242, 1336, 1431, 1526, 1621, 1715, 1810, 1905, 2000], 'max_features': ['auto', 'sqrt', 'log2'], 'max_depth': [10, 62, 114, 166, 218, 270, 322, 374, 426, 478, 531, 583, 635, 687, 739, 791, 843, 895, 947, 1000], 'min_samples_leaf': [1, 2, 3, 4, 7, 8], 'min_samples_split': [2, 3, 5, 8, 10], 'criterion': ['entropy', 'gini']}


In [10]:
rf_random=RandomForestClassifier()
rf_random_cv=RandomizedSearchCV(estimator=rf_random,param_distributions=random_grid,n_iter=100,n_jobs=-1,verbose=1,random_state=100,cv=3)
rf_random_cv.fit(X_train,y_train)

Fitting 3 folds for each of 100 candidates, totalling 300 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  34 tasks      | elapsed:   16.1s
[Parallel(n_jobs=-1)]: Done 184 tasks      | elapsed:  1.4min
[Parallel(n_jobs=-1)]: Done 300 out of 300 | elapsed:  2.4min finished


RandomizedSearchCV(cv=3, error_score=nan,
                   estimator=RandomForestClassifier(bootstrap=True,
                                                    ccp_alpha=0.0,
                                                    class_weight=None,
                                                    criterion='gini',
                                                    max_depth=None,
                                                    max_features='auto',
                                                    max_leaf_nodes=None,
                                                    max_samples=None,
                                                    min_impurity_decrease=0.0,
                                                    min_impurity_split=None,
                                                    min_samples_leaf=1,
                                                    min_samples_split=2,
                                                    min_weight_fraction_leaf=0.0,
               

In [22]:
rf_random_cv.best_params_

{'n_estimators': 1715,
 'min_samples_split': 10,
 'min_samples_leaf': 8,
 'max_features': 'log2',
 'max_depth': 843,
 'criterion': 'entropy'}

In [12]:
rf_random_grid=rf_random_cv.best_estimator_

In [13]:
rf_random_cv.best_estimator_

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=843, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=8, min_samples_split=10,
                       min_weight_fraction_leaf=0.0, n_estimators=1715,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [14]:
y_test.shape,X_test.shape

((154,), (154, 8))

In [15]:
from sklearn.metrics import accuracy_score
y_pred=rf_random_grid.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print("accuracy score {}".format(accuracy_score(y_test,y_pred)))
print("classification report : {}".format(classification_report(y_test,y_pred)))

[[86 13]
 [26 29]]
accuracy score 0.7467532467532467
classification report :               precision    recall  f1-score   support

           0       0.77      0.87      0.82        99
           1       0.69      0.53      0.60        55

    accuracy                           0.75       154
   macro avg       0.73      0.70      0.71       154
weighted avg       0.74      0.75      0.74       154



In [16]:
## using grid searchcv for hyperparameter tunning
from sklearn.model_selection import GridSearchCV
param_grid={
    'criterion':[rf_random_cv.best_params_['criterion']],
    'max_depth':[rf_random_cv.best_params_['max_depth']],
    'max_features':[rf_random_cv.best_params_['max_features']],
    'min_samples_leaf':[rf_random_cv.best_params_['min_samples_leaf'],
                       rf_random_cv.best_params_['min_samples_leaf']+2,
                       rf_random_cv.best_params_['min_samples_leaf']+4],
    'min_samples_split':[rf_random_cv.best_params_['min_samples_split']-2,
                        rf_random_cv.best_params_['min_samples_split']-1,
                        rf_random_cv.best_params_['min_samples_split'],
                        rf_random_cv.best_params_['min_samples_split']+1,
                        rf_random_cv.best_params_['min_samples_split']+2],
    'n_estimators':[rf_random_cv.best_params_['n_estimators']-200,
                   rf_random_cv.best_params_['n_estimators']-100,
                   rf_random_cv.best_params_['n_estimators'],
                   rf_random_cv.best_params_['n_estimators']+100,
                   rf_random_cv.best_params_['n_estimators']+200]
}

In [17]:
print(param_grid)

{'criterion': ['entropy'], 'max_depth': [843], 'max_features': ['log2'], 'min_samples_leaf': [8, 10, 12], 'min_samples_split': [8, 9, 10, 11, 12], 'n_estimators': [1515, 1615, 1715, 1815, 1915]}


In [18]:
rf=RandomForestClassifier()
grid_search=GridSearchCV(estimator=rf,param_grid=param_grid,n_jobs=-1,cv=10,verbose=2)
grid_search.fit(X_train,y_train)

Fitting 10 folds for each of 75 candidates, totalling 750 fits


[Parallel(n_jobs=-1)]: Using backend LokyBackend with 8 concurrent workers.
[Parallel(n_jobs=-1)]: Done  25 tasks      | elapsed:   20.4s
[Parallel(n_jobs=-1)]: Done 146 tasks      | elapsed:  2.1min
[Parallel(n_jobs=-1)]: Done 349 tasks      | elapsed:  5.1min
[Parallel(n_jobs=-1)]: Done 632 tasks      | elapsed:  9.0min
[Parallel(n_jobs=-1)]: Done 750 out of 750 | elapsed: 10.6min finished


GridSearchCV(cv=10, error_score=nan,
             estimator=RandomForestClassifier(bootstrap=True, ccp_alpha=0.0,
                                              class_weight=None,
                                              criterion='gini', max_depth=None,
                                              max_features='auto',
                                              max_leaf_nodes=None,
                                              max_samples=None,
                                              min_impurity_decrease=0.0,
                                              min_impurity_split=None,
                                              min_samples_leaf=1,
                                              min_samples_split=2,
                                              min_weight_fraction_leaf=0.0,
                                              n_estimators=100, n_jobs=None,
                                              oob_score=False,
                                              rand

In [19]:
grid_search.best_estimator_

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=843, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=10, min_samples_split=12,
                       min_weight_fraction_leaf=0.0, n_estimators=1915,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [20]:
best_grid=grid_search.best_estimator_
best_grid

RandomForestClassifier(bootstrap=True, ccp_alpha=0.0, class_weight=None,
                       criterion='entropy', max_depth=843, max_features='log2',
                       max_leaf_nodes=None, max_samples=None,
                       min_impurity_decrease=0.0, min_impurity_split=None,
                       min_samples_leaf=10, min_samples_split=12,
                       min_weight_fraction_leaf=0.0, n_estimators=1915,
                       n_jobs=None, oob_score=False, random_state=None,
                       verbose=0, warm_start=False)

In [21]:
y_pred=best_grid.predict(X_test)
print(confusion_matrix(y_test,y_pred))
print("Accuracy score {}".format(accuracy_score(y_test,y_pred)))
print("Classification report {}".format(classification_report(y_test,y_pred)))

[[87 12]
 [28 27]]
Accuracy score 0.7402597402597403
Classification report               precision    recall  f1-score   support

           0       0.76      0.88      0.81        99
           1       0.69      0.49      0.57        55

    accuracy                           0.74       154
   macro avg       0.72      0.68      0.69       154
weighted avg       0.73      0.74      0.73       154



### Automated Hyperparameter Tunning

-> Bayyesian Optimization
-> Gradient Descent 
-> Evolutionary Algorithms

### Bayesian Optimization

Bayesian approaches, in contrast to random or grid search, keep track of past evaluation results which they 
use to form a probabilistic model mapping hyperparameters to a probability of a score on the objective 
function.
                    p(score/hyperparameters)

Bayesian optimization uses probability to find the minimum of a function.The final aim is to find the input
value to a function which can give us the lowest possible output value.it usually performs better than 
random,grid and manual search providing betters performance in the testing phase and reduced optimization
time.In otimization can be implemented giving three main parameters to the function fmin.

        1. Objective function = defines the loss function to minimize.
        2. Domain space = defines the range of input values to test (in bayesian optimization this space
        creates a probability distribution for each of the used hyperparameters)
        3. Optimization Algritham = defines the search algorithm to use to select the best input values to 
        use in each iteration

In [23]:
## using hyperopt hyperparameter tunning
from hyperopt import hp,fmin,tpe,STATUS_OK,Trials

In [24]:
space={'criterion':hp.choice('criterion',['entropy','gini']),
      'max_depth':hp.quniform('max_depth',10,1200,10),
      'max_features':hp.choice('max_features',['auto','sqrt','log2',None]),
      'min_samples_leaf': hp.uniform('min_samples_leaf',0, 0.5),
      'min_samples_split':hp.uniform('min_samples_split',0,1),
      'n_estimators':hp.choice('n_estimators',[10,50,100,150,300,700,1200,1300])}

In [31]:
def objective(space):
    model=RandomForestClassifier(criterion=space['criterion'],max_depth=space['max_depth'],
                                max_features=space['max_features'],min_samples_leaf=space['min_samples_leaf'],
                                min_impurity_split=space['min_samples_split'],
                                n_estimators=space['n_estimators'])
    accuracy=cross_val_score(model,X_train,y_train,cv=5).mean()
    return { 'loss': -accuracy,'status':STATUS_OK}

In [32]:
trials=Trials()
best=fmin(fn=objective,
     space=space,
     algo=tpe.suggest,
     max_evals=80,
     trials=trials)
best

100%|██████████| 80/80 [04:56<00:00,  3.71s/trial, best loss: -0.7736638677862189]


{'criterion': 0,
 'max_depth': 480.0,
 'max_features': 2,
 'min_samples_leaf': 0.0197212962568308,
 'min_samples_split': 0.673774005702759,
 'n_estimators': 1}

In [33]:
crit={0:'entropy',1:'gini'}
feat={0:'auto',1:'sqrt',2:'log2',3:None}
est={0:10,1:50,2:300,3:750,4:1200,5:1300,6:1500}
print(crit[best['criterion']])
print(feat[best['max_features']])
print(est[best['n_estimators']])

entropy
log2
50


In [36]:
train_forest=RandomForestClassifier(criterion=crit[best['criterion']],
                                   max_depth=best['max_depth'],
                                   max_features=feat[best['max_features']],
                                   min_samples_leaf=best['min_samples_leaf'],
                                   min_samples_split=best['min_samples_split'],
                                   n_estimators=est[best['n_estimators']])
train_forest.fit(X_train,y_train)
prediction_rf=train_forest.predict(X_test)
print(confusion_matrix(y_test,prediction_rf))
print(accuracy_score(y_test,prediction_rf))
print(classification_report(y_test,prediction_rf))
acc=accuracy_score(y_test,prediction_rf)

[[99  0]
 [55  0]]
0.6428571428571429
              precision    recall  f1-score   support

           0       0.64      1.00      0.78        99
           1       0.00      0.00      0.00        55

    accuracy                           0.64       154
   macro avg       0.32      0.50      0.39       154
weighted avg       0.41      0.64      0.50       154

