# Search for Optimal Parameters of Classification Models
Hyperparameter Tuning of *IV. Classification Models* in [Main Notebook](https://github.com/czarinagluna/vaccination-status-classification/blob/main/main.ipynb)
***
Authors: [Czarina Luna](https://www.linkedin.com/in/czarinaluna/), Weston Shuken, Justin Sohn

In [1]:
# Restore objects that were persisted earlier with IPython's `%store` magic
# (presumably by the main notebook — confirm). `preprocessor` is used below as
# the 'col_transformer' step of every pipeline.
%store -r preprocessor

In [2]:
# Default training features passed to grid_search().
%store -r X_train

In [3]:
# Default training labels passed to grid_search().
%store -r y_train

In [4]:
# Alternative training features used by the '*_o' runs
# (presumably a resampled version of X_train — confirm).
%store -r X_train_res

In [5]:
# Labels matching X_train_res.
%store -r y_train_res

In [6]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier, ExtraTreesClassifier, GradientBoostingClassifier

# Untuned estimators, keyed by the model names that the parameter grids use.
# Each entry is a one-key dict so the estimator is read as
# default_models[name]['classifier']; all share the same random_state.
default_models = {
    label: {'classifier': estimator_cls(random_state=112221)}
    for label, estimator_cls in (
        ('LogisticRegression', LogisticRegression),
        ('DecisionTree', DecisionTreeClassifier),
        ('RandomForest', RandomForestClassifier),
        ('ExtraTrees', ExtraTreesClassifier),
        ('GradientBoost', GradientBoostingClassifier),
    )
}

In [7]:
import time

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.pipeline import Pipeline

# Best parameters found by each grid_search() run, keyed by the run's `name`.
# Re-running this cell resets the collection.
tuned_params = {}

def grid_search(params, name, scoring='accuracy', X_train=X_train, y_train=y_train, cv=5, n_jobs=None):
    '''
    Runs a cross-validated grid search for each model in `params`, prints the
    best score and parameters, and records the best parameters in `tuned_params`.

    Parameters
    ----------
    params : dict
        Maps a model name (a key of `default_models`) to a GridSearchCV
        `param_grid`; parameter names must be prefixed with 'classifier__'.
    name : str
        Key under which the best parameters are stored in `tuned_params`.
        When `params` holds several models, each one is stored under
        f'{name}_{model}' so that results do not overwrite each other.
    scoring : str
        Scoring metric handed to GridSearchCV.
    X_train, y_train :
        Training data. The defaults are bound when this function is defined,
        so re-run this cell after reloading X_train / y_train.
    cv : int
        Number of cross-validation folds.
    n_jobs : int or None
        Parallelism handed to GridSearchCV (None keeps its default).

    Returns
    -------
    None. Results are printed and stored in `tuned_params`.

    Raises
    ------
    KeyError
        If `params` names a model that is not in `default_models`.
    '''
    # Fail before any expensive search starts rather than part-way through.
    unknown = [model for model in params if model not in default_models]
    if unknown:
        raise KeyError(f'Unknown model(s) {unknown}; expected one of {list(default_models)}')

    for model, grid in params.items():
        print(f'Running... {model} GridSearch')
        print(f'Time Started: {time.asctime()}')

        pipe = Pipeline(steps=[('col_transformer', preprocessor),
                               ('classifier', default_models[model]['classifier'])])

        gridsearch = GridSearchCV(estimator=pipe, param_grid=grid, scoring=scoring, cv=cv, n_jobs=n_jobs)
        gridsearch.fit(X_train, y_train)

        print(f'Time Finished: {time.asctime()}\n')
        print(f'Best cross validation score: {gridsearch.best_score_ :.2%}')
        print(f'Optimal parameters: {gridsearch.best_params_}')

        # A single-model call keeps the plain `name` key; multi-model calls get
        # one entry per model instead of the last model clobbering the others.
        key = name if len(params) == 1 else f'{name}_{model}'
        tuned_params[key] = gridsearch.best_params_
        
import warnings
# Suppress all Python warnings from here on.
# NOTE(review): this also hides warnings raised during the grid searches below
# (e.g. scikit-learn's notices about failed fits or non-convergence), so invalid
# parameter combinations in a grid will not be reported.
warnings.filterwarnings('ignore')

**Logistic Regression**

In [9]:
# First logistic regression grid (small C values).
# The penalties are split into separate sub-grids because LogisticRegression
# rejects some penalty/solver pairs: 'lbfgs' only supports 'l2', 'l1' and
# 'elasticnet' need 'saga', and 'elasticnet' also requires an explicit l1_ratio.
# Crossing everything in one grid makes those candidates fail on every fold.
params_lr1 = {'LogisticRegression': [
    {
        'classifier__penalty': ['l2'],
        'classifier__C': [0.01, 0.1, 1],
        'classifier__fit_intercept': [True, False],
        'classifier__solver': ['lbfgs', 'saga'],
    },
    {
        'classifier__penalty': ['l1'],
        'classifier__C': [0.01, 0.1, 1],
        'classifier__fit_intercept': [True, False],
        'classifier__solver': ['saga'],
    },
    {
        'classifier__penalty': ['elasticnet'],
        # 0.5 is an even l1/l2 mix; add more ratios to widen the search.
        'classifier__l1_ratio': [0.5],
        'classifier__C': [0.01, 0.1, 1],
        'classifier__fit_intercept': [True, False],
        'classifier__solver': ['saga'],
    },
]}

grid_search(params_lr1, name='LogisticRegression')

In [None]:
# Same grid, fitted on X_train_res / y_train_res instead of the default
# training data (presumably the resampled split — confirm).
grid_search(
    params=params_lr1,
    name='LogisticRegression_o',
    X_train=X_train_res,
    y_train=y_train_res,
)

In [11]:
# Second logistic regression grid (wider range of C).
# The penalties are split into separate sub-grids because LogisticRegression
# rejects some penalty/solver pairs: 'lbfgs' only supports 'l2', 'l1' and
# 'elasticnet' need 'saga', and 'elasticnet' also requires an explicit l1_ratio.
# Crossing everything in one grid makes those candidates fail on every fold.
params_lr2 = {'LogisticRegression': [
    {
        'classifier__penalty': ['l2'],
        'classifier__C': [0.001, 1, 10, 100],
        'classifier__fit_intercept': [True, False],
        'classifier__solver': ['lbfgs', 'saga'],
    },
    {
        'classifier__penalty': ['l1'],
        'classifier__C': [0.001, 1, 10, 100],
        'classifier__fit_intercept': [True, False],
        'classifier__solver': ['saga'],
    },
    {
        'classifier__penalty': ['elasticnet'],
        # 0.5 is an even l1/l2 mix; add more ratios to widen the search.
        'classifier__l1_ratio': [0.5],
        'classifier__C': [0.001, 1, 10, 100],
        'classifier__fit_intercept': [True, False],
        'classifier__solver': ['saga'],
    },
]}

grid_search(params_lr2, name='LogisticRegression2')

Running... LogisticRegression GridSearch
Time Started: Tue Apr 19 16:02:44 2022
Time Finished: Tue Apr 19 16:10:52 2022

Best cross validation score: 86.76%
Optimal parameters: {'classifier__C': 1, 'classifier__fit_intercept': False, 'classifier__penalty': 'l1', 'classifier__solver': 'saga'}


In [12]:
# Second grid again, this time fitted on X_train_res / y_train_res
# (presumably the resampled split — confirm).
grid_search(
    params=params_lr2,
    name='LogisticRegression2_o',
    X_train=X_train_res,
    y_train=y_train_res,
)

Running... LogisticRegression GridSearch
Time Started: Tue Apr 19 18:44:17 2022
Time Finished: Tue Apr 19 18:54:23 2022

Best cross validation score: 83.71%
Optimal parameters: {'classifier__C': 10, 'classifier__fit_intercept': True, 'classifier__penalty': 'l1', 'classifier__solver': 'saga'}


In [None]:
# First grid again, choosing the best candidate by precision rather than
# the default accuracy.
grid_search(
    params=params_lr1,
    name='LogisticRegression_p',
    scoring='precision',
)

In [None]:
# Take the result of the run named 'LogisticRegression' (the params_lr1 grid,
# scored on accuracy) and persist it with %store so it can be reloaded
# elsewhere via `%store -r lr_best_params`.
lr_best_params = tuned_params['LogisticRegression']
%store lr_best_params

**Decision Tree**

In [None]:
# Search space for the decision tree: split criterion and strategy, plus the
# limits on tree depth and node sizes.
params_dt1 = {
    'DecisionTree': [
        {
            'classifier__criterion': ['gini', 'entropy'],
            'classifier__splitter': ['best', 'random'],
            'classifier__max_depth': [None, 1, 2, 5],
            'classifier__min_samples_split': [2, 3, 5],
            'classifier__min_samples_leaf': [1, 2, 5],
        }
    ]
}

grid_search(params=params_dt1, name='DecisionTree')

In [None]:
# Same decision tree grid, choosing the best candidate by precision rather
# than the default accuracy.
grid_search(
    params=params_dt1,
    name='DecisionTree_p',
    scoring='precision',
)

In [None]:
# Take the result of the accuracy-scored 'DecisionTree' run and persist it
# with %store so it can be reloaded elsewhere via `%store -r dt_best_params`.
dt_best_params = tuned_params['DecisionTree']
%store dt_best_params

**Random Forest**

In [None]:
# Search space for the random forest: ensemble size, split criterion and the
# limits on tree depth and node sizes.
params_rf1 = {
    'RandomForest': [
        {
            'classifier__n_estimators': [10, 30, 100],
            'classifier__criterion': ['gini', 'entropy'],
            'classifier__max_depth': [None, 1, 2, 5],
            'classifier__min_samples_split': [2, 3, 5],
            'classifier__min_samples_leaf': [1, 2, 5],
        }
    ]
}

grid_search(params=params_rf1, name='RandomForest')

In [None]:
# Same random forest grid, choosing the best candidate by precision rather
# than the default accuracy.
grid_search(
    params=params_rf1,
    name='RandomForest_p',
    scoring='precision',
)

In [None]:
# Take the result of the accuracy-scored 'RandomForest' run and persist it
# with %store so it can be reloaded elsewhere via `%store -r rf_best_params`.
rf_best_params = tuned_params['RandomForest']
%store rf_best_params

**Extra Trees**

In [None]:
# Search space for the extra-trees ensemble; it mirrors the random forest
# grid (ensemble size, criterion, depth and node-size limits).
params_et1 = {
    'ExtraTrees': [
        {
            'classifier__n_estimators': [10, 30, 100],
            'classifier__criterion': ['gini', 'entropy'],
            'classifier__max_depth': [None, 1, 2, 5],
            'classifier__min_samples_split': [2, 3, 5],
            'classifier__min_samples_leaf': [1, 2, 5],
        }
    ]
}

grid_search(params=params_et1, name='ExtraTrees')

In [None]:
# Same extra-trees grid, choosing the best candidate by precision rather
# than the default accuracy.
grid_search(
    params=params_et1,
    name='ExtraTrees_p',
    scoring='precision',
)

In [None]:
# Take the result of the accuracy-scored 'ExtraTrees' run and persist it
# with %store so it can be reloaded elsewhere via `%store -r et_best_params`.
et_best_params = tuned_params['ExtraTrees']
%store et_best_params

**Gradient Boost**

In [None]:
# First gradient boosting grid: loss, number of boosting stages, split
# criterion and the limits on tree depth and node sizes.
# NOTE(review): 'deviance' is the loss name in older scikit-learn; release 1.1
# renamed it to 'log_loss' and 1.3 removed the old name — confirm against the
# installed version.
params_gb1 = {
    'GradientBoost': [
        {
            'classifier__loss': ['deviance', 'exponential'],
            'classifier__n_estimators': [10, 30, 100],
            'classifier__criterion': ['friedman_mse', 'squared_error'],
            'classifier__max_depth': [None, 1, 2, 5],
            'classifier__min_samples_split': [2, 3, 5],
            'classifier__min_samples_leaf': [1, 2, 5],
        }
    ]
}

grid_search(params=params_gb1, name='GradientBoost')

In [None]:
# Same gradient boosting grid, choosing the best candidate by precision
# rather than the default accuracy.
grid_search(
    params=params_gb1,
    name='GradientBoost_p',
    scoring='precision',
)

In [None]:
# Second gradient boosting grid: more boosting stages and larger leaves.
params_gb2 = {'GradientBoost': [{
    'classifier__loss': ['deviance', 'exponential'],
    'classifier__n_estimators': [100, 150, 300],
    'classifier__criterion': ['friedman_mse', 'squared_error'],
    'classifier__max_depth': [None, 3, 10],
    # min_samples_split must be an int >= 2 (or a fraction in (0, 1]); the
    # previous candidates None and 1 are rejected by scikit-learn, so those
    # fits failed on every fold and only 2 could ever be selected. Keeping
    # just the valid value gives the same winner with a third of the fits.
    # Add larger values (e.g. 20, 50) here to actually vary this setting.
    'classifier__min_samples_split': [2],
    'classifier__min_samples_leaf': [3, 5, 10]
}]}

grid_search(params_gb2, name='GradientBoost3')

In [None]:
# Third gradient boosting grid: many boosting stages, with trees that are
# either depth-1 stumps or unrestricted in depth.
params_gb3 = {
    'GradientBoost': [
        {
            'classifier__loss': ['deviance', 'exponential'],
            'classifier__n_estimators': [200, 300, 500],
            'classifier__criterion': ['friedman_mse', 'squared_error'],
            'classifier__max_depth': [None, 1],
            'classifier__min_samples_split': [2, 4, 10],
            'classifier__min_samples_leaf': [1, 3, 5],
        }
    ]
}

grid_search(params=params_gb3, name='GradientBoost4')

In [None]:
# Take the result of the run named 'GradientBoost' (the params_gb1 grid,
# scored on accuracy) and persist it with %store so it can be reloaded
# elsewhere via `%store -r gb_best_params`.
gb_best_params = tuned_params['GradientBoost']
%store gb_best_params

# Contact
Feel free to contact me with any questions and connect with me on [LinkedIn](https://www.linkedin.com/in/czarinagluna).