# Search for Optimal Parameters of Classification Models

Hyperparameter tuning for the models in section *4.1 Classification Algorithms* of the [main notebook](https://github.com/czarinagluna/sxsw-sentiment-analysis/blob/main/main.ipynb)
***
**By [Czarina Luna](https://czarinaluna.com)**

In [1]:
%store -r X_train_res

In [3]:
%store -r y_train_res

In [8]:
%store -r default_models

In [14]:
import time

from sklearn.pipeline import Pipeline
from sklearn.model_selection import GridSearchCV
from sklearn.feature_extraction.text import TfidfVectorizer

# Vectorizer used as the 'tfidf' step of every pipeline built by grid_search.
tfidf = TfidfVectorizer(lowercase=False, stop_words='english')

# Filled by grid_search: maps the `name` passed to each call to the best parameters found.
tuned_params = dict()

def grid_search(params, name, models=default_models):
    '''
    Performs a 5-fold cross-validated grid search (scored on micro-averaged recall)
    for each classification model in `params` and prints the best score and parameters.

    Parameters
    ----------
    params : dict
        Maps a key of `models` to the `param_grid` to search for that model.
        Grid keys use the `classifier__<parameter>` form because the model is
        the 'classifier' step of a tfidf -> classifier pipeline.
    name : str
        Key under which the best parameters are saved in the global `tuned_params`.
        If `params` holds more than one model, each overwrites the previous entry,
        so pass one model per call.
    models : dict
        Maps model keys to dicts that hold the estimator under 'classifier'.

    Returns
    -------
    None. Results are printed and stored in `tuned_params[name]`.
    '''
    for model, grid in params.items():
        print(f'Running... {model} GridSearch')
        print(f'Time Started: {time.asctime()}')

        # No fit on the pipeline itself: GridSearchCV clones the estimator for every
        # candidate, so fitting it here would only add one wasted full training run.
        pipe = Pipeline(steps=[('tfidf', tfidf), ('classifier', models[model]['classifier'])])

        # refit=False: only the best score and parameters are needed, not a final model.
        gridsearch = GridSearchCV(estimator=pipe, param_grid=grid, scoring='recall_micro', cv=5, refit=False)
        gridsearch.fit(X_train_res, y_train_res)

        print(f'Time Finished: {time.asctime()}\n')
        print(f'Best cross validation score: {gridsearch.best_score_ :.2%}')
        print(f'Optimal parameters: {gridsearch.best_params_}')

        tuned_params[name] = gridsearch.best_params_

# Silence every warning for the rest of the session.
# NOTE(review): this also hides any warnings raised while the grid searches fit
# candidates, so failing parameter combinations go unnoticed - consider narrowing the filter.
import warnings
warnings.filterwarnings('ignore')

In [20]:
# Grid for the 'LogisticRegression' entry of default_models, split into sub-grids so
# that every candidate is a penalty/solver pair scikit-learn accepts:
#   - 'lbfgs' supports only the 'l2' penalty here,
#   - 'l1' requires 'saga',
#   - 'elasticnet' requires 'saga' and an explicit l1_ratio.
# A single crossed grid makes the invalid pairs fail in every fold, which wastes
# time and is invisible while warnings are suppressed.
lr_common = {
    'classifier__C': [0.001, 0.01, 0.1, 1],
    'classifier__fit_intercept': [True, False],
    'classifier__multi_class': ['auto', 'ovr', 'multinomial'],
}

params_lr1 = {'LogisticRegression': [
    {**lr_common, 'classifier__solver': ['lbfgs'], 'classifier__penalty': ['l2']},
    {**lr_common, 'classifier__solver': ['saga'], 'classifier__penalty': ['l1', 'l2']},
    # l1_ratio=0.5 weights L1 and L2 equally; without it elasticnet cannot be fitted.
    {**lr_common, 'classifier__solver': ['saga'], 'classifier__penalty': ['elasticnet'],
     'classifier__l1_ratio': [0.5]},
]}

grid_search(params_lr1, name='LogisticRegression')

Running... LogisticRegression GridSearch
Time Started: Sat May  7 12:38:07 2022
Time Finished: Sat May  7 12:48:41 2022

Best cross validation score: 80.65%
Optimal parameters: {'classifier__C': 1, 'classifier__fit_intercept': True, 'classifier__multi_class': 'auto', 'classifier__penalty': 'l2', 'classifier__solver': 'lbfgs'}


In [42]:
# Take the parameters recorded under 'LogisticRegression' and persist them with
# %store so another notebook/kernel can read them back with `%store -r`.
lr_best_params = tuned_params['LogisticRegression']
%store lr_best_params

Stored 'lr_best_params' (dict)


In [24]:
# Grid for the 'MultinomialNB' entry of default_models: only `alpha` is searched.
params_mn1 = {
    'MultinomialNB': [
        {'classifier__alpha': [0, 0.001, 0.01, 0.1, 1]},
    ],
}

grid_search(params=params_mn1, name='MultinomialNB')

Running... MultinomialNB GridSearch
Time Started: Sat May  7 13:07:22 2022
Time Finished: Sat May  7 13:07:28 2022

Best cross validation score: 80.35%
Optimal parameters: {'classifier__alpha': 0}


In [43]:
# Take the parameters recorded under 'MultinomialNB' and persist them with %store.
mn_best_params = tuned_params['MultinomialNB']
%store mn_best_params

Stored 'mn_best_params' (dict)


In [26]:
# Grid for the 'DecisionTree' entry of default_models: split criterion and strategy,
# depth limit, and the minimum sample counts for splitting a node / forming a leaf.
params_dt1 = {
    'DecisionTree': [
        {
            'classifier__criterion': ['gini', 'entropy'],
            'classifier__splitter': ['best', 'random'],
            'classifier__max_depth': [None, 1, 2, 5],
            'classifier__min_samples_split': [2, 3, 5],
            'classifier__min_samples_leaf': [1, 2, 5],
        },
    ],
}

grid_search(params=params_dt1, name='DecisionTree')

Running... DecisionTree GridSearch
Time Started: Sat May  7 13:27:59 2022
Time Finished: Sat May  7 13:32:58 2022

Best cross validation score: 84.42%
Optimal parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': None, 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 2, 'classifier__splitter': 'random'}


In [44]:
# Take the parameters recorded under 'DecisionTree' and persist them with %store.
dt_best_params = tuned_params['DecisionTree']
%store dt_best_params

Stored 'dt_best_params' (dict)


In [28]:
# First grid for the 'RandomForest' entry of default_models: a coarse sweep over
# forest size, split criterion, depth limit and minimum sample counts.
params_rf1 = {
    'RandomForest': [
        {
            'classifier__n_estimators': [10, 30, 100],
            'classifier__criterion': ['gini', 'entropy'],
            'classifier__max_depth': [None, 1, 2, 5],
            'classifier__min_samples_split': [2, 3, 5],
            'classifier__min_samples_leaf': [1, 2, 5],
        },
    ],
}

grid_search(params=params_rf1, name='RandomForest')

Running... RandomForest GridSearch
Time Started: Sat May  7 13:42:16 2022
Time Finished: Sat May  7 13:57:40 2022

Best cross validation score: 86.58%
Optimal parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': None, 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 100}


In [31]:
# Second 'RandomForest' grid: larger forests (100-200 trees) with a different set of
# depth and leaf-size values; results are stored under 'RandomForest2'.
params_rf2 = {
    'RandomForest': [
        {
            'classifier__n_estimators': [100, 150, 200],
            'classifier__criterion': ['gini', 'entropy'],
            'classifier__max_depth': [None, 1, 3],
            'classifier__min_samples_split': [2, 3, 5],
            'classifier__min_samples_leaf': [1, 3, 10],
        },
    ],
}

grid_search(params=params_rf2, name='RandomForest2')

Running... RandomForest GridSearch
Time Started: Sat May  7 14:52:26 2022
Time Finished: Sat May  7 15:28:44 2022

Best cross validation score: 86.62%
Optimal parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': None, 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 200}


In [45]:
# Keep the result of the second random-forest search ('RandomForest2'), which
# reported the higher cross-validation score of the two runs, and persist it with %store.
rf_best_params = tuned_params['RandomForest2']
%store rf_best_params

Stored 'rf_best_params' (dict)


In [34]:
# First grid for the 'ExtraTrees' entry of default_models: a coarse sweep over
# ensemble size, split criterion, depth limit and minimum sample counts.
params_et1 = {
    'ExtraTrees': [
        {
            'classifier__n_estimators': [10, 30, 100],
            'classifier__criterion': ['gini', 'entropy'],
            'classifier__max_depth': [None, 1, 2, 5],
            'classifier__min_samples_split': [2, 3, 5],
            'classifier__min_samples_leaf': [1, 2, 5],
        },
    ],
}

grid_search(params=params_et1, name='ExtraTrees')

Running... ExtraTrees GridSearch
Time Started: Sat May  7 17:05:45 2022
Time Finished: Sat May  7 17:24:59 2022

Best cross validation score: 87.15%
Optimal parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': None, 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 100}


In [35]:
# Second 'ExtraTrees' grid: larger ensembles (100-200 trees) with wider split and
# leaf-size values; results are stored under 'ExtraTrees2'.
params_et2 = {'ExtraTrees': [{
    'classifier__n_estimators':[100, 150, 200],
    'classifier__criterion':['gini', 'entropy'],
    'classifier__max_depth':[None, 1, 2],
    # An integer min_samples_split must be at least 2; a value of 1 is rejected at fit
    # time, so those candidates would only produce failed fits.
    'classifier__min_samples_split':[2, 5, 10],
    'classifier__min_samples_leaf':[1, 3, 10]
}]}

grid_search(params_et2, name='ExtraTrees2')

Running... ExtraTrees GridSearch
Time Started: Sat May  7 18:25:57 2022
Time Finished: Sat May  7 18:50:32 2022

Best cross validation score: 87.28%
Optimal parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': None, 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 200}


In [36]:
# Third 'ExtraTrees' grid: every parameter except the ensemble size is pinned to a
# single value, so only n_estimators (200-300) varies; stored under 'ExtraTrees3'.
params_et3 = {
    'ExtraTrees': [
        {
            'classifier__n_estimators': [200, 250, 300],
            'classifier__criterion': ['entropy'],
            'classifier__max_depth': [None],
            'classifier__min_samples_split': [5],
            'classifier__min_samples_leaf': [1],
        },
    ],
}

grid_search(params=params_et3, name='ExtraTrees3')

Running... ExtraTrees GridSearch
Time Started: Sat May  7 19:01:00 2022
Time Finished: Sat May  7 19:07:59 2022

Best cross validation score: 87.31%
Optimal parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': None, 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 300}


In [37]:
# Fourth 'ExtraTrees' grid: same pinned parameters, with n_estimators pushed to
# 300-1000; stored under 'ExtraTrees4'.
params_et4 = {
    'ExtraTrees': [
        {
            'classifier__n_estimators': [300, 500, 1000],
            'classifier__criterion': ['entropy'],
            'classifier__max_depth': [None],
            'classifier__min_samples_split': [5],
            'classifier__min_samples_leaf': [1],
        },
    ],
}

grid_search(params=params_et4, name='ExtraTrees4')

Running... ExtraTrees GridSearch
Time Started: Sat May  7 19:16:26 2022
Time Finished: Sat May  7 19:32:34 2022

Best cross validation score: 87.39%
Optimal parameters: {'classifier__criterion': 'entropy', 'classifier__max_depth': None, 'classifier__min_samples_leaf': 1, 'classifier__min_samples_split': 5, 'classifier__n_estimators': 500}


In [46]:
# Keep the result of the fourth extra-trees search ('ExtraTrees4'), the run with the
# highest reported cross-validation score, and persist it with %store.
et_best_params = tuned_params['ExtraTrees4']
%store et_best_params

Stored 'et_best_params' (dict)


In [40]:
# First grid for the 'GradientBoost' entry of default_models: loss function, number
# of boosting stages, split criterion, depth limit and minimum sample counts.
params_gb1 = {
    'GradientBoost': [
        {
            'classifier__loss': ['deviance', 'exponential'],
            'classifier__n_estimators': [10, 30, 100],
            'classifier__criterion': ['friedman_mse', 'squared_error'],
            'classifier__max_depth': [None, 1, 2, 5],
            'classifier__min_samples_split': [2, 3, 5],
            'classifier__min_samples_leaf': [1, 2, 5],
        },
    ],
}

grid_search(params=params_gb1, name='GradientBoost')

Running... GradientBoost GridSearch
Time Started: Sun May  8 01:10:57 2022
Time Finished: Sun May  8 10:46:54 2022

Best cross validation score: 85.75%
Optimal parameters: {'classifier__criterion': 'friedman_mse', 'classifier__loss': 'deviance', 'classifier__max_depth': None, 'classifier__min_samples_leaf': 2, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 100}


In [41]:
# Second 'GradientBoost' grid: more boosting stages (100-300) with max_depth pinned
# to None; stored under 'GradientBoost2'.
params_gb2 = {
    'GradientBoost': [
        {
            'classifier__loss': ['deviance', 'exponential'],
            'classifier__n_estimators': [100, 200, 300],
            'classifier__criterion': ['friedman_mse', 'squared_error'],
            'classifier__max_depth': [None],
            'classifier__min_samples_split': [2, 5],
            'classifier__min_samples_leaf': [1, 3],
        },
    ],
}

grid_search(params=params_gb2, name='GradientBoost2')

Running... GradientBoost GridSearch
Time Started: Sun May  8 12:16:39 2022
Time Finished: Mon May  9 06:34:34 2022

Best cross validation score: 85.59%
Optimal parameters: {'classifier__criterion': 'friedman_mse', 'classifier__loss': 'deviance', 'classifier__max_depth': None, 'classifier__min_samples_leaf': 3, 'classifier__min_samples_split': 2, 'classifier__n_estimators': 100}


In [47]:
# Keep the result of the first gradient-boosting search ('GradientBoost'): the second
# run ('GradientBoost2') reported a lower cross-validation score. Persist it with %store.
gb_best_params = tuned_params['GradientBoost']
%store gb_best_params

Stored 'gb_best_params' (dict)


In [67]:
# Grid for the 'SVC' entry of default_models: regularisation strength, kernel,
# polynomial degree, gamma, shrinking heuristic and class weighting.
# The model key and the `name` must both be 'SVC': the recorded run prints
# "Running... SVC GridSearch" and the next cell reads tuned_params['SVC'].
params_svc1 = {'SVC': [{
    'classifier__C':[1, 2, 3],
    'classifier__kernel':['linear', 'poly', 'rbf', 'sigmoid'],
    'classifier__degree':[1, 2, 3],
    'classifier__gamma':['scale', 'auto'],
    'classifier__shrinking':[True, False],
    'classifier__class_weight':['balanced', None],
}]}

grid_search(params_svc1, name='SVC')

Running... SVC GridSearch
Time Started: Mon May  9 08:21:44 2022
Time Finished: Mon May  9 12:20:48 2022

Best cross validation score: 87.98%
Optimal parameters: {'classifier__C': 1, 'classifier__class_weight': 'balanced', 'classifier__degree': 3, 'classifier__gamma': 'scale', 'classifier__kernel': 'poly', 'classifier__shrinking': True}


In [68]:
# Take the parameters recorded under 'SVC' and persist them with %store.
svc_best_params = tuned_params['SVC']
%store svc_best_params

Stored 'svc_best_params' (dict)


In [52]:
# First grid for the 'SGDClassifier' entry of default_models: penalty type,
# regularisation strength (alpha) and class weighting.
params_sgd1 = {
    'SGDClassifier': [
        {
            'classifier__penalty': ['l1', 'l2', 'elasticnet'],
            'classifier__alpha': [0.0001, 0.001, 0.01, 0.1, 1],
            'classifier__class_weight': ['balanced', None],
        },
    ],
}

grid_search(params=params_sgd1, name='SGDClassifier')

Running... SGDClassifier GridSearch
Time Started: Mon May  9 07:52:04 2022
Time Finished: Mon May  9 07:52:36 2022

Best cross validation score: 81.07%
Optimal parameters: {'classifier__alpha': 0.0001, 'classifier__class_weight': None, 'classifier__penalty': 'l2'}


In [57]:
# Second 'SGDClassifier' grid: varies the learning-rate schedule and its initial
# rate (eta0) instead of alpha; stored under 'SGDClassifier2'.
params_sgd2 = {
    'SGDClassifier': [
        {
            'classifier__penalty': ['l1', 'l2', 'elasticnet'],
            'classifier__eta0': [0.01, 0.1],
            'classifier__learning_rate': ['constant', 'invscaling', 'adaptive'],
            'classifier__class_weight': ['balanced', None],
        },
    ],
}

grid_search(params=params_sgd2, name='SGDClassifier2')

Running... SGDClassifier GridSearch
Time Started: Mon May  9 07:55:46 2022
Time Finished: Mon May  9 07:57:04 2022

Best cross validation score: 80.99%
Optimal parameters: {'classifier__class_weight': 'balanced', 'classifier__eta0': 0.1, 'classifier__learning_rate': 'constant', 'classifier__penalty': 'l2'}


In [60]:
# Third 'SGDClassifier' grid: same parameters as the first, with alpha searched
# over smaller values (1e-6 to 1e-4); stored under 'SGDClassifier3'.
params_sgd3 = {
    'SGDClassifier': [
        {
            'classifier__penalty': ['l1', 'l2', 'elasticnet'],
            'classifier__alpha': [0.000001, 0.00001, 0.0001],
            'classifier__class_weight': ['balanced', None],
        },
    ],
}

grid_search(params=params_sgd3, name='SGDClassifier3')

Running... SGDClassifier GridSearch
Time Started: Mon May  9 08:14:51 2022
Time Finished: Mon May  9 08:15:27 2022

Best cross validation score: 84.64%
Optimal parameters: {'classifier__alpha': 1e-05, 'classifier__class_weight': None, 'classifier__penalty': 'l1'}


In [66]:
# Keep the result of the third SGD search ('SGDClassifier3'), the run with the highest
# reported cross-validation score, and persist it with %store.
sgd_best_params = tuned_params['SGDClassifier3']
%store sgd_best_params

Stored 'sgd_best_params' (dict)


# Contact

Feel free to contact me with any questions and connect with me on [LinkedIn](https://www.linkedin.com/in/czarinagluna/).