# Grid Search Table

In [19]:
# Packages
import pandas as pd
import joblib
import glob
import numpy as np
import dataframe_image as dfi # NOTE: YOU MUST HAVE GOOGLE CHROME INSTALLED FOR THIS TO WORK CORRECTLY

## Load Hyperparameter Settings

In [20]:
# Iterate over all files matching "Hyperparameter Settings.joblib" in subdirectories of '../../Output/Classifier Fitting'
list_of_files = []
list_of_settings = []
for file in glob.glob('../../Output/Classifier Fitting/**/*Hyperparameter Settings.joblib', recursive=True):
    list_of_files.append(file)
    list_of_settings.append(joblib.load(file))

print(list_of_settings)
print(list_of_files)

# list of classifier names - second to last item split on '\\'
classifier_names = [file.split('\\')[-2] for file in list_of_files]
print(classifier_names)

# pair list_of_settings and classifier_names
classifier_settings = dict(zip(classifier_names, list_of_settings))

[[{'solver': ['saga'], 'penalty': [None], 'C': [1], 'class_weight': [None, 'balanced'], 'multi_class': ['ovr', 'multinomial']}, {'solver': ['saga'], 'penalty': ['elasticnet'], 'C': [0.001, 0.01, 0.1, 1, 10, 100], 'l1_ratio': [0.0, 0.25, 0.5, 0.75, 1.0], 'class_weight': [None, 'balanced'], 'multi_class': ['ovr', 'multinomial']}], [{'solver': ['saga'], 'penalty': [None], 'C': [1], 'class_weight': [None, 'balanced'], 'multi_class': ['ovr', 'multinomial']}, {'solver': ['saga'], 'penalty': ['elasticnet'], 'C': [0.001, 0.01, 0.1, 1, 10, 100], 'l1_ratio': [0.0, 0.25, 0.5, 0.75, 1.0], 'class_weight': [None, 'balanced'], 'multi_class': ['ovr', 'multinomial']}], [{'solver': ['saga'], 'penalty': [None], 'C': [1], 'class_weight': [None, 'balanced'], 'multi_class': ['ovr', 'multinomial']}, {'solver': ['saga'], 'penalty': ['elasticnet'], 'C': [0.001, 0.01, 0.1, 1, 10, 100], 'l1_ratio': [0.0, 0.25, 0.5, 0.75, 1.0], 'class_weight': [None, 'balanced'], 'multi_class': ['ovr', 'multinomial']}], [{'solver

In [21]:
# print items in classifer_settings
for key, value in classifier_settings.items():
    print(key)
    print(value)

Logistic Regression
[{'solver': ['saga'], 'penalty': [None], 'C': [1], 'class_weight': [None, 'balanced'], 'multi_class': ['ovr', 'multinomial']}, {'solver': ['saga'], 'penalty': ['elasticnet'], 'C': [0.001, 0.01, 0.1, 1, 10, 100], 'l1_ratio': [0.0, 0.25, 0.5, 0.75, 1.0], 'class_weight': [None, 'balanced'], 'multi_class': ['ovr', 'multinomial']}]
Random Forest
{'n_estimators': [500, 1000, 1500], 'max_features': ['sqrt'], 'max_depth': [20, 40, 60, 80], 'bootstrap': [True, False], 'min_samples_leaf': [1, 2]}
SVM
{'c_values': [0.01, 0.31622776601683794, 10.0], 'kernel_grid': ['rbf'], 'gamma_grid': [1e-05, 0.00025118864315095795, 0.00630957344480193, 0.1584893192461111, 3.981071705534969, 100.0, 'scale'], 'class_weight': ['balanced', None], 'k_folds': 5}
XGBoost
{'n_estimators': [100, 200, 300], 'learning_rate': [0.1, 0.3, 0.5], 'max_depth': [2, 3, 5], 'min_child_weight': [1, 3, 5], 'k_folds': 5}


## Unnest Settings

In [22]:
# Lists for unnesting
classifiers = []
settings = []
settings_options = []
# Iterate over keys
for key in classifier_settings.keys():
    # If value is a list of dictionaries, combine into a single dictionary
    new_dict = {}
    if isinstance(classifier_settings[key], list):
        for d in classifier_settings[key]:
            # iterate over keys in dictionary
            for k in d.keys():
                # if key is in new_dict, append to list
                if k in new_dict.keys():
                    new_dict[k].append(d[k])
                # if key not in new_dict, create new list
                else:
                    new_dict[k] = [d[k]]
        classifier_settings[key] = new_dict
    # For each key in the unnested dictionary
    for k in classifier_settings[key].keys():
        # wrap any single values
        if not isinstance(classifier_settings[key][k], list) and not isinstance(classifier_settings[key][k], np.ndarray):
            classifier_settings[key][k] = [classifier_settings[key][k]]
        if isinstance(classifier_settings[key][k], np.ndarray):
            classifier_settings[key][k] = classifier_settings[key][k].tolist()
        # Append to lists
        classifiers.append(key)
        settings.append(k)
        settings_options.append(list(classifier_settings[key][k]))

## Dataframe of settings

In [23]:
# Put settings into a dataframe
classifier_settings_df = pd.DataFrame({'Classifier': classifiers, 'Setting': settings, 'Setting Options': settings_options})

# convert Setting Options to string
classifier_settings_df['Setting Options'] = classifier_settings_df['Setting Options'].apply(lambda x: ', '.join(map(str, x)))
# strip brackets
classifier_settings_df['Setting Options'] = classifier_settings_df['Setting Options'].str.replace('[', '')
classifier_settings_df['Setting Options'] = classifier_settings_df['Setting Options'].str.replace(']', '')
# expand out Setting Options by splitting on ',' and stacking
classifier_settings_df = classifier_settings_df.set_index(['Classifier', 'Setting'])['Setting Options'].str.split(', ', expand=True).stack().reset_index(level=2, drop=True).reset_index().rename(columns={0: 'Setting Options'}).drop_duplicates()
# strip '
classifier_settings_df['Setting Options'] = classifier_settings_df['Setting Options'].str.replace("'", '')
# recombined Setting Options by Classifer and Setting
classifier_settings_df['Setting Options'] = classifier_settings_df.groupby(['Classifier', 'Setting'])['Setting Options'].transform(lambda x: ', '.join(x))
# drop duplicates
classifier_settings_df = classifier_settings_df.drop_duplicates()
# Rename setting to Hyperparameter
classifier_settings_df = classifier_settings_df.rename(columns={'Setting': 'Hyperparameter'})
# Rename Setting Options to Options
classifier_settings_df = classifier_settings_df.rename(columns={'Setting Options': 'Options'})

classifier_settings_df

Unnamed: 0,Classifier,Hyperparameter,Options
0,Logistic Regression,solver,saga
2,Logistic Regression,penalty,"None, elasticnet"
4,Logistic Regression,C,"1, 0.001, 0.01, 0.1, 10, 100"
11,Logistic Regression,class_weight,"None, balanced"
15,Logistic Regression,multi_class,"ovr, multinomial"
19,Logistic Regression,l1_ratio,"0.0, 0.25, 0.5, 0.75, 1.0"
24,Random Forest,n_estimators,"500, 1000, 1500"
27,Random Forest,max_features,sqrt
28,Random Forest,max_depth,"20, 40, 60, 80"
32,Random Forest,bootstrap,"True, False"


## Table Formatting

In [24]:
# Replace 0.00025118864315095795 with 2.51e-04 in Options strings
classifier_settings_df['Options'] = classifier_settings_df['Options'].str.replace('0.00025118864315095795', '2.51e-04')
# Replace 0.00630957344480193 with 6.31e-03 in Options strings
classifier_settings_df['Options'] = classifier_settings_df['Options'].str.replace('0.00630957344480193', '6.31e-03')
# Replace 0.1584893192461111 with 0.16 in Options strings
classifier_settings_df['Options'] = classifier_settings_df['Options'].str.replace('0.1584893192461111', '0.16')
# Replace 3.981071705534969 with 3.98 in Options strings
classifier_settings_df['Options'] = classifier_settings_df['Options'].str.replace('3.981071705534969', '3.98')
# Replace 0.31622776601683794 with 0.32 in Options strings
classifier_settings_df['Options'] = classifier_settings_df['Options'].str.replace('0.31622776601683794', '0.32')

# Styled grid search output table
grid_search_table = (classifier_settings_df.style
                                        .format(thousands=",", decimal=".")
                                        .set_table_styles([dict(selector='th', props=[('text-align', 'center')])])
                                        .set_properties(**{'text-align': 'center'})
                                        .hide(axis="index"))

# Save to PNG
dfi.export(grid_search_table, '../../Output/Classifier Fitting/Grid Search Table.png')

# Save to Excel
grid_search_table.to_excel('../../Output/Classifier Fitting/Grid Search Table.xlsx', index=False)

grid_search_table

Classifier,Hyperparameter,Options
Logistic Regression,solver,saga
Logistic Regression,penalty,"None, elasticnet"
Logistic Regression,C,"1, 0.001, 0.01, 0.1, 10, 100"
Logistic Regression,class_weight,"None, balanced"
Logistic Regression,multi_class,"ovr, multinomial"
Logistic Regression,l1_ratio,"0.0, 0.25, 0.5, 0.75, 1.0"
Random Forest,n_estimators,"500, 1000, 1500"
Random Forest,max_features,sqrt
Random Forest,max_depth,"20, 40, 60, 80"
Random Forest,bootstrap,"True, False"
