# How to Use Grid Search in scikit-learn

Overview

Hyperparameters are the variables that determine the network structure (e.g., the number of hidden units) and the variables that determine how the network is trained (e.g., the learning rate).

Hyperparameters are set before training (that is, before the weights and biases are optimized).

Iterate through hyper-parameters to try and gauge the best potential set for your final model execution / setup.  
*Note: I've read multiple papers whose arguments range from a grid search pattern, to intuition, to intimate knowledge of the data, to random selection. There is no proven, specific way to tailor your hyper-parameters.*

References:
+ https://machinelearningmastery.com/
+ https://towardsdatascience.com/hyperparameters-in-deep-learning-927f7b2084dd
+ https://towardsdatascience.com/what-are-hyperparameters-and-how-to-tune-the-hyperparameters-in-a-deep-neural-network-d0604917584a
+ https://machinelearningmastery.com/grid-search-hyperparameters-deep-learning-models-python-keras/


In [1]:
# Python 3.7.3
############################################
# INCLUDES
############################################
#libraries specific to this example
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.backend import clear_session
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.wrappers.scikit_learn import KerasClassifier

from sklearn.preprocessing import MinMaxScaler
from sklearn.model_selection import train_test_split
from sklearn.model_selection import GridSearchCV

# seed the pseudorandom number generator
from random import seed
from random import random
from random import randint

#a set of libraries that perhaps should always be in Python source
import os 
import datetime
import sys
import gc
import getopt
import inspect
import math
import warnings
import types

#Data Science Libraries
import numpy as np
import pandas as pd
import scipy as sp
import scipy.ndimage

#Plotting libraries
import matplotlib as mpl
import matplotlib.pyplot as plt

#a darn useful library for creating paths and one I recommend you load to your environment
from pathlib import Path

# can type in the python console `help(name of function)` to get the documentation
from pydoc import help                          

#Import a custom library, in this case a fairly useful logging framework.
#The current directory is appended to sys.path first so that the import of
#the local `debug` module on the following line can be resolved.
debug_lib_location = Path("./")
sys.path.append(str(debug_lib_location))
import debug

warnings.filterwarnings('ignore')               # don't print out warnings


#Data directory: "<parent directory>/data", joined with the platform separator.
root_location=".." + os.sep + "data";

In [2]:
#Turn on Eager Execution
#tf.enable_eager_execution()

In [3]:
############################################
#JUPYTER NOTEBOOK OUTPUT CONTROL / FORMATTING
############################################
# Show floats with four decimal places (pandas also adds a thousands
# separator) so printed tables and arrays stay compact.
pd.set_option("display.float_format", '{:,.4f}'.format)
np.set_printoptions(precision=4)

# Variable declaration

In [4]:
############################################
# GLOBAL VARIABLES
############################################
DEBUG = 1         # general ledger output so you know what's happening
DEBUG_DATA = 1    # extremely verbose output; set to zero (0) to suppress the volume

# CODE CONSTRAINTS
VERSION_NAME = "HyperParameterGridSearch"
VERSION_ACRONYM = "ML-HPGS"
VERSION_MAJOR = 0
VERSION_MINOR = 0
VERSION_RELEASE = "6"
VERSION_TITLE = (
    f"{VERSION_NAME} ({VERSION_ACRONYM}) "
    f"{VERSION_MAJOR}.{VERSION_MINOR}.{VERSION_RELEASE} generated SEED."
)

ENCODING = "utf-8"

############################################
# GLOBAL CONSTANTS
############################################
# Default figure size and grid visibility for every matplotlib figure.
mpl.rcParams.update({'figure.figsize': (8, 6), 'axes.grid': False})

############################################
# APPLICATION VARIABLES
############################################
SEED_INIT = 7

# Hidden layer (layer 1) and output layer (layer 2) of the model.
LAYER1_NEURONS = 12
LAYER1_INPUT_DIMS = 8
LAYER1_ACTIVATION = 'relu'
LAYER2_NEURONS = 1
LAYER2_ACTIVATION = 'sigmoid'

# Compile settings. Despite its name, MODEL_ACTIVATION is handed to
# model.compile() as the `optimizer` argument by the first create_model().
MODEL_LOSS = 'binary_crossentropy'
MODEL_ACTIVATION = 'adam'
MODEL_METRICS = ['accuracy']

# Input data file, located under root_location.
DATASET_FILENAME = os.sep.join([root_location, 'pima-indians-diabetes.csv'])
DATASET_DELIMITER = ','

# Value passed as n_jobs to GridSearchCV.
GRID_SEARCH_NJOBS = -1

############################################
# GLOBAL CONFIGURATION
############################################
os.environ['PYTHONIOENCODING'] = ENCODING

## General Function Declaration

In [5]:
############################################
# WARNING / ERROR Management
############################################
def fxn():
    """Emit a DeprecationWarning carrying the message "deprecated"."""
    warnings.warn(message="deprecated", category=DeprecationWarning)


# Call fxn() once with every warning silenced; the filter change is undone
# when the context manager exits.
with warnings.catch_warnings():
    warnings.simplefilter(action="ignore")
    fxn()


############################################
# FUNCTIONS
############################################

def prototype(incMonth):
    """Log entry, the month that was passed in, and exit; always return 1.

    incMonth: value echoed to the info log (converted with str()).
    """
    # inspect.stack()[0][3] is the name of the currently running function.
    func_name = inspect.stack()[0][3]

    debug.msg_info("Entering {}.{}".format(__name__, func_name))
    debug.msg_info("The month you passed in was " + str(incMonth))
    debug.msg_info("Exiting {}.{}".format(__name__, func_name))
    return 1

def lib_diagnostics():
    """Write the interpreter and core library versions to the debug log.

    Every line goes through ``debug.msg_debug``. The netCDF4 line is only
    emitted when a module is bound to the global name ``nc``; this file does
    not import it, and referencing it unconditionally raised NameError.
    Matplotlib is reported through ``mpl``, the alias this file imports it
    under (the previous name ``matplt`` was undefined).
    """
    debug.msg_debug("System version    #:{:>12}".format(sys.version))

    # Look `nc` up defensively instead of assuming it was imported.
    netcdf4 = globals().get("nc")
    if netcdf4 is not None:
        netcdf4_version_info = netcdf4.getlibversion().split(" ")
        debug.msg_debug("netCDF4 version   #:{:>12}".format(netcdf4_version_info[0]))

    debug.msg_debug("Matplotlib version#:{:>12}".format(mpl.__version__))
    debug.msg_debug("Numpy version     #:{:>12}".format(np.__version__))
    debug.msg_debug("Pandas version    #:{:>12}".format(pd.__version__))
    debug.msg_debug("SciPy version     #:{:>12}".format(sp.__version__))

    return


def get_full_version():
    """Return "<VERSION_NAME>  v<major>.<minor>.<release>" as a string."""
    number = ".".join(
        str(part) for part in (VERSION_MAJOR, VERSION_MINOR, VERSION_RELEASE)
    )
    return str(VERSION_NAME) + "  v" + number

def get_version():
    """Return the dotted version number "<major>.<minor>.<release>"."""
    return ".".join(map(str, (VERSION_MAJOR, VERSION_MINOR, VERSION_RELEASE)))

def printversion():
    """Print the full version string produced by get_full_version()."""
    banner = get_full_version()
    print(banner)

def printusage():
    """Print the usage text: a blank line, the version, then the options."""
    print("")
    printversion()
    option_lines = (
        "  -v, --version    prints the version of this software package.",
        "",
        "  * - indicates required argument.",
    )
    for line in option_lines:
        print(line)

######################################################################
#Support routines to see columns in DataFrames
######################################################################
def show_columns_plain(inc_ary):
    """Return the column names lower-cased, sorted and space-separated.

    inc_ary: iterable of column names; each one is converted with str().

    Returns a string that starts with two spaces followed by the sorted,
    lower-cased names joined by single spaces (a single space when
    ``inc_ary`` is empty).

    The previous version referenced ``new_ary.sort`` without calling it, so
    the names were never actually sorted.
    """
    lowered = sorted(str(col).lower() for col in inc_ary)
    return " " + "".join(" " + name for name in lowered)

def show_columns_true(inc_ary):
    """Return the column names, case preserved, sorted and space-separated.

    inc_ary: iterable of column names; each one is converted with str().

    Returns a string that starts with two spaces followed by the sorted
    names joined by single spaces (a single space when ``inc_ary`` is empty).

    The previous version referenced ``new_ary.sort`` without calling it, so
    the names were never actually sorted.
    """
    # Sort the string forms so mixed-type column labels cannot raise TypeError.
    ordered = sorted(str(col) for col in inc_ary)
    return " " + "".join(" " + name for name in ordered)

######################################################################
#Input Validation
######################################################################
# valid string:
#  We don't want the following:
#   - at the start of the file name (might be construed as a switch)
#  $, &, |, ;, <, >, `, !, *, ", \ (to start with)
###
def validstring(testsubject):
    """Return 1 when *testsubject* is an acceptable name, otherwise 0.

    A string is rejected when it is empty, starts with "-" (it could be
    read as a command-line switch), or contains any of the characters
    $ & | ; < > ` ! * " or a backslash.

    The "<" and ">" checks were listed in the rules for this function but
    were missing from the implementation; an empty string used to raise
    IndexError and is now simply reported as invalid.
    """
    if not testsubject or testsubject[0] == "-":
        return 0

    forbidden = '$&|;<>`!*"\\'
    if any(char in testsubject for char in forbidden):
        return 0
    return 1
        

In [6]:
# %load ./libs.py
#Title:     Displays the libraries in current use.
#Objective: Invocation is intended as function calls within another program.
#Assumptions:
#           1. Should be stored in standardized location such as:
#                      /p/home/{user_name}/usr/PYTHONLIB
#           2. Developer loads the module (Jupyter Lab).
#Pre-Requisites:
#           1. Python v3.*
#           2. Jupyter Lab / Notebook (%load libs.py)
#Usage:
#       %load libs.py
#       find_loaded_modules().HTML
#
#Version History:
# ------------------------------------------------------------------------
# Version   Date       Modification                              Author
# ------------------------------------------------------------------------
# 1.0       2020/04/29 Inception                                 Radiance
# ------------------------------------------------------------------------
# ------------------------------------------------------------------------


#######################################################################
#LIBRARIES
#######################################################################
import os
import types

def module_version(mod):
    '''Return the version string for module *mod*, or '' if it has none.

    Every entry of ``mod.__dict__`` whose key, lower-cased, is "version" or
    "__version__" and whose value is a str is collected; several matches are
    joined with ", ". Non-string version values are ignored, and objects
    without a ``__dict__`` yield ''.

    The always-true ``if (str):`` test, its unreachable "No version" branch
    and the never-populated ``keys`` list were removed; results are the same.
    '''
    namespace = getattr(mod, '__dict__', None)
    if not namespace:
        return ''

    versions = [
        value
        for key, value in namespace.items()
        if key.lower() in ('version', '__version__') and isinstance(value, str)
    ]
    return ', '.join(versions)

def find_loaded_modules(only_versioned_modules=True):
    '''Return the modules bound in this module's global namespace.

    The result is a list of ``[name, version]`` pairs sorted by name; the
    version comes from ``module_version`` and is '' when none is found. The
    returned object also offers an ``HTML`` property that renders the rows
    as a table for IPython notebooks.

    only_versioned_modules: kept for interface compatibility.
        NOTE(review): the flag is not applied -- modules without a version
        are listed with an empty version, exactly as before. Confirm the
        intended behavior before filtering.
    '''
    from html import escape

    def list_of_lists_to_HTML(lists, header_row=None):
        '''Convert a list of a list of strings to a HTML table.'''
        def row(tag, items):
            # Escape the text so "<", ">" or "&" cannot break the markup.
            cells = ''.join(
                '<%s>%s</%s>' % (tag, escape(str(item)), tag) for item in items)
            return '\n\t<tr>\n\t\t' + cells + '\n\t</tr>'

        parts = ['<table>']
        if header_row:
            parts.append(row('th', header_row))
        parts.extend(row('td', inner_list) for inner_list in lists)
        parts.append('\n</table>')
        return ''.join(parts)

    class LoadedModules(list):
        '''Very simple wrapper for a list of lists of strings, with an attribute
        for display in IPython Notebooks.'''
        def __init__(self, *args, **kwargs):
            list.__init__(self, *args, **kwargs)

        @property
        def HTML(self):
            from IPython.display import HTML
            return HTML(
                    list_of_lists_to_HTML(
                            self, header_row=['Name', 'Version']))

    objs = LoadedModules()
    # Iterate over a snapshot so the namespace cannot change size mid-loop.
    for mod in list(globals().values()):
        if isinstance(mod, types.ModuleType):
            # Use the fallback name; it was computed before but then ignored.
            name = getattr(mod, '__name__', '')
            objs.append([name, module_version(mod)])
    objs.sort(key=lambda r: r[0])
    return objs


In [7]:
# Render the sorted [name, version] rows as an HTML table in the notebook.
find_loaded_modules().HTML

Name,Version
builtins,
builtins,
datetime,
debug,
gc,
getopt,
inspect,
math,
matplotlib,3.2.2
matplotlib.pyplot,


## How to Tune Batch Size and Number of Epochs

In this first simple example, we look at tuning the batch size and number of epochs used when fitting the network.

The batch size in iterative gradient descent is the number of patterns shown to the network before the weights are updated. It is also an optimization in the training of the network, defining how many patterns to read at a time and keep in memory.

The number of epochs is the number of times that the entire training dataset is shown to the network during training. Some networks are sensitive to the batch size, such as LSTM recurrent neural networks and Convolutional Neural Networks.

In [None]:
#######################################################################
# Function to create model, required for KerasClassifier
#######################################################################
def create_model():
    """Build and compile the two-layer network used by KerasClassifier.

    The layer sizes, activations, loss, optimizer and metrics all come from
    the module-level LAYER*/MODEL_* constants.
    """
    network = Sequential([
        Dense(LAYER1_NEURONS, input_dim=LAYER1_INPUT_DIMS, activation=LAYER1_ACTIVATION),
        Dense(LAYER2_NEURONS, activation=LAYER2_ACTIVATION),
    ])
    # MODEL_ACTIVATION holds the optimizer name that is passed to compile().
    network.compile(loss=MODEL_LOSS, optimizer=MODEL_ACTIVATION, metrics=MODEL_METRICS)
    return network

#######################################################################
# Fix the NumPy random seed so repeated runs of this evaluation are
# comparable.
#######################################################################
seed = SEED_INIT
np.random.seed(seed)

#######################################################################
# Load the dataset and split it into inputs X (first eight columns) and
# the output Y (ninth column).
#######################################################################
dataset = np.loadtxt(DATASET_FILENAME, delimiter=DATASET_DELIMITER)
X, Y = dataset[:, :8], dataset[:, 8]

#######################################################################
# Wrap the Keras model so scikit-learn can drive it.
#######################################################################
model = KerasClassifier(build_fn=create_model, verbose=0)

#######################################################################
# Grid of batch sizes and epoch counts to evaluate (3-fold CV each).
#######################################################################
batch_size = [10, 20, 40, 60, 80, 100]
epochs = [10, 50, 100]
param_grid = {"batch_size": batch_size, "epochs": epochs}

grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=GRID_SEARCH_NJOBS,
    cv=3,
)

#######################################################################
# Fit every parameter combination.
#######################################################################
grid_result = grid.fit(X, Y)

#######################################################################
# Summarize: best combination first, then one line per candidate.
#######################################################################
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

## How to Tune the Training Optimization Algorithm

Keras offers a suite of different state-of-the-art optimization algorithms.

Tune the optimization algorithm used to train the network, each with default parameters.

Evaluate the suite of optimization algorithms supported by the Keras API.

In [9]:
#######################################################################
# Function to create model, required for KerasClassifier
#   the optimizer argument is used to alter the model
#######################################################################
def create_model(inc_optimizer='adam'):
    """Build the two-layer network and compile it with *inc_optimizer*.

    inc_optimizer: optimizer handed to model.compile(); defaults to 'adam'.
    """
    network = Sequential([
        Dense(LAYER1_NEURONS, input_dim=LAYER1_INPUT_DIMS, activation=LAYER1_ACTIVATION),
        Dense(LAYER2_NEURONS, activation=LAYER2_ACTIVATION),
    ])
    network.compile(loss=MODEL_LOSS, optimizer=inc_optimizer, metrics=MODEL_METRICS)
    return network
#######################################################################
# Fix the NumPy random seed for reproducibility.
#######################################################################
seed = SEED_INIT
np.random.seed(seed)

#######################################################################
# Load the dataset and split it into inputs X (first eight columns) and
# the output Y (ninth column).
#######################################################################
dataset = np.loadtxt(DATASET_FILENAME, delimiter=DATASET_DELIMITER)
X, Y = dataset[:, :8], dataset[:, 8]

#######################################################################
# Wrap the Keras model; epochs and batch size are fixed for this search.
#######################################################################
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10, verbose=0)

#######################################################################
# Optimizers to compare; the key matches create_model's parameter name.
#######################################################################
optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']
param_grid = {"inc_optimizer": optimizer}

#######################################################################
# Instantiate the GridSearch class and then fit the data.
# Note that "grid" invokes a model build/compilation and iterates through
# the parameters provided.
#######################################################################
grid = GridSearchCV(
    estimator=model,
    param_grid=param_grid,
    n_jobs=GRID_SEARCH_NJOBS,
    cv=3,
)
grid_result = grid.fit(X, Y)

#######################################################################
# Summarize: best optimizer first, then one line per candidate.
#######################################################################
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
cv_results = grid_result.cv_results_
means = cv_results['mean_test_score']
stds = cv_results['std_test_score']
params = cv_results['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.696615 using {'inc_optimizer': 'Adamax'}
0.658854 (0.023939) with: {'inc_optimizer': 'SGD'}
0.690104 (0.031466) with: {'inc_optimizer': 'RMSprop'}
0.601562 (0.060851) with: {'inc_optimizer': 'Adagrad'}
0.546875 (0.147854) with: {'inc_optimizer': 'Adadelta'}
0.683594 (0.014616) with: {'inc_optimizer': 'Adam'}
0.696615 (0.020505) with: {'inc_optimizer': 'Adamax'}
0.695312 (0.016877) with: {'inc_optimizer': 'Nadam'}


## How to Tune Learning Rate and Momentum

It is common to pre-select an optimization algorithm to train your network and tune its parameters.

By far the most common optimization algorithm is plain old Stochastic Gradient Descent (SGD) because it is so well understood. Optimize the SGD learning rate and momentum parameters.

Learning rate controls how much to update the weight at the end of each batch and the momentum controls how much to let the previous update influence the current weight update.

Try a suite of small standard learning rates and momentum values from 0.2 to 0.8 in steps of 0.2, as well as 0.9 (because it can be a popular value in practice).

Generally, it is a good idea to also include the number of epochs in an optimization like this as there is a dependency between the amount of learning per batch (learning rate), the number of updates per epoch (batch size) and the number of epochs.

In [10]:
from tensorflow.keras.optimizers import SGD

#######################################################################
# Function to create model, required for KerasClassifier
#   argument of learning rate and momentum used to alter the model
#   in this case the SGD optimizer is instantiated with arguments
#######################################################################
def create_model(learn_rate=0.01, momentum=0):
    """Build the two-layer network and compile it with a configured SGD.

    learn_rate: learning rate given to the SGD optimizer.
    momentum:   momentum given to the SGD optimizer.

    Returns the compiled Sequential model.
    """
    # create model
    model = Sequential()
    model.add(Dense(LAYER1_NEURONS, input_dim=LAYER1_INPUT_DIMS, activation=LAYER1_ACTIVATION))
    model.add(Dense(LAYER2_NEURONS, activation=LAYER2_ACTIVATION))
    # Compile model
    # `learning_rate` is the supported keyword; `lr` is a deprecated alias.
    optimizer = SGD(learning_rate=learn_rate, momentum=momentum)
    model.compile(loss=MODEL_LOSS, optimizer=optimizer, metrics=MODEL_METRICS)
    return model

#######################################################################
# fix random seed for reproducibility
#######################################################################
seed = SEED_INIT
np.random.seed(seed)

#######################################################################
# load dataset
#######################################################################
dataset = np.loadtxt(DATASET_FILENAME, delimiter=DATASET_DELIMITER)

#######################################################################
# split into input (X) and output (Y) variables
#######################################################################
X = dataset[:,0:8]
Y = dataset[:,8]

#######################################################################
# create model
#######################################################################
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10, verbose=0)

#######################################################################
# define the grid search parameters
# (keys match the parameter names of create_model)
#######################################################################
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]
param_grid = dict(learn_rate=learn_rate, momentum=momentum)

#######################################################################
# Instantiate GridSearchCV which builds the model and iterates through
# parameters performing a fit and aggregating results.
# n_jobs comes from GRID_SEARCH_NJOBS, like the other grid searches in
# this notebook, instead of a hard-coded -1 (same value).
#######################################################################
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=GRID_SEARCH_NJOBS, cv=3)
grid_result = grid.fit(X, Y)

# summarize results: best combination first, then one line per candidate
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Best: 0.697917 using {'learn_rate': 0.001, 'momentum': 0.6}
0.602865 (0.062364) with: {'learn_rate': 0.001, 'momentum': 0.0}
0.673177 (0.024360) with: {'learn_rate': 0.001, 'momentum': 0.2}
0.695312 (0.016877) with: {'learn_rate': 0.001, 'momentum': 0.4}
0.697917 (0.009744) with: {'learn_rate': 0.001, 'momentum': 0.6}
0.661458 (0.025976) with: {'learn_rate': 0.001, 'momentum': 0.8}
0.651042 (0.024774) with: {'learn_rate': 0.001, 'momentum': 0.9}
0.666667 (0.014731) with: {'learn_rate': 0.01, 'momentum': 0.0}
0.648438 (0.025315) with: {'learn_rate': 0.01, 'momentum': 0.2}
0.649740 (0.026557) with: {'learn_rate': 0.01, 'momentum': 0.4}
0.649740 (0.026557) with: {'learn_rate': 0.01, 'momentum': 0.6}
0.649740 (0.026557) with: {'learn_rate': 0.01, 'momentum': 0.8}
0.651042 (0.024774) with: {'learn_rate': 0.01, 'momentum': 0.9}
0.651042 (0.024774) with: {'learn_rate': 0.1, 'momentum': 0.0}
0.652344 (0.022999) with: {'learn_rate': 0.1, 'momentum': 0.2}
0.651042 (0.024774) with: {'learn_rate':

In [11]:
## Perturbing ALL of the Hyper-Parameters

In [None]:
from tensorflow.keras.optimizers import SGD

#######################################################################
# Function to create model, required for KerasClassifier
#   hidden-layer size, hidden-layer activation, optimizer and loss are
#   all supplied by the caller; learning rate and momentum are applied
#   when the optimizer is SGD
#######################################################################
def create_model(inc_neurons, inc_activation, inc_optimizer, inc_loss, inc_learn_rate=0.01, inc_momentum=0):
    """Build and compile the two-layer network from the given settings.

    inc_neurons:    number of units in the hidden Dense layer.
    inc_activation: activation of the hidden Dense layer.
    inc_optimizer:  optimizer passed to model.compile().
    inc_loss:       loss passed to model.compile().
    inc_learn_rate: learning rate, used only when inc_optimizer is 'SGD'.
    inc_momentum:   momentum, used only when inc_optimizer is 'SGD'.

    Returns the compiled Sequential model.

    inc_learn_rate and inc_momentum used to be ignored altogether. Their
    defaults (0.01 and 0) are the same values this notebook's SGD cell uses
    as its baseline, so callers that do not pass them should see unchanged
    behavior -- NOTE(review): confirm against the installed Keras defaults.
    """
    # create model
    model = Sequential()
    model.add(Dense(inc_neurons, input_dim=LAYER1_INPUT_DIMS, activation=inc_activation))
    model.add(Dense(LAYER2_NEURONS, activation=LAYER2_ACTIVATION))

    # Compile model
    optimizer = inc_optimizer
    if isinstance(inc_optimizer, str) and inc_optimizer.lower() == 'sgd':
        # Only SGD is configured here; other optimizers keep their own defaults.
        optimizer = SGD(learning_rate=inc_learn_rate, momentum=inc_momentum)
    model.compile(loss=inc_loss, optimizer=optimizer, metrics=MODEL_METRICS)
    return model

#######################################################################
# fix random seed for reproducibility
#######################################################################
seed = SEED_INIT
np.random.seed(seed)

#######################################################################
# load dataset
#######################################################################
dataset = np.loadtxt(DATASET_FILENAME, delimiter=DATASET_DELIMITER)

#######################################################################
# split into input (X) and output (Y) variables
#######################################################################
X = dataset[:,0:8]
Y = dataset[:,8]

#######################################################################
# create model
#######################################################################
model = KerasClassifier(build_fn=create_model, epochs=100, batch_size=10, verbose=0)

#######################################################################
# define the grid search parameters
#
# Each list below is the full search range. For a quick trial run, shrink
# a list to one or two entries (for example batch_size = [10],
# epochs = [10], optimizer = ['SGD'], neurons = [1, 5]).
#######################################################################
# NOTE: learn_rate and momentum are defined but are NOT part of param_grid
# below, so they are not searched.
learn_rate = [0.001, 0.01, 0.1, 0.2, 0.3]
momentum = [0.0, 0.2, 0.4, 0.6, 0.8, 0.9]

batch_size = [10, 20, 40, 60, 80, 100]

epochs = [10, 50, 100]

optimizer = ['SGD', 'RMSprop', 'Adagrad', 'Adadelta', 'Adam', 'Adamax', 'Nadam']

losses =['binary_crossentropy', 'sparse_categorical_crossentropy','poisson',
         'mean_squared_error','mean_absolute_error','mean_absolute_percentage_error',
         'mean_squared_logarithmic_error','cosine_similarity', 'huber_loss']

neurons=[1, 5, 10, 15, 20, 25, 30, 35, 40, 45, 50]

activation=['relu', 'sigmoid', 'softmax', 'softplus', 'softsign', 'tanh', 'selu']

# 6 * 3 * 7 * 9 * 7 * 11 = 87318 candidates, each fitted on 3 folds.
param_grid = dict(batch_size=batch_size,
                  epochs=epochs,
                  inc_optimizer=optimizer,
                  inc_loss=losses,
                  inc_activation=activation,
                  inc_neurons=neurons)

#######################################################################
# Instantiate GridSearchCV which builds the model and iterates through
# parameters performing a fit and aggregating results
# verbose=(the greater the positive value the more detail)
# n_jobs comes from GRID_SEARCH_NJOBS, like the other grid searches in
# this notebook, instead of a hard-coded -1 (same value).
#######################################################################
grid = GridSearchCV(estimator=model, param_grid=param_grid, n_jobs=GRID_SEARCH_NJOBS, cv=3, verbose=10)
grid_result = grid.fit(X, Y)

# summarize results: best combination first, then one line per candidate
print("Best: %f using %s" % (grid_result.best_score_, grid_result.best_params_))
means = grid_result.cv_results_['mean_test_score']
stds = grid_result.cv_results_['std_test_score']
params = grid_result.cv_results_['params']
for mean, stdev, param in zip(means, stds, params):
    print("%f (%f) with: %r" % (mean, stdev, param))

Fitting 3 folds for each of 87318 candidates, totalling 261954 fits
