### Hyperparameter Optimization With Random Search and Grid Search

https://machinelearningmastery.com/hyperparameter-optimization-with-random-search-and-grid-search/

#### 1. Grid Search for Classification

In [1]:
%%time
# grid search logistic regression model on the sonar dataset
from pandas import read_csv
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RepeatedStratifiedKFold, GridSearchCV
# load dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.csv'
dataframe = read_csv(url, header=None)
# split into input and output elements
data = dataframe.values
X, y = data[:, :-1], data[:, -1]
# define model
model = LogisticRegression()
# define evaluation
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# define search space
space = dict()
space['solver'] = ['newton-cg', 'lbfgs', 'liblinear']
space['penalty'] = ['none', 'l1', 'l2', 'elasticnet']
space['C'] = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]
# define search
search = GridSearchCV(model, space, scoring='accuracy', n_jobs=-1, cv=cv)
# execute search
result = search.fit(X, y)
# summarize result
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)

Best Score: 0.7828571428571429
Best Hyperparameters: {'C': 1, 'penalty': 'l2', 'solver': 'newton-cg'}
Wall time: 19.7 s


 0.53380952 0.53380952 0.53380952        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.53380952
 0.53380952 0.53380952 0.53380952        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.53380952
 0.53380952 0.53380952 0.53380952        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.53380952
 0.58039683 0.58039683 0.57246032        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.53380952
 0.69674603 0.69674603 0.69087302        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.77857143
 0.78285714 0.78285714 0.75444444        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.75166667
 0.7768254  0.7768254  0.77531746        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.75166667
 0.75492063 0.75642857 0.75325397        nan        nan       

In [2]:
# grid search logistic regression model on the sonar dataset
from pandas import read_csv
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RepeatedStratifiedKFold
from sklearn.model_selection import GridSearchCV

In [3]:
# load dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.csv'
dataframe = read_csv(url, header=None)

In [4]:
dataframe

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,51,52,53,54,55,56,57,58,59,60
0,0.0200,0.0371,0.0428,0.0207,0.0954,0.0986,0.1539,0.1601,0.3109,0.2111,...,0.0027,0.0065,0.0159,0.0072,0.0167,0.0180,0.0084,0.0090,0.0032,R
1,0.0453,0.0523,0.0843,0.0689,0.1183,0.2583,0.2156,0.3481,0.3337,0.2872,...,0.0084,0.0089,0.0048,0.0094,0.0191,0.0140,0.0049,0.0052,0.0044,R
2,0.0262,0.0582,0.1099,0.1083,0.0974,0.2280,0.2431,0.3771,0.5598,0.6194,...,0.0232,0.0166,0.0095,0.0180,0.0244,0.0316,0.0164,0.0095,0.0078,R
3,0.0100,0.0171,0.0623,0.0205,0.0205,0.0368,0.1098,0.1276,0.0598,0.1264,...,0.0121,0.0036,0.0150,0.0085,0.0073,0.0050,0.0044,0.0040,0.0117,R
4,0.0762,0.0666,0.0481,0.0394,0.0590,0.0649,0.1209,0.2467,0.3564,0.4459,...,0.0031,0.0054,0.0105,0.0110,0.0015,0.0072,0.0048,0.0107,0.0094,R
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
203,0.0187,0.0346,0.0168,0.0177,0.0393,0.1630,0.2028,0.1694,0.2328,0.2684,...,0.0116,0.0098,0.0199,0.0033,0.0101,0.0065,0.0115,0.0193,0.0157,M
204,0.0323,0.0101,0.0298,0.0564,0.0760,0.0958,0.0990,0.1018,0.1030,0.2154,...,0.0061,0.0093,0.0135,0.0063,0.0063,0.0034,0.0032,0.0062,0.0067,M
205,0.0522,0.0437,0.0180,0.0292,0.0351,0.1171,0.1257,0.1178,0.1258,0.2529,...,0.0160,0.0029,0.0051,0.0062,0.0089,0.0140,0.0138,0.0077,0.0031,M
206,0.0303,0.0353,0.0490,0.0608,0.0167,0.1354,0.1465,0.1123,0.1945,0.2354,...,0.0086,0.0046,0.0126,0.0036,0.0035,0.0034,0.0079,0.0036,0.0048,M


In [5]:
# split into input and output elements
data = dataframe.values
X, y = data[:, :-1], data[:, -1]

In [6]:
type(data)

numpy.ndarray

In [7]:
print(X.shape,y.shape)

(208, 60) (208,)


In [8]:
# define model
model = LogisticRegression()
# define evaluation
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

In [9]:
print(type(model))
print(model)
print(type(cv))
print(cv)

<class 'sklearn.linear_model._logistic.LogisticRegression'>
LogisticRegression()
<class 'sklearn.model_selection._split.RepeatedStratifiedKFold'>
RepeatedStratifiedKFold(n_repeats=3, n_splits=10, random_state=1)


In [10]:
# define search space
space = dict()
space['solver'] = ['newton-cg', 'lbfgs', 'liblinear']
space['penalty'] = ['none', 'l1', 'l2', 'elasticnet']
space['C'] = [1e-5, 1e-4, 1e-3, 1e-2, 1e-1, 1, 10, 100]

In [11]:
space

{'solver': ['newton-cg', 'lbfgs', 'liblinear'],
 'penalty': ['none', 'l1', 'l2', 'elasticnet'],
 'C': [1e-05, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100]}

In [12]:
# define search
search = GridSearchCV(model, space, scoring='accuracy', n_jobs=-1, cv=cv)
# execute search
result = search.fit(X, y)

 0.53380952 0.53380952 0.53380952        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.53380952
 0.53380952 0.53380952 0.53380952        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.53380952
 0.53380952 0.53380952 0.53380952        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.53380952
 0.58039683 0.58039683 0.57246032        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.53380952
 0.69674603 0.69674603 0.69087302        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.77698413
 0.78285714 0.78285714 0.75444444        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.75166667
 0.7768254  0.7768254  0.77531746        nan        nan        nan
 0.73865079 0.73396825        nan        nan        nan 0.75166667
 0.75492063 0.75642857 0.75325397        nan        nan       

In [13]:
print(type(search))
print(search)

<class 'sklearn.model_selection._search.GridSearchCV'>
GridSearchCV(cv=RepeatedStratifiedKFold(n_repeats=3, n_splits=10, random_state=1),
             estimator=LogisticRegression(), n_jobs=-1,
             param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                         'penalty': ['none', 'l1', 'l2', 'elasticnet'],
                         'solver': ['newton-cg', 'lbfgs', 'liblinear']},
             scoring='accuracy')


In [14]:
print(type(result))
print(result)

<class 'sklearn.model_selection._search.GridSearchCV'>
GridSearchCV(cv=RepeatedStratifiedKFold(n_repeats=3, n_splits=10, random_state=1),
             estimator=LogisticRegression(), n_jobs=-1,
             param_grid={'C': [1e-05, 0.0001, 0.001, 0.01, 0.1, 1, 10, 100],
                         'penalty': ['none', 'l1', 'l2', 'elasticnet'],
                         'solver': ['newton-cg', 'lbfgs', 'liblinear']},
             scoring='accuracy')


In [15]:
# summarize result
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)

Best Score: 0.7828571428571429
Best Hyperparameters: {'C': 1, 'penalty': 'l2', 'solver': 'newton-cg'}


#### 2. Random Search for Classification

In [16]:
%%time
# random search logistic regression model on the sonar dataset
from scipy.stats import loguniform
from pandas import read_csv
from sklearn.linear_model import LogisticRegression
from sklearn.model_selection import RepeatedStratifiedKFold, RandomizedSearchCV
# load dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.csv'
dataframe = read_csv(url, header=None)
# split into input and output elements
data = dataframe.values
X, y = data[:, :-1], data[:, -1]
# define model
model = LogisticRegression()
# define evaluation
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)
# define search space
space = dict()
space['solver'] = ['newton-cg', 'lbfgs', 'liblinear']
space['penalty'] = ['none', 'l1', 'l2', 'elasticnet']
space['C'] = loguniform(1e-5, 100)
# define search
search = RandomizedSearchCV(model, space, n_iter=500, scoring='accuracy', n_jobs=-1, cv=cv, random_state=1)
# execute search
result = search.fit(X, y)
# summarize result
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)

Best Score: 0.7897619047619049
Best Hyperparameters: {'C': 4.878363034905756, 'penalty': 'l2', 'solver': 'newton-cg'}
Wall time: 48.4 s


 0.53380952 0.53380952        nan        nan        nan        nan
 0.73865079        nan 0.76730159        nan        nan 0.73396825
 0.53380952        nan 0.78007937 0.53380952 0.73396825 0.53380952
 0.76269841        nan 0.73865079        nan 0.73865079 0.7815873
 0.53380952        nan 0.73865079        nan 0.73865079 0.77666667
        nan 0.73865079 0.53380952        nan 0.67634921 0.77698413
 0.53380952 0.53539683        nan 0.53380952        nan        nan
        nan        nan        nan 0.73865079        nan 0.73396825
        nan 0.73396825        nan        nan        nan        nan
 0.76119048        nan 0.78126984 0.57888889        nan 0.53698413
        nan 0.76380952 0.73865079 0.53380952        nan        nan
        nan        nan 0.73865079 0.76896825        nan 0.60293651
 0.53380952 0.73865079        nan 0.78       0.53380952        nan
 0.53380952        nan 0.73865079 0.53380952        nan        nan
 0.53380952        nan        nan 0.74611111        nan        

#### 3. Grid Search revisited

In [17]:
space['C'] = [0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10]

In [18]:
space['C'] = [4.6, 4.7, 4.8, 4.9, 5.0, 5.1, 5.2, 5.3, 5.4]

In [19]:
space['C'] = [4.86,4.87,4.88,4.89,4.9,4.91,4.92,4.93,4.94]