# General Example with SGD

In [38]:
import numpy as np

from time import time
import scipy.stats as stats
from sklearn.utils.fixes import loguniform

from sklearn.model_selection import GridSearchCV, RandomizedSearchCV
from sklearn.datasets import load_digits
from sklearn.linear_model import SGDClassifier

In [39]:
# get some data
X, y = load_digits(return_X_y=True, n_class=3)

In [40]:
# Utility function to report best scores
def report(results, n_top=3):
    for i in range(1, n_top + 1):
        candidates = np.flatnonzero(results["rank_test_score"] == i)
        for candidate in candidates:
            print("Model with rank: {0}".format(i))
            print(
                "Mean validation score: {0:.3f} (std: {1:.3f})".format(
                    results["mean_test_score"][candidate],
                    results["std_test_score"][candidate],
                )
            )
            print("Parameters: {0}".format(results["params"][candidate]))
            print("")


In [41]:
# build a classifier
clf = SGDClassifier(loss="hinge", penalty="elasticnet", fit_intercept=True)

In [42]:
# specify parameters and distributions to sample from
param_dist = {
    "average": [True, False],
    "l1_ratio": stats.uniform(0, 1),
    "alpha": loguniform(1e-2, 1e0),
}

In [43]:
# run randomized search
n_iter_search = 15
random_search = RandomizedSearchCV(
    clf, param_distributions=param_dist, n_iter=n_iter_search
)

In [44]:
start = time()
random_search.fit(X, y)
print(
    "RandomizedSearchCV took %.2f seconds for %d candidates parameter settings."
    % ((time() - start), n_iter_search)
)
report(random_search.cv_results_)

RandomizedSearchCV took 0.49 seconds for 15 candidates parameter settings.
Model with rank: 1
Mean validation score: 0.987 (std: 0.018)
Parameters: {'alpha': 0.07955073265828969, 'average': False, 'l1_ratio': 0.7428287394913528}

Model with rank: 2
Mean validation score: 0.987 (std: 0.014)
Parameters: {'alpha': 0.01570482686934638, 'average': False, 'l1_ratio': 0.5489371725732266}

Model with rank: 3
Mean validation score: 0.987 (std: 0.007)
Parameters: {'alpha': 0.07631572174795476, 'average': False, 'l1_ratio': 0.0595599925103647}



In [45]:
random_search.best_params_

{'alpha': 0.07955073265828969,
 'average': False,
 'l1_ratio': 0.7428287394913528}

# Classification

In [46]:
import warnings
warnings.filterwarnings('ignore')

In [47]:
import pandas as pd
from sklearn.linear_model import LogisticRegression
from sklearn.utils.fixes import loguniform
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold 

In [48]:
# load dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/sonar.csv'
dataframe = pd.read_csv(url, header=None)
# split into input and output elements
data = dataframe.values
X, y = data[:, :-1], data[:, -1]
print(X.shape, y.shape)

(208, 60) (208,)


In [49]:
# define model
model = LogisticRegression()

# define evaluation
cv = RepeatedStratifiedKFold(n_splits=10, n_repeats=3, random_state=1)

# define search space
space = dict()
space['solver'] = ['newton-cg', 'liblinear']
space['penalty'] = ['l1', 'l2', 'elasticnet']
space['C'] = loguniform(1e-5, 100)

# define search
search = RandomizedSearchCV(model, space, n_iter=500, scoring='accuracy', n_jobs=-1, cv=cv, random_state=1)

# execute search
result = search.fit(X, y)
# summarize result
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)

Best Score: 0.7897619047619049
Best Hyperparameters: {'C': 1.2646086341209661, 'penalty': 'l1', 'solver': 'liblinear'}


# Regression

In [50]:
import warnings
warnings.filterwarnings('ignore')

In [51]:
import pandas as pd
from sklearn.utils.fixes import loguniform
from sklearn.model_selection import RandomizedSearchCV
from sklearn.model_selection import RepeatedStratifiedKFold 
from sklearn.linear_model import Ridge
from sklearn.model_selection import RepeatedKFold

In [52]:
# load dataset
url = 'https://raw.githubusercontent.com/jbrownlee/Datasets/master/auto-insurance.csv'
dataframe = pd.read_csv(url, header=None)
# split into input and output elements
data = dataframe.values
X, y = data[:, :-1], data[:, -1]
print(X.shape, y.shape)

# define model
model = Ridge()
# define evaluation
cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)

# define search space
space = dict()
space['solver'] = ['svd', 'cholesky', 'lsqr', 'sag']
space['alpha'] = loguniform(1e-5, 100)
space['fit_intercept'] = [True, False]

# define search
search = RandomizedSearchCV(model, space, n_iter=500, scoring='neg_mean_absolute_error', n_jobs=-1, cv=cv, random_state=1)
# execute search
result = search.fit(X, y)

# summarize result
print('Best Score: %s' % result.best_score_)
print('Best Hyperparameters: %s' % result.best_params_)

(63, 1) (63,)
Best Score: -29.19133382956394
Best Hyperparameters: {'alpha': 0.033463130642527546, 'fit_intercept': True, 'solver': 'sag'}
