In [1]:
# numerical helpers used to build the hyperparameter grids
import numpy as np

# scikit-learn: estimators, datasets, preprocessing, splitting utilities, and metrics
from sklearn.linear_model import LogisticRegression, ElasticNet
from sklearn.datasets import load_wine,load_breast_cancer,load_diabetes, make_classification
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, KFold, ParameterGrid
from sklearn.metrics import accuracy_score, mean_squared_error

# project-local search routines that this notebook compares against each other
from hyperparameter_search import find_best_hyperparams_bayesian,find_best_hyperparams_grid

import warnings
# NOTE(review): this silences *every* warning for the rest of the session,
# including any convergence warnings the solvers might raise during the searches.
warnings.filterwarnings('ignore')
# used to time each search
import time

#### wine classification

In [2]:
# Wine dataset: run the Bayesian search and the exhaustive grid search over the
# same elastic-net logistic-regression grid and report how long each one takes.

# load dataset; only the training split is handed to the searches
data = load_wine()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# specify parameter grid, model, and performance metric
# 20 values of C x 11 values of l1_ratio x 2 values of fit_intercept = 440 configurations
param_grid = {'C': np.logspace(-4, 4, 20), 'l1_ratio': np.arange(0,1.01,0.1), 'fit_intercept':[True,False]}
categorical_feats = ['fit_intercept'] #boolean values as well
# 'saga' shuffles the data while fitting, so seed it to make individual fits repeatable.
# NOTE(review): the search helpers may have their own randomness that this does not control.
model = LogisticRegression(solver = 'saga', penalty='elasticnet', random_state=42)
metric_fnc = accuracy_score

# bayesian search
# perf_counter() is a monotonic clock meant for measuring intervals; time.time()
# can jump if the system clock is adjusted while the search is running.
start = time.perf_counter()
find_best_hyperparams_bayesian(X_train,y_train,param_grid,categorical_feats,model,metric_fnc,tol=10)
end = time.perf_counter()
print(f'bayesian search runtime: {end - start :.0f} sec\n')

# grid search
start = time.perf_counter()
find_best_hyperparams_grid(X_train,y_train,param_grid,model,metric_fnc)
end = time.perf_counter()
print(f'grid search runtime: {end - start:.0f} sec')

hyper_config: {'C': 78.47599703514607, 'fit_intercept': True, 'l1_ratio': 0.2}, mean: 0.9833, std: 0.0204
bayesian search runtime: 0 sec

size of hyperparameter space: 440
hyper_config: {'C': 0.012742749857031334, 'fit_intercept': True, 'l1_ratio': 0.0}, mean: 0.9833, std: 0.0204
grid search runtime: 7 sec


#### breast cancer classification

In [3]:
# Breast-cancer dataset: run the Bayesian search and the exhaustive grid search over
# the same elastic-net logistic-regression grid and report how long each one takes.

# load dataset; only the training split is handed to the searches
data = load_breast_cancer()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# specify parameter grid, model, and performance metric
# 20 values of C x 11 values of l1_ratio x 2 values of fit_intercept = 440 configurations
param_grid = {'C': np.logspace(-4, 4, 20), 'l1_ratio': np.arange(0,1.01,0.1), 'fit_intercept':[True,False]}
categorical_feats = ['fit_intercept'] #boolean values as well
# 'saga' shuffles the data while fitting, so seed it to make individual fits repeatable.
# NOTE(review): the search helpers may have their own randomness that this does not control.
model = LogisticRegression(solver = 'saga', penalty='elasticnet', random_state=42)
metric_fnc = accuracy_score

# bayesian search
# perf_counter() is a monotonic clock meant for measuring intervals; time.time()
# can jump if the system clock is adjusted while the search is running.
start = time.perf_counter()
find_best_hyperparams_bayesian(X_train,y_train,param_grid,categorical_feats,model,metric_fnc,tol=10)
end = time.perf_counter()
print(f'bayesian search runtime: {end - start :.0f} sec\n')

# grid search
start = time.perf_counter()
find_best_hyperparams_grid(X_train,y_train,param_grid,model,metric_fnc)
end = time.perf_counter()
print(f'grid search runtime: {end - start:.0f} sec')

hyper_config: {'C': 78.47599703514607, 'fit_intercept': True, 'l1_ratio': 0.2}, mean: 0.9711, std: 0.0175
bayesian search runtime: 1 sec

size of hyperparameter space: 440
hyper_config: {'C': 0.23357214690901212, 'fit_intercept': True, 'l1_ratio': 0.1}, mean: 0.9790, std: 0.0065
grid search runtime: 18 sec


#### synthetic classification

In [4]:
# Synthetic 4-class problem: run the Bayesian search and the exhaustive grid search
# over the same elastic-net logistic-regression grid and report how long each takes.

# generate the dataset with a fixed seed; without it a different dataset is drawn on
# every run, which makes the seeded train/test split below (and the results) unrepeatable
X, y = make_classification(n_samples=1000,n_features=32,n_informative=20,n_classes=4,random_state=42)

# only the training split is handed to the searches
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# specify parameter grid, model, and performance metric
# 20 values of C x 11 values of l1_ratio x 2 values of fit_intercept = 440 configurations
param_grid = {'C': np.logspace(-4, 4, 20), 'l1_ratio': np.arange(0,1.01,0.1), 'fit_intercept':[True,False]}
categorical_feats = ['fit_intercept'] #boolean values as well
# 'saga' shuffles the data while fitting, so seed it to make individual fits repeatable.
# NOTE(review): the search helpers may have their own randomness that this does not control.
model = LogisticRegression(solver = 'saga', penalty='elasticnet', random_state=42)
metric_fnc = accuracy_score

# bayesian search
# perf_counter() is a monotonic clock meant for measuring intervals; time.time()
# can jump if the system clock is adjusted while the search is running.
start = time.perf_counter()
find_best_hyperparams_bayesian(X_train,y_train,param_grid,categorical_feats,model,metric_fnc,tol=10)
end = time.perf_counter()
print(f'bayesian search runtime: {end - start :.0f} sec\n')

# grid search
start = time.perf_counter()
find_best_hyperparams_grid(X_train,y_train,param_grid,model,metric_fnc)
end = time.perf_counter()
print(f'grid search runtime: {end - start:.0f} sec')


hyper_config: {'C': 78.47599703514607, 'fit_intercept': True, 'l1_ratio': 0.2}, mean: 0.5284, std: 0.0577
bayesian search runtime: 1 sec

size of hyperparameter space: 440
hyper_config: {'C': 0.23357214690901212, 'fit_intercept': False, 'l1_ratio': 0.9}, mean: 0.5478, std: 0.0579
grid search runtime: 42 sec


#### diabetes regression

In [5]:
# Diabetes regression: run the Bayesian search and the exhaustive grid search over
# the same ElasticNet grid and report how long each one takes.

# load dataset; only the training split is handed to the searches
data = load_diabetes()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.33, random_state=42)

# 20 values of alpha x 11 values of l1_ratio x 2 values of fit_intercept = 440 configurations
param_grid = {'alpha': np.logspace(-2, 2, 20), 'l1_ratio': np.arange(0,1.01,0.1), 'fit_intercept':[True,False]}
categorical_feats = ['fit_intercept'] #boolean values as well
model = ElasticNet()
# MSE is a lower-is-better loss, so it is inverted to get a higher-is-better score
# like the accuracy metric used in the classification cells.
metric_fnc = lambda y_true,y_pred: 1/mean_squared_error(y_true,y_pred)
# warning!: the reported "mean" is a mean of 1/MSE values, not an MSE. Taking 1/mean
# only approximates the mean squared error of the best configuration: if the helper
# averages the score over folds (TODO confirm), 1/mean is the harmonic mean of the
# per-fold MSEs, which is never larger than their arithmetic mean.

# bayesian search
# perf_counter() is a monotonic clock meant for measuring intervals; time.time()
# can jump if the system clock is adjusted while the search is running.
start = time.perf_counter()
find_best_hyperparams_bayesian(X_train,y_train,param_grid,categorical_feats,model,metric_fnc,tol=10)
end = time.perf_counter()
print(f'bayesian search runtime: {end - start :.0f} sec\n')

# grid search
start = time.perf_counter()
find_best_hyperparams_grid(X_train,y_train,param_grid,model,metric_fnc)
end = time.perf_counter()
print(f'grid search runtime: {end - start:.0f} sec')


hyper_config: {'alpha': 0.00999999999999801, 'fit_intercept': True, 'l1_ratio': 1.0}, mean: 0.0003, std: 0.0000
bayesian search runtime: 0 sec

size of hyperparameter space: 440
hyper_config: {'alpha': 1.2742749857031335, 'fit_intercept': True, 'l1_ratio': 1.0}, mean: 0.0003, std: 0.0000
grid search runtime: 3 sec
