In [1]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn.ensemble import RandomForestClassifier,RandomForestRegressor
from sklearn.metrics import classification_report,confusion_matrix,accuracy_score
from sklearn.neighbors import KNeighborsClassifier,KNeighborsRegressor
from sklearn.svm import SVC,SVR
from sklearn import datasets
import scipy.stats as stats
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)

In [2]:
X, y = datasets.load_boston(return_X_y=True)

In [3]:
datasets.load_boston()

{'data': array([[6.3200e-03, 1.8000e+01, 2.3100e+00, ..., 1.5300e+01, 3.9690e+02,
         4.9800e+00],
        [2.7310e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9690e+02,
         9.1400e+00],
        [2.7290e-02, 0.0000e+00, 7.0700e+00, ..., 1.7800e+01, 3.9283e+02,
         4.0300e+00],
        ...,
        [6.0760e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         5.6400e+00],
        [1.0959e-01, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9345e+02,
         6.4800e+00],
        [4.7410e-02, 0.0000e+00, 1.1930e+01, ..., 2.1000e+01, 3.9690e+02,
         7.8800e+00]]),
 'target': array([24. , 21.6, 34.7, 33.4, 36.2, 28.7, 22.9, 27.1, 16.5, 18.9, 15. ,
        18.9, 21.7, 20.4, 18.2, 19.9, 23.1, 17.5, 20.2, 18.2, 13.6, 19.6,
        15.2, 14.5, 15.6, 13.9, 16.6, 14.8, 18.4, 21. , 12.7, 14.5, 13.2,
        13.1, 13.5, 18.9, 20. , 21. , 24.7, 30.8, 34.9, 26.6, 25.3, 24.7,
        21.2, 19.3, 20. , 16.6, 14.4, 19.4, 19.7, 20.5, 25. , 23.4, 18.9,
        35.4, 24.7, 3

In [4]:
## Baseline Machine Learning models: Regressors with Default Hyperparameters


In [9]:
#Random Forest
clf = RandomForestRegressor()
scores = cross_val_score(clf, X, y, cv=3,scoring='neg_mean_squared_error') # 3-fold cross-validation
print("MSE:"+ str(-scores.mean()))

MSE:30.461252451394756


In [10]:
#SVM
clf = SVR(gamma='scale')
scores = cross_val_score(clf, X, y, cv=3,scoring='neg_mean_squared_error')
print("MSE:"+ str(-scores.mean()))

MSE:77.42951812579332


In [11]:
#KNN
clf = KNeighborsRegressor()
scores = cross_val_score(clf, X, y, cv=3,scoring='neg_mean_squared_error')
print("MSE:"+ str(-scores.mean()))

MSE:81.48773186343571


In [8]:
#ANN
from keras.models import Sequential, Model
from keras.layers import Dense, Input
from sklearn.model_selection import GridSearchCV
from keras.wrappers.scikit_learn import KerasRegressor
from keras.callbacks import EarlyStopping
def ANN(optimizer = 'adam',neurons=32,batch_size=32,epochs=50,activation='relu',patience=5,loss='mse'):
    model = Sequential()
    model.add(Dense(neurons, input_shape=(X.shape[1],), activation=activation))
    model.add(Dense(neurons, activation=activation))
    model.add(Dense(1))
    model.compile(optimizer = optimizer, loss=loss)
    early_stopping = EarlyStopping(monitor="loss", patience = patience)# early stop patience
    history = model.fit(X, y,
              batch_size=batch_size,
              epochs=epochs,
              callbacks = [early_stopping],
              verbose=0) #verbose set to 1 will show the training process
    return model


Using TensorFlow backend.


In [13]:
clf = KerasRegressor(build_fn=ANN, verbose=0)
scores = cross_val_score(clf, X, y, cv=3,scoring='neg_mean_squared_error')
print("MSE:"+ str(-scores.mean()))


MSE:43.86915343191351


In [14]:
### HPO Algorithm 1: Grid Search


In [15]:
#Random Forest
from sklearn.model_selection import GridSearchCV
# Define the hyperparameter configuration space
rf_params = {
    'n_estimators': [10, 20, 30],
    #'max_features': ['sqrt',0.5],
    'max_depth': [15,20,30,50],
    #'min_samples_leaf': [1,2,4,8],
    #"bootstrap":[True,False],
    #"criterion":['mse','mae']
}
clf = RandomForestRegressor(random_state=0)
grid = GridSearchCV(clf, rf_params, cv=3, scoring='neg_mean_squared_error')
grid.fit(X, y)
print(grid.best_params_)
print("MSE:"+ str(-grid.best_score_))

{'max_depth': 15, 'n_estimators': 20}
MSE:29.02394449507633




In [16]:
#SVM
from sklearn.model_selection import GridSearchCV
rf_params = {
    'C': [1,10, 100],
    "kernel":['poly','rbf','sigmoid'],
    "epsilon":[0.01,0.1,1]
}
clf = SVR(gamma='scale')
grid = GridSearchCV(clf, rf_params, cv=3, scoring='neg_mean_squared_error')
grid.fit(X, y)
print(grid.best_params_)
print("MSE:"+ str(-grid.best_score_))

{'C': 100, 'epsilon': 0.01, 'kernel': 'poly'}
MSE:67.07483887754718




In [17]:
#KNN
from sklearn.model_selection import GridSearchCV
rf_params = {
    'n_neighbors': [2, 3, 5,7,10]
}
clf = KNeighborsRegressor()
grid = GridSearchCV(clf, rf_params, cv=3, scoring='neg_mean_squared_error')
grid.fit(X, y)
print(grid.best_params_)
print("MSE:"+ str(-grid.best_score_))

{'n_neighbors': 5}
MSE:81.52933517786562




In [18]:
#ANN
from sklearn.model_selection import GridSearchCV
rf_params = {
    'optimizer': ['adam','rmsprop'],
    'activation': ['relu','tanh'],
    'loss': ['mse','mae'],
    'batch_size': [16,32],
    'neurons':[16,32],
    'epochs':[20,50],
    'patience':[2,5]
}
clf = KerasRegressor(build_fn=ANN, verbose=0)
grid = GridSearchCV(clf, rf_params, cv=3,scoring='neg_mean_squared_error')
grid.fit(X, y)
print(grid.best_params_)
print("MSE:"+ str(-grid.best_score_))



{'activation': 'relu', 'batch_size': 32, 'epochs': 50, 'loss': 'mse', 'neurons': 32, 'optimizer': 'adam', 'patience': 2}
MSE:52.46714581852489


In [19]:
### HPO Algorithm 2: Random Search


In [20]:
#Random Forest
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
# Define the hyperparameter configuration space
rf_params = {
    'n_estimators': sp_randint(10,100),
    "max_features":sp_randint(1,13),
    'max_depth': sp_randint(5,50),
    "min_samples_split":sp_randint(2,11),
    "min_samples_leaf":sp_randint(1,11),
    "criterion":['mse','mae']
}
n_iter_search=20 #number of iterations is set to 20, you can increase this number if time permits
clf = RandomForestRegressor(random_state=0)
Random = RandomizedSearchCV(clf, param_distributions=rf_params,n_iter=n_iter_search,cv=3,scoring='neg_mean_squared_error')
Random.fit(X, y)
print(Random.best_params_)
print("MSE:"+ str(-Random.best_score_))


{'criterion': 'mse', 'max_depth': 47, 'max_features': 10, 'min_samples_leaf': 1, 'min_samples_split': 10, 'n_estimators': 59}
MSE:27.396446279090135




In [21]:
#SVM
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
rf_params = {
    'C': stats.uniform(0,50),
    "kernel":['poly','rbf','sigmoid'],
    "epsilon":stats.uniform(0,1)
}
n_iter_search=20
clf = SVR(gamma='scale')
Random = RandomizedSearchCV(clf, param_distributions=rf_params,n_iter=n_iter_search,cv=3,scoring='neg_mean_squared_error')
Random.fit(X, y)
print(Random.best_params_)
print("MSE:"+ str(-Random.best_score_))

{'C': 31.747323273178772, 'epsilon': 0.9051832494763797, 'kernel': 'poly'}
MSE:60.22903886978192




In [22]:
#KNN
from scipy.stats import randint as sp_randint
from sklearn.model_selection import RandomizedSearchCV
rf_params = {
    'n_neighbors': sp_randint(1,20),
}
n_iter_search=10
clf = KNeighborsRegressor()
Random = RandomizedSearchCV(clf, param_distributions=rf_params,n_iter=n_iter_search,cv=3,scoring='neg_mean_squared_error')
Random.fit(X, y)
print(Random.best_params_)
print("MSE:"+ str(-Random.best_score_))


{'n_neighbors': 13}
MSE:80.7723025469514




In [23]:
#ANN
from scipy.stats import randint as sp_randint
from random import randrange as sp_randrange
from sklearn.model_selection import RandomizedSearchCV
rf_params = {
    'optimizer': ['adam','rmsprop'],
    'activation': ['relu','tanh'],
    'loss': ['mse','mae'],
    'batch_size': [16,32,64],
    'neurons':sp_randint(10,100),
    'epochs':[20,50],
    #'epochs':[20,50,100,200],
    'patience':sp_randint(3,20)
}
n_iter_search=10
clf = KerasRegressor(build_fn=ANN, verbose=0)
Random = RandomizedSearchCV(clf, param_distributions=rf_params,n_iter=n_iter_search,cv=3,scoring='neg_mean_squared_error')
Random.fit(X, y)
print(Random.best_params_)
print("MSE:"+ str(-Random.best_score_))



{'activation': 'relu', 'batch_size': 16, 'epochs': 20, 'loss': 'mse', 'neurons': 70, 'optimizer': 'adam', 'patience': 4}
MSE:38.45416595298494


In [24]:
### HPO Algorithm 3: Hyperband



In [4]:
#Random Forest
from hyperband import HyperbandSearchCV
from scipy.stats import randint as sp_randint
# Define the hyperparameter configuration space
rf_params = {
    'n_estimators': sp_randint(10,100),
    "max_features":sp_randint(1,13),
    'max_depth': sp_randint(5,50),
    "min_samples_split":sp_randint(2,11),
    "min_samples_leaf":sp_randint(1,11),
    "criterion":['mse','mae']
}
clf = RandomForestRegressor(random_state=0)
hyper = HyperbandSearchCV(clf, param_distributions =rf_params,cv=3,min_iter=10,max_iter=100,scoring='neg_mean_squared_error')
hyper.fit(X, y)
print(hyper.best_params_)
print("MSE:"+ str(-hyper.best_score_))


{'criterion': 'mse', 'max_depth': 11, 'max_features': 10, 'min_samples_leaf': 6, 'min_samples_split': 5, 'n_estimators': 11}
MSE:27.82872118482073


In [5]:
#SVM
from hyperband import HyperbandSearchCV
from scipy.stats import randint as sp_randint
rf_params = {
    'C': stats.uniform(0,50),
    "kernel":['poly','rbf','sigmoid'],
    "epsilon":stats.uniform(0,1)
}
clf = SVR(gamma='scale')
hyper = HyperbandSearchCV(clf, param_distributions =rf_params,cv=3,min_iter=1,max_iter=10,scoring='neg_mean_squared_error',resource_param='C')
hyper.fit(X, y)
print(hyper.best_params_)
print("MSE:"+ str(-hyper.best_score_))

{'C': 10, 'epsilon': 0.029177342506488846, 'kernel': 'rbf'}
MSE:72.83527663803682


In [6]:
#KNN
from hyperband import HyperbandSearchCV
from scipy.stats import randint as sp_randint
rf_params = {
    'n_neighbors': range(1,20),
}
clf = KNeighborsRegressor()
hyper = HyperbandSearchCV(clf, param_distributions =rf_params,cv=3,min_iter=1,max_iter=20,scoring='neg_mean_squared_error',resource_param='n_neighbors')
hyper.fit(X, y)
print(hyper.best_params_)
print("MSE:"+ str(-hyper.best_score_))

{'n_neighbors': 6}
MSE:80.87024044795783


In [9]:
#ANN
from hyperband import HyperbandSearchCV
from scipy.stats import randint as sp_randint
rf_params = {
    'optimizer': ['adam','rmsprop'],
    'activation': ['relu','tanh'],
    'loss': ['mse','mae'],
    'batch_size': [16,32,64],
    'neurons':sp_randint(10,100),
    'epochs':[20,50],
    #'epochs':[20,50,100,200],
    'patience':sp_randint(3,20)
}
clf = KerasRegressor(build_fn=ANN, epochs=20, verbose=0)
hyper = HyperbandSearchCV(clf, param_distributions =rf_params,cv=3,min_iter=1,max_iter=10,scoring='neg_mean_squared_error',resource_param='epochs')
hyper.fit(X, y)
print(hyper.best_params_)
print("MSE:"+ str(-hyper.best_score_))

{'activation': 'relu', 'batch_size': 64, 'epochs': 10, 'loss': 'mse', 'neurons': 84, 'optimizer': 'adam', 'patience': 16}
MSE:83.31618953604321


In [10]:
### HPO Algorithm 4: BO-GP


In [11]:
#### Using skopt.BayesSearchCV

In [12]:
#Random Forest
from skopt import Optimizer
from skopt import BayesSearchCV 
from skopt.space import Real, Categorical, Integer
# Define the hyperparameter configuration space
rf_params = {
    'n_estimators': Integer(10,100),
    "max_features":Integer(1,13),
    'max_depth': Integer(5,50),
    "min_samples_split":Integer(2,11),
    "min_samples_leaf":Integer(1,11),
    "criterion":['mse','mae']
}
clf = RandomForestRegressor(random_state=0)
Bayes = BayesSearchCV(clf, rf_params,cv=3,n_iter=20, scoring='neg_mean_squared_error') 
#number of iterations is set to 20, you can increase this number if time permits
Bayes.fit(X, y)
print(Bayes.best_params_)
bclf = Bayes.best_estimator_
print("MSE:"+ str(-Bayes.best_score_))


OrderedDict([('criterion', 'mae'), ('max_depth', 36), ('max_features', 7), ('min_samples_leaf', 1), ('min_samples_split', 11), ('n_estimators', 45)])
MSE:27.200855650493327


In [13]:
#SVM
from skopt import Optimizer
from skopt import BayesSearchCV 
from skopt.space import Real, Categorical, Integer
rf_params = {
    'C': Real(0,50),
    "kernel":['poly','rbf','sigmoid'],
    'epsilon': Real(0,1)
}
clf = SVR(gamma='scale')
Bayes = BayesSearchCV(clf, rf_params,cv=3,n_iter=20, scoring='neg_mean_squared_error')
Bayes.fit(X, y)
print(Bayes.best_params_)
print("MSE:"+ str(-Bayes.best_score_))

OrderedDict([('C', 49.683018375532924), ('epsilon', 0.013241640863736363), ('kernel', 'poly')])
MSE:59.267259166172174


In [14]:
#KNN
from skopt import Optimizer
from skopt import BayesSearchCV 
from skopt.space import Real, Categorical, Integer
rf_params = {
    'n_neighbors': Integer(1,20),
}
clf = KNeighborsRegressor()
Bayes = BayesSearchCV(clf, rf_params,cv=3,n_iter=10, scoring='neg_mean_squared_error')
Bayes.fit(X, y)
print(Bayes.best_params_)
print("MSE:"+ str(-Bayes.best_score_))

OrderedDict([('n_neighbors', 13)])
MSE:80.74121499347262


In [15]:
#ANN
from skopt import Optimizer
from skopt import BayesSearchCV 
from skopt.space import Real, Categorical, Integer
rf_params = {
    'optimizer': ['adam','rmsprop'],
    'activation': ['relu','tanh'],
    'loss': ['mse','mae'],
    'batch_size': [16,32,64],
    'neurons':Integer(10,100),
    'epochs':[20,50],
    #'epochs':[20,50,100,200],
    'patience':Integer(3,20)
}
clf = KerasRegressor(build_fn=ANN, verbose=0)
Bayes = BayesSearchCV(clf, rf_params,cv=3,n_iter=10, scoring='neg_mean_squared_error')
Bayes.fit(X, y)
print(Bayes.best_params_)
print("MSE:"+ str(-Bayes.best_score_))

OrderedDict([('activation', 'relu'), ('batch_size', 32), ('epochs', 24), ('loss', 'mae'), ('neurons', 47), ('optimizer', 'adam'), ('patience', 14)])
MSE:44.03458020922383


In [16]:
#### Using skopt.gp_minimize


In [17]:
#Random Forest
from skopt import Optimizer
from skopt import BayesSearchCV 
from skopt.space import Real, Categorical, Integer
# Define the hyperparameter configuration space
rf_params = {
    'n_estimators': Integer(10,100),
    "max_features":Integer(1,13),
    'max_depth': Integer(5,50),
    "min_samples_split":Integer(2,11),
    "min_samples_leaf":Integer(1,11),
    "criterion":['mse','mae']
}
clf = RandomForestRegressor(random_state=0)
Bayes = BayesSearchCV(clf, rf_params,cv=3,n_iter=20, scoring='neg_mean_squared_error') 
#number of iterations is set to 20, you can increase this number if time permits
Bayes.fit(X, y)
print(Bayes.best_params_)
bclf = Bayes.best_estimator_
print("MSE:"+ str(-Bayes.best_score_))

OrderedDict([('criterion', 'mse'), ('max_depth', 22), ('max_features', 7), ('min_samples_leaf', 1), ('min_samples_split', 6), ('n_estimators', 57)])
MSE:25.46298194972694


In [18]:
#SVM
from skopt import Optimizer
from skopt import BayesSearchCV 
from skopt.space import Real, Categorical, Integer
rf_params = {
    'C': Real(0,50),
    "kernel":['poly','rbf','sigmoid'],
    'epsilon': Real(0,1)
}
clf = SVR(gamma='scale')
Bayes = BayesSearchCV(clf, rf_params,cv=3,n_iter=20, scoring='neg_mean_squared_error')
Bayes.fit(X, y)
print(Bayes.best_params_)
print("MSE:"+ str(-Bayes.best_score_))


OrderedDict([('C', 37.8104361880082), ('epsilon', 0.6904621349570833), ('kernel', 'poly')])
MSE:60.64876628704073


In [19]:
#KNN
from skopt import Optimizer
from skopt import BayesSearchCV 
from skopt.space import Real, Categorical, Integer
rf_params = {
    'n_neighbors': Integer(1,20),
}
clf = KNeighborsRegressor()
Bayes = BayesSearchCV(clf, rf_params,cv=3,n_iter=10, scoring='neg_mean_squared_error')
Bayes.fit(X, y)
print(Bayes.best_params_)
print("MSE:"+ str(-Bayes.best_score_))

OrderedDict([('n_neighbors', 12)])
MSE:81.34909773097274


In [20]:
#ANN
from skopt import Optimizer
from skopt import BayesSearchCV 
from skopt.space import Real, Categorical, Integer
rf_params = {
    'optimizer': ['adam','rmsprop'],
    'activation': ['relu','tanh'],
    'loss': ['mse','mae'],
    'batch_size': [16,32,64],
    'neurons':Integer(10,100),
    'epochs':[20,50],
    #'epochs':[20,50,100,200],
    'patience':Integer(3,20)
}
clf = KerasRegressor(build_fn=ANN, verbose=0)
Bayes = BayesSearchCV(clf, rf_params,cv=3,n_iter=10, scoring='neg_mean_squared_error')
Bayes.fit(X, y)
print(Bayes.best_params_)
print("MSE:"+ str(-Bayes.best_score_))

OrderedDict([('activation', 'relu'), ('batch_size', 32), ('epochs', 43), ('loss', 'mse'), ('neurons', 99), ('optimizer', 'adam'), ('patience', 16)])
MSE:41.933040247226636


In [21]:
#Random Forest
from skopt.space import Real, Integer
from skopt.utils import use_named_args

reg = RandomForestRegressor()
# Define the hyperparameter configuration space
space  = [Integer(10, 100, name='n_estimators'),
            Integer(5, 50, name='max_depth'),
          Integer(1, 13, name='max_features'),
          Integer(2, 11, name='min_samples_split'),
          Integer(1, 11, name='min_samples_leaf'),
         Categorical(['mse', 'mae'], name='criterion')
         ]
# Define the objective function
@use_named_args(space)
def objective(**params):
    reg.set_params(**params)

    return -np.mean(cross_val_score(reg, X, y, cv=3, n_jobs=-1,
                                    scoring="neg_mean_squared_error"))
from skopt import gp_minimize
res_gp = gp_minimize(objective, space, n_calls=20, random_state=0)
#number of iterations is set to 20, you can increase this number if time permits
print("MSE:%.4f" % res_gp.fun)
print(res_gp.x)

MSE:25.8747
[45, 43, 5, 8, 5, 'mse']


In [22]:
#SVM
from skopt.space import Real, Integer
from skopt.utils import use_named_args

reg = SVR(gamma='scale')
space  = [Real(0, 50, name='C'),
          Categorical(['poly','rbf','sigmoid'], name='kernel'),
          Real(0, 1, name='epsilon'),
         ]

@use_named_args(space)
def objective(**params):
    reg.set_params(**params)

    return -np.mean(cross_val_score(reg, X, y, cv=3, n_jobs=-1,
                                    scoring="neg_mean_squared_error"))
from skopt import gp_minimize
res_gp = gp_minimize(objective, space, n_calls=20, random_state=0)
print("MSE:%.4f" % res_gp.fun)
print(res_gp.x)


MSE:61.2510
[37.93078121611787, 'poly', 0.47360041934665753]


In [23]:
#KNN
from skopt.space import Real, Integer
from skopt.utils import use_named_args

reg = KNeighborsRegressor()
space  = [Integer(1, 20, name='n_neighbors')]

@use_named_args(space)
def objective(**params):
    reg.set_params(**params)

    return -np.mean(cross_val_score(reg, X, y, cv=3, n_jobs=-1,
                                    scoring="neg_mean_squared_error"))
from skopt import gp_minimize
res_gp = gp_minimize(objective, space, n_calls=10, random_state=0)
print("MSE:%.4f" % res_gp.fun)
print(res_gp.x)

MSE:80.7412
[13]


In [24]:
#### HPO Algorithm 5: BO-TPE


In [25]:
#Random Forest
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score, StratifiedKFold
# Define the objective function
def objective(params):
    params = {
        'n_estimators': int(params['n_estimators']), 
        'max_depth': int(params['max_depth']),
        'max_features': int(params['max_features']),
        "min_samples_split":int(params['min_samples_split']),
        "min_samples_leaf":int(params['min_samples_leaf']),
        "criterion":str(params['criterion'])
    }
    clf = RandomForestRegressor( **params)
    score = -np.mean(cross_val_score(clf, X, y, cv=3, n_jobs=-1,
                                    scoring="neg_mean_squared_error"))

    return {'loss':score, 'status': STATUS_OK }
# Define the hyperparameter configuration space
space = {
    'n_estimators': hp.quniform('n_estimators', 10, 100, 1),
    'max_depth': hp.quniform('max_depth', 5, 50, 1),
    "max_features":hp.quniform('max_features', 1, 13, 1),
    "min_samples_split":hp.quniform('min_samples_split',2,11,1),
    "min_samples_leaf":hp.quniform('min_samples_leaf',1,11,1),
    "criterion":hp.choice('criterion',['mse','mae'])
}

best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=20)
print("Random Forest: Hyperopt estimated optimum {}".format(best))


100%|██████████| 20/20 [00:03<00:00,  6.35trial/s, best loss: 26.97630991791949]
Random Forest: Hyperopt estimated optimum {'criterion': 0, 'max_depth': 42.0, 'max_features': 8.0, 'min_samples_leaf': 8.0, 'min_samples_split': 3.0, 'n_estimators': 75.0}


In [26]:
#SVM
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score, StratifiedKFold
def objective(params):
    params = {
        'C': abs(float(params['C'])), 
        "kernel":str(params['kernel']),
        'epsilon': abs(float(params['epsilon'])),
    }
    clf = SVR(gamma='scale', **params)
    score = -np.mean(cross_val_score(clf, X, y, cv=3, n_jobs=-1,
                                    scoring="neg_mean_squared_error"))
    
    return {'loss':score, 'status': STATUS_OK }

space = {
    'C': hp.normal('C', 0, 50),
    "kernel":hp.choice('kernel',['poly','rbf','sigmoid']),
    'epsilon': hp.normal('epsilon', 0, 1),
}

best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=20)
print("SVM: Hyperopt estimated optimum {}".format(best))


100%|██████████| 20/20 [00:00<00:00, 52.81trial/s, best loss: 60.31133170451198]
SVM: Hyperopt estimated optimum {'C': 30.45675844585344, 'epsilon': 0.4757626860624949, 'kernel': 0}


In [27]:
#KNN
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score, StratifiedKFold
def objective(params):
    params = {
        'n_neighbors': abs(int(params['n_neighbors']))
    }
    clf = KNeighborsRegressor( **params)
    score = -np.mean(cross_val_score(clf, X, y, cv=3, n_jobs=-1,
                                    scoring="neg_mean_squared_error"))

    return {'loss':score, 'status': STATUS_OK }

space = {
    'n_neighbors': hp.quniform('n_neighbors', 1, 20, 1),
}

best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=10)
print("KNN: Hyperopt estimated optimum {}".format(best))


100%|██████████| 10/10 [00:00<00:00, 97.74trial/s, best loss: 80.83005201647829]
KNN: Hyperopt estimated optimum {'n_neighbors': 6.0}


In [28]:
#ANN
from hyperopt import hp, fmin, tpe, STATUS_OK, Trials
from sklearn.model_selection import cross_val_score, StratifiedKFold
def objective(params):
    params = {
        "optimizer":str(params['optimizer']),
        "activation":str(params['activation']),
        "loss":str(params['loss']),
        'batch_size': abs(int(params['batch_size'])),
        'neurons': abs(int(params['neurons'])),
        'epochs': abs(int(params['epochs'])),
        'patience': abs(int(params['patience']))
    }
    clf = KerasRegressor(build_fn=ANN,**params, verbose=0)
    score = -np.mean(cross_val_score(clf, X, y, cv=3, 
                                    scoring="neg_mean_squared_error"))

    return {'loss':score, 'status': STATUS_OK }

space = {
    "optimizer":hp.choice('optimizer',['adam','rmsprop']),
    "activation":hp.choice('activation',['relu','tanh']),
    "loss":hp.choice('loss',['mse','mae']),
    'batch_size': hp.quniform('batch_size', 16, 64, 16),
    'neurons': hp.quniform('neurons', 10, 100, 10),
    'epochs': hp.quniform('epochs', 20, 50, 10),
    'patience': hp.quniform('patience', 3, 20, 3),
}

best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=10)
print("ANN: Hyperopt estimated optimum {}".format(best))

100%|██████████| 10/10 [01:14<00:00,  7.42s/trial, best loss: 50.18424888351995]
ANN: Hyperopt estimated optimum {'activation': 0, 'batch_size': 32.0, 'epochs': 40.0, 'loss': 1, 'neurons': 30.0, 'optimizer': 0, 'patience': 18.0}


In [29]:
### HPO Algorithm 6: PSO



In [30]:
#Random Forest
import optunity
import optunity.metrics
# Define the hyperparameter configuration space
search = {
    'n_estimators': [10, 100],
    'max_features': [1, 13],
    'max_depth': [5,50],
    "min_samples_split":[2,11],
    "min_samples_leaf":[1,11],
         }
# Define the objective function
@optunity.cross_validated(x=X, y=y, num_folds=3)
def performance(x_train, y_train, x_test, y_test,n_estimators=None, max_features=None,max_depth=None,min_samples_split=None,min_samples_leaf=None):
    # fit the model
    model = RandomForestRegressor(n_estimators=int(n_estimators),
                                   max_features=int(max_features),
                                   max_depth=int(max_depth),
                                   min_samples_split=int(min_samples_split),
                                   min_samples_leaf=int(min_samples_leaf),
                                  )
    scores=-np.mean(cross_val_score(model, X, y, cv=3, n_jobs=-1,
                                    scoring="neg_mean_squared_error"))
    return scores

optimal_configuration, info, _ = optunity.minimize(performance,
                                                  solver_name='particle swarm',
                                                  num_evals=20,
                                                   **search
                                                  )
print(optimal_configuration)
print("MSE:"+ str(info.optimum))

{'n_estimators': 51.8798828125, 'max_features': 8.951171875, 'max_depth': 18.33740234375, 'min_samples_split': 6.21435546875, 'min_samples_leaf': 3.9052734375}
MSE:27.259215080169678


In [31]:
#SVM
import optunity
import optunity.metrics
search = {
    'C': (0,50),
    'kernel':[0,3],
    'epsilon': (0, 1)
         }
@optunity.cross_validated(x=X, y=y, num_folds=3)
def performance(x_train, y_train, x_test, y_test,C=None,kernel=None,epsilon=None):
    # fit the model
    if kernel<1:
        ke='poly'
    elif kernel<2:
        ke='rbf'
    else:
        ke='sigmoid'
    model = SVR(C=float(C),
                kernel=ke,
                gamma='scale',
                epsilon=float(epsilon)
                                  )

    scores=-np.mean(cross_val_score(model, X, y, cv=3, n_jobs=-1,
                                    scoring="neg_mean_squared_error"))

    return scores

optimal_configuration, info, _ = optunity.minimize(performance,
                                                  solver_name='particle swarm',
                                                  num_evals=20,
                                                   **search
                                                  )
print(optimal_configuration)
print("MSE:"+ str(info.optimum))

{'C': 32.7099609375, 'kernel': 0.7766601562500002, 'epsilon': 0.31064453125}
MSE:59.758334163239816


In [32]:
#KNN
import optunity
import optunity.metrics
search = {
    'n_neighbors': [1, 20],
         }
@optunity.cross_validated(x=X, y=y, num_folds=3)
def performance(x_train, y_train, x_test, y_test,n_neighbors=None):
    # fit the model
    model = KNeighborsRegressor(n_neighbors=int(n_neighbors),
                                  )

    scores=-np.mean(cross_val_score(model, X, y, cv=3, n_jobs=-1,
                                    scoring="neg_mean_squared_error"))

    return scores

optimal_configuration, info, _ = optunity.minimize(performance,
                                                  solver_name='particle swarm',
                                                  num_evals=10,
                                                   **search
                                                  )
print(optimal_configuration)
print("MSE:"+ str(info.optimum))

{'n_neighbors': 14.0439453125}
MSE:81.26511555604914


In [33]:
#ANN
import optunity
import optunity.metrics
search = {
    'optimizer':[0,2],
    'activation':[0,2],
    'loss':[0,2],
    'batch_size': [0, 2],
    'neurons': [10, 100],
    'epochs': [20, 50],
    'patience': [3, 20],
         }
@optunity.cross_validated(x=X, y=y, num_folds=3)
def performance(x_train, y_train, x_test, y_test,optimizer=None,activation=None,loss=None,batch_size=None,neurons=None,epochs=None,patience=None):
    # fit the model
    if optimizer<1:
        op='adam'
    else:
        op='rmsprop'
    if activation<1:
        ac='relu'
    else:
        ac='tanh'
    if loss<1:
        lo='mse'
    else:
        lo='mae'
    if batch_size<1:
        ba=16
    else:
        ba=32
    model = ANN(optimizer=op,
                activation=ac,
                loss=lo,
                batch_size=ba,
                neurons=int(neurons),
                epochs=int(epochs),
                patience=int(patience)
                                  )
    clf = KerasRegressor(build_fn=ANN, verbose=0)
    scores=-np.mean(cross_val_score(clf, X, y, cv=3, 
                                    scoring="neg_mean_squared_error"))

    return scores

optimal_configuration, info, _ = optunity.minimize(performance,
                                                  solver_name='particle swarm',
                                                  num_evals=20,
                                                   **search
                                                  )
print(optimal_configuration)
print("MSE:"+ str(info.optimum))

{'optimizer': 0.7873046875, 'activation': 1.4373046875, 'loss': 0.8923828125, 'batch_size': 0.7119605382156127, 'neurons': 46.9228515625, 'epochs': 49.1181640625, 'patience': 11.50166015625}
MSE:36.97802766465484
