In [None]:
import numpy as np
import pandas as pd

from sklearn.neural_network import MLPRegressor
from sklearn.metrics import mean_squared_error
from sklearn.datasets import load_boston
from sklearn.model_selection import KFold, cross_val_score
from sklearn.model_selection import train_test_split
from hyperopt import fmin, tpe, hp, anneal, Trials, space_eval

%matplotlib inline

import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the Boston housing dataset into a feature frame and a target vector.
# NOTE(review): `load_boston` appears to have been removed from recent
# scikit-learn releases -- confirm the pinned version before re-running.
boston = load_boston()
X = pd.DataFrame(data=boston.data, columns=boston.feature_names)
y = boston.target

# Hold out 20% of the rows as a test set; the split is shuffled with a fixed seed.
train_data, test_data, train_targets, test_targets = train_test_split(
    X,
    y,
    test_size=0.20,
    shuffle=True,
    random_state=42,
)

In [None]:
# Candidate `hidden_layer_sizes` values: every (depth, width) combination,
# with all layers of a given network sharing the same width.
n_layers = [1, 2]
n_neurons = [10, 20, 30, 40, 50]
hidden_layer_tuples = [
    (width,) * depth
    for depth in n_layers
    for width in n_neurons
]
print('The first hidden layer tuples:')
print(hidden_layer_tuples)

The first hidden layer tuples:
[(10,), (20,), (30,), (40,), (50,), (10, 10), (20, 20), (30, 30), (40, 40), (50, 50)]


In [None]:
# Accumulators filled by the optimisation cell: one entry per optimisation run.
all_val_scores, all_test_scores = [], []

# Number of hyperopt evaluations per run.
n_iter = 100

# Shared, seeded K-fold splitter so every candidate is scored on the same folds.
num_folds = 2
kf = KFold(n_splits=num_folds, shuffle=True, random_state=42)

In [None]:
def calculate_min(scores):
    """Return the running minimum of ``scores`` as a list.

    Element ``i`` of the result is the smallest value seen in
    ``scores[0..i]``. The running minimum starts at ``np.inf``, so values
    that never compare as strictly smaller (e.g. NaN) leave it unchanged.

    Parameters
    ----------
    scores : iterable
        Scores in evaluation order.

    Returns
    -------
    list
        Best-so-far value after each score; empty if ``scores`` is empty.
    """
    running_minima = []
    best_so_far = np.inf
    for value in scores:
        # min() keeps its first argument unless the second is strictly
        # smaller, so ties and NaNs do not replace the current best.
        best_so_far = min(best_so_far, value)
        running_minima.append(best_so_far)
    return running_minima

In [None]:
def gb_mse_cv(params, random_state=42, cv=kf, X=train_data, y=train_targets):
    """Cross-validated mean squared error of an MLPRegressor built from ``params``.

    Parameters
    ----------
    params : mapping
        Must contain 'hidden_layer_sizes', 'activation', 'solver', 'alpha',
        'batch_size', 'learning_rate' and 'learning_rate_init'. Any other
        keys are ignored. 'batch_size' is cast to ``int``.
    random_state : int, default 42
        Seed passed to the MLPRegressor.
    cv : cross-validation splitter, default ``kf``
        Passed to ``cross_val_score``.
    X, y : training features and targets, default ``train_data`` / ``train_targets``.

    Returns
    -------
    float
        Mean over folds of the MSE (sign-flipped 'neg_mean_squared_error').
    """
    used_keys = (
        'hidden_layer_sizes',
        'activation',
        'solver',
        'alpha',
        'batch_size',
        'learning_rate',
        'learning_rate_init',
    )
    # Keep only the hyperparameters forwarded to the model, forcing an integer batch size.
    mlp_kwargs = {
        key: int(params[key]) if key == 'batch_size' else params[key]
        for key in used_keys
    }

    model = MLPRegressor(random_state=random_state, **mlp_kwargs)

    # The scorer returns negated MSE per fold; average and flip the sign to get a loss.
    fold_scores = cross_val_score(
        model, X, y, cv=cv, scoring="neg_mean_squared_error", n_jobs=-1
    )
    return -fold_scores.mean()

In [None]:
%%time

space = {
    'hidden_layer_sizes': hp.choice('hidden_layer_sizes', hidden_layer_tuples),
    'activation': hp.choice('activation', ['logistic', 'tanh', 'relu']),
    'solver': hp.choice('solver', ['adam']),
    'alpha': hp.uniform('alpha', 10**-5, 10**-1),
    'batch_size': hp.choice('batch_size', range(2,300)),
    'learning_rate': hp.choice('learning_rate', ['constant', 'invscaling', 'adaptive']),
    'learning_rate_init': hp.uniform('learning_rate_init', 10**-5, 10**-1),
}

for random_state in range(5):
    # trials will contain logging information
    trials = Trials()

    best=fmin(fn=gb_mse_cv, # function to optimize
            space=space, 
            algo=tpe.suggest, # optimization algorithm, hyperotp will select its parameters automatically
            max_evals=n_iter, # maximum number of iterations
            trials=trials, # logging
            rstate=np.random.RandomState(random_state) # fixing random state for the reproducibility
            )

    hyperparams = space_eval(space, best)
    reg = MLPRegressor(**hyperparams)
    reg.fit(train_data, train_targets)
    reg_test_score = mean_squared_error(test_targets, reg.predict(test_data))

    print("Best MSE {:.3f} params {}".format( gb_mse_cv(hyperparams), hyperparams))

    all_test_scores.append(reg_test_score)

    tpe_results=np.array([[x['result']['loss'],
                      x['misc']['vals']['activation'][0],
                      x['misc']['vals']['alpha'][0],
                      x['misc']['vals']['batch_size'][0],
                      x['misc']['vals']['hidden_layer_sizes'][0],
                      x['misc']['vals']['learning_rate'][0],
                      x['misc']['vals']['learning_rate_init'][0],
                      x['misc']['vals']['solver'][0]] for x in trials.trials],
                     )
    tpe_results_df=pd.DataFrame(tpe_results,
                            columns=['score',
                                    'activation',
                                    'alpha',
                                    'batch_size',
                                    'hidden_layer_sizes',
                                    'learning_rate',
                                    'learning_rate_init',
                                    'solver'])
    tpe_results_df['score']
    all_val_scores.append(calculate_min(tpe_results_df['score']))

100%|██████████| 100/100 [00:43<00:00,  2.28it/s, best loss: 21.581199216143325]
Best MSE 21.581 params {'activation': 'relu', 'alpha': 0.028492488126720637, 'batch_size': 213, 'hidden_layer_sizes': (10, 10), 'learning_rate': 'invscaling', 'learning_rate_init': 0.01475050699699766, 'solver': 'adam'}
100%|██████████| 100/100 [00:26<00:00,  3.75it/s, best loss: 25.980198272047858]
Best MSE 25.980 params {'activation': 'relu', 'alpha': 0.032178020453474544, 'batch_size': 103, 'hidden_layer_sizes': (20,), 'learning_rate': 'invscaling', 'learning_rate_init': 0.07026747279451144, 'solver': 'adam'}
100%|██████████| 100/100 [00:30<00:00,  3.24it/s, best loss: 25.47760389391331]
Best MSE 25.478 params {'activation': 'relu', 'alpha': 0.09396061131667036, 'batch_size': 129, 'hidden_layer_sizes': (10, 10), 'learning_rate': 'invscaling', 'learning_rate_init': 0.016537533374984374, 'solver': 'adam'}
100%|██████████| 100/100 [00:31<00:00,  3.15it/s, best loss: 21.64745013235015]
Best MSE 21.647 param

In [None]:
# Each entry of `all_val_scores` becomes a column after transposing, so rows
# index the position within a run. Persist the full table to disk.
df = pd.DataFrame(data=all_val_scores).T
df.to_csv(path_or_buf='full_results.csv', index=False)

In [None]:
# For every row of `df`, the mean and standard deviation (numpy default,
# ddof=0) taken across that row's columns.
mean_std = [
    (np.mean(row_values), np.std(row_values))
    for row_values in (row.to_numpy() for _, row in df.iterrows())
]
mean_std

[(85.95370812884562, 5.1827419009495905),
 (60.46036318480617, 25.089320066705653),
 (59.79737942835895, 24.37427151755166),
 (45.804474251205974, 20.454943798044397),
 (33.0916299074781, 8.03739435178798),
 (33.0916299074781, 8.03739435178798),
 (31.33411029369155, 6.597013723555147),
 (30.239536121045955, 6.233976636676544),
 (29.121361102199614, 5.988798400444816),
 (29.121361102199614, 5.988798400444816),
 (29.121361102199614, 5.988798400444816),
 (29.121361102199614, 5.988798400444816),
 (27.527453555626426, 3.5340492731828954),
 (27.527453555626426, 3.5340492731828954),
 (27.527453555626426, 3.5340492731828954),
 (27.527453555626426, 3.5340492731828954),
 (27.039956071345692, 3.065713475555255),
 (27.039956071345692, 3.065713475555255),
 (27.039956071345692, 3.065713475555255),
 (27.039956071345692, 3.065713475555255),
 (27.039956071345692, 3.065713475555255),
 (26.67105705356221, 2.7618010716798262),
 (25.99165658700366, 2.7133634063975305),
 (25.99165658700366, 2.71336340639753

In [None]:
# Tabulate the (mean, std) pairs, one row per entry of `mean_std`.
summary_columns = ['Mean', 'Std']
final_df = pd.DataFrame(data=mean_std, columns=summary_columns)
final_df

Unnamed: 0,Mean,Std
0,85.953708,5.182742
1,60.460363,25.089320
2,59.797379,24.374272
3,45.804474,20.454944
4,33.091630,8.037394
...,...,...
95,23.050519,2.225910
96,23.050519,2.225910
97,23.050519,2.225910
98,23.050519,2.225910


In [None]:
# Write the mean/std summary table to disk without the index column.
final_df.to_csv(path_or_buf='TPE - Final Results.csv', index=False)

In [None]:
# One row per entry of `all_test_scores`; saved without the index column.
test_df = pd.DataFrame(data=all_test_scores, columns=['Test scores'])
test_df.to_csv(path_or_buf='TPE - Test scores.csv', index=False)