In [None]:
import numpy as np
import pandas as pd
import sklearn.gaussian_process as gp
from sklearn.neural_network import MLPClassifier
from sklearn.model_selection import cross_validate
from numpy.random import RandomState

from util.defs import ParamType
from util.util import random_x_sample
from acquisition.acquisition_functions import ProbabilityOfImprovement, ExpectedImprovement
from acquisition.acquisition_optimizers import RandomAcquisitionOpt, ConstrainedAcquisitionOpt
from hp_optimizers.hp_optimizer import GPROptimizer, RandomSearchOptimizer

# Set random seed

In [2]:
# One seeded generator, reused by the cells below (shuffling, model fitting,
# optimizers) so a fresh Restart & Run All starts from the same random state.
rand = RandomState(123)

# Grab the dataset

In [3]:
# Read the iris CSV; the path is relative to the notebook's working directory.
iris = "../data/iris.csv"
df = pd.read_csv(iris)

# Copy the first five columns into a single array and shuffle its rows in
# place with the seeded generator, so features and labels stay aligned.
data = np.array(df.iloc[:, :5])
rand.shuffle(data)

# The last kept column is the target; every column before it is a feature.
X, y = data[:, :-1], data[:, -1]

# Define: bounds for hyperparameters, parameter data types, objective function (lower is better)

In [4]:
# Search space: one [lower, upper] row per hyperparameter consumed by objective().
# Only two parameters are tuned here; alpha / beta1 / beta2 are not part of the space.
bounds = np.array([
    [2, 100],     # x0: passed to MLPClassifier(hidden_layer_sizes=...)
    [0.1, 0.9],   # x1: passed to MLPClassifier(learning_rate_init=...)
])

# One type tag per row of `bounds`.
# NOTE(review): x1 spans a fractional range yet is tagged Disc, same as x0 —
# confirm whether ParamType offers a continuous member that should be used here.
p_types = [ParamType.Disc] * 2

def objective(x):
    """Cross-validated error of an MLP for one hyperparameter setting (lower is better).

    Parameters
    ----------
    x : array-like
        Hyperparameter vector ordered like ``bounds``: ``x[0]`` is the size of
        the hidden layer and ``x[1]`` the initial learning rate. A 2-D input
        such as a ``(1, p)`` array is accepted; only its first row is used.

    Returns
    -------
    float
        ``1 - mean test score`` over 5-fold cross validation on ``X`` / ``y``.
    """
    x = np.asarray(x)
    # x may be passed as a (1, p) array
    if x.ndim > 1:
        x = x[0]

    model = MLPClassifier(
        # A single hidden layer whose width is the (truncated) first parameter.
        hidden_layer_sizes=(int(x[0]),),
        # Must stay a float: an int() cast collapses every value in the
        # (0, 1) search range to 0, which is not a usable learning rate.
        learning_rate_init=float(x[1]),
        random_state=rand,
    )

    # get 5-fold cross validation results
    cv_res = cross_validate(model, X, y, cv=5)
    # return 1 - acc
    return 1 - cv_res["test_score"].mean()

# Define the Bayesian Optimizer

In [5]:
# Surrogate model: Gaussian-process regressor with an RBF kernel.
kernel = gp.kernels.RBF()
gpr = gp.GaussianProcessRegressor(
    kernel=kernel,
    # Noise term: absolute value of one draw from the seeded generator.
    alpha=np.abs(rand.normal(scale=0.01)),
    n_restarts_optimizer=10,
    random_state=rand,
)

# Acquisition function (ProbabilityOfImprovement is imported as an alternative)
# and the optimizer that searches it.
acquisition = ExpectedImprovement()
opt_acquisition = ConstrainedAcquisitionOpt(gpr, acquisition, rand)

# Hyperparameter optimizer tying the surrogate, acquisition search, objective
# and search space together.
gpr_opt = GPROptimizer(
    gpr, opt_acquisition, objective, bounds, p_types, rand,
    initial_samples=3,
    fit=True,
)



# Run the optimization

In [None]:
# Run the GPR-driven search and keep its result.
# NOTE(review): 10 is presumably the iteration budget and `thresh` an
# early-stopping tolerance — confirm against GPROptimizer.optimize.
res1 = gpr_opt.optimize(10, thresh=0.03, verbose=True)

In [None]:
# Bare last expression: the notebook renders the value returned by optimize().
res1

# Try Random Search for comparison

In [None]:
# Baseline: random search over the same objective, bounds and parameter types.
rand_opt = RandomSearchOptimizer(objective, bounds, p_types, rand, parallel=True)
# 13 evaluations — presumably the same budget as the GPR run above
# (3 initial samples + 10 iterations); TODO confirm.
res2 = rand_opt.optimize(13, verbose=False)
res2