In [1]:
import numpy as np
import pandas as pd
import sklearn.gaussian_process as gp
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import cross_validate
from numpy.random import RandomState

from acquisition.acquisition_functions import ProbabilityOfImprovement, ExpectedImprovement
from acquisition.acquisition_optimizers import RandomAcquisitionOpt, ConstrainedAcquisitionOpt, random_x_sample
from bayesian_optimizers.gpr_optimizer import GPROptimizer
from util.defs import ParamType

# Set random seed

In [2]:
# Seeded NumPy RandomState reused by later cells so their random draws repeat across runs.
rand = RandomState(12345)

# Grab the dataset

In [3]:
# Read the iris CSV and keep its first five columns as one NumPy array.
iris = "../data/iris.csv"
df = pd.read_csv(iris)
data = np.array(df.iloc[:, 0:5])

# Shuffle the rows in place with the seeded generator.
rand.shuffle(data)

# Every column except the last goes to X; the last column becomes the target y.
X, y = data[:, :-1], data[:, -1]

# Define: bounds for hyperparameters, parameter data types, objective function (lower is better)

In [4]:
# Search space, one [low, high] row per hyperparameter:
#   row 0 -> x0 = n_estimators       [10, 1000] -- int
#   row 1 -> x1 = max_depth          [1, 5]     -- int
#   row 2 -> x2 = min_samples_split  [2, 10]    -- int
bounds = np.array([(10, 1000), (1, 5), (2, 10)])

# One ParamType entry per row of `bounds`; all three are ParamType.Disc.
p_types = [ParamType.Disc] * 3

def objective(x, random_state=12345):
    """Cross-validated error of a random forest for one hyperparameter setting.

    Parameters
    ----------
    x : array-like
        ``[n_estimators, max_depth, min_samples_split]``. May be a flat
        sequence or a 2-D array such as ``(1, p)``, in which case only the
        first row is used. Each entry is truncated to an integer with ``int()``.
    random_state : int, RandomState instance or None, default 12345
        Passed to ``RandomForestClassifier`` so that evaluating the same
        ``x`` twice yields the same score. Pass ``None`` to get an unseeded
        forest, in which case repeated evaluations may differ.

    Returns
    -------
    float
        ``1 - mean(test_score)`` over 5-fold cross-validation on the
        module-level ``X`` and ``y``; lower is better.
    """
    # Accept lists/tuples as well as arrays; x may be passed as a (1, p) array.
    x = np.asarray(x)
    if x.ndim > 1:
        x = x[0]
    # define model
    model = RandomForestClassifier(n_estimators=int(x[0]),
                                   max_depth=int(x[1]),
                                   min_samples_split=int(x[2]),
                                   n_jobs=-1,
                                   random_state=random_state)
    # get 5-fold cross validation results
    cv_res = cross_validate(model, X, y, cv=5)
    # return 1 - acc
    return 1 - cv_res["test_score"].mean()

In [5]:
# Initial design: one random sample from the search space, with a leading axis added.
x0 = np.array(random_x_sample(bounds, p_types))[None, :]
# Objective value at that sample, wrapped in a length-1 array.
y0 = np.array([objective(x0)])

In [6]:
# Surrogate model: Gaussian-process regressor with a default RBF kernel.
kernel = gp.kernels.RBF()
gpr = gp.GaussianProcessRegressor(
    kernel=kernel,
    # alpha is the magnitude of one normal draw (scale 0.01) from the seeded generator.
    alpha=np.abs(rand.normal(scale=0.01)),
    n_restarts_optimizer=10,
    random_state=rand,
)

# Expected-improvement acquisition and the optimizer that searches it.
acquisition = ExpectedImprovement()
opt_acquisition = ConstrainedAcquisitionOpt(gpr, acquisition, rand)

# Tie surrogate, initial data, acquisition optimizer, objective and search space together.
gpr_opt = GPROptimizer(
    gpr, x0, y0, opt_acquisition, objective, bounds, p_types,
    fit=True,
)

In [7]:
# Run the optimization; verbose=True prints the chosen sample and objective value each iteration.
# NOTE(review): 20 is presumably the iteration budget and `thresh` an early-stopping
# threshold on the objective -- confirm against GPROptimizer.optimize.
res = gpr_opt.optimize(20, thresh=0.03, verbose=True)

Optimization iteration 1
Selected next parameter sample from acquisition optimizer: [411.   2.   6.]
Objective value at sample: 0.0467

Optimization iteration 2
Selected next parameter sample from acquisition optimizer: [274.   4.   9.]
Objective value at sample: 0.0333

Optimization iteration 3
Selected next parameter sample from acquisition optimizer: [243.   2.   2.]
Objective value at sample: 0.0533

Optimization iteration 4
Selected next parameter sample from acquisition optimizer: [252.   2.   8.]
Objective value at sample: 0.0467

Optimization iteration 5
Selected next parameter sample from acquisition optimizer: [259.   5.   6.]
Objective value at sample: 0.0467

Optimization iteration 6
Selected next parameter sample from acquisition optimizer: [258.   3.   4.]
Objective value at sample: 0.04

Optimization iteration 7
Selected next parameter sample from acquisition optimizer: [148.   2.   2.]
Objective value at sample: 0.04

Optimization iteration 8
Selected next parameter sam

In [8]:
# Display the result returned by optimize() (keys: 'argmin', 'minimizer', 'minimum').
res

{'argmin': 2,
 'minimizer': array([274.,   4.,   9.]),
 'minimum': array([0.03333333])}