In [2]:
import numpy as np
import pandas as pd
import json
import os
import matplotlib.pyplot as plt
from mpl_toolkits.mplot3d import Axes3D  # noqa: F401 unused import
from sklearn.datasets import make_classification
from sklearn.model_selection import cross_val_score
from sklearn.ensemble import RandomForestClassifier as RFC
from sklearn.svm import SVC
from sklearn.base import clone
from skopt import gp_minimize, forest_minimize, gbrt_minimize
from skopt.learning import GaussianProcessRegressor
from sklearn.gaussian_process.kernels import Matern, ConstantKernel, Product
from bayes_opt import BayesianOptimization
from bayes_opt.util import Colours
from bayes_opt import UtilityFunction
import copy
from functools import partial
import math
from skopt.space import Real, Integer
from skopt.utils import use_named_args
import GPy
import GPyOpt
from hyperparameter.bayesian_optimization import Bayesian

In [13]:
def get_data():
    """Build the synthetic binary-classification benchmark dataset.

    Returns:
        A ``(data, targets)`` pair as produced by
        ``sklearn.datasets.make_classification`` with a fixed random seed,
        so repeated calls yield the same dataset.
    """
    dataset_options = dict(
        n_samples=10000,
        n_features=120,
        n_informative=110,
        n_redundant=2,
        random_state=134985745,
    )
    features, labels = make_classification(**dataset_options)
    return features, labels


def svc_cv(expC, expGamma, X, Y):
    """Cross-validate an SVC whose hyperparameters are given on a log10 scale.

    The optimizer searches over the exponents rather than the raw values:
    ``C = 10 ** expC`` and ``gamma = 10 ** expGamma``. Searching in log space
    is not strictly necessary, but it greatly improves the performance of the
    optimizer.

    Args:
        expC: Base-10 exponent of the SVC regularization parameter ``C``.
        expGamma: Base-10 exponent of the SVC kernel coefficient ``gamma``.
        X: Feature matrix passed to ``cross_val_score``.
        Y: Target labels passed to ``cross_val_score``.

    Returns:
        Mean ``roc_auc`` score over 4 cross-validation folds. The goal is to
        find the (C, gamma) combination that maximizes this value.
    """
    # Map the log-scale search variables back to the values SVC expects.
    C = 10 ** expC
    gamma = 10 ** expGamma
    estimator = SVC(C=C, gamma=gamma, random_state=2)
    cval = cross_val_score(estimator, X, Y, scoring='roc_auc', cv=4)
    return cval.mean()


def rfc_cv(n_estimators, min_samples_split, max_features, X, Y):
    """Cross-validate a random forest classifier.

    Args:
        n_estimators: Number of trees; truncated to ``int`` before use so a
            continuous optimizer can propose it.
        min_samples_split: Minimum samples needed to split a node; truncated
            to ``int`` before use.
        max_features: Forwarded unchanged to the classifier.
        X: Feature matrix passed to ``cross_val_score``.
        Y: Target labels passed to ``cross_val_score``.

    Returns:
        Mean ``roc_auc`` score over 4 cross-validation folds. The goal is to
        find the hyperparameter combination that maximizes this value.
    """
    forest = RFC(
        n_estimators=int(n_estimators),
        min_samples_split=int(min_samples_split),
        max_features=max_features,
        random_state=2,
    )
    fold_scores = cross_val_score(forest, X, Y, scoring='roc_auc', cv=4)
    return fold_scores.mean()

def camel_minimal(x, y):
    """Evaluate the two-variable camel-back test function at ``(x, y)``.

    Computes ``(4 - 2.1*x^2 + x^4/3) * x^2 + x*y + (-4 + 4*y^2) * y^2``.
    Works on scalars only, because the powers are taken with ``math.pow``.

    Args:
        x: First coordinate.
        y: Second coordinate.

    Returns:
        The function value as a float (lower is better when minimizing).
    """
    x_sq = math.pow(x, 2)
    x_quart = math.pow(x, 4)
    y_sq = math.pow(y, 2)

    # Keep the three terms in this order so the floating-point sum is unchanged.
    x_term = (4.0 - 2.1 * x_sq + (x_quart / 3.0)) * x_sq
    cross_term = x * y
    y_term = (-4.0 + 4.0 * y_sq) * y_sq
    return x_term + cross_term + y_term

def reverse_camel_minimal(x, y):
    """Return the negated two-variable camel-back function at ``(x, y)``.

    Negating the objective lets a maximizing optimizer search for the
    function's minimum.

    Args:
        x: First coordinate.
        y: Second coordinate.

    Returns:
        ``-((4 - 2.1*x^2 + x^4/3) * x^2 + x*y + (-4 + 4*y^2) * y^2)``.
    """
    x_sq = math.pow(x, 2)
    x_quart = math.pow(x, 4)
    y_sq = math.pow(y, 2)

    # Keep the three terms in this order so the floating-point sum is unchanged.
    x_term = (4.0 - 2.1 * x_sq + (x_quart / 3.0)) * x_sq
    cross_term = x * y
    y_term = (-4.0 + 4.0 * y_sq) * y_sq
    return -(x_term + cross_term + y_term)

# Search space for the 2-D camel objective; the dimension names must match
# the keyword arguments of camel_minimal.
space = [
    Real(-3, 3, name='x'),
    Real(-2, 2, name='y'),
]


@use_named_args(space)
def objective_gp(**param):
    """skopt objective: forward the named point to ``camel_minimal``."""
    return camel_minimal(**param)

def gpy_wrap_camel(param):
    """Evaluate ``camel_minimal`` on every row of a 2-D array.

    Args:
        param: Array-like with a ``shape`` attribute, indexed as
            ``param[row, col]``; column 0 is ``x`` and column 1 is ``y``.

    Returns:
        A list with one function value per row, in row order.
    """
    n_points = param.shape[0]
    return [
        camel_minimal(param[row, 0], param[row, 1])
        for row in range(n_points)
    ]

def darwin_wrap_camel(param):
    """Evaluate ``camel_minimal`` on a sequence of keyword-argument dicts.

    Args:
        param: Sequence of mappings, each unpacked as keyword arguments
            into ``camel_minimal`` (so each needs exactly ``x`` and ``y``).

    Returns:
        A list with one function value per entry, in input order.
    """
    return [camel_minimal(**sample) for sample in param]
#====================================================================================
def camel_changed_minimal(x, y, z, t):
    """Evaluate the four-variable benchmark: two camel-back terms added up.

    The result is the two-variable camel-back function applied to ``(x, y)``
    plus the same function applied to ``(z, t)``.

    Args:
        x: First coordinate of the first pair.
        y: Second coordinate of the first pair.
        z: First coordinate of the second pair.
        t: Second coordinate of the second pair.

    Returns:
        The summed function value as a float.
    """
    def _camel_pair(u, v):
        # Two-variable camel-back term, same operation order as the
        # original expanded formula.
        u_sq = math.pow(u, 2)
        u_quart = math.pow(u, 4)
        v_sq = math.pow(v, 2)
        return (4.0 - 2.1 * u_sq + (u_quart / 3.0)) * u_sq + u * v + (-4.0 + 4.0 * v_sq) * v_sq

    return _camel_pair(x, y) + _camel_pair(z, t)


# Search space for the 4-D camel objective; the dimension names must match
# the keyword arguments of camel_changed_minimal.
space2 = [
    Real(-3, 3, name='x'),
    Real(-2, 2, name='y'),
    Real(-3, 3, name='z'),
    Real(-2, 2, name='t'),
]


@use_named_args(space2)
def objective_gp_2(**param):
    """skopt objective: forward the named point to ``camel_changed_minimal``."""
    return camel_changed_minimal(**param)

def reverse_camel_minimal2(x, y, z, t):
    """Return the negated four-variable camel objective.

    Negating the objective lets a maximizing optimizer search for the
    minimum of ``camel_changed_minimal``.
    """
    value = camel_changed_minimal(x, y, z, t)
    return 0 - value

def gpy_wrap_camel2(param):
    """Evaluate ``camel_changed_minimal`` on every row of a 2-D array.

    Args:
        param: Array-like with a ``shape`` attribute, indexed as
            ``param[row, col]``; columns 0..3 are ``x``, ``y``, ``z``, ``t``.

    Returns:
        A list with one function value per row, in row order.
    """
    n_points = param.shape[0]
    return [
        camel_changed_minimal(param[row, 0], param[row, 1], param[row, 2], param[row, 3])
        for row in range(n_points)
    ]

def darwin_wrap_camel2(param):
    """Evaluate ``camel_changed_minimal`` on a sequence of keyword dicts.

    Args:
        param: Sequence of mappings, each unpacked as keyword arguments into
            ``camel_changed_minimal`` (so each needs ``x``, ``y``, ``z``, ``t``).

    Returns:
        A list with one function value per entry, in input order.
    """
    return [camel_changed_minimal(**sample) for sample in param]

# Number of independent optimization runs each benchmark cell loops over
# before averaging the best scores.
test_num = 10

In [13]:
# Benchmark skopt's gp_minimize on the 4-D camel objective: run test_num
# independent optimizations and average the best value each one finds.
# The default GP surrogate is used; the custom ConstantKernel * Matern
# regressor that used to be built here was never passed to gp_minimize,
# so its per-iteration construction has been removed.
rst_lst = []
for idx in range(test_num):
    res_gp = gp_minimize(objective_gp_2,
                    space2,
                    acq_func='EI',      # expected improvement
                    xi=0.01,            # exploitation-exploration trade-off
                    n_calls=50,         # number of objective evaluations per run
                    n_random_starts=20,  # random evaluations before the surrogate is used
                    n_jobs=-1,
                    verbose=False)
    print("gp_minimize Best score={} x: {}, y: {} z: {} t: {}".format(res_gp.fun, res_gp.x[0], res_gp.x[1], 
                                                                      res_gp.x[2], res_gp.x[3]))
    rst_lst.append(res_gp.fun)

print("Average: {}".format(np.average(rst_lst)))

gp_minimize Best score=-1.664879202679681 x: -0.13278591823533414, y: 0.5394949824613002 z: -0.10879189841382697 t: 0.5398823126240182
gp_minimize Best score=-0.44335572560288344 x: 0.11288830916490822, y: 0.5172039987649053 z: -0.33142309313654295 t: -0.2714593682496351
gp_minimize Best score=-0.1656621328267901 x: -0.2653181557202169, y: -0.24447685491692273 z: -0.1431685291628444 t: 0.2940879074661309
gp_minimize Best score=1.5593980642889536 x: -1.547827167998963, y: 0.9047827766592444 z: -0.832060269618518 t: -0.6665627340452318
gp_minimize Best score=-1.715203381417612 x: 0.07600919551404672, y: -0.4932787324628116 z: -0.17803527532533447 t: 0.647270752554606
gp_minimize Best score=-0.1304627923937398 x: 0.36512252373468046, y: -0.9132539321284376 z: -0.2859928539106411 t: 0.08536570983984504
gp_minimize Best score=-1.30973525426093 x: -0.21751174700788134, y: -0.4242420266377065 z: 0.015526404295123086 t: -0.6653692718503146
gp_minimize Best score=0.3749672610583954 x: 0.9597584

In [17]:
# Benchmark skopt's gbrt_minimize on the 4-D camel objective: run test_num
# independent optimizations and average the best value each one finds.
rst_lst = []
for idx in range(test_num):
    res_gp = gbrt_minimize(
        objective_gp_2,
        space2,
        acq_func='EI',        # expected improvement
        xi=0.01,              # exploitation-exploration trade-off
        n_calls=80,           # number of objective evaluations per run
        n_random_starts=50,   # random evaluations before the surrogate is used
        n_jobs=-1,
        verbose=False,
    )

    # res_gp.x holds the best point in space2 order: x, y, z, t.
    print("gbrt_minimize Best score={} x: {}, y: {} z: {} t: {}".format(res_gp.fun, *res_gp.x))
    rst_lst.append(res_gp.fun)

print("Average: {}".format(np.average(rst_lst)))

gbrt_minimize Best score=-0.5795760284112106 x: -0.2154988382317531, y: 0.43561079506717837 z: 0.3556814494525784 t: -0.3380336386705505
gbrt_minimize Best score=0.181385437128457 x: 0.7704480684382982, y: -0.40962043475275056 z: 0.03926630038637402 t: -0.8991278817616737
gbrt_minimize Best score=-1.0173391172264252 x: -1.802985978500866, y: 0.8006952339843667 z: 0.06683321364059891 t: -0.5810251944886857
gbrt_minimize Best score=-1.2866066140248744 x: 0.32722432550480685, y: -0.464516936769257 z: 0.3004398801013761 t: -0.6928483409308688
gbrt_minimize Best score=-0.9046673822696409 x: 0.2967462053106509, y: 0.5005261957708473 z: 0.22470997592183783 t: 0.7239673730214529
gbrt_minimize Best score=-1.157616453959612 x: -1.6417528077059549, y: 0.8129774017292841 z: 0.05881288298977827 t: -0.7846950686031378
gbrt_minimize Best score=0.2234088991628443 x: -0.25058543765454777, y: 0.33485076014486737 z: -0.9055255479482502 t: 0.8412755174464119
gbrt_minimize Best score=-1.025670122936263 x: 

In [25]:
# Benchmark bayes_opt on the 4-D camel objective over test_num independent runs.
# This optimizer maximizes, so it is given the negated objective
# (reverse_camel_minimal2). The reported targets and their average therefore
# have the opposite sign to the minimizer-based cells.
rst_lst = []
for idx in range(test_num):
    # Bounded region of parameter space
    pbounds = {'x': (-3, 3), 'y': (-2, 2), 'z': (-3, 3), 't': (-2, 2)}

    # NOTE(review): `kernel` is not a constructor argument documented by stock
    # bayes_opt releases; presumably a patched/forked install is in use. Confirm.
    optimizer = BayesianOptimization(
        f=reverse_camel_minimal2,
        pbounds=pbounds,
        verbose=0, # verbose = 1 prints only when a maximum is observed, verbose = 0 is silent
        # random_state=1,  # left unset, so every run is seeded differently
        kernel=Product(ConstantKernel(1.0), Matern(nu=2.5))
    )
    optimizer.maximize(
        init_points=30,
        n_iter=50,
    )
    # optimizer.max is printed as a dict with 'target' and 'params' keys.
    print(optimizer.max)
    rst_lst.append(optimizer.max['target'])
    

print("Average: {}".format(np.average(rst_lst)))

{'target': 1.925678572380452, 'params': {'t': -0.6511135695625256, 'x': -0.04811957040646764, 'y': -0.7160890146595857, 'z': 0.1753245459971482}}
{'target': 0.10066559195246239, 'params': {'t': -0.6266306682439651, 'x': -0.9058426297896784, 'y': 0.4946627107147257, 'z': 0.021926791530353817}}
{'target': 0.5204440294625988, 'params': {'t': 0.3863357804342729, 'x': 0.12939665473021114, 'y': 0.33799767431282246, 'z': -0.3292210299405671}}
{'target': -0.2277718848413065, 'params': {'t': 0.40733607997386345, 'x': -0.3517573283195271, 'y': 0.7404558520447663, 'z': -1.8162987383075009}}
{'target': -0.4159654322417497, 'params': {'t': 0.3081484548704245, 'x': 1.0262446557695242, 'y': -0.7918893434671532, 'z': 0.20890755474700962}}
{'target': 0.9803487027882566, 'params': {'t': -0.7192208732972535, 'x': -0.13806983034486658, 'y': 0.35858150744883144, 'z': -0.2572634188024671}}
{'target': 1.3585730063778412, 'params': {'t': 0.8717845880857453, 'x': -0.028646608629555814, 'y': 0.7308742770501127,

In [5]:
# Benchmark GPyOpt with the EI acquisition on the 4-D camel objective:
# run test_num independent optimizations and average the best observed value.
rst_lst = []
for idx in range(test_num):
    domain = [
        {'name': 'x', 'type': 'continuous', 'domain': (-3., 3.)},
        {'name': 'y', 'type': 'continuous', 'domain': (-2., 2.)},
        {'name': 'z', 'type': 'continuous', 'domain': (-3., 3.)},
        {'name': 't', 'type': 'continuous', 'domain': (-2., 2.)},
    ]
    opt = GPyOpt.methods.BayesianOptimization(
        f=gpy_wrap_camel2,            # function to optimize (evaluated row by row)
        domain=domain,                # box-constraints of the problem
        acquisition_type='EI',        # expected improvement
        # NOTE(review): GPyOpt documents acquisition_weight for LCB; EI is
        # tuned through acquisition_jitter. Confirm this value has any effect.
        acquisition_weight=0.1,
        initial_design_numdata=50,
        verbosity=True,
    )
    opt.run_optimization(max_iter=30)

    # Pick the evaluated point with the smallest objective value.
    x_best = opt.X[np.argmin(opt.Y)]
    best_params = {el['name']: value for el, value in zip(domain, x_best)}
    best_params["target"] = np.min(opt.Y)
    print(best_params)
    rst_lst.append(best_params['target'])
print("Average: {}".format(np.average(rst_lst)))

{'x': -0.21004252135729917, 'y': -0.5098127126915881, 'z': 0.5544395386167595, 't': -0.43666182332445613, 'target': -0.30844886700285923}
{'x': -1.7364411816403105, 'y': 0.660570222919258, 'z': -0.43735124028936534, 't': 0.7356240619010079, 'target': -0.6488863176358074}
{'x': -1.4312675918556348, 'y': 0.6378579808351755, 'z': 1.8831423816850341, 't': -0.7303305478557106, 'target': 0.6392925885563728}
{'x': 0.061554846792345345, 'y': 0.00527023183118318, 'z': 0.19014476128076369, 't': -0.24355207718038221, 'target': -0.11227633146449137}
{'x': 0.04467955812899449, 'y': 0.6387655852832173, 'z': -0.3613468944847895, 't': 0.7728883163027236, 'target': -1.6837855298082414}
{'x': -1.7100018488899096, 'y': 0.7440855886640304, 'z': 0.05440351254435046, 't': 0.8027893074644522, 'target': -1.0472057839153246}
{'x': -0.09967227078227263, 'y': -0.3460996317049606, 'z': 0.16402158342431333, 't': -0.007450265431844415, 'target': -0.24306336660341163}
{'x': 0.14671618717278007, 'y': -0.5375785703151

In [9]:
# Benchmark GPyOpt with the LCB acquisition on the 4-D camel objective:
# run test_num independent optimizations and average the best observed value.
rst_lst = []
for idx in range(test_num):
    domain = [
        {'name': 'x', 'type': 'continuous', 'domain': (-3., 3.)},
        {'name': 'y', 'type': 'continuous', 'domain': (-2., 2.)},
        {'name': 'z', 'type': 'continuous', 'domain': (-3., 3.)},
        {'name': 't', 'type': 'continuous', 'domain': (-2., 2.)},
    ]
    opt = GPyOpt.methods.BayesianOptimization(
        f=gpy_wrap_camel2,            # function to optimize (evaluated row by row)
        domain=domain,                # box-constraints of the problem
        acquisition_type='LCB',       # LCB acquisition
        acquisition_weight=0.1,       # Exploration exploitation
        initial_design_numdata=20,
        verbosity=True,
    )
    opt.run_optimization(max_iter=30)

    # Pick the evaluated point with the smallest objective value.
    x_best = opt.X[np.argmin(opt.Y)]
    best_params = {el['name']: value for el, value in zip(domain, x_best)}
    best_params["target"] = np.min(opt.Y)
    print(best_params)
    rst_lst.append(best_params['target'])
print("Average: {}".format(np.average(rst_lst)))

The set cost function is ignored! LCB acquisition does not make sense with cost.
{'x': -0.052858980418102164, 'y': 0.6707292305411303, 'z': -0.10376861881998618, 't': 0.7405172583201182, 'target': -2.0389020005240415}
The set cost function is ignored! LCB acquisition does not make sense with cost.
{'x': -0.0748613746608827, 'y': 0.7151968511090558, 'z': -1.709501693557943, 't': 0.7971545625994882, 'target': -1.2457816633678362}
The set cost function is ignored! LCB acquisition does not make sense with cost.
{'x': 0.08515876359977655, 'y': -0.7110321664812874, 'z': -0.10360464042572241, 't': 0.7140491992599051, 'target': -2.0624242169405127}
The set cost function is ignored! LCB acquisition does not make sense with cost.
{'x': -0.09414774032842375, 'y': 0.7100779910913267, 'z': 0.08624084714423096, 't': -0.7061984922900154, 'target': -2.062753543589849}
The set cost function is ignored! LCB acquisition does not make sense with cost.
{'x': 0.0927088097169253, 'y': -0.7159665090697298, 'z

In [14]:
# Single run of the project-local Bayesian tuner on the 4-D camel objective.
# NOTE(review): the traceback recorded under this cell names camel_minimal,
# which this code never calls directly; it is presumably left over from an
# earlier revision of the cell. Re-run to refresh the output.
bound = {'x': [-3., 3.], 'y': [-2., 2.], 'z': [-3., 3.], 't': [-2., 2.]}
# One type tag per parameter; 'cont' presumably means continuous (confirm
# against the Bayesian class).
paras_type = ['cont', 'cont','cont', 'cont']
sample_total_num = 80
first_sample_num = 30

# Initialize the tuner.
bays = Bayesian(
    paras_bound=bound, paras_type=paras_type,
    first_sample_num=first_sample_num, sample_total_num=sample_total_num)
# Ask/evaluate/tell loop. Judging by the recorded output, the first request
# returns a batch of initial samples and later requests return one point each;
# that would explain the "+ 1" in the iteration count (TODO confirm).
for i in range(bays.sample_total_num - first_sample_num + 1):
    # Ask the tuner for the next sample(s): a list of parameter dicts.
    sample_x = bays.get_next_sample_x()
    sample_y = darwin_wrap_camel2(sample_x)
    # Log the proposal and its objective values, then feed the values back.
    print("{}: {}".format(sample_x, {"target": sample_y}))
    bays.fit(sample_y)

# get_best() is passed straight to darwin_wrap_camel2, so it is expected to
# return a list of parameter dicts as well.
best_x = bays.get_best()
print("{} and target: {}".format(best_x, darwin_wrap_camel2(best_x)))

[2019-06-24 11:55:43] INFO [__init__:56] bays.params bound = {'x': [-3.0, 3.0], 'y': [-2.0, 2.0], 'z': [-3.0, 3.0], 't': [-2.0, 2.0]}
[2019-06-24 11:55:43] INFO [__init__:57] bays.params type = ['cont', 'cont', 'cont', 'cont']


[{'x': 0.8454734142361024, 'y': 0.799779725450934, 'z': -0.34727087009288393, 't': 0.7525313739617894}, {'x': -1.472720443808681, 'y': 0.3359574372875507, 'z': 1.4810448338801816, 't': 1.676299667150698}, {'x': 0.2477092845902238, 'y': 1.7827370276785564, 'z': 0.9444618333177139, 't': 1.5909194918118694}, {'x': -1.6693239864502853, 'y': 0.862471077291564, 'z': 1.1071141030186853, 't': -0.6702332751562703}, {'x': -1.1083401833056348, 'y': 1.5938866380856278, 'z': 2.4587386498352286, 't': 1.8207413451738814}, {'x': -2.1857540075086463, 'y': -1.7923403110898288, 'z': -2.7744648173140836, 't': 1.982046947477635}, {'x': -2.108090666957522, 'y': -0.7365800623451606, 'z': -1.9736112565109412, 't': 1.0135756261330107}, {'x': -1.9040591316998214, 'y': -0.03523713587590471, 'z': -2.369009934835845, 't': 1.3314857212846087}, {'x': -1.7820188294188317, 'y': 0.6023290747109646, 'z': 1.3514360624254644, 't': -0.026222295689783515}, {'x': 2.376988765655849, 'y': -1.1524341278952361, 'z': 0.1334684927

[{'x': -1.6616100072866387, 'y': 0.8554193074220271, 'z': 1.0820633505663773, 't': -0.67826714412645}]: {'target': [0.45673053955847787]}
[{'x': -1.6618354557620898, 'y': 0.8542913064270304, 'z': 1.081880029460561, 't': -0.6782005640572863}]: {'target': [0.4549174888910963]}
[{'x': -1.6627058675199697, 'y': 0.8551921030018524, 'z': 1.0809761358329277, 't': -0.6780006178570132}]: {'target': [0.45562799074125837]}
[{'x': -1.6620937039901555, 'y': 0.8540694586074371, 'z': 1.0803840419479769, 't': -0.6774666634613654}]: {'target': [0.45504551808149984]}
[{'x': -1.6627732766618144, 'y': 0.8550037582928492, 'z': 1.0800615265693096, 't': -0.6775508380104426}]: {'target': [0.455690689966474]}
[{'x': -1.6624540620296036, 'y': 0.8553548646883369, 'z': 1.080075576480153, 't': -0.6780824094774269}]: {'target': [0.4556545688298276]}
[{'x': -1.6626852102528433, 'y': 0.854562012376392, 'z': 1.0789870335222302, 't': -0.6782725609214628}]: {'target': [0.4536550804470345]}
[{'x': -1.6641916732039352, 'y

TypeError: camel_minimal() got an unexpected keyword argument 'z'

In [26]:
# Spot-check: evaluate the 2-D camel objective at a single hand-picked point.
camel_minimal(-0.16173045408147213, 0.21509990753289807)

-0.10810102953021915

In [31]:
# Benchmark GPyOpt with a GP_MCMC model and the EI_MCMC acquisition on the
# 2-D camel objective: run test_num independent optimizations and average
# the best observed value.
rst_lst = []
for idx in range(test_num):
    domain = [
        {'name': 'x', 'type': 'continuous', 'domain': (-3., 3.)},
        {'name': 'y', 'type': 'continuous', 'domain': (-2., 2.)},
    ]
    opt = GPyOpt.methods.BayesianOptimization(
        f=gpy_wrap_camel,             # function to optimize (evaluated row by row)
        domain=domain,                # box-constraints of the problem
        model_type='GP_MCMC',
        acquisition_type='EI_MCMC',   # MCMC variant of expected improvement
        # NOTE(review): GPyOpt documents acquisition_weight for LCB; confirm
        # whether it has any effect with an EI-type acquisition.
        acquisition_weight=0.1,
        initial_design_numdata=30,
        verbosity=True,
    )
    opt.run_optimization(max_iter=20)

    # Pick the evaluated point with the smallest objective value.
    x_best = opt.X[np.argmin(opt.Y)]
    best_params = {el['name']: value for el, value in zip(domain, x_best)}
    best_params["target"] = np.min(opt.Y)
    print(best_params)
    rst_lst.append(best_params['target'])
print("Average: {}".format(np.average(rst_lst)))

reconstraining parameters GP_regression.rbf
reconstraining parameters GP_regression.Gaussian_noise.variance


(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)


reconstraining parameters GP_regression.rbf
reconstraining parameters GP_regression.Gaussian_noise.variance


{'x': -0.08418938818327455, 'y': 0.6905326192922525, 'target': -1.0277430731863268}
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)
(1, 2)


KeyboardInterrupt: 

In [4]:
# Benchmark GPyOpt with a GP_MCMC model and the MPI_MCMC acquisition on the
# 2-D camel objective: run test_num independent optimizations and average
# the best observed value.
rst_lst = []
for idx in range(test_num):
    domain = [
        {'name': 'x', 'type': 'continuous', 'domain': (-3., 3.)},
        {'name': 'y', 'type': 'continuous', 'domain': (-2., 2.)},
    ]
    opt = GPyOpt.methods.BayesianOptimization(
        f=gpy_wrap_camel,             # function to optimize (evaluated row by row)
        domain=domain,                # box-constraints of the problem
        model_type='GP_MCMC',
        acquisition_type='MPI_MCMC',  # MCMC variant of maximum probability of improvement
        # NOTE(review): GPyOpt documents acquisition_weight for LCB; confirm
        # whether it has any effect with an MPI-type acquisition.
        acquisition_weight=0.1,
        initial_design_numdata=20,
        verbosity=True,
    )
    opt.run_optimization(max_iter=30)

    # Pick the evaluated point with the smallest objective value.
    x_best = opt.X[np.argmin(opt.Y)]
    best_params = {el['name']: value for el, value in zip(domain, x_best)}
    best_params["target"] = np.min(opt.Y)
    print(best_params)
    rst_lst.append(best_params['target'])
print("Average: {}".format(np.average(rst_lst)))

reconstraining parameters GP_regression.rbf
reconstraining parameters GP_regression.Gaussian_noise.variance
reconstraining parameters GP_regression.rbf
reconstraining parameters GP_regression.Gaussian_noise.variance


{'x': 0.009461503933898692, 'y': -0.7141108800815142, 'target': -1.0060021432430921}


KeyboardInterrupt: 