In [1]:
import os
os.chdir('/home/xyz/data/haoyan/bandits')

import bandits as bd
import numpy as np

In [25]:
def rastrigin(vec):
    """Evaluate the Rastrigin test function at a point.

    Computes ``10*n + sum(v_i**2 - 10*cos(2*pi*v_i))`` where ``n`` is
    ``len(vec)``.

    Parameters
    ----------
    vec : array-like of float
        Coordinates of the point. Plain lists and tuples are accepted as
        well as NumPy arrays (they are converted with ``np.asarray``).

    Returns
    -------
    float
        The function value; 0 at the origin.
    """
    # Coerce first so that `vec**2` also works for list/tuple input.
    vec = np.asarray(vec, dtype=float)
    n = len(vec)
    # axis=0 mirrors what the builtin sum() did when iterating the array.
    return 10 * n + np.sum(vec**2 - 10 * np.cos(2 * np.pi * vec), axis=0)

def ackley(x, y):
    """Evaluate the two-dimensional Ackley test function at (x, y).

    The value is 0 (up to rounding) at the origin.
    """
    # Exponential of the scaled distance from the origin.
    radial_term = -20 * np.exp(-0.2 * np.sqrt(0.5 * (x**2 + y**2)))
    # Exponential of the averaged cosine modulation.
    cosine_term = np.exp(0.5 * (np.cos(2 * np.pi * x) + np.cos(2 * np.pi * y)))
    return radial_term - cosine_term + np.exp(1) + 20

def sphere(x, y):
    """Return the squared distance of the point (x, y) from the origin."""
    squared_x = x**2
    squared_y = y**2
    return squared_x + squared_y

def g(u, x, y):
    """Mixed objective: `u` picks which test function is evaluated at (x, y).

    Every branch negates the underlying function, so larger is better:

    * ``u == 0`` -> ``-rastrigin([x, y])``
    * ``u == 1`` -> ``-ackley(x, y) + 10``
    * ``u == 2`` -> ``-sphere(x, y) + 20``

    Any other value of `u` falls through and returns ``None``.
    """
    if u == 0:
        point = np.array([x, y])
        return -rastrigin(point)
    if u == 1:
        ackley_value = ackley(x, y)
        return -ackley_value + 10
    if u == 2:
        sphere_value = sphere(x, y)
        return -sphere_value + 20

In [26]:
# Shared experiment configuration.
n_trials = 5000              # trial budget handed to env.run
n_experiment = 5             # number of repeated, independently seeded runs
vec_u = [0, 1, 2]            # discrete selector values understood by g
vec_x = list(range(-1, 4))   # discrete candidate values for x
bound = (-5, 5)              # (lower, upper) range for the continuous y

# Tag embedded in the names of the log files.
func_name = 'mix'

In [27]:
# Discrete parameters, given as explicit candidate lists.
params_list_dict = dict(u=vec_u, x=vec_x)

# Continuous parameter, given as a (lower, upper) range.
pbounds = dict(y=bound)

# Result containers, (re)initialised empty for this experiment.
scores_list, actions_list = [], []

def single_func(n, file):
    """Run one hybrid bandit experiment and return its trace.

    Parameters
    ----------
    n : int
        Experiment index; ``n * 128`` is used as the random seed for both the
        bandit and the environment.
    file : file object
        Open handle forwarded to ``env.run`` (presumably used for logging the
        run; verify against the ``bandits`` package).

    Returns
    -------
    tuple
        ``(scores, actions)`` exactly as returned by ``env.run``.
    """
    seed = n * 128

    # Bandit over the discrete (u, x) candidates with y given as a range.
    hybrid_bandit = bd.GaussianBandit(
        g,
        params_list_dict=params_list_dict,
        pbounds=pbounds,
        random_state=seed,
        init_points=3,
        n_iter=3,
    )

    softmax = bd.SoftmaxPolicy()
    gradient_agent = bd.GradientAgent(
        hybrid_bandit,
        softmax,
        alpha=0.01,
        baseline=True,
        avg_coef=None,
        increase_rate=0,
    )
    environment = bd.Environment(
        hybrid_bandit, gradient_agent, 'Gradient Agents', random_state=seed
    )

    return environment.run(n_trials, file=file, stop_iter=30)

# Log file for the hybrid runs; built once instead of repeating the literal.
log_path = f'/home/xyz/data/haoyan/bandits/logs/rl_hybrid_results_{func_name}.txt'

# Mode 'w' creates the file or truncates an existing one, so every execution
# of this cell starts from an empty log without a separate exists/remove step.
with open(log_path, 'w') as file:
    # One (scores, actions) pair per experiment index.
    res = [single_func(x, file) for x in range(n_experiment)]

0
(1, 2)
|   iter    |  target   |     y     |
-------------------------------------
| [0m1        [0m | [0m4.006    [0m | [0m1.138    [0m |
| [0m2        [0m | [0m-1.75    [0m | [0m-4.5     [0m |
| [0m3        [0m | [0m1.032    [0m | [0m3.186    [0m |
4.006038334137642
0.01
((1, 2), {'y': 1.1377870791610327}) 4.006038334137642
1
(2, -1)
|   iter    |  target   |     y     |
-------------------------------------
| [0m1        [0m | [0m1.877    [0m | [0m-4.138   [0m |
| [95m2        [0m | [95m17.95    [0m | [95m1.027    [0m |
| [95m3        [0m | [95m18.73    [0m | [95m0.5168   [0m |
18.73295577865724
0.01
((2, -1), {'y': 0.5167632159343025}) 18.73295577865724
2
(1, 3)
|   iter    |  target   |     y     |
-------------------------------------
| [0m1        [0m | [0m-2.12    [0m | [0m4.376    [0m |
| [95m2        [0m | [95m0.5605   [0m | [95m2.273    [0m |
| [95m3        [0m | [95m1.516    [0m | [95m-2.126   [0m |
1.5158634408860507


In [28]:
# Split the per-experiment (scores, actions) pairs into two parallel lists.
scores_list = [run[0] for run in res]
actions_list = [run[1] for run in res]

# For every experiment report the best action, its score and the number of
# actions that were taken.
for scores, actions in zip(scores_list, actions_list):
    best = np.argmax(scores)
    print(actions[best], scores[best], len(actions))

((2, 0), {'y': -0.003536593357439166}) 19.999987492507422 132
((2, 0), {'y': 0.0001630419957020035}) 19.999999973417307 163
((2, 0), {'y': -0.0033663175734105484}) 19.999988667905995 139
((2, 0), {'y': 8.20285767478696e-05}) 19.999999993271313 138
((2, 0), {'y': -0.0001632850468145454}) 19.999999973337992 134


In [31]:
import os
import numpy as np
import scipy.stats as stats

from sklearn.model_selection import ParameterSampler

# specify parameters and distributions to sample from
params_list_dict = {
    'u': vec_u,
    'x': vec_x,
    'y': stats.uniform(bound[0], bound[1]),
}

# n_iter = max([len(actions) for actions in actions_list])
n_iter = 500
n_experiment = 5

res_dict_list = []
if os.path.exists(f'/home/xyz/data/haoyan/bandits/logs/grid_search_results_{func_name}.txt'):
    os.remove(f'/home/xyz/data/haoyan/bandits/logs/grid_search_results_{func_name}.txt')

with open(f'/home/xyz/data/haoyan/bandits/logs/grid_search_results_{func_name}.txt', 'a') as file:
    for n in range(n_experiment):
        param_list = list(ParameterSampler(params_list_dict, n_iter=n_iter, random_state=n*128))
        res_dict = {}
        for i, param in enumerate(param_list):
            res_dict[str(param)] = g(**param)
            file.write(f'({n*128}, {i}, {param}, {res_dict[str(param)]})\n')
            
        res_dict_list.append(res_dict)

In [33]:
from bayes_opt import BayesianOptimization
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events

def g_hat(u, x, y):
    """Continuous wrapper around `g` for use as a Bayesian-optimisation target.

    The optimiser proposes real values for all three parameters, while `g`
    expects an integer selector `u` and a discrete `x`.

    Parameters
    ----------
    u, x : float
        Continuous proposals, rounded to the nearest integer before being
        passed to `g`. Rounding (rather than truncating) gives every integer
        inside the bounds a region of the search space; with truncation the
        largest value is only hit exactly on the upper bound.
    y : float
        Continuous coordinate, forwarded unchanged.

    Returns
    -------
    The value of ``g(round(u), round(x), y)``.
    """
    return g(
        int(round(u)),
        int(round(x)),
        y,  # forward y itself; x was previously passed here by mistake
    )


# (lower, upper) search range for each parameter handed to the optimiser.
pbounds = dict(
    u=(0, 2),
    x=(-1, 3),
    y=(-5, 5),
)


def single_experiment(n):
    """Run one Bayesian-optimisation baseline and return its best result.

    Parameters
    ----------
    n : int
        Experiment index. It is used directly as the optimiser's
        ``random_state``; note that the log file name uses ``n*128`` instead.

    Returns
    -------
    The optimiser's ``max`` attribute after ``maximize`` has finished.
    """
    bayes_optimizer = BayesianOptimization(
        f=g_hat,
        pbounds=pbounds,
        random_state=n,
        allow_duplicate_points=False,
    )

    # Record every optimisation step to a per-experiment JSON log.
    log_file = f"/home/xyz/data/haoyan/bandits/logs/bayesian_results_{func_name}_{n*128}.log"
    step_logger = JSONLogger(path=log_file)
    bayes_optimizer.subscribe(Events.OPTIMIZATION_STEP, step_logger)

    bayes_optimizer.maximize(init_points=1, n_iter=500)
    return bayes_optimizer.max
    
from multiprocessing import Pool


# Run the Bayesian-optimisation experiments in parallel worker processes,
# one experiment index per task.
n_experiment = 5
param_list = [*range(n_experiment)]
with Pool(5) as worker_pool:
    b_res_list = worker_pool.map(single_experiment, param_list)
    
# for n in range(n_experiment):
#     optimizer = BayesianOptimization(
#         f=g_hat,
#         pbounds=pbounds,
#         random_state=n,
#         allow_duplicate_points=True
#     )

#     optimizer.maximize(init_points=4, n_iter=500)
#     b_res_list.append(optimizer.max)

KeyboardInterrupt: 

In [37]:
# Trial budget for the fully discretised bandit runs.
n_trials = 1000

# Here y is a discrete parameter too: 200 evenly spaced values on [-5, 5].
params_list_dict = dict(
    u=vec_u,
    x=vec_x,
    y=np.linspace(-5, 5, 200),
)

def single_func(n, file):
    """Run one fully discretised bandit experiment and return its trace.

    This redefines the ``single_func`` from the earlier hybrid cell; here the
    bandit is a ``GaussianBanditOri`` built only from discrete candidate lists.

    Parameters
    ----------
    n : int
        Experiment index; ``n * 128`` seeds the environment.
    file : file object
        Open handle forwarded to ``env.run`` (presumably used for logging the
        run; verify against the ``bandits`` package).

    Returns
    -------
    tuple
        ``(scores, actions)`` exactly as returned by ``env.run``.
    """
    discrete_bandit = bd.GaussianBanditOri(g, params_list_dict=params_list_dict)

    softmax = bd.SoftmaxPolicy()
    gradient_agent = bd.GradientAgent(
        discrete_bandit, softmax, alpha=0.01, baseline=True, increase_rate=0
    )
    environment = bd.Environment(
        discrete_bandit, gradient_agent, 'Gradient Agents', random_state=n * 128
    )

    return environment.run(n_trials, file=file, stop_iter=700)

# Log file for the discretised runs; built once instead of repeating the literal.
rl_log_path = f'/home/xyz/data/haoyan/bandits/logs/rl_results_{func_name}.txt'

# Mode 'w' creates the file or truncates an existing one, so every execution
# of this cell starts from an empty log without a separate exists/remove step.
with open(rl_log_path, 'w') as file:
    # One (scores, actions) pair per experiment index.
    res = [single_func(x, file) for x in range(n_experiment)]
    

0
(1, 2, -2.688442211055276)
1.0987903769643932
0.01
((1, 2, -2.688442211055276), None) 1.0987903769643932
1
(2, -1, 2.2864321608040203)
13.772227974041058
0.01
((2, -1, 2.2864321608040203), None) 13.772227974041058
2
(1, 3, -4.597989949748744)
-2.42113515847473
0.01
((1, 3, -4.597989949748744), None) -2.42113515847473
3
(1, 2, -3.291457286432161)
0.33180241233948493
0.01
((1, 2, -3.291457286432161), None) 0.33180241233948493
4
(1, 0, -1.4824120603015074)
4.502218305856495
0.01
((1, 0, -1.4824120603015074), None) 4.502218305856495
5
(1, 3, 1.884422110552764)
1.7960897998795744
0.01
((1, 3, 1.884422110552764), None) 1.7960897998795744
6
(1, 0, 0.6281407035175883)
6.747592769279805
0.01
((1, 0, 0.6281407035175883), None) 6.747592769279805
7
(2, 2, -1.2311557788944723)
14.484255448094746
0.01
((2, 2, -1.2311557788944723), None) 14.484255448094746
8
(2, 3, -0.47738693467336635)
10.772101714603167
0.01
((2, 3, -0.47738693467336635), None) 10.772101714603167
9
(1, -1, 2.5376884422110555)
1.8

Our hybrid optimization method uses reinforcement learning to handle the discrete parameters, which successfully reduces the dimension of the parameter space that Bayesian Optimization has to search. The technique improves the accuracy
of the solutions compared with Bayesian Optimization and Random Search. The overhead of each Bayesian Optimization step
is overcome because the dimension reduction greatly lowers the number of Bayesian Optimization iterations needed in the
hybrid method. The overall number of iterations needed to reach the global minimizer is also reduced compared with
Random Search.