In [16]:
from bayes_opt import BayesianOptimization
from bayes_opt import SequentialDomainReductionTransformer
from bayes_opt import UtilityFunction
from dqn_scripts import *
from bayes_opt import SequentialDomainReductionTransformer
import pandas as pd
import numpy as np

In [35]:
# Bounds transformer handed to BayesianOptimization when the optimiser is built.
bounds_transformer = SequentialDomainReductionTransformer()

# Running count of objective evaluations; black_box_function increments it
# through `global trial` on every call.
trial = 0

In [36]:
def evaluate_and_test(params, trial_number, folder, max_episodes=20000):
    """Train a DoubleDQNet agent with the given hyper-parameters and track its evaluation score.

    Parameters
    ----------
    params : sequence
        Hyper-parameters, read by position:

        * ``params[0]`` -- learning rate given to ``optim.Adam``.
        * ``params[1]`` -- forwarded as the last argument of
          ``DoubleDQNet.train_model`` (callers in this notebook pass gamma).
        * ``params[2]`` -- amount subtracted from epsilon after every
          training step (epsilon is floored at 0.1).
        * ``params[3]`` -- number of environment steps between two copies of
          the online network into the target network. May be a float; it is
          rounded to the nearest positive integer.
    trial_number : int
        Identifier of the trial. Currently unused: artefact saving is
        disabled in this function.
    folder : str or None
        Output directory for artefacts. Currently unused for the same reason.
    max_episodes : int, optional
        Number of training episodes to run (default 20000, the value that
        used to be hard-coded).

    Returns
    -------
    tuple
        ``(tracker, online_net)``. ``tracker`` is a NumPy array whose rows
        are ``[eval_score, episode]``; the first row is the ``[0, 0]``
        placeholder and one row is added every 10 episodes (episode 0
        excluded). ``online_net`` is the trained online network.
    """
    initial_exploration = 1000  # env steps collected before any training step
    log_interval = 10           # evaluate every `log_interval` episodes
    batch_size = 64
    eval_episodes = 20
    min_epsilon = 0.1

    learning_rate = params[0]
    gamma = params[1]
    epsilon_decay = params[2]
    # The Bayesian optimiser proposes continuous values. With a non-integral
    # float, `steps % update_freq == 0` is (almost) never true and the target
    # network would never be refreshed, so force a positive integer interval.
    target_update_interval = max(1, int(round(params[3])))

    # Train env
    machine = Machine()
    machine.curr_state = 0
    env = MachineEnv(machine)

    # Eval env (kept separate so evaluation does not disturb the training env)
    machine2 = Machine()
    machine2.curr_state = 0
    env2 = MachineEnv(machine2)

    online_net = DoubleDQNet(4, 2)
    target_net = DoubleDQNet(4, 2)
    update_target_model(online_net, target_net)

    # Named `adam` so it cannot be confused with the notebook-level
    # BayesianOptimization object called `optimizer`.
    adam = optim.Adam(online_net.parameters(), lr=learning_rate)
    online_net.train()
    target_net.train()
    memory = Memory(10000)

    epsilon = 1.0
    steps = 0
    tracker = [[0, 0]]

    for e in range(max_episodes):
        done = False
        state = torch.Tensor(env.reset()).unsqueeze(0)

        while not done:
            steps += 1
            # NOTE(review): actions are chosen with target_net, not online_net;
            # kept as in the original -- confirm this is intended.
            action = get_action(state, target_net, epsilon, env)
            next_state, reward, done, _ = env.step(action)
            next_state = torch.Tensor(next_state).unsqueeze(0)

            mask = 0 if done else 1
            action_one_hot = np.zeros(2)
            action_one_hot[action] = 1
            memory.push(state, next_state, action_one_hot, reward, mask)

            state = next_state

            if steps > initial_exploration:
                epsilon = max(epsilon - epsilon_decay, min_epsilon)

                batch = memory.sample(batch_size)
                DoubleDQNet.train_model(online_net, target_net, adam, batch, gamma)

                if steps % target_update_interval == 0:
                    update_target_model(online_net, target_net)

        # Periodic evaluation on the dedicated eval env; episode 0 is skipped.
        if e > 0 and e % log_interval == 0:
            eval_score = compute_avg_return(env2, online_net, eval_episodes)
            tracker.append([eval_score, e])

    return (np.array(tracker), online_net)

In [37]:
def black_box_function(lr, gamma, eps_decay, update_freq):
    """Objective handed to the Bayesian optimiser.

    Bumps the global ``trial`` counter, trains an agent through
    ``evaluate_and_test`` with the four hyper-parameters packed as
    ``[lr, gamma, eps_decay, update_freq]``, then returns the value of
    ``compute_avg_return`` for the resulting policy on a newly created
    environment (third argument 10).
    """
    global trial
    trial = trial + 1

    # Only the trained policy is needed; the training tracker is discarded.
    _, policy = evaluate_and_test([lr, gamma, eps_decay, update_freq], trial, None)

    # Build a fresh environment for the final scoring run.
    eval_machine = Machine()
    eval_machine.curr_state = 0
    eval_env = MachineEnv(eval_machine)

    return compute_avg_return(eval_env, policy, 10)

In [38]:
# Optimiser over the four hyper-parameters consumed by black_box_function.
# Each entry maps a parameter name to its (lower, upper) search bound.
optimizer = BayesianOptimization(
    f=black_box_function,
    pbounds=dict(
        lr=(1e-05, 1e-01),
        gamma=(0.8, 0.99),
        eps_decay=(1e-05, 1e-01),
        update_freq=(20, 200),
    ),
    verbose=2,
    bounds_transformer=bounds_transformer,
)

In [39]:
# Load the previously recorded trial results and discard the stray index
# column that was written out with the CSV.
df = pd.read_csv('results.csv').drop(columns=['Unnamed: 0'])
print(df)

# Plain NumPy view of the table; rows are consumed positionally further down.
df_arr = df.to_numpy()
df_arr

        lr  gamma  epsilon_decay  update_freq    eval_1    eval_2    eval_3  \
0   0.0001   0.80        0.00001           50   8553.47   8430.58   8557.45   
1   0.0001   0.90        0.00010          100   9943.89  10161.06  10096.69   
2   0.0001   0.95        0.00100          150  26629.87  26718.36  26542.90   
3   0.0001   0.99        0.01000          200  20395.44  20582.21  20647.40   
4   0.0010   0.80        0.00010          150   9594.16   9630.14   9724.68   
5   0.0010   0.90        0.00001          200  12295.69  12231.72  12132.94   
6   0.0010   0.95        0.01000           50  10901.19  11059.34  10754.28   
7   0.0010   0.99        0.00100          100  22684.04  22325.43  23005.05   
8   0.0100   0.80        0.00100          200   9921.31   9911.23  10026.68   
9   0.0100   0.90        0.01000          150  16542.93  16691.26  16832.08   
10  0.0100   0.95        0.00001          100  12840.08  12784.63  12666.42   
11  0.0100   0.99        0.00010           50  24721

array([[1.00000000e-04, 8.00000000e-01, 1.00000000e-05, 5.00000000e+01,
        8.55347000e+03, 8.43058000e+03, 8.55745000e+03, 8.51383333e+03],
       [1.00000000e-04, 9.00000000e-01, 1.00000000e-04, 1.00000000e+02,
        9.94389000e+03, 1.01610600e+04, 1.00966900e+04, 1.00672133e+04],
       [1.00000000e-04, 9.50000000e-01, 1.00000000e-03, 1.50000000e+02,
        2.66298700e+04, 2.67183600e+04, 2.65429000e+04, 2.66303767e+04],
       [1.00000000e-04, 9.90000000e-01, 1.00000000e-02, 2.00000000e+02,
        2.03954400e+04, 2.05822100e+04, 2.06474000e+04, 2.05416833e+04],
       [1.00000000e-03, 8.00000000e-01, 1.00000000e-04, 1.50000000e+02,
        9.59416000e+03, 9.63014000e+03, 9.72468000e+03, 9.64966000e+03],
       [1.00000000e-03, 9.00000000e-01, 1.00000000e-05, 2.00000000e+02,
        1.22956900e+04, 1.22317200e+04, 1.21329400e+04, 1.22201167e+04],
       [1.00000000e-03, 9.50000000e-01, 1.00000000e-02, 5.00000000e+01,
        1.09011900e+04, 1.10593400e+04, 1.07542800e+04, 1.

In [40]:
# Seed the optimiser with the recorded results: the first four columns of
# each row are the hyper-parameters, and column index 7 is used as the target
# (it appears to be the mean of the three eval columns -- confirm against the CSV).
param_names = ('lr', 'gamma', 'eps_decay', 'update_freq')
for record in df_arr:
    optimizer.register(
        params=dict(zip(param_names, record[:4])),
        target=record[7],
    )

In [41]:
# Sanity check: show the points registered so far with their targets.
# NOTE(review): `_space._cache` is a private attribute of the optimiser and may
# change between bayes_opt releases; `optimizer.res` is the public view.
print(optimizer._space._cache)

{(1e-05, 0.8, 0.0001, 50.0): 8513.833333333334, (0.0001, 0.9, 0.0001, 100.0): 10067.213333333333, (0.001, 0.95, 0.0001, 150.0): 26630.37666666667, (0.01, 0.99, 0.0001, 200.0): 20541.68333333333, (0.0001, 0.8, 0.001, 150.0): 9649.66, (1e-05, 0.9, 0.001, 200.0): 12220.116666666667, (0.01, 0.95, 0.001, 50.0): 10904.936666666666, (0.001, 0.99, 0.001, 100.0): 22671.50666666667, (0.001, 0.8, 0.01, 200.0): 9953.073333333334, (0.01, 0.9, 0.01, 150.0): 16688.756666666668, (1e-05, 0.95, 0.01, 100.0): 12763.71, (0.0001, 0.99, 0.01, 50.0): 25010.763333333336, (0.01, 0.8, 0.1, 100.0): 10454.57, (0.001, 0.9, 0.1, 50.0): 18546.126666666667, (0.0001, 0.95, 0.1, 200.0): 23807.153333333332, (1e-05, 0.99, 0.1, 150.0): 26176.456666666665}


In [42]:
# NOTE(review): these imports sit mid-notebook; consider moving them to the
# import cell at the top so dependencies are visible in one place.
from bayes_opt.logger import JSONLogger
from bayes_opt.event import Events
# Subscribe a JSON logger (writing to ./logs_SDR.json) to the optimiser's
# OPTIMIZATION_STEP event.
logger = JSONLogger(path="./logs_SDR.json")
optimizer.subscribe(Events.OPTIMIZATION_STEP, logger)

In [44]:
# Run the optimiser with init_points=5 and n_iter=20 using the 'ei' acquisition
# with xi=0.005. Every step calls black_box_function, which trains a full agent,
# so this cell is very slow (the recorded run ended in a KeyboardInterrupt).
# NOTE(review): passing `acq`/`xi` to maximize() presumably relies on the legacy
# bayes_opt 1.x signature -- confirm the pinned version.
optimizer.maximize(n_iter = 20,  init_points=5, acq='ei',xi=0.005 )

KeyboardInterrupt: 

In [None]:
# Acquisition function for the manual suggest/register loop below.
# NOTE(review): xi=0 here whereas the maximize() call above used xi=0.005 --
# confirm the difference is intentional.
utility = UtilityFunction(kind="ei",kappa= 0,xi=0)

In [None]:
# Manual optimisation loop: ask for a candidate, evaluate it with the
# objective, feed the observation back, and report it. Ten rounds in total.
for _round in range(10):
    candidate = optimizer.suggest(utility)
    observed = black_box_function(**candidate)
    optimizer.register(params=candidate, target=observed)

    print(observed, candidate)

# Best point found so far.
print(optimizer.max)

In [None]:
# Show the best target registered so far together with its parameters.
print(optimizer.max)

In [None]:
# List every registered evaluation in order, one entry per iteration.
for step_index, step_result in enumerate(optimizer.res):
    print("Iteration {}: \n\t{}".format(step_index, step_result))

In [None]:
# Further optimisation run (init_points=2, n_iter=3) with an extra `kernel`
# keyword passed to maximize().
# NOTE(review): `RBF` is not imported in any visible cell -- presumably
# sklearn.gaussian_process.kernels.RBF. Unless `from dqn_scripts import *`
# happens to export it, this cell raises NameError; import it explicitly.
optimizer.maximize(
    init_points=2,
    n_iter=3,
    kernel = RBF(1.0)
)