In [1]:
import numpy as np
import os
import torch
from torch.nn.functional import normalize
from vissim import Vissim
from ddpg import DDPG
import time
import matplotlib.pyplot as plt

In [2]:
# --- DDPG hyperparameters (forwarded to the DDPG constructor by keyword) ---
alpha = 0.0001   # presumably the actor learning rate -- confirm against ddpg.DDPG
beta = 0.0003    # presumably the critic learning rate -- confirm against ddpg.DDPG
gamma = 0.6      # presumably the discount factor -- confirm against ddpg.DDPG
tau = 0.005      # presumably the soft target-update rate -- confirm against ddpg.DDPG
n_agents = 1     # number of agents (sizes the reward arrays)
nodes = 3        # number of intersections (sizes the phase/offset arrays)

# --- Simulation parameters ---
sim_time = 4500      # simulation second at which an episode ends
num_episodes = 100   # number of episodes to run
# Simulation seconds at which the agent picks a new timing plan:
# every 60 s from second 900 up to and including sim_time.
decision_point = list(range(900, sim_time + 1, 60))

In [3]:
# Render figures at a higher resolution than the matplotlib default.
plt.rcParams['figure.dpi'] = 150

# Build the DDPG agent; checkpoints are read from / written to chkpt_dir.
# NOTE(review): this is a machine-specific absolute path -- consider moving it
# to a configurable constant.
ddpg_agent = DDPG(alpha=alpha, beta=beta, gamma=gamma, tau=tau, chkpt_dir='/Users/chhuang/ddpg_model/model/')

# Kept only so that any later cell referencing `Path` keeps working; it is not
# needed to build `Filename` (see below).
Path = os.getcwd()

# The network file path is absolute. The original wrapped it in
# os.path.join(Path, ...), but joining onto an absolute path discards the
# preceding component, so the result was always this literal path.
Filename = r"C:\Users\chhuang\ddpg_model\vissim_network\multi-3.inpx"

# Fail fast with a readable message instead of an opaque COM error when the
# network file is missing.
if not os.path.isfile(Filename):
    raise FileNotFoundError('VISSIM network file not found: %s' % Filename)

# Open the VISSIM network through the project's Vissim wrapper.
env = Vissim(Filename)

com_error: (-2146959355, '伺服器執行失敗', None, None)

In [None]:
def _normalize_to_batch(values, device):
    """Turn one raw state vector into a normalized tensor with a leading batch axis.

    Args:
        values: Numeric data accepted by ``torch.tensor`` (e.g. a list of floats).
        device: ``torch.device`` on which the tensor is created.

    Returns:
        A float32 tensor equal to ``values`` normalized along dimension 0
        (``torch.nn.functional.normalize`` defaults, i.e. L2 norm), with a new
        leading dimension of size 1.
    """
    tensor = torch.tensor(values, dtype=torch.float, device=device)
    # unsqueeze(0) adds the same leading axis that torch.stack([tensor]) did.
    return normalize(tensor, dim=0).unsqueeze(0)


def input_transform(car_D, scooter_D, car_S, scooter_S):
    """Convert the four raw state components into normalized, batched tensors.

    Each argument is processed independently: it is converted to a float32
    tensor on the GPU when CUDA is available (CPU otherwise), normalized along
    dimension 0, and given a leading batch dimension of size 1.

    Args:
        car_D: Raw state values for cars (first state component).
        scooter_D: Raw state values for scooters (first state component).
        car_S: Raw state values for cars (second state component).
        scooter_S: Raw state values for scooters (second state component).

    Returns:
        A 4-tuple ``(car_D, scooter_D, car_S, scooter_S)`` of transformed
        tensors, in the same order as the arguments.
    """
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    return (
        _normalize_to_batch(car_D, device),
        _normalize_to_batch(scooter_D, device),
        _normalize_to_batch(car_S, device),
        _normalize_to_batch(scooter_S, device),
    )

In [None]:
# Average cumulative reward of every episode (per agent, and summed over agents).
avg_rewards_per_episode, avg_global_rewards_per_episode = [], []

# Average duration of each signal phase in every episode.
avg_phase_time_1_per_episode, avg_phase_time_2_per_episode = [], []

# Average offset in every episode.
avg_offset_per_episode = []

# Travel-time data: one row per episode, 36 columns
# (presumably one per travel-time measurement -- confirm against Vissim.get_travel_time).
avg_travtime_car_per_episode = np.zeros(shape=(num_episodes, 36))
avg_travtime_scooter_per_episode = np.zeros(shape=(num_episodes, 36))

# Queue-length data: one row per episode, 12 columns
# (presumably one per queue counter -- confirm against Vissim.get_each_queue_length).
avg_queue_length_per_episode = np.zeros(shape=(num_episodes, 12))

# Average number of stops.
avg_num_stops_car, avg_num_stops_scooter = [], []

# Average delay.
avg_delay_car, avg_delay_scooter = [], []

# Average total travel time.
avg_travtmtot_car, avg_travtmtot_scooter = [], []

# Average total number of stops.
avg_stopstot_car, avg_stopstot_scooter = [], []

# Average total delay time.
avg_delaytot_car, avg_delaytot_scooter = [], []

# Wall-clock time spent on each episode.
spent_times = []

In [6]:
# Evaluate the trained DDPG agent: for every episode, run one VISSIM simulation,
# let the agent update the signal timing plans at each decision point, and
# record rewards, timing statistics and network performance measures.

# Load the saved model.
ddpg_agent.load_checkpoint()

np.set_printoptions(precision=4, threshold=10000)
total_step = 0

for episode in range(1, num_episodes + 1):
    start = time.time()
    print('******** start episode %s ********' % episode)
    env.stop_simulation()
    env.del_pre_simulation()
    env.set_randseed(episode)  # the episode number doubles as the random seed
    # env.random_veh_input_each_15_min(episode)
    env.reset()
    env.set_signal_program(3)

    # Per-episode accumulators: rewards are tracked per agent, while phase
    # times and offsets are tracked per intersection (node).
    rewards = np.zeros(n_agents)
    phase_1 = np.zeros(nodes)
    phase_2 = np.zeros(nodes)
    offset = np.zeros(nodes)

    # Warm up the network for 840 s, then pause at second 900.
    env.quickmode(1)
    env.warm_up(840)
    env.break_time(900)

    # Read the state at second 900.
    car_D, scooter_D, car_S, scooter_S = env.get_all_states()
    car_D, scooter_D, car_S, scooter_S = input_transform(car_D, scooter_D, car_S, scooter_S)

    samples = 0

    while True:
        if env.time in decision_point:
            # NOTE(review): the trailing 0 is presumably an exploration/noise
            # setting for evaluation -- confirm against DDPG.choose_actions.
            splits, first_greens = ddpg_agent.choose_actions(car_D, scooter_D, car_S, scooter_S, 0)
            splits_for_vissim = splits.squeeze(0).cpu().numpy()
            first_greens_for_vissim = first_greens.squeeze(0).cpu().numpy()

            # Push the new timing plans into VISSIM.
            env.update_timing_plans(splits_for_vissim, first_greens_for_vissim)

        # Print and record the timing plans that are now active.
        new_timing_plans = env.get_timing_plans()
        updated_offsets = env.get_offsets()
        print('new_timing_plans:', new_timing_plans)
        print('updated_offsets:', updated_offsets)
        # Accumulate for every intersection. The timing plan is keyed by the
        # 1-based node id as a string; the accumulators are sized by `nodes`,
        # so iterating over n_agents would leave most of them at zero.
        for node in range(nodes):
            plan = new_timing_plans['%s' % (node + 1)]
            phase_1[node] += plan[0]
            phase_2[node] += plan[1]
            offset[node] += updated_offsets[node]

        # Run the simulation with the new timing plans.
        env.execute_new_timing_plans(env.time)

        # Read the next state.
        car_D_, scooter_D_, car_S_, scooter_S_ = env.get_all_states()
        car_D_, scooter_D_, car_S_, scooter_S_ = input_transform(car_D_, scooter_D_, car_S_, scooter_S_)

        # ========================================== reward computation ==========================================
        # Reward table for this single action.
        reward = np.zeros(n_agents)

        # Fetch measurements from VISSIM.
        throughput_car, throughput_scooter = env.get_total_throughput()
        queue_length = env.get_total_queue_length()

        for i in range(n_agents):
            # Reward per agent: vehicle throughput (scooters weighted by 0.3)
            # minus a 0.44-weighted queue-length penalty.
            reward[i] = throughput_car[i] + 0.3 * throughput_scooter[i] - 0.44 * queue_length[i]
            rewards[i] += reward[i]

        print('reward:', reward)
        samples += 1
        total_step += 1
        print('###### have got %s sample(s) ######' % samples)

        if env.time >= sim_time:
            break

        # Not finished yet: the next state becomes the current state.
        car_D = car_D_
        scooter_D = scooter_D_
        car_S = car_S_
        scooter_S = scooter_S_

    # Episode averages. Divide by the number of samples actually collected
    # (60 with the default decision points) instead of a hard-coded constant.
    avg_rewards = rewards / samples
    avg_phase_1 = phase_1 / samples
    avg_phase_2 = phase_2 / samples
    avg_offset = offset / samples
    avg_rewards_per_episode.append(avg_rewards)
    avg_global_rewards_per_episode.append(float(sum(avg_rewards)))
    avg_phase_time_1_per_episode.append(avg_phase_1)
    avg_phase_time_2_per_episode.append(avg_phase_2)
    avg_offset_per_episode.append(avg_offset)

    # Store travel-time data.
    travtime_car, travtime_scooter = env.get_travel_time()
    avg_travtime_car_per_episode[episode - 1] += travtime_car
    avg_travtime_scooter_per_episode[episode - 1] += travtime_scooter

    # Store queue-length data.
    each_q_len = env.get_each_queue_length()
    avg_queue_length_per_episode[episode - 1] += each_q_len

    # Store the average number of stops.
    num_stops_car, num_stops_scooter = env.get_avg_num_stops()
    avg_num_stops_car.append(num_stops_car)
    avg_num_stops_scooter.append(num_stops_scooter)

    # Store the average delay.
    delay_car, delay_scooter = env.get_avg_delay()
    avg_delay_car.append(delay_car)
    avg_delay_scooter.append(delay_scooter)

    # Store the total travel time.
    travtmtot_car, travtmtot_scooter = env.get_total_travel_time()
    avg_travtmtot_car.append(travtmtot_car)
    avg_travtmtot_scooter.append(travtmtot_scooter)

    # Store the total number of stops.
    stopstot_car, stopstot_scooter = env.get_total_num_stops()
    avg_stopstot_car.append(stopstot_car)
    avg_stopstot_scooter.append(stopstot_scooter)

    # Store the total delay time.
    delaytot_car, delaytot_scooter = env.get_total_delay()
    avg_delaytot_car.append(delaytot_car)
    avg_delaytot_scooter.append(delaytot_scooter)

    end = time.time()
    spent_time = end - start
    spent_times.append(spent_time)
    print('avg. rewards in this episode:', avg_rewards)
    # print('avg. global rewards in this episode:', avg_global_rewards_per_episode[episode-1])
    print('******** end episode %s ********' % episode)
    print('spent %.2f (s) for episode %s' % (spent_time, episode))

... loading checkpoint ...
******** start episode 1 ********
new_timing_plans: {'1': [37, 23], '2': [37, 23], '3': [37, 23]}
updated_offsets: [13, 11, 42]
reward: [115.855]
###### have got 1 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [41, 19], '3': [36, 24]}
updated_offsets: [13, 9, 43]
reward: [104.7003]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [41, 19], '3': [37, 23]}
updated_offsets: [12, 9, 43]
reward: [118.4448]
###### have got 3 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [41, 19], '3': [36, 24]}
updated_offsets: [10, 9, 44]
reward: [103.6361]
###### have got 4 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [41, 19], '3': [37, 23]}
updated_offsets: [13, 10, 44]
reward: [92.9921]
###### have got 5 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [40, 20], '3': [37, 23]}
updated_offsets: [13, 10, 45]
reward: [90.1432]
###### have got 6 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [35

reward: [109.6805]
###### have got 56 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [34, 26], '3': [38, 22]}
updated_offsets: [12, 10, 42]
reward: [111.4514]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [34, 26], '3': [37, 23]}
updated_offsets: [11, 11, 42]
reward: [108.0893]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [34, 26], '3': [37, 23]}
updated_offsets: [12, 11, 42]
reward: [97.0106]
###### have got 59 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [36, 24], '3': [35, 25]}
updated_offsets: [14, 10, 43]
reward: [104.2696]
###### have got 60 sample(s) ######
avg. rewards in this episode: [106.7159]
******** end episode 1 ********
spent 16.74 (s) for episode 1
******** start episode 2 ********
new_timing_plans: {'1': [34, 26], '2': [40, 20], '3': [37, 23]}
updated_offsets: [14, 10, 45]
reward: [118.4165]
###### have got 1 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [37, 23], '3': [33, 27]}
u

reward: [105.0597]
###### have got 51 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [37, 23], '3': [37, 23]}
updated_offsets: [9, 10, 42]
reward: [122.9739]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [36, 24]}
updated_offsets: [8, 10, 43]
reward: [111.428]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [33, 27], '3': [34, 26]}
updated_offsets: [11, 11, 44]
reward: [98.998]
###### have got 54 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [38, 22]}
updated_offsets: [11, 11, 41]
reward: [110.4244]
###### have got 55 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [32, 28], '3': [37, 23]}
updated_offsets: [15, 12, 42]
reward: [99.6348]
###### have got 56 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [35, 25], '3': [35, 25]}
updated_offsets: [11, 10, 43]
reward: [106.9602]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [34, 26], '3': [37

reward: [111.363]
###### have got 46 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [35, 25], '3': [37, 23]}
updated_offsets: [10, 11, 42]
reward: [104.8293]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [39, 21]}
updated_offsets: [11, 10, 41]
reward: [98.3755]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [31, 29], '3': [34, 26]}
updated_offsets: [11, 12, 44]
reward: [96.881]
###### have got 49 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [33, 27], '3': [39, 21]}
updated_offsets: [7, 10, 41]
reward: [114.5267]
###### have got 50 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [34, 26], '3': [35, 25]}
updated_offsets: [11, 11, 43]
reward: [106.9891]
###### have got 51 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [37, 23]}
updated_offsets: [11, 10, 42]
reward: [121.8448]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [45, 15], '2': [34, 26], '3': [3

reward: [116.6209]
###### have got 41 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [36, 24]}
updated_offsets: [12, 11, 43]
reward: [136.8993]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [37, 23]}
updated_offsets: [10, 11, 42]
reward: [133.5535]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [38, 22]}
updated_offsets: [10, 10, 42]
reward: [108.4408]
###### have got 44 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [37, 23]}
updated_offsets: [10, 10, 42]
reward: [126.399]
###### have got 45 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [37, 23], '3': [35, 25]}
updated_offsets: [8, 11, 43]
reward: [119.6617]
###### have got 46 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [34, 26], '3': [37, 23]}
updated_offsets: [11, 11, 42]
reward: [118.9442]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [35, 25], '3':

reward: [115.0091]
###### have got 36 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [38, 22]}
updated_offsets: [14, 11, 41]
reward: [117.7721]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [37, 23], '3': [35, 25]}
updated_offsets: [10, 11, 43]
reward: [119.3995]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [35, 25], '3': [39, 21]}
updated_offsets: [14, 13, 41]
reward: [124.2118]
###### have got 39 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [38, 22], '3': [36, 24]}
updated_offsets: [8, 11, 43]
reward: [136.1253]
###### have got 40 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [39, 21]}
updated_offsets: [11, 11, 41]
reward: [120.7101]
###### have got 41 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3': [35, 25]}
updated_offsets: [11, 10, 43]
reward: [108.6429]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [36, 24], '3'

reward: [103.7965]
###### have got 31 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [38, 22], '3': [36, 24]}
updated_offsets: [12, 10, 43]
reward: [131.2895]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [36, 24], '3': [37, 23]}
updated_offsets: [9, 11, 42]
reward: [113.661]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [34, 26], '3': [36, 24]}
updated_offsets: [12, 11, 43]
reward: [111.5516]
###### have got 34 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [40, 20]}
updated_offsets: [11, 10, 40]
reward: [116.5743]
###### have got 35 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [36, 24], '3': [36, 24]}
updated_offsets: [13, 11, 43]
reward: [109.2966]
###### have got 36 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [38, 22], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [113.485]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': 

reward: [88.6298]
###### have got 26 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [41, 19], '3': [41, 19]}
updated_offsets: [12, 9, 40]
reward: [87.9802]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [40, 20], '3': [41, 19]}
updated_offsets: [9, 9, 41]
reward: [103.1519]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [39, 21], '3': [42, 18]}
updated_offsets: [11, 9, 40]
reward: [98.3681]
###### have got 29 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [40, 20], '3': [41, 19]}
updated_offsets: [11, 9, 40]
reward: [97.7144]
###### have got 30 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [40, 20], '3': [39, 21]}
updated_offsets: [11, 9, 41]
reward: [97.549]
###### have got 31 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [36, 24]}
updated_offsets: [13, 12, 43]
reward: [107.9475]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [38, 22], '3': [38, 22]}

reward: [91.0121]
###### have got 21 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [41, 19], '3': [42, 18]}
updated_offsets: [12, 9, 39]
reward: [100.6838]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [40, 20], '3': [40, 20]}
updated_offsets: [11, 10, 42]
reward: [94.3148]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [40, 20], '3': [38, 22]}
updated_offsets: [9, 10, 41]
reward: [109.9681]
###### have got 24 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [43, 17], '3': [42, 18]}
updated_offsets: [9, 8, 39]
reward: [102.3612]
###### have got 25 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [40, 20], '3': [42, 18]}
updated_offsets: [9, 10, 39]
reward: [107.3996]
###### have got 26 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [42, 18], '3': [40, 20]}
updated_offsets: [11, 10, 40]
reward: [90.4987]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [39, 

reward: [97.68]
###### have got 16 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [40, 20]}
updated_offsets: [10, 10, 42]
reward: [101.0016]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [40, 20]}
updated_offsets: [12, 11, 40]
reward: [91.8574]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [44, 16], '3': [43, 17]}
updated_offsets: [8, 8, 39]
reward: [99.2214]
###### have got 19 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [42, 18], '3': [40, 20]}
updated_offsets: [11, 10, 40]
reward: [88.4375]
###### have got 20 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [40, 20]}
updated_offsets: [11, 9, 41]
reward: [103.1804]
###### have got 21 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [41, 19], '3': [42, 18]}
updated_offsets: [11, 9, 40]
reward: [91.8906]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [39, 21]

reward: [91.9907]
###### have got 11 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [39, 21], '3': [35, 25]}
updated_offsets: [15, 11, 45]
reward: [93.5025]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [41, 19], '3': [39, 21]}
updated_offsets: [11, 10, 46]
reward: [96.9685]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [38, 22], '3': [36, 24]}
updated_offsets: [14, 11, 43]
reward: [95.5653]
###### have got 14 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [36, 24], '3': [33, 27]}
updated_offsets: [8, 10, 44]
reward: [89.4604]
###### have got 15 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [40, 20], '3': [33, 27]}
updated_offsets: [14, 9, 44]
reward: [100.2054]
###### have got 16 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [42, 18]}
updated_offsets: [9, 11, 43]
reward: [111.3199]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [40, 20], '3': [40, 

reward: [90.9608]
###### have got 6 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [35, 25]}
updated_offsets: [12, 9, 43]
reward: [98.9428]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [33, 27], '2': [39, 21], '3': [35, 25]}
updated_offsets: [15, 10, 44]
reward: [88.177]
###### have got 8 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [38, 22], '3': [35, 25]}
updated_offsets: [15, 9, 43]
reward: [99.0051]
###### have got 9 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [39, 21], '3': [40, 20]}
updated_offsets: [10, 10, 42]
reward: [98.0704]
###### have got 10 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [39, 21], '3': [35, 25]}
updated_offsets: [17, 11, 46]
reward: [96.2898]
###### have got 11 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [41, 19], '3': [39, 21]}
updated_offsets: [13, 9, 45]
reward: [105.5637]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [37, 23]}
u

reward: [108.4131]
###### have got 1 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [40, 20], '3': [39, 21]}
updated_offsets: [8, 9, 44]
reward: [86.0756]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [39, 21]}
updated_offsets: [12, 11, 41]
reward: [100.1683]
###### have got 3 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [35, 25]}
updated_offsets: [12, 11, 44]
reward: [99.6398]
###### have got 4 sample(s) ######
new_timing_plans: {'1': [32, 28], '2': [37, 23], '3': [37, 23]}
updated_offsets: [18, 10, 43]
reward: [87.0574]
###### have got 5 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [39, 21], '3': [36, 24]}
updated_offsets: [11, 10, 44]
reward: [91.9817]
###### have got 6 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [40, 20], '3': [37, 23]}
updated_offsets: [14, 10, 42]
reward: [82.4537]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [42, 18], '3': [37, 23]}
u

reward: [117.5872]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [32, 28], '3': [34, 26]}
updated_offsets: [12, 11, 44]
reward: [109.0402]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [35, 25]}
updated_offsets: [11, 10, 43]
reward: [107.8383]
###### have got 59 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [35, 25], '3': [39, 21]}
updated_offsets: [9, 10, 41]
reward: [125.1504]
###### have got 60 sample(s) ######
avg. rewards in this episode: [106.4482]
******** end episode 12 ********
spent 16.67 (s) for episode 12
******** start episode 13 ********
new_timing_plans: {'1': [34, 26], '2': [42, 18], '3': [36, 24]}
updated_offsets: [14, 9, 43]
reward: [115.0064]
###### have got 1 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [42, 18], '3': [39, 21]}
updated_offsets: [13, 9, 42]
reward: [97.7322]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [38, 22], '3': [32, 28]}
up

reward: [101.4326]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [117.0047]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [35, 25], '3': [37, 23]}
updated_offsets: [10, 10, 42]
reward: [119.6953]
###### have got 54 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [36, 24]}
updated_offsets: [13, 11, 43]
reward: [106.6925]
###### have got 55 sample(s) ######
new_timing_plans: {'1': [44, 16], '2': [33, 27], '3': [36, 24]}
updated_offsets: [9, 11, 43]
reward: [111.9682]
###### have got 56 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [34, 26], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [117.6781]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [33, 27], '3': [37, 23]}
updated_offsets: [14, 11, 42]
reward: [119.0307]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [35, 25], '3'

reward: [110.1866]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [34, 26], '3': [39, 21]}
updated_offsets: [11, 11, 41]
reward: [101.8249]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [35, 25], '3': [36, 24]}
updated_offsets: [11, 10, 43]
reward: [104.6508]
###### have got 49 sample(s) ######
new_timing_plans: {'1': [44, 16], '2': [33, 27], '3': [37, 23]}
updated_offsets: [10, 11, 42]
reward: [115.3358]
###### have got 50 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [37, 23]}
updated_offsets: [13, 11, 42]
reward: [107.2975]
###### have got 51 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [38, 22], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [104.8921]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [37, 23]}
updated_offsets: [9, 10, 42]
reward: [116.7117]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [33, 27], '3'

reward: [118.7631]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [40, 20], '3': [38, 22]}
updated_offsets: [15, 10, 42]
reward: [118.9084]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [39, 21]}
updated_offsets: [9, 12, 41]
reward: [122.2434]
###### have got 44 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [37, 23]}
updated_offsets: [10, 10, 42]
reward: [115.6932]
###### have got 45 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [45, 15]}
updated_offsets: [10, 11, 38]
reward: [116.1257]
###### have got 46 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [33, 27], '3': [35, 25]}
updated_offsets: [14, 12, 43]
reward: [123.8365]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [36, 24], '3': [37, 23]}
updated_offsets: [9, 10, 42]
reward: [115.725]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [32, 28], '3': 

reward: [116.2628]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [35, 25]}
updated_offsets: [12, 10, 43]
reward: [104.4458]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [37, 23]}
updated_offsets: [11, 12, 42]
reward: [110.0046]
###### have got 39 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [40, 20]}
updated_offsets: [9, 10, 40]
reward: [103.7228]
###### have got 40 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [40, 20]}
updated_offsets: [12, 12, 40]
reward: [117.7986]
###### have got 41 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [37, 23], '3': [36, 24]}
updated_offsets: [11, 10, 43]
reward: [124.484]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [38, 22]}
updated_offsets: [13, 10, 41]
reward: [121.3172]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3':

reward: [119.293]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [40, 20]}
updated_offsets: [11, 11, 40]
reward: [120.4414]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [37, 23], '3': [35, 25]}
updated_offsets: [9, 10, 43]
reward: [97.8775]
###### have got 34 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [34, 26], '3': [36, 24]}
updated_offsets: [14, 11, 43]
reward: [137.6526]
###### have got 35 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [37, 23], '3': [37, 23]}
updated_offsets: [13, 10, 42]
reward: [124.6628]
###### have got 36 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [133.5673]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [36, 24]}
updated_offsets: [8, 10, 43]
reward: [112.766]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [37, 23], '3': [3

reward: [103.5498]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3': [35, 25]}
updated_offsets: [11, 11, 43]
reward: [104.0108]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [42, 18], '3': [43, 17]}
updated_offsets: [9, 8, 39]
reward: [111.2633]
###### have got 29 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [42, 18]}
updated_offsets: [12, 11, 39]
reward: [86.669]
###### have got 30 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [42, 18], '3': [40, 20]}
updated_offsets: [11, 10, 40]
reward: [106.2422]
###### have got 31 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [36, 24]}
updated_offsets: [11, 11, 43]
reward: [125.6724]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [35, 25]}
updated_offsets: [13, 13, 43]
reward: [121.3742]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [

reward: [116.1172]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [44, 16], '2': [40, 20], '3': [39, 21]}
updated_offsets: [7, 9, 44]
reward: [114.279]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [40, 20], '3': [39, 21]}
updated_offsets: [8, 10, 41]
reward: [109.474]
###### have got 24 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [39, 21], '3': [40, 20]}
updated_offsets: [7, 10, 41]
reward: [115.911]
###### have got 25 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [42, 18], '3': [41, 19]}
updated_offsets: [10, 8, 40]
reward: [105.8496]
###### have got 26 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [41, 19], '3': [40, 20]}
updated_offsets: [9, 9, 40]
reward: [115.5775]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [36, 24]}
updated_offsets: [9, 10, 43]
reward: [98.5912]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [43, 17], '3': [50, 10]}

reward: [101.0863]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [41, 19], '3': [39, 21]}
updated_offsets: [13, 10, 41]
reward: [111.7451]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [38, 22]}
updated_offsets: [11, 11, 42]
reward: [126.4867]
###### have got 19 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [41, 19], '3': [40, 20]}
updated_offsets: [10, 8, 40]
reward: [115.1777]
###### have got 20 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [42, 18], '3': [41, 19]}
updated_offsets: [9, 9, 40]
reward: [92.3118]
###### have got 21 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [42, 18], '3': [40, 20]}
updated_offsets: [11, 9, 40]
reward: [110.8073]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [40, 20], '3': [41, 19]}
updated_offsets: [12, 10, 40]
reward: [110.2569]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [40, 20], '3': [3

reward: [103.3247]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [41, 19], '3': [37, 23]}
updated_offsets: [11, 9, 42]
reward: [102.1242]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [39, 21], '3': [35, 25]}
updated_offsets: [14, 10, 44]
reward: [96.8998]
###### have got 14 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [37, 23], '3': [36, 24]}
updated_offsets: [12, 11, 43]
reward: [92.3776]
###### have got 15 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [36, 24]}
updated_offsets: [12, 10, 43]
reward: [103.4772]
###### have got 16 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [40, 20], '3': [42, 18]}
updated_offsets: [10, 8, 39]
reward: [103.7208]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [44, 16], '3': [42, 18]}
updated_offsets: [10, 8, 39]
reward: [107.6868]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3': [3

reward: [110.7511]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [36, 24], '3': [35, 25]}
updated_offsets: [16, 12, 45]
reward: [85.8206]
###### have got 8 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [36, 24]}
updated_offsets: [13, 9, 44]
reward: [97.2005]
###### have got 9 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [40, 20], '3': [37, 23]}
updated_offsets: [13, 9, 43]
reward: [104.5801]
###### have got 10 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [34, 26]}
updated_offsets: [10, 13, 45]
reward: [103.9177]
###### have got 11 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [36, 24]}
updated_offsets: [12, 10, 43]
reward: [103.806]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [33, 27], '2': [41, 19], '3': [35, 25]}
updated_offsets: [18, 9, 46]
reward: [99.5798]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [36, 24

new_timing_plans: {'1': [38, 22], '2': [40, 20], '3': [37, 23]}
updated_offsets: [12, 10, 43]
reward: [94.6568]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [35, 25]}
updated_offsets: [13, 10, 44]
reward: [92.905]
###### have got 3 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [41, 19], '3': [36, 24]}
updated_offsets: [12, 9, 43]
reward: [94.4422]
###### have got 4 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [37, 23], '3': [35, 25]}
updated_offsets: [12, 10, 44]
reward: [93.4319]
###### have got 5 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [37, 23], '3': [37, 23]}
updated_offsets: [12, 11, 43]
reward: [88.4376]
###### have got 6 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [35, 25]}
updated_offsets: [12, 9, 44]
reward: [103.4369]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [39, 21], '3': [34, 26]}
updated_offsets: [14, 9, 45]
reward: [105.0871]
###### h

reward: [117.2202]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [35, 25], '3': [36, 24]}
updated_offsets: [11, 10, 43]
reward: [114.4416]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [35, 25], '3': [36, 24]}
updated_offsets: [11, 11, 43]
reward: [117.2629]
###### have got 59 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [35, 25]}
updated_offsets: [11, 10, 43]
reward: [112.0503]
###### have got 60 sample(s) ######
avg. rewards in this episode: [104.8662]
******** end episode 23 ********
spent 16.56 (s) for episode 23
******** start episode 24 ********
new_timing_plans: {'1': [34, 26], '2': [38, 22], '3': [34, 26]}
updated_offsets: [14, 11, 47]
reward: [109.6556]
###### have got 1 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [41, 19], '3': [37, 23]}
updated_offsets: [11, 9, 44]
reward: [100.8886]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [37, 23], '3': [33, 27]}

reward: [113.4681]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [33, 27], '3': [35, 25]}
updated_offsets: [13, 11, 43]
reward: [105.4693]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [36, 24]}
updated_offsets: [12, 10, 43]
reward: [107.924]
###### have got 54 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [31, 29], '3': [39, 21]}
updated_offsets: [12, 12, 41]
reward: [115.0188]
###### have got 55 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [35, 25]}
updated_offsets: [10, 10, 43]
reward: [115.1207]
###### have got 56 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [32, 28], '3': [37, 23]}
updated_offsets: [10, 11, 42]
reward: [108.6925]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [34, 26], '3': [37, 23]}
updated_offsets: [10, 11, 42]
reward: [107.7325]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [35, 25], '3'

reward: [123.3283]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [35, 25]}
updated_offsets: [11, 10, 43]
reward: [105.6734]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [37, 23], '3': [37, 23]}
updated_offsets: [10, 10, 42]
reward: [100.8046]
###### have got 49 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [34, 26]}
updated_offsets: [12, 11, 44]
reward: [98.3665]
###### have got 50 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [38, 22], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [107.1954]
###### have got 51 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [35, 25], '3': [35, 25]}
updated_offsets: [11, 10, 43]
reward: [122.363]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [39, 21]}
updated_offsets: [11, 10, 41]
reward: [118.6869]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [33, 27], '3':

reward: [126.4584]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [37, 23], '3': [39, 21]}
updated_offsets: [12, 13, 41]
reward: [97.5805]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [35, 25], '3': [36, 24]}
updated_offsets: [12, 13, 43]
reward: [110.5465]
###### have got 44 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3': [37, 23]}
updated_offsets: [12, 11, 42]
reward: [115.3522]
###### have got 45 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [37, 23], '3': [36, 24]}
updated_offsets: [8, 10, 44]
reward: [121.8075]
###### have got 46 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [34, 26], '3': [39, 21]}
updated_offsets: [8, 11, 41]
reward: [121.0441]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [34, 26], '3': [37, 23]}
updated_offsets: [10, 10, 42]
reward: [116.0779]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': 

reward: [119.5244]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [123.0191]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [37, 23], '3': [36, 24]}
updated_offsets: [13, 10, 43]
reward: [138.0157]
###### have got 39 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [39, 21], '3': [39, 21]}
updated_offsets: [10, 11, 41]
reward: [122.5651]
###### have got 40 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [37, 23], '3': [36, 24]}
updated_offsets: [10, 11, 43]
reward: [118.1563]
###### have got 41 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [37, 23], '3': [35, 25]}
updated_offsets: [10, 10, 43]
reward: [118.2481]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [37, 23], '3': [38, 22]}
updated_offsets: [11, 10, 42]
reward: [117.3622]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [36, 24], '3

reward: [102.9494]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [36, 24]}
updated_offsets: [12, 13, 43]
reward: [114.0872]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [38, 22]}
updated_offsets: [13, 10, 41]
reward: [113.042]
###### have got 34 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [38, 22], '3': [34, 26]}
updated_offsets: [10, 10, 44]
reward: [124.8942]
###### have got 35 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [38, 22]}
updated_offsets: [11, 10, 41]
reward: [122.8654]
###### have got 36 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [38, 22], '3': [35, 25]}
updated_offsets: [10, 10, 43]
reward: [122.9646]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [38, 22]}
updated_offsets: [9, 10, 42]
reward: [124.632]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': 

reward: [110.1932]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [40, 20], '3': [41, 19]}
updated_offsets: [10, 10, 40]
reward: [99.4598]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [43, 17], '3': [42, 18]}
updated_offsets: [9, 9, 39]
reward: [102.2031]
###### have got 29 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [44, 16], '3': [44, 16]}
updated_offsets: [10, 8, 38]
reward: [109.286]
###### have got 30 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [40, 20]}
updated_offsets: [11, 10, 40]
reward: [105.2318]
###### have got 31 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [37, 23]}
updated_offsets: [10, 9, 42]
reward: [120.5319]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [38, 22]}
updated_offsets: [9, 10, 42]
reward: [112.9196]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [39, 21], '3': [37,

reward: [94.2456]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [40, 20], '3': [43, 17]}
updated_offsets: [8, 9, 39]
reward: [102.9335]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [43, 17], '3': [40, 20]}
updated_offsets: [10, 8, 40]
reward: [111.6741]
###### have got 24 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [40, 20], '3': [39, 21]}
updated_offsets: [8, 10, 41]
reward: [108.7213]
###### have got 25 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [38, 22], '3': [37, 23]}
updated_offsets: [9, 11, 42]
reward: [101.1342]
###### have got 26 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [38, 22], '3': [42, 18]}
updated_offsets: [9, 10, 39]
reward: [100.8385]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [41, 19], '3': [40, 20]}
updated_offsets: [12, 9, 40]
reward: [98.0307]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [40, 20], '3': [40, 2

reward: [101.5426]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [39, 21]}
updated_offsets: [10, 9, 41]
reward: [90.4007]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [44, 16], '2': [43, 17], '3': [41, 19]}
updated_offsets: [8, 8, 40]
reward: [106.9902]
###### have got 19 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [39, 21]}
updated_offsets: [12, 10, 41]
reward: [97.6953]
###### have got 20 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [40, 20], '3': [42, 18]}
updated_offsets: [8, 10, 41]
reward: [108.317]
###### have got 21 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [37, 23], '3': [36, 24]}
updated_offsets: [11, 10, 43]
reward: [101.4114]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [40, 20], '3': [39, 21]}
updated_offsets: [10, 9, 41]
reward: [115.0001]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [39, 21], '3': [40, 

reward: [104.4397]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [40, 20], '3': [37, 23]}
updated_offsets: [11, 10, 44]
reward: [101.7312]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [41, 19], '3': [37, 23]}
updated_offsets: [13, 9, 43]
reward: [104.3056]
###### have got 14 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [38, 22], '3': [36, 24]}
updated_offsets: [15, 11, 44]
reward: [88.3609]
###### have got 15 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [35, 25]}
updated_offsets: [11, 9, 43]
reward: [91.1381]
###### have got 16 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [42, 18], '3': [42, 18]}
updated_offsets: [10, 9, 42]
reward: [95.518]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [44, 16], '2': [41, 19], '3': [37, 23]}
updated_offsets: [9, 9, 42]
reward: [119.8947]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [36, 24], '3': [37, 2

reward: [99.9212]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [35, 25], '3': [36, 24]}
updated_offsets: [13, 13, 44]
reward: [74.1414]
###### have got 8 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [41, 19], '3': [38, 22]}
updated_offsets: [11, 8, 42]
reward: [92.7386]
###### have got 9 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [38, 22], '3': [32, 28]}
updated_offsets: [14, 9, 45]
reward: [90.5136]
###### have got 10 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [41, 19], '3': [35, 25]}
updated_offsets: [14, 9, 44]
reward: [92.0904]
###### have got 11 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [36, 24]}
updated_offsets: [9, 11, 45]
reward: [89.5906]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [40, 20], '3': [36, 24]}
updated_offsets: [12, 9, 44]
reward: [102.7959]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [41, 19], '3': [39, 21]}
u

new_timing_plans: {'1': [37, 23], '2': [41, 19], '3': [36, 24]}
updated_offsets: [12, 9, 43]
reward: [99.5992]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [36, 24]}
updated_offsets: [12, 10, 44]
reward: [84.6465]
###### have got 3 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [38, 22]}
updated_offsets: [12, 9, 42]
reward: [101.1343]
###### have got 4 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [37, 23]}
updated_offsets: [14, 10, 43]
reward: [106.3656]
###### have got 5 sample(s) ######
new_timing_plans: {'1': [32, 28], '2': [42, 18], '3': [35, 25]}
updated_offsets: [15, 9, 51]
reward: [99.1609]
###### have got 6 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [38, 22], '3': [34, 26]}
updated_offsets: [11, 10, 44]
reward: [104.584]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [42, 18], '3': [38, 22]}
updated_offsets: [12, 9, 44]
reward: [97.2615]
###### h

reward: [102.1576]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [34, 26], '3': [38, 22]}
updated_offsets: [10, 11, 43]
reward: [100.2232]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [32, 28], '3': [37, 23]}
updated_offsets: [16, 12, 43]
reward: [103.6434]
###### have got 59 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [35, 25], '3': [35, 25]}
updated_offsets: [12, 11, 43]
reward: [105.9391]
###### have got 60 sample(s) ######
avg. rewards in this episode: [108.4987]
******** end episode 34 ********
spent 16.42 (s) for episode 34
******** start episode 35 ********
new_timing_plans: {'1': [36, 24], '2': [38, 22], '3': [37, 23]}
updated_offsets: [14, 15, 48]
reward: [113.5905]
###### have got 1 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [40, 20], '3': [36, 24]}
updated_offsets: [13, 10, 43]
reward: [98.6658]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [39, 21], '3': [35, 25]}

reward: [110.9]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [35, 25], '3': [40, 20]}
updated_offsets: [9, 10, 40]
reward: [103.4362]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [32, 28], '3': [38, 22]}
updated_offsets: [12, 11, 42]
reward: [111.0137]
###### have got 54 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3': [38, 22]}
updated_offsets: [10, 10, 42]
reward: [102.6291]
###### have got 55 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [113.4902]
###### have got 56 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [33, 27], '3': [40, 20]}
updated_offsets: [8, 11, 41]
reward: [106.3637]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [33, 27], '3': [36, 24]}
updated_offsets: [12, 12, 43]
reward: [114.4158]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [3

reward: [109.3726]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [35, 25], '3': [37, 23]}
updated_offsets: [10, 11, 42]
reward: [110.7101]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [34, 26], '3': [37, 23]}
updated_offsets: [13, 11, 42]
reward: [108.8994]
###### have got 49 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [36, 24], '3': [36, 24]}
updated_offsets: [11, 10, 43]
reward: [116.8788]
###### have got 50 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [36, 24], '3': [39, 21]}
updated_offsets: [12, 10, 41]
reward: [112.3461]
###### have got 51 sample(s) ######
new_timing_plans: {'1': [45, 15], '2': [36, 24], '3': [35, 25]}
updated_offsets: [9, 10, 43]
reward: [122.2125]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [35, 25], '3': [37, 23]}
updated_offsets: [13, 10, 42]
reward: [110.5183]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [34, 26], '3'

reward: [109.8886]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [41, 19], '3': [39, 21]}
updated_offsets: [10, 10, 41]
reward: [117.3968]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [35, 25], '3': [38, 22]}
updated_offsets: [12, 13, 42]
reward: [108.7031]
###### have got 44 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [39, 21], '3': [37, 23]}
updated_offsets: [9, 9, 42]
reward: [115.1212]
###### have got 45 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [37, 23], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [124.2838]
###### have got 46 sample(s) ######
new_timing_plans: {'1': [44, 16], '2': [33, 27], '3': [35, 25]}
updated_offsets: [9, 11, 43]
reward: [116.9679]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [34, 26], '3': [38, 22]}
updated_offsets: [15, 11, 42]
reward: [108.7593]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [50, 10], '2': [35, 25], '3': 

reward: [130.0605]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [39, 21]}
updated_offsets: [11, 11, 41]
reward: [114.825]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [35, 25]}
updated_offsets: [12, 11, 43]
reward: [102.6647]
###### have got 39 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [39, 21], '3': [37, 23]}
updated_offsets: [10, 10, 42]
reward: [120.6772]
###### have got 40 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [36, 24]}
updated_offsets: [10, 12, 43]
reward: [121.4342]
###### have got 41 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3': [36, 24]}
updated_offsets: [8, 9, 43]
reward: [137.6285]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [38, 22], '3': [37, 23]}
updated_offsets: [9, 10, 42]
reward: [115.9241]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [37, 23], '3': [

reward: [132.9128]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [40, 20], '3': [44, 16]}
updated_offsets: [8, 9, 40]
reward: [118.9919]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [36, 24], '3': [36, 24]}
updated_offsets: [12, 11, 43]
reward: [127.8873]
###### have got 34 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [39, 21]}
updated_offsets: [8, 11, 41]
reward: [117.2043]
###### have got 35 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [38, 22], '3': [37, 23]}
updated_offsets: [10, 11, 42]
reward: [116.5773]
###### have got 36 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [115.2734]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [36, 24]}
updated_offsets: [12, 11, 43]
reward: [108.5045]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [36, 24], '3': 

reward: [107.3121]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [41, 19], '3': [38, 22]}
updated_offsets: [9, 9, 42]
reward: [99.2325]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [42, 18], '3': [40, 20]}
updated_offsets: [11, 9, 42]
reward: [114.3595]
###### have got 29 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [36, 24]}
updated_offsets: [8, 12, 44]
reward: [104.1894]
###### have got 30 sample(s) ######
new_timing_plans: {'1': [45, 15], '2': [41, 19], '3': [41, 19]}
updated_offsets: [7, 10, 40]
reward: [109.36]
###### have got 31 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [35, 25], '3': [35, 25]}
updated_offsets: [15, 11, 43]
reward: [114.641]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [38, 22], '3': [37, 23]}
updated_offsets: [9, 10, 42]
reward: [120.2704]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [35, 25], '3': [37, 23

reward: [105.2421]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [37, 23]}
updated_offsets: [11, 13, 42]
reward: [94.6822]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [38, 22]}
updated_offsets: [12, 11, 42]
reward: [102.2586]
###### have got 24 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [41, 19], '3': [38, 22]}
updated_offsets: [9, 9, 41]
reward: [116.7782]
###### have got 25 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [100.4605]
###### have got 26 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [38, 22], '3': [38, 22]}
updated_offsets: [8, 10, 42]
reward: [103.9897]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [41, 19], '3': [40, 20]}
updated_offsets: [11, 10, 41]
reward: [99.2339]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [38, 22], '3': [3

reward: [92.2126]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [40, 20], '3': [38, 22]}
updated_offsets: [11, 9, 42]
reward: [101.8377]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [41, 19], '3': [39, 21]}
updated_offsets: [9, 10, 41]
reward: [92.8925]
###### have got 19 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [44, 16], '3': [43, 17]}
updated_offsets: [12, 8, 39]
reward: [107.3052]
###### have got 20 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [40, 20]}
updated_offsets: [10, 10, 42]
reward: [98.3552]
###### have got 21 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [41, 19]}
updated_offsets: [12, 9, 40]
reward: [94.1623]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [42, 18], '3': [40, 20]}
updated_offsets: [12, 8, 41]
reward: [101.3089]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [44, 16], '3': [41, 1

reward: [106.7442]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [34, 26]}
updated_offsets: [12, 9, 44]
reward: [100.9222]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [41, 19], '3': [36, 24]}
updated_offsets: [11, 9, 44]
reward: [91.6064]
###### have got 14 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [41, 19], '3': [37, 23]}
updated_offsets: [14, 9, 42]
reward: [81.9317]
###### have got 15 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [37, 23], '3': [38, 22]}
updated_offsets: [12, 12, 44]
reward: [98.4551]
###### have got 16 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [44, 16], '3': [50, 10]}
updated_offsets: [12, 8, 36]
reward: [102.2739]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [41, 19], '3': [39, 21]}
updated_offsets: [11, 9, 41]
reward: [100.9082]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [36, 24], '3': [39, 

reward: [93.0194]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [35, 25]}
updated_offsets: [12, 10, 43]
reward: [106.0289]
###### have got 8 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [39, 21], '3': [34, 26]}
updated_offsets: [15, 10, 44]
reward: [101.6425]
###### have got 9 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [40, 20], '3': [38, 22]}
updated_offsets: [10, 9, 42]
reward: [90.7441]
###### have got 10 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [38, 22], '3': [33, 27]}
updated_offsets: [14, 10, 44]
reward: [89.3842]
###### have got 11 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [40, 20], '3': [35, 25]}
updated_offsets: [14, 10, 46]
reward: [101.3771]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [36, 24]}
updated_offsets: [9, 10, 43]
reward: [93.8724]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [39, 21], '3': [36, 2

reward: [87.7858]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [40, 20], '3': [36, 24]}
updated_offsets: [13, 10, 43]
reward: [93.0108]
###### have got 3 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [36, 24], '3': [36, 24]}
updated_offsets: [12, 10, 43]
reward: [84.0023]
###### have got 4 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [40, 20], '3': [36, 24]}
updated_offsets: [11, 9, 43]
reward: [107.9131]
###### have got 5 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [38, 22], '3': [35, 25]}
updated_offsets: [13, 11, 43]
reward: [84.5755]
###### have got 6 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [36, 24], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [80.7724]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [40, 20], '3': [37, 23]}
updated_offsets: [12, 10, 43]
reward: [88.5981]
###### have got 8 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [41, 19], '3': [35, 25]}
u

reward: [116.7226]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [33, 27], '3': [37, 23]}
updated_offsets: [9, 11, 42]
reward: [115.8645]
###### have got 59 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [33, 27], '3': [36, 24]}
updated_offsets: [14, 11, 43]
reward: [104.8373]
###### have got 60 sample(s) ######
avg. rewards in this episode: [104.8846]
******** end episode 45 ********
spent 16.24 (s) for episode 45
******** start episode 46 ********
new_timing_plans: {'1': [35, 25], '2': [38, 22], '3': [34, 26]}
updated_offsets: [15, 10, 45]
reward: [116.5737]
###### have got 1 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [35, 25]}
updated_offsets: [12, 10, 44]
reward: [107.3717]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [39, 21], '3': [37, 23]}
updated_offsets: [12, 9, 42]
reward: [87.504]
###### have got 3 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [38, 22], '3': [33, 27]}
upd

reward: [125.5207]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [36, 24], '3': [37, 23]}
updated_offsets: [11, 10, 42]
reward: [124.1221]
###### have got 54 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [36, 24], '3': [39, 21]}
updated_offsets: [10, 10, 41]
reward: [109.1619]
###### have got 55 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [37, 23]}
updated_offsets: [13, 11, 42]
reward: [103.3524]
###### have got 56 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [36, 24], '3': [36, 24]}
updated_offsets: [11, 10, 43]
reward: [116.5556]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [38, 22]}
updated_offsets: [10, 11, 42]
reward: [101.7482]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [35, 25], '3': [35, 25]}
updated_offsets: [11, 10, 43]
reward: [115.4571]
###### have got 59 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [32, 28], '3

reward: [104.6942]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [40, 20]}
updated_offsets: [13, 10, 40]
reward: [113.8084]
###### have got 49 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [38, 22]}
updated_offsets: [12, 11, 41]
reward: [116.6333]
###### have got 50 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [36, 24]}
updated_offsets: [9, 10, 43]
reward: [129.0882]
###### have got 51 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [35, 25], '3': [36, 24]}
updated_offsets: [11, 10, 43]
reward: [115.3474]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [33, 27], '3': [37, 23]}
updated_offsets: [8, 10, 42]
reward: [122.547]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [35, 25], '3': [33, 27]}
updated_offsets: [10, 10, 44]
reward: [119.5267]
###### have got 54 sample(s) ######
new_timing_plans: {'1': [47, 13], '2': [34, 26], '3': 

reward: [110.1539]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [35, 25]}
updated_offsets: [13, 10, 43]
reward: [116.3707]
###### have got 44 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [35, 25], '3': [38, 22]}
updated_offsets: [9, 11, 42]
reward: [114.1835]
###### have got 45 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [41, 19], '3': [36, 24]}
updated_offsets: [11, 9, 43]
reward: [108.7673]
###### have got 46 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [36, 24]}
updated_offsets: [10, 11, 43]
reward: [102.4629]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [34, 26], '3': [38, 22]}
updated_offsets: [7, 11, 42]
reward: [101.363]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [36, 24]}
updated_offsets: [11, 11, 43]
reward: [116.7114]
###### have got 49 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [33, 27], '3': [

reward: [107.9051]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [37, 23], '3': [35, 25]}
updated_offsets: [13, 10, 43]
reward: [106.7612]
###### have got 39 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [38, 22], '3': [37, 23]}
updated_offsets: [8, 10, 42]
reward: [128.1099]
###### have got 40 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [36, 24]}
updated_offsets: [9, 11, 43]
reward: [126.4132]
###### have got 41 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [37, 23]}
updated_offsets: [11, 10, 42]
reward: [129.8066]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [35, 25], '3': [38, 22]}
updated_offsets: [12, 12, 42]
reward: [115.3394]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [37, 23]}
updated_offsets: [7, 11, 42]
reward: [114.4102]
###### have got 44 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3': 

reward: [110.8893]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [35, 25]}
updated_offsets: [12, 11, 43]
reward: [129.4591]
###### have got 34 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [38, 22], '3': [37, 23]}
updated_offsets: [11, 9, 42]
reward: [120.8625]
###### have got 35 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [37, 23], '3': [38, 22]}
updated_offsets: [13, 11, 41]
reward: [109.8945]
###### have got 36 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [34, 26]}
updated_offsets: [12, 11, 44]
reward: [129.2613]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [38, 22], '3': [39, 21]}
updated_offsets: [7, 9, 41]
reward: [128.3027]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [35, 25], '3': [33, 27]}
updated_offsets: [11, 11, 44]
reward: [112.4814]
###### have got 39 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [41, 19], '3': 

reward: [102.3495]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [40, 20], '3': [39, 21]}
updated_offsets: [11, 9, 42]
reward: [111.5208]
###### have got 29 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [38, 22], '3': [36, 24]}
updated_offsets: [9, 10, 43]
reward: [111.2136]
###### have got 30 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [39, 21], '3': [40, 20]}
updated_offsets: [7, 10, 41]
reward: [102.2691]
###### have got 31 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [38, 22]}
updated_offsets: [12, 11, 42]
reward: [107.826]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [38, 22]}
updated_offsets: [12, 10, 42]
reward: [101.4556]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [35, 25], '3': [35, 25]}
updated_offsets: [11, 12, 43]
reward: [115.9949]
###### have got 34 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [

reward: [101.9583]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [43, 17]}
updated_offsets: [8, 9, 39]
reward: [96.4897]
###### have got 24 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [40, 20], '3': [39, 21]}
updated_offsets: [11, 9, 41]
reward: [99.4014]
###### have got 25 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [38, 22]}
updated_offsets: [11, 10, 42]
reward: [94.5336]
###### have got 26 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [48, 12], '3': [50, 10]}
updated_offsets: [11, 7, 35]
reward: [102.9791]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [38, 22]}
updated_offsets: [11, 12, 41]
reward: [98.0809]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [38, 22], '3': [40, 20]}
updated_offsets: [9, 11, 40]
reward: [118.0897]
###### have got 29 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [44, 16], '3': [42, 1

reward: [108.9452]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [43, 17], '3': [42, 18]}
updated_offsets: [10, 9, 42]
reward: [110.9096]
###### have got 19 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [40, 20]}
updated_offsets: [10, 10, 41]
reward: [101.78]
###### have got 20 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [40, 20], '3': [38, 22]}
updated_offsets: [11, 9, 42]
reward: [88.5245]
###### have got 21 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [39, 21]}
updated_offsets: [10, 10, 41]
reward: [108.8427]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [41, 19], '3': [43, 17]}
updated_offsets: [10, 10, 39]
reward: [102.7192]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [42, 18], '3': [40, 20]}
updated_offsets: [11, 9, 40]
reward: [102.6125]
###### have got 24 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [41, 19], '3': [40

reward: [93.9014]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [39, 21], '3': [37, 23]}
updated_offsets: [13, 9, 44]
reward: [113.1215]
###### have got 14 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [36, 24]}
updated_offsets: [11, 11, 44]
reward: [82.6047]
###### have got 15 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [42, 18], '3': [37, 23]}
updated_offsets: [14, 8, 42]
reward: [92.7147]
###### have got 16 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [43, 17], '3': [44, 16]}
updated_offsets: [12, 10, 38]
reward: [103.7975]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [38, 22], '3': [50, 10]}
updated_offsets: [13, 12, 35]
reward: [85.6827]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [41, 19], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [109.3973]
###### have got 19 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [41, 19], '3': [38

reward: [92.8236]
###### have got 8 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [36, 24]}
updated_offsets: [12, 9, 44]
reward: [108.605]
###### have got 9 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [39, 21], '3': [38, 22]}
updated_offsets: [13, 9, 42]
reward: [78.2054]
###### have got 10 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [36, 24]}
updated_offsets: [13, 10, 44]
reward: [85.3977]
###### have got 11 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [41, 19], '3': [36, 24]}
updated_offsets: [11, 9, 45]
reward: [98.2547]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [41, 19], '3': [37, 23]}
updated_offsets: [11, 9, 42]
reward: [98.6161]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [39, 21], '3': [34, 26]}
updated_offsets: [12, 10, 44]
reward: [112.134]
###### have got 14 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [41, 19], '3': [38, 22]}


reward: [110.9309]
###### have got 3 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [36, 24]}
updated_offsets: [13, 9, 45]
reward: [90.3009]
###### have got 4 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [41, 19], '3': [37, 23]}
updated_offsets: [14, 9, 44]
reward: [76.1279]
###### have got 5 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3': [37, 23]}
updated_offsets: [11, 11, 43]
reward: [96.604]
###### have got 6 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [38, 22]}
updated_offsets: [12, 9, 42]
reward: [94.1358]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [40, 20], '3': [37, 23]}
updated_offsets: [14, 9, 42]
reward: [94.8311]
###### have got 8 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [41, 19], '3': [36, 24]}
updated_offsets: [11, 9, 46]
reward: [122.5183]
###### have got 9 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [39, 21]}
updat

reward: [109.118]
###### have got 59 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [41, 19]}
updated_offsets: [9, 10, 41]
reward: [120.1596]
###### have got 60 sample(s) ######
avg. rewards in this episode: [107.2533]
******** end episode 56 ********
spent 16.72 (s) for episode 56
******** start episode 57 ********
new_timing_plans: {'1': [35, 25], '2': [38, 22], '3': [36, 24]}
updated_offsets: [15, 10, 44]
reward: [106.6045]
###### have got 1 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [43, 17], '3': [38, 22]}
updated_offsets: [14, 9, 43]
reward: [93.8202]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [37, 23]}
updated_offsets: [12, 9, 42]
reward: [98.5045]
###### have got 3 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [40, 20], '3': [39, 21]}
updated_offsets: [12, 9, 42]
reward: [94.027]
###### have got 4 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [40, 20], '3': [33, 27]}
updated_o

reward: [107.3256]
###### have got 54 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [31, 29], '3': [38, 22]}
updated_offsets: [12, 12, 42]
reward: [101.4325]
###### have got 55 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [35, 25], '3': [38, 22]}
updated_offsets: [12, 10, 42]
reward: [102.3495]
###### have got 56 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [32, 28], '3': [36, 24]}
updated_offsets: [10, 12, 43]
reward: [102.3778]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [36, 24], '3': [39, 21]}
updated_offsets: [10, 10, 41]
reward: [110.1552]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [34, 26]}
updated_offsets: [13, 11, 44]
reward: [117.1558]
###### have got 59 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [34, 26], '3': [38, 22]}
updated_offsets: [8, 11, 42]
reward: [114.5826]
###### have got 60 sample(s) ######
avg. rewards in this episode: [105.4231]
******** en

reward: [106.0244]
###### have got 49 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [32, 28], '3': [38, 22]}
updated_offsets: [13, 12, 41]
reward: [112.2944]
###### have got 50 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [35, 25], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [113.5789]
###### have got 51 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [38, 22]}
updated_offsets: [10, 11, 42]
reward: [113.2597]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [32, 28], '3': [39, 21]}
updated_offsets: [10, 11, 41]
reward: [99.7995]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [33, 27], '3': [36, 24]}
updated_offsets: [12, 11, 43]
reward: [117.4397]
###### have got 54 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [35, 25]}
updated_offsets: [10, 10, 43]
reward: [111.6048]
###### have got 55 sample(s) ######
new_timing_plans: {'1': [45, 15], '2': [35, 25], '3'

reward: [113.7086]
###### have got 44 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [36, 24], '3': [38, 22]}
updated_offsets: [11, 10, 42]
reward: [105.0536]
###### have got 45 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [39, 21], '3': [35, 25]}
updated_offsets: [10, 10, 43]
reward: [127.5272]
###### have got 46 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [37, 23]}
updated_offsets: [8, 11, 47]
reward: [103.9894]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [38, 22]}
updated_offsets: [12, 10, 42]
reward: [110.7348]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [36, 24]}
updated_offsets: [11, 11, 43]
reward: [114.6745]
###### have got 49 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [38, 22]}
updated_offsets: [10, 10, 42]
reward: [108.2421]
###### have got 50 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [32, 28], '3'

reward: [98.1168]
###### have got 39 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [36, 24], '3': [40, 20]}
updated_offsets: [10, 12, 40]
reward: [115.3788]
###### have got 40 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [35, 25]}
updated_offsets: [11, 10, 43]
reward: [119.9819]
###### have got 41 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [34, 26], '3': [34, 26]}
updated_offsets: [11, 11, 44]
reward: [125.2139]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [36, 24]}
updated_offsets: [12, 11, 43]
reward: [119.0689]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [37, 23], '3': [37, 23]}
updated_offsets: [11, 11, 42]
reward: [135.7427]
###### have got 44 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3': [38, 22]}
updated_offsets: [10, 10, 42]
reward: [118.8114]
###### have got 45 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3'

reward: [127.1535]
###### have got 34 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [38, 22], '3': [39, 21]}
updated_offsets: [9, 10, 41]
reward: [126.4804]
###### have got 35 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [36, 24], '3': [35, 25]}
updated_offsets: [10, 10, 43]
reward: [114.0427]
###### have got 36 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [36, 24]}
updated_offsets: [11, 10, 43]
reward: [128.0622]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [38, 22]}
updated_offsets: [12, 11, 41]
reward: [114.3477]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [37, 23]}
updated_offsets: [10, 10, 42]
reward: [127.1358]
###### have got 39 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [39, 21], '3': [50, 10]}
updated_offsets: [9, 9, 35]
reward: [107.3854]
###### have got 40 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': 

reward: [99.8258]
###### have got 29 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [39, 21]}
updated_offsets: [12, 13, 41]
reward: [110.5368]
###### have got 30 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [42, 18], '3': [38, 22]}
updated_offsets: [11, 9, 43]
reward: [103.2238]
###### have got 31 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [37, 23]}
updated_offsets: [9, 9, 42]
reward: [114.5748]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [35, 25]}
updated_offsets: [11, 11, 43]
reward: [111.7906]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [37, 23]}
updated_offsets: [10, 11, 42]
reward: [119.5632]
###### have got 34 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [37, 23], '3': [37, 23]}
updated_offsets: [11, 10, 42]
reward: [116.6245]
###### have got 35 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [

reward: [96.9221]
###### have got 24 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [38, 22]}
updated_offsets: [11, 10, 42]
reward: [109.9111]
###### have got 25 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [40, 20], '3': [43, 17]}
updated_offsets: [13, 9, 39]
reward: [96.711]
###### have got 26 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [39, 21], '3': [41, 19]}
updated_offsets: [12, 10, 40]
reward: [101.6721]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [42, 18], '3': [39, 21]}
updated_offsets: [10, 9, 43]
reward: [104.5356]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [39, 21]}
updated_offsets: [11, 10, 45]
reward: [101.2926]
###### have got 29 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [50, 10]}
updated_offsets: [12, 12, 35]
reward: [92.8902]
###### have got 30 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [39

reward: [94.8977]
###### have got 19 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [41, 19], '3': [40, 20]}
updated_offsets: [11, 9, 40]
reward: [95.4737]
###### have got 20 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [38, 22], '3': [40, 20]}
updated_offsets: [13, 11, 40]
reward: [100.829]
###### have got 21 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [42, 18], '3': [50, 10]}
updated_offsets: [10, 8, 35]
reward: [103.0104]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [37, 23]}
updated_offsets: [12, 10, 42]
reward: [101.1685]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [37, 23]}
updated_offsets: [10, 10, 42]
reward: [106.1355]
###### have got 24 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [41, 19], '3': [39, 21]}
updated_offsets: [11, 9, 41]
reward: [118.1324]
###### have got 25 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [42, 18], '3': [40

reward: [90.8337]
###### have got 14 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [38, 22], '3': [36, 24]}
updated_offsets: [13, 12, 49]
reward: [92.5849]
###### have got 15 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [38, 22]}
updated_offsets: [11, 10, 42]
reward: [111.6349]
###### have got 16 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [40, 20], '3': [41, 19]}
updated_offsets: [9, 11, 43]
reward: [107.292]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [38, 22], '3': [39, 21]}
updated_offsets: [9, 13, 45]
reward: [101.8436]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [41, 19]}
updated_offsets: [10, 10, 40]
reward: [101.2543]
###### have got 19 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [43, 17], '3': [41, 19]}
updated_offsets: [11, 8, 40]
reward: [113.8239]
###### have got 20 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [42, 18], '3': [42

reward: [109.5607]
###### have got 9 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [38, 22], '3': [39, 21]}
updated_offsets: [10, 10, 43]
reward: [87.2784]
###### have got 10 sample(s) ######
new_timing_plans: {'1': [33, 27], '2': [36, 24], '3': [33, 27]}
updated_offsets: [17, 11, 46]
reward: [84.1039]
###### have got 11 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [40, 20], '3': [37, 23]}
updated_offsets: [12, 10, 43]
reward: [99.1123]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [37, 23]}
updated_offsets: [11, 10, 43]
reward: [100.5841]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [37, 23]}
updated_offsets: [11, 9, 43]
reward: [95.8449]
###### have got 14 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [40, 20], '3': [35, 25]}
updated_offsets: [11, 9, 45]
reward: [97.379]
###### have got 15 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [37, 2

reward: [94.338]
###### have got 4 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [41, 19], '3': [37, 23]}
updated_offsets: [12, 8, 43]
reward: [112.7444]
###### have got 5 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [38, 22], '3': [35, 25]}
updated_offsets: [12, 10, 45]
reward: [98.4712]
###### have got 6 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [38, 22]}
updated_offsets: [12, 9, 43]
reward: [97.2421]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [41, 19], '3': [35, 25]}
updated_offsets: [13, 9, 44]
reward: [103.2527]
###### have got 8 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [38, 22], '3': [34, 26]}
updated_offsets: [15, 10, 44]
reward: [97.8044]
###### have got 9 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [42, 18], '3': [39, 21]}
updated_offsets: [11, 8, 43]
reward: [102.9061]
###### have got 10 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [41, 19], '3': [37, 23]}
up

reward: [104.6505]
###### have got 60 sample(s) ######
avg. rewards in this episode: [106.2246]
******** end episode 67 ********
spent 16.33 (s) for episode 67
******** start episode 68 ********
new_timing_plans: {'1': [35, 25], '2': [39, 21], '3': [33, 27]}
updated_offsets: [14, 10, 45]
reward: [116.8456]
###### have got 1 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [33, 27]}
updated_offsets: [12, 10, 44]
reward: [96.7992]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [41, 19], '3': [35, 25]}
updated_offsets: [13, 10, 46]
reward: [113.5048]
###### have got 3 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [38, 22]}
updated_offsets: [11, 10, 45]
reward: [97.9678]
###### have got 4 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [36, 24]}
updated_offsets: [12, 10, 44]
reward: [96.1177]
###### have got 5 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [38, 22]}
upda

reward: [126.7351]
###### have got 55 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [36, 24]}
updated_offsets: [10, 11, 43]
reward: [107.5995]
###### have got 56 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [33, 27], '3': [36, 24]}
updated_offsets: [11, 11, 43]
reward: [114.5377]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [34, 26], '3': [38, 22]}
updated_offsets: [10, 11, 41]
reward: [124.5346]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [36, 24], '3': [39, 21]}
updated_offsets: [9, 10, 41]
reward: [109.3742]
###### have got 59 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [31, 29], '3': [36, 24]}
updated_offsets: [13, 12, 43]
reward: [107.941]
###### have got 60 sample(s) ######
avg. rewards in this episode: [109.726]
******** end episode 68 ********
spent 16.40 (s) for episode 68
******** start episode 69 ********
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [36, 24]}

reward: [98.2373]
###### have got 50 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [37, 23]}
updated_offsets: [15, 10, 42]
reward: [112.5476]
###### have got 51 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [36, 24]}
updated_offsets: [12, 11, 43]
reward: [114.4847]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [39, 21]}
updated_offsets: [11, 10, 41]
reward: [109.8399]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [33, 27], '3': [39, 21]}
updated_offsets: [9, 11, 41]
reward: [105.4757]
###### have got 54 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [38, 22]}
updated_offsets: [9, 11, 42]
reward: [114.9502]
###### have got 55 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [33, 27], '3': [37, 23]}
updated_offsets: [11, 11, 42]
reward: [113.7481]
###### have got 56 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [35, 25], '3': 

reward: [121.7004]
###### have got 45 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [35, 25]}
updated_offsets: [12, 10, 43]
reward: [102.8082]
###### have got 46 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [40, 20], '3': [41, 19]}
updated_offsets: [12, 10, 49]
reward: [103.6467]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [34, 26], '3': [39, 21]}
updated_offsets: [14, 12, 41]
reward: [116.0613]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [45, 15], '2': [33, 27], '3': [36, 24]}
updated_offsets: [10, 11, 43]
reward: [105.2554]
###### have got 49 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [34, 26], '3': [38, 22]}
updated_offsets: [11, 11, 42]
reward: [113.0984]
###### have got 50 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [34, 26], '3': [36, 24]}
updated_offsets: [11, 11, 43]
reward: [108.6354]
###### have got 51 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [34, 26], '3

reward: [127.5662]
###### have got 40 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [36, 24]}
updated_offsets: [11, 11, 43]
reward: [106.8307]
###### have got 41 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [35, 25], '3': [39, 21]}
updated_offsets: [13, 12, 41]
reward: [113.7612]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [37, 23]}
updated_offsets: [10, 10, 42]
reward: [116.9818]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [36, 24], '3': [36, 24]}
updated_offsets: [10, 12, 43]
reward: [117.991]
###### have got 44 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [38, 22], '3': [37, 23]}
updated_offsets: [8, 11, 42]
reward: [107.9608]
###### have got 45 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [35, 25]}
updated_offsets: [12, 11, 43]
reward: [105.769]
###### have got 46 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [34, 26], '3': 

reward: [110.566]
###### have got 35 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [37, 23]}
updated_offsets: [10, 10, 42]
reward: [123.1705]
###### have got 36 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [35, 25], '3': [36, 24]}
updated_offsets: [12, 11, 43]
reward: [115.8631]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [37, 23]}
updated_offsets: [10, 12, 42]
reward: [131.3321]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [39, 21], '3': [37, 23]}
updated_offsets: [10, 10, 42]
reward: [113.8258]
###### have got 39 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [38, 22], '3': [38, 22]}
updated_offsets: [8, 10, 42]
reward: [113.846]
###### have got 40 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [37, 23], '3': [36, 24]}
updated_offsets: [14, 11, 43]
reward: [115.7016]
###### have got 41 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [37, 23], '3': 

reward: [101.7297]
###### have got 30 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [42, 18], '3': [42, 18]}
updated_offsets: [11, 9, 40]
reward: [92.697]
###### have got 31 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [36, 24], '3': [38, 22]}
updated_offsets: [11, 12, 42]
reward: [114.4952]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [38, 22], '3': [38, 22]}
updated_offsets: [9, 10, 42]
reward: [120.5674]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [37, 23]}
updated_offsets: [12, 11, 42]
reward: [113.5576]
###### have got 34 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [39, 21]}
updated_offsets: [12, 10, 41]
reward: [124.8431]
###### have got 35 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [37, 23], '3': [36, 24]}
updated_offsets: [10, 12, 43]
reward: [130.8814]
###### have got 36 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [

reward: [102.8228]
###### have got 25 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [38, 22], '3': [39, 21]}
updated_offsets: [11, 12, 41]
reward: [96.4526]
###### have got 26 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [43, 17], '3': [50, 10]}
updated_offsets: [11, 9, 35]
reward: [110.8371]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [42, 18], '3': [40, 20]}
updated_offsets: [12, 8, 40]
reward: [107.6611]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [41, 19], '3': [41, 19]}
updated_offsets: [11, 11, 43]
reward: [92.3877]
###### have got 29 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [42, 18], '3': [40, 20]}
updated_offsets: [10, 9, 40]
reward: [91.8847]
###### have got 30 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [41, 19], '3': [40, 20]}
updated_offsets: [9, 9, 43]
reward: [112.0994]
###### have got 31 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [36, 24], '3': [37, 

reward: [95.4976]
###### have got 20 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [36, 24], '3': [34, 26]}
updated_offsets: [11, 11, 44]
reward: [77.2042]
###### have got 21 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [41, 19], '3': [41, 19]}
updated_offsets: [12, 9, 40]
reward: [94.7705]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [45, 15], '3': [50, 10]}
updated_offsets: [10, 8, 36]
reward: [95.6172]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [42, 18], '3': [37, 23]}
updated_offsets: [12, 8, 42]
reward: [99.6476]
###### have got 24 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [43, 17], '3': [50, 10]}
updated_offsets: [13, 8, 35]
reward: [98.0041]
###### have got 25 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [44, 16], '3': [40, 20]}
updated_offsets: [11, 8, 40]
reward: [107.2976]
###### have got 26 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3': [39, 21]

reward: [79.6489]
###### have got 15 sample(s) ######
new_timing_plans: {'1': [33, 27], '2': [38, 22], '3': [33, 27]}
updated_offsets: [16, 12, 47]
reward: [84.4885]
###### have got 16 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [39, 21], '3': [41, 19]}
updated_offsets: [9, 9, 46]
reward: [94.3415]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [40, 20], '3': [40, 20]}
updated_offsets: [11, 9, 40]
reward: [82.7533]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [43, 17], '3': [50, 10]}
updated_offsets: [11, 8, 35]
reward: [98.8191]
###### have got 19 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [38, 22]}
updated_offsets: [11, 12, 43]
reward: [98.2953]
###### have got 20 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [42, 18], '3': [49, 11]}
updated_offsets: [8, 8, 35]
reward: [117.2326]
###### have got 21 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [42, 18], '3': [40, 20]}

reward: [78.8018]
###### have got 10 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [36, 24], '3': [34, 26]}
updated_offsets: [13, 10, 44]
reward: [84.1192]
###### have got 11 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [41, 19], '3': [36, 24]}
updated_offsets: [11, 9, 43]
reward: [101.4559]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [34, 26]}
updated_offsets: [11, 10, 45]
reward: [97.4906]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [40, 20], '3': [38, 22]}
updated_offsets: [12, 9, 43]
reward: [110.3041]
###### have got 14 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [41, 19], '3': [38, 22]}
updated_offsets: [12, 9, 43]
reward: [111.4782]
###### have got 15 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [39, 21], '3': [35, 25]}
updated_offsets: [13, 10, 46]
reward: [104.1278]
###### have got 16 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [39, 21], '3': [40

reward: [94.0947]
###### have got 5 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [38, 22], '3': [35, 25]}
updated_offsets: [14, 10, 43]
reward: [85.9767]
###### have got 6 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [36, 24]}
updated_offsets: [12, 10, 45]
reward: [101.1963]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [36, 24]}
updated_offsets: [8, 9, 43]
reward: [97.9881]
###### have got 8 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [39, 21], '3': [37, 23]}
updated_offsets: [13, 10, 44]
reward: [97.9214]
###### have got 9 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [38, 22], '3': [38, 22]}
updated_offsets: [15, 9, 41]
reward: [82.1318]
###### have got 10 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [36, 24]}
updated_offsets: [12, 9, 43]
reward: [102.4424]
###### have got 11 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [37, 23]}
u

new_timing_plans: {'1': [36, 24], '2': [38, 22], '3': [37, 23]}
updated_offsets: [13, 13, 46]
reward: [113.6564]
###### have got 1 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [41, 19], '3': [36, 24]}
updated_offsets: [13, 10, 46]
reward: [94.1565]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [35, 25]}
updated_offsets: [15, 11, 43]
reward: [95.3452]
###### have got 3 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [36, 24], '3': [40, 20]}
updated_offsets: [13, 15, 42]
reward: [86.0142]
###### have got 4 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [32, 28]}
updated_offsets: [9, 11, 45]
reward: [100.1606]
###### have got 5 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [38, 22], '3': [35, 25]}
updated_offsets: [12, 11, 44]
reward: [91.5236]
###### have got 6 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [35, 25], '3': [36, 24]}
updated_offsets: [13, 12, 46]
reward: [93.7061]
#####

reward: [122.8565]
###### have got 56 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [33, 27], '3': [35, 25]}
updated_offsets: [10, 11, 43]
reward: [96.7166]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [38, 22], '3': [38, 22]}
updated_offsets: [9, 10, 42]
reward: [97.9282]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [35, 25], '3': [37, 23]}
updated_offsets: [11, 10, 42]
reward: [117.4625]
###### have got 59 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [36, 24]}
updated_offsets: [12, 11, 43]
reward: [117.3253]
###### have got 60 sample(s) ######
avg. rewards in this episode: [107.369]
******** end episode 79 ********
spent 16.51 (s) for episode 79
******** start episode 80 ********
new_timing_plans: {'1': [34, 26], '2': [38, 22], '3': [35, 25]}
updated_offsets: [14, 10, 44]
reward: [116.1737]
###### have got 1 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [41, 19], '3': [37, 23]}
u

reward: [106.9664]
###### have got 51 sample(s) ######
new_timing_plans: {'1': [50, 10], '2': [33, 27], '3': [36, 24]}
updated_offsets: [7, 11, 43]
reward: [96.9845]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [35, 25], '3': [39, 21]}
updated_offsets: [10, 10, 41]
reward: [112.7997]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [34, 26], '3': [38, 22]}
updated_offsets: [13, 11, 42]
reward: [116.6351]
###### have got 54 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [32, 28], '3': [35, 25]}
updated_offsets: [10, 11, 43]
reward: [110.8826]
###### have got 55 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [35, 25]}
updated_offsets: [8, 10, 43]
reward: [107.3518]
###### have got 56 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [34, 26], '3': [38, 22]}
updated_offsets: [8, 10, 42]
reward: [106.1774]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [

reward: [115.7359]
###### have got 46 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [32, 28], '3': [37, 23]}
updated_offsets: [14, 12, 42]
reward: [88.5738]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [34, 26], '3': [37, 23]}
updated_offsets: [8, 10, 44]
reward: [109.4522]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [37, 23]}
updated_offsets: [11, 10, 42]
reward: [130.1251]
###### have got 49 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [35, 25], '3': [36, 24]}
updated_offsets: [9, 10, 43]
reward: [114.7875]
###### have got 50 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [37, 23]}
updated_offsets: [13, 10, 42]
reward: [117.2553]
###### have got 51 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [35, 25], '3': [40, 20]}
updated_offsets: [10, 10, 40]
reward: [121.8926]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [34, 26], '3': 

reward: [128.9002]
###### have got 41 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [40, 20], '3': [37, 23]}
updated_offsets: [10, 9, 42]
reward: [127.6054]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [37, 23], '3': [38, 22]}
updated_offsets: [8, 11, 42]
reward: [127.3571]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [38, 22]}
updated_offsets: [11, 10, 41]
reward: [105.2471]
###### have got 44 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [37, 23]}
updated_offsets: [12, 11, 42]
reward: [123.7172]
###### have got 45 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3': [39, 21]}
updated_offsets: [8, 11, 41]
reward: [110.7744]
###### have got 46 sample(s) ######
new_timing_plans: {'1': [44, 16], '2': [34, 26], '3': [41, 19]}
updated_offsets: [9, 11, 40]
reward: [103.8684]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [37, 23], '3': [

reward: [105.3824]
###### have got 36 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [36, 24], '3': [37, 23]}
updated_offsets: [14, 11, 42]
reward: [122.2226]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [38, 22], '3': [35, 25]}
updated_offsets: [10, 10, 43]
reward: [115.5958]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [35, 25], '3': [39, 21]}
updated_offsets: [8, 11, 41]
reward: [113.6161]
###### have got 39 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [35, 25], '3': [37, 23]}
updated_offsets: [15, 10, 42]
reward: [115.2779]
###### have got 40 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [41, 19]}
updated_offsets: [12, 9, 40]
reward: [113.5157]
###### have got 41 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [33, 27]}
updated_offsets: [12, 11, 44]
reward: [108.7624]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [36, 24], '3':

reward: [110.2886]
###### have got 31 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [36, 24], '3': [34, 26]}
updated_offsets: [11, 10, 44]
reward: [108.5746]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [37, 23]}
updated_offsets: [13, 10, 42]
reward: [108.3932]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [38, 22]}
updated_offsets: [13, 10, 42]
reward: [134.2055]
###### have got 34 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [35, 25], '3': [41, 19]}
updated_offsets: [11, 11, 40]
reward: [121.5112]
###### have got 35 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [37, 23]}
updated_offsets: [12, 11, 42]
reward: [120.2031]
###### have got 36 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [38, 22]}
updated_offsets: [8, 10, 42]
reward: [102.8762]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3'

reward: [92.8665]
###### have got 26 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [44, 16], '3': [39, 21]}
updated_offsets: [8, 8, 41]
reward: [100.2591]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [40, 20], '3': [40, 20]}
updated_offsets: [8, 9, 41]
reward: [92.6425]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [42, 18], '3': [41, 19]}
updated_offsets: [12, 9, 40]
reward: [99.9195]
###### have got 29 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [40, 20]}
updated_offsets: [11, 9, 42]
reward: [94.9539]
###### have got 30 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [44, 16], '3': [40, 20]}
updated_offsets: [11, 8, 40]
reward: [113.4219]
###### have got 31 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [37, 23], '3': [36, 24]}
updated_offsets: [11, 10, 43]
reward: [124.9525]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [37, 23]

reward: [103.6634]
###### have got 21 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [37, 23]}
updated_offsets: [9, 10, 44]
reward: [109.7373]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [44, 16], '3': [50, 10]}
updated_offsets: [11, 8, 35]
reward: [99.2796]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [39, 21], '3': [40, 20]}
updated_offsets: [12, 12, 40]
reward: [90.116]
###### have got 24 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [42, 18], '3': [41, 19]}
updated_offsets: [11, 9, 40]
reward: [102.0646]
###### have got 25 sample(s) ######
new_timing_plans: {'1': [44, 16], '2': [37, 23], '3': [39, 21]}
updated_offsets: [7, 10, 41]
reward: [97.7622]
###### have got 26 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [40, 20], '3': [45, 15]}
updated_offsets: [7, 9, 39]
reward: [83.1315]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [36, 24]

reward: [96.3932]
###### have got 16 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [44, 16], '3': [39, 21]}
updated_offsets: [14, 8, 41]
reward: [81.502]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [38, 22], '3': [42, 18]}
updated_offsets: [9, 12, 39]
reward: [116.1465]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [39, 21], '3': [36, 24]}
updated_offsets: [10, 12, 43]
reward: [101.1456]
###### have got 19 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [38, 22]}
updated_offsets: [10, 10, 42]
reward: [107.2919]
###### have got 20 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [40, 20], '3': [40, 20]}
updated_offsets: [9, 9, 40]
reward: [94.5005]
###### have got 21 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [39, 21], '3': [38, 22]}
updated_offsets: [9, 9, 42]
reward: [91.4886]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [39, 21], '3': [41, 19]

reward: [97.9332]
###### have got 11 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [38, 22], '3': [34, 26]}
updated_offsets: [16, 10, 44]
reward: [100.5111]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [39, 21], '3': [36, 24]}
updated_offsets: [12, 11, 45]
reward: [97.0398]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [34, 26]}
updated_offsets: [12, 12, 51]
reward: [94.2308]
###### have got 14 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [39, 21], '3': [37, 23]}
updated_offsets: [13, 9, 44]
reward: [100.333]
###### have got 15 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [39, 21], '3': [36, 24]}
updated_offsets: [13, 10, 44]
reward: [113.4536]
###### have got 16 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [42, 18], '3': [40, 20]}
updated_offsets: [12, 9, 43]
reward: [110.5457]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [41, 19], '3': [37

reward: [117.8213]
###### have got 6 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [39, 21], '3': [37, 23]}
updated_offsets: [13, 11, 44]
reward: [93.5559]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [39, 21], '3': [34, 26]}
updated_offsets: [13, 10, 47]
reward: [88.8431]
###### have got 8 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [41, 19], '3': [37, 23]}
updated_offsets: [12, 9, 43]
reward: [93.2129]
###### have got 9 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [40, 20], '3': [36, 24]}
updated_offsets: [13, 9, 45]
reward: [89.0547]
###### have got 10 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [84.4138]
###### have got 11 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [40, 20], '3': [33, 27]}
updated_offsets: [15, 10, 44]
reward: [76.1687]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [38, 22], '3': [36, 24]}

reward: [118.7191]
###### have got 1 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [40, 20], '3': [37, 23]}
updated_offsets: [11, 10, 43]
reward: [101.7937]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [40, 20], '3': [35, 25]}
updated_offsets: [12, 9, 43]
reward: [107.1074]
###### have got 3 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [39, 21]}
updated_offsets: [9, 10, 45]
reward: [102.0124]
###### have got 4 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [38, 22], '3': [38, 22]}
updated_offsets: [12, 10, 42]
reward: [92.4146]
###### have got 5 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [37, 23]}
updated_offsets: [12, 10, 45]
reward: [86.7021]
###### have got 6 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [36, 24], '3': [36, 24]}
updated_offsets: [13, 10, 43]
reward: [96.261]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [41, 19], '3': [37, 23]}


reward: [99.0344]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [34, 26], '3': [33, 27]}
updated_offsets: [11, 11, 44]
reward: [121.2063]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [35, 25], '3': [37, 23]}
updated_offsets: [12, 10, 42]
reward: [115.2184]
###### have got 59 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [37, 23], '3': [36, 24]}
updated_offsets: [11, 10, 43]
reward: [104.369]
###### have got 60 sample(s) ######
avg. rewards in this episode: [108.3816]
******** end episode 90 ********
spent 16.94 (s) for episode 90
******** start episode 91 ********
new_timing_plans: {'1': [35, 25], '2': [41, 19], '3': [38, 22]}
updated_offsets: [13, 10, 44]
reward: [113.3547]
###### have got 1 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [41, 19], '3': [33, 27]}
updated_offsets: [13, 10, 46]
reward: [93.2763]
###### have got 2 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [40, 20], '3': [38, 22]}
u

reward: [111.1219]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [32, 28], '3': [34, 26]}
updated_offsets: [12, 11, 44]
reward: [100.7884]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [37, 23]}
updated_offsets: [8, 10, 42]
reward: [115.6335]
###### have got 54 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [36, 24], '3': [34, 26]}
updated_offsets: [9, 10, 44]
reward: [115.5069]
###### have got 55 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [33, 27], '3': [39, 21]}
updated_offsets: [8, 11, 43]
reward: [110.5927]
###### have got 56 sample(s) ######
new_timing_plans: {'1': [50, 10], '2': [31, 29], '3': [36, 24]}
updated_offsets: [7, 12, 43]
reward: [109.9928]
###### have got 57 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [37, 23]}
updated_offsets: [10, 10, 42]
reward: [98.2226]
###### have got 58 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [33, 27], '3': [3

reward: [111.3877]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [33, 27], '3': [36, 24]}
updated_offsets: [11, 11, 43]
reward: [101.4808]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [36, 24], '3': [39, 21]}
updated_offsets: [13, 10, 41]
reward: [114.8167]
###### have got 49 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [34, 26], '3': [33, 27]}
updated_offsets: [13, 11, 44]
reward: [105.455]
###### have got 50 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [35, 25], '3': [37, 23]}
updated_offsets: [10, 10, 43]
reward: [110.6438]
###### have got 51 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [34, 26], '3': [34, 26]}
updated_offsets: [12, 11, 44]
reward: [107.2833]
###### have got 52 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [36, 24], '3': [36, 24]}
updated_offsets: [10, 10, 43]
reward: [118.0002]
###### have got 53 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [33, 27], '3'

reward: [107.6095]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [38, 22]}
updated_offsets: [8, 9, 42]
reward: [128.7465]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [35, 25], '3': [37, 23]}
updated_offsets: [13, 11, 42]
reward: [116.4188]
###### have got 44 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [37, 23], '3': [37, 23]}
updated_offsets: [9, 10, 42]
reward: [117.6206]
###### have got 45 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [38, 22], '3': [37, 23]}
updated_offsets: [11, 10, 42]
reward: [118.8716]
###### have got 46 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [35, 25], '3': [39, 21]}
updated_offsets: [10, 11, 42]
reward: [113.9748]
###### have got 47 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [38, 22]}
updated_offsets: [9, 10, 42]
reward: [115.0275]
###### have got 48 sample(s) ######
new_timing_plans: {'1': [42, 18], '2': [34, 26], '3': [

reward: [126.0132]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [36, 24]}
updated_offsets: [12, 11, 43]
reward: [122.8874]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [34, 26]}
updated_offsets: [12, 10, 44]
reward: [114.0745]
###### have got 39 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [37, 23], '3': [34, 26]}
updated_offsets: [10, 11, 44]
reward: [106.5786]
###### have got 40 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [37, 23]}
updated_offsets: [11, 9, 42]
reward: [109.8218]
###### have got 41 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [34, 26], '3': [37, 23]}
updated_offsets: [14, 12, 42]
reward: [116.4618]
###### have got 42 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [37, 23]}
updated_offsets: [10, 12, 42]
reward: [114.1234]
###### have got 43 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [36, 24], '3'

reward: [119.4175]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [40, 20], '3': [37, 23]}
updated_offsets: [12, 10, 42]
reward: [125.4301]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [37, 23], '3': [37, 23]}
updated_offsets: [10, 11, 42]
reward: [113.6339]
###### have got 34 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [36, 24], '3': [38, 22]}
updated_offsets: [10, 11, 42]
reward: [116.4745]
###### have got 35 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [36, 24], '3': [37, 23]}
updated_offsets: [12, 10, 42]
reward: [113.1956]
###### have got 36 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [37, 23], '3': [37, 23]}
updated_offsets: [12, 10, 42]
reward: [128.216]
###### have got 37 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [39, 21], '3': [36, 24]}
updated_offsets: [11, 10, 43]
reward: [124.2344]
###### have got 38 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3'

reward: [95.5007]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [38, 22], '3': [37, 23]}
updated_offsets: [11, 12, 42]
reward: [106.028]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [40, 20], '3': [42, 18]}
updated_offsets: [9, 9, 39]
reward: [108.0386]
###### have got 29 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [39, 21], '3': [39, 21]}
updated_offsets: [9, 10, 43]
reward: [110.0853]
###### have got 30 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [40, 20], '3': [41, 19]}
updated_offsets: [10, 10, 40]
reward: [117.3462]
###### have got 31 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [36, 24], '3': [35, 25]}
updated_offsets: [12, 11, 43]
reward: [120.908]
###### have got 32 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [36, 24], '3': [38, 22]}
updated_offsets: [8, 12, 42]
reward: [119.0408]
###### have got 33 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [38, 22], '3': [35,

reward: [91.5316]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [41, 19], '3': [39, 21]}
updated_offsets: [8, 9, 41]
reward: [97.3125]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [41, 19], '3': [50, 10]}
updated_offsets: [10, 10, 43]
reward: [103.1681]
###### have got 24 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [40, 20], '3': [40, 20]}
updated_offsets: [11, 10, 40]
reward: [102.7342]
###### have got 25 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [39, 21]}
updated_offsets: [12, 9, 41]
reward: [105.248]
###### have got 26 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [38, 22], '3': [38, 22]}
updated_offsets: [9, 12, 45]
reward: [107.3071]
###### have got 27 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [41, 19], '3': [40, 20]}
updated_offsets: [11, 9, 40]
reward: [105.5233]
###### have got 28 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [38, 22], '3': [40, 

reward: [111.8482]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [40, 20], '3': [40, 20]}
updated_offsets: [9, 9, 40]
reward: [103.1205]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [41, 19], '3': [41, 19]}
updated_offsets: [10, 8, 40]
reward: [99.3441]
###### have got 19 sample(s) ######
new_timing_plans: {'1': [41, 19], '2': [41, 19], '3': [43, 17]}
updated_offsets: [10, 10, 39]
reward: [95.9974]
###### have got 20 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [43, 17], '3': [42, 18]}
updated_offsets: [11, 8, 39]
reward: [106.6122]
###### have got 21 sample(s) ######
new_timing_plans: {'1': [43, 17], '2': [42, 18], '3': [39, 21]}
updated_offsets: [8, 8, 41]
reward: [106.2984]
###### have got 22 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [43, 17], '3': [50, 10]}
updated_offsets: [12, 8, 35]
reward: [98.7553]
###### have got 23 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [40, 20], '3': [43, 17

reward: [109.0314]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [37, 23], '3': [35, 25]}
updated_offsets: [13, 11, 46]
reward: [95.8494]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [38, 22], '3': [36, 24]}
updated_offsets: [13, 10, 44]
reward: [88.5987]
###### have got 14 sample(s) ######
new_timing_plans: {'1': [39, 21], '2': [40, 20], '3': [35, 25]}
updated_offsets: [11, 9, 44]
reward: [99.9841]
###### have got 15 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [42, 18], '3': [37, 23]}
updated_offsets: [12, 9, 44]
reward: [102.2613]
###### have got 16 sample(s) ######
new_timing_plans: {'1': [40, 20], '2': [41, 19], '3': [40, 20]}
updated_offsets: [11, 10, 42]
reward: [111.7812]
###### have got 17 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [43, 17], '3': [50, 10]}
updated_offsets: [11, 8, 35]
reward: [113.8094]
###### have got 18 sample(s) ######
new_timing_plans: {'1': [38, 22], '2': [39, 21], '3': [37

reward: [100.969]
###### have got 7 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [39, 21], '3': [34, 26]}
updated_offsets: [14, 10, 45]
reward: [94.4392]
###### have got 8 sample(s) ######
new_timing_plans: {'1': [35, 25], '2': [39, 21], '3': [36, 24]}
updated_offsets: [16, 10, 43]
reward: [98.3245]
###### have got 9 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [40, 20], '3': [35, 25]}
updated_offsets: [14, 9, 44]
reward: [104.112]
###### have got 10 sample(s) ######
new_timing_plans: {'1': [34, 26], '2': [39, 21], '3': [33, 27]}
updated_offsets: [14, 9, 44]
reward: [86.9922]
###### have got 11 sample(s) ######
new_timing_plans: {'1': [37, 23], '2': [39, 21], '3': [37, 23]}
updated_offsets: [12, 11, 46]
reward: [97.9597]
###### have got 12 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [39, 21], '3': [36, 24]}
updated_offsets: [13, 11, 46]
reward: [100.6335]
###### have got 13 sample(s) ######
new_timing_plans: {'1': [36, 24], '2': [41, 19], '3': [35, 25]

In [7]:
# ========================================== Record training parameters and episodes ==========================================
# Wall-clock statistics over the per-episode durations collected in spent_times.
max_time_per_episode = max(spent_times)
min_time_per_episode = min(spent_times)
avg_time_per_episode = sum(spent_times) / len(spent_times)

# Format the summary once so the console output and log.txt always show the same text.
timing_summary = '\n'.join([
    '====== testing %s episodes ======' % len(spent_times),
    'max_time_per_episode: %.2f s' % max_time_per_episode,
    'min_time_per_episode: %.2f s' % min_time_per_episode,
    'avg_time_per_episode: %.2f s' % avg_time_per_episode,
])
print(timing_summary)

# open(..., 'w') does not create missing folders, so make sure the output folder exists.
os.makedirs('test_results', exist_ok=True)
with open('test_results/log.txt', 'w') as f:
    f.write(timing_summary)
    

# ========================================== Training results are recorded below ==========================================
# Keep only the first component (index 0) of each episode's average reward.
# NOTE(review): presumably the single agent's reward, since n_agents = 1 — confirm.
global_agent = [episode_reward[0] for episode_reward in avg_rewards_per_episode]

# ========================================== Write rewards to txt files ==========================================
# Each file holds one bracketed, comma-separated list without spaces, e.g. [1.0,2.0].
# Joining the items keeps the brackets balanced, so an empty series yields "[]".
os.makedirs('test_results', exist_ok=True)  # open(..., 'w') does not create missing folders
for file_name, series in (
    ('reward.txt', global_agent),
    ('global_reward.txt', avg_global_rewards_per_episode),
):
    with open('test_results/' + file_name, 'w') as f:
        f.write('[' + ','.join('%s' % value for value in series) + ']')
            

# ========================================== Write travel times to txt files ==========================================
# Column index -> identifier, as noted by the original author
# (presumably the Vissim travel-time measurement numbers — confirm):
#   0-11:  111, 112, 113, ... , 141, 142, 143
#   12-23: 211, 212, 213, ... , 241, 242, 243
#   24-35: 311, 312, 313, ... , 341, 342, 343
# Transpose the (episode, detector) tables into one series per detector across episodes.
travtime_car_for_each_detectors = [
    [avg_travtime_car_per_episode[episode][detector] for episode in range(num_episodes)]
    for detector in range(36)
]
travtime_scooter_for_each_detectors = [
    [avg_travtime_scooter_per_episode[episode][detector] for episode in range(num_episodes)]
    for detector in range(36)
]

# One file per vehicle type and detector; files are numbered from 1.
# Joining the items keeps the brackets balanced, so an empty series yields "[]".
os.makedirs('test_results/traveltime', exist_ok=True)  # open(..., 'w') does not create missing folders
for vehicle, per_detector in (
    ('car', travtime_car_for_each_detectors),
    ('scooter', travtime_scooter_for_each_detectors),
):
    for detector_no, series in enumerate(per_detector, start=1):
        with open('test_results/traveltime/%s travel time_%s.txt' % (vehicle, detector_no), 'w') as f:
            f.write('[' + ','.join('%s' % value for value in series) + ']')


# ========================================== Write phase-time changes to txt files ==========================================
# Split each episode's per-node averages (index 0, 1, 2 -> node 1, 2, 3) into one series per node and phase.
node_1_phase_1 = [phase_times[0] for phase_times in avg_phase_time_1_per_episode]
node_2_phase_1 = [phase_times[1] for phase_times in avg_phase_time_1_per_episode]
node_3_phase_1 = [phase_times[2] for phase_times in avg_phase_time_1_per_episode]

node_1_phase_2 = [phase_times[0] for phase_times in avg_phase_time_2_per_episode]
node_2_phase_2 = [phase_times[1] for phase_times in avg_phase_time_2_per_episode]
node_3_phase_2 = [phase_times[2] for phase_times in avg_phase_time_2_per_episode]

# Each file holds one bracketed, comma-separated list without spaces.
# Joining the items keeps the brackets balanced, so an empty series yields "[]".
os.makedirs('test_results/phase', exist_ok=True)  # open(..., 'w') does not create missing folders
for series_name, series in (
    ('node_1_phase_1', node_1_phase_1),
    ('node_1_phase_2', node_1_phase_2),
    ('node_2_phase_1', node_2_phase_1),
    ('node_2_phase_2', node_2_phase_2),
    ('node_3_phase_1', node_3_phase_1),
    ('node_3_phase_2', node_3_phase_2),
):
    with open('test_results/phase/%s.txt' % series_name, 'w') as f:
        f.write('[' + ','.join('%s' % value for value in series) + ']')
        
        
# ========================================== Write offset changes to txt files ==========================================
# Split each episode's per-node average offsets (index 0, 1, 2 -> node 1, 2, 3) into one series per node.
node_1_offset = [offsets[0] for offsets in avg_offset_per_episode]
node_2_offset = [offsets[1] for offsets in avg_offset_per_episode]
node_3_offset = [offsets[2] for offsets in avg_offset_per_episode]

# The offset files are stored in the phase folder.
# Joining the items keeps the brackets balanced, so an empty series yields "[]".
os.makedirs('test_results/phase', exist_ok=True)  # open(..., 'w') does not create missing folders
for series_name, series in (
    ('node_1_offset', node_1_offset),
    ('node_2_offset', node_2_offset),
    ('node_3_offset', node_3_offset),
):
    with open('test_results/phase/%s.txt' % series_name, 'w') as f:
        f.write('[' + ','.join('%s' % value for value in series) + ']')
            
            
# ========================================== Write queue lengths to txt files ==========================================
# Transpose the (episode, approach) table into one series per approach across episodes.
q_len_for_each_approach = [
    [avg_queue_length_per_episode[episode][approach] for episode in range(num_episodes)]
    for approach in range(12)
]

# File suffixes are the odd numbers 1, 3, ..., 23 (2 * position - 1).
# NOTE(review): presumably these match the Vissim queue-counter numbers — confirm.
# Joining the items keeps the brackets balanced, so an empty series yields "[]".
os.makedirs('test_results/queue', exist_ok=True)  # open(..., 'w') does not create missing folders
for position, series in enumerate(q_len_for_each_approach, start=1):
    with open('test_results/queue/queue_length_%s.txt' % (position * 2 - 1), 'w') as f:
        f.write('[' + ','.join('%s' % value for value in series) + ']')
                
# ========================================== Write the average number of stops per episode to txt files ==========================================
# Each file holds one bracketed, comma-separated list without spaces.
# Joining the items keeps the brackets balanced, so an empty series yields "[]".
os.makedirs('test_results', exist_ok=True)  # open(..., 'w') does not create missing folders
for file_name, series in (
    ('number_of_stops_car.txt', avg_num_stops_car),
    ('number_of_stops_scooter.txt', avg_num_stops_scooter),
):
    with open('test_results/' + file_name, 'w') as f:
        f.write('[' + ','.join('%s' % value for value in series) + ']')

# ========================================== Write the average delay time per episode to txt files ==========================================
# Each file holds one bracketed, comma-separated list without spaces.
# Joining the items keeps the brackets balanced, so an empty series yields "[]".
os.makedirs('test_results', exist_ok=True)  # open(..., 'w') does not create missing folders
for file_name, series in (
    ('delay_car.txt', avg_delay_car),
    ('delay_scooter.txt', avg_delay_scooter),
):
    with open('test_results/' + file_name, 'w') as f:
        f.write('[' + ','.join('%s' % value for value in series) + ']')
            
# ========================================== Write the average total travel time per episode to txt files ==========================================
# Each file holds one bracketed, comma-separated list without spaces.
# Joining the items keeps the brackets balanced, so an empty series yields "[]".
os.makedirs('test_results', exist_ok=True)  # open(..., 'w') does not create missing folders
for file_name, series in (
    ('TravTmTot_car.txt', avg_travtmtot_car),
    ('TravTmTot_scooter.txt', avg_travtmtot_scooter),
):
    with open('test_results/' + file_name, 'w') as f:
        f.write('[' + ','.join('%s' % value for value in series) + ']')
            
# ========================================== Write the average total number of stops per episode to txt files ==========================================
# Each file holds one bracketed, comma-separated list without spaces.
# Joining the items keeps the brackets balanced, so an empty series yields "[]".
os.makedirs('test_results', exist_ok=True)  # open(..., 'w') does not create missing folders
for file_name, series in (
    ('StopsTot_car.txt', avg_stopstot_car),
    ('StopsTot_scooter.txt', avg_stopstot_scooter),
):
    with open('test_results/' + file_name, 'w') as f:
        f.write('[' + ','.join('%s' % value for value in series) + ']')
            
# ========================================== Write the average total delay time per episode to txt files ==========================================
# Each file holds one bracketed, comma-separated list without spaces.
# Joining the items keeps the brackets balanced, so an empty series yields "[]".
os.makedirs('test_results', exist_ok=True)  # open(..., 'w') does not create missing folders
for file_name, series in (
    ('DelayTot_car.txt', avg_delaytot_car),
    ('DelayTot_scooter.txt', avg_delaytot_scooter),
):
    with open('test_results/' + file_name, 'w') as f:
        f.write('[' + ','.join('%s' % value for value in series) + ']')

max_time_per_episode: 16.94 s
min_time_per_episode: 16.04 s
avg_time_per_episode: 16.44 s


In [5]:
# Stop the simulation on the Vissim environment, then call the wrapper's del_pre_simulation().
# NOTE(review): both methods live in the project-local vissim module; del_pre_simulation()
# presumably discards the stored results of previous simulation runs — confirm against the wrapper.
env.stop_simulation()
env.del_pre_simulation()

In [6]:
# Close the Vissim environment created at the top of the notebook.
# NOTE(review): close() is defined in the project-local vissim module; presumably it
# releases the connection to the Vissim application — confirm against the wrapper.
env.close()