In [1]:
import numpy as np
import collections
import gym

import ptan
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [2]:
class PGN(nn.Module):
    def __init__(self,input_size,n_actions):
        super(PGN,self).__init__()
        self.net = nn.Sequential(
            nn.Linear(input_size,512),
            nn.ReLU(),
            nn.Linear(512,256),
            nn.ReLU(),
            nn.Linear(256,n_actions)
        )
        
    def forward(self,x):
        logits = self.net(x)
        return logits

In [3]:
class MeanBuffer():
    def __init__(self,capacity):
        self.capacity = capacity
        self.deque = collections.deque(maxlen=capacity)
        self.sum = 0.0
        
    def add(self,val):
        if len(self.deque)==self.capacity:
            self.sum -= self.deque[0]
        self.deque.append(val)
        self.sum+=val
    
    def mean(self):
        if not self.deque:
            return 0.0
        return self.sum/len(self.deque)

In [4]:
def getNewBatch(net,env,gamma,device='cpu',skip=1):
    while True:
        state = env.reset()
        probs =  F.softmax(net( torch.FloatTensor( state ).to(device) )).to('cpu').detach().numpy()
        action = np.random.choice( range(env.action_space.n), p=probs )
        states = [];actions = [];rewards = [];dones=[]
        done = False
        while not done:
            states.append(state)
            rewards.append(0)
            for _ in range(skip):
                state,reward,done,_ = env.step(action)
                rewards[-1]+=reward
                if done:
                    break
            actions.append(action)
            dones.append(done)

        total_rewards = sum(rewards)
        rewards.reverse()
        for i in range(1,len(rewards)):
            rewards[i] += gamma*rewards[i-1]
        rewards.reverse()

        for state,reward,action,done in zip(states,rewards,actions,dones):
            yield state,reward,action,done,total_rewards

In [4]:
TARGET_REWARD = 50
GAMMA = 0.99
LEARNING_RATE = 3e-4
BATCH_SIZE = 32
ENTROPY_BETA = 0.01
BELLMAN_STEPS = 5
BASELINE_STEPS = 50000

In [7]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = gym.make('LunarLander-v2')
net = PGN(env.observation_space.shape[0], env.action_space.n).to(device)

#agent = ptan.agent.PolicyAgent(net,device=device,preprocessor=ptan.agent.float32_preprocessor)
#exp_source = ptan.experience.ExperienceSourceFirstLast(env,agent,steps_count=BELLMAN_STEPS,gamma=GAMMA)

optimizer = optim.Adam(net.parameters(),lr=LEARNING_RATE)

total_rewards=  []
step_rewards = []
baseline_buff = MeanBuffer(BASELINE_STEPS)
step_idx= 0
done_episodes= 0

batch_states=[];batch_actions =[];batch_scales =[]
for state,reward,action,done,total_reward in getNewBatch(net,env,GAMMA,device):
    step_idx+=1
    baseline_buff.add(reward)
    baseline = baseline_buff.mean()
    batch_states.append(state)
    batch_actions.append(action)
    batch_scales.append(reward - baseline )

    if done:
        done_episodes += 1
        total_rewards.append(total_reward)
        mean_rewards = float(np.mean(total_rewards[-100:]))
        if done_episodes%50 == 0:
            print("%d: reward: %6.2f, mean_100: %6.2f, episodes: %d" % (
                    step_idx, reward, mean_rewards, done_episodes))
        if mean_rewards > TARGET_REWARD:
            print("Solved in %d steps and %d episodes!" % (step_idx, done_episodes))
            break

    if len(batch_states) < BATCH_SIZE:
        continue

    states_v = torch.FloatTensor(batch_states).to(device)
    actions_v = torch.LongTensor(batch_actions).to(device)
    scales_v = torch.FloatTensor(batch_scales).to(device)

    #apply gradient descent
    optimizer.zero_grad()

    #softmax output
    logit_v = net(states_v)
    log_prob_v = F.log_softmax(logit_v,dim=1)
    log_prob_action_v = scales_v * log_prob_v[range(BATCH_SIZE),actions_v]
    loss_policy_v = -log_prob_action_v.mean()

    prob_v = F.softmax(logit_v,dim=1)
    entropy_v = -(prob_v * log_prob_v).sum(dim=1).mean()
    entropy_loss_v = -ENTROPY_BETA * entropy_v
    loss_v = loss_policy_v + entropy_loss_v

    loss_v.backward()
    optimizer.step()

    batch_states.clear()
    batch_actions.clear()
    batch_scales.clear()  

  result = entry_point.load(False)
  after removing the cwd from sys.path.


104: reward: -100.00, mean_100: -593.09, episodes: 1
165: reward: -100.00, mean_100: -572.45, episodes: 2
239: reward: -100.00, mean_100: -640.78, episodes: 3
488: reward: -100.00, mean_100: -1040.10, episodes: 4
574: reward: -100.00, mean_100: -868.06, episodes: 5
638: reward: -100.00, mean_100: -749.31, episodes: 6
725: reward: -100.00, mean_100: -790.00, episodes: 7
784: reward: -100.00, mean_100: -758.95, episodes: 8
846: reward: -100.00, mean_100: -689.13, episodes: 9
911: reward: -100.00, mean_100: -631.79, episodes: 10
1001: reward: -100.00, mean_100: -652.50, episodes: 11
1067: reward: -100.00, mean_100: -612.15, episodes: 12
1137: reward: -100.00, mean_100: -605.59, episodes: 13
1217: reward: -100.00, mean_100: -613.44, episodes: 14
1274: reward: -100.00, mean_100: -582.20, episodes: 15
1330: reward: -100.00, mean_100: -553.57, episodes: 16
1410: reward: -100.00, mean_100: -568.02, episodes: 17
1465: reward: -100.00, mean_100: -556.97, episodes: 18
1549: reward: -100.00, mean_

11082: reward: -100.00, mean_100: -175.29, episodes: 152
11169: reward: -100.00, mean_100: -175.76, episodes: 153
11254: reward: -100.00, mean_100: -175.88, episodes: 154
11308: reward: -100.00, mean_100: -169.36, episodes: 155
11365: reward: -100.00, mean_100: -169.18, episodes: 156
11450: reward: -100.00, mean_100: -169.91, episodes: 157
11532: reward: -100.00, mean_100: -169.77, episodes: 158
11614: reward: -100.00, mean_100: -170.13, episodes: 159
11689: reward: -100.00, mean_100: -170.54, episodes: 160
11777: reward: -100.00, mean_100: -166.91, episodes: 161
11860: reward: -100.00, mean_100: -167.34, episodes: 162
11912: reward: -100.00, mean_100: -167.25, episodes: 163
12000: reward: -100.00, mean_100: -163.27, episodes: 164
12070: reward: -100.00, mean_100: -163.41, episodes: 165
12146: reward: -100.00, mean_100: -164.30, episodes: 166
12200: reward: -100.00, mean_100: -163.91, episodes: 167
12282: reward: -100.00, mean_100: -164.15, episodes: 168
12342: reward: -100.00, mean_10

21438: reward: -100.00, mean_100: -132.42, episodes: 303
21510: reward: -100.00, mean_100: -131.26, episodes: 304
21592: reward: -100.00, mean_100: -132.94, episodes: 305
21676: reward: -100.00, mean_100: -132.65, episodes: 306
21733: reward: -100.00, mean_100: -132.12, episodes: 307
21817: reward: -100.00, mean_100: -131.96, episodes: 308
21879: reward: -100.00, mean_100: -132.17, episodes: 309
21931: reward: -100.00, mean_100: -132.26, episodes: 310
21985: reward: -100.00, mean_100: -131.99, episodes: 311
22047: reward: -100.00, mean_100: -131.58, episodes: 312
22103: reward: -100.00, mean_100: -131.23, episodes: 313
22187: reward: -100.00, mean_100: -130.93, episodes: 314
22264: reward: -100.00, mean_100: -130.64, episodes: 315
22341: reward: -100.00, mean_100: -130.95, episodes: 316
22410: reward: -100.00, mean_100: -131.17, episodes: 317
22462: reward: -100.00, mean_100: -130.57, episodes: 318
22548: reward: -100.00, mean_100: -130.74, episodes: 319
22614: reward: -100.00, mean_10

32049: reward: -100.00, mean_100: -134.24, episodes: 455
32136: reward: -100.00, mean_100: -134.74, episodes: 456
32206: reward: -100.00, mean_100: -134.92, episodes: 457
32292: reward: -100.00, mean_100: -134.42, episodes: 458
32371: reward: -100.00, mean_100: -134.71, episodes: 459
32442: reward: -100.00, mean_100: -134.72, episodes: 460
32531: reward: -100.00, mean_100: -133.53, episodes: 461
32615: reward: -100.00, mean_100: -133.12, episodes: 462
32701: reward: -100.00, mean_100: -133.55, episodes: 463
32773: reward: -100.00, mean_100: -134.01, episodes: 464
32828: reward: -100.00, mean_100: -134.28, episodes: 465
32892: reward: -100.00, mean_100: -134.68, episodes: 466
32948: reward: -100.00, mean_100: -134.62, episodes: 467
33014: reward: -100.00, mean_100: -134.80, episodes: 468
33077: reward: -100.00, mean_100: -134.80, episodes: 469
33166: reward: -100.00, mean_100: -135.84, episodes: 470
33234: reward: -100.00, mean_100: -135.99, episodes: 471
33313: reward: -100.00, mean_10

42184: reward: -100.00, mean_100: -138.88, episodes: 600
42249: reward: -100.00, mean_100: -138.81, episodes: 601
42312: reward: -100.00, mean_100: -138.79, episodes: 602
42381: reward: -100.00, mean_100: -138.85, episodes: 603
42453: reward: -100.00, mean_100: -139.12, episodes: 604
42511: reward: -100.00, mean_100: -138.64, episodes: 605
42595: reward: -100.00, mean_100: -138.88, episodes: 606
42654: reward: -100.00, mean_100: -138.00, episodes: 607
42734: reward: -100.00, mean_100: -137.75, episodes: 608
42788: reward: -100.00, mean_100: -137.63, episodes: 609
42844: reward: -100.00, mean_100: -139.03, episodes: 610
42914: reward: -100.00, mean_100: -138.11, episodes: 611
42986: reward: -100.00, mean_100: -138.28, episodes: 612
43048: reward: -100.00, mean_100: -138.56, episodes: 613
43105: reward: -100.00, mean_100: -138.35, episodes: 614
43162: reward: -100.00, mean_100: -138.34, episodes: 615
43249: reward: -100.00, mean_100: -138.08, episodes: 616
43314: reward: -100.00, mean_10

52029: reward: -100.00, mean_100: -127.52, episodes: 745
52081: reward: -100.00, mean_100: -127.16, episodes: 746
52171: reward: -100.00, mean_100: -127.30, episodes: 747
52227: reward: -100.00, mean_100: -127.13, episodes: 748
52314: reward: -100.00, mean_100: -127.56, episodes: 749
52387: reward: -100.00, mean_100: -128.20, episodes: 750
52462: reward: -100.00, mean_100: -128.70, episodes: 751
52527: reward: -100.00, mean_100: -128.76, episodes: 752
52617: reward: -100.00, mean_100: -128.62, episodes: 753
52678: reward: -100.00, mean_100: -128.72, episodes: 754
52758: reward: -100.00, mean_100: -129.84, episodes: 755
52817: reward: -100.00, mean_100: -131.60, episodes: 756
52886: reward: -100.00, mean_100: -132.21, episodes: 757
52970: reward: -100.00, mean_100: -132.53, episodes: 758
53035: reward: -100.00, mean_100: -132.57, episodes: 759
53120: reward: -100.00, mean_100: -134.22, episodes: 760
53175: reward: -100.00, mean_100: -134.84, episodes: 761
53226: reward: -100.00, mean_10

61974: reward: -100.00, mean_100: -135.42, episodes: 890
62047: reward: -100.00, mean_100: -136.08, episodes: 891
62136: reward: -100.00, mean_100: -136.27, episodes: 892
62210: reward: -100.00, mean_100: -136.34, episodes: 893
62292: reward: -100.00, mean_100: -136.26, episodes: 894
62364: reward: -100.00, mean_100: -136.69, episodes: 895
62462: reward: -100.00, mean_100: -135.23, episodes: 896
62512: reward: -100.00, mean_100: -134.96, episodes: 897
62570: reward: -100.00, mean_100: -134.54, episodes: 898
62662: reward: -100.00, mean_100: -134.33, episodes: 899
62730: reward: -100.00, mean_100: -134.78, episodes: 900
62817: reward: -100.00, mean_100: -134.57, episodes: 901
62869: reward: -100.00, mean_100: -133.90, episodes: 902
62934: reward: -100.00, mean_100: -133.91, episodes: 903
63017: reward: -100.00, mean_100: -134.17, episodes: 904
63072: reward: -100.00, mean_100: -134.50, episodes: 905
63134: reward: -100.00, mean_100: -134.82, episodes: 906
63210: reward: -100.00, mean_10

72579: reward: -100.00, mean_100: -133.22, episodes: 1041
72637: reward: -100.00, mean_100: -132.84, episodes: 1042
72722: reward: -100.00, mean_100: -132.68, episodes: 1043
72797: reward: -100.00, mean_100: -133.03, episodes: 1044
72868: reward: -100.00, mean_100: -130.90, episodes: 1045
72928: reward: -100.00, mean_100: -130.90, episodes: 1046
73012: reward: -100.00, mean_100: -130.77, episodes: 1047
73099: reward: -100.00, mean_100: -130.84, episodes: 1048
73167: reward: -100.00, mean_100: -131.10, episodes: 1049
73221: reward: -100.00, mean_100: -131.03, episodes: 1050
73287: reward: -100.00, mean_100: -130.89, episodes: 1051
73354: reward: -100.00, mean_100: -130.98, episodes: 1052
73411: reward: -100.00, mean_100: -131.26, episodes: 1053
73499: reward: -100.00, mean_100: -130.35, episodes: 1054
73582: reward: -100.00, mean_100: -130.61, episodes: 1055
73658: reward: -100.00, mean_100: -130.70, episodes: 1056
73727: reward: -100.00, mean_100: -131.02, episodes: 1057
73802: reward:

82707: reward: -100.00, mean_100: -136.54, episodes: 1186
82763: reward: -100.00, mean_100: -136.52, episodes: 1187
82823: reward: -100.00, mean_100: -136.68, episodes: 1188
82907: reward: -100.00, mean_100: -137.01, episodes: 1189
82975: reward: -100.00, mean_100: -137.41, episodes: 1190
83057: reward: -100.00, mean_100: -135.26, episodes: 1191
83119: reward: -100.00, mean_100: -135.53, episodes: 1192
83176: reward: -100.00, mean_100: -135.83, episodes: 1193
83256: reward: -100.00, mean_100: -135.09, episodes: 1194
83321: reward: -100.00, mean_100: -134.77, episodes: 1195
83381: reward: -100.00, mean_100: -135.10, episodes: 1196
83466: reward: -100.00, mean_100: -134.63, episodes: 1197
83543: reward: -100.00, mean_100: -134.66, episodes: 1198
83632: reward: -100.00, mean_100: -134.14, episodes: 1199
83706: reward: -100.00, mean_100: -133.77, episodes: 1200
83760: reward: -100.00, mean_100: -134.19, episodes: 1201
83847: reward: -100.00, mean_100: -133.02, episodes: 1202
83926: reward:

93461: reward: -100.00, mean_100: -136.03, episodes: 1334
93552: reward: -100.00, mean_100: -135.93, episodes: 1335
93606: reward: -100.00, mean_100: -136.11, episodes: 1336
93691: reward: -100.00, mean_100: -135.49, episodes: 1337
93772: reward: -100.00, mean_100: -134.23, episodes: 1338
93843: reward: -100.00, mean_100: -134.88, episodes: 1339
93907: reward: -100.00, mean_100: -134.90, episodes: 1340
93995: reward: -100.00, mean_100: -134.13, episodes: 1341
94054: reward: -100.00, mean_100: -134.18, episodes: 1342
94108: reward: -100.00, mean_100: -133.37, episodes: 1343
94167: reward: -100.00, mean_100: -133.66, episodes: 1344
94225: reward: -100.00, mean_100: -133.37, episodes: 1345
94276: reward: -100.00, mean_100: -132.97, episodes: 1346
94335: reward: -100.00, mean_100: -132.14, episodes: 1347
94388: reward: -100.00, mean_100: -132.21, episodes: 1348
94463: reward: -100.00, mean_100: -132.23, episodes: 1349
94547: reward: -100.00, mean_100: -131.25, episodes: 1350
94614: reward:

103531: reward: -100.00, mean_100: -128.91, episodes: 1482
103583: reward: -100.00, mean_100: -129.30, episodes: 1483
103672: reward: -100.00, mean_100: -129.77, episodes: 1484
103748: reward: -100.00, mean_100: -130.07, episodes: 1485
103827: reward: -100.00, mean_100: -130.90, episodes: 1486
103881: reward: -100.00, mean_100: -130.54, episodes: 1487
103949: reward: -100.00, mean_100: -131.73, episodes: 1488
104018: reward: -100.00, mean_100: -131.38, episodes: 1489
104075: reward: -100.00, mean_100: -131.22, episodes: 1490
104139: reward: -100.00, mean_100: -131.35, episodes: 1491
104223: reward: -100.00, mean_100: -131.63, episodes: 1492
104306: reward: -100.00, mean_100: -130.86, episodes: 1493
104366: reward: -100.00, mean_100: -130.80, episodes: 1494
104446: reward: -100.00, mean_100: -130.40, episodes: 1495
104498: reward: -100.00, mean_100: -130.27, episodes: 1496
104586: reward: -100.00, mean_100: -130.64, episodes: 1497
104644: reward: -100.00, mean_100: -131.30, episodes: 14

113586: reward: -100.00, mean_100: -133.97, episodes: 1629
113661: reward: -100.00, mean_100: -134.62, episodes: 1630
113726: reward: -100.00, mean_100: -127.49, episodes: 1631
113791: reward: -100.00, mean_100: -127.69, episodes: 1632
113844: reward: -100.00, mean_100: -127.49, episodes: 1633
113898: reward: -100.00, mean_100: -127.73, episodes: 1634
113954: reward: -100.00, mean_100: -127.45, episodes: 1635
114031: reward: -100.00, mean_100: -128.49, episodes: 1636
114086: reward: -100.00, mean_100: -127.83, episodes: 1637
114142: reward: -100.00, mean_100: -127.85, episodes: 1638
114225: reward: -100.00, mean_100: -127.46, episodes: 1639
114290: reward: -100.00, mean_100: -127.56, episodes: 1640
114357: reward: -100.00, mean_100: -127.59, episodes: 1641
114416: reward: -100.00, mean_100: -127.29, episodes: 1642
114469: reward: -100.00, mean_100: -127.50, episodes: 1643
114537: reward: -100.00, mean_100: -127.47, episodes: 1644
114626: reward: -100.00, mean_100: -126.69, episodes: 16

123482: reward: -100.00, mean_100: -127.58, episodes: 1772
123559: reward: -100.00, mean_100: -128.59, episodes: 1773
123633: reward: -100.00, mean_100: -128.52, episodes: 1774
123716: reward: -100.00, mean_100: -128.58, episodes: 1775
123777: reward: -100.00, mean_100: -128.30, episodes: 1776
123844: reward: -100.00, mean_100: -128.40, episodes: 1777
123939: reward: -100.00, mean_100: -127.48, episodes: 1778
124038: reward: -100.00, mean_100: -126.46, episodes: 1779
124111: reward: -100.00, mean_100: -126.37, episodes: 1780
124191: reward: -100.00, mean_100: -126.16, episodes: 1781
124287: reward: -100.00, mean_100: -127.58, episodes: 1782
124347: reward: -100.00, mean_100: -125.26, episodes: 1783
124400: reward: -100.00, mean_100: -125.10, episodes: 1784
124452: reward: -100.00, mean_100: -125.14, episodes: 1785
124505: reward: -100.00, mean_100: -125.18, episodes: 1786
124573: reward: -100.00, mean_100: -124.53, episodes: 1787
124647: reward: -100.00, mean_100: -125.69, episodes: 17

133551: reward: -100.00, mean_100: -137.81, episodes: 1913
133634: reward: -100.00, mean_100: -136.64, episodes: 1914
133708: reward: -100.00, mean_100: -137.37, episodes: 1915
133796: reward: -100.00, mean_100: -137.26, episodes: 1916
133856: reward: -100.00, mean_100: -136.80, episodes: 1917
133925: reward: -100.00, mean_100: -136.31, episodes: 1918
133995: reward: -100.00, mean_100: -136.15, episodes: 1919
134046: reward: -100.00, mean_100: -135.71, episodes: 1920
134108: reward: -100.00, mean_100: -135.19, episodes: 1921
134173: reward: -100.00, mean_100: -135.01, episodes: 1922
134257: reward: -100.00, mean_100: -135.10, episodes: 1923
134321: reward: -100.00, mean_100: -135.36, episodes: 1924
134409: reward: -100.00, mean_100: -135.34, episodes: 1925
134481: reward: -100.00, mean_100: -135.40, episodes: 1926
134550: reward: -100.00, mean_100: -135.59, episodes: 1927
134615: reward: -100.00, mean_100: -135.51, episodes: 1928
134686: reward: -100.00, mean_100: -135.31, episodes: 19

143729: reward: -100.00, mean_100: -130.25, episodes: 2057
143786: reward: -100.00, mean_100: -129.57, episodes: 2058
143839: reward: -100.00, mean_100: -129.30, episodes: 2059
143893: reward: -100.00, mean_100: -129.24, episodes: 2060
143952: reward: -100.00, mean_100: -129.00, episodes: 2061
144015: reward: -100.00, mean_100: -129.11, episodes: 2062
144078: reward: -100.00, mean_100: -129.22, episodes: 2063
144161: reward: -100.00, mean_100: -129.64, episodes: 2064
144219: reward: -100.00, mean_100: -130.62, episodes: 2065
144279: reward: -100.00, mean_100: -130.96, episodes: 2066
144363: reward: -100.00, mean_100: -130.69, episodes: 2067
144435: reward: -100.00, mean_100: -131.01, episodes: 2068
144485: reward: -100.00, mean_100: -130.97, episodes: 2069
144563: reward: -100.00, mean_100: -130.92, episodes: 2070
144633: reward: -100.00, mean_100: -131.11, episodes: 2071
144698: reward: -100.00, mean_100: -130.76, episodes: 2072
144757: reward: -100.00, mean_100: -130.33, episodes: 20

153703: reward: -100.00, mean_100: -124.37, episodes: 2201
153769: reward: -100.00, mean_100: -125.58, episodes: 2202
153859: reward: -100.00, mean_100: -125.50, episodes: 2203
153924: reward: -100.00, mean_100: -126.11, episodes: 2204
154012: reward: -100.00, mean_100: -125.81, episodes: 2205
154088: reward: -100.00, mean_100: -126.56, episodes: 2206
154162: reward: -100.00, mean_100: -126.72, episodes: 2207
154222: reward: -100.00, mean_100: -126.24, episodes: 2208
154289: reward: -100.00, mean_100: -125.95, episodes: 2209
154359: reward: -100.00, mean_100: -127.34, episodes: 2210
154422: reward: -100.00, mean_100: -128.65, episodes: 2211
154480: reward: -100.00, mean_100: -127.88, episodes: 2212
154564: reward: -100.00, mean_100: -128.29, episodes: 2213
154624: reward: -100.00, mean_100: -128.46, episodes: 2214
154700: reward: -100.00, mean_100: -128.56, episodes: 2215
154764: reward: -100.00, mean_100: -129.04, episodes: 2216
154895: reward: -100.00, mean_100: -127.69, episodes: 22

163875: reward: -100.00, mean_100: -130.43, episodes: 2343
163949: reward: -100.00, mean_100: -130.75, episodes: 2344
164023: reward: -100.00, mean_100: -130.98, episodes: 2345
164098: reward: -100.00, mean_100: -131.13, episodes: 2346
164163: reward: -100.00, mean_100: -129.89, episodes: 2347
164235: reward: -100.00, mean_100: -130.59, episodes: 2348
164300: reward: -100.00, mean_100: -130.75, episodes: 2349
164376: reward: -100.00, mean_100: -132.41, episodes: 2350
164459: reward: -100.00, mean_100: -132.31, episodes: 2351
164540: reward: -100.00, mean_100: -132.88, episodes: 2352
164592: reward: -100.00, mean_100: -132.62, episodes: 2353
164669: reward: -100.00, mean_100: -132.50, episodes: 2354
164738: reward: -100.00, mean_100: -132.14, episodes: 2355
164795: reward: -100.00, mean_100: -132.00, episodes: 2356
164860: reward: -100.00, mean_100: -131.92, episodes: 2357
164922: reward: -100.00, mean_100: -131.90, episodes: 2358
164975: reward: -100.00, mean_100: -132.13, episodes: 23

173736: reward: -100.00, mean_100: -127.85, episodes: 2483
173820: reward: -100.00, mean_100: -126.95, episodes: 2484
173895: reward: -100.00, mean_100: -125.36, episodes: 2485
173954: reward: -100.00, mean_100: -125.16, episodes: 2486
174006: reward: -100.00, mean_100: -125.42, episodes: 2487
174065: reward: -100.00, mean_100: -125.24, episodes: 2488
174149: reward: -100.00, mean_100: -125.21, episodes: 2489
174208: reward: -100.00, mean_100: -124.55, episodes: 2490
174266: reward: -100.00, mean_100: -124.79, episodes: 2491
174328: reward: -100.00, mean_100: -124.10, episodes: 2492
174386: reward: -100.00, mean_100: -124.18, episodes: 2493
174466: reward: -100.00, mean_100: -124.27, episodes: 2494
174534: reward: -100.00, mean_100: -123.81, episodes: 2495
174597: reward: -100.00, mean_100: -123.62, episodes: 2496
174669: reward: -100.00, mean_100: -123.86, episodes: 2497
174753: reward: -100.00, mean_100: -124.01, episodes: 2498
174830: reward: -100.00, mean_100: -124.00, episodes: 24

183821: reward: -100.00, mean_100: -131.59, episodes: 2626
183887: reward: -100.00, mean_100: -131.33, episodes: 2627
183962: reward: -100.00, mean_100: -131.36, episodes: 2628
184053: reward: -100.00, mean_100: -130.74, episodes: 2629
184133: reward: -100.00, mean_100: -130.69, episodes: 2630
184222: reward: -100.00, mean_100: -130.33, episodes: 2631
184274: reward: -100.00, mean_100: -131.74, episodes: 2632
184356: reward: -100.00, mean_100: -131.25, episodes: 2633
184430: reward: -100.00, mean_100: -130.90, episodes: 2634
184517: reward: -100.00, mean_100: -131.55, episodes: 2635
184596: reward: -100.00, mean_100: -131.56, episodes: 2636
184675: reward: -100.00, mean_100: -131.47, episodes: 2637
184757: reward: -100.00, mean_100: -131.49, episodes: 2638
184821: reward: -100.00, mean_100: -131.12, episodes: 2639
184879: reward: -100.00, mean_100: -131.07, episodes: 2640
184954: reward: -100.00, mean_100: -131.10, episodes: 2641
185046: reward: -100.00, mean_100: -132.25, episodes: 26

193745: reward: -100.00, mean_100: -131.50, episodes: 2765
193802: reward: -100.00, mean_100: -131.59, episodes: 2766
193887: reward: -100.00, mean_100: -132.23, episodes: 2767
193941: reward: -100.00, mean_100: -132.19, episodes: 2768
193996: reward: -100.00, mean_100: -132.24, episodes: 2769
194076: reward: -100.00, mean_100: -132.69, episodes: 2770
194148: reward: -100.00, mean_100: -132.79, episodes: 2771
194224: reward: -100.00, mean_100: -132.70, episodes: 2772
194289: reward: -100.00, mean_100: -132.62, episodes: 2773
194378: reward: -100.00, mean_100: -132.31, episodes: 2774
194448: reward: -100.00, mean_100: -132.43, episodes: 2775
194520: reward: -100.00, mean_100: -132.68, episodes: 2776
194596: reward: -100.00, mean_100: -133.12, episodes: 2777
194687: reward: -100.00, mean_100: -133.94, episodes: 2778
194738: reward: -100.00, mean_100: -133.89, episodes: 2779
194793: reward: -100.00, mean_100: -133.60, episodes: 2780
194858: reward: -100.00, mean_100: -133.50, episodes: 27

203605: reward: -100.00, mean_100: -135.55, episodes: 2908
203663: reward: -100.00, mean_100: -135.39, episodes: 2909
203742: reward: -100.00, mean_100: -136.09, episodes: 2910
203805: reward: -100.00, mean_100: -136.20, episodes: 2911
203875: reward: -100.00, mean_100: -136.29, episodes: 2912
203946: reward: -100.00, mean_100: -136.14, episodes: 2913
204006: reward: -100.00, mean_100: -136.04, episodes: 2914
204063: reward: -100.00, mean_100: -136.26, episodes: 2915
204144: reward: -100.00, mean_100: -135.82, episodes: 2916
204196: reward: -100.00, mean_100: -133.81, episodes: 2917
204257: reward: -100.00, mean_100: -133.96, episodes: 2918
204316: reward: -100.00, mean_100: -133.67, episodes: 2919
204392: reward: -100.00, mean_100: -133.95, episodes: 2920
204453: reward: -100.00, mean_100: -132.04, episodes: 2921
204527: reward: -100.00, mean_100: -132.44, episodes: 2922
204584: reward: -100.00, mean_100: -132.20, episodes: 2923
204639: reward: -100.00, mean_100: -131.62, episodes: 29

213829: reward: -100.00, mean_100: -127.60, episodes: 3055
213898: reward: -100.00, mean_100: -127.92, episodes: 3056
213977: reward: -100.00, mean_100: -124.36, episodes: 3057
214044: reward: -100.00, mean_100: -124.75, episodes: 3058
214096: reward: -100.00, mean_100: -125.67, episodes: 3059
214196: reward: -100.00, mean_100: -124.55, episodes: 3060
214253: reward: -100.00, mean_100: -124.72, episodes: 3061
214326: reward: -100.00, mean_100: -124.91, episodes: 3062
214395: reward: -100.00, mean_100: -124.85, episodes: 3063
214457: reward: -100.00, mean_100: -124.93, episodes: 3064
214523: reward: -100.00, mean_100: -125.38, episodes: 3065
214576: reward: -100.00, mean_100: -125.09, episodes: 3066
214633: reward: -100.00, mean_100: -125.34, episodes: 3067
214692: reward: -100.00, mean_100: -125.36, episodes: 3068
214779: reward: -100.00, mean_100: -126.76, episodes: 3069
214838: reward: -100.00, mean_100: -126.56, episodes: 3070
214898: reward: -100.00, mean_100: -127.02, episodes: 30

223824: reward: -100.00, mean_100: -133.34, episodes: 3198
223888: reward: -100.00, mean_100: -133.23, episodes: 3199
223951: reward: -100.00, mean_100: -133.17, episodes: 3200
224025: reward: -100.00, mean_100: -133.26, episodes: 3201
224111: reward: -100.00, mean_100: -132.89, episodes: 3202
224164: reward: -100.00, mean_100: -131.70, episodes: 3203
224219: reward: -100.00, mean_100: -133.29, episodes: 3204
224270: reward: -100.00, mean_100: -132.86, episodes: 3205
224326: reward: -100.00, mean_100: -132.89, episodes: 3206
224387: reward: -100.00, mean_100: -132.25, episodes: 3207
224450: reward: -100.00, mean_100: -132.61, episodes: 3208
224507: reward: -100.00, mean_100: -132.05, episodes: 3209
224571: reward: -100.00, mean_100: -132.31, episodes: 3210
224665: reward: -100.00, mean_100: -132.79, episodes: 3211
224751: reward: -100.00, mean_100: -132.53, episodes: 3212
224812: reward: -100.00, mean_100: -132.74, episodes: 3213
224867: reward: -100.00, mean_100: -133.18, episodes: 32

233648: reward: -100.00, mean_100: -131.52, episodes: 3339
233740: reward: -100.00, mean_100: -132.16, episodes: 3340
233801: reward: -100.00, mean_100: -132.40, episodes: 3341
233871: reward: -100.00, mean_100: -132.67, episodes: 3342
233936: reward: -100.00, mean_100: -134.20, episodes: 3343
234011: reward: -100.00, mean_100: -134.27, episodes: 3344
234069: reward: -100.00, mean_100: -134.58, episodes: 3345
234158: reward: -100.00, mean_100: -135.07, episodes: 3346
234242: reward: -100.00, mean_100: -135.06, episodes: 3347
234298: reward: -100.00, mean_100: -135.00, episodes: 3348
234375: reward: -100.00, mean_100: -135.15, episodes: 3349
234434: reward: -100.00, mean_100: -134.98, episodes: 3350
234486: reward: -100.00, mean_100: -133.23, episodes: 3351
234546: reward: -100.00, mean_100: -133.68, episodes: 3352
234621: reward: -100.00, mean_100: -134.26, episodes: 3353
234685: reward: -100.00, mean_100: -134.69, episodes: 3354
234764: reward: -100.00, mean_100: -136.16, episodes: 33

243820: reward: -100.00, mean_100: -134.24, episodes: 3485
243877: reward: -100.00, mean_100: -134.38, episodes: 3486
243935: reward: -100.00, mean_100: -134.43, episodes: 3487
243988: reward: -100.00, mean_100: -134.18, episodes: 3488
244075: reward: -100.00, mean_100: -134.58, episodes: 3489
244160: reward: -100.00, mean_100: -133.29, episodes: 3490
244227: reward: -100.00, mean_100: -132.96, episodes: 3491
244305: reward: -100.00, mean_100: -133.24, episodes: 3492
244369: reward: -100.00, mean_100: -133.60, episodes: 3493
244454: reward: -100.00, mean_100: -133.63, episodes: 3494
244509: reward: -100.00, mean_100: -133.51, episodes: 3495
244595: reward: -100.00, mean_100: -134.18, episodes: 3496
244660: reward: -100.00, mean_100: -134.31, episodes: 3497
244716: reward: -100.00, mean_100: -134.10, episodes: 3498
244768: reward: -100.00, mean_100: -133.79, episodes: 3499
244823: reward: -100.00, mean_100: -133.57, episodes: 3500
244933: reward: -100.00, mean_100: -135.33, episodes: 35

253749: reward: -100.00, mean_100: -125.86, episodes: 3624
253840: reward: -100.00, mean_100: -125.52, episodes: 3625
253895: reward: -100.00, mean_100: -125.30, episodes: 3626
253962: reward: -100.00, mean_100: -125.21, episodes: 3627
254027: reward: -100.00, mean_100: -125.29, episodes: 3628
254117: reward: -100.00, mean_100: -124.53, episodes: 3629
254171: reward: -100.00, mean_100: -123.87, episodes: 3630
254226: reward: -100.00, mean_100: -124.09, episodes: 3631
254297: reward: -100.00, mean_100: -123.97, episodes: 3632
254373: reward: -100.00, mean_100: -123.77, episodes: 3633
254456: reward: -100.00, mean_100: -124.62, episodes: 3634
254548: reward: -100.00, mean_100: -123.97, episodes: 3635
254626: reward: -100.00, mean_100: -122.91, episodes: 3636
254681: reward: -100.00, mean_100: -123.20, episodes: 3637
254785: reward: -100.00, mean_100: -121.75, episodes: 3638
254868: reward: -100.00, mean_100: -122.54, episodes: 3639
254952: reward: -100.00, mean_100: -121.87, episodes: 36

263794: reward: -100.00, mean_100: -134.48, episodes: 3764
263846: reward: -100.00, mean_100: -134.20, episodes: 3765
263902: reward: -100.00, mean_100: -133.27, episodes: 3766
263976: reward: -100.00, mean_100: -134.49, episodes: 3767
264052: reward: -100.00, mean_100: -134.51, episodes: 3768
264123: reward: -100.00, mean_100: -134.31, episodes: 3769
264205: reward: -100.00, mean_100: -133.97, episodes: 3770
264339: reward: -100.00, mean_100: -132.34, episodes: 3771
264424: reward: -100.00, mean_100: -132.38, episodes: 3772
264507: reward: -100.00, mean_100: -133.74, episodes: 3773
264584: reward: -100.00, mean_100: -133.88, episodes: 3774
264645: reward: -100.00, mean_100: -134.34, episodes: 3775
264717: reward: -100.00, mean_100: -134.69, episodes: 3776
264790: reward: -100.00, mean_100: -134.17, episodes: 3777
264845: reward: -100.00, mean_100: -134.80, episodes: 3778
264936: reward: -100.00, mean_100: -135.03, episodes: 3779
264989: reward: -100.00, mean_100: -134.96, episodes: 37

273798: reward: -100.00, mean_100: -126.36, episodes: 3907
273882: reward: -100.00, mean_100: -124.71, episodes: 3908
273962: reward: -100.00, mean_100: -124.42, episodes: 3909
274022: reward: -100.00, mean_100: -124.36, episodes: 3910
274077: reward: -100.00, mean_100: -125.11, episodes: 3911
274151: reward: -100.00, mean_100: -125.72, episodes: 3912
274218: reward: -100.00, mean_100: -124.38, episodes: 3913
274308: reward: -100.00, mean_100: -126.91, episodes: 3914
274369: reward: -100.00, mean_100: -126.72, episodes: 3915
274454: reward: -100.00, mean_100: -127.26, episodes: 3916
274534: reward: -100.00, mean_100: -127.11, episodes: 3917
274587: reward: -100.00, mean_100: -126.65, episodes: 3918
274660: reward: -100.00, mean_100: -126.60, episodes: 3919
274736: reward: -100.00, mean_100: -126.61, episodes: 3920
274822: reward: -100.00, mean_100: -126.32, episodes: 3921
274874: reward: -100.00, mean_100: -125.58, episodes: 3922
274949: reward: -100.00, mean_100: -125.54, episodes: 39

284034: reward: -100.00, mean_100: -128.48, episodes: 4056
284095: reward: -100.00, mean_100: -128.41, episodes: 4057
284146: reward: -100.00, mean_100: -128.17, episodes: 4058
284234: reward: -100.00, mean_100: -128.61, episodes: 4059
284284: reward: -100.00, mean_100: -128.51, episodes: 4060
284342: reward: -100.00, mean_100: -128.89, episodes: 4061
284404: reward: -100.00, mean_100: -129.36, episodes: 4062
284473: reward: -100.00, mean_100: -129.44, episodes: 4063
284528: reward: -100.00, mean_100: -129.70, episodes: 4064
284581: reward: -100.00, mean_100: -129.59, episodes: 4065
284648: reward: -100.00, mean_100: -129.40, episodes: 4066
284702: reward: -100.00, mean_100: -128.68, episodes: 4067
284758: reward: -100.00, mean_100: -128.99, episodes: 4068
284826: reward: -100.00, mean_100: -128.41, episodes: 4069
284910: reward: -100.00, mean_100: -128.77, episodes: 4070
284987: reward: -100.00, mean_100: -128.72, episodes: 4071
285045: reward: -100.00, mean_100: -128.62, episodes: 40

293844: reward: -100.00, mean_100: -137.11, episodes: 4196
293911: reward: -100.00, mean_100: -137.48, episodes: 4197
293983: reward: -100.00, mean_100: -137.09, episodes: 4198
294038: reward: -100.00, mean_100: -136.99, episodes: 4199
294131: reward: -100.00, mean_100: -138.00, episodes: 4200
294193: reward: -100.00, mean_100: -137.78, episodes: 4201
294254: reward: -100.00, mean_100: -137.15, episodes: 4202
294312: reward: -100.00, mean_100: -136.83, episodes: 4203
294387: reward: -100.00, mean_100: -137.07, episodes: 4204
294442: reward: -100.00, mean_100: -136.40, episodes: 4205
294503: reward: -100.00, mean_100: -136.50, episodes: 4206
294594: reward: -100.00, mean_100: -135.61, episodes: 4207
294661: reward: -100.00, mean_100: -135.55, episodes: 4208
294745: reward: -100.00, mean_100: -135.05, episodes: 4209
294825: reward: -100.00, mean_100: -134.18, episodes: 4210
294894: reward: -100.00, mean_100: -133.89, episodes: 4211
294976: reward: -100.00, mean_100: -134.45, episodes: 42

303839: reward: -100.00, mean_100: -132.84, episodes: 4336
303918: reward: -100.00, mean_100: -132.35, episodes: 4337
303971: reward: -100.00, mean_100: -131.71, episodes: 4338
304024: reward: -100.00, mean_100: -130.80, episodes: 4339
304110: reward: -100.00, mean_100: -131.13, episodes: 4340
304177: reward: -100.00, mean_100: -131.11, episodes: 4341
304255: reward: -100.00, mean_100: -131.20, episodes: 4342
304323: reward: -100.00, mean_100: -131.22, episodes: 4343
304385: reward: -100.00, mean_100: -131.50, episodes: 4344
304468: reward: -100.00, mean_100: -131.58, episodes: 4345
304557: reward: -100.00, mean_100: -131.21, episodes: 4346
304611: reward: -100.00, mean_100: -131.73, episodes: 4347
304674: reward: -100.00, mean_100: -131.46, episodes: 4348
304735: reward: -100.00, mean_100: -131.64, episodes: 4349
304802: reward: -100.00, mean_100: -131.44, episodes: 4350
304873: reward: -100.00, mean_100: -131.34, episodes: 4351
304954: reward: -100.00, mean_100: -131.23, episodes: 43

314139: reward: -100.00, mean_100: -131.80, episodes: 4480
314230: reward: -100.00, mean_100: -131.75, episodes: 4481
314307: reward: -100.00, mean_100: -132.07, episodes: 4482
314376: reward: -100.00, mean_100: -130.38, episodes: 4483
314437: reward: -100.00, mean_100: -130.47, episodes: 4484
314500: reward: -100.00, mean_100: -130.11, episodes: 4485
314560: reward: -100.00, mean_100: -130.21, episodes: 4486
314629: reward: -100.00, mean_100: -130.15, episodes: 4487
314687: reward: -100.00, mean_100: -130.55, episodes: 4488
314754: reward: -100.00, mean_100: -130.69, episodes: 4489
314823: reward: -100.00, mean_100: -131.06, episodes: 4490
314884: reward: -100.00, mean_100: -131.22, episodes: 4491
314961: reward: -100.00, mean_100: -130.76, episodes: 4492
315024: reward: -100.00, mean_100: -130.46, episodes: 4493
315079: reward: -100.00, mean_100: -130.41, episodes: 4494
315139: reward: -100.00, mean_100: -130.88, episodes: 4495
315220: reward: -100.00, mean_100: -130.80, episodes: 44

324039: reward: -100.00, mean_100: -135.74, episodes: 4624
324104: reward: -100.00, mean_100: -135.59, episodes: 4625
324168: reward: -100.00, mean_100: -135.63, episodes: 4626
324225: reward: -100.00, mean_100: -135.49, episodes: 4627
324301: reward: -100.00, mean_100: -136.21, episodes: 4628
324364: reward: -100.00, mean_100: -135.95, episodes: 4629
324443: reward: -100.00, mean_100: -136.00, episodes: 4630
324501: reward: -100.00, mean_100: -135.21, episodes: 4631
324568: reward: -100.00, mean_100: -134.45, episodes: 4632
324630: reward: -100.00, mean_100: -134.87, episodes: 4633
324712: reward: -100.00, mean_100: -135.80, episodes: 4634
324764: reward: -100.00, mean_100: -136.08, episodes: 4635
324817: reward: -100.00, mean_100: -136.22, episodes: 4636
324871: reward: -100.00, mean_100: -135.97, episodes: 4637
324938: reward: -100.00, mean_100: -136.28, episodes: 4638
325009: reward: -100.00, mean_100: -136.30, episodes: 4639
325077: reward: -100.00, mean_100: -134.67, episodes: 46

334127: reward: -100.00, mean_100: -130.10, episodes: 4770
334181: reward: -100.00, mean_100: -129.40, episodes: 4771
334244: reward: -100.00, mean_100: -129.14, episodes: 4772
334315: reward: -100.00, mean_100: -128.47, episodes: 4773
334384: reward: -100.00, mean_100: -126.98, episodes: 4774
334467: reward: -100.00, mean_100: -126.84, episodes: 4775
334534: reward: -100.00, mean_100: -127.21, episodes: 4776
334615: reward: -100.00, mean_100: -126.88, episodes: 4777
334667: reward: -100.00, mean_100: -126.74, episodes: 4778
334742: reward: -100.00, mean_100: -126.87, episodes: 4779
334810: reward: -100.00, mean_100: -126.98, episodes: 4780
334871: reward: -100.00, mean_100: -127.17, episodes: 4781
334946: reward: -100.00, mean_100: -127.87, episodes: 4782
335033: reward: -100.00, mean_100: -125.97, episodes: 4783
335096: reward: -100.00, mean_100: -126.01, episodes: 4784
335187: reward: -100.00, mean_100: -126.00, episodes: 4785
335268: reward: -100.00, mean_100: -126.44, episodes: 47

344289: reward: -100.00, mean_100: -132.04, episodes: 4914
344365: reward: -100.00, mean_100: -132.23, episodes: 4915
344445: reward: -100.00, mean_100: -131.45, episodes: 4916
344497: reward: -100.00, mean_100: -130.68, episodes: 4917
344554: reward: -100.00, mean_100: -130.64, episodes: 4918
344621: reward: -100.00, mean_100: -130.46, episodes: 4919
344674: reward: -100.00, mean_100: -130.31, episodes: 4920
344754: reward: -100.00, mean_100: -130.29, episodes: 4921
344825: reward: -100.00, mean_100: -130.62, episodes: 4922
344883: reward: -100.00, mean_100: -130.93, episodes: 4923
344965: reward: -100.00, mean_100: -131.08, episodes: 4924
345050: reward: -100.00, mean_100: -131.34, episodes: 4925
345104: reward: -100.00, mean_100: -131.47, episodes: 4926
345156: reward: -100.00, mean_100: -130.86, episodes: 4927
345221: reward: -100.00, mean_100: -130.97, episodes: 4928
345306: reward: -100.00, mean_100: -130.99, episodes: 4929
345379: reward: -100.00, mean_100: -130.96, episodes: 49

353985: reward: -100.00, mean_100: -131.91, episodes: 5053
354050: reward: -100.00, mean_100: -131.70, episodes: 5054
354104: reward: -100.00, mean_100: -132.10, episodes: 5055
354168: reward: -100.00, mean_100: -131.98, episodes: 5056
354235: reward: -100.00, mean_100: -132.38, episodes: 5057
354294: reward: -100.00, mean_100: -132.53, episodes: 5058
354364: reward: -100.00, mean_100: -132.76, episodes: 5059
354441: reward: -100.00, mean_100: -132.13, episodes: 5060
354509: reward: -100.00, mean_100: -132.99, episodes: 5061
354566: reward: -100.00, mean_100: -134.72, episodes: 5062
354654: reward: -100.00, mean_100: -134.62, episodes: 5063
354725: reward: -100.00, mean_100: -134.26, episodes: 5064
354793: reward: -100.00, mean_100: -134.94, episodes: 5065
354861: reward: -100.00, mean_100: -134.95, episodes: 5066
354943: reward: -100.00, mean_100: -134.97, episodes: 5067
355022: reward: -100.00, mean_100: -134.92, episodes: 5068
355081: reward: -100.00, mean_100: -134.38, episodes: 50

364055: reward: -100.00, mean_100: -132.86, episodes: 5194
364108: reward: -100.00, mean_100: -132.97, episodes: 5195
364195: reward: -100.00, mean_100: -133.29, episodes: 5196
364281: reward: -100.00, mean_100: -133.10, episodes: 5197
364371: reward: -100.00, mean_100: -132.99, episodes: 5198
364427: reward: -100.00, mean_100: -132.48, episodes: 5199
364488: reward: -100.00, mean_100: -132.89, episodes: 5200
364538: reward: -100.00, mean_100: -132.56, episodes: 5201
364604: reward: -100.00, mean_100: -133.13, episodes: 5202
364657: reward: -100.00, mean_100: -132.47, episodes: 5203
364726: reward: -100.00, mean_100: -132.67, episodes: 5204
364788: reward: -100.00, mean_100: -133.17, episodes: 5205
364869: reward: -100.00, mean_100: -134.40, episodes: 5206
364942: reward: -100.00, mean_100: -134.83, episodes: 5207
364994: reward: -100.00, mean_100: -134.69, episodes: 5208
365066: reward: -100.00, mean_100: -134.36, episodes: 5209
365146: reward: -100.00, mean_100: -133.83, episodes: 52

374312: reward: -100.00, mean_100: -132.13, episodes: 5343
374398: reward: -100.00, mean_100: -132.26, episodes: 5344
374477: reward: -100.00, mean_100: -132.67, episodes: 5345
374547: reward: -100.00, mean_100: -132.78, episodes: 5346
374600: reward: -100.00, mean_100: -132.41, episodes: 5347
374654: reward: -100.00, mean_100: -131.88, episodes: 5348
374706: reward: -100.00, mean_100: -131.55, episodes: 5349
374788: reward: -100.00, mean_100: -131.45, episodes: 5350
374846: reward: -100.00, mean_100: -131.44, episodes: 5351
374914: reward: -100.00, mean_100: -131.22, episodes: 5352
374968: reward: -100.00, mean_100: -130.18, episodes: 5353
375043: reward: -100.00, mean_100: -130.74, episodes: 5354
375127: reward: -100.00, mean_100: -130.55, episodes: 5355
375185: reward: -100.00, mean_100: -130.03, episodes: 5356
375236: reward: -100.00, mean_100: -129.75, episodes: 5357
375300: reward: -100.00, mean_100: -130.07, episodes: 5358
375378: reward: -100.00, mean_100: -129.60, episodes: 53

383889: reward: -100.00, mean_100: -134.66, episodes: 5482
383942: reward: -100.00, mean_100: -134.57, episodes: 5483
383998: reward: -100.00, mean_100: -134.15, episodes: 5484
384056: reward: -100.00, mean_100: -133.57, episodes: 5485
384111: reward: -100.00, mean_100: -133.25, episodes: 5486
384167: reward: -100.00, mean_100: -133.43, episodes: 5487
384238: reward: -100.00, mean_100: -133.71, episodes: 5488
384295: reward: -100.00, mean_100: -133.22, episodes: 5489
384367: reward: -100.00, mean_100: -134.77, episodes: 5490
384452: reward: -100.00, mean_100: -134.74, episodes: 5491
384510: reward: -100.00, mean_100: -134.13, episodes: 5492
384567: reward: -100.00, mean_100: -133.86, episodes: 5493
384623: reward: -100.00, mean_100: -133.87, episodes: 5494
384685: reward: -100.00, mean_100: -133.84, episodes: 5495
384747: reward: -100.00, mean_100: -134.06, episodes: 5496
384826: reward: -100.00, mean_100: -134.55, episodes: 5497
384886: reward: -100.00, mean_100: -135.09, episodes: 54

393775: reward: -100.00, mean_100: -137.29, episodes: 5627
393829: reward: -100.00, mean_100: -136.48, episodes: 5628
393897: reward: -100.00, mean_100: -136.20, episodes: 5629
393959: reward: -100.00, mean_100: -136.28, episodes: 5630
394048: reward: -100.00, mean_100: -138.01, episodes: 5631
394100: reward: -100.00, mean_100: -137.77, episodes: 5632
394182: reward: -100.00, mean_100: -138.38, episodes: 5633
394272: reward: -100.00, mean_100: -137.60, episodes: 5634
394331: reward: -100.00, mean_100: -137.43, episodes: 5635
394410: reward: -100.00, mean_100: -137.51, episodes: 5636
394474: reward: -100.00, mean_100: -137.29, episodes: 5637
394534: reward: -100.00, mean_100: -137.24, episodes: 5638
394586: reward: -100.00, mean_100: -137.08, episodes: 5639
394650: reward: -100.00, mean_100: -137.18, episodes: 5640
394730: reward: -100.00, mean_100: -137.50, episodes: 5641
394783: reward: -100.00, mean_100: -137.04, episodes: 5642
394840: reward: -100.00, mean_100: -137.17, episodes: 56

403950: reward: -100.00, mean_100: -136.11, episodes: 5771
404006: reward: -100.00, mean_100: -136.55, episodes: 5772
404102: reward: -100.00, mean_100: -135.37, episodes: 5773
404176: reward: -100.00, mean_100: -135.71, episodes: 5774
404231: reward: -100.00, mean_100: -136.08, episodes: 5775
404303: reward: -100.00, mean_100: -135.79, episodes: 5776
404359: reward: -100.00, mean_100: -135.54, episodes: 5777
404424: reward: -100.00, mean_100: -135.59, episodes: 5778
404495: reward: -100.00, mean_100: -135.51, episodes: 5779
404559: reward: -100.00, mean_100: -136.14, episodes: 5780
404624: reward: -100.00, mean_100: -136.21, episodes: 5781
404701: reward: -100.00, mean_100: -135.77, episodes: 5782
404788: reward: -100.00, mean_100: -136.22, episodes: 5783
404841: reward: -100.00, mean_100: -135.34, episodes: 5784
404899: reward: -100.00, mean_100: -135.88, episodes: 5785
404970: reward: -100.00, mean_100: -135.89, episodes: 5786
405059: reward: -100.00, mean_100: -135.53, episodes: 57

413450: reward: -100.00, mean_100: -133.67, episodes: 5910
413521: reward: -100.00, mean_100: -133.37, episodes: 5911
413580: reward: -100.00, mean_100: -134.54, episodes: 5912
413638: reward: -100.00, mean_100: -134.65, episodes: 5913
413690: reward: -100.00, mean_100: -133.92, episodes: 5914
413780: reward: -100.00, mean_100: -133.45, episodes: 5915
413836: reward: -100.00, mean_100: -133.45, episodes: 5916
413891: reward: -100.00, mean_100: -132.93, episodes: 5917
413968: reward: -100.00, mean_100: -131.70, episodes: 5918
414025: reward: -100.00, mean_100: -132.39, episodes: 5919
414092: reward: -100.00, mean_100: -130.55, episodes: 5920
414166: reward: -100.00, mean_100: -130.77, episodes: 5921
414242: reward: -100.00, mean_100: -130.99, episodes: 5922
414309: reward: -100.00, mean_100: -130.62, episodes: 5923
414374: reward: -100.00, mean_100: -130.64, episodes: 5924
414456: reward: -100.00, mean_100: -131.10, episodes: 5925
414515: reward: -100.00, mean_100: -129.56, episodes: 59

423368: reward: -100.00, mean_100: -129.57, episodes: 6050
423419: reward: -100.00, mean_100: -129.61, episodes: 6051
423475: reward: -100.00, mean_100: -129.55, episodes: 6052
423527: reward: -100.00, mean_100: -129.50, episodes: 6053
423592: reward: -100.00, mean_100: -130.47, episodes: 6054
423669: reward: -100.00, mean_100: -130.18, episodes: 6055
423736: reward: -100.00, mean_100: -130.27, episodes: 6056
423819: reward: -100.00, mean_100: -130.25, episodes: 6057
423884: reward: -100.00, mean_100: -130.44, episodes: 6058
423956: reward: -100.00, mean_100: -130.34, episodes: 6059
424044: reward: -100.00, mean_100: -131.09, episodes: 6060
424122: reward: -100.00, mean_100: -131.29, episodes: 6061
424187: reward: -100.00, mean_100: -131.28, episodes: 6062
424240: reward: -100.00, mean_100: -131.17, episodes: 6063
424310: reward: -100.00, mean_100: -131.30, episodes: 6064
424385: reward: -100.00, mean_100: -131.53, episodes: 6065
424438: reward: -100.00, mean_100: -131.40, episodes: 60

433424: reward: -100.00, mean_100: -120.86, episodes: 6195
433496: reward: -100.00, mean_100: -121.29, episodes: 6196
433566: reward: -100.00, mean_100: -121.33, episodes: 6197
433620: reward: -100.00, mean_100: -121.80, episodes: 6198
433696: reward: -100.00, mean_100: -121.46, episodes: 6199
433762: reward: -100.00, mean_100: -121.01, episodes: 6200
433838: reward: -100.00, mean_100: -121.04, episodes: 6201
433895: reward: -100.00, mean_100: -120.42, episodes: 6202
433968: reward: -100.00, mean_100: -120.93, episodes: 6203
434045: reward: -100.00, mean_100: -120.73, episodes: 6204
434119: reward: -100.00, mean_100: -121.46, episodes: 6205
434194: reward: -100.00, mean_100: -121.34, episodes: 6206
434248: reward: -100.00, mean_100: -121.16, episodes: 6207
434326: reward: -100.00, mean_100: -121.32, episodes: 6208
434416: reward: -100.00, mean_100: -120.78, episodes: 6209
434479: reward: -100.00, mean_100: -120.91, episodes: 6210
434558: reward: -100.00, mean_100: -121.25, episodes: 62

443764: reward: -100.00, mean_100: -126.01, episodes: 6341
443818: reward: -100.00, mean_100: -125.57, episodes: 6342
443874: reward: -100.00, mean_100: -125.66, episodes: 6343
443935: reward: -100.00, mean_100: -125.14, episodes: 6344
443997: reward: -100.00, mean_100: -125.15, episodes: 6345
444053: reward: -100.00, mean_100: -125.20, episodes: 6346
444105: reward: -100.00, mean_100: -124.93, episodes: 6347
444183: reward: -100.00, mean_100: -124.11, episodes: 6348
444238: reward: -100.00, mean_100: -123.84, episodes: 6349
444321: reward: -100.00, mean_100: -123.84, episodes: 6350
444383: reward: -100.00, mean_100: -124.44, episodes: 6351
444469: reward: -100.00, mean_100: -124.71, episodes: 6352
444531: reward: -100.00, mean_100: -122.90, episodes: 6353
444597: reward: -100.00, mean_100: -123.02, episodes: 6354
444657: reward: -100.00, mean_100: -122.80, episodes: 6355
444709: reward: -100.00, mean_100: -122.52, episodes: 6356
444799: reward: -100.00, mean_100: -123.00, episodes: 63

454075: reward: -100.00, mean_100: -140.68, episodes: 6489
454161: reward: -100.00, mean_100: -141.34, episodes: 6490
454238: reward: -100.00, mean_100: -141.79, episodes: 6491
454311: reward: -100.00, mean_100: -142.18, episodes: 6492
454372: reward: -100.00, mean_100: -141.48, episodes: 6493
454447: reward: -100.00, mean_100: -141.76, episodes: 6494
454506: reward: -100.00, mean_100: -141.15, episodes: 6495
454575: reward: -100.00, mean_100: -140.45, episodes: 6496
454636: reward: -100.00, mean_100: -139.61, episodes: 6497
454705: reward: -100.00, mean_100: -139.70, episodes: 6498
454770: reward: -100.00, mean_100: -140.66, episodes: 6499
454824: reward: -100.00, mean_100: -139.91, episodes: 6500
454895: reward: -100.00, mean_100: -139.63, episodes: 6501
454975: reward: -100.00, mean_100: -138.58, episodes: 6502
455032: reward: -100.00, mean_100: -138.33, episodes: 6503
455090: reward: -100.00, mean_100: -138.45, episodes: 6504
455169: reward: -100.00, mean_100: -139.07, episodes: 65

464091: reward: -100.00, mean_100: -132.61, episodes: 6632
464157: reward: -100.00, mean_100: -131.70, episodes: 6633
464219: reward: -100.00, mean_100: -131.41, episodes: 6634
464296: reward: -100.00, mean_100: -131.33, episodes: 6635
464350: reward: -100.00, mean_100: -130.72, episodes: 6636
464416: reward: -100.00, mean_100: -130.39, episodes: 6637
464492: reward: -100.00, mean_100: -130.59, episodes: 6638
464571: reward: -100.00, mean_100: -130.73, episodes: 6639
464637: reward: -100.00, mean_100: -130.85, episodes: 6640
464709: reward: -100.00, mean_100: -131.61, episodes: 6641
464784: reward: -100.00, mean_100: -131.97, episodes: 6642
464867: reward: -100.00, mean_100: -131.92, episodes: 6643
464939: reward: -100.00, mean_100: -132.23, episodes: 6644
465010: reward: -100.00, mean_100: -132.32, episodes: 6645
465086: reward: -100.00, mean_100: -132.47, episodes: 6646
465138: reward: -100.00, mean_100: -131.73, episodes: 6647
465192: reward: -100.00, mean_100: -132.31, episodes: 66

474020: reward: -100.00, mean_100: -137.05, episodes: 6775
474085: reward: -100.00, mean_100: -136.88, episodes: 6776
474152: reward: -100.00, mean_100: -136.96, episodes: 6777
474214: reward: -100.00, mean_100: -136.66, episodes: 6778
474285: reward: -100.00, mean_100: -137.06, episodes: 6779
474336: reward: -100.00, mean_100: -137.34, episodes: 6780
474406: reward: -100.00, mean_100: -137.16, episodes: 6781
474462: reward: -100.00, mean_100: -136.80, episodes: 6782
474523: reward: -100.00, mean_100: -138.42, episodes: 6783
474609: reward: -100.00, mean_100: -138.22, episodes: 6784
474683: reward: -100.00, mean_100: -138.51, episodes: 6785
474754: reward: -100.00, mean_100: -138.13, episodes: 6786
474823: reward: -100.00, mean_100: -137.61, episodes: 6787
474891: reward: -100.00, mean_100: -137.09, episodes: 6788
474942: reward: -100.00, mean_100: -136.75, episodes: 6789
474998: reward: -100.00, mean_100: -137.03, episodes: 6790
475085: reward: -100.00, mean_100: -136.62, episodes: 67

483828: reward: -100.00, mean_100: -134.31, episodes: 6917
483897: reward: -100.00, mean_100: -134.38, episodes: 6918
483970: reward: -100.00, mean_100: -134.68, episodes: 6919
484056: reward: -100.00, mean_100: -135.11, episodes: 6920
484115: reward: -100.00, mean_100: -134.70, episodes: 6921
484193: reward: -100.00, mean_100: -134.21, episodes: 6922
484251: reward: -100.00, mean_100: -134.12, episodes: 6923
484306: reward: -100.00, mean_100: -133.39, episodes: 6924
484366: reward: -100.00, mean_100: -133.32, episodes: 6925
484419: reward: -100.00, mean_100: -133.36, episodes: 6926
484502: reward: -100.00, mean_100: -133.17, episodes: 6927
484563: reward: -100.00, mean_100: -133.14, episodes: 6928
484625: reward: -100.00, mean_100: -133.43, episodes: 6929
484682: reward: -100.00, mean_100: -133.13, episodes: 6930
484752: reward: -100.00, mean_100: -132.28, episodes: 6931
484829: reward: -100.00, mean_100: -132.46, episodes: 6932
484915: reward: -100.00, mean_100: -132.74, episodes: 69

494407: reward: -100.00, mean_100: -132.25, episodes: 7065
494488: reward: -100.00, mean_100: -132.72, episodes: 7066
494539: reward: -100.00, mean_100: -132.73, episodes: 7067
494593: reward: -100.00, mean_100: -133.00, episodes: 7068
494648: reward: -100.00, mean_100: -133.10, episodes: 7069
494699: reward: -100.00, mean_100: -133.19, episodes: 7070
494759: reward: -100.00, mean_100: -132.91, episodes: 7071
494829: reward: -100.00, mean_100: -133.30, episodes: 7072
494900: reward: -100.00, mean_100: -133.93, episodes: 7073
494971: reward: -100.00, mean_100: -133.90, episodes: 7074
495051: reward: -100.00, mean_100: -134.95, episodes: 7075
495173: reward: -100.00, mean_100: -133.65, episodes: 7076
495270: reward: -100.00, mean_100: -134.74, episodes: 7077
495341: reward: -100.00, mean_100: -134.74, episodes: 7078
495420: reward: -100.00, mean_100: -135.69, episodes: 7079
495508: reward: -100.00, mean_100: -135.86, episodes: 7080
495590: reward: -100.00, mean_100: -136.39, episodes: 70

504910: reward: -100.00, mean_100: -138.73, episodes: 7212
504981: reward: -100.00, mean_100: -138.25, episodes: 7213
505047: reward: -100.00, mean_100: -138.13, episodes: 7214
505124: reward: -100.00, mean_100: -138.05, episodes: 7215
505204: reward: -100.00, mean_100: -139.07, episodes: 7216
505283: reward: -100.00, mean_100: -138.98, episodes: 7217
505334: reward: -100.00, mean_100: -138.94, episodes: 7218
505410: reward: -100.00, mean_100: -139.02, episodes: 7219
505475: reward: -100.00, mean_100: -139.17, episodes: 7220
505552: reward: -100.00, mean_100: -138.24, episodes: 7221
505617: reward: -100.00, mean_100: -137.40, episodes: 7222
505705: reward: -100.00, mean_100: -137.73, episodes: 7223
505784: reward: -100.00, mean_100: -137.46, episodes: 7224
505838: reward: -100.00, mean_100: -137.38, episodes: 7225
505925: reward: -100.00, mean_100: -137.20, episodes: 7226
506013: reward: -100.00, mean_100: -137.48, episodes: 7227
506065: reward: -100.00, mean_100: -137.34, episodes: 72

514644: reward: -100.00, mean_100: -131.87, episodes: 7352
514697: reward: -100.00, mean_100: -131.62, episodes: 7353
514755: reward: -100.00, mean_100: -132.42, episodes: 7354
514810: reward: -100.00, mean_100: -131.38, episodes: 7355
514878: reward: -100.00, mean_100: -131.49, episodes: 7356
514959: reward: -100.00, mean_100: -130.77, episodes: 7357
515032: reward: -100.00, mean_100: -130.55, episodes: 7358
515093: reward: -100.00, mean_100: -130.39, episodes: 7359
515152: reward: -100.00, mean_100: -130.22, episodes: 7360
515208: reward: -100.00, mean_100: -130.01, episodes: 7361
515276: reward: -100.00, mean_100: -130.40, episodes: 7362
515359: reward: -100.00, mean_100: -131.01, episodes: 7363
515449: reward: -100.00, mean_100: -131.44, episodes: 7364
515512: reward: -100.00, mean_100: -131.39, episodes: 7365
515599: reward: -100.00, mean_100: -131.07, episodes: 7366
515675: reward: -100.00, mean_100: -130.99, episodes: 7367
515746: reward: -100.00, mean_100: -131.08, episodes: 73

524627: reward: -100.00, mean_100: -139.65, episodes: 7494
524681: reward: -100.00, mean_100: -138.87, episodes: 7495
524765: reward: -100.00, mean_100: -139.29, episodes: 7496
524828: reward: -100.00, mean_100: -139.54, episodes: 7497
524884: reward: -100.00, mean_100: -139.29, episodes: 7498
524972: reward: -100.00, mean_100: -141.65, episodes: 7499
525042: reward: -100.00, mean_100: -141.76, episodes: 7500
525094: reward: -100.00, mean_100: -141.18, episodes: 7501
525151: reward: -100.00, mean_100: -141.04, episodes: 7502
525218: reward: -100.00, mean_100: -141.81, episodes: 7503
525270: reward: -100.00, mean_100: -142.00, episodes: 7504
525339: reward: -100.00, mean_100: -142.34, episodes: 7505
525409: reward: -100.00, mean_100: -142.81, episodes: 7506
525486: reward: -100.00, mean_100: -142.86, episodes: 7507
525546: reward: -100.00, mean_100: -144.11, episodes: 7508
525614: reward: -100.00, mean_100: -144.07, episodes: 7509
525687: reward: -100.00, mean_100: -144.77, episodes: 75

534618: reward: -100.00, mean_100: -133.94, episodes: 7641
534690: reward: -100.00, mean_100: -134.36, episodes: 7642
534742: reward: -100.00, mean_100: -133.92, episodes: 7643
534807: reward: -100.00, mean_100: -133.95, episodes: 7644
534956: reward: -100.00, mean_100: -132.54, episodes: 7645
535025: reward: -100.00, mean_100: -132.83, episodes: 7646
535108: reward: -100.00, mean_100: -132.03, episodes: 7647
535168: reward: -100.00, mean_100: -130.41, episodes: 7648
535277: reward: -100.00, mean_100: -132.47, episodes: 7649
535337: reward: -100.00, mean_100: -132.11, episodes: 7650
535397: reward: -100.00, mean_100: -132.12, episodes: 7651
535466: reward: -100.00, mean_100: -132.36, episodes: 7652
535545: reward: -100.00, mean_100: -132.88, episodes: 7653
535620: reward: -100.00, mean_100: -132.89, episodes: 7654
535673: reward: -100.00, mean_100: -132.77, episodes: 7655
535740: reward: -100.00, mean_100: -132.84, episodes: 7656
535822: reward: -100.00, mean_100: -132.97, episodes: 76

544886: reward: -100.00, mean_100: -129.51, episodes: 7789
544952: reward: -100.00, mean_100: -129.67, episodes: 7790
545010: reward: -100.00, mean_100: -129.42, episodes: 7791
545089: reward: -100.00, mean_100: -129.63, episodes: 7792
545164: reward: -100.00, mean_100: -130.07, episodes: 7793
545254: reward: -100.00, mean_100: -130.86, episodes: 7794
545329: reward: -100.00, mean_100: -129.85, episodes: 7795
545413: reward: -100.00, mean_100: -130.13, episodes: 7796
545475: reward: -100.00, mean_100: -130.00, episodes: 7797
545530: reward: -100.00, mean_100: -130.43, episodes: 7798
545594: reward: -100.00, mean_100: -130.91, episodes: 7799
545670: reward: -100.00, mean_100: -131.23, episodes: 7800
545741: reward: -100.00, mean_100: -131.22, episodes: 7801
545796: reward: -100.00, mean_100: -130.81, episodes: 7802
545863: reward: -100.00, mean_100: -130.93, episodes: 7803
545914: reward: -100.00, mean_100: -131.12, episodes: 7804
545968: reward: -100.00, mean_100: -131.05, episodes: 78

554806: reward: -100.00, mean_100: -128.25, episodes: 7931
554864: reward: -100.00, mean_100: -129.87, episodes: 7932
554934: reward: -100.00, mean_100: -130.53, episodes: 7933
554992: reward: -100.00, mean_100: -130.36, episodes: 7934
555050: reward: -100.00, mean_100: -130.55, episodes: 7935
555120: reward: -100.00, mean_100: -130.75, episodes: 7936
555169: reward: -100.00, mean_100: -130.67, episodes: 7937
555218: reward: -100.00, mean_100: -130.41, episodes: 7938
555283: reward: -100.00, mean_100: -130.21, episodes: 7939
555341: reward: -100.00, mean_100: -130.23, episodes: 7940
555408: reward: -100.00, mean_100: -130.36, episodes: 7941
555477: reward: -100.00, mean_100: -130.76, episodes: 7942
555528: reward: -100.00, mean_100: -130.46, episodes: 7943
555582: reward: -100.00, mean_100: -130.76, episodes: 7944
555645: reward: -100.00, mean_100: -131.47, episodes: 7945
555699: reward: -100.00, mean_100: -132.07, episodes: 7946
555765: reward: -100.00, mean_100: -132.42, episodes: 79

564608: reward: -100.00, mean_100: -134.48, episodes: 8072
564664: reward: -100.00, mean_100: -135.00, episodes: 8073
564719: reward: -100.00, mean_100: -134.59, episodes: 8074
564777: reward: -100.00, mean_100: -134.44, episodes: 8075
564841: reward: -100.00, mean_100: -135.54, episodes: 8076
564916: reward: -100.00, mean_100: -135.57, episodes: 8077
564990: reward: -100.00, mean_100: -135.84, episodes: 8078
565056: reward: -100.00, mean_100: -135.62, episodes: 8079
565127: reward: -100.00, mean_100: -135.38, episodes: 8080
565185: reward: -100.00, mean_100: -135.34, episodes: 8081
565269: reward: -100.00, mean_100: -135.75, episodes: 8082
565358: reward: -100.00, mean_100: -136.04, episodes: 8083
565427: reward: -100.00, mean_100: -136.11, episodes: 8084
565501: reward: -100.00, mean_100: -136.87, episodes: 8085
565589: reward: -100.00, mean_100: -136.61, episodes: 8086
565642: reward: -100.00, mean_100: -136.17, episodes: 8087
565704: reward: -100.00, mean_100: -136.05, episodes: 80

575042: reward: -100.00, mean_100: -135.33, episodes: 8220
575126: reward: -100.00, mean_100: -135.78, episodes: 8221
575184: reward: -100.00, mean_100: -135.58, episodes: 8222
575258: reward: -100.00, mean_100: -136.18, episodes: 8223
575321: reward: -100.00, mean_100: -135.75, episodes: 8224
575384: reward: -100.00, mean_100: -135.22, episodes: 8225
575449: reward: -100.00, mean_100: -134.92, episodes: 8226
575529: reward: -100.00, mean_100: -134.63, episodes: 8227
575619: reward: -100.00, mean_100: -134.75, episodes: 8228
575696: reward: -100.00, mean_100: -134.76, episodes: 8229
575761: reward: -100.00, mean_100: -134.94, episodes: 8230
575828: reward: -100.00, mean_100: -134.67, episodes: 8231
575883: reward: -100.00, mean_100: -134.18, episodes: 8232
575956: reward: -100.00, mean_100: -134.10, episodes: 8233
576029: reward: -100.00, mean_100: -134.26, episodes: 8234
576096: reward: -100.00, mean_100: -134.93, episodes: 8235
576174: reward: -100.00, mean_100: -135.02, episodes: 82

585325: reward: -100.00, mean_100: -129.31, episodes: 8365
585401: reward: -100.00, mean_100: -128.87, episodes: 8366
585463: reward: -100.00, mean_100: -129.21, episodes: 8367
585552: reward: -100.00, mean_100: -128.99, episodes: 8368
585624: reward: -100.00, mean_100: -130.56, episodes: 8369
585703: reward: -100.00, mean_100: -129.84, episodes: 8370
585777: reward: -100.00, mean_100: -129.69, episodes: 8371
585861: reward: -100.00, mean_100: -127.72, episodes: 8372
585948: reward: -100.00, mean_100: -127.75, episodes: 8373
586008: reward: -100.00, mean_100: -127.98, episodes: 8374
586089: reward: -100.00, mean_100: -128.13, episodes: 8375
586175: reward: -100.00, mean_100: -127.84, episodes: 8376
586233: reward: -100.00, mean_100: -127.47, episodes: 8377
586286: reward: -100.00, mean_100: -125.38, episodes: 8378
586360: reward: -100.00, mean_100: -126.14, episodes: 8379
586449: reward: -100.00, mean_100: -125.79, episodes: 8380
586519: reward: -100.00, mean_100: -125.77, episodes: 83

595185: reward: -100.00, mean_100: -123.67, episodes: 8504
595273: reward: -100.00, mean_100: -122.54, episodes: 8505
595337: reward: -100.00, mean_100: -122.68, episodes: 8506
595436: reward: -100.00, mean_100: -125.90, episodes: 8507
595503: reward: -100.00, mean_100: -126.18, episodes: 8508
595562: reward: -100.00, mean_100: -125.66, episodes: 8509
595621: reward: -100.00, mean_100: -125.37, episodes: 8510
595685: reward: -100.00, mean_100: -125.66, episodes: 8511
595749: reward: -100.00, mean_100: -126.52, episodes: 8512
595811: reward: -100.00, mean_100: -126.09, episodes: 8513
595886: reward: -100.00, mean_100: -126.61, episodes: 8514
595944: reward: -100.00, mean_100: -125.89, episodes: 8515
596015: reward: -100.00, mean_100: -125.97, episodes: 8516
596094: reward: -100.00, mean_100: -126.44, episodes: 8517
596152: reward: -100.00, mean_100: -126.58, episodes: 8518
596223: reward: -100.00, mean_100: -124.92, episodes: 8519
596275: reward: -100.00, mean_100: -124.71, episodes: 85

604901: reward: -100.00, mean_100: -137.03, episodes: 8645
604961: reward: -100.00, mean_100: -136.97, episodes: 8646
605028: reward: -100.00, mean_100: -136.13, episodes: 8647
605116: reward: -100.00, mean_100: -136.19, episodes: 8648
605184: reward: -100.00, mean_100: -136.57, episodes: 8649
605272: reward: -100.00, mean_100: -136.95, episodes: 8650
605340: reward: -100.00, mean_100: -136.97, episodes: 8651
605411: reward: -100.00, mean_100: -135.37, episodes: 8652
605465: reward: -100.00, mean_100: -134.84, episodes: 8653
605531: reward: -100.00, mean_100: -134.49, episodes: 8654
605624: reward: -100.00, mean_100: -135.42, episodes: 8655
605691: reward: -100.00, mean_100: -135.05, episodes: 8656
605747: reward: -100.00, mean_100: -134.81, episodes: 8657
605812: reward: -100.00, mean_100: -134.93, episodes: 8658
605896: reward: -100.00, mean_100: -134.83, episodes: 8659
605973: reward: -100.00, mean_100: -134.63, episodes: 8660
606033: reward: -100.00, mean_100: -134.45, episodes: 86

614614: reward: -100.00, mean_100: -136.52, episodes: 8785
614686: reward: -100.00, mean_100: -135.96, episodes: 8786
614778: reward: -100.00, mean_100: -136.01, episodes: 8787
614839: reward: -100.00, mean_100: -136.03, episodes: 8788
614930: reward: -100.00, mean_100: -136.82, episodes: 8789
614997: reward: -100.00, mean_100: -137.17, episodes: 8790
615067: reward: -100.00, mean_100: -136.86, episodes: 8791
615153: reward: -100.00, mean_100: -137.09, episodes: 8792
615244: reward: -100.00, mean_100: -138.08, episodes: 8793
615340: reward: -100.00, mean_100: -139.18, episodes: 8794
615428: reward: -100.00, mean_100: -138.99, episodes: 8795
615488: reward: -100.00, mean_100: -138.78, episodes: 8796
615568: reward: -100.00, mean_100: -139.42, episodes: 8797
615649: reward: -100.00, mean_100: -139.12, episodes: 8798
615713: reward: -100.00, mean_100: -139.21, episodes: 8799
615772: reward: -100.00, mean_100: -139.55, episodes: 8800
615846: reward: -100.00, mean_100: -140.02, episodes: 88

625011: reward: -100.00, mean_100: -133.12, episodes: 8929
625065: reward: -100.00, mean_100: -133.09, episodes: 8930
625131: reward: -100.00, mean_100: -133.56, episodes: 8931
625208: reward: -100.00, mean_100: -134.45, episodes: 8932
625267: reward: -100.00, mean_100: -135.34, episodes: 8933
625342: reward: -100.00, mean_100: -135.74, episodes: 8934
625422: reward: -100.00, mean_100: -136.08, episodes: 8935
625478: reward: -100.00, mean_100: -136.07, episodes: 8936
625551: reward: -100.00, mean_100: -136.14, episodes: 8937
625616: reward: -100.00, mean_100: -136.89, episodes: 8938
625699: reward: -100.00, mean_100: -137.42, episodes: 8939
625755: reward: -100.00, mean_100: -137.02, episodes: 8940
625846: reward: -100.00, mean_100: -136.16, episodes: 8941
625902: reward: -100.00, mean_100: -135.59, episodes: 8942
625989: reward: -100.00, mean_100: -135.32, episodes: 8943
626057: reward: -100.00, mean_100: -135.95, episodes: 8944
626112: reward: -100.00, mean_100: -135.75, episodes: 89

634869: reward: -100.00, mean_100: -132.56, episodes: 9070
634948: reward: -100.00, mean_100: -133.00, episodes: 9071
635011: reward: -100.00, mean_100: -134.83, episodes: 9072
635081: reward: -100.00, mean_100: -135.30, episodes: 9073
635170: reward: -100.00, mean_100: -134.87, episodes: 9074
635240: reward: -100.00, mean_100: -134.68, episodes: 9075
635314: reward: -100.00, mean_100: -135.26, episodes: 9076
635373: reward: -100.00, mean_100: -135.17, episodes: 9077
635457: reward: -100.00, mean_100: -134.82, episodes: 9078
635535: reward: -100.00, mean_100: -135.28, episodes: 9079
635622: reward: -100.00, mean_100: -135.11, episodes: 9080
635693: reward: -100.00, mean_100: -135.27, episodes: 9081
635776: reward: -100.00, mean_100: -135.89, episodes: 9082
635832: reward: -100.00, mean_100: -136.29, episodes: 9083
635902: reward: -100.00, mean_100: -136.05, episodes: 9084
635990: reward: -100.00, mean_100: -136.51, episodes: 9085
636080: reward: -100.00, mean_100: -137.82, episodes: 90

644598: reward: -100.00, mean_100: -126.59, episodes: 9212
644682: reward: -100.00, mean_100: -127.05, episodes: 9213
644760: reward: -100.00, mean_100: -125.91, episodes: 9214
644827: reward: -100.00, mean_100: -126.37, episodes: 9215
644881: reward: -100.00, mean_100: -126.57, episodes: 9216
644954: reward: -100.00, mean_100: -126.83, episodes: 9217
645015: reward: -100.00, mean_100: -126.89, episodes: 9218
645083: reward: -100.00, mean_100: -126.80, episodes: 9219
645144: reward: -100.00, mean_100: -126.54, episodes: 9220
645217: reward: -100.00, mean_100: -126.84, episodes: 9221
645296: reward: -100.00, mean_100: -126.97, episodes: 9222
645349: reward: -100.00, mean_100: -126.87, episodes: 9223
645437: reward: -100.00, mean_100: -126.97, episodes: 9224
645526: reward: -100.00, mean_100: -126.86, episodes: 9225
645594: reward: -100.00, mean_100: -126.64, episodes: 9226
645681: reward: -100.00, mean_100: -126.31, episodes: 9227
645762: reward: -100.00, mean_100: -125.92, episodes: 92

654929: reward: -100.00, mean_100: -133.51, episodes: 9359
654980: reward: -100.00, mean_100: -133.59, episodes: 9360
655038: reward: -100.00, mean_100: -133.22, episodes: 9361
655109: reward: -100.00, mean_100: -133.08, episodes: 9362
655183: reward: -100.00, mean_100: -133.09, episodes: 9363
655256: reward: -100.00, mean_100: -132.67, episodes: 9364
655343: reward: -100.00, mean_100: -132.96, episodes: 9365
655413: reward: -100.00, mean_100: -133.26, episodes: 9366
655477: reward: -100.00, mean_100: -132.93, episodes: 9367
655536: reward: -100.00, mean_100: -133.29, episodes: 9368
655608: reward: -100.00, mean_100: -134.56, episodes: 9369
655671: reward: -100.00, mean_100: -134.26, episodes: 9370
655749: reward: -100.00, mean_100: -134.45, episodes: 9371
655809: reward: -100.00, mean_100: -134.30, episodes: 9372
655897: reward: -100.00, mean_100: -133.37, episodes: 9373
655964: reward: -100.00, mean_100: -133.24, episodes: 9374
656034: reward: -100.00, mean_100: -133.77, episodes: 93

665167: reward: -100.00, mean_100: -130.87, episodes: 9506
665222: reward: -100.00, mean_100: -130.77, episodes: 9507
665279: reward: -100.00, mean_100: -131.10, episodes: 9508
665362: reward: -100.00, mean_100: -131.16, episodes: 9509
665434: reward: -100.00, mean_100: -131.20, episodes: 9510
665487: reward: -100.00, mean_100: -131.49, episodes: 9511
665543: reward: -100.00, mean_100: -131.04, episodes: 9512
665610: reward: -100.00, mean_100: -130.77, episodes: 9513
665673: reward: -100.00, mean_100: -130.65, episodes: 9514
665733: reward: -100.00, mean_100: -130.48, episodes: 9515
665818: reward: -100.00, mean_100: -130.94, episodes: 9516
665885: reward: -100.00, mean_100: -130.82, episodes: 9517
665967: reward: -100.00, mean_100: -130.45, episodes: 9518
666025: reward: -100.00, mean_100: -129.25, episodes: 9519
666084: reward: -100.00, mean_100: -129.19, episodes: 9520
666141: reward: -100.00, mean_100: -128.84, episodes: 9521
666245: reward: -100.00, mean_100: -130.25, episodes: 95

674681: reward: -100.00, mean_100: -137.69, episodes: 9646
674757: reward: -100.00, mean_100: -137.28, episodes: 9647
674829: reward: -100.00, mean_100: -137.77, episodes: 9648
674885: reward: -100.00, mean_100: -137.89, episodes: 9649
674964: reward: -100.00, mean_100: -137.28, episodes: 9650
675028: reward: -100.00, mean_100: -138.13, episodes: 9651
675098: reward: -100.00, mean_100: -136.09, episodes: 9652
675183: reward: -100.00, mean_100: -136.12, episodes: 9653
675251: reward: -100.00, mean_100: -136.42, episodes: 9654
675315: reward: -100.00, mean_100: -135.88, episodes: 9655
675393: reward: -100.00, mean_100: -135.52, episodes: 9656
675459: reward: -100.00, mean_100: -135.27, episodes: 9657
675551: reward: -100.00, mean_100: -135.85, episodes: 9658
675617: reward: -100.00, mean_100: -136.18, episodes: 9659
675680: reward: -100.00, mean_100: -135.94, episodes: 9660
675744: reward: -100.00, mean_100: -135.98, episodes: 9661
675812: reward: -100.00, mean_100: -135.96, episodes: 96

685064: reward: -100.00, mean_100: -130.46, episodes: 9794
685153: reward: -100.00, mean_100: -131.21, episodes: 9795
685226: reward: -100.00, mean_100: -130.90, episodes: 9796
685304: reward: -100.00, mean_100: -130.78, episodes: 9797
685404: reward: -100.00, mean_100: -129.42, episodes: 9798
685476: reward: -100.00, mean_100: -129.63, episodes: 9799
685532: reward: -100.00, mean_100: -128.98, episodes: 9800
685593: reward: -100.00, mean_100: -128.88, episodes: 9801
685645: reward: -100.00, mean_100: -129.01, episodes: 9802
685703: reward: -100.00, mean_100: -128.31, episodes: 9803
685779: reward: -100.00, mean_100: -128.68, episodes: 9804
685864: reward: -100.00, mean_100: -128.23, episodes: 9805
685919: reward: -100.00, mean_100: -127.55, episodes: 9806
685985: reward: -100.00, mean_100: -127.13, episodes: 9807
686047: reward: -100.00, mean_100: -127.06, episodes: 9808
686105: reward: -100.00, mean_100: -126.94, episodes: 9809
686164: reward: -100.00, mean_100: -126.63, episodes: 98

695155: reward: -100.00, mean_100: -131.73, episodes: 9939
695245: reward: -100.00, mean_100: -131.52, episodes: 9940
695306: reward: -100.00, mean_100: -131.66, episodes: 9941
695383: reward: -100.00, mean_100: -132.08, episodes: 9942
695448: reward: -100.00, mean_100: -132.34, episodes: 9943
695499: reward: -100.00, mean_100: -132.24, episodes: 9944
695591: reward: -100.00, mean_100: -132.09, episodes: 9945
695661: reward: -100.00, mean_100: -132.32, episodes: 9946
695715: reward: -100.00, mean_100: -129.30, episodes: 9947
695799: reward: -100.00, mean_100: -129.44, episodes: 9948
695859: reward: -100.00, mean_100: -129.42, episodes: 9949
695935: reward: -100.00, mean_100: -131.26, episodes: 9950
695994: reward: -100.00, mean_100: -131.13, episodes: 9951
696063: reward: -100.00, mean_100: -131.17, episodes: 9952
696144: reward: -100.00, mean_100: -131.57, episodes: 9953
696229: reward: -100.00, mean_100: -130.39, episodes: 9954
696311: reward: -100.00, mean_100: -130.92, episodes: 99

705390: reward: -100.00, mean_100: -134.99, episodes: 10084
705476: reward: -100.00, mean_100: -135.11, episodes: 10085
705568: reward: -100.00, mean_100: -135.89, episodes: 10086
705635: reward: -100.00, mean_100: -135.35, episodes: 10087
705705: reward: -100.00, mean_100: -135.30, episodes: 10088
705767: reward: -100.00, mean_100: -135.28, episodes: 10089
705822: reward: -100.00, mean_100: -135.33, episodes: 10090
705890: reward: -100.00, mean_100: -134.72, episodes: 10091
705962: reward: -100.00, mean_100: -134.94, episodes: 10092
706033: reward: -100.00, mean_100: -134.72, episodes: 10093
706111: reward: -100.00, mean_100: -134.62, episodes: 10094
706174: reward: -100.00, mean_100: -134.76, episodes: 10095
706234: reward: -100.00, mean_100: -136.19, episodes: 10096
706320: reward: -100.00, mean_100: -135.90, episodes: 10097
706410: reward: -100.00, mean_100: -135.64, episodes: 10098
706478: reward: -100.00, mean_100: -135.61, episodes: 10099
706546: reward: -100.00, mean_100: -136.

715243: reward: -100.00, mean_100: -137.50, episodes: 10226
715324: reward: -100.00, mean_100: -137.90, episodes: 10227
715408: reward: -100.00, mean_100: -137.88, episodes: 10228
715473: reward: -100.00, mean_100: -137.72, episodes: 10229
715552: reward: -100.00, mean_100: -137.07, episodes: 10230
715630: reward: -100.00, mean_100: -137.43, episodes: 10231
715714: reward: -100.00, mean_100: -137.37, episodes: 10232
715796: reward: -100.00, mean_100: -137.62, episodes: 10233
715849: reward: -100.00, mean_100: -137.19, episodes: 10234
715908: reward: -100.00, mean_100: -136.93, episodes: 10235
715990: reward: -100.00, mean_100: -136.88, episodes: 10236
716044: reward: -100.00, mean_100: -136.74, episodes: 10237
716131: reward: -100.00, mean_100: -136.70, episodes: 10238
716214: reward: -100.00, mean_100: -136.70, episodes: 10239
716294: reward: -100.00, mean_100: -136.47, episodes: 10240
716356: reward: -100.00, mean_100: -136.59, episodes: 10241
716431: reward: -100.00, mean_100: -136.

724837: reward: -100.00, mean_100: -127.63, episodes: 10363
724909: reward: -100.00, mean_100: -126.43, episodes: 10364
724985: reward: -100.00, mean_100: -126.81, episodes: 10365
725038: reward: -100.00, mean_100: -126.03, episodes: 10366
725138: reward: -100.00, mean_100: -126.56, episodes: 10367
725192: reward: -100.00, mean_100: -127.24, episodes: 10368
725252: reward: -100.00, mean_100: -127.26, episodes: 10369
725337: reward: -100.00, mean_100: -125.91, episodes: 10370
725390: reward: -100.00, mean_100: -125.83, episodes: 10371
725460: reward: -100.00, mean_100: -125.92, episodes: 10372
725515: reward: -100.00, mean_100: -125.68, episodes: 10373
725571: reward: -100.00, mean_100: -125.94, episodes: 10374
725637: reward: -100.00, mean_100: -124.95, episodes: 10375
725710: reward: -100.00, mean_100: -124.72, episodes: 10376
725763: reward: -100.00, mean_100: -125.21, episodes: 10377
725816: reward: -100.00, mean_100: -123.33, episodes: 10378
725882: reward: -100.00, mean_100: -123.

734741: reward: -100.00, mean_100: -131.65, episodes: 10506
734821: reward: -100.00, mean_100: -131.74, episodes: 10507
734889: reward: -100.00, mean_100: -132.21, episodes: 10508
734958: reward: -100.00, mean_100: -132.66, episodes: 10509
735030: reward: -100.00, mean_100: -133.00, episodes: 10510
735108: reward: -100.00, mean_100: -133.19, episodes: 10511
735167: reward: -100.00, mean_100: -133.30, episodes: 10512
735245: reward: -100.00, mean_100: -133.42, episodes: 10513
735307: reward: -100.00, mean_100: -133.32, episodes: 10514
735363: reward: -100.00, mean_100: -132.75, episodes: 10515
735452: reward: -100.00, mean_100: -131.48, episodes: 10516
735538: reward: -100.00, mean_100: -130.95, episodes: 10517
735592: reward: -100.00, mean_100: -130.72, episodes: 10518
735652: reward: -100.00, mean_100: -130.22, episodes: 10519
735738: reward: -100.00, mean_100: -130.57, episodes: 10520
735801: reward: -100.00, mean_100: -129.37, episodes: 10521
735879: reward: -100.00, mean_100: -129.

744175: reward: -100.00, mean_100: -129.22, episodes: 10643
744260: reward: -100.00, mean_100: -129.24, episodes: 10644
744314: reward: -100.00, mean_100: -128.79, episodes: 10645
744375: reward: -100.00, mean_100: -129.09, episodes: 10646
744434: reward: -100.00, mean_100: -129.24, episodes: 10647
744504: reward: -100.00, mean_100: -129.52, episodes: 10648
744596: reward: -100.00, mean_100: -128.30, episodes: 10649
744660: reward: -100.00, mean_100: -128.14, episodes: 10650
744716: reward: -100.00, mean_100: -128.33, episodes: 10651
744806: reward: -100.00, mean_100: -126.48, episodes: 10652
744864: reward: -100.00, mean_100: -126.57, episodes: 10653
744947: reward: -100.00, mean_100: -126.25, episodes: 10654
745037: reward: -100.00, mean_100: -126.93, episodes: 10655
745097: reward: -100.00, mean_100: -127.19, episodes: 10656
745178: reward: -100.00, mean_100: -127.58, episodes: 10657
745257: reward: -100.00, mean_100: -126.92, episodes: 10658
745339: reward: -100.00, mean_100: -126.

753902: reward: -100.00, mean_100: -130.79, episodes: 10783
753993: reward: -100.00, mean_100: -130.57, episodes: 10784
754051: reward: -100.00, mean_100: -129.93, episodes: 10785
754115: reward: -100.00, mean_100: -129.84, episodes: 10786
754170: reward: -100.00, mean_100: -129.28, episodes: 10787
754244: reward: -100.00, mean_100: -131.65, episodes: 10788
754308: reward: -100.00, mean_100: -131.45, episodes: 10789
754373: reward: -100.00, mean_100: -132.15, episodes: 10790
754442: reward: -100.00, mean_100: -132.35, episodes: 10791
754530: reward: -100.00, mean_100: -132.33, episodes: 10792
754597: reward: -100.00, mean_100: -131.19, episodes: 10793
754656: reward: -100.00, mean_100: -130.52, episodes: 10794
754734: reward: -100.00, mean_100: -130.53, episodes: 10795
754808: reward: -100.00, mean_100: -130.83, episodes: 10796
754891: reward: -100.00, mean_100: -131.10, episodes: 10797
754943: reward: -100.00, mean_100: -131.00, episodes: 10798
754998: reward: -100.00, mean_100: -130.

763429: reward: -100.00, mean_100: -134.51, episodes: 10922
763486: reward: -100.00, mean_100: -134.91, episodes: 10923
763537: reward: -100.00, mean_100: -135.17, episodes: 10924
763592: reward: -100.00, mean_100: -135.21, episodes: 10925
763676: reward: -100.00, mean_100: -135.74, episodes: 10926
763742: reward: -100.00, mean_100: -136.32, episodes: 10927
763830: reward: -100.00, mean_100: -135.53, episodes: 10928
763880: reward: -100.00, mean_100: -135.23, episodes: 10929
763957: reward: -100.00, mean_100: -135.69, episodes: 10930
764018: reward: -100.00, mean_100: -135.57, episodes: 10931
764072: reward: -100.00, mean_100: -135.73, episodes: 10932
764131: reward: -100.00, mean_100: -135.88, episodes: 10933
764207: reward: -100.00, mean_100: -136.30, episodes: 10934
764286: reward: -100.00, mean_100: -135.66, episodes: 10935
764362: reward: -100.00, mean_100: -135.79, episodes: 10936
764433: reward: -100.00, mean_100: -135.59, episodes: 10937
764501: reward: -100.00, mean_100: -135.

773328: reward: -100.00, mean_100: -131.36, episodes: 11061
773406: reward: -100.00, mean_100: -131.47, episodes: 11062
773478: reward: -100.00, mean_100: -131.61, episodes: 11063
773547: reward: -100.00, mean_100: -131.69, episodes: 11064
773603: reward: -100.00, mean_100: -132.02, episodes: 11065
773683: reward: -100.00, mean_100: -131.77, episodes: 11066
773740: reward: -100.00, mean_100: -131.75, episodes: 11067
773803: reward: -100.00, mean_100: -131.97, episodes: 11068
773885: reward: -100.00, mean_100: -131.97, episodes: 11069
773937: reward: -100.00, mean_100: -131.95, episodes: 11070
773993: reward: -100.00, mean_100: -131.48, episodes: 11071
774058: reward: -100.00, mean_100: -131.40, episodes: 11072
774137: reward: -100.00, mean_100: -130.89, episodes: 11073
774194: reward: -100.00, mean_100: -131.37, episodes: 11074
774259: reward: -100.00, mean_100: -131.21, episodes: 11075
774312: reward: -100.00, mean_100: -130.88, episodes: 11076
774374: reward: -100.00, mean_100: -131.

782944: reward: -100.00, mean_100: -131.21, episodes: 11199
783015: reward: -100.00, mean_100: -130.77, episodes: 11200
783078: reward: -100.00, mean_100: -131.66, episodes: 11201
783142: reward: -100.00, mean_100: -131.71, episodes: 11202
783219: reward: -100.00, mean_100: -131.78, episodes: 11203
783297: reward: -100.00, mean_100: -131.39, episodes: 11204
783377: reward: -100.00, mean_100: -130.99, episodes: 11205
783434: reward: -100.00, mean_100: -130.59, episodes: 11206
783498: reward: -100.00, mean_100: -131.41, episodes: 11207
783591: reward: -100.00, mean_100: -130.38, episodes: 11208
783670: reward: -100.00, mean_100: -131.36, episodes: 11209
783723: reward: -100.00, mean_100: -128.93, episodes: 11210
783813: reward: -100.00, mean_100: -129.52, episodes: 11211
783867: reward: -100.00, mean_100: -129.23, episodes: 11212
783927: reward: -100.00, mean_100: -129.35, episodes: 11213
784001: reward: -100.00, mean_100: -129.56, episodes: 11214
784077: reward: -100.00, mean_100: -130.

792965: reward: -100.00, mean_100: -132.14, episodes: 11344
793024: reward: -100.00, mean_100: -132.57, episodes: 11345
793093: reward: -100.00, mean_100: -133.16, episodes: 11346
793171: reward: -100.00, mean_100: -133.19, episodes: 11347
793224: reward: -100.00, mean_100: -133.07, episodes: 11348
793280: reward: -100.00, mean_100: -132.72, episodes: 11349
793341: reward: -100.00, mean_100: -133.03, episodes: 11350
793400: reward: -100.00, mean_100: -132.02, episodes: 11351
793487: reward: -100.00, mean_100: -132.68, episodes: 11352
793570: reward: -100.00, mean_100: -133.13, episodes: 11353
793622: reward: -100.00, mean_100: -132.79, episodes: 11354
793710: reward: -100.00, mean_100: -133.39, episodes: 11355
793773: reward: -100.00, mean_100: -133.61, episodes: 11356
793847: reward: -100.00, mean_100: -133.50, episodes: 11357
793909: reward: -100.00, mean_100: -132.50, episodes: 11358
793976: reward: -100.00, mean_100: -130.76, episodes: 11359
794068: reward: -100.00, mean_100: -130.

802815: reward: -100.00, mean_100: -138.87, episodes: 11483
802874: reward: -100.00, mean_100: -138.22, episodes: 11484
802932: reward: -100.00, mean_100: -138.04, episodes: 11485
802989: reward: -100.00, mean_100: -138.16, episodes: 11486
803048: reward: -100.00, mean_100: -138.51, episodes: 11487
803106: reward: -100.00, mean_100: -138.34, episodes: 11488
803173: reward: -100.00, mean_100: -138.17, episodes: 11489
803257: reward: -100.00, mean_100: -138.46, episodes: 11490
803311: reward: -100.00, mean_100: -136.23, episodes: 11491
803368: reward: -100.00, mean_100: -135.98, episodes: 11492
803436: reward: -100.00, mean_100: -135.66, episodes: 11493
803489: reward: -100.00, mean_100: -135.94, episodes: 11494
803576: reward: -100.00, mean_100: -135.44, episodes: 11495
803628: reward: -100.00, mean_100: -134.91, episodes: 11496
803705: reward: -100.00, mean_100: -135.57, episodes: 11497
803762: reward: -100.00, mean_100: -135.37, episodes: 11498
803831: reward: -100.00, mean_100: -135.

812651: reward: -100.00, mean_100: -130.29, episodes: 11625
812707: reward: -100.00, mean_100: -129.63, episodes: 11626
812797: reward: -100.00, mean_100: -128.19, episodes: 11627
812865: reward: -100.00, mean_100: -128.49, episodes: 11628
812957: reward: -100.00, mean_100: -128.97, episodes: 11629
813031: reward: -100.00, mean_100: -129.28, episodes: 11630
813100: reward: -100.00, mean_100: -129.51, episodes: 11631
813157: reward: -100.00, mean_100: -129.11, episodes: 11632
813249: reward: -100.00, mean_100: -129.38, episodes: 11633
813301: reward: -100.00, mean_100: -129.30, episodes: 11634
813361: reward: -100.00, mean_100: -128.98, episodes: 11635
813422: reward: -100.00, mean_100: -128.66, episodes: 11636
813475: reward: -100.00, mean_100: -129.71, episodes: 11637
813545: reward: -100.00, mean_100: -129.72, episodes: 11638
813598: reward: -100.00, mean_100: -129.77, episodes: 11639
813678: reward: -100.00, mean_100: -129.98, episodes: 11640
813750: reward: -100.00, mean_100: -130.

822425: reward: -100.00, mean_100: -132.92, episodes: 11765
822503: reward: -100.00, mean_100: -132.79, episodes: 11766
822588: reward: -100.00, mean_100: -132.56, episodes: 11767
822646: reward: -100.00, mean_100: -133.76, episodes: 11768
822708: reward: -100.00, mean_100: -133.48, episodes: 11769
822781: reward: -100.00, mean_100: -134.21, episodes: 11770
822845: reward: -100.00, mean_100: -134.78, episodes: 11771
822908: reward: -100.00, mean_100: -135.42, episodes: 11772
822960: reward: -100.00, mean_100: -134.86, episodes: 11773
823034: reward: -100.00, mean_100: -135.32, episodes: 11774
823108: reward: -100.00, mean_100: -133.77, episodes: 11775
823184: reward: -100.00, mean_100: -134.05, episodes: 11776
823272: reward: -100.00, mean_100: -133.63, episodes: 11777
823362: reward: -100.00, mean_100: -133.74, episodes: 11778
823442: reward: -100.00, mean_100: -134.12, episodes: 11779
823522: reward: -100.00, mean_100: -134.72, episodes: 11780
823597: reward: -100.00, mean_100: -135.

832161: reward: -100.00, mean_100: -126.76, episodes: 11902
832239: reward: -100.00, mean_100: -125.33, episodes: 11903
832318: reward: -100.00, mean_100: -125.40, episodes: 11904
832381: reward: -100.00, mean_100: -125.61, episodes: 11905
832470: reward: -100.00, mean_100: -127.50, episodes: 11906
832543: reward: -100.00, mean_100: -127.47, episodes: 11907
832601: reward: -100.00, mean_100: -127.18, episodes: 11908
832714: reward: -100.00, mean_100: -128.84, episodes: 11909
832766: reward: -100.00, mean_100: -128.85, episodes: 11910
832852: reward: -100.00, mean_100: -127.43, episodes: 11911
832944: reward: -100.00, mean_100: -128.29, episodes: 11912
833024: reward: -100.00, mean_100: -127.94, episodes: 11913
833093: reward: -100.00, mean_100: -128.15, episodes: 11914
833159: reward: -100.00, mean_100: -128.37, episodes: 11915
833239: reward: -100.00, mean_100: -126.94, episodes: 11916
833314: reward: -100.00, mean_100: -127.36, episodes: 11917
833380: reward: -100.00, mean_100: -127.

841861: reward: -100.00, mean_100: -127.05, episodes: 12039
841933: reward: -100.00, mean_100: -127.31, episodes: 12040
842042: reward: -100.00, mean_100: -127.22, episodes: 12041
842100: reward: -100.00, mean_100: -128.45, episodes: 12042
842175: reward: -100.00, mean_100: -128.23, episodes: 12043
842238: reward: -100.00, mean_100: -128.55, episodes: 12044
842299: reward: -100.00, mean_100: -128.68, episodes: 12045
842375: reward: -100.00, mean_100: -128.97, episodes: 12046
842463: reward: -100.00, mean_100: -128.94, episodes: 12047
842538: reward: -100.00, mean_100: -129.05, episodes: 12048
842590: reward: -100.00, mean_100: -128.84, episodes: 12049
842673: reward: -100.00, mean_100: -129.38, episodes: 12050
842756: reward: -100.00, mean_100: -129.52, episodes: 12051
842819: reward: -100.00, mean_100: -129.45, episodes: 12052
842891: reward: -100.00, mean_100: -129.69, episodes: 12053
842945: reward: -100.00, mean_100: -129.09, episodes: 12054
843001: reward: -100.00, mean_100: -128.

851592: reward: -100.00, mean_100: -126.37, episodes: 12179
851674: reward: -100.00, mean_100: -126.24, episodes: 12180
851738: reward: -100.00, mean_100: -126.12, episodes: 12181
851799: reward: -100.00, mean_100: -126.27, episodes: 12182
851852: reward: -100.00, mean_100: -125.75, episodes: 12183
851941: reward: -100.00, mean_100: -124.44, episodes: 12184
852003: reward: -100.00, mean_100: -124.99, episodes: 12185
852073: reward: -100.00, mean_100: -126.11, episodes: 12186
852143: reward: -100.00, mean_100: -126.23, episodes: 12187
852229: reward: -100.00, mean_100: -126.51, episodes: 12188
852297: reward: -100.00, mean_100: -126.70, episodes: 12189
852375: reward: -100.00, mean_100: -126.95, episodes: 12190
852450: reward: -100.00, mean_100: -127.14, episodes: 12191
852528: reward: -100.00, mean_100: -126.98, episodes: 12192
852584: reward: -100.00, mean_100: -126.71, episodes: 12193
852660: reward: -100.00, mean_100: -126.78, episodes: 12194
852728: reward: -100.00, mean_100: -125.

861814: reward: -100.00, mean_100: -131.50, episodes: 12325
861904: reward: -100.00, mean_100: -130.52, episodes: 12326
861978: reward: -100.00, mean_100: -131.22, episodes: 12327
862032: reward: -100.00, mean_100: -130.97, episodes: 12328
862099: reward: -100.00, mean_100: -129.46, episodes: 12329
862162: reward: -100.00, mean_100: -129.29, episodes: 12330
862226: reward: -100.00, mean_100: -129.25, episodes: 12331
862312: reward: -100.00, mean_100: -128.72, episodes: 12332
862381: reward: -100.00, mean_100: -128.66, episodes: 12333
862456: reward: -100.00, mean_100: -129.18, episodes: 12334
862522: reward: -100.00, mean_100: -129.71, episodes: 12335
862577: reward: -100.00, mean_100: -130.12, episodes: 12336
862639: reward: -100.00, mean_100: -130.19, episodes: 12337
862694: reward: -100.00, mean_100: -129.82, episodes: 12338
862782: reward: -100.00, mean_100: -130.27, episodes: 12339
862871: reward: -100.00, mean_100: -130.35, episodes: 12340
862946: reward: -100.00, mean_100: -130.

871564: reward: -100.00, mean_100: -131.01, episodes: 12463
871640: reward: -100.00, mean_100: -132.45, episodes: 12464
871701: reward: -100.00, mean_100: -132.59, episodes: 12465
871790: reward: -100.00, mean_100: -133.99, episodes: 12466
871841: reward: -100.00, mean_100: -133.60, episodes: 12467
871907: reward: -100.00, mean_100: -133.74, episodes: 12468
871975: reward: -100.00, mean_100: -133.56, episodes: 12469
872038: reward: -100.00, mean_100: -133.44, episodes: 12470
872107: reward: -100.00, mean_100: -132.67, episodes: 12471
872170: reward: -100.00, mean_100: -132.46, episodes: 12472
872226: reward: -100.00, mean_100: -132.62, episodes: 12473
872280: reward: -100.00, mean_100: -132.22, episodes: 12474
872340: reward: -100.00, mean_100: -132.58, episodes: 12475
872408: reward: -100.00, mean_100: -132.96, episodes: 12476
872460: reward: -100.00, mean_100: -132.84, episodes: 12477
872543: reward: -100.00, mean_100: -132.89, episodes: 12478
872622: reward: -100.00, mean_100: -133.

880909: reward: -100.00, mean_100: -129.94, episodes: 12603
880991: reward: -100.00, mean_100: -128.47, episodes: 12604
881086: reward: -100.00, mean_100: -129.63, episodes: 12605
881141: reward: -100.00, mean_100: -128.99, episodes: 12606
881242: reward: -100.00, mean_100: -127.25, episodes: 12607
881330: reward: -100.00, mean_100: -127.00, episodes: 12608
881418: reward: -100.00, mean_100: -127.52, episodes: 12609
881498: reward: -100.00, mean_100: -126.66, episodes: 12610
881567: reward: -100.00, mean_100: -127.15, episodes: 12611
881653: reward: -100.00, mean_100: -127.14, episodes: 12612
881729: reward: -100.00, mean_100: -127.97, episodes: 12613
881796: reward: -100.00, mean_100: -127.60, episodes: 12614
881884: reward: -100.00, mean_100: -128.01, episodes: 12615
881971: reward: -100.00, mean_100: -128.18, episodes: 12616
882056: reward: -100.00, mean_100: -128.44, episodes: 12617
882121: reward: -100.00, mean_100: -128.34, episodes: 12618
882176: reward: -100.00, mean_100: -128.

891397: reward: -100.00, mean_100: -128.86, episodes: 12750
891463: reward: -100.00, mean_100: -128.79, episodes: 12751
891531: reward: -100.00, mean_100: -127.54, episodes: 12752
891616: reward: -100.00, mean_100: -127.13, episodes: 12753
891692: reward: -100.00, mean_100: -126.72, episodes: 12754
891773: reward: -100.00, mean_100: -126.79, episodes: 12755
891859: reward: -100.00, mean_100: -126.49, episodes: 12756
891936: reward: -100.00, mean_100: -126.97, episodes: 12757
891998: reward: -100.00, mean_100: -127.41, episodes: 12758
892085: reward: -100.00, mean_100: -126.46, episodes: 12759
892158: reward: -100.00, mean_100: -126.17, episodes: 12760
892228: reward: -100.00, mean_100: -126.13, episodes: 12761
892310: reward: -100.00, mean_100: -126.20, episodes: 12762
892379: reward: -100.00, mean_100: -126.60, episodes: 12763
892472: reward: -100.00, mean_100: -127.07, episodes: 12764
892547: reward: -100.00, mean_100: -127.12, episodes: 12765
892621: reward: -100.00, mean_100: -127.

901747: reward: -100.00, mean_100: -129.06, episodes: 12897
901821: reward: -100.00, mean_100: -129.51, episodes: 12898
901881: reward: -100.00, mean_100: -128.52, episodes: 12899
901977: reward: -100.00, mean_100: -127.86, episodes: 12900
902057: reward: -100.00, mean_100: -127.75, episodes: 12901
902115: reward: -100.00, mean_100: -127.88, episodes: 12902
902173: reward: -100.00, mean_100: -127.82, episodes: 12903
902250: reward: -100.00, mean_100: -127.57, episodes: 12904
902328: reward: -100.00, mean_100: -127.75, episodes: 12905
902388: reward: -100.00, mean_100: -127.11, episodes: 12906
902476: reward: -100.00, mean_100: -127.42, episodes: 12907
902539: reward: -100.00, mean_100: -127.46, episodes: 12908
902624: reward: -100.00, mean_100: -127.30, episodes: 12909
902680: reward: -100.00, mean_100: -127.34, episodes: 12910
902731: reward: -100.00, mean_100: -127.46, episodes: 12911
902799: reward: -100.00, mean_100: -127.16, episodes: 12912
902855: reward: -100.00, mean_100: -127.

911766: reward: -100.00, mean_100: -124.64, episodes: 13041
911835: reward: -100.00, mean_100: -123.36, episodes: 13042
911910: reward: -100.00, mean_100: -123.42, episodes: 13043
911999: reward: -100.00, mean_100: -123.05, episodes: 13044
912069: reward: -100.00, mean_100: -123.22, episodes: 13045
912157: reward: -100.00, mean_100: -123.38, episodes: 13046
912215: reward: -100.00, mean_100: -123.47, episodes: 13047
912273: reward: -100.00, mean_100: -123.23, episodes: 13048
912343: reward: -100.00, mean_100: -123.44, episodes: 13049
912431: reward: -100.00, mean_100: -123.23, episodes: 13050
912494: reward: -100.00, mean_100: -123.07, episodes: 13051
912569: reward: -100.00, mean_100: -123.03, episodes: 13052
912637: reward: -100.00, mean_100: -123.23, episodes: 13053
912693: reward: -100.00, mean_100: -123.12, episodes: 13054
912780: reward: -100.00, mean_100: -123.75, episodes: 13055
912852: reward: -100.00, mean_100: -124.10, episodes: 13056
912911: reward: -100.00, mean_100: -124.

921989: reward: -100.00, mean_100: -132.85, episodes: 13188
922072: reward: -100.00, mean_100: -132.66, episodes: 13189
922143: reward: -100.00, mean_100: -133.08, episodes: 13190
922226: reward: -100.00, mean_100: -133.43, episodes: 13191
922300: reward: -100.00, mean_100: -131.32, episodes: 13192
922361: reward: -100.00, mean_100: -130.76, episodes: 13193
922442: reward: -100.00, mean_100: -130.14, episodes: 13194
922519: reward: -100.00, mean_100: -130.25, episodes: 13195
922579: reward: -100.00, mean_100: -130.09, episodes: 13196
922639: reward: -100.00, mean_100: -129.51, episodes: 13197
922698: reward: -100.00, mean_100: -130.06, episodes: 13198
922759: reward: -100.00, mean_100: -129.44, episodes: 13199
922843: reward: -100.00, mean_100: -128.90, episodes: 13200
922905: reward: -100.00, mean_100: -128.60, episodes: 13201
922967: reward: -100.00, mean_100: -127.03, episodes: 13202
923054: reward: -100.00, mean_100: -127.57, episodes: 13203
923117: reward: -100.00, mean_100: -127.

931724: reward: -100.00, mean_100: -128.32, episodes: 13327
931804: reward: -100.00, mean_100: -127.42, episodes: 13328
931888: reward: -100.00, mean_100: -127.42, episodes: 13329
931954: reward: -100.00, mean_100: -127.59, episodes: 13330
932013: reward: -100.00, mean_100: -126.93, episodes: 13331
932115: reward: -100.00, mean_100: -125.09, episodes: 13332
932215: reward: -100.00, mean_100: -124.10, episodes: 13333
932279: reward: -100.00, mean_100: -123.96, episodes: 13334
932345: reward: -100.00, mean_100: -124.42, episodes: 13335
932401: reward: -100.00, mean_100: -123.82, episodes: 13336
932478: reward: -100.00, mean_100: -124.02, episodes: 13337
932533: reward: -100.00, mean_100: -123.80, episodes: 13338
932622: reward: -100.00, mean_100: -123.06, episodes: 13339
932698: reward: -100.00, mean_100: -123.26, episodes: 13340
932756: reward: -100.00, mean_100: -123.43, episodes: 13341
932811: reward: -100.00, mean_100: -123.05, episodes: 13342
932894: reward: -100.00, mean_100: -123.

941566: reward: -100.00, mean_100: -133.88, episodes: 13467
941651: reward: -100.00, mean_100: -133.74, episodes: 13468
941706: reward: -100.00, mean_100: -133.71, episodes: 13469
941769: reward: -100.00, mean_100: -133.82, episodes: 13470
941822: reward: -100.00, mean_100: -133.12, episodes: 13471
941901: reward: -100.00, mean_100: -133.09, episodes: 13472
941953: reward: -100.00, mean_100: -132.96, episodes: 13473
942008: reward: -100.00, mean_100: -132.62, episodes: 13474
942093: reward: -100.00, mean_100: -132.30, episodes: 13475
942181: reward: -100.00, mean_100: -131.74, episodes: 13476
942270: reward: -100.00, mean_100: -131.50, episodes: 13477
942339: reward: -100.00, mean_100: -131.91, episodes: 13478
942406: reward: -100.00, mean_100: -132.04, episodes: 13479
942480: reward: -100.00, mean_100: -131.78, episodes: 13480
942546: reward: -100.00, mean_100: -131.92, episodes: 13481
942601: reward: -100.00, mean_100: -131.35, episodes: 13482
942661: reward: -100.00, mean_100: -131.

951735: reward: -100.00, mean_100: -138.06, episodes: 13612
951790: reward: -100.00, mean_100: -137.49, episodes: 13613
951877: reward: -100.00, mean_100: -137.58, episodes: 13614
951925: reward: -100.00, mean_100: -137.59, episodes: 13615
951984: reward: -100.00, mean_100: -138.29, episodes: 13616
952065: reward: -100.00, mean_100: -138.22, episodes: 13617
952130: reward: -100.00, mean_100: -138.41, episodes: 13618
952195: reward: -100.00, mean_100: -138.70, episodes: 13619
952264: reward: -100.00, mean_100: -137.34, episodes: 13620
952317: reward: -100.00, mean_100: -137.40, episodes: 13621
952393: reward: -100.00, mean_100: -137.71, episodes: 13622
952481: reward: -100.00, mean_100: -136.55, episodes: 13623
952548: reward: -100.00, mean_100: -136.61, episodes: 13624
952624: reward: -100.00, mean_100: -135.39, episodes: 13625
952685: reward: -100.00, mean_100: -135.73, episodes: 13626
952741: reward: -100.00, mean_100: -134.76, episodes: 13627
952811: reward: -100.00, mean_100: -134.

962128: reward: -100.00, mean_100: -133.84, episodes: 13756
962192: reward: -100.00, mean_100: -133.59, episodes: 13757
962245: reward: -100.00, mean_100: -133.46, episodes: 13758
962309: reward: -100.00, mean_100: -134.91, episodes: 13759
962363: reward: -100.00, mean_100: -134.11, episodes: 13760
962448: reward: -100.00, mean_100: -133.63, episodes: 13761
962529: reward: -100.00, mean_100: -134.35, episodes: 13762
962616: reward: -100.00, mean_100: -131.83, episodes: 13763
962701: reward: -100.00, mean_100: -132.26, episodes: 13764
962763: reward: -100.00, mean_100: -132.04, episodes: 13765
962836: reward: -100.00, mean_100: -132.15, episodes: 13766
962894: reward: -100.00, mean_100: -131.22, episodes: 13767
962962: reward: -100.00, mean_100: -131.32, episodes: 13768
963023: reward: -100.00, mean_100: -130.89, episodes: 13769
963093: reward: -100.00, mean_100: -131.26, episodes: 13770
963146: reward: -100.00, mean_100: -131.19, episodes: 13771
963200: reward: -100.00, mean_100: -131.

971930: reward: -100.00, mean_100: -126.40, episodes: 13898
971987: reward: -100.00, mean_100: -125.28, episodes: 13899
972057: reward: -100.00, mean_100: -124.54, episodes: 13900
972144: reward: -100.00, mean_100: -123.97, episodes: 13901
972200: reward: -100.00, mean_100: -123.17, episodes: 13902
972278: reward: -100.00, mean_100: -123.66, episodes: 13903
972348: reward: -100.00, mean_100: -123.33, episodes: 13904
972426: reward: -100.00, mean_100: -122.58, episodes: 13905
972494: reward: -100.00, mean_100: -122.39, episodes: 13906
972567: reward: -100.00, mean_100: -122.86, episodes: 13907
972628: reward: -100.00, mean_100: -123.00, episodes: 13908
972708: reward: -100.00, mean_100: -123.39, episodes: 13909
972788: reward: -100.00, mean_100: -123.95, episodes: 13910
972899: reward: -100.00, mean_100: -128.01, episodes: 13911
972988: reward: -100.00, mean_100: -127.98, episodes: 13912
973067: reward: -100.00, mean_100: -129.00, episodes: 13913
973129: reward: -100.00, mean_100: -129.

981788: reward: -100.00, mean_100: -127.29, episodes: 14038
981842: reward: -100.00, mean_100: -127.80, episodes: 14039
981928: reward: -100.00, mean_100: -128.22, episodes: 14040
982004: reward: -100.00, mean_100: -129.03, episodes: 14041
982062: reward: -100.00, mean_100: -129.19, episodes: 14042
982161: reward: -100.00, mean_100: -130.82, episodes: 14043
982218: reward: -100.00, mean_100: -130.77, episodes: 14044
982304: reward: -100.00, mean_100: -130.53, episodes: 14045
982377: reward: -100.00, mean_100: -130.32, episodes: 14046
982440: reward: -100.00, mean_100: -130.84, episodes: 14047
982505: reward: -100.00, mean_100: -131.23, episodes: 14048
982593: reward: -100.00, mean_100: -131.27, episodes: 14049
982672: reward: -100.00, mean_100: -131.15, episodes: 14050
982731: reward: -100.00, mean_100: -131.02, episodes: 14051
982815: reward: -100.00, mean_100: -130.84, episodes: 14052
982871: reward: -100.00, mean_100: -131.13, episodes: 14053
982934: reward: -100.00, mean_100: -130.

991716: reward: -100.00, mean_100: -138.03, episodes: 14176
991791: reward: -100.00, mean_100: -137.98, episodes: 14177
991865: reward: -100.00, mean_100: -137.91, episodes: 14178
991918: reward: -100.00, mean_100: -137.86, episodes: 14179
992019: reward: -100.00, mean_100: -140.13, episodes: 14180
992070: reward: -100.00, mean_100: -140.04, episodes: 14181
992145: reward: -100.00, mean_100: -140.11, episodes: 14182
992223: reward: -100.00, mean_100: -140.12, episodes: 14183
992279: reward: -100.00, mean_100: -140.13, episodes: 14184
992327: reward: -100.00, mean_100: -140.08, episodes: 14185
992389: reward: -100.00, mean_100: -139.88, episodes: 14186
992453: reward: -100.00, mean_100: -140.16, episodes: 14187
992521: reward: -100.00, mean_100: -139.68, episodes: 14188
992584: reward: -100.00, mean_100: -139.05, episodes: 14189
992649: reward: -100.00, mean_100: -139.43, episodes: 14190
992739: reward: -100.00, mean_100: -139.37, episodes: 14191
992815: reward: -100.00, mean_100: -139.

1001193: reward: -100.00, mean_100: -131.18, episodes: 14313
1001254: reward: -100.00, mean_100: -130.77, episodes: 14314
1001317: reward: -100.00, mean_100: -130.41, episodes: 14315
1001393: reward: -100.00, mean_100: -130.75, episodes: 14316
1001446: reward: -100.00, mean_100: -130.76, episodes: 14317
1001532: reward: -100.00, mean_100: -130.96, episodes: 14318
1001602: reward: -100.00, mean_100: -131.07, episodes: 14319
1001659: reward: -100.00, mean_100: -131.05, episodes: 14320
1001729: reward: -100.00, mean_100: -131.27, episodes: 14321
1001793: reward: -100.00, mean_100: -131.43, episodes: 14322
1001850: reward: -100.00, mean_100: -132.38, episodes: 14323
1001929: reward: -100.00, mean_100: -133.19, episodes: 14324
1001982: reward: -100.00, mean_100: -132.94, episodes: 14325
1002070: reward: -100.00, mean_100: -133.22, episodes: 14326
1002155: reward: -100.00, mean_100: -133.04, episodes: 14327
1002233: reward: -100.00, mean_100: -133.38, episodes: 14328
1002302: reward: -100.00

1011133: reward: -100.00, mean_100: -129.24, episodes: 14457
1011186: reward: -100.00, mean_100: -127.84, episodes: 14458
1011249: reward: -100.00, mean_100: -127.54, episodes: 14459
1011308: reward: -100.00, mean_100: -128.00, episodes: 14460
1011396: reward: -100.00, mean_100: -127.74, episodes: 14461
1011469: reward: -100.00, mean_100: -127.83, episodes: 14462
1011527: reward: -100.00, mean_100: -127.81, episodes: 14463
1011605: reward: -100.00, mean_100: -128.05, episodes: 14464
1011672: reward: -100.00, mean_100: -127.81, episodes: 14465
1011725: reward: -100.00, mean_100: -127.23, episodes: 14466
1011808: reward: -100.00, mean_100: -126.75, episodes: 14467
1011866: reward: -100.00, mean_100: -127.13, episodes: 14468
1011918: reward: -100.00, mean_100: -126.82, episodes: 14469
1011977: reward: -100.00, mean_100: -127.25, episodes: 14470
1012058: reward: -100.00, mean_100: -127.45, episodes: 14471
1012119: reward: -100.00, mean_100: -127.16, episodes: 14472
1012172: reward: -100.00

1021446: reward: -100.00, mean_100: -127.79, episodes: 14605
1021505: reward: -100.00, mean_100: -127.86, episodes: 14606
1021561: reward: -100.00, mean_100: -127.85, episodes: 14607
1021618: reward: -100.00, mean_100: -127.94, episodes: 14608
1021688: reward: -100.00, mean_100: -128.17, episodes: 14609
1021756: reward: -100.00, mean_100: -128.32, episodes: 14610
1021844: reward: -100.00, mean_100: -128.07, episodes: 14611
1021913: reward: -100.00, mean_100: -128.11, episodes: 14612
1021991: reward: -100.00, mean_100: -128.40, episodes: 14613
1022051: reward: -100.00, mean_100: -128.65, episodes: 14614
1022106: reward: -100.00, mean_100: -127.70, episodes: 14615
1022159: reward: -100.00, mean_100: -127.77, episodes: 14616
1022240: reward: -100.00, mean_100: -127.51, episodes: 14617
1022302: reward: -100.00, mean_100: -128.12, episodes: 14618
1022375: reward: -100.00, mean_100: -127.87, episodes: 14619
1022435: reward: -100.00, mean_100: -127.88, episodes: 14620
1022512: reward: -100.00

1030799: reward: -100.00, mean_100: -132.32, episodes: 14741
1030889: reward: -100.00, mean_100: -132.03, episodes: 14742
1030966: reward: -100.00, mean_100: -131.70, episodes: 14743
1031050: reward: -100.00, mean_100: -131.03, episodes: 14744
1031134: reward: -100.00, mean_100: -131.03, episodes: 14745
1031216: reward: -100.00, mean_100: -130.69, episodes: 14746
1031288: reward: -100.00, mean_100: -129.31, episodes: 14747
1031359: reward: -100.00, mean_100: -130.02, episodes: 14748
1031417: reward: -100.00, mean_100: -129.95, episodes: 14749
1031479: reward: -100.00, mean_100: -129.80, episodes: 14750
1031550: reward: -100.00, mean_100: -130.35, episodes: 14751
1031602: reward: -100.00, mean_100: -130.47, episodes: 14752
1031658: reward: -100.00, mean_100: -129.28, episodes: 14753
1031717: reward: -100.00, mean_100: -129.53, episodes: 14754
1031793: reward: -100.00, mean_100: -129.64, episodes: 14755
1031861: reward: -100.00, mean_100: -129.98, episodes: 14756
1031959: reward: -100.00

1040345: reward: -100.00, mean_100: -130.39, episodes: 14878
1040423: reward: -100.00, mean_100: -130.80, episodes: 14879
1040475: reward: -100.00, mean_100: -130.44, episodes: 14880
1040525: reward: -100.00, mean_100: -131.54, episodes: 14881
1040587: reward: -100.00, mean_100: -131.63, episodes: 14882
1040642: reward: -100.00, mean_100: -131.67, episodes: 14883
1040700: reward: -100.00, mean_100: -131.20, episodes: 14884
1040773: reward: -100.00, mean_100: -131.29, episodes: 14885
1040844: reward: -100.00, mean_100: -131.49, episodes: 14886
1040923: reward: -100.00, mean_100: -131.52, episodes: 14887
1040990: reward: -100.00, mean_100: -131.73, episodes: 14888
1041041: reward: -100.00, mean_100: -131.43, episodes: 14889
1041116: reward: -100.00, mean_100: -131.29, episodes: 14890
1041173: reward: -100.00, mean_100: -131.90, episodes: 14891
1041230: reward: -100.00, mean_100: -131.76, episodes: 14892
1041309: reward: -100.00, mean_100: -131.88, episodes: 14893
1041362: reward: -100.00

1050739: reward: -100.00, mean_100: -120.74, episodes: 15023
1050791: reward: -100.00, mean_100: -120.87, episodes: 15024
1050848: reward: -100.00, mean_100: -120.96, episodes: 15025
1050910: reward: -100.00, mean_100: -119.78, episodes: 15026
1050991: reward: -100.00, mean_100: -119.18, episodes: 15027
1051066: reward: -100.00, mean_100: -118.77, episodes: 15028
1051134: reward: -100.00, mean_100: -119.10, episodes: 15029
1051214: reward: -100.00, mean_100: -120.08, episodes: 15030
1051270: reward: -100.00, mean_100: -120.01, episodes: 15031
1051357: reward: -100.00, mean_100: -119.56, episodes: 15032
1051431: reward: -100.00, mean_100: -119.50, episodes: 15033
1051498: reward: -100.00, mean_100: -120.76, episodes: 15034
1051578: reward: -100.00, mean_100: -120.49, episodes: 15035
1051634: reward: -100.00, mean_100: -120.42, episodes: 15036
1051690: reward: -100.00, mean_100: -119.37, episodes: 15037
1051773: reward: -100.00, mean_100: -119.91, episodes: 15038
1051833: reward: -100.00

1060400: reward: -100.00, mean_100: -134.99, episodes: 15159
1060485: reward: -100.00, mean_100: -136.77, episodes: 15160
1060542: reward: -100.00, mean_100: -136.62, episodes: 15161
1060594: reward: -100.00, mean_100: -136.49, episodes: 15162
1060646: reward: -100.00, mean_100: -136.13, episodes: 15163
1060727: reward: -100.00, mean_100: -136.30, episodes: 15164
1060795: reward: -100.00, mean_100: -136.37, episodes: 15165
1060868: reward: -100.00, mean_100: -134.78, episodes: 15166
1060952: reward: -100.00, mean_100: -135.22, episodes: 15167
1061005: reward: -100.00, mean_100: -134.83, episodes: 15168
1061064: reward: -100.00, mean_100: -134.78, episodes: 15169
1061116: reward: -100.00, mean_100: -134.77, episodes: 15170
1061175: reward: -100.00, mean_100: -134.72, episodes: 15171
1061247: reward: -100.00, mean_100: -134.72, episodes: 15172
1061309: reward: -100.00, mean_100: -134.36, episodes: 15173
1061384: reward: -100.00, mean_100: -134.19, episodes: 15174
1061451: reward: -100.00

1070386: reward: -100.00, mean_100: -131.58, episodes: 15303
1070467: reward: -100.00, mean_100: -131.94, episodes: 15304
1070528: reward: -100.00, mean_100: -132.08, episodes: 15305
1070582: reward: -100.00, mean_100: -131.89, episodes: 15306
1070643: reward: -100.00, mean_100: -133.09, episodes: 15307
1070737: reward: -100.00, mean_100: -131.48, episodes: 15308
1070803: reward: -100.00, mean_100: -130.14, episodes: 15309
1070868: reward: -100.00, mean_100: -130.15, episodes: 15310
1070941: reward: -100.00, mean_100: -130.26, episodes: 15311
1071012: reward: -100.00, mean_100: -130.80, episodes: 15312
1071085: reward: -100.00, mean_100: -132.33, episodes: 15313
1071157: reward: -100.00, mean_100: -131.23, episodes: 15314
1071229: reward: -100.00, mean_100: -131.54, episodes: 15315
1071299: reward: -100.00, mean_100: -131.16, episodes: 15316
1071386: reward: -100.00, mean_100: -130.76, episodes: 15317
1071464: reward: -100.00, mean_100: -130.70, episodes: 15318
1071531: reward: -100.00

1080675: reward: -100.00, mean_100: -132.96, episodes: 15448
1080732: reward: -100.00, mean_100: -133.54, episodes: 15449
1080820: reward: -100.00, mean_100: -133.81, episodes: 15450
1080872: reward: -100.00, mean_100: -133.09, episodes: 15451
1080973: reward: -100.00, mean_100: -134.62, episodes: 15452
1081028: reward: -100.00, mean_100: -134.62, episodes: 15453
1081093: reward: -100.00, mean_100: -135.08, episodes: 15454
1081160: reward: -100.00, mean_100: -135.48, episodes: 15455
1081242: reward: -100.00, mean_100: -135.73, episodes: 15456
1081311: reward: -100.00, mean_100: -135.86, episodes: 15457
1081366: reward: -100.00, mean_100: -136.18, episodes: 15458
1081424: reward: -100.00, mean_100: -136.32, episodes: 15459
1081502: reward: -100.00, mean_100: -137.00, episodes: 15460
1081582: reward: -100.00, mean_100: -135.87, episodes: 15461
1081658: reward: -100.00, mean_100: -135.95, episodes: 15462
1081728: reward: -100.00, mean_100: -134.59, episodes: 15463
1081811: reward: -100.00

1090120: reward: -100.00, mean_100: -130.06, episodes: 15585
1090204: reward: -100.00, mean_100: -129.80, episodes: 15586
1090271: reward: -100.00, mean_100: -129.95, episodes: 15587
1090325: reward: -100.00, mean_100: -129.91, episodes: 15588
1090405: reward: -100.00, mean_100: -130.65, episodes: 15589
1090468: reward: -100.00, mean_100: -130.01, episodes: 15590
1090521: reward: -100.00, mean_100: -129.69, episodes: 15591
1090575: reward: -100.00, mean_100: -129.88, episodes: 15592
1090651: reward: -100.00, mean_100: -129.97, episodes: 15593
1090704: reward: -100.00, mean_100: -131.56, episodes: 15594
1090759: reward: -100.00, mean_100: -130.93, episodes: 15595
1090826: reward: -100.00, mean_100: -131.13, episodes: 15596
1090888: reward: -100.00, mean_100: -131.31, episodes: 15597
1090975: reward: -100.00, mean_100: -131.04, episodes: 15598
1091034: reward: -100.00, mean_100: -131.21, episodes: 15599
1091108: reward: -100.00, mean_100: -131.77, episodes: 15600
1091181: reward: -100.00

1100212: reward: -100.00, mean_100: -133.63, episodes: 15729
1100297: reward: -100.00, mean_100: -134.37, episodes: 15730
1100376: reward: -100.00, mean_100: -135.57, episodes: 15731
1100456: reward: -100.00, mean_100: -135.13, episodes: 15732
1100532: reward: -100.00, mean_100: -134.67, episodes: 15733
1100627: reward: -100.00, mean_100: -133.45, episodes: 15734
1100684: reward: -100.00, mean_100: -133.24, episodes: 15735
1100755: reward: -100.00, mean_100: -132.64, episodes: 15736
1100816: reward: -100.00, mean_100: -132.37, episodes: 15737
1100878: reward: -100.00, mean_100: -131.97, episodes: 15738
1100936: reward: -100.00, mean_100: -131.67, episodes: 15739
1101004: reward: -100.00, mean_100: -131.25, episodes: 15740
1101084: reward: -100.00, mean_100: -131.52, episodes: 15741
1101161: reward: -100.00, mean_100: -129.96, episodes: 15742
1101256: reward: -100.00, mean_100: -130.33, episodes: 15743
1101335: reward: -100.00, mean_100: -130.54, episodes: 15744
1101399: reward: -100.00

1109683: reward: -100.00, mean_100: -132.04, episodes: 15866
1109738: reward: -100.00, mean_100: -131.26, episodes: 15867
1109817: reward: -100.00, mean_100: -131.52, episodes: 15868
1109882: reward: -100.00, mean_100: -132.01, episodes: 15869
1109949: reward: -100.00, mean_100: -131.75, episodes: 15870
1110022: reward: -100.00, mean_100: -131.47, episodes: 15871
1110095: reward: -100.00, mean_100: -131.48, episodes: 15872
1110161: reward: -100.00, mean_100: -130.27, episodes: 15873
1110215: reward: -100.00, mean_100: -130.20, episodes: 15874
1110293: reward: -100.00, mean_100: -130.87, episodes: 15875
1110377: reward: -100.00, mean_100: -130.93, episodes: 15876
1110439: reward: -100.00, mean_100: -131.34, episodes: 15877
1110504: reward: -100.00, mean_100: -131.15, episodes: 15878
1110574: reward: -100.00, mean_100: -130.82, episodes: 15879
1110676: reward: -100.00, mean_100: -128.91, episodes: 15880
1110741: reward: -100.00, mean_100: -129.04, episodes: 15881
1110794: reward: -100.00

1119485: reward: -100.00, mean_100: -127.04, episodes: 16010
1119559: reward: -100.00, mean_100: -127.08, episodes: 16011
1119631: reward: -100.00, mean_100: -126.95, episodes: 16012
1119695: reward: -100.00, mean_100: -128.45, episodes: 16013
1119759: reward: -100.00, mean_100: -128.56, episodes: 16014
1119825: reward: -100.00, mean_100: -128.57, episodes: 16015
1119908: reward: -100.00, mean_100: -128.29, episodes: 16016
1119963: reward: -100.00, mean_100: -127.89, episodes: 16017
1120042: reward: -100.00, mean_100: -127.55, episodes: 16018
1120099: reward: -100.00, mean_100: -127.79, episodes: 16019
1120172: reward: -100.00, mean_100: -128.83, episodes: 16020
1120246: reward: -100.00, mean_100: -128.97, episodes: 16021
1120304: reward: -100.00, mean_100: -128.68, episodes: 16022
1120363: reward: -100.00, mean_100: -128.30, episodes: 16023
1120439: reward: -100.00, mean_100: -129.58, episodes: 16024
1120494: reward: -100.00, mean_100: -130.05, episodes: 16025
1120557: reward: -100.00

1129547: reward: -100.00, mean_100: -134.71, episodes: 16156
1129622: reward: -100.00, mean_100: -134.49, episodes: 16157
1129683: reward: -100.00, mean_100: -134.22, episodes: 16158
1129740: reward: -100.00, mean_100: -134.14, episodes: 16159
1129791: reward: -100.00, mean_100: -134.14, episodes: 16160
1129847: reward: -100.00, mean_100: -134.25, episodes: 16161
1129899: reward: -100.00, mean_100: -133.76, episodes: 16162
1129982: reward: -100.00, mean_100: -134.57, episodes: 16163
1130051: reward: -100.00, mean_100: -134.36, episodes: 16164
1130130: reward: -100.00, mean_100: -133.94, episodes: 16165
1130185: reward: -100.00, mean_100: -133.50, episodes: 16166
1130240: reward: -100.00, mean_100: -133.28, episodes: 16167
1130311: reward: -100.00, mean_100: -134.00, episodes: 16168
1130388: reward: -100.00, mean_100: -134.02, episodes: 16169
1130448: reward: -100.00, mean_100: -133.78, episodes: 16170
1130521: reward: -100.00, mean_100: -133.56, episodes: 16171
1130574: reward: -100.00

1139793: reward: -100.00, mean_100: -135.78, episodes: 16303
1139872: reward: -100.00, mean_100: -135.74, episodes: 16304
1139929: reward: -100.00, mean_100: -135.71, episodes: 16305
1139989: reward: -100.00, mean_100: -135.52, episodes: 16306
1140074: reward: -100.00, mean_100: -135.22, episodes: 16307
1140126: reward: -100.00, mean_100: -135.15, episodes: 16308
1140200: reward: -100.00, mean_100: -135.23, episodes: 16309
1140267: reward: -100.00, mean_100: -135.30, episodes: 16310
1140341: reward: -100.00, mean_100: -135.76, episodes: 16311
1140412: reward: -100.00, mean_100: -134.25, episodes: 16312
1140473: reward: -100.00, mean_100: -134.22, episodes: 16313
1140554: reward: -100.00, mean_100: -133.98, episodes: 16314
1140635: reward: -100.00, mean_100: -134.48, episodes: 16315
1140705: reward: -100.00, mean_100: -135.14, episodes: 16316
1140762: reward: -100.00, mean_100: -134.74, episodes: 16317
1140835: reward: -100.00, mean_100: -134.86, episodes: 16318
1140925: reward: -100.00

1149357: reward: -100.00, mean_100: -135.69, episodes: 16439
1149408: reward: -100.00, mean_100: -135.21, episodes: 16440
1149478: reward: -100.00, mean_100: -135.92, episodes: 16441
1149545: reward: -100.00, mean_100: -136.23, episodes: 16442
1149625: reward: -100.00, mean_100: -135.97, episodes: 16443
1149690: reward: -100.00, mean_100: -135.71, episodes: 16444
1149763: reward: -100.00, mean_100: -136.03, episodes: 16445
1149839: reward: -100.00, mean_100: -136.81, episodes: 16446
1149892: reward: -100.00, mean_100: -136.45, episodes: 16447
1149961: reward: -100.00, mean_100: -136.36, episodes: 16448
1150021: reward: -100.00, mean_100: -136.89, episodes: 16449
1150076: reward: -100.00, mean_100: -136.95, episodes: 16450
1150157: reward: -100.00, mean_100: -136.79, episodes: 16451
1150228: reward: -100.00, mean_100: -136.72, episodes: 16452
1150314: reward: -100.00, mean_100: -136.53, episodes: 16453
1150384: reward: -100.00, mean_100: -137.23, episodes: 16454
1150464: reward: -100.00

1158817: reward: -100.00, mean_100: -135.69, episodes: 16574
1158910: reward: -100.00, mean_100: -135.46, episodes: 16575
1158992: reward: -100.00, mean_100: -135.06, episodes: 16576
1159074: reward: -100.00, mean_100: -134.72, episodes: 16577
1159158: reward: -100.00, mean_100: -135.29, episodes: 16578
1159244: reward: -100.00, mean_100: -135.79, episodes: 16579
1159322: reward: -100.00, mean_100: -135.48, episodes: 16580
1159419: reward: -100.00, mean_100: -135.93, episodes: 16581
1159499: reward: -100.00, mean_100: -136.45, episodes: 16582
1159563: reward: -100.00, mean_100: -136.26, episodes: 16583
1159654: reward: -100.00, mean_100: -136.19, episodes: 16584
1159712: reward: -100.00, mean_100: -137.72, episodes: 16585
1159771: reward: -100.00, mean_100: -137.64, episodes: 16586
1159854: reward: -100.00, mean_100: -138.56, episodes: 16587
1159915: reward: -100.00, mean_100: -138.58, episodes: 16588
1159986: reward: -100.00, mean_100: -138.40, episodes: 16589
1160053: reward: -100.00

1169164: reward: -100.00, mean_100: -136.12, episodes: 16717
1169238: reward: -100.00, mean_100: -136.02, episodes: 16718
1169301: reward: -100.00, mean_100: -135.71, episodes: 16719
1169419: reward: -100.00, mean_100: -134.97, episodes: 16720
1169488: reward: -100.00, mean_100: -135.46, episodes: 16721
1169578: reward: -100.00, mean_100: -137.01, episodes: 16722
1169653: reward: -100.00, mean_100: -137.31, episodes: 16723
1169735: reward: -100.00, mean_100: -136.09, episodes: 16724
1169823: reward: -100.00, mean_100: -136.37, episodes: 16725
1169886: reward: -100.00, mean_100: -136.89, episodes: 16726
1169995: reward: -100.00, mean_100: -135.80, episodes: 16727
1170047: reward: -100.00, mean_100: -136.13, episodes: 16728
1170111: reward: -100.00, mean_100: -135.77, episodes: 16729
1170199: reward: -100.00, mean_100: -136.32, episodes: 16730
1170280: reward: -100.00, mean_100: -136.29, episodes: 16731
1170347: reward: -100.00, mean_100: -136.17, episodes: 16732
1170406: reward: -100.00

1178780: reward: -100.00, mean_100: -131.53, episodes: 16857
1178839: reward: -100.00, mean_100: -131.32, episodes: 16858
1178895: reward: -100.00, mean_100: -131.45, episodes: 16859
1178969: reward: -100.00, mean_100: -131.09, episodes: 16860
1179038: reward: -100.00, mean_100: -131.17, episodes: 16861
1179129: reward: -100.00, mean_100: -130.76, episodes: 16862
1179200: reward: -100.00, mean_100: -130.77, episodes: 16863
1179253: reward: -100.00, mean_100: -130.32, episodes: 16864
1179308: reward: -100.00, mean_100: -129.91, episodes: 16865
1179367: reward: -100.00, mean_100: -129.80, episodes: 16866
1179450: reward: -100.00, mean_100: -129.85, episodes: 16867
1179516: reward: -100.00, mean_100: -129.18, episodes: 16868
1179582: reward: -100.00, mean_100: -129.01, episodes: 16869
1179649: reward: -100.00, mean_100: -128.36, episodes: 16870
1179700: reward: -100.00, mean_100: -128.47, episodes: 16871
1179759: reward: -100.00, mean_100: -128.30, episodes: 16872
1179847: reward: -100.00

1188769: reward: -100.00, mean_100: -123.44, episodes: 16999
1188822: reward: -100.00, mean_100: -123.09, episodes: 17000
1188878: reward: -100.00, mean_100: -122.79, episodes: 17001
1188936: reward: -100.00, mean_100: -123.04, episodes: 17002
1189009: reward: -100.00, mean_100: -122.92, episodes: 17003
1189067: reward: -100.00, mean_100: -122.75, episodes: 17004
1189140: reward: -100.00, mean_100: -122.35, episodes: 17005
1189194: reward: -100.00, mean_100: -122.22, episodes: 17006
1189272: reward: -100.00, mean_100: -122.96, episodes: 17007
1189327: reward: -100.00, mean_100: -122.50, episodes: 17008
1189409: reward: -100.00, mean_100: -122.32, episodes: 17009
1189483: reward: -100.00, mean_100: -122.29, episodes: 17010
1189573: reward: -100.00, mean_100: -122.24, episodes: 17011
1189647: reward: -100.00, mean_100: -122.64, episodes: 17012
1189724: reward: -100.00, mean_100: -122.19, episodes: 17013
1189789: reward: -100.00, mean_100: -122.62, episodes: 17014
1189862: reward: -100.00

1198540: reward: -100.00, mean_100: -136.09, episodes: 17140
1198614: reward: -100.00, mean_100: -136.62, episodes: 17141
1198692: reward: -100.00, mean_100: -135.86, episodes: 17142
1198760: reward: -100.00, mean_100: -136.12, episodes: 17143
1198817: reward: -100.00, mean_100: -135.97, episodes: 17144
1198876: reward: -100.00, mean_100: -136.00, episodes: 17145
1198928: reward: -100.00, mean_100: -135.66, episodes: 17146
1199000: reward: -100.00, mean_100: -135.48, episodes: 17147
1199052: reward: -100.00, mean_100: -135.24, episodes: 17148
1199108: reward: -100.00, mean_100: -135.80, episodes: 17149
1199186: reward: -100.00, mean_100: -135.36, episodes: 17150
1199245: reward: -100.00, mean_100: -135.23, episodes: 17151
1199321: reward: -100.00, mean_100: -135.24, episodes: 17152
1199395: reward: -100.00, mean_100: -135.47, episodes: 17153
1199457: reward: -100.00, mean_100: -136.23, episodes: 17154
1199525: reward: -100.00, mean_100: -136.00, episodes: 17155
1199593: reward: -100.00

1208161: reward: -100.00, mean_100: -138.94, episodes: 17279
1208234: reward: -100.00, mean_100: -138.89, episodes: 17280
1208296: reward: -100.00, mean_100: -138.90, episodes: 17281
1208381: reward: -100.00, mean_100: -138.68, episodes: 17282
1208459: reward: -100.00, mean_100: -138.42, episodes: 17283
1208515: reward: -100.00, mean_100: -138.21, episodes: 17284
1208582: reward: -100.00, mean_100: -138.24, episodes: 17285
1208669: reward: -100.00, mean_100: -138.16, episodes: 17286
1208761: reward: -100.00, mean_100: -139.14, episodes: 17287
1208818: reward: -100.00, mean_100: -139.12, episodes: 17288
1208875: reward: -100.00, mean_100: -139.19, episodes: 17289
1208933: reward: -100.00, mean_100: -139.15, episodes: 17290
1209014: reward: -100.00, mean_100: -139.79, episodes: 17291
1209105: reward: -100.00, mean_100: -139.50, episodes: 17292
1209189: reward: -100.00, mean_100: -140.06, episodes: 17293
1209242: reward: -100.00, mean_100: -139.87, episodes: 17294
1209294: reward: -100.00

1217682: reward: -100.00, mean_100: -131.09, episodes: 17418
1217737: reward: -100.00, mean_100: -131.34, episodes: 17419
1217820: reward: -100.00, mean_100: -131.24, episodes: 17420
1217873: reward: -100.00, mean_100: -130.82, episodes: 17421
1217941: reward: -100.00, mean_100: -131.37, episodes: 17422
1218021: reward: -100.00, mean_100: -131.54, episodes: 17423
1218099: reward: -100.00, mean_100: -131.72, episodes: 17424
1218182: reward: -100.00, mean_100: -133.95, episodes: 17425
1218265: reward: -100.00, mean_100: -134.89, episodes: 17426
1218353: reward: -100.00, mean_100: -134.84, episodes: 17427
1218415: reward: -100.00, mean_100: -134.63, episodes: 17428
1218490: reward: -100.00, mean_100: -134.27, episodes: 17429
1218572: reward: -100.00, mean_100: -133.65, episodes: 17430
1218635: reward: -100.00, mean_100: -133.61, episodes: 17431
1218691: reward: -100.00, mean_100: -133.81, episodes: 17432
1218759: reward: -100.00, mean_100: -133.87, episodes: 17433
1218831: reward: -100.00

1227272: reward: -100.00, mean_100: -135.47, episodes: 17553
1227343: reward: -100.00, mean_100: -135.09, episodes: 17554
1227421: reward: -100.00, mean_100: -135.58, episodes: 17555
1227484: reward: -100.00, mean_100: -135.48, episodes: 17556
1227568: reward: -100.00, mean_100: -136.10, episodes: 17557
1227634: reward: -100.00, mean_100: -136.05, episodes: 17558
1227694: reward: -100.00, mean_100: -135.57, episodes: 17559
1227768: reward: -100.00, mean_100: -135.51, episodes: 17560
1227826: reward: -100.00, mean_100: -135.59, episodes: 17561
1227901: reward: -100.00, mean_100: -135.21, episodes: 17562
1227953: reward: -100.00, mean_100: -134.53, episodes: 17563
1228019: reward: -100.00, mean_100: -132.61, episodes: 17564
1228089: reward: -100.00, mean_100: -133.33, episodes: 17565
1228169: reward: -100.00, mean_100: -133.94, episodes: 17566
1228229: reward: -100.00, mean_100: -133.93, episodes: 17567
1228288: reward: -100.00, mean_100: -133.90, episodes: 17568
1228363: reward: -100.00

1237069: reward: -100.00, mean_100: -129.38, episodes: 17691
1237151: reward: -100.00, mean_100: -128.81, episodes: 17692
1237222: reward: -100.00, mean_100: -129.09, episodes: 17693
1237306: reward: -100.00, mean_100: -129.52, episodes: 17694
1237389: reward: -100.00, mean_100: -130.24, episodes: 17695
1237474: reward: -100.00, mean_100: -130.61, episodes: 17696
1237556: reward: -100.00, mean_100: -131.32, episodes: 17697
1237625: reward: -100.00, mean_100: -131.50, episodes: 17698
1237688: reward: -100.00, mean_100: -130.93, episodes: 17699
1237776: reward: -100.00, mean_100: -131.22, episodes: 17700
1237843: reward: -100.00, mean_100: -131.40, episodes: 17701
1237934: reward: -100.00, mean_100: -131.18, episodes: 17702
1238023: reward: -100.00, mean_100: -131.00, episodes: 17703
1238112: reward: -100.00, mean_100: -130.50, episodes: 17704
1238176: reward: -100.00, mean_100: -130.92, episodes: 17705
1238254: reward: -100.00, mean_100: -130.82, episodes: 17706
1238307: reward: -100.00

1246615: reward: -100.00, mean_100: -134.57, episodes: 17826
1246693: reward: -100.00, mean_100: -134.12, episodes: 17827
1246751: reward: -100.00, mean_100: -134.57, episodes: 17828
1246809: reward: -100.00, mean_100: -134.50, episodes: 17829
1246906: reward: -100.00, mean_100: -133.02, episodes: 17830
1246984: reward: -100.00, mean_100: -132.25, episodes: 17831
1247036: reward: -100.00, mean_100: -133.13, episodes: 17832
1247120: reward: -100.00, mean_100: -133.01, episodes: 17833
1247197: reward: -100.00, mean_100: -132.81, episodes: 17834
1247276: reward: -100.00, mean_100: -132.80, episodes: 17835
1247344: reward: -100.00, mean_100: -133.04, episodes: 17836
1247403: reward: -100.00, mean_100: -134.18, episodes: 17837
1247482: reward: -100.00, mean_100: -133.92, episodes: 17838
1247541: reward: -100.00, mean_100: -133.87, episodes: 17839
1247599: reward: -100.00, mean_100: -133.82, episodes: 17840
1247661: reward: -100.00, mean_100: -134.00, episodes: 17841
1247748: reward: -100.00

1256032: reward: -100.00, mean_100: -135.61, episodes: 17961
1256119: reward: -100.00, mean_100: -135.96, episodes: 17962
1256175: reward: -100.00, mean_100: -135.93, episodes: 17963
1256228: reward: -100.00, mean_100: -135.38, episodes: 17964
1256284: reward: -100.00, mean_100: -135.46, episodes: 17965
1256355: reward: -100.00, mean_100: -135.06, episodes: 17966
1256447: reward: -100.00, mean_100: -133.86, episodes: 17967
1256528: reward: -100.00, mean_100: -133.89, episodes: 17968
1256582: reward: -100.00, mean_100: -133.61, episodes: 17969
1256640: reward: -100.00, mean_100: -133.51, episodes: 17970
1256730: reward: -100.00, mean_100: -133.53, episodes: 17971
1256807: reward: -100.00, mean_100: -133.90, episodes: 17972
1256861: reward: -100.00, mean_100: -134.04, episodes: 17973
1256917: reward: -100.00, mean_100: -134.18, episodes: 17974
1256976: reward: -100.00, mean_100: -134.68, episodes: 17975
1257049: reward: -100.00, mean_100: -134.83, episodes: 17976
1257137: reward: -100.00

1265628: reward: -100.00, mean_100: -126.27, episodes: 18099
1265704: reward: -100.00, mean_100: -125.95, episodes: 18100
1265794: reward: -100.00, mean_100: -125.88, episodes: 18101
1265876: reward: -100.00, mean_100: -126.39, episodes: 18102
1265945: reward: -100.00, mean_100: -126.03, episodes: 18103
1266005: reward: -100.00, mean_100: -125.78, episodes: 18104
1266074: reward: -100.00, mean_100: -125.61, episodes: 18105
1266135: reward: -100.00, mean_100: -125.19, episodes: 18106
1266223: reward: -100.00, mean_100: -126.19, episodes: 18107
1266272: reward: -100.00, mean_100: -126.31, episodes: 18108
1266352: reward: -100.00, mean_100: -127.44, episodes: 18109
1266410: reward: -100.00, mean_100: -126.64, episodes: 18110
1266463: reward: -100.00, mean_100: -126.40, episodes: 18111
1266546: reward: -100.00, mean_100: -126.24, episodes: 18112
1266613: reward: -100.00, mean_100: -126.28, episodes: 18113
1266698: reward: -100.00, mean_100: -126.16, episodes: 18114
1266750: reward: -100.00

KeyboardInterrupt: 

In [5]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
env = gym.make("LunarLander-v2")
net = PGN(env.observation_space.shape[0], env.action_space.n).to(device)
# The agent inputs the state into the neural network, and gets back logits.
# It puts these through a softmax to get probabilities, then samples an action.
agent = ptan.agent.PolicyAgent(net, preprocessor=ptan.agent.float32_preprocessor,
                               apply_softmax=True, device=device)
# The experience source interacts with the environment and returns (s,a,r,s') transitions
exp_source = ptan.experience.ExperienceSourceFirstLast(env, agent, gamma=GAMMA, steps_count=BELLMAN_STEPS)

optimizer = optim.Adam(net.parameters(), lr=LEARNING_RATE)

total_rewards = []
step_rewards = []
baseline_buf = MeanBuffer(BASELINE_STEPS)
step_idx = 0
done_episodes = 0

batch_states, batch_actions, batch_scales = [], [], []

# each iteration runs one action in the environment and returns a (s,a,r,s') transition
for step_idx, exp in enumerate(exp_source):
    baseline_buf.add(exp.reward)
    baseline = baseline_buf.mean()
    batch_states.append(exp.state)
    batch_actions.append(int(exp.action))
    batch_scales.append(exp.reward - baseline)

    # handle when an episode is completed
    episode_rewards = exp_source.pop_total_rewards()
    if episode_rewards:
        done_episodes += 1
        reward = episode_rewards[0]
        total_rewards.append(reward)
        mean_rewards = float(np.mean(total_rewards[-100:]))
        print("%d: reward: %6.2f, mean_100: %6.2f, episodes: %d" % (
            step_idx, reward, mean_rewards, done_episodes))
        if mean_rewards > TARGET_REWARD:
            print("Solved in %d steps and %d episodes!" % (step_idx, done_episodes))
            break

    if len(batch_states) < BATCH_SIZE:
        continue

    # copy training data to the GPU
    states_v = torch.FloatTensor(batch_states).to(device)
    batch_actions_t = torch.LongTensor(batch_actions).to(device)
    batch_scale_v = torch.FloatTensor(batch_scales).to(device)

    # apply gradient descent
    optimizer.zero_grad()
    logits_v = net(states_v)
    # apply the softmax and take the logarithm in one step, more precise
    log_prob_v = F.log_softmax(logits_v, dim=1)
    # scale the log probs according to (reward - baseline)
    log_prob_actions_v = batch_scale_v * log_prob_v[range(BATCH_SIZE), batch_actions_t]
    # take the mean cross-entropy across all batches
    loss_policy_v = -log_prob_actions_v.mean()

    # subtract the entropy bonus from the loss function
    prob_v = F.softmax(logits_v, dim=1)
    entropy_v = -(prob_v * log_prob_v).sum(dim=1).mean()
    entropy_loss_v = -ENTROPY_BETA * entropy_v
    loss_v = loss_policy_v + entropy_loss_v

    loss_v.backward()
    optimizer.step()
    
    batch_states.clear()
    batch_actions.clear()
    batch_scales.clear()

  result = entry_point.load(False)


101: reward: -316.80, mean_100: -316.80, episodes: 1
218: reward: -250.77, mean_100: -283.79, episodes: 2
282: reward: -60.48, mean_100: -209.35, episodes: 3
351: reward: -50.23, mean_100: -169.57, episodes: 4
433: reward: -129.47, mean_100: -161.55, episodes: 5
537: reward: -132.78, mean_100: -156.76, episodes: 6
615: reward: -99.09, mean_100: -148.52, episodes: 7
753: reward: -230.17, mean_100: -158.72, episodes: 8
828: reward: -202.63, mean_100: -163.60, episodes: 9
923: reward: -415.78, mean_100: -188.82, episodes: 10
1024: reward: -175.18, mean_100: -187.58, episodes: 11
1123: reward: -302.42, mean_100: -197.15, episodes: 12
1216: reward: -316.71, mean_100: -206.35, episodes: 13
1290: reward: -243.24, mean_100: -208.98, episodes: 14
1427: reward: -129.84, mean_100: -203.71, episodes: 15
1512: reward: -130.61, mean_100: -199.14, episodes: 16
1601: reward: -394.82, mean_100: -210.65, episodes: 17
1698: reward: -346.91, mean_100: -218.22, episodes: 18
1772: reward: -105.20, mean_100:

20198: reward: -187.43, mean_100: -227.74, episodes: 150
20348: reward: -125.85, mean_100: -227.90, episodes: 151
20498: reward: -140.87, mean_100: -228.55, episodes: 152
20666: reward: -144.49, mean_100: -229.22, episodes: 153
20832: reward: -111.59, mean_100: -228.33, episodes: 154
21015: reward: -220.69, mean_100: -228.05, episodes: 155
21238: reward: -172.31, mean_100: -225.27, episodes: 156
21361: reward: -278.54, mean_100: -223.19, episodes: 157
21539: reward: -143.14, mean_100: -222.06, episodes: 158
21693: reward: -187.33, mean_100: -220.05, episodes: 159
21825: reward: -329.94, mean_100: -218.10, episodes: 160
21977: reward: -122.92, mean_100: -217.65, episodes: 161
22139: reward: -264.72, mean_100: -218.09, episodes: 162
22319: reward:  -1.42, mean_100: -216.46, episodes: 163
22440: reward: -184.02, mean_100: -214.84, episodes: 164
22567: reward:   3.62, mean_100: -211.56, episodes: 165
22736: reward: -266.96, mean_100: -211.50, episodes: 166
22883: reward: -214.02, mean_100:

51580: reward: 166.91, mean_100: -76.15, episodes: 296
51816: reward: -59.12, mean_100: -75.06, episodes: 297
52284: reward: -57.79, mean_100: -72.41, episodes: 298
52504: reward: -16.37, mean_100: -71.13, episodes: 299
53012: reward:  -3.29, mean_100: -67.70, episodes: 300
53437: reward: 186.48, mean_100: -64.94, episodes: 301
53775: reward: -97.31, mean_100: -64.52, episodes: 302
53979: reward: -120.43, mean_100: -65.57, episodes: 303
54260: reward: -79.65, mean_100: -63.79, episodes: 304
54493: reward:  24.44, mean_100: -62.71, episodes: 305
54836: reward: -349.26, mean_100: -66.28, episodes: 306
55837: reward:  84.67, mean_100: -64.96, episodes: 307
56097: reward: -42.26, mean_100: -64.89, episodes: 308
56403: reward: -98.20, mean_100: -65.51, episodes: 309
56771: reward: -41.17, mean_100: -63.78, episodes: 310
56939: reward:  32.17, mean_100: -62.37, episodes: 311
57940: reward:  -9.64, mean_100: -62.51, episodes: 312
58890: reward: -189.67, mean_100: -61.96, episodes: 313
59476: 

114814: reward: -66.06, mean_100: -31.33, episodes: 444
115815: reward:  60.40, mean_100: -30.37, episodes: 445
116067: reward: -18.77, mean_100: -29.74, episodes: 446
116455: reward: -75.35, mean_100: -29.93, episodes: 447
116609: reward:  35.09, mean_100: -29.96, episodes: 448
116819: reward: -24.77, mean_100: -30.08, episodes: 449
117218: reward: -48.00, mean_100: -30.14, episodes: 450
117490: reward:  27.79, mean_100: -29.88, episodes: 451
118491: reward:  85.70, mean_100: -30.08, episodes: 452
118924: reward: -115.97, mean_100: -31.13, episodes: 453
119073: reward: -151.12, mean_100: -31.48, episodes: 454
120074: reward: 141.44, mean_100: -28.68, episodes: 455
120252: reward:  37.01, mean_100: -28.34, episodes: 456
120520: reward:  43.51, mean_100: -27.65, episodes: 457
120931: reward: -37.14, mean_100: -27.94, episodes: 458
121272: reward:   2.43, mean_100: -27.70, episodes: 459
121436: reward:  10.41, mean_100: -26.80, episodes: 460
121612: reward:  68.48, mean_100: -26.08, epis

171849: reward: -40.94, mean_100: -36.67, episodes: 590
172160: reward: -203.00, mean_100: -38.00, episodes: 591
172445: reward: -69.28, mean_100: -37.16, episodes: 592
173050: reward: 185.14, mean_100: -35.98, episodes: 593
174051: reward:  21.14, mean_100: -37.33, episodes: 594
174709: reward: 123.52, mean_100: -36.60, episodes: 595
174911: reward:  36.06, mean_100: -36.24, episodes: 596
175814: reward: 105.48, mean_100: -36.61, episodes: 597
176815: reward: 199.51, mean_100: -36.17, episodes: 598
177046: reward: -36.25, mean_100: -37.34, episodes: 599
177225: reward: -54.49, mean_100: -36.38, episodes: 600
177320: reward:  20.61, mean_100: -34.48, episodes: 601
178321: reward: 105.02, mean_100: -31.56, episodes: 602
178560: reward:  -6.66, mean_100: -33.37, episodes: 603
178759: reward:  10.88, mean_100: -33.02, episodes: 604
179170: reward: 152.00, mean_100: -30.08, episodes: 605
179406: reward:  15.29, mean_100: -29.57, episodes: 606
179806: reward: -42.63, mean_100: -28.24, episo