### This file is for testing radical changes to the codebase. Upside Down Reinforcement Learning.py is the working version.

In [17]:
import time
from copy import deepcopy
import gym
import numpy as np
import torch
import torch.nn.functional as F

In [18]:
env = gym.make('LunarLander-v2')

In [19]:
#command takes form [derired reward, desired horizon]
def random_policy(obs, command):
    return np.random.randint(env.action_space.n)

In [20]:
#Visualise agent function
def visualise_agent(policy, command, n=5):
    try:
        for trial_i in range(n):
            current_command = deepcopy(command)
            observation = env.reset()
            done=False
            t=0
            episode_return=0
            while not done:
                env.render()
                action = policy(torch.tensor([observation]).double(), torch.tensor([command]).double())
                observation, reward, done, info = env.step(action)
                episode_return+=reward
                current_command[0]-= reward
                current_command[1] = max(1, current_command[1]-1)
                t+=1
            env.render()
            time.sleep(1.5)
            print("Episode {} finished after {} timesteps. Return = {}".format(trial_i, t, episode_return))
        env.close()
    except KeyboardInterrupt:
        env.close()

In [21]:
#visualise_agent(random_policy, command=[500, 500], n=3)

In [22]:
class FCNN_AGENT(torch.nn.Module):
    def __init__(self, command_scale):
        super().__init__()
        embedding_size=32
        hidden_size=64
        self.command_scale=command_scale
        self.observation_embedding = torch.nn.Sequential(
            torch.nn.Linear(np.prod(env.observation_space.shape), embedding_size),
            torch.nn.Tanh()
        )
        self.command_embedding = torch.nn.Sequential(
            torch.nn.Linear(2, embedding_size),
            torch.nn.Sigmoid()
        )
        self.to_output = torch.nn.Sequential(
            torch.nn.Linear(embedding_size, hidden_size),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_size, hidden_size),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_size, env.action_space.n)
        )
    
    def forward(self, observation, command):
        obs_emebdding = self.observation_embedding(observation)
        cmd_embedding = self.command_embedding(command*self.command_scale)
        embedding = torch.mul(obs_emebdding, cmd_embedding)
        action_prob_logits = self.to_output(embedding)
        return action_prob_logits
    
    def create_optimizer(self, lr):
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)

In [23]:
def collect_experience(policy, replay_buffer, replay_size, last_few, n_episodes=100, log_to_tensorboard=True):
    global i_episode
    init_replay_buffer = deepcopy(replay_buffer)
    try:
        for _ in range(n_episodes):
            command = sample_command(init_replay_buffer, last_few)
            writer.add_scalar('Command desired reward/Episode', command[0], i_episode)    # write loss to a graph
            writer.add_scalar('Command horizon/Episode', command[1], i_episode)    # write loss to a graph
            observation = env.reset()
            episode_mem = {'observation':[],
                           'action':[],
                           'reward':[],}
            done=False
            while not done:
                action = policy(torch.tensor([observation]).double(), torch.tensor([command]).double())
                new_observation, reward, done, info = env.step(action)
                
                episode_mem['observation'].append(observation)
                episode_mem['action'].append(action)
                episode_mem['reward'].append(reward)
                
                observation=new_observation
                command[0]-= reward
                command[1] = max(1, command[1]-1)
            episode_mem['return']=sum(episode_mem['reward'])
            episode_mem['episode_len']=len(episode_mem['observation'])
            replay_buffer.append(episode_mem)
            i_episode+=1
            if log_to_tensorboard: writer.add_scalar('Return/Episode', sum(episode_mem['reward']), i_episode)    # write loss to a graph
            print("Episode {} finished after {} timesteps. Return = {}".format(i_episode, len(episode_mem['observation']), sum(episode_mem['reward'])))
        env.close()
    except KeyboardInterrupt:
        env.close()
    replay_buffer = sorted(replay_buffer, key=lambda x:x['return'])[-replay_size:]
    return replay_buffer

def sample_command(replay_buffer, last_few):
    if len(replay_buffer)==0:
        return [1, 1]
    else:
        command_samples = replay_buffer[-last_few:]
        lengths = [mem['episode_len'] for mem in command_samples]
        returns = [mem['return'] for mem in command_samples]
        mean_return, std_return = np.mean(returns), np.std(returns)
        command_horizon = np.mean(lengths)
        desired_reward = np.random.uniform(mean_return, mean_return+std_return)
        return [desired_reward, command_horizon]

In [24]:
def train_net(policy_net, replay_buffer, n_updates=100, batch_size=64, log_to_tensorboard=True):
    global i_updates
    all_costs = []
    for i in range(n_updates):
        batch_observations = np.zeros((batch_size, np.prod(env.observation_space.shape)))
        batch_commands = np.zeros((batch_size, 2))
        batch_label = np.zeros((batch_size))
        for b in range(batch_size):
            sample_episode = np.random.randint(0, len(replay_buffer))
            sample_t1 = np.random.randint(0, len(replay_buffer[sample_episode]['observation']))
            sample_t2 = len(replay_buffer[sample_episode]['observation'])
            ##sample_t2 = np.random.randint(sample_t1+1, len(replay_buffer[sample_episode]['observation'])+1)
            sample_horizon = sample_t2-sample_t1
            sample_mem = replay_buffer[sample_episode]['observation'][sample_t1]
            sample_desired_reward = sum(replay_buffer[sample_episode]['reward'][sample_t1:sample_t2])
            network_input = np.append(sample_mem, [sample_desired_reward, sample_horizon])
            label = replay_buffer[sample_episode]['action'][sample_t1]
            batch_observations[b] = sample_mem
            batch_commands[b] = [sample_desired_reward, sample_horizon]
            batch_label[b] = label
        batch_observations = torch.tensor(batch_observations).double()
        batch_commands = torch.tensor(batch_commands).double()
        batch_label = torch.tensor(batch_label).long()
        pred = policy_net(batch_observations, batch_commands)
        cost = F.cross_entropy(pred, batch_label)
        if log_to_tensorboard: writer.add_scalar('Cost/NN update', cost.item() , i_updates)    # write loss to a graph
        all_costs.append(cost.item())
        cost.backward()
        policy_net.optimizer.step()
        policy_net.optimizer.zero_grad()
        i_updates+=1
    return np.mean(all_costs)

In [25]:
def create_greedy_policy(policy_network):
    def policy(obs, command):
        action_logits = policy_network(obs, command)
        action = np.argmax(action_logits.detach().numpy())
        return action
    return policy

def create_stochastic_policy(policy_network):
    def policy(obs, command):
        action_logits = policy_network(obs, command)
        action_probs = F.softmax(action_logits, dim=-1)
        action = torch.distributions.Categorical(action_probs).sample().item()
        return action
    return policy

In [30]:
i_episode=0
i_updates=0 #number of parameter updates to the neural network
replay_buffer = []
log_to_tensorboard = True

replay_size = 600
last_few = 50
batch_size = 128
n_warm_up_episodes = 50
n_episodes_per_iter = 50
n_updates_per_iter = 100
command_scale = 0.02
lr = 0.001

agent = FCNN_AGENT(command_scale).double()
agent.create_optimizer(lr)

stochastic_policy = create_stochastic_policy(agent)
greedy_policy = create_greedy_policy(agent)

In [31]:
# SET UP TRAINING VISUALISATION
# SET UP TRAINING VISUALISATION
if log_to_tensorboard: from torch.utils.tensorboard import SummaryWriter
if log_to_tensorboard: writer = SummaryWriter() # we will use this to show our models performance on a graph using tensorboard

In [32]:
#Collect warm up episodes
replay_buffer = collect_experience(random_policy, replay_buffer, replay_size, last_few, n_warm_up_episodes, log_to_tensorboard)
train_net(agent, replay_buffer, n_updates_per_iter, batch_size, log_to_tensorboard)

Episode 1 finished after 113 timesteps. Return = -390.6940084980434
Episode 2 finished after 95 timesteps. Return = -120.41847788549137
Episode 3 finished after 83 timesteps. Return = -63.466702803711684
Episode 4 finished after 92 timesteps. Return = -483.2119955776536
Episode 5 finished after 76 timesteps. Return = -65.54904155474622
Episode 6 finished after 85 timesteps. Return = -330.4744824335155
Episode 7 finished after 71 timesteps. Return = -101.51475416269122
Episode 8 finished after 132 timesteps. Return = -106.36861748934871
Episode 9 finished after 80 timesteps. Return = -139.41218679279342
Episode 10 finished after 90 timesteps. Return = -118.64424067999293
Episode 11 finished after 122 timesteps. Return = -123.55016476157775
Episode 12 finished after 77 timesteps. Return = -129.0480454451287
Episode 13 finished after 76 timesteps. Return = -159.92647263149016
Episode 14 finished after 137 timesteps. Return = -358.05057496315317
Episode 15 finished after 62 timesteps. Retu

1.3842409914766693

In [33]:
n_iters = 1000
for i in range(n_iters):
    replay_buffer = collect_experience(stochastic_policy, replay_buffer, replay_size, last_few, n_episodes_per_iter, log_to_tensorboard)
    train_net(agent, replay_buffer, n_updates_per_iter, batch_size, log_to_tensorboard)

Episode 51 finished after 89 timesteps. Return = -382.6634132320046
Episode 52 finished after 66 timesteps. Return = -96.67326599693764
Episode 53 finished after 83 timesteps. Return = -90.11398104032524
Episode 54 finished after 101 timesteps. Return = -240.75159988453566
Episode 55 finished after 95 timesteps. Return = -428.78147663886165
Episode 56 finished after 70 timesteps. Return = -119.74727539795562
Episode 57 finished after 62 timesteps. Return = -63.50740351740876
Episode 58 finished after 90 timesteps. Return = -113.2061045242965
Episode 59 finished after 60 timesteps. Return = -64.30061205143289
Episode 60 finished after 144 timesteps. Return = -245.41588137987995
Episode 61 finished after 75 timesteps. Return = -260.00580021195094
Episode 62 finished after 90 timesteps. Return = -372.659845652631
Episode 63 finished after 79 timesteps. Return = -96.01097819183681
Episode 64 finished after 90 timesteps. Return = -238.6632391503006
Episode 65 finished after 66 timesteps. Re

Episode 173 finished after 113 timesteps. Return = -146.16898015623693
Episode 174 finished after 111 timesteps. Return = -489.8619186321915
Episode 175 finished after 154 timesteps. Return = -275.6482248869564
Episode 176 finished after 89 timesteps. Return = -224.8834971080876
Episode 177 finished after 63 timesteps. Return = -71.52222855604283
Episode 178 finished after 82 timesteps. Return = -140.30410781962425
Episode 179 finished after 79 timesteps. Return = -122.02366125992998
Episode 180 finished after 124 timesteps. Return = -165.93310409176826
Episode 181 finished after 103 timesteps. Return = -276.1198114251433
Episode 182 finished after 99 timesteps. Return = -130.49951256522752
Episode 183 finished after 99 timesteps. Return = -265.4959204137937
Episode 184 finished after 73 timesteps. Return = -149.19292178189835
Episode 185 finished after 109 timesteps. Return = -105.72955728693329
Episode 186 finished after 118 timesteps. Return = -315.0724182603636
Episode 187 finished

Episode 293 finished after 124 timesteps. Return = -122.74732759471013
Episode 294 finished after 112 timesteps. Return = -303.41056760790195
Episode 295 finished after 100 timesteps. Return = -125.1671671613999
Episode 296 finished after 105 timesteps. Return = -483.78897310718884
Episode 297 finished after 78 timesteps. Return = -179.34726002064474
Episode 298 finished after 113 timesteps. Return = -132.6269211254701
Episode 299 finished after 102 timesteps. Return = -122.54722847844738
Episode 300 finished after 91 timesteps. Return = -113.98932139236821
Episode 301 finished after 106 timesteps. Return = -163.19702207224162
Episode 302 finished after 117 timesteps. Return = -101.54200341571133
Episode 303 finished after 101 timesteps. Return = -364.2796980802363
Episode 304 finished after 96 timesteps. Return = -92.37097074650198
Episode 305 finished after 113 timesteps. Return = -122.6096648109436
Episode 306 finished after 79 timesteps. Return = -116.2119978044935
Episode 307 fini

Episode 413 finished after 66 timesteps. Return = -171.60735951303937
Episode 414 finished after 116 timesteps. Return = -308.62132575207926
Episode 415 finished after 102 timesteps. Return = -46.12720090508134
Episode 416 finished after 68 timesteps. Return = -227.60044391782316
Episode 417 finished after 69 timesteps. Return = -66.6542079852322
Episode 418 finished after 58 timesteps. Return = -120.95926202800892
Episode 419 finished after 93 timesteps. Return = -121.97172074324645
Episode 420 finished after 83 timesteps. Return = -112.05950002555619
Episode 421 finished after 77 timesteps. Return = -61.81662354310717
Episode 422 finished after 93 timesteps. Return = -198.84863964387483
Episode 423 finished after 88 timesteps. Return = -85.31832746328156
Episode 424 finished after 100 timesteps. Return = -82.797372410277
Episode 425 finished after 111 timesteps. Return = -153.3357664389659
Episode 426 finished after 103 timesteps. Return = -150.95175225912004
Episode 427 finished aft

Episode 536 finished after 81 timesteps. Return = -141.82492359367524
Episode 537 finished after 68 timesteps. Return = -85.90298487671994
Episode 538 finished after 127 timesteps. Return = -102.10353714031793
Episode 539 finished after 61 timesteps. Return = -118.91272534123863
Episode 540 finished after 71 timesteps. Return = -121.16697081074602
Episode 541 finished after 138 timesteps. Return = -140.38418268276484
Episode 542 finished after 134 timesteps. Return = -341.0148507021874
Episode 543 finished after 86 timesteps. Return = -132.48753123742733
Episode 544 finished after 107 timesteps. Return = -134.97071038142502
Episode 545 finished after 56 timesteps. Return = -93.0747473823195
Episode 546 finished after 61 timesteps. Return = -62.25213987671873
Episode 547 finished after 67 timesteps. Return = -62.296819692297284
Episode 548 finished after 123 timesteps. Return = -135.9170623854852
Episode 549 finished after 119 timesteps. Return = -122.44812014021304
Episode 550 finished

Episode 656 finished after 90 timesteps. Return = -130.5924676656058
Episode 657 finished after 82 timesteps. Return = -112.46489329631183
Episode 658 finished after 112 timesteps. Return = -122.06208652736747
Episode 659 finished after 90 timesteps. Return = -132.23583748964245
Episode 660 finished after 129 timesteps. Return = -78.38527289576243
Episode 661 finished after 122 timesteps. Return = -133.75853779768534
Episode 662 finished after 102 timesteps. Return = -161.68176894661224
Episode 663 finished after 138 timesteps. Return = -109.47651307435751
Episode 664 finished after 81 timesteps. Return = -106.57510215020747
Episode 665 finished after 68 timesteps. Return = -105.47842509737082
Episode 666 finished after 100 timesteps. Return = -99.18570147927662
Episode 667 finished after 77 timesteps. Return = -66.48612336019178
Episode 668 finished after 132 timesteps. Return = -148.0942640805314
Episode 669 finished after 79 timesteps. Return = -102.81318275258
Episode 670 finished 

Episode 774 finished after 86 timesteps. Return = -193.95993790522266
Episode 775 finished after 75 timesteps. Return = -97.4166698830564
Episode 776 finished after 95 timesteps. Return = -172.0346039256844
Episode 777 finished after 91 timesteps. Return = -107.93343885659841
Episode 778 finished after 97 timesteps. Return = -115.48608581919441
Episode 779 finished after 124 timesteps. Return = -238.22086737001268
Episode 780 finished after 103 timesteps. Return = -139.53379521485067
Episode 781 finished after 103 timesteps. Return = -180.67398866916878
Episode 782 finished after 84 timesteps. Return = -88.21558524260765
Episode 783 finished after 80 timesteps. Return = -95.32987421706629
Episode 784 finished after 61 timesteps. Return = -51.085492128197146
Episode 785 finished after 83 timesteps. Return = -85.04089320818034
Episode 786 finished after 81 timesteps. Return = -89.70516948790947
Episode 787 finished after 84 timesteps. Return = -113.11066580113699
Episode 788 finished aft

Episode 893 finished after 112 timesteps. Return = -126.42406565136848
Episode 894 finished after 110 timesteps. Return = -312.26952650616624
Episode 895 finished after 62 timesteps. Return = -90.55532648903595
Episode 896 finished after 90 timesteps. Return = -135.46035772195958
Episode 897 finished after 115 timesteps. Return = -123.40849340907094
Episode 898 finished after 91 timesteps. Return = -117.89363496756565
Episode 899 finished after 92 timesteps. Return = -99.88885733830303
Episode 900 finished after 125 timesteps. Return = -90.08144784701952
Episode 901 finished after 86 timesteps. Return = -217.20029946326105
Episode 902 finished after 68 timesteps. Return = -78.32654124766205
Episode 903 finished after 91 timesteps. Return = -167.06652989595753
Episode 904 finished after 96 timesteps. Return = -143.77345564140188
Episode 905 finished after 136 timesteps. Return = -117.64539093807403
Episode 906 finished after 67 timesteps. Return = -77.27820504187962
Episode 907 finished

Episode 1011 finished after 84 timesteps. Return = -88.37613798591795
Episode 1012 finished after 67 timesteps. Return = -64.44294687233817
Episode 1013 finished after 60 timesteps. Return = -72.01093347069217
Episode 1014 finished after 106 timesteps. Return = -80.94236885704007
Episode 1015 finished after 63 timesteps. Return = -103.69216134795813
Episode 1016 finished after 84 timesteps. Return = -111.29524270215498
Episode 1017 finished after 118 timesteps. Return = -8.848042791557347
Episode 1018 finished after 87 timesteps. Return = -100.34119688193881
Episode 1019 finished after 125 timesteps. Return = -78.8377303782861
Episode 1020 finished after 115 timesteps. Return = -79.3095450659997
Episode 1021 finished after 73 timesteps. Return = -64.54593436543385
Episode 1022 finished after 62 timesteps. Return = -73.74416146519053
Episode 1023 finished after 122 timesteps. Return = -66.55915635043942
Episode 1024 finished after 90 timesteps. Return = -86.61242119199781
Episode 1025 f

Episode 1128 finished after 74 timesteps. Return = -116.78816321279527
Episode 1129 finished after 96 timesteps. Return = -4.523107633417467
Episode 1130 finished after 116 timesteps. Return = -94.59965733097435
Episode 1131 finished after 70 timesteps. Return = -75.38326097918329
Episode 1132 finished after 86 timesteps. Return = -188.78900999274734
Episode 1133 finished after 131 timesteps. Return = -98.98949327807027
Episode 1134 finished after 79 timesteps. Return = -69.46029286817394
Episode 1135 finished after 71 timesteps. Return = -83.5808495471683
Episode 1136 finished after 82 timesteps. Return = -78.7760466554883
Episode 1137 finished after 119 timesteps. Return = -97.70143128392495
Episode 1138 finished after 84 timesteps. Return = -49.94336396490653
Episode 1139 finished after 109 timesteps. Return = -118.09715893757034
Episode 1140 finished after 119 timesteps. Return = -120.80798729000956
Episode 1141 finished after 81 timesteps. Return = -70.3908140141126
Episode 1142 f

Episode 1244 finished after 124 timesteps. Return = -74.72468982467404
Episode 1245 finished after 111 timesteps. Return = -95.58172592853303
Episode 1246 finished after 145 timesteps. Return = -112.25886886279125
Episode 1247 finished after 93 timesteps. Return = -125.4661487440315
Episode 1248 finished after 146 timesteps. Return = -164.4839532858507
Episode 1249 finished after 120 timesteps. Return = -159.50423096220658
Episode 1250 finished after 121 timesteps. Return = -97.15556136045659
Episode 1251 finished after 131 timesteps. Return = -42.02162218095913
Episode 1252 finished after 107 timesteps. Return = -98.8872725041524
Episode 1253 finished after 99 timesteps. Return = -78.05042247146064
Episode 1254 finished after 84 timesteps. Return = -83.78617304054826
Episode 1255 finished after 94 timesteps. Return = -35.80046205551865
Episode 1256 finished after 68 timesteps. Return = -162.1078704461611
Episode 1257 finished after 70 timesteps. Return = -61.45434949184178
Episode 125

Episode 1362 finished after 130 timesteps. Return = -75.29658387234291
Episode 1363 finished after 86 timesteps. Return = -130.32039347563475
Episode 1364 finished after 65 timesteps. Return = -88.73188822565777
Episode 1365 finished after 97 timesteps. Return = -85.10668912649585
Episode 1366 finished after 92 timesteps. Return = -110.93511105758103
Episode 1367 finished after 82 timesteps. Return = -40.29883463918955
Episode 1368 finished after 91 timesteps. Return = -129.46766908637784
Episode 1369 finished after 77 timesteps. Return = -60.512828311212175
Episode 1370 finished after 122 timesteps. Return = -67.30021346451213
Episode 1371 finished after 124 timesteps. Return = -87.13591969709273
Episode 1372 finished after 99 timesteps. Return = -125.31587303355487
Episode 1373 finished after 132 timesteps. Return = -115.33796731100472
Episode 1374 finished after 125 timesteps. Return = -188.68425997004044
Episode 1375 finished after 69 timesteps. Return = -74.32337053293183
Episode 

Episode 1482 finished after 119 timesteps. Return = -67.2891864825235
Episode 1483 finished after 114 timesteps. Return = -121.5999209591483
Episode 1484 finished after 86 timesteps. Return = -97.14087857516034
Episode 1485 finished after 75 timesteps. Return = -95.93260304743934
Episode 1486 finished after 102 timesteps. Return = -142.58769186292403
Episode 1487 finished after 82 timesteps. Return = -85.1714117497094
Episode 1488 finished after 81 timesteps. Return = -63.65058114851522
Episode 1489 finished after 58 timesteps. Return = -98.79968396328428
Episode 1490 finished after 72 timesteps. Return = -14.422648792754288
Episode 1491 finished after 103 timesteps. Return = -78.89671909771056
Episode 1492 finished after 74 timesteps. Return = -54.23165156493073
Episode 1493 finished after 97 timesteps. Return = -97.38646162804744
Episode 1494 finished after 142 timesteps. Return = -36.861878718622606
Episode 1495 finished after 122 timesteps. Return = -123.23154382934462
Episode 1496

Episode 1598 finished after 121 timesteps. Return = -53.10104401621619
Episode 1599 finished after 78 timesteps. Return = -178.74026059645837
Episode 1600 finished after 80 timesteps. Return = -82.55671759610667
Episode 1601 finished after 84 timesteps. Return = -81.23276379298734
Episode 1602 finished after 128 timesteps. Return = -52.288883572025554
Episode 1603 finished after 64 timesteps. Return = 19.6884508815844
Episode 1604 finished after 115 timesteps. Return = -261.7586201628968
Episode 1605 finished after 118 timesteps. Return = -113.44062650839827
Episode 1606 finished after 73 timesteps. Return = -145.44418949115416
Episode 1607 finished after 86 timesteps. Return = -58.65974544985144
Episode 1608 finished after 132 timesteps. Return = -96.01105270767394
Episode 1609 finished after 108 timesteps. Return = -102.52358076070892
Episode 1610 finished after 128 timesteps. Return = -83.76459642595923
Episode 1611 finished after 60 timesteps. Return = -205.2316572544172
Episode 16

Episode 1715 finished after 140 timesteps. Return = -50.28861801908573
Episode 1716 finished after 102 timesteps. Return = -104.06795547699033
Episode 1717 finished after 86 timesteps. Return = -94.62151246122652
Episode 1718 finished after 126 timesteps. Return = -104.02052918480369
Episode 1719 finished after 103 timesteps. Return = -129.94353570942224
Episode 1720 finished after 82 timesteps. Return = -93.9561337759025
Episode 1721 finished after 88 timesteps. Return = -217.0805588282522
Episode 1722 finished after 100 timesteps. Return = -73.97457277523448
Episode 1723 finished after 57 timesteps. Return = -83.9911061261588
Episode 1724 finished after 113 timesteps. Return = -95.20659231851018
Episode 1725 finished after 91 timesteps. Return = -89.25702797343783
Episode 1726 finished after 123 timesteps. Return = -80.94784637183275
Episode 1727 finished after 118 timesteps. Return = -201.9575004585878
Episode 1728 finished after 129 timesteps. Return = -79.22090436564298
Episode 17

Episode 1833 finished after 85 timesteps. Return = -135.78523504606983
Episode 1834 finished after 78 timesteps. Return = -67.85677823675564
Episode 1835 finished after 105 timesteps. Return = -56.97335799771336
Episode 1836 finished after 98 timesteps. Return = -105.3111876233819
Episode 1837 finished after 118 timesteps. Return = -102.46921192080458
Episode 1838 finished after 129 timesteps. Return = -119.05726169006472
Episode 1839 finished after 107 timesteps. Return = -107.2009295005147
Episode 1840 finished after 96 timesteps. Return = -127.1382980500305
Episode 1841 finished after 82 timesteps. Return = -81.19516187828788
Episode 1842 finished after 65 timesteps. Return = -80.06637835653407
Episode 1843 finished after 91 timesteps. Return = -63.22001225802991
Episode 1844 finished after 89 timesteps. Return = -175.98473941908856
Episode 1845 finished after 123 timesteps. Return = -180.18004870723104
Episode 1846 finished after 113 timesteps. Return = -109.68948906470754
Episode 

Episode 1951 finished after 91 timesteps. Return = -97.71620328982186
Episode 1952 finished after 68 timesteps. Return = -77.66845846024502
Episode 1953 finished after 83 timesteps. Return = -92.35602222681325
Episode 1954 finished after 84 timesteps. Return = -79.34201616060372
Episode 1955 finished after 113 timesteps. Return = -160.23354903712737
Episode 1956 finished after 76 timesteps. Return = -147.91068261905846
Episode 1957 finished after 79 timesteps. Return = -88.08512519070847
Episode 1958 finished after 110 timesteps. Return = -82.69204499093114
Episode 1959 finished after 82 timesteps. Return = -102.05585047863275
Episode 1960 finished after 134 timesteps. Return = -129.0965680591373
Episode 1961 finished after 59 timesteps. Return = -103.38578278424984
Episode 1962 finished after 141 timesteps. Return = -16.898147100849542
Episode 1963 finished after 107 timesteps. Return = -130.59426988837416
Episode 1964 finished after 128 timesteps. Return = 5.823640761717513
Episode 1

Episode 2072 finished after 104 timesteps. Return = -144.40604195177517
Episode 2073 finished after 93 timesteps. Return = -126.19173708236858
Episode 2074 finished after 123 timesteps. Return = -290.81800404050045
Episode 2075 finished after 112 timesteps. Return = -101.8693469228258
Episode 2076 finished after 115 timesteps. Return = -229.26984303755088
Episode 2077 finished after 73 timesteps. Return = -137.11460119085842
Episode 2078 finished after 84 timesteps. Return = -75.5144712758996
Episode 2079 finished after 95 timesteps. Return = -198.82146105790866
Episode 2080 finished after 110 timesteps. Return = -126.45714734060412
Episode 2081 finished after 116 timesteps. Return = -256.61912101439304
Episode 2082 finished after 76 timesteps. Return = -183.6016801929365
Episode 2083 finished after 73 timesteps. Return = -70.606168918038
Episode 2084 finished after 73 timesteps. Return = -98.10132661681301
Episode 2085 finished after 123 timesteps. Return = -88.99724777459478
Episode 

Episode 2189 finished after 115 timesteps. Return = -142.04146435745133
Episode 2190 finished after 126 timesteps. Return = -181.84620038466198
Episode 2191 finished after 62 timesteps. Return = -107.3043218348005
Episode 2192 finished after 104 timesteps. Return = -20.260958794478498
Episode 2193 finished after 113 timesteps. Return = -118.99419278218875
Episode 2194 finished after 83 timesteps. Return = -115.48817745844683
Episode 2195 finished after 119 timesteps. Return = -211.85889607245576
Episode 2196 finished after 68 timesteps. Return = -146.3531883964033
Episode 2197 finished after 121 timesteps. Return = -102.50155818826947
Episode 2198 finished after 65 timesteps. Return = -103.1908561931492
Episode 2199 finished after 76 timesteps. Return = -92.79741165194862
Episode 2200 finished after 137 timesteps. Return = -208.5464945326138
Episode 2201 finished after 82 timesteps. Return = -44.69172275824705
Episode 2202 finished after 81 timesteps. Return = -38.63414668847291
Episod

Episode 2306 finished after 119 timesteps. Return = -160.14073250676626
Episode 2307 finished after 119 timesteps. Return = -78.75706431509713
Episode 2308 finished after 112 timesteps. Return = -156.34120006928478
Episode 2309 finished after 130 timesteps. Return = -182.563096312515
Episode 2310 finished after 92 timesteps. Return = -168.54926286563764
Episode 2311 finished after 113 timesteps. Return = -132.62648034009874
Episode 2312 finished after 81 timesteps. Return = -22.584032293025302
Episode 2313 finished after 101 timesteps. Return = -225.30914715585834
Episode 2314 finished after 106 timesteps. Return = -124.48377374991173
Episode 2315 finished after 66 timesteps. Return = -55.6675938896433
Episode 2316 finished after 102 timesteps. Return = -38.168155784813436
Episode 2317 finished after 106 timesteps. Return = -298.74606597759134
Episode 2318 finished after 86 timesteps. Return = -128.81357477921594
Episode 2319 finished after 99 timesteps. Return = -47.03297984059358
Epi

Episode 2423 finished after 86 timesteps. Return = 14.828718856593397
Episode 2424 finished after 63 timesteps. Return = -47.95901994456342
Episode 2425 finished after 126 timesteps. Return = -128.3897171582576
Episode 2426 finished after 122 timesteps. Return = -170.92160091787196
Episode 2427 finished after 121 timesteps. Return = -10.673422992886245
Episode 2428 finished after 145 timesteps. Return = -81.81395976037975
Episode 2429 finished after 82 timesteps. Return = -81.13267550449143
Episode 2430 finished after 75 timesteps. Return = -145.51252700392178
Episode 2431 finished after 108 timesteps. Return = -155.39043311597902
Episode 2432 finished after 62 timesteps. Return = -97.42823169778958
Episode 2433 finished after 118 timesteps. Return = -150.8789577298343
Episode 2434 finished after 78 timesteps. Return = -193.53035334617508
Episode 2435 finished after 115 timesteps. Return = -170.9583992171266
Episode 2436 finished after 101 timesteps. Return = -230.63459354131027
Episod

Episode 2539 finished after 104 timesteps. Return = -244.51954097935766
Episode 2540 finished after 64 timesteps. Return = -170.80222478592827
Episode 2541 finished after 64 timesteps. Return = -102.32463420614289
Episode 2542 finished after 107 timesteps. Return = -50.48765432378126
Episode 2543 finished after 79 timesteps. Return = -82.22826919266174
Episode 2544 finished after 86 timesteps. Return = -161.1864855454912
Episode 2545 finished after 91 timesteps. Return = -178.66251656166423
Episode 2546 finished after 117 timesteps. Return = -196.246000433324
Episode 2547 finished after 101 timesteps. Return = -227.58263314665209
Episode 2548 finished after 104 timesteps. Return = -280.1860589268042
Episode 2549 finished after 119 timesteps. Return = -91.86886615882193
Episode 2550 finished after 113 timesteps. Return = -181.13473665137104
Episode 2551 finished after 60 timesteps. Return = -142.3237627232239
Episode 2552 finished after 104 timesteps. Return = -316.6409512209967
Episode

Episode 2657 finished after 88 timesteps. Return = -315.2663486734276
Episode 2658 finished after 127 timesteps. Return = -48.888682570665196
Episode 2659 finished after 95 timesteps. Return = -218.94137751476035
Episode 2660 finished after 103 timesteps. Return = -163.31383101296132
Episode 2661 finished after 82 timesteps. Return = -187.19828723695824
Episode 2662 finished after 111 timesteps. Return = -201.18445207310205
Episode 2663 finished after 106 timesteps. Return = -105.66747882197897
Episode 2664 finished after 116 timesteps. Return = -174.92234147814628
Episode 2665 finished after 105 timesteps. Return = -90.93339492165404
Episode 2666 finished after 63 timesteps. Return = -130.26585583832156
Episode 2667 finished after 79 timesteps. Return = 5.579278159817662
Episode 2668 finished after 124 timesteps. Return = -114.14743326390852
Episode 2669 finished after 113 timesteps. Return = -85.58610147019961
Episode 2670 finished after 75 timesteps. Return = -450.4187718824927
Epis

Episode 2773 finished after 79 timesteps. Return = -145.57775207036025
Episode 2774 finished after 89 timesteps. Return = -155.6420235019879
Episode 2775 finished after 78 timesteps. Return = -39.585405469161216
Episode 2776 finished after 149 timesteps. Return = -202.78960296424037
Episode 2777 finished after 66 timesteps. Return = -117.98315887092664
Episode 2778 finished after 125 timesteps. Return = -37.4978488952175
Episode 2779 finished after 90 timesteps. Return = -135.9063024373146
Episode 2780 finished after 120 timesteps. Return = -4.719422235195879
Episode 2781 finished after 143 timesteps. Return = -86.198688485903
Episode 2782 finished after 76 timesteps. Return = -130.13874441426637
Episode 2783 finished after 69 timesteps. Return = -93.45344298167333
Episode 2784 finished after 78 timesteps. Return = -152.82831885631998
Episode 2785 finished after 83 timesteps. Return = -107.49155254812635
Episode 2786 finished after 83 timesteps. Return = -107.82386843165858
Episode 278

Episode 2890 finished after 117 timesteps. Return = -163.1022369570781
Episode 2891 finished after 77 timesteps. Return = -134.10493986488115
Episode 2892 finished after 126 timesteps. Return = -98.32289819705157
Episode 2893 finished after 122 timesteps. Return = -80.62329649736759
Episode 2894 finished after 118 timesteps. Return = -313.18251500453505
Episode 2895 finished after 105 timesteps. Return = -108.05406334504511
Episode 2896 finished after 109 timesteps. Return = -118.63477818116655
Episode 2897 finished after 72 timesteps. Return = -225.24419517977086
Episode 2898 finished after 74 timesteps. Return = -51.70597998643663
Episode 2899 finished after 79 timesteps. Return = -76.06671966080076
Episode 2900 finished after 91 timesteps. Return = -73.14340218252553
Episode 2901 finished after 94 timesteps. Return = -128.28128593443958
Episode 2902 finished after 114 timesteps. Return = -135.73592204578384
Episode 2903 finished after 76 timesteps. Return = -123.7905334913743
Episod

Episode 3006 finished after 121 timesteps. Return = -189.9081260515111
Episode 3007 finished after 115 timesteps. Return = -37.540969978559644
Episode 3008 finished after 104 timesteps. Return = -205.9032643008098
Episode 3009 finished after 94 timesteps. Return = -86.01793309841604
Episode 3010 finished after 88 timesteps. Return = -380.35404330029706
Episode 3011 finished after 76 timesteps. Return = -94.44282211663716
Episode 3012 finished after 76 timesteps. Return = -65.4774475900976
Episode 3013 finished after 99 timesteps. Return = -15.330761582683877
Episode 3014 finished after 95 timesteps. Return = -91.28233030743584
Episode 3015 finished after 93 timesteps. Return = -206.37055375978616
Episode 3016 finished after 116 timesteps. Return = -208.24027204708065
Episode 3017 finished after 98 timesteps. Return = -294.2796250229561
Episode 3018 finished after 83 timesteps. Return = -191.92398451983763
Episode 3019 finished after 71 timesteps. Return = -187.9868335154095
Episode 302

Episode 3123 finished after 76 timesteps. Return = -82.89714079150822
Episode 3124 finished after 109 timesteps. Return = -34.99706028654309
Episode 3125 finished after 71 timesteps. Return = -181.22187878600687
Episode 3126 finished after 87 timesteps. Return = -223.86071591633734
Episode 3127 finished after 114 timesteps. Return = -224.7789238461561
Episode 3128 finished after 112 timesteps. Return = -125.0977152263787
Episode 3129 finished after 109 timesteps. Return = -34.4376779704174
Episode 3130 finished after 112 timesteps. Return = -222.84344373344942
Episode 3131 finished after 84 timesteps. Return = -75.43343331426121
Episode 3132 finished after 123 timesteps. Return = -54.30996644796529
Episode 3133 finished after 79 timesteps. Return = -183.9604725892308
Episode 3134 finished after 120 timesteps. Return = -194.671247622968
Episode 3135 finished after 80 timesteps. Return = -81.8197290587926
Episode 3136 finished after 79 timesteps. Return = -26.2583308843313
Episode 3137 f

Episode 3243 finished after 103 timesteps. Return = -164.1198944557799
Episode 3244 finished after 90 timesteps. Return = -129.36390730922213
Episode 3245 finished after 81 timesteps. Return = -164.32251658122885
Episode 3246 finished after 111 timesteps. Return = -95.1776874730724
Episode 3247 finished after 121 timesteps. Return = -165.86033528158663
Episode 3248 finished after 107 timesteps. Return = -130.44260924763702
Episode 3249 finished after 117 timesteps. Return = -66.4679415485911
Episode 3250 finished after 139 timesteps. Return = -9.087968962907937
Episode 3251 finished after 81 timesteps. Return = -321.689178978846
Episode 3252 finished after 87 timesteps. Return = -206.89547164239212
Episode 3253 finished after 103 timesteps. Return = -168.581722674633
Episode 3254 finished after 115 timesteps. Return = -133.14817502023834
Episode 3255 finished after 95 timesteps. Return = -199.2955094716238
Episode 3256 finished after 87 timesteps. Return = -155.05987603198514
Episode 3

Episode 3362 finished after 117 timesteps. Return = -111.87738992370164
Episode 3363 finished after 101 timesteps. Return = -86.47323906396024
Episode 3364 finished after 82 timesteps. Return = -69.71818118364345
Episode 3365 finished after 132 timesteps. Return = -118.08055420007786
Episode 3366 finished after 73 timesteps. Return = -247.92406355426215
Episode 3367 finished after 92 timesteps. Return = -234.03642439317682
Episode 3368 finished after 78 timesteps. Return = -171.19853015289158
Episode 3369 finished after 69 timesteps. Return = -152.5704332925968
Episode 3370 finished after 145 timesteps. Return = -149.37909155438132
Episode 3371 finished after 77 timesteps. Return = -155.96193693674388
Episode 3372 finished after 69 timesteps. Return = -165.0066360042953
Episode 3373 finished after 96 timesteps. Return = -2.73080169232027
Episode 3374 finished after 61 timesteps. Return = -177.2018376079444
Episode 3375 finished after 108 timesteps. Return = -98.90842391440499
Episode 3

Episode 3481 finished after 125 timesteps. Return = -221.67252382488712
Episode 3482 finished after 82 timesteps. Return = -306.96423763639973
Episode 3483 finished after 100 timesteps. Return = -221.10228715399714
Episode 3484 finished after 86 timesteps. Return = -410.7136181835658
Episode 3485 finished after 133 timesteps. Return = -169.1982575813014
Episode 3486 finished after 74 timesteps. Return = -115.7392469890083
Episode 3487 finished after 112 timesteps. Return = -281.9766362519745
Episode 3488 finished after 101 timesteps. Return = -223.8427408456447
Episode 3489 finished after 124 timesteps. Return = -86.81323729687327
Episode 3490 finished after 99 timesteps. Return = -87.33374948536698
Episode 3491 finished after 91 timesteps. Return = -264.3396264390276
Episode 3492 finished after 80 timesteps. Return = -221.41606739826403
Episode 3493 finished after 132 timesteps. Return = -304.12483149234856
Episode 3494 finished after 108 timesteps. Return = -372.558305056786
Episode 

Episode 3597 finished after 133 timesteps. Return = -154.75319295221828
Episode 3598 finished after 146 timesteps. Return = -67.34585833705236
Episode 3599 finished after 88 timesteps. Return = -175.6617508815836
Episode 3600 finished after 117 timesteps. Return = -353.7590518815938
Episode 3601 finished after 106 timesteps. Return = -114.36964056255948
Episode 3602 finished after 78 timesteps. Return = -203.80906523349665
Episode 3603 finished after 118 timesteps. Return = -88.72205945706018
Episode 3604 finished after 67 timesteps. Return = -159.1550156254305
Episode 3605 finished after 145 timesteps. Return = -25.640076418795303
Episode 3606 finished after 102 timesteps. Return = -167.2640251748298
Episode 3607 finished after 75 timesteps. Return = -208.4856158185002
Episode 3608 finished after 79 timesteps. Return = -92.65950810218561
Episode 3609 finished after 72 timesteps. Return = -60.53626299995466
Episode 3610 finished after 74 timesteps. Return = -221.56188750023117
Episode 

Episode 3715 finished after 102 timesteps. Return = -180.0565701845771
Episode 3716 finished after 122 timesteps. Return = -217.269852691435
Episode 3717 finished after 80 timesteps. Return = -150.16930756639027
Episode 3718 finished after 120 timesteps. Return = -118.685588962939
Episode 3719 finished after 94 timesteps. Return = -118.83456785790752
Episode 3720 finished after 97 timesteps. Return = -108.38525775933658
Episode 3721 finished after 87 timesteps. Return = -240.90486804000594
Episode 3722 finished after 130 timesteps. Return = -197.60484954980345
Episode 3723 finished after 102 timesteps. Return = -228.59413958005936
Episode 3724 finished after 124 timesteps. Return = -137.63723669326708
Episode 3725 finished after 148 timesteps. Return = -10.434967511466567
Episode 3726 finished after 72 timesteps. Return = -85.12839804996689
Episode 3727 finished after 83 timesteps. Return = -137.21192986163618
Episode 3728 finished after 92 timesteps. Return = -191.45248574281703
Episo

Episode 3834 finished after 94 timesteps. Return = -239.50547791336157
Episode 3835 finished after 104 timesteps. Return = -227.4131413110202
Episode 3836 finished after 81 timesteps. Return = -81.2253510743006
Episode 3837 finished after 89 timesteps. Return = -196.2893437808429
Episode 3838 finished after 148 timesteps. Return = -165.45945224093987
Episode 3839 finished after 134 timesteps. Return = -149.09722688135986
Episode 3840 finished after 107 timesteps. Return = -160.0887110187125
Episode 3841 finished after 78 timesteps. Return = -106.70420144296415
Episode 3842 finished after 91 timesteps. Return = -241.46291070081716
Episode 3843 finished after 142 timesteps. Return = 15.252919197964005
Episode 3844 finished after 106 timesteps. Return = -127.31802551607667
Episode 3845 finished after 101 timesteps. Return = -299.64794324159914
Episode 3846 finished after 110 timesteps. Return = -65.3423991061051
Episode 3847 finished after 162 timesteps. Return = -22.644960459903174
Episo

Episode 3951 finished after 108 timesteps. Return = -201.2272223426593
Episode 3952 finished after 141 timesteps. Return = -187.566611836485
Episode 3953 finished after 113 timesteps. Return = -366.65297335420047
Episode 3954 finished after 58 timesteps. Return = -146.0082498914761
Episode 3955 finished after 71 timesteps. Return = -139.62802325224513
Episode 3956 finished after 128 timesteps. Return = -17.785585852354785
Episode 3957 finished after 68 timesteps. Return = -104.66397066839386
Episode 3958 finished after 109 timesteps. Return = -124.30826955375124
Episode 3959 finished after 96 timesteps. Return = -170.8688397659202
Episode 3960 finished after 99 timesteps. Return = -239.0223250862873
Episode 3961 finished after 93 timesteps. Return = -114.8322089541955
Episode 3962 finished after 66 timesteps. Return = -207.1671135386169
Episode 3963 finished after 71 timesteps. Return = -143.54659960548574
Episode 3964 finished after 85 timesteps. Return = -98.7652545697735
Episode 396

Episode 4068 finished after 122 timesteps. Return = -207.6039266334162
Episode 4069 finished after 112 timesteps. Return = -334.9379693120086
Episode 4070 finished after 136 timesteps. Return = -346.73099793999233
Episode 4071 finished after 135 timesteps. Return = -89.2847348608414
Episode 4072 finished after 96 timesteps. Return = -235.05212234500908
Episode 4073 finished after 117 timesteps. Return = -208.99536278659468
Episode 4074 finished after 70 timesteps. Return = -159.30435168008108
Episode 4075 finished after 102 timesteps. Return = -278.86922541356444
Episode 4076 finished after 70 timesteps. Return = -94.12734919874899
Episode 4077 finished after 135 timesteps. Return = -189.49781661952687
Episode 4078 finished after 107 timesteps. Return = -120.52344636724845
Episode 4079 finished after 103 timesteps. Return = -72.12113731194773
Episode 4080 finished after 167 timesteps. Return = -151.45459287271777
Episode 4081 finished after 87 timesteps. Return = -149.4094004999571
Epi

Episode 4187 finished after 105 timesteps. Return = -210.7733624283995
Episode 4188 finished after 113 timesteps. Return = -221.60982272964384
Episode 4189 finished after 120 timesteps. Return = -171.62869238203427
Episode 4190 finished after 68 timesteps. Return = -166.2111872671968
Episode 4191 finished after 79 timesteps. Return = -210.89736189321368
Episode 4192 finished after 104 timesteps. Return = -119.3373918704079
Episode 4193 finished after 76 timesteps. Return = -191.8929309501585
Episode 4194 finished after 153 timesteps. Return = -219.305248857802
Episode 4195 finished after 133 timesteps. Return = -248.59045756195417
Episode 4196 finished after 102 timesteps. Return = -170.22736381596795
Episode 4197 finished after 150 timesteps. Return = -157.10723414993083
Episode 4198 finished after 73 timesteps. Return = -151.58775352864902
Episode 4199 finished after 104 timesteps. Return = -181.49800635421323
Episode 4200 finished after 76 timesteps. Return = -223.2992093907393
Epis

Episode 4306 finished after 94 timesteps. Return = -248.37893203296096
Episode 4307 finished after 117 timesteps. Return = -63.60265877870939
Episode 4308 finished after 151 timesteps. Return = -370.37441261348937
Episode 4309 finished after 97 timesteps. Return = -241.24528295379918
Episode 4310 finished after 117 timesteps. Return = -6.093762043512683
Episode 4311 finished after 128 timesteps. Return = -283.8513430191424
Episode 4312 finished after 163 timesteps. Return = -68.86083250967818
Episode 4313 finished after 90 timesteps. Return = -224.6907887574706
Episode 4314 finished after 126 timesteps. Return = -47.31105239800754
Episode 4315 finished after 101 timesteps. Return = -141.4750522602231
Episode 4316 finished after 106 timesteps. Return = -134.4925073968909
Episode 4317 finished after 90 timesteps. Return = -309.5968023997127
Episode 4318 finished after 67 timesteps. Return = -190.35412884236044
Episode 4319 finished after 126 timesteps. Return = -263.8558431130879
Episode

Episode 4424 finished after 138 timesteps. Return = -289.5685343201777
Episode 4425 finished after 138 timesteps. Return = -127.02319119374697
Episode 4426 finished after 119 timesteps. Return = -45.45493045643596
Episode 4427 finished after 141 timesteps. Return = -109.97337630854237
Episode 4428 finished after 146 timesteps. Return = -161.49901516473844
Episode 4429 finished after 141 timesteps. Return = -88.16778778909467
Episode 4430 finished after 158 timesteps. Return = -99.23660117895707
Episode 4431 finished after 110 timesteps. Return = -139.50871672124998
Episode 4432 finished after 101 timesteps. Return = -65.87402796186439
Episode 4433 finished after 139 timesteps. Return = -129.1864431249172
Episode 4434 finished after 162 timesteps. Return = -9.562014911549852
Episode 4435 finished after 146 timesteps. Return = -216.57083617841627
Episode 4436 finished after 107 timesteps. Return = -25.087082648337812
Episode 4437 finished after 137 timesteps. Return = -217.77713753519004

Episode 4540 finished after 1000 timesteps. Return = 66.12809705892077
Episode 4541 finished after 149 timesteps. Return = -278.70495650268754
Episode 4542 finished after 65 timesteps. Return = -281.3553153438337
Episode 4543 finished after 175 timesteps. Return = -208.43758893961046
Episode 4544 finished after 88 timesteps. Return = -47.70592714403296
Episode 4545 finished after 89 timesteps. Return = -234.3593463995721
Episode 4546 finished after 64 timesteps. Return = -204.66316462180703
Episode 4547 finished after 131 timesteps. Return = -259.5134166267277
Episode 4548 finished after 143 timesteps. Return = -95.39416170183888
Episode 4549 finished after 138 timesteps. Return = -359.7224551158161
Episode 4550 finished after 162 timesteps. Return = -138.49079560740734
Episode 4551 finished after 69 timesteps. Return = -143.91936069443946
Episode 4552 finished after 122 timesteps. Return = -36.67968953840543
Episode 4553 finished after 89 timesteps. Return = -84.01920335426365
Episode

Episode 4656 finished after 111 timesteps. Return = -337.3032383240702
Episode 4657 finished after 93 timesteps. Return = -190.17170990072276
Episode 4658 finished after 78 timesteps. Return = -87.7154058158691
Episode 4659 finished after 104 timesteps. Return = -335.20119703363457
Episode 4660 finished after 94 timesteps. Return = -224.95079568612434
Episode 4661 finished after 125 timesteps. Return = -188.23316226884089
Episode 4662 finished after 91 timesteps. Return = -77.61609991636527
Episode 4663 finished after 91 timesteps. Return = -390.01303358015986
Episode 4664 finished after 137 timesteps. Return = -167.59491692630138
Episode 4665 finished after 96 timesteps. Return = -208.63603033515523
Episode 4666 finished after 174 timesteps. Return = -145.1922173641442
Episode 4667 finished after 130 timesteps. Return = -164.52679078559447
Episode 4668 finished after 84 timesteps. Return = -127.82655673040503
Episode 4669 finished after 176 timesteps. Return = -176.98372618738915
Epis

Episode 4777 finished after 86 timesteps. Return = -352.2506636719248
Episode 4778 finished after 90 timesteps. Return = -117.70790451662756
Episode 4779 finished after 73 timesteps. Return = -269.1128121853296
Episode 4780 finished after 168 timesteps. Return = -99.2980112629159
Episode 4781 finished after 95 timesteps. Return = -33.23608678468328
Episode 4782 finished after 96 timesteps. Return = -194.72945031805187
Episode 4783 finished after 97 timesteps. Return = -302.7540865937191
Episode 4784 finished after 80 timesteps. Return = -178.73544051209035
Episode 4785 finished after 102 timesteps. Return = -208.14160724747188
Episode 4786 finished after 99 timesteps. Return = -282.3622355587029
Episode 4787 finished after 119 timesteps. Return = -86.595873373579
Episode 4788 finished after 81 timesteps. Return = -279.6531549895117
Episode 4789 finished after 86 timesteps. Return = -212.70143816314527
Episode 4790 finished after 107 timesteps. Return = -194.58132867698032
Episode 4791 

Episode 4895 finished after 123 timesteps. Return = -205.3314122482092
Episode 4896 finished after 83 timesteps. Return = -343.1615356507387
Episode 4897 finished after 97 timesteps. Return = -175.74468030785357
Episode 4898 finished after 155 timesteps. Return = -32.67194635119907
Episode 4899 finished after 118 timesteps. Return = -259.60037796455214
Episode 4900 finished after 65 timesteps. Return = -137.1591185101327
Episode 4901 finished after 71 timesteps. Return = -131.17096830668012
Episode 4902 finished after 69 timesteps. Return = -278.09600766753175
Episode 4903 finished after 116 timesteps. Return = -142.8385135274716
Episode 4904 finished after 110 timesteps. Return = -166.61838012188178
Episode 4905 finished after 142 timesteps. Return = -195.30150990266162
Episode 4906 finished after 61 timesteps. Return = -215.82337246823798
Episode 4907 finished after 78 timesteps. Return = -180.1578763253513
Episode 4908 finished after 92 timesteps. Return = -205.16296792268395
Episod

Episode 5011 finished after 107 timesteps. Return = -521.0775703907659
Episode 5012 finished after 123 timesteps. Return = -239.25133263979606
Episode 5013 finished after 145 timesteps. Return = -61.183676260765196
Episode 5014 finished after 155 timesteps. Return = -340.1965610791501
Episode 5015 finished after 90 timesteps. Return = -239.64593780273455
Episode 5016 finished after 132 timesteps. Return = -118.65321385251072
Episode 5017 finished after 98 timesteps. Return = -395.47862559066607
Episode 5018 finished after 93 timesteps. Return = -209.5402024328818
Episode 5019 finished after 139 timesteps. Return = -218.00268971590455
Episode 5020 finished after 132 timesteps. Return = -251.85040156043596
Episode 5021 finished after 140 timesteps. Return = -227.87080063374123
Episode 5022 finished after 139 timesteps. Return = -327.88548319086647
Episode 5023 finished after 142 timesteps. Return = -88.97574381968776
Episode 5024 finished after 133 timesteps. Return = -81.59359755712012


Episode 5128 finished after 117 timesteps. Return = -170.62342177283267
Episode 5129 finished after 155 timesteps. Return = -193.62700206521154
Episode 5130 finished after 97 timesteps. Return = -227.19170336508932
Episode 5131 finished after 90 timesteps. Return = -182.71965930607632
Episode 5132 finished after 197 timesteps. Return = -59.590601355637574
Episode 5133 finished after 140 timesteps. Return = -287.6996868004551
Episode 5134 finished after 190 timesteps. Return = -116.77262754914402
Episode 5135 finished after 95 timesteps. Return = -433.3492464216025
Episode 5136 finished after 134 timesteps. Return = -129.2702937068668
Episode 5137 finished after 189 timesteps. Return = -321.5467848269336
Episode 5138 finished after 116 timesteps. Return = -18.952223015381776
Episode 5139 finished after 80 timesteps. Return = -44.60551207306031
Episode 5140 finished after 67 timesteps. Return = -85.29634184922955
Episode 5141 finished after 84 timesteps. Return = -217.34117351170818
Epis

Episode 5244 finished after 169 timesteps. Return = -168.99286315502184
Episode 5245 finished after 130 timesteps. Return = -101.8579321100421
Episode 5246 finished after 104 timesteps. Return = -322.947717678924
Episode 5247 finished after 111 timesteps. Return = -154.89197699594635
Episode 5248 finished after 144 timesteps. Return = -116.25206335404762
Episode 5249 finished after 87 timesteps. Return = -103.06913697826128
Episode 5250 finished after 164 timesteps. Return = -145.42809867988868
Episode 5251 finished after 157 timesteps. Return = -219.6500724092336
Episode 5252 finished after 140 timesteps. Return = -174.35189042462855
Episode 5253 finished after 60 timesteps. Return = -207.8389177345984
Episode 5254 finished after 80 timesteps. Return = -110.88254436766945
Episode 5255 finished after 72 timesteps. Return = -287.7747350047499
Episode 5256 finished after 84 timesteps. Return = -149.84923297147142
Episode 5257 finished after 125 timesteps. Return = -196.30468650396566
Epi

Episode 5360 finished after 99 timesteps. Return = -76.13444212956097
Episode 5361 finished after 92 timesteps. Return = -186.6726883805453
Episode 5362 finished after 131 timesteps. Return = -101.35825850801079
Episode 5363 finished after 114 timesteps. Return = -218.90770537030699
Episode 5364 finished after 69 timesteps. Return = -180.59288380532757
Episode 5365 finished after 106 timesteps. Return = -236.30636932571568
Episode 5366 finished after 131 timesteps. Return = -184.34839468427117
Episode 5367 finished after 216 timesteps. Return = -291.89325318881237
Episode 5368 finished after 102 timesteps. Return = -151.43792314303403
Episode 5369 finished after 103 timesteps. Return = -165.65887368920534
Episode 5370 finished after 151 timesteps. Return = -190.90882556720874
Episode 5371 finished after 148 timesteps. Return = -126.93451469181755
Episode 5372 finished after 118 timesteps. Return = -178.89042506257712
Episode 5373 finished after 118 timesteps. Return = -128.439137309502

Episode 5475 finished after 394 timesteps. Return = -143.9818316043923
Episode 5476 finished after 181 timesteps. Return = -99.33150916149953
Episode 5477 finished after 72 timesteps. Return = -73.64329782652642
Episode 5478 finished after 121 timesteps. Return = -254.40330017466883
Episode 5479 finished after 70 timesteps. Return = -117.49501057116713
Episode 5480 finished after 183 timesteps. Return = -130.19664046567993
Episode 5481 finished after 72 timesteps. Return = -214.5542379312963
Episode 5482 finished after 75 timesteps. Return = -355.32396641927227
Episode 5483 finished after 89 timesteps. Return = -242.61268690928307
Episode 5484 finished after 95 timesteps. Return = -276.98149478352434
Episode 5485 finished after 110 timesteps. Return = -167.126270946612
Episode 5486 finished after 153 timesteps. Return = -178.15322726402624
Episode 5487 finished after 140 timesteps. Return = -119.42227618332839
Episode 5488 finished after 154 timesteps. Return = -314.9908907677043
Episo

Episode 5591 finished after 141 timesteps. Return = -48.16215327127158
Episode 5592 finished after 68 timesteps. Return = -60.58757323855618
Episode 5593 finished after 105 timesteps. Return = -218.8705356831656
Episode 5594 finished after 145 timesteps. Return = -213.86279361661173
Episode 5595 finished after 153 timesteps. Return = -250.6137992584688
Episode 5596 finished after 92 timesteps. Return = -25.996984533135617
Episode 5597 finished after 113 timesteps. Return = -116.68960430462187
Episode 5598 finished after 159 timesteps. Return = -183.8505686816565
Episode 5599 finished after 75 timesteps. Return = -127.82887876231834
Episode 5600 finished after 92 timesteps. Return = -90.59236296309854
Episode 5601 finished after 67 timesteps. Return = -139.58116377694898
Episode 5602 finished after 204 timesteps. Return = 26.81921775031232
Episode 5603 finished after 100 timesteps. Return = -223.65072130822114
Episode 5604 finished after 112 timesteps. Return = -128.13714295213637
Episo

Episode 5711 finished after 97 timesteps. Return = -126.42190292959373
Episode 5712 finished after 84 timesteps. Return = -157.67848748329897
Episode 5713 finished after 120 timesteps. Return = -153.93554559938815
Episode 5714 finished after 96 timesteps. Return = -176.84219145393237
Episode 5715 finished after 126 timesteps. Return = -264.2858830273839
Episode 5716 finished after 165 timesteps. Return = -296.9812416775147
Episode 5717 finished after 80 timesteps. Return = -90.08785503307837
Episode 5718 finished after 192 timesteps. Return = -30.755492791733985
Episode 5719 finished after 122 timesteps. Return = -216.14811981363732
Episode 5720 finished after 102 timesteps. Return = -231.02907259107084
Episode 5721 finished after 122 timesteps. Return = -354.4466675287082
Episode 5722 finished after 113 timesteps. Return = -218.08753851741574
Episode 5723 finished after 97 timesteps. Return = -352.1174544815161
Episode 5724 finished after 1000 timesteps. Return = -3.132871127759506
Ep

Episode 5827 finished after 112 timesteps. Return = -209.0905659071152
Episode 5828 finished after 184 timesteps. Return = -150.20943882806074
Episode 5829 finished after 91 timesteps. Return = -221.68137906656725
Episode 5830 finished after 70 timesteps. Return = -112.56832377107776
Episode 5831 finished after 67 timesteps. Return = -187.07890853090527
Episode 5832 finished after 142 timesteps. Return = -237.78112140874137
Episode 5833 finished after 108 timesteps. Return = -321.6137025963771
Episode 5834 finished after 91 timesteps. Return = -167.66733506337636
Episode 5835 finished after 165 timesteps. Return = -351.6369337245432
Episode 5836 finished after 71 timesteps. Return = -190.17269593031585
Episode 5837 finished after 106 timesteps. Return = -106.95176022468128
Episode 5838 finished after 75 timesteps. Return = -409.3643510365779
Episode 5839 finished after 102 timesteps. Return = -167.44744630052048
Episode 5840 finished after 78 timesteps. Return = -137.94167381009572
Epi

Episode 5946 finished after 133 timesteps. Return = -113.06135517128051
Episode 5947 finished after 168 timesteps. Return = -134.52639177183255
Episode 5948 finished after 113 timesteps. Return = -158.53859094969977
Episode 5949 finished after 106 timesteps. Return = -201.28842443933252
Episode 5950 finished after 125 timesteps. Return = -120.75515696529574
Episode 5951 finished after 97 timesteps. Return = -186.24139237687763
Episode 5952 finished after 68 timesteps. Return = -224.74558604627794
Episode 5953 finished after 74 timesteps. Return = -105.62129572182039
Episode 5954 finished after 173 timesteps. Return = 54.28660182016668
Episode 5955 finished after 182 timesteps. Return = -305.2374035842231
Episode 5956 finished after 168 timesteps. Return = -176.7588611862623
Episode 5957 finished after 124 timesteps. Return = -200.7679628386224
Episode 5958 finished after 151 timesteps. Return = -167.538919027248
Episode 5959 finished after 91 timesteps. Return = -191.11739766854674
Epi

Episode 6063 finished after 143 timesteps. Return = -14.296402039299366
Episode 6064 finished after 140 timesteps. Return = -249.79965170278692
Episode 6065 finished after 171 timesteps. Return = -177.88842351366233
Episode 6066 finished after 66 timesteps. Return = -172.0410415321582
Episode 6067 finished after 112 timesteps. Return = -161.0410876443832
Episode 6068 finished after 178 timesteps. Return = -328.9020431984733
Episode 6069 finished after 118 timesteps. Return = -105.65922513287488
Episode 6070 finished after 71 timesteps. Return = -156.16557845434406
Episode 6071 finished after 122 timesteps. Return = -100.41223961929198
Episode 6072 finished after 174 timesteps. Return = -241.70215469117554
Episode 6073 finished after 181 timesteps. Return = -334.22875762121566
Episode 6074 finished after 91 timesteps. Return = -151.25358592555548
Episode 6075 finished after 108 timesteps. Return = -328.47129044250187
Episode 6076 finished after 97 timesteps. Return = -115.84667333671675

Episode 6181 finished after 194 timesteps. Return = -99.47360338263289
Episode 6182 finished after 90 timesteps. Return = -183.94588859393252
Episode 6183 finished after 81 timesteps. Return = -242.65322584182286
Episode 6184 finished after 132 timesteps. Return = -367.29230848277217
Episode 6185 finished after 125 timesteps. Return = -85.10591649953105
Episode 6186 finished after 139 timesteps. Return = -284.3253146402035
Episode 6187 finished after 132 timesteps. Return = -229.8931919400956
Episode 6188 finished after 87 timesteps. Return = -188.32247350856176
Episode 6189 finished after 178 timesteps. Return = -153.75998162182125
Episode 6190 finished after 72 timesteps. Return = -274.74504379477327
Episode 6191 finished after 87 timesteps. Return = -116.61460229802847
Episode 6192 finished after 80 timesteps. Return = -241.65516525457645
Episode 6193 finished after 125 timesteps. Return = -319.70500869229863
Episode 6194 finished after 67 timesteps. Return = -140.80089019917796
Epi

Episode 6300 finished after 142 timesteps. Return = -20.720460335083658
Episode 6301 finished after 110 timesteps. Return = -203.75641892144864
Episode 6302 finished after 95 timesteps. Return = -272.7510478592618
Episode 6303 finished after 165 timesteps. Return = -137.28328959849415
Episode 6304 finished after 72 timesteps. Return = -96.81451373021005
Episode 6305 finished after 108 timesteps. Return = -173.3001895031849
Episode 6306 finished after 112 timesteps. Return = -185.70243816484052
Episode 6307 finished after 135 timesteps. Return = -153.40557970074107
Episode 6308 finished after 131 timesteps. Return = -145.24889520563883
Episode 6309 finished after 179 timesteps. Return = -168.39654803591498
Episode 6310 finished after 82 timesteps. Return = -173.05455043500706
Episode 6311 finished after 120 timesteps. Return = -366.4670374703729
Episode 6312 finished after 112 timesteps. Return = -237.17302526529105
Episode 6313 finished after 70 timesteps. Return = -249.8388111246742
E

Episode 6419 finished after 172 timesteps. Return = -116.69550265416775
Episode 6420 finished after 100 timesteps. Return = -141.58760216726168
Episode 6421 finished after 141 timesteps. Return = -118.41163993966791
Episode 6422 finished after 118 timesteps. Return = -102.22368572713472
Episode 6423 finished after 72 timesteps. Return = -125.28374144849685
Episode 6424 finished after 106 timesteps. Return = -89.99270842994689
Episode 6425 finished after 128 timesteps. Return = -95.66317284871803
Episode 6426 finished after 126 timesteps. Return = -99.2772901428257
Episode 6427 finished after 102 timesteps. Return = -233.3855259521922
Episode 6428 finished after 88 timesteps. Return = -171.14298727005064
Episode 6429 finished after 79 timesteps. Return = -135.41485894355475
Episode 6430 finished after 86 timesteps. Return = -36.789677643456585
Episode 6431 finished after 77 timesteps. Return = -55.85149553021376
Episode 6432 finished after 179 timesteps. Return = -80.83598776060362
Epis

Episode 6537 finished after 120 timesteps. Return = -93.4324652105376
Episode 6538 finished after 130 timesteps. Return = -67.41365649198167
Episode 6539 finished after 80 timesteps. Return = -44.820880082491016
Episode 6540 finished after 188 timesteps. Return = -50.52256094928343
Episode 6541 finished after 173 timesteps. Return = -219.54811076146763
Episode 6542 finished after 89 timesteps. Return = -77.91114490723709
Episode 6543 finished after 113 timesteps. Return = -245.33746986193137
Episode 6544 finished after 78 timesteps. Return = -128.17413769069526
Episode 6545 finished after 109 timesteps. Return = -254.15570054571424
Episode 6546 finished after 90 timesteps. Return = -65.13769065795304
Episode 6547 finished after 105 timesteps. Return = -287.1699416852206
Episode 6548 finished after 68 timesteps. Return = -93.20157333716584
Episode 6549 finished after 76 timesteps. Return = -141.43156993782276
Episode 6550 finished after 81 timesteps. Return = -105.79104413738774
Episode

Episode 6656 finished after 201 timesteps. Return = -215.22848292312727
Episode 6657 finished after 135 timesteps. Return = -54.996408061784734
Episode 6658 finished after 73 timesteps. Return = -99.7464912954029
Episode 6659 finished after 172 timesteps. Return = -98.38272793294914
Episode 6660 finished after 220 timesteps. Return = -345.9356188234958
Episode 6661 finished after 91 timesteps. Return = -126.23802992114693
Episode 6662 finished after 77 timesteps. Return = -62.84970422946612
Episode 6663 finished after 131 timesteps. Return = -111.66037903349617
Episode 6664 finished after 155 timesteps. Return = -203.3477887602411
Episode 6665 finished after 162 timesteps. Return = -203.6912069041932
Episode 6666 finished after 89 timesteps. Return = -218.38241960263122
Episode 6667 finished after 125 timesteps. Return = -241.23405365481472
Episode 6668 finished after 118 timesteps. Return = -170.92668262752008
Episode 6669 finished after 148 timesteps. Return = -47.09153767678472
Epis

Episode 6774 finished after 126 timesteps. Return = -174.85015738944423
Episode 6775 finished after 108 timesteps. Return = -44.091411532342185
Episode 6776 finished after 163 timesteps. Return = -260.12612546746027
Episode 6777 finished after 208 timesteps. Return = 11.220203965392642
Episode 6778 finished after 157 timesteps. Return = -193.14168938727363
Episode 6779 finished after 78 timesteps. Return = -37.868070226513005
Episode 6780 finished after 195 timesteps. Return = -267.98698903420143
Episode 6781 finished after 113 timesteps. Return = -57.107525347153
Episode 6782 finished after 99 timesteps. Return = -136.8187935135449
Episode 6783 finished after 94 timesteps. Return = -117.19664842879939
Episode 6784 finished after 121 timesteps. Return = -93.54002511441796
Episode 6785 finished after 154 timesteps. Return = -174.11253843367913
Episode 6786 finished after 129 timesteps. Return = -218.3707193141122
Episode 6787 finished after 106 timesteps. Return = -245.55912467876138
Ep

Episode 6890 finished after 114 timesteps. Return = -271.2481590967541
Episode 6891 finished after 286 timesteps. Return = -219.6124429658036
Episode 6892 finished after 73 timesteps. Return = -193.41659816708113
Episode 6893 finished after 101 timesteps. Return = -88.40166174739053
Episode 6894 finished after 107 timesteps. Return = -86.92095415795069
Episode 6895 finished after 69 timesteps. Return = -174.56743223969238
Episode 6896 finished after 74 timesteps. Return = -275.4539467389283
Episode 6897 finished after 116 timesteps. Return = -23.55402974717944
Episode 6898 finished after 76 timesteps. Return = -30.49618804490052
Episode 6899 finished after 92 timesteps. Return = -148.4108972061603
Episode 6900 finished after 144 timesteps. Return = -139.1329056355718
Episode 6901 finished after 120 timesteps. Return = -164.26348196347666
Episode 6902 finished after 110 timesteps. Return = -140.1369456621353
Episode 6903 finished after 81 timesteps. Return = -108.7901282206821
Episode 6

Episode 7009 finished after 1000 timesteps. Return = 60.313677900856014
Episode 7010 finished after 128 timesteps. Return = -142.14116348284176
Episode 7011 finished after 226 timesteps. Return = -302.7746091107345
Episode 7012 finished after 91 timesteps. Return = -239.79198161705634
Episode 7013 finished after 126 timesteps. Return = -184.65546342476517
Episode 7014 finished after 166 timesteps. Return = -115.45803756120755
Episode 7015 finished after 187 timesteps. Return = -116.98559865187971
Episode 7016 finished after 83 timesteps. Return = -205.00708548895386
Episode 7017 finished after 76 timesteps. Return = -169.68866997180862
Episode 7018 finished after 138 timesteps. Return = -236.75088682265957
Episode 7019 finished after 126 timesteps. Return = 6.703904618982804
Episode 7020 finished after 186 timesteps. Return = -289.6949868722595
Episode 7021 finished after 63 timesteps. Return = -148.3571893482394
Episode 7022 finished after 173 timesteps. Return = -234.35468879231536
E

Episode 7127 finished after 84 timesteps. Return = -366.2950312237764
Episode 7128 finished after 123 timesteps. Return = -416.7252832471999
Episode 7129 finished after 89 timesteps. Return = -193.5960459668156
Episode 7130 finished after 191 timesteps. Return = -123.25128128415714
Episode 7131 finished after 152 timesteps. Return = -149.69991129688765
Episode 7132 finished after 64 timesteps. Return = -97.47563770674864
Episode 7133 finished after 176 timesteps. Return = -68.92451185590853
Episode 7134 finished after 150 timesteps. Return = -101.96634025214061
Episode 7135 finished after 260 timesteps. Return = -112.58100264269478
Episode 7136 finished after 157 timesteps. Return = -226.0491606551692
Episode 7137 finished after 201 timesteps. Return = -39.66105328152678
Episode 7138 finished after 133 timesteps. Return = -93.35812334810045
Episode 7139 finished after 157 timesteps. Return = -249.4162999285438
Episode 7140 finished after 162 timesteps. Return = -97.61992418866309
Episo

Episode 7245 finished after 148 timesteps. Return = -146.81856412245955
Episode 7246 finished after 306 timesteps. Return = -136.70637422396908
Episode 7247 finished after 101 timesteps. Return = -126.52264381672447
Episode 7248 finished after 203 timesteps. Return = -112.07160201395521
Episode 7249 finished after 140 timesteps. Return = -98.61994074639244
Episode 7250 finished after 131 timesteps. Return = -250.87859795027356
Episode 7251 finished after 161 timesteps. Return = -96.73287774812303
Episode 7252 finished after 197 timesteps. Return = -36.513366746365605
Episode 7253 finished after 138 timesteps. Return = -144.91778792610194
Episode 7254 finished after 291 timesteps. Return = -14.761442162401707
Episode 7255 finished after 187 timesteps. Return = -79.55422767668126
Episode 7256 finished after 76 timesteps. Return = -108.20523418341232
Episode 7257 finished after 88 timesteps. Return = -112.53008312551866
Episode 7258 finished after 135 timesteps. Return = -74.8905246190222

Episode 7362 finished after 271 timesteps. Return = -240.6718709630884
Episode 7363 finished after 192 timesteps. Return = -197.25322456017784
Episode 7364 finished after 95 timesteps. Return = -191.63785635777054
Episode 7365 finished after 169 timesteps. Return = -151.0487110600291
Episode 7366 finished after 168 timesteps. Return = -268.20717936845284
Episode 7367 finished after 209 timesteps. Return = -271.4611115996366
Episode 7368 finished after 71 timesteps. Return = -218.50926324235544
Episode 7369 finished after 83 timesteps. Return = -257.62563278850774
Episode 7370 finished after 180 timesteps. Return = -453.404046998279
Episode 7371 finished after 167 timesteps. Return = -6.337614738506886
Episode 7372 finished after 97 timesteps. Return = -219.47040815805033
Episode 7373 finished after 61 timesteps. Return = -181.7109080364005
Episode 7374 finished after 297 timesteps. Return = -222.24935651763502
Episode 7375 finished after 153 timesteps. Return = -205.3338643834352
Episo

Episode 7477 finished after 227 timesteps. Return = -105.05096235598108
Episode 7478 finished after 172 timesteps. Return = -208.55582709466165
Episode 7479 finished after 335 timesteps. Return = -151.68265508958217
Episode 7480 finished after 163 timesteps. Return = -164.86172296983364
Episode 7481 finished after 125 timesteps. Return = -90.70352857510794
Episode 7482 finished after 147 timesteps. Return = -274.1118126491704
Episode 7483 finished after 198 timesteps. Return = -247.21086645656365
Episode 7484 finished after 116 timesteps. Return = -66.26899972672912
Episode 7485 finished after 164 timesteps. Return = -103.76857236655262
Episode 7486 finished after 142 timesteps. Return = -71.60764625562223
Episode 7487 finished after 68 timesteps. Return = -80.49667108568421
Episode 7488 finished after 101 timesteps. Return = -167.91373461883998
Episode 7489 finished after 71 timesteps. Return = -159.96089718079736
Episode 7490 finished after 158 timesteps. Return = -107.45340896193362

Episode 7593 finished after 136 timesteps. Return = -189.0604726918615
Episode 7594 finished after 134 timesteps. Return = -208.10706915814743
Episode 7595 finished after 104 timesteps. Return = -276.07575831315694
Episode 7596 finished after 157 timesteps. Return = -183.69495043247036
Episode 7597 finished after 139 timesteps. Return = 13.031799766945554
Episode 7598 finished after 72 timesteps. Return = -182.55838736539124
Episode 7599 finished after 147 timesteps. Return = -108.1208556581155
Episode 7600 finished after 122 timesteps. Return = -128.3623718197442
Episode 7601 finished after 93 timesteps. Return = -65.65979398599879
Episode 7602 finished after 95 timesteps. Return = -179.09686753174805
Episode 7603 finished after 236 timesteps. Return = -195.50690240016763
Episode 7604 finished after 231 timesteps. Return = -255.69433636123634
Episode 7605 finished after 94 timesteps. Return = -74.32010575201355
Episode 7606 finished after 169 timesteps. Return = -149.3230987078744
Epi

Episode 7709 finished after 364 timesteps. Return = -212.50612199644917
Episode 7710 finished after 86 timesteps. Return = -56.90625460632659
Episode 7711 finished after 111 timesteps. Return = -272.44386928255005
Episode 7712 finished after 92 timesteps. Return = -141.73404685304374
Episode 7713 finished after 184 timesteps. Return = -117.63254947612101
Episode 7714 finished after 96 timesteps. Return = -113.25010793141733
Episode 7715 finished after 83 timesteps. Return = -44.55395869852441
Episode 7716 finished after 66 timesteps. Return = -203.54602730635304
Episode 7717 finished after 146 timesteps. Return = -89.74589938595939
Episode 7718 finished after 107 timesteps. Return = -229.0140090632919
Episode 7719 finished after 1000 timesteps. Return = 69.18965575580175
Episode 7720 finished after 146 timesteps. Return = -109.20979557396628
Episode 7721 finished after 104 timesteps. Return = -82.67312347454717
Episode 7722 finished after 203 timesteps. Return = -212.68748260825637
Epi

Episode 7825 finished after 103 timesteps. Return = -84.08849463033845
Episode 7826 finished after 155 timesteps. Return = -75.92074798637677
Episode 7827 finished after 236 timesteps. Return = -15.63372278470895
Episode 7828 finished after 103 timesteps. Return = 6.016268839970309
Episode 7829 finished after 100 timesteps. Return = -189.9678677424059
Episode 7830 finished after 215 timesteps. Return = -151.10779828415306
Episode 7831 finished after 105 timesteps. Return = -72.91629037823526
Episode 7832 finished after 90 timesteps. Return = -196.7322168881401
Episode 7833 finished after 96 timesteps. Return = -18.848068218929882
Episode 7834 finished after 187 timesteps. Return = -208.8531327453424
Episode 7835 finished after 149 timesteps. Return = -45.515274824927644
Episode 7836 finished after 152 timesteps. Return = -164.7097580167744
Episode 7837 finished after 111 timesteps. Return = 13.319940397947988
Episode 7838 finished after 238 timesteps. Return = -111.50922680376273
Episo

Episode 7943 finished after 124 timesteps. Return = -77.28129482291254
Episode 7944 finished after 149 timesteps. Return = -221.42653233861841
Episode 7945 finished after 134 timesteps. Return = -17.986359922732944
Episode 7946 finished after 69 timesteps. Return = -135.27891755859457
Episode 7947 finished after 93 timesteps. Return = -36.47310257742938
Episode 7948 finished after 106 timesteps. Return = -88.87371005134693
Episode 7949 finished after 170 timesteps. Return = -134.62058727834363
Episode 7950 finished after 315 timesteps. Return = -214.00674046237432
Episode 7951 finished after 206 timesteps. Return = -90.49663847017914
Episode 7952 finished after 92 timesteps. Return = -52.527698662562685
Episode 7953 finished after 156 timesteps. Return = -185.0102000378181
Episode 7954 finished after 88 timesteps. Return = -151.92790839363698
Episode 7955 finished after 112 timesteps. Return = -67.8290501507698
Episode 7956 finished after 127 timesteps. Return = -189.33144859575455
Epi

Episode 8060 finished after 160 timesteps. Return = -106.48037939341042
Episode 8061 finished after 250 timesteps. Return = -93.88199210055056
Episode 8062 finished after 228 timesteps. Return = -223.29101028723514
Episode 8063 finished after 298 timesteps. Return = -207.82721843002287
Episode 8064 finished after 203 timesteps. Return = -189.06586013249313
Episode 8065 finished after 270 timesteps. Return = -129.21826127530622
Episode 8066 finished after 132 timesteps. Return = -91.29805432188913
Episode 8067 finished after 129 timesteps. Return = -227.49194673628318
Episode 8068 finished after 183 timesteps. Return = -181.26050396407044
Episode 8069 finished after 145 timesteps. Return = -176.35901981972896
Episode 8070 finished after 144 timesteps. Return = -135.97229609265509
Episode 8071 finished after 94 timesteps. Return = 23.734836508873897
Episode 8072 finished after 266 timesteps. Return = -70.7507523741957
Episode 8073 finished after 90 timesteps. Return = -51.38132286156693


Episode 8177 finished after 193 timesteps. Return = -268.25427472852016
Episode 8178 finished after 213 timesteps. Return = -49.94238661350275
Episode 8179 finished after 371 timesteps. Return = -378.9005334402413
Episode 8180 finished after 119 timesteps. Return = -51.64312085644209
Episode 8181 finished after 104 timesteps. Return = -9.322361721354326
Episode 8182 finished after 204 timesteps. Return = -226.72262658099743
Episode 8183 finished after 220 timesteps. Return = -162.60271048958637
Episode 8184 finished after 70 timesteps. Return = -71.88260679953547
Episode 8185 finished after 90 timesteps. Return = -77.46305947966283
Episode 8186 finished after 371 timesteps. Return = -171.00564847534812
Episode 8187 finished after 241 timesteps. Return = -124.02844643757875
Episode 8188 finished after 126 timesteps. Return = -172.25408061786294
Episode 8189 finished after 160 timesteps. Return = -135.80289287260507
Episode 8190 finished after 203 timesteps. Return = -192.5859156895319
E

Episode 8292 finished after 154 timesteps. Return = -180.26484229549268
Episode 8293 finished after 120 timesteps. Return = -98.43707518574271
Episode 8294 finished after 94 timesteps. Return = -56.813271139980245
Episode 8295 finished after 111 timesteps. Return = -96.04962610717327
Episode 8296 finished after 131 timesteps. Return = -188.89650763095636
Episode 8297 finished after 68 timesteps. Return = -160.76597938162865
Episode 8298 finished after 78 timesteps. Return = -70.25056439378847
Episode 8299 finished after 114 timesteps. Return = -266.8963746428036
Episode 8300 finished after 83 timesteps. Return = -119.01790978198743
Episode 8301 finished after 172 timesteps. Return = -303.96523015155253
Episode 8302 finished after 145 timesteps. Return = -50.06291783215502
Episode 8303 finished after 224 timesteps. Return = -131.41161293937276
Episode 8304 finished after 170 timesteps. Return = -125.63414549520907
Episode 8305 finished after 109 timesteps. Return = -194.42735252574263
E

Episode 8410 finished after 66 timesteps. Return = -89.06655382894523
Episode 8411 finished after 141 timesteps. Return = -102.95543863622109
Episode 8412 finished after 77 timesteps. Return = -73.60269007182922
Episode 8413 finished after 143 timesteps. Return = -86.05183579425422
Episode 8414 finished after 170 timesteps. Return = -73.4480938194747
Episode 8415 finished after 69 timesteps. Return = -121.08183286044117
Episode 8416 finished after 130 timesteps. Return = -137.765477072239
Episode 8417 finished after 96 timesteps. Return = -126.3681569726672
Episode 8418 finished after 140 timesteps. Return = -138.17159938405104
Episode 8419 finished after 95 timesteps. Return = -69.04341153545141
Episode 8420 finished after 69 timesteps. Return = -138.72117112317812
Episode 8421 finished after 243 timesteps. Return = -78.3931306128023
Episode 8422 finished after 166 timesteps. Return = -113.1996756768238
Episode 8423 finished after 345 timesteps. Return = -143.3168720460751
Episode 842

Episode 8526 finished after 264 timesteps. Return = -110.74630695462777
Episode 8527 finished after 165 timesteps. Return = -174.47572719774718
Episode 8528 finished after 86 timesteps. Return = -62.37756639463069
Episode 8529 finished after 125 timesteps. Return = -71.89256944449971
Episode 8530 finished after 201 timesteps. Return = -188.3075787598106
Episode 8531 finished after 123 timesteps. Return = -194.91244434216827
Episode 8532 finished after 177 timesteps. Return = -321.19952424740893
Episode 8533 finished after 84 timesteps. Return = -69.44565908142117
Episode 8534 finished after 163 timesteps. Return = -229.0207901447778
Episode 8535 finished after 218 timesteps. Return = -227.9684566788959
Episode 8536 finished after 140 timesteps. Return = -57.12866484957888
Episode 8537 finished after 181 timesteps. Return = -270.2276666725403
Episode 8538 finished after 108 timesteps. Return = -195.17768576378796
Episode 8539 finished after 111 timesteps. Return = -155.80113620037963
Ep

Episode 8643 finished after 111 timesteps. Return = -4.762283131995005
Episode 8644 finished after 115 timesteps. Return = -163.34704763191422
Episode 8645 finished after 167 timesteps. Return = -301.4397436172195
Episode 8646 finished after 221 timesteps. Return = -227.07999543119803
Episode 8647 finished after 145 timesteps. Return = 8.833110470819577
Episode 8648 finished after 70 timesteps. Return = -155.9859262013439
Episode 8649 finished after 362 timesteps. Return = -341.54284579950496
Episode 8650 finished after 282 timesteps. Return = -94.63105773753668
Episode 8651 finished after 224 timesteps. Return = -208.88853995206802
Episode 8652 finished after 324 timesteps. Return = -92.72596830485901
Episode 8653 finished after 141 timesteps. Return = -117.36214711734553
Episode 8654 finished after 132 timesteps. Return = -169.15180700095
Episode 8655 finished after 119 timesteps. Return = -126.97265331650107
Episode 8656 finished after 195 timesteps. Return = -212.69363522336403
Epi

Episode 8759 finished after 404 timesteps. Return = -294.8516996658134
Episode 8760 finished after 162 timesteps. Return = -90.64571209918891
Episode 8761 finished after 134 timesteps. Return = -193.2462050542024
Episode 8762 finished after 166 timesteps. Return = -184.5081828483306
Episode 8763 finished after 385 timesteps. Return = -204.30352574461858
Episode 8764 finished after 201 timesteps. Return = -199.01018611060192
Episode 8765 finished after 221 timesteps. Return = -265.9942538685387
Episode 8766 finished after 177 timesteps. Return = -75.34404864240196
Episode 8767 finished after 120 timesteps. Return = -268.9433837085419
Episode 8768 finished after 97 timesteps. Return = -131.9135872280761
Episode 8769 finished after 87 timesteps. Return = -88.12787430523471
Episode 8770 finished after 75 timesteps. Return = -207.33483341345368
Episode 8771 finished after 144 timesteps. Return = -242.01926897189767
Episode 8772 finished after 138 timesteps. Return = -147.55948460724238
Epis

Episode 8875 finished after 379 timesteps. Return = -328.08064813354065
Episode 8876 finished after 210 timesteps. Return = -148.94112991238495
Episode 8877 finished after 121 timesteps. Return = -86.36059966613945
Episode 8878 finished after 135 timesteps. Return = -90.91093689413681
Episode 8879 finished after 64 timesteps. Return = -125.45284474170214
Episode 8880 finished after 382 timesteps. Return = -116.6031192768913
Episode 8881 finished after 211 timesteps. Return = -162.5570561443211
Episode 8882 finished after 305 timesteps. Return = -148.76492868013975
Episode 8883 finished after 105 timesteps. Return = -306.6141207786252
Episode 8884 finished after 264 timesteps. Return = -206.29869069795114
Episode 8885 finished after 106 timesteps. Return = -51.88614835566769
Episode 8886 finished after 305 timesteps. Return = -219.9682681270407
Episode 8887 finished after 79 timesteps. Return = -72.66670625704316
Episode 8888 finished after 86 timesteps. Return = -64.04100931277216
Epis

Episode 8993 finished after 163 timesteps. Return = -175.37098143093937
Episode 8994 finished after 148 timesteps. Return = -23.045600818747033
Episode 8995 finished after 106 timesteps. Return = -6.680598672849044
Episode 8996 finished after 194 timesteps. Return = -80.81513852129525
Episode 8997 finished after 136 timesteps. Return = -96.61080610803096
Episode 8998 finished after 155 timesteps. Return = -86.39880567284784
Episode 8999 finished after 122 timesteps. Return = 36.33292423963806
Episode 9000 finished after 278 timesteps. Return = -238.72538403734205
Episode 9001 finished after 487 timesteps. Return = -296.6933442639481
Episode 9002 finished after 287 timesteps. Return = -278.3832671930329
Episode 9003 finished after 455 timesteps. Return = -89.69350789510567
Episode 9004 finished after 204 timesteps. Return = -64.74806624438571
Episode 9005 finished after 185 timesteps. Return = -51.13858279729625
Episode 9006 finished after 76 timesteps. Return = -95.62051237369036
Episo

Episode 9111 finished after 83 timesteps. Return = -47.16949756285067
Episode 9112 finished after 90 timesteps. Return = -100.99017794673176
Episode 9113 finished after 146 timesteps. Return = -189.5469398388376
Episode 9114 finished after 273 timesteps. Return = -7.750359404174077
Episode 9115 finished after 81 timesteps. Return = -180.99392111336678
Episode 9116 finished after 306 timesteps. Return = -389.65904292526386
Episode 9117 finished after 102 timesteps. Return = -83.16172308473386
Episode 9118 finished after 297 timesteps. Return = -208.13903988938372
Episode 9119 finished after 175 timesteps. Return = 4.107458600343591
Episode 9120 finished after 86 timesteps. Return = -61.19933590276323
Episode 9121 finished after 122 timesteps. Return = -201.49400333315947
Episode 9122 finished after 134 timesteps. Return = -210.28088494889226
Episode 9123 finished after 89 timesteps. Return = -94.62992191216482
Episode 9124 finished after 107 timesteps. Return = -46.798341345594885
Episo

Episode 9227 finished after 276 timesteps. Return = -418.29265178413186
Episode 9228 finished after 146 timesteps. Return = -176.8321186717979
Episode 9229 finished after 229 timesteps. Return = -56.422877164681736
Episode 9230 finished after 149 timesteps. Return = -187.96992874113397
Episode 9231 finished after 79 timesteps. Return = -92.50165086905626
Episode 9232 finished after 126 timesteps. Return = -43.88092247003483
Episode 9233 finished after 593 timesteps. Return = -303.17500618334054
Episode 9234 finished after 336 timesteps. Return = -260.5907071035138
Episode 9235 finished after 205 timesteps. Return = -192.27924513287735
Episode 9236 finished after 135 timesteps. Return = -250.3337944894727
Episode 9237 finished after 436 timesteps. Return = -270.94888531643346
Episode 9238 finished after 292 timesteps. Return = -130.1536560139201
Episode 9239 finished after 191 timesteps. Return = 7.938308602846078
Episode 9240 finished after 149 timesteps. Return = -183.92503062429708
E

Episode 9342 finished after 146 timesteps. Return = -76.64706113202764
Episode 9343 finished after 466 timesteps. Return = -260.34928568709813
Episode 9344 finished after 270 timesteps. Return = -143.31029545441368
Episode 9345 finished after 90 timesteps. Return = -119.46460544935208
Episode 9346 finished after 374 timesteps. Return = -288.11306407593025
Episode 9347 finished after 280 timesteps. Return = -223.2255630519743
Episode 9348 finished after 377 timesteps. Return = -179.68007791775318
Episode 9349 finished after 121 timesteps. Return = -204.7787253895844
Episode 9350 finished after 115 timesteps. Return = -47.5372819583872
Episode 9351 finished after 94 timesteps. Return = -124.28954310315791
Episode 9352 finished after 269 timesteps. Return = -207.1595760410942
Episode 9353 finished after 69 timesteps. Return = -204.41143776378917
Episode 9354 finished after 197 timesteps. Return = -115.27583155989103
Episode 9355 finished after 215 timesteps. Return = -281.28367061549113
E

Episode 9459 finished after 94 timesteps. Return = -63.95527816141002
Episode 9460 finished after 97 timesteps. Return = -46.72029336857019
Episode 9461 finished after 91 timesteps. Return = -73.01642312626873
Episode 9462 finished after 283 timesteps. Return = -245.41996046718407
Episode 9463 finished after 83 timesteps. Return = -223.49158762430773
Episode 9464 finished after 164 timesteps. Return = -178.2317794714578
Episode 9465 finished after 1000 timesteps. Return = 110.73314977397781
Episode 9466 finished after 368 timesteps. Return = -161.48085400121093
Episode 9467 finished after 217 timesteps. Return = -56.88899060925045
Episode 9468 finished after 201 timesteps. Return = -0.7146244879305783
Episode 9469 finished after 179 timesteps. Return = -88.95684616679131
Episode 9470 finished after 98 timesteps. Return = -78.15146561533334
Episode 9471 finished after 141 timesteps. Return = -92.19635303515463
Episode 9472 finished after 178 timesteps. Return = -314.82482625540604
Episo

Episode 9578 finished after 169 timesteps. Return = -259.40601281704227
Episode 9579 finished after 198 timesteps. Return = -77.78925264742868
Episode 9580 finished after 256 timesteps. Return = -181.8878767325342
Episode 9581 finished after 120 timesteps. Return = 5.166529249497884
Episode 9582 finished after 122 timesteps. Return = -138.2926417156849
Episode 9583 finished after 284 timesteps. Return = -205.5237625046866
Episode 9584 finished after 178 timesteps. Return = -207.76806360801535
Episode 9585 finished after 316 timesteps. Return = -215.92254783019143
Episode 9586 finished after 402 timesteps. Return = -271.7441180330112
Episode 9587 finished after 375 timesteps. Return = -159.36512889770432
Episode 9588 finished after 132 timesteps. Return = -88.35025572587047
Episode 9589 finished after 210 timesteps. Return = -211.14631654090698
Episode 9590 finished after 119 timesteps. Return = -325.1494400695565
Episode 9591 finished after 100 timesteps. Return = -180.19283052605851
E

Episode 9694 finished after 292 timesteps. Return = -255.8656912556025
Episode 9695 finished after 153 timesteps. Return = -116.61222194074836
Episode 9696 finished after 259 timesteps. Return = -275.0942396755897
Episode 9697 finished after 171 timesteps. Return = -78.1816837115563
Episode 9698 finished after 297 timesteps. Return = -266.90403363917665
Episode 9699 finished after 289 timesteps. Return = -142.5947921851655
Episode 9700 finished after 254 timesteps. Return = -231.6258347855109
Episode 9701 finished after 192 timesteps. Return = -93.91729808391415
Episode 9702 finished after 268 timesteps. Return = -126.34467560967391
Episode 9703 finished after 301 timesteps. Return = -255.08110958046996
Episode 9704 finished after 95 timesteps. Return = -151.40110719341442
Episode 9705 finished after 369 timesteps. Return = -203.12997569386374
Episode 9706 finished after 141 timesteps. Return = -132.20256971607756
Episode 9707 finished after 245 timesteps. Return = -137.56070978732114


Episode 9809 finished after 109 timesteps. Return = -205.9321940651804
Episode 9810 finished after 126 timesteps. Return = -227.66067400572936
Episode 9811 finished after 160 timesteps. Return = -4.9628484068900605
Episode 9812 finished after 346 timesteps. Return = -317.47763893291335
Episode 9813 finished after 97 timesteps. Return = -61.46459957893216
Episode 9814 finished after 287 timesteps. Return = -208.82870577750924
Episode 9815 finished after 249 timesteps. Return = -326.69718744178857
Episode 9816 finished after 71 timesteps. Return = -192.25932788785212
Episode 9817 finished after 248 timesteps. Return = -281.505230918384
Episode 9818 finished after 66 timesteps. Return = -176.85797014835805
Episode 9819 finished after 226 timesteps. Return = -206.7859065173921
Episode 9820 finished after 334 timesteps. Return = -167.9987719777718
Episode 9821 finished after 238 timesteps. Return = -140.32033441999965
Episode 9822 finished after 205 timesteps. Return = -199.46727838000672
E

Episode 9926 finished after 209 timesteps. Return = -87.0588728637343
Episode 9927 finished after 153 timesteps. Return = -90.47659242975392
Episode 9928 finished after 221 timesteps. Return = -221.53772042330667
Episode 9929 finished after 302 timesteps. Return = -187.1896477582823
Episode 9930 finished after 75 timesteps. Return = -119.8097962500114
Episode 9931 finished after 242 timesteps. Return = -195.82843131587143
Episode 9932 finished after 118 timesteps. Return = -64.0454361479133
Episode 9933 finished after 156 timesteps. Return = -192.6721905673458
Episode 9934 finished after 224 timesteps. Return = -233.02679894505826
Episode 9935 finished after 116 timesteps. Return = -30.557908634345182
Episode 9936 finished after 303 timesteps. Return = -177.95736419806627
Episode 9937 finished after 360 timesteps. Return = -128.80337240691904
Episode 9938 finished after 278 timesteps. Return = -38.17575487242687
Episode 9939 finished after 275 timesteps. Return = -235.85728224044558
Ep

Episode 10044 finished after 270 timesteps. Return = -276.91800043056116
Episode 10045 finished after 311 timesteps. Return = -259.68715826119916
Episode 10046 finished after 118 timesteps. Return = 12.058058887145336
Episode 10047 finished after 387 timesteps. Return = -121.03639039522089
Episode 10048 finished after 126 timesteps. Return = -122.69885819098428
Episode 10049 finished after 383 timesteps. Return = -260.4073157320266
Episode 10050 finished after 145 timesteps. Return = 35.2112814502043
Episode 10051 finished after 70 timesteps. Return = -201.784453393384
Episode 10052 finished after 364 timesteps. Return = -251.1181054908219
Episode 10053 finished after 252 timesteps. Return = -182.07228470682043
Episode 10054 finished after 73 timesteps. Return = -115.79513421656569
Episode 10055 finished after 223 timesteps. Return = -243.30864480919544
Episode 10056 finished after 162 timesteps. Return = -88.57495432811785
Episode 10057 finished after 215 timesteps. Return = -214.5166

Episode 10158 finished after 77 timesteps. Return = -87.41994366383248
Episode 10159 finished after 319 timesteps. Return = -158.34521629033247
Episode 10160 finished after 175 timesteps. Return = -178.14160993223408
Episode 10161 finished after 260 timesteps. Return = -81.84533901868927
Episode 10162 finished after 455 timesteps. Return = -264.7299742809647
Episode 10163 finished after 146 timesteps. Return = -139.53275440340968
Episode 10164 finished after 142 timesteps. Return = 7.750594624229109
Episode 10165 finished after 366 timesteps. Return = -285.55699616125906
Episode 10166 finished after 137 timesteps. Return = -131.57287702097528
Episode 10167 finished after 581 timesteps. Return = -274.0861243118113
Episode 10168 finished after 174 timesteps. Return = -203.10259660719336
Episode 10169 finished after 85 timesteps. Return = -0.3772116837912307
Episode 10170 finished after 397 timesteps. Return = -272.0718490702081
Episode 10171 finished after 227 timesteps. Return = -266.61

Episode 10273 finished after 160 timesteps. Return = -201.65828012510724
Episode 10274 finished after 127 timesteps. Return = -189.43849321722104
Episode 10275 finished after 229 timesteps. Return = -192.73186606094956
Episode 10276 finished after 206 timesteps. Return = -192.99989230652568
Episode 10277 finished after 298 timesteps. Return = -238.19011934557048
Episode 10278 finished after 366 timesteps. Return = -315.0427620941149
Episode 10279 finished after 120 timesteps. Return = -114.65526935559015
Episode 10280 finished after 168 timesteps. Return = -215.05678920585575
Episode 10281 finished after 352 timesteps. Return = -224.87010223384408
Episode 10282 finished after 304 timesteps. Return = -146.6209996063592
Episode 10283 finished after 93 timesteps. Return = -117.72301506132743
Episode 10284 finished after 166 timesteps. Return = -144.5335451041586
Episode 10285 finished after 161 timesteps. Return = -203.58675581112396
Episode 10286 finished after 196 timesteps. Return = -1

Episode 10391 finished after 205 timesteps. Return = -161.34134176862057
Episode 10392 finished after 223 timesteps. Return = -139.30962645214902
Episode 10393 finished after 336 timesteps. Return = -265.77469287187023
Episode 10394 finished after 413 timesteps. Return = -249.37065513136324
Episode 10395 finished after 131 timesteps. Return = -197.50242631467626
Episode 10396 finished after 204 timesteps. Return = -58.93269247142375
Episode 10397 finished after 280 timesteps. Return = -271.74361708194425
Episode 10398 finished after 270 timesteps. Return = -196.13634458436348
Episode 10399 finished after 110 timesteps. Return = -205.27341762739565
Episode 10400 finished after 228 timesteps. Return = -179.36488684136629
Episode 10401 finished after 132 timesteps. Return = -103.7257936488144
Episode 10402 finished after 98 timesteps. Return = -70.96560043502453
Episode 10403 finished after 98 timesteps. Return = -191.29851659492948
Episode 10404 finished after 248 timesteps. Return = -18

Episode 10505 finished after 294 timesteps. Return = -185.6488170821582
Episode 10506 finished after 555 timesteps. Return = -246.91135780081245
Episode 10507 finished after 259 timesteps. Return = -175.43464908136207
Episode 10508 finished after 133 timesteps. Return = -83.18501119608274
Episode 10509 finished after 263 timesteps. Return = -131.67012331979126
Episode 10510 finished after 233 timesteps. Return = -75.00305931896824
Episode 10511 finished after 319 timesteps. Return = -208.3777867132701
Episode 10512 finished after 213 timesteps. Return = -246.45484471099354
Episode 10513 finished after 107 timesteps. Return = -49.32312112616705
Episode 10514 finished after 161 timesteps. Return = -38.138593244416796
Episode 10515 finished after 237 timesteps. Return = -103.78394632452947
Episode 10516 finished after 214 timesteps. Return = -192.73632148657393
Episode 10517 finished after 314 timesteps. Return = -220.1353851299579
Episode 10518 finished after 172 timesteps. Return = -62.

Episode 10619 finished after 335 timesteps. Return = -99.90558377520969
Episode 10620 finished after 277 timesteps. Return = -286.47136613331156
Episode 10621 finished after 163 timesteps. Return = -159.2796839453746
Episode 10622 finished after 105 timesteps. Return = 43.32596020486122
Episode 10623 finished after 95 timesteps. Return = -50.17941150882289
Episode 10624 finished after 174 timesteps. Return = -80.20863921040628
Episode 10625 finished after 351 timesteps. Return = -290.2064981494681
Episode 10626 finished after 185 timesteps. Return = -74.70161049421641
Episode 10627 finished after 321 timesteps. Return = -224.9676595320591
Episode 10628 finished after 133 timesteps. Return = -106.63634693454262
Episode 10629 finished after 259 timesteps. Return = -118.5826484183957
Episode 10630 finished after 194 timesteps. Return = -118.75119985601299
Episode 10631 finished after 59 timesteps. Return = -132.47019869988557
Episode 10632 finished after 263 timesteps. Return = -248.12160

Episode 10733 finished after 328 timesteps. Return = -183.57168888177472
Episode 10734 finished after 138 timesteps. Return = -37.39602905894847
Episode 10735 finished after 113 timesteps. Return = -177.94119679084088
Episode 10736 finished after 77 timesteps. Return = -98.92459300290372
Episode 10737 finished after 86 timesteps. Return = -106.29376374524459
Episode 10738 finished after 383 timesteps. Return = -271.67976294423346
Episode 10739 finished after 100 timesteps. Return = -101.72536884326921
Episode 10740 finished after 311 timesteps. Return = -119.64753355685075
Episode 10741 finished after 190 timesteps. Return = -80.3368062784015
Episode 10742 finished after 157 timesteps. Return = -106.76720062733303
Episode 10743 finished after 309 timesteps. Return = -135.50009760632528
Episode 10744 finished after 239 timesteps. Return = -108.38732219124074
Episode 10745 finished after 278 timesteps. Return = -163.4393946669654
Episode 10746 finished after 170 timesteps. Return = -96.0

Episode 10850 finished after 580 timesteps. Return = -243.84958453780087
Episode 10851 finished after 181 timesteps. Return = -142.11493040144745
Episode 10852 finished after 268 timesteps. Return = -121.68796835945638
Episode 10853 finished after 324 timesteps. Return = -257.49141431020155
Episode 10854 finished after 146 timesteps. Return = -267.6956864012601
Episode 10855 finished after 536 timesteps. Return = -334.7815754531735
Episode 10856 finished after 189 timesteps. Return = -232.17162131012103
Episode 10857 finished after 409 timesteps. Return = -134.69590538478664
Episode 10858 finished after 350 timesteps. Return = -93.02210942730522
Episode 10859 finished after 290 timesteps. Return = -287.51864338538485
Episode 10860 finished after 393 timesteps. Return = -277.06819723553843
Episode 10861 finished after 162 timesteps. Return = -12.251041983652883
Episode 10862 finished after 71 timesteps. Return = -94.60622443674157
Episode 10863 finished after 112 timesteps. Return = -20

Episode 10965 finished after 307 timesteps. Return = -268.7664417969695
Episode 10966 finished after 242 timesteps. Return = -248.78221174666538
Episode 10967 finished after 111 timesteps. Return = -77.57929020844722
Episode 10968 finished after 178 timesteps. Return = -100.9216698284497
Episode 10969 finished after 94 timesteps. Return = -43.56589487894858
Episode 10970 finished after 319 timesteps. Return = -256.2323374889502
Episode 10971 finished after 105 timesteps. Return = -243.21508900866888
Episode 10972 finished after 83 timesteps. Return = -106.19558179350419
Episode 10973 finished after 411 timesteps. Return = -50.0485559615093
Episode 10974 finished after 264 timesteps. Return = -104.40196384894145
Episode 10975 finished after 78 timesteps. Return = -171.57284795648957
Episode 10976 finished after 356 timesteps. Return = -85.37605588882722
Episode 10977 finished after 75 timesteps. Return = -109.9953660724099
Episode 10978 finished after 137 timesteps. Return = -196.077360

Episode 11080 finished after 216 timesteps. Return = -138.0907612385591
Episode 11081 finished after 151 timesteps. Return = -1.9515855874326604
Episode 11082 finished after 236 timesteps. Return = -165.3168693255198
Episode 11083 finished after 91 timesteps. Return = -174.91662442252368
Episode 11084 finished after 227 timesteps. Return = -68.87648271183699
Episode 11085 finished after 496 timesteps. Return = -219.57806116786645
Episode 11086 finished after 85 timesteps. Return = -106.561020582802
Episode 11087 finished after 770 timesteps. Return = -317.7336018934359
Episode 11088 finished after 240 timesteps. Return = -228.40717649093443
Episode 11089 finished after 252 timesteps. Return = -87.4715262817219
Episode 11090 finished after 437 timesteps. Return = -277.3439472888855
Episode 11091 finished after 183 timesteps. Return = 15.000753237321561
Episode 11092 finished after 139 timesteps. Return = -12.958991922514187
Episode 11093 finished after 193 timesteps. Return = -70.053649

KeyboardInterrupt: 

In [165]:
#torch.save(agent.state_dict(), 'checkpoints/lunar_lander_64x64_checkpoint_0.pt')
#agent.load_state_dict(torch.load('checkpoints/lunar_lander_32x32_checkpoint_0.pt'))

In [74]:
visualise_agent(greedy_policy, command=[150, 400], n=5)

Episode 0 finished after 156 timesteps. Return = -241.49785131822478
Episode 1 finished after 297 timesteps. Return = -358.99471816394623
Episode 2 finished after 211 timesteps. Return = -314.8731776425535
Episode 3 finished after 102 timesteps. Return = -224.18053479219859
Episode 4 finished after 412 timesteps. Return = -387.7649308825705


In [73]:
visualise_agent(stochastic_policy, command=[150, 400], n=5)

Episode 0 finished after 101 timesteps. Return = -131.8027246567254


In [25]:
print([mem['return'] for mem in replay_buffer])

[-63.70254888830934, -63.638939025122625, -63.5994898557033, -63.49465027645706, -63.46673824051337, -63.36871712910339, -63.3170788909209, -63.268455814456246, -63.26158837333688, -63.22068659038845, -63.21192519962132, -63.159962894358316, -63.035593454982134, -63.035425263206434, -62.97096940697155, -62.943593599875086, -62.92596251667162, -62.91281822394709, -62.90096774806254, -62.89021254680336, -62.88432112394105, -62.833528924538854, -62.77959557612017, -62.667310065127865, -62.66679486512233, -62.629278971083366, -62.61750379938199, -62.575461411676095, -62.47476364144245, -62.2956868138306, -62.28361089060018, -62.21614293189815, -62.211379210167564, -62.19752032429969, -62.17571584274293, -62.118940774335755, -62.08795198808494, -61.997331174933244, -61.92102486534531, -61.8701775567973, -61.79702597554188, -61.78349969009108, -61.761231874595126, -61.75852742540508, -61.71683109542317, -61.6446895161059, -61.506866501602374, -61.489387372573006, -61.48727633614841, -61.4803

# Previous Code

In [None]:
def train_net(policy_net, replay_buffer, n_updates=100, batch_size=64):
    all_costs = []
    for i in range(n_updates):
        batch_input = np.zeros((batch_size, np.prod(env.observation_space.shape)+2))
        batch_label = np.zeros((batch_size))
        for b in range(batch_size):
            sample_episode = np.random.randint(0, len(replay_buffer))
            sample_horizon = np.random.randint(1, len(replay_buffer[sample_episode]['observation'])+1)
            sample_mem_idx = np.random.randint(0, len(replay_buffer[sample_episode]['observation'])+1-sample_horizon)
            sample_mem = replay_buffer[sample_episode]['observation'][sample_mem_idx]
            sample_desired_reward = sum(replay_buffer[sample_episode]['reward'][sample_mem_idx:sample_mem_idx+sample_horizon])
            network_input = np.append(sample_mem, [sample_desired_reward, sample_horizon])
            label = replay_buffer[sample_episode]['action'][sample_mem_idx]
            batch_input[b] = network_input
            batch_label[b] = label
        batch_input = torch.tensor(batch_input).double()
        batch_label = torch.tensor(batch_label).long()
        pred = policy_net(batch_input)
        cost = F.cross_entropy(pred, batch_label)
        all_costs.append(cost.item())
        cost.backward()
        policy_net.optimizer.step()
        policy_net.optimizer.zero_grad()
    return np.mean(all_costs)

In [8]:
def train_net(policy_net, episode_mem, n_samples = 5): #stochastic gradient descent
    all_costs = []
    for i in range(n_samples):
        sample_horizon = np.random.randint(1, len(episode_mem['observation'])+1)
        sample_mem_idx = np.random.randint(0, len(episode_mem['observation'])+1-sample_horizon)
        sample_mem = episode_mem['observation'][sample_mem_idx]
        sample_desired_reward = sum(episode_mem['reward'][sample_mem_idx:sample_mem_idx+sample_horizon])
        network_input = torch.tensor(np.append(sample_mem, [sample_desired_reward, sample_horizon])).double()
        label = torch.tensor([episode_mem['action'][sample_mem_idx]]).double()
        
        pred = policy_net(network_input)
        cost = F.binary_cross_entropy(pred, label)
        all_costs.append(cost.item())
        cost.backward()
        policy_net.optimizer.step()
        policy_net.optimizer.zero_grad()
    return np.mean(all_costs)
    

In [33]:
def train(policy_net, n_episodes=100):
    global i_episode
    global epsilon
    try:
        for _ in range(n_episodes):
            observation = env.reset()
            episode_mem = {'observation':[],
                            'action':[],
                            'reward':[],
                            'done':[]}
            done=False
            while not done:
                network_input = torch.tensor(np.append(observation, [desired_reward, command_horizon])).double()
                action_prob = policy_net(network_input)
                action = np.random.binomial(1, action_prob.item())
                #action = int(action_prob.item()>0.5)
                if np.random.rand()<epsilon: action = np.random.randint(0, 2)
                new_observation, reward, done, info = env.step(action)
                
                episode_mem['observation'].append(observation)
                episode_mem['action'].append(action)
                episode_mem['reward'].append(reward)
                episode_mem['done'].append(done)
                
                observation=new_observation
                epsilon*=0.999
            episode_mem['return']=sum(episode_mem['reward'])
            episode_mem['episode_len']=len(episode_mem['observation'])
            mean_cost = train_net(policy_net, episode_mem)
            
            i_episode+=1
            print("Episode {} finished after {} timesteps. Epsilon={} Mean Cost={}".format(i_episode, len(episode_mem['observation']), epsilon, mean_cost))
        env.close()
    except KeyboardInterrupt:
        env.close()

## Previous version

In [55]:
import gym
import time
import torch
import numpy as np
from copy import deepcopy
import torch.nn.functional as F

def random_policy(obs):
    return np.random.randint(env.action_space.n)

#Visualise agent function
def visualise_agent(policy, command, command_scale, n=5):
    try:
        for trial_i in range(n):
            current_command = deepcopy(command)
            observation = env.reset()
            done=False
            t=0
            episode_return=0
            while not done:
                env.render()
                network_input = torch.tensor(np.append(observation, np.array(current_command)*command_scale)).double()
                policy_action = policy(network_input)
                observation, reward, done, info = env.step(policy_action)
                episode_return+=reward
                #time.sleep(0.1)
                current_command[0]-= reward
                current_command[1] = max(1, current_command[1]-1)
                
                t+=1
            env.render()
            time.sleep(1.5)
            print("Episode {} finished after {} timesteps. Return = {}".format(trial_i, t, episode_return))
        env.close()
    except KeyboardInterrupt:
        env.close()
        
#Behaviour function - Neural Network
class FCNN_AGENT(torch.nn.Module):
    def __init__(self):
        super().__init__()
        self.fc = torch.nn.Sequential(
            torch.nn.Linear(np.prod(env.observation_space.shape)+2, 10),
            torch.nn.ReLU(),
            torch.nn.Linear(10, 10),
            torch.nn.ReLU(),
            torch.nn.Linear(10, env.action_space.n)
        )
    
    def forward(self, x):
        x = self.fc(x)
        return x
    
    def create_optimizer(self, lr):
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)

#Full the replay buffer with more experience
def collect_experience(policy, replay_buffer, replay_size, last_few, n_episodes=100, command_scale=0.01, log_to_tensorboard=True):
    global i_episode
    init_replay_buffer = deepcopy(replay_buffer)
    try:
        for _ in range(n_episodes):
            command = sample_command(init_replay_buffer, last_few)
            if log_to_tensorboard: writer.add_scalar('Command desired reward/Episode', command[0], i_episode)    # write loss to a graph
            if log_to_tensorboard: writer.add_scalar('Command horizon/Episode', command[1], i_episode)    # write loss to a graph
            observation = env.reset()
            episode_mem = {'observation':[],
                            'action':[],
                            'reward':[]}
            done=False
            while not done:
                network_input = torch.tensor(np.append(observation, np.array(command)*command_scale)).double()
                action = policy(network_input)
                new_observation, reward, done, info = env.step(action)
                
                episode_mem['observation'].append(observation)
                episode_mem['action'].append(action)
                episode_mem['reward'].append(reward)
                
                observation=new_observation
                #command[0]-= reward
                command[0] = max(1, command[0]-reward)
                command[1] = max(1, command[1]-1)
            episode_mem['return']=sum(episode_mem['reward'])
            episode_mem['episode_len']=len(episode_mem['observation'])
            replay_buffer.append(episode_mem)
            i_episode+=1
            if log_to_tensorboard: writer.add_scalar('Return/Episode', sum(episode_mem['reward']), i_episode)    # write loss to a graph
            print("Episode {} finished after {} timesteps. Return = {}".format(i_episode, len(episode_mem['observation']), sum(episode_mem['reward'])))
        env.close()
    except KeyboardInterrupt:
        env.close()
    replay_buffer = sorted(replay_buffer, key=lambda x:x['return'])[-replay_size:]
    return replay_buffer

#Sample exploratory command
def sample_command(replay_buffer, last_few):
    if len(replay_buffer)==0:
        return [1, 1]
    else:
        command_samples = replay_buffer[-last_few:]
        lengths = [mem['episode_len'] for mem in command_samples]
        returns = [mem['return'] for mem in command_samples]
        mean_return, std_return = np.mean(returns), np.std(returns)
        command_horizon = np.mean(lengths)
        desired_reward = np.random.uniform(mean_return, mean_return+std_return)
        return [desired_reward, command_horizon]

#Improve behviour function by training on replay buffer
def train_net(policy_net, replay_buffer, n_updates=100, batch_size=64, command_scale=0.01):
    global i_updates
    all_costs = []
    for i in range(n_updates):
        batch_input = np.zeros((batch_size, np.prod(env.observation_space.shape)+2))
        batch_label = np.zeros((batch_size))
        for b in range(batch_size):
            sample_episode = np.random.randint(0, len(replay_buffer))
            sample_t1 = np.random.randint(0, len(replay_buffer[sample_episode]['observation']))
            sample_t2 = len(replay_buffer[sample_episode]['observation'])
            sample_horizon = (sample_t2-sample_t1) * command_scale
            sample_mem = replay_buffer[sample_episode]['observation'][sample_t1]
            sample_desired_reward = sum(replay_buffer[sample_episode]['reward'][sample_t1:sample_t2])*command_scale
            network_input = np.append(sample_mem, [sample_desired_reward, sample_horizon])
            label = replay_buffer[sample_episode]['action'][sample_t1]
            batch_input[b] = network_input
            batch_label[b] = label
        batch_input = torch.tensor(batch_input).double()
        batch_label = torch.tensor(batch_label).long()
        pred = policy_net(batch_input)
        cost = F.cross_entropy(pred, batch_label)
        if log_to_tensorboard: writer.add_scalar('Cost/NN update', cost.item() , i_updates)    # write loss to a graph
        all_costs.append(cost.item())
        cost.backward()
        policy_net.optimizer.step()
        policy_net.optimizer.zero_grad()
        i_updates+=1
    return np.mean(all_costs)

#Return a greedy policy from a given network
def create_greedy_policy(policy_network):
    def policy(obs):
        action_logits = policy_network(obs)
        action = np.argmax(action_logits.detach().numpy())
        return action
    return policy

#Return a stochastic policy from a given network
def create_stochastic_policy(policy_network):
    def policy(obs):
        action_logits = policy_network(obs)
        action_probs = F.softmax(action_logits, dim=-1)
        action = torch.distributions.Categorical(action_probs).sample().item()
        return action
    return policy

In [56]:
#Define hyperparameters, initialize behaviour function
env = gym.make('LunarLander-v2')
i_episode=0
i_updates=0
replay_buffer = []
log_to_tensorboard = True 

replay_size = 600
last_few = 75
batch_size = 32
n_warm_up_episodes = 50
n_episodes_per_iter = 50
n_updates_per_iter = 300
command_scale = 0.01
lr = 0.001

agent = FCNN_AGENT().double()
agent.create_optimizer(lr)

stochastic_policy = create_stochastic_policy(agent)
greedy_policy = create_greedy_policy(agent)

# SET UP TRAINING VISUALISATION
if log_to_tensorboard: from torch.utils.tensorboard import SummaryWriter
if log_to_tensorboard: writer = SummaryWriter() # we will use this to show our models performance on a graph using tensorboard

#Collect warm up episodes
replay_buffer = collect_experience(random_policy, replay_buffer, replay_size, last_few, n_warm_up_episodes, command_scale, log_to_tensorboard)
train_net(agent, replay_buffer, n_updates_per_iter, batch_size, command_scale)

Episode 1 finished after 78 timesteps. Return = -128.4480682875527
Episode 2 finished after 89 timesteps. Return = -387.88715583374216
Episode 3 finished after 77 timesteps. Return = -95.10879230530807
Episode 4 finished after 72 timesteps. Return = -102.74438279096503
Episode 5 finished after 116 timesteps. Return = -253.90123618001869
Episode 6 finished after 107 timesteps. Return = -178.8621093404991
Episode 7 finished after 110 timesteps. Return = -424.2903105389424
Episode 8 finished after 69 timesteps. Return = -90.45928541886127
Episode 9 finished after 85 timesteps. Return = -90.34062619706825
Episode 10 finished after 110 timesteps. Return = -257.37052024828347
Episode 11 finished after 95 timesteps. Return = -171.70578091801366
Episode 12 finished after 65 timesteps. Return = -264.4237434177501
Episode 13 finished after 128 timesteps. Return = -146.38984817020642
Episode 14 finished after 73 timesteps. Return = -95.87199679628412
Episode 15 finished after 67 timesteps. Return

1.3918637607680855

In [57]:
#Collect experience and train behaviour function for given number of iterations
n_iters = 1000
for i in range(n_iters):
    replay_buffer = collect_experience(stochastic_policy, replay_buffer, replay_size, last_few, n_episodes_per_iter, command_scale, log_to_tensorboard)
    train_net(agent, replay_buffer, n_updates_per_iter, batch_size, command_scale)

Episode 51 finished after 137 timesteps. Return = -446.3506995863737
Episode 52 finished after 77 timesteps. Return = -140.5196864731071
Episode 53 finished after 70 timesteps. Return = -85.56728471718694
Episode 54 finished after 71 timesteps. Return = -94.78256978743447
Episode 55 finished after 114 timesteps. Return = -65.1801625049948
Episode 56 finished after 71 timesteps. Return = -315.67881711784116
Episode 57 finished after 120 timesteps. Return = -67.65283374333278
Episode 58 finished after 92 timesteps. Return = -413.1242067143709
Episode 59 finished after 96 timesteps. Return = -134.2708548742756
Episode 60 finished after 96 timesteps. Return = -115.75873117970687
Episode 61 finished after 88 timesteps. Return = -127.17007197006436
Episode 62 finished after 112 timesteps. Return = -301.4526746821167
Episode 63 finished after 113 timesteps. Return = -211.53988109680003
Episode 64 finished after 68 timesteps. Return = -119.3757190542396
Episode 65 finished after 93 timesteps. 

Episode 174 finished after 69 timesteps. Return = -104.16899601506384
Episode 175 finished after 84 timesteps. Return = -96.07286939569624
Episode 176 finished after 131 timesteps. Return = -139.4199800131397
Episode 177 finished after 74 timesteps. Return = -91.46291264096092
Episode 178 finished after 115 timesteps. Return = -90.51635088079755
Episode 179 finished after 98 timesteps. Return = -292.12224853522514
Episode 180 finished after 79 timesteps. Return = -143.27721094359757
Episode 181 finished after 63 timesteps. Return = -124.11629519864661
Episode 182 finished after 84 timesteps. Return = -133.22669053051544
Episode 183 finished after 95 timesteps. Return = -171.2783776465923
Episode 184 finished after 96 timesteps. Return = -93.28849817463086
Episode 185 finished after 92 timesteps. Return = -114.1557484370562
Episode 186 finished after 62 timesteps. Return = -102.55848158584956
Episode 187 finished after 121 timesteps. Return = -151.4838767167584
Episode 188 finished afte

Episode 293 finished after 98 timesteps. Return = -126.0177373450193
Episode 294 finished after 85 timesteps. Return = -41.33706913169587
Episode 295 finished after 110 timesteps. Return = -104.89408300251816
Episode 296 finished after 122 timesteps. Return = -122.06720661409226
Episode 297 finished after 67 timesteps. Return = -93.27479221165001
Episode 298 finished after 92 timesteps. Return = -111.51403266195848
Episode 299 finished after 96 timesteps. Return = -278.2307061737931
Episode 300 finished after 59 timesteps. Return = -68.63417569381657
Episode 301 finished after 113 timesteps. Return = -174.5064029495523
Episode 302 finished after 108 timesteps. Return = -138.43405525667737
Episode 303 finished after 87 timesteps. Return = -364.50838279260915
Episode 304 finished after 122 timesteps. Return = -90.0417980348177
Episode 305 finished after 63 timesteps. Return = -74.82944791207059
Episode 306 finished after 78 timesteps. Return = 12.349499040470633
Episode 307 finished afte

Episode 412 finished after 107 timesteps. Return = -190.50868161911495
Episode 413 finished after 89 timesteps. Return = -91.3439183969028
Episode 414 finished after 65 timesteps. Return = -95.84527173026802
Episode 415 finished after 106 timesteps. Return = -191.69772948261556
Episode 416 finished after 64 timesteps. Return = -120.5916127759939
Episode 417 finished after 69 timesteps. Return = -99.47554883556316
Episode 418 finished after 69 timesteps. Return = -138.91836274515907
Episode 419 finished after 64 timesteps. Return = -117.55656377805636
Episode 420 finished after 112 timesteps. Return = -100.63873253717723
Episode 421 finished after 73 timesteps. Return = -171.74789328967256
Episode 422 finished after 88 timesteps. Return = -80.93309778150977
Episode 423 finished after 95 timesteps. Return = -227.75923425296233
Episode 424 finished after 72 timesteps. Return = -122.62037874926172
Episode 425 finished after 61 timesteps. Return = -145.92015232939673
Episode 426 finished af

Episode 531 finished after 61 timesteps. Return = -246.37178230884416
Episode 532 finished after 69 timesteps. Return = -184.35148436013293
Episode 533 finished after 88 timesteps. Return = -162.70473431205681
Episode 534 finished after 71 timesteps. Return = -110.32276938173662
Episode 535 finished after 89 timesteps. Return = -191.55526324541177
Episode 536 finished after 88 timesteps. Return = -137.48470208927156
Episode 537 finished after 104 timesteps. Return = -347.35429462174017
Episode 538 finished after 83 timesteps. Return = -128.69897913425598
Episode 539 finished after 101 timesteps. Return = -321.2059091532176
Episode 540 finished after 112 timesteps. Return = -138.7140622015092
Episode 541 finished after 100 timesteps. Return = -186.51693224204348
Episode 542 finished after 60 timesteps. Return = -83.74087998108158
Episode 543 finished after 91 timesteps. Return = -271.50812190237775
Episode 544 finished after 92 timesteps. Return = -82.63524718879307
Episode 545 finished

Episode 651 finished after 89 timesteps. Return = -228.12220614249142
Episode 652 finished after 100 timesteps. Return = -185.45600648118116
Episode 653 finished after 76 timesteps. Return = -143.29087653051056
Episode 654 finished after 118 timesteps. Return = -102.72088375033732
Episode 655 finished after 147 timesteps. Return = -174.84816168079723
Episode 656 finished after 87 timesteps. Return = -210.20080940540765
Episode 657 finished after 103 timesteps. Return = -247.27492503958965
Episode 658 finished after 65 timesteps. Return = -173.65910815190708
Episode 659 finished after 69 timesteps. Return = -136.58032105500303
Episode 660 finished after 61 timesteps. Return = -48.947477507433305
Episode 661 finished after 97 timesteps. Return = -320.6362398486302
Episode 662 finished after 82 timesteps. Return = -94.23389179871761
Episode 663 finished after 115 timesteps. Return = -143.3302872722158
Episode 664 finished after 63 timesteps. Return = -110.19392325927913
Episode 665 finish

Episode 770 finished after 93 timesteps. Return = -361.8565803384485
Episode 771 finished after 108 timesteps. Return = -135.48566836589796
Episode 772 finished after 88 timesteps. Return = -250.3636633763952
Episode 773 finished after 91 timesteps. Return = -135.93981507724035
Episode 774 finished after 83 timesteps. Return = -114.945171888071
Episode 775 finished after 97 timesteps. Return = -268.6064181853361
Episode 776 finished after 86 timesteps. Return = -112.61470449103413
Episode 777 finished after 76 timesteps. Return = -134.16225054043275
Episode 778 finished after 73 timesteps. Return = -159.8578804635983
Episode 779 finished after 115 timesteps. Return = -270.3042330619008
Episode 780 finished after 75 timesteps. Return = -90.15903085804347
Episode 781 finished after 110 timesteps. Return = -89.53135145783126
Episode 782 finished after 106 timesteps. Return = -93.1788889643171
Episode 783 finished after 63 timesteps. Return = -87.07968244575486
Episode 784 finished after 7

Episode 891 finished after 65 timesteps. Return = -96.93459357956606
Episode 892 finished after 70 timesteps. Return = -129.0138524831833
Episode 893 finished after 110 timesteps. Return = -129.28457841173622
Episode 894 finished after 82 timesteps. Return = -111.65611749548567
Episode 895 finished after 68 timesteps. Return = -102.59439201002591
Episode 896 finished after 100 timesteps. Return = -244.13858840666458
Episode 897 finished after 93 timesteps. Return = -162.34595983228257
Episode 898 finished after 117 timesteps. Return = -69.1203000088697
Episode 899 finished after 106 timesteps. Return = -134.01951849012602
Episode 900 finished after 68 timesteps. Return = -109.43949350388209
Episode 901 finished after 104 timesteps. Return = -89.9339252191532
Episode 902 finished after 66 timesteps. Return = -137.93576225471622
Episode 903 finished after 71 timesteps. Return = -130.13976292711345
Episode 904 finished after 103 timesteps. Return = -439.42475809934984
Episode 905 finished

Episode 1013 finished after 65 timesteps. Return = -88.45958021041145
Episode 1014 finished after 83 timesteps. Return = -107.9186525051391
Episode 1015 finished after 102 timesteps. Return = -180.21820573365346
Episode 1016 finished after 111 timesteps. Return = -116.64505797449951
Episode 1017 finished after 78 timesteps. Return = -122.6802303980975
Episode 1018 finished after 83 timesteps. Return = -127.47463020559329
Episode 1019 finished after 95 timesteps. Return = -101.23331010897266
Episode 1020 finished after 89 timesteps. Return = -164.24845534550627
Episode 1021 finished after 86 timesteps. Return = -59.48356706333652
Episode 1022 finished after 69 timesteps. Return = -77.76455948547365
Episode 1023 finished after 75 timesteps. Return = -172.05703557012384
Episode 1024 finished after 100 timesteps. Return = -198.3139196278279
Episode 1025 finished after 149 timesteps. Return = -226.21538281793028
Episode 1026 finished after 87 timesteps. Return = -132.49614493738437
Episode 

Episode 1132 finished after 57 timesteps. Return = -138.2695971758805
Episode 1133 finished after 72 timesteps. Return = -94.57885358579016
Episode 1134 finished after 106 timesteps. Return = -116.87686692018109
Episode 1135 finished after 71 timesteps. Return = -81.25965558927656
Episode 1136 finished after 103 timesteps. Return = -108.20637245317961
Episode 1137 finished after 82 timesteps. Return = -90.40593497598263
Episode 1138 finished after 98 timesteps. Return = -113.90729454444282
Episode 1139 finished after 87 timesteps. Return = 20.631578925663945
Episode 1140 finished after 79 timesteps. Return = -144.5357465799871
Episode 1141 finished after 72 timesteps. Return = -93.82775662617273
Episode 1142 finished after 70 timesteps. Return = -70.05193652521507
Episode 1143 finished after 77 timesteps. Return = -107.7858710485022
Episode 1144 finished after 94 timesteps. Return = -74.91320019861327
Episode 1145 finished after 78 timesteps. Return = -18.900678187387825
Episode 1146 f

Episode 1250 finished after 119 timesteps. Return = -332.7259759746458
Episode 1251 finished after 63 timesteps. Return = -55.008850081145184
Episode 1252 finished after 84 timesteps. Return = -116.15128190431903
Episode 1253 finished after 79 timesteps. Return = -51.88700284512791
Episode 1254 finished after 124 timesteps. Return = -110.45449933406938
Episode 1255 finished after 69 timesteps. Return = -79.40153731695843
Episode 1256 finished after 82 timesteps. Return = -144.77769289054464
Episode 1257 finished after 110 timesteps. Return = -248.85262202541747
Episode 1258 finished after 65 timesteps. Return = -58.45343312852228
Episode 1259 finished after 77 timesteps. Return = -79.42801623948986
Episode 1260 finished after 66 timesteps. Return = -107.333429522547
Episode 1261 finished after 118 timesteps. Return = -118.93839788915923
Episode 1262 finished after 84 timesteps. Return = -301.2421651623846
Episode 1263 finished after 71 timesteps. Return = -111.77003096074526
Episode 12

Episode 1367 finished after 68 timesteps. Return = -228.59114677115133
Episode 1368 finished after 112 timesteps. Return = -225.9833413445663
Episode 1369 finished after 89 timesteps. Return = -111.22829384477166
Episode 1370 finished after 66 timesteps. Return = -202.83345031308107
Episode 1371 finished after 104 timesteps. Return = -206.62670664032416
Episode 1372 finished after 64 timesteps. Return = -81.69461601776517
Episode 1373 finished after 88 timesteps. Return = -94.79198794266736
Episode 1374 finished after 109 timesteps. Return = -121.33048407252141
Episode 1375 finished after 69 timesteps. Return = -92.21788729586392
Episode 1376 finished after 116 timesteps. Return = -106.46027654045564
Episode 1377 finished after 95 timesteps. Return = -134.69080027575575
Episode 1378 finished after 119 timesteps. Return = -143.029825659174
Episode 1379 finished after 92 timesteps. Return = -87.75259186155537
Episode 1380 finished after 128 timesteps. Return = -100.72369530154172
Episode

Episode 1485 finished after 77 timesteps. Return = -85.5287289705638
Episode 1486 finished after 138 timesteps. Return = -440.05310023843776
Episode 1487 finished after 72 timesteps. Return = -106.567931202934
Episode 1488 finished after 114 timesteps. Return = -141.559200734021
Episode 1489 finished after 110 timesteps. Return = -76.68487029571808
Episode 1490 finished after 64 timesteps. Return = -100.49294848265485
Episode 1491 finished after 73 timesteps. Return = -53.08938616301481
Episode 1492 finished after 78 timesteps. Return = -17.280749534844404
Episode 1493 finished after 73 timesteps. Return = -102.49149544186264
Episode 1494 finished after 117 timesteps. Return = -115.1074106993381
Episode 1495 finished after 64 timesteps. Return = -61.83487951417389
Episode 1496 finished after 94 timesteps. Return = 7.799987629348806
Episode 1497 finished after 86 timesteps. Return = -110.4661972047806
Episode 1498 finished after 85 timesteps. Return = -97.41588788008077
Episode 1499 fin

Episode 1601 finished after 134 timesteps. Return = -142.1334206736833
Episode 1602 finished after 68 timesteps. Return = -130.75143700750368
Episode 1603 finished after 63 timesteps. Return = -53.66932285815433
Episode 1604 finished after 76 timesteps. Return = -99.97990443997702
Episode 1605 finished after 72 timesteps. Return = -32.57833504396217
Episode 1606 finished after 84 timesteps. Return = -105.72344778558535
Episode 1607 finished after 73 timesteps. Return = -215.0983914240749
Episode 1608 finished after 97 timesteps. Return = -377.1627189922241
Episode 1609 finished after 73 timesteps. Return = -35.55362707662505
Episode 1610 finished after 125 timesteps. Return = -81.69479198014348
Episode 1611 finished after 78 timesteps. Return = -112.66204672545173
Episode 1612 finished after 64 timesteps. Return = -106.59084455505602
Episode 1613 finished after 71 timesteps. Return = -80.66830964709045
Episode 1614 finished after 67 timesteps. Return = -61.80959586947712
Episode 1615 f

Episode 1717 finished after 85 timesteps. Return = -82.19684003420853
Episode 1718 finished after 62 timesteps. Return = -70.20765583579905
Episode 1719 finished after 127 timesteps. Return = 4.338780153617876
Episode 1720 finished after 80 timesteps. Return = -74.21137253974706
Episode 1721 finished after 78 timesteps. Return = -141.24644911124622
Episode 1722 finished after 91 timesteps. Return = -113.18724116034336
Episode 1723 finished after 82 timesteps. Return = -83.972712819404
Episode 1724 finished after 72 timesteps. Return = -71.98836960338319
Episode 1725 finished after 78 timesteps. Return = -83.49384484152405
Episode 1726 finished after 117 timesteps. Return = -125.25007667017735
Episode 1727 finished after 61 timesteps. Return = -103.25962770967716
Episode 1728 finished after 126 timesteps. Return = -97.37290130833023
Episode 1729 finished after 106 timesteps. Return = -181.81687421544075
Episode 1730 finished after 101 timesteps. Return = -159.8248397189971
Episode 1731 

Episode 1834 finished after 71 timesteps. Return = -112.25741489237134
Episode 1835 finished after 63 timesteps. Return = -136.09656951977905
Episode 1836 finished after 95 timesteps. Return = -162.75557306133058
Episode 1837 finished after 95 timesteps. Return = -181.90418877929258
Episode 1838 finished after 94 timesteps. Return = -413.07851840650966
Episode 1839 finished after 74 timesteps. Return = -105.08792799184098
Episode 1840 finished after 104 timesteps. Return = -87.75397378069886
Episode 1841 finished after 83 timesteps. Return = -94.31464412141919
Episode 1842 finished after 96 timesteps. Return = -155.6913369744877
Episode 1843 finished after 97 timesteps. Return = -97.76308792020765
Episode 1844 finished after 80 timesteps. Return = -162.1249520602369
Episode 1845 finished after 98 timesteps. Return = -143.7528332481138
Episode 1846 finished after 115 timesteps. Return = 28.20827963790981
Episode 1847 finished after 64 timesteps. Return = -154.00362440124013
Episode 1848

Episode 1950 finished after 115 timesteps. Return = -157.56413146012832
Episode 1951 finished after 83 timesteps. Return = -110.56307953073737
Episode 1952 finished after 78 timesteps. Return = -141.49167196979374
Episode 1953 finished after 63 timesteps. Return = -51.74599226092759
Episode 1954 finished after 61 timesteps. Return = -90.68530085055511
Episode 1955 finished after 73 timesteps. Return = -100.95880080710675
Episode 1956 finished after 92 timesteps. Return = -136.88718719659025
Episode 1957 finished after 98 timesteps. Return = -118.73927215394502
Episode 1958 finished after 131 timesteps. Return = -118.76528982385999
Episode 1959 finished after 104 timesteps. Return = -115.06219600565912
Episode 1960 finished after 67 timesteps. Return = -106.20284922890303
Episode 1961 finished after 67 timesteps. Return = -105.01447626068112
Episode 1962 finished after 83 timesteps. Return = -114.10818972158943
Episode 1963 finished after 64 timesteps. Return = -81.05240180713687
Episod

Episode 2068 finished after 107 timesteps. Return = -229.22140602820878
Episode 2069 finished after 101 timesteps. Return = -150.59283404835452
Episode 2070 finished after 116 timesteps. Return = -203.55625892988837
Episode 2071 finished after 92 timesteps. Return = -86.9218320968182
Episode 2072 finished after 99 timesteps. Return = -163.57159271160472
Episode 2073 finished after 113 timesteps. Return = -78.34035196329782
Episode 2074 finished after 82 timesteps. Return = -91.43933650171526
Episode 2075 finished after 67 timesteps. Return = -97.60198068826814
Episode 2076 finished after 62 timesteps. Return = -62.14959783415489
Episode 2077 finished after 92 timesteps. Return = -220.33706732217803
Episode 2078 finished after 116 timesteps. Return = -320.61004062710754
Episode 2079 finished after 74 timesteps. Return = -99.94522750641605
Episode 2080 finished after 81 timesteps. Return = -145.52828324717413
Episode 2081 finished after 102 timesteps. Return = -122.65855014532693
Episode

Episode 2187 finished after 75 timesteps. Return = -127.27695285396557
Episode 2188 finished after 107 timesteps. Return = -80.0329675819642
Episode 2189 finished after 89 timesteps. Return = -189.63965927194891
Episode 2190 finished after 84 timesteps. Return = -105.9654536388988
Episode 2191 finished after 78 timesteps. Return = -47.17951438326127
Episode 2192 finished after 96 timesteps. Return = -145.17428937647085
Episode 2193 finished after 98 timesteps. Return = -135.29439704417462
Episode 2194 finished after 66 timesteps. Return = -131.93859844571313
Episode 2195 finished after 62 timesteps. Return = -104.17156664223673
Episode 2196 finished after 75 timesteps. Return = -75.26607258676214
Episode 2197 finished after 95 timesteps. Return = -185.5066970187989
Episode 2198 finished after 80 timesteps. Return = -110.366410160591
Episode 2199 finished after 96 timesteps. Return = -102.88492502584032
Episode 2200 finished after 101 timesteps. Return = -124.82783169929827
Episode 2201

Episode 2306 finished after 77 timesteps. Return = -90.29164662929972
Episode 2307 finished after 97 timesteps. Return = -149.02170711134272
Episode 2308 finished after 114 timesteps. Return = -113.32506815734101
Episode 2309 finished after 92 timesteps. Return = -110.31626310677323
Episode 2310 finished after 62 timesteps. Return = -87.51360622921275
Episode 2311 finished after 141 timesteps. Return = -12.708356319145508
Episode 2312 finished after 107 timesteps. Return = -66.33480560622948
Episode 2313 finished after 70 timesteps. Return = -76.47514166851366
Episode 2314 finished after 76 timesteps. Return = -188.82441161507387
Episode 2315 finished after 66 timesteps. Return = -143.4827052465926
Episode 2316 finished after 103 timesteps. Return = -115.17987906801436
Episode 2317 finished after 62 timesteps. Return = -89.9469884935432
Episode 2318 finished after 107 timesteps. Return = -108.62470894407755
Episode 2319 finished after 68 timesteps. Return = -158.32761234422162
Episode 

Episode 2422 finished after 116 timesteps. Return = -71.9519216471641
Episode 2423 finished after 61 timesteps. Return = -134.21395502558335
Episode 2424 finished after 135 timesteps. Return = -303.0777319821201
Episode 2425 finished after 128 timesteps. Return = -111.08378340593278
Episode 2426 finished after 103 timesteps. Return = -120.96419715596919
Episode 2427 finished after 76 timesteps. Return = -128.14289298486472
Episode 2428 finished after 108 timesteps. Return = -146.62268299471364
Episode 2429 finished after 110 timesteps. Return = -125.80935438356143
Episode 2430 finished after 62 timesteps. Return = -96.04086674453632
Episode 2431 finished after 70 timesteps. Return = -94.96210471397275
Episode 2432 finished after 103 timesteps. Return = -278.20927119726866
Episode 2433 finished after 106 timesteps. Return = -184.55217848194985
Episode 2434 finished after 123 timesteps. Return = -141.88160201097133
Episode 2435 finished after 90 timesteps. Return = -107.13856648735232
Ep

Episode 2538 finished after 97 timesteps. Return = -224.36052913575776
Episode 2539 finished after 95 timesteps. Return = -108.02833422559983
Episode 2540 finished after 96 timesteps. Return = -134.82963793558804
Episode 2541 finished after 69 timesteps. Return = -91.69984254283854
Episode 2542 finished after 82 timesteps. Return = -61.01604984176633
Episode 2543 finished after 115 timesteps. Return = -144.07663897851492
Episode 2544 finished after 58 timesteps. Return = -91.3370154420889
Episode 2545 finished after 91 timesteps. Return = -142.41482625924237
Episode 2546 finished after 83 timesteps. Return = -125.77033570187804
Episode 2547 finished after 103 timesteps. Return = -110.28013999242052
Episode 2548 finished after 71 timesteps. Return = -74.191418221381
Episode 2549 finished after 96 timesteps. Return = -124.32498098619942
Episode 2550 finished after 74 timesteps. Return = -82.20661271698434
Episode 2551 finished after 63 timesteps. Return = -87.26098092554776
Episode 2552 

Episode 2657 finished after 73 timesteps. Return = -179.4971353945062
Episode 2658 finished after 59 timesteps. Return = -138.58080938727505
Episode 2659 finished after 118 timesteps. Return = -125.16845317702207
Episode 2660 finished after 64 timesteps. Return = -118.22541305705752
Episode 2661 finished after 108 timesteps. Return = -116.4126575188893
Episode 2662 finished after 70 timesteps. Return = -106.19837636866741
Episode 2663 finished after 82 timesteps. Return = -168.4693659313284
Episode 2664 finished after 116 timesteps. Return = -4.2295779018631094
Episode 2665 finished after 94 timesteps. Return = -117.57461018595916
Episode 2666 finished after 94 timesteps. Return = -184.00741955185816
Episode 2667 finished after 92 timesteps. Return = -328.5747288171206
Episode 2668 finished after 63 timesteps. Return = -80.40177482118241
Episode 2669 finished after 72 timesteps. Return = -83.05354736422733
Episode 2670 finished after 86 timesteps. Return = -161.49665327761312
Episode 2

Episode 2777 finished after 89 timesteps. Return = -121.86017211509669
Episode 2778 finished after 92 timesteps. Return = -90.14669159409603
Episode 2779 finished after 60 timesteps. Return = -70.82182825082069
Episode 2780 finished after 67 timesteps. Return = -67.2535716439236
Episode 2781 finished after 85 timesteps. Return = -140.1370957489271
Episode 2782 finished after 80 timesteps. Return = -68.83820016411902
Episode 2783 finished after 112 timesteps. Return = -108.40585331260453
Episode 2784 finished after 93 timesteps. Return = -127.73518670638795
Episode 2785 finished after 76 timesteps. Return = -371.2762282809266
Episode 2786 finished after 71 timesteps. Return = -117.708207677774
Episode 2787 finished after 99 timesteps. Return = -91.73901221061936
Episode 2788 finished after 70 timesteps. Return = -180.31759344005047
Episode 2789 finished after 108 timesteps. Return = -152.64296873876367
Episode 2790 finished after 116 timesteps. Return = -78.60232139390557
Episode 2791 f

Episode 2896 finished after 112 timesteps. Return = -115.07687314641323
Episode 2897 finished after 114 timesteps. Return = -207.25477176315297
Episode 2898 finished after 72 timesteps. Return = -46.11702172716179
Episode 2899 finished after 75 timesteps. Return = -99.29300026414307
Episode 2900 finished after 89 timesteps. Return = -107.32957744373869
Episode 2901 finished after 83 timesteps. Return = -79.03473801824282
Episode 2902 finished after 82 timesteps. Return = -134.18275569595215
Episode 2903 finished after 88 timesteps. Return = -90.92844508561903
Episode 2904 finished after 66 timesteps. Return = -77.20424509693358
Episode 2905 finished after 84 timesteps. Return = -109.27379021500273
Episode 2906 finished after 66 timesteps. Return = -59.65039567206397
Episode 2907 finished after 83 timesteps. Return = -108.11512973587932
Episode 2908 finished after 98 timesteps. Return = -121.1389577370764
Episode 2909 finished after 84 timesteps. Return = -121.95421752230783
Episode 291

Episode 3016 finished after 64 timesteps. Return = -109.76638205442693
Episode 3017 finished after 63 timesteps. Return = -79.66055799030023
Episode 3018 finished after 128 timesteps. Return = -158.62755758765212
Episode 3019 finished after 82 timesteps. Return = -117.34806054297508
Episode 3020 finished after 112 timesteps. Return = -255.31477722272513
Episode 3021 finished after 84 timesteps. Return = -164.88235104406363
Episode 3022 finished after 70 timesteps. Return = -55.19212769404812
Episode 3023 finished after 73 timesteps. Return = -168.59029589595303
Episode 3024 finished after 80 timesteps. Return = -132.0191289479098
Episode 3025 finished after 67 timesteps. Return = -89.94404998522567
Episode 3026 finished after 77 timesteps. Return = -285.03682503500306
Episode 3027 finished after 86 timesteps. Return = -177.96203940319958
Episode 3028 finished after 93 timesteps. Return = -125.76471238642227
Episode 3029 finished after 113 timesteps. Return = -184.6805897743543
Episode 

Episode 3132 finished after 86 timesteps. Return = -87.61765315223617
Episode 3133 finished after 120 timesteps. Return = -75.22537865619333
Episode 3134 finished after 100 timesteps. Return = -239.44426393677293
Episode 3135 finished after 67 timesteps. Return = -65.48376077366719
Episode 3136 finished after 81 timesteps. Return = -82.62412412515114
Episode 3137 finished after 110 timesteps. Return = -112.40083043169693
Episode 3138 finished after 63 timesteps. Return = -95.24644218449643
Episode 3139 finished after 134 timesteps. Return = -28.665487045831696
Episode 3140 finished after 93 timesteps. Return = -127.36704914362255
Episode 3141 finished after 140 timesteps. Return = -7.824161148022839
Episode 3142 finished after 101 timesteps. Return = -165.67157115445337
Episode 3143 finished after 115 timesteps. Return = -174.93841263607476
Episode 3144 finished after 91 timesteps. Return = -56.420654024629826
Episode 3145 finished after 89 timesteps. Return = -63.78824531450823
Episod

Episode 3251 finished after 104 timesteps. Return = -109.86001516912017
Episode 3252 finished after 109 timesteps. Return = -226.1781374806227
Episode 3253 finished after 86 timesteps. Return = -151.737521201921
Episode 3254 finished after 89 timesteps. Return = -108.89990579149224
Episode 3255 finished after 72 timesteps. Return = -37.66616379446221
Episode 3256 finished after 127 timesteps. Return = -341.43778772915357
Episode 3257 finished after 68 timesteps. Return = -95.7671193626859
Episode 3258 finished after 88 timesteps. Return = -121.64633045062868
Episode 3259 finished after 109 timesteps. Return = -130.6233583092965
Episode 3260 finished after 81 timesteps. Return = -57.7889151571631
Episode 3261 finished after 114 timesteps. Return = -192.5290784010817
Episode 3262 finished after 66 timesteps. Return = -82.8399659260509
Episode 3263 finished after 83 timesteps. Return = -26.45036265821298
Episode 3264 finished after 81 timesteps. Return = -129.06581996224335
Episode 3265 f

Episode 3368 finished after 60 timesteps. Return = -159.25667201823285
Episode 3369 finished after 70 timesteps. Return = -118.74418725629769
Episode 3370 finished after 87 timesteps. Return = -196.616725021536
Episode 3371 finished after 89 timesteps. Return = -138.33967160655487
Episode 3372 finished after 71 timesteps. Return = -154.29761125367185
Episode 3373 finished after 105 timesteps. Return = -212.3391213033425
Episode 3374 finished after 94 timesteps. Return = -127.47396195588036
Episode 3375 finished after 66 timesteps. Return = -115.95760503264893
Episode 3376 finished after 109 timesteps. Return = 66.90791919956717
Episode 3377 finished after 97 timesteps. Return = -158.07918970936475
Episode 3378 finished after 105 timesteps. Return = -150.44565299310162
Episode 3379 finished after 88 timesteps. Return = -199.99029593502559
Episode 3380 finished after 77 timesteps. Return = -78.03326118076444
Episode 3381 finished after 84 timesteps. Return = -139.97691700622016
Episode 3

Episode 3487 finished after 61 timesteps. Return = -211.9553797088694
Episode 3488 finished after 92 timesteps. Return = -258.6628288279677
Episode 3489 finished after 76 timesteps. Return = -231.4808073756695
Episode 3490 finished after 70 timesteps. Return = -89.65980717400026
Episode 3491 finished after 79 timesteps. Return = -136.35545110942456
Episode 3492 finished after 80 timesteps. Return = -83.99038893294059
Episode 3493 finished after 108 timesteps. Return = -205.1180002404171
Episode 3494 finished after 78 timesteps. Return = -203.1892041553272
Episode 3495 finished after 83 timesteps. Return = -111.22611927703765
Episode 3496 finished after 71 timesteps. Return = -56.21036728005019
Episode 3497 finished after 70 timesteps. Return = -83.63423260642884
Episode 3498 finished after 72 timesteps. Return = -252.27624682354298
Episode 3499 finished after 68 timesteps. Return = -78.02160227758986
Episode 3500 finished after 124 timesteps. Return = -82.1940567328942
Episode 3501 fin

Episode 3607 finished after 74 timesteps. Return = -22.27019177815572
Episode 3608 finished after 78 timesteps. Return = -73.41817595382551
Episode 3609 finished after 89 timesteps. Return = -433.6822208980352
Episode 3610 finished after 117 timesteps. Return = -183.0865349294996
Episode 3611 finished after 70 timesteps. Return = -99.08053720297944
Episode 3612 finished after 134 timesteps. Return = -96.11604649620995
Episode 3613 finished after 99 timesteps. Return = -199.69068197739426
Episode 3614 finished after 78 timesteps. Return = -102.03584574962935
Episode 3615 finished after 66 timesteps. Return = -79.90452833427949
Episode 3616 finished after 81 timesteps. Return = -80.871922251248
Episode 3617 finished after 126 timesteps. Return = -71.77205631384555
Episode 3618 finished after 108 timesteps. Return = -60.7423039682411
Episode 3619 finished after 71 timesteps. Return = -79.51959305194524
Episode 3620 finished after 69 timesteps. Return = -120.07775404990556
Episode 3621 fin

Episode 3724 finished after 82 timesteps. Return = -100.70521756279616
Episode 3725 finished after 89 timesteps. Return = -137.0639331438652
Episode 3726 finished after 76 timesteps. Return = -140.80255013229015
Episode 3727 finished after 87 timesteps. Return = -248.6995683250136
Episode 3728 finished after 100 timesteps. Return = -192.01171086284205
Episode 3729 finished after 69 timesteps. Return = -70.37999372938766
Episode 3730 finished after 97 timesteps. Return = -194.19382282592045
Episode 3731 finished after 71 timesteps. Return = -121.99367964201772
Episode 3732 finished after 83 timesteps. Return = -110.12137107967048
Episode 3733 finished after 91 timesteps. Return = -244.4943383629803
Episode 3734 finished after 66 timesteps. Return = -144.54613982743115
Episode 3735 finished after 66 timesteps. Return = -125.13949818663559
Episode 3736 finished after 96 timesteps. Return = -125.20112636739007
Episode 3737 finished after 94 timesteps. Return = -96.93775305017724
Episode 37

Episode 3841 finished after 84 timesteps. Return = -130.44530579362424
Episode 3842 finished after 85 timesteps. Return = -75.03593321112818
Episode 3843 finished after 104 timesteps. Return = -117.3188188173526
Episode 3844 finished after 83 timesteps. Return = -111.6134579687905
Episode 3845 finished after 93 timesteps. Return = -87.70668567973263
Episode 3846 finished after 71 timesteps. Return = -105.02953812192557
Episode 3847 finished after 75 timesteps. Return = -93.76671366550688
Episode 3848 finished after 104 timesteps. Return = -194.70444826677783
Episode 3849 finished after 72 timesteps. Return = -100.33975826389916
Episode 3850 finished after 66 timesteps. Return = -77.29784367603693
Episode 3851 finished after 90 timesteps. Return = -153.65702052121927
Episode 3852 finished after 69 timesteps. Return = -97.74912764205918
Episode 3853 finished after 73 timesteps. Return = -291.30290831687194
Episode 3854 finished after 69 timesteps. Return = -73.73028882876599
Episode 3855

Episode 3957 finished after 106 timesteps. Return = -217.0333505897993
Episode 3958 finished after 104 timesteps. Return = -263.8035159528848
Episode 3959 finished after 59 timesteps. Return = -60.29206501674836
Episode 3960 finished after 99 timesteps. Return = -6.750245104156463
Episode 3961 finished after 74 timesteps. Return = -91.86242017158825
Episode 3962 finished after 64 timesteps. Return = -136.55102076057327
Episode 3963 finished after 79 timesteps. Return = -134.85887493425298
Episode 3964 finished after 78 timesteps. Return = -77.01499534552583
Episode 3965 finished after 125 timesteps. Return = -475.50670976793975
Episode 3966 finished after 126 timesteps. Return = -124.32706350739188
Episode 3967 finished after 94 timesteps. Return = -133.63205487907558
Episode 3968 finished after 94 timesteps. Return = -184.32424295672362
Episode 3969 finished after 86 timesteps. Return = -358.94611463417255
Episode 3970 finished after 110 timesteps. Return = -289.4425059471291
Episode 

Episode 4075 finished after 119 timesteps. Return = -167.84675679151417
Episode 4076 finished after 110 timesteps. Return = -282.3738791189304
Episode 4077 finished after 67 timesteps. Return = -84.54402393195966
Episode 4078 finished after 78 timesteps. Return = -177.23452245877547
Episode 4079 finished after 89 timesteps. Return = -104.33771756080642
Episode 4080 finished after 106 timesteps. Return = -180.8349396935304
Episode 4081 finished after 70 timesteps. Return = -105.38385638282257
Episode 4082 finished after 112 timesteps. Return = -52.68114498239113
Episode 4083 finished after 83 timesteps. Return = -192.46370767679616
Episode 4084 finished after 94 timesteps. Return = -178.411022456327
Episode 4085 finished after 75 timesteps. Return = -152.45051122939972
Episode 4086 finished after 127 timesteps. Return = -241.79439867320713
Episode 4087 finished after 108 timesteps. Return = -188.31788575347122
Episode 4088 finished after 71 timesteps. Return = -117.67010381426843
Episod

Episode 4192 finished after 93 timesteps. Return = -130.29847004599878
Episode 4193 finished after 111 timesteps. Return = -117.0451824999583
Episode 4194 finished after 117 timesteps. Return = -343.4368158428256
Episode 4195 finished after 91 timesteps. Return = -167.7196173603396
Episode 4196 finished after 65 timesteps. Return = -75.77530404612979
Episode 4197 finished after 96 timesteps. Return = -113.89135269982536
Episode 4198 finished after 88 timesteps. Return = -83.43523465189304
Episode 4199 finished after 105 timesteps. Return = -86.18459564179328
Episode 4200 finished after 120 timesteps. Return = -60.42702160923794
Episode 4201 finished after 120 timesteps. Return = -349.6369689228179
Episode 4202 finished after 59 timesteps. Return = -146.42654205740456
Episode 4203 finished after 73 timesteps. Return = -63.643164275518785
Episode 4204 finished after 74 timesteps. Return = -140.47482079569318
Episode 4205 finished after 66 timesteps. Return = -84.70129253623648
Episode 42

Episode 4312 finished after 119 timesteps. Return = -292.412300536342
Episode 4313 finished after 71 timesteps. Return = -181.28303746473654
Episode 4314 finished after 84 timesteps. Return = -140.7672096862241
Episode 4315 finished after 109 timesteps. Return = -118.90900356361668
Episode 4316 finished after 87 timesteps. Return = -136.23616903554998
Episode 4317 finished after 92 timesteps. Return = -152.073803229698
Episode 4318 finished after 102 timesteps. Return = -205.98063336498802
Episode 4319 finished after 57 timesteps. Return = -180.3996620965681
Episode 4320 finished after 73 timesteps. Return = -108.41912035548256
Episode 4321 finished after 97 timesteps. Return = -190.73154433254103
Episode 4322 finished after 82 timesteps. Return = -94.02493035486329
Episode 4323 finished after 102 timesteps. Return = -156.2885542918428
Episode 4324 finished after 69 timesteps. Return = -63.565119991449905
Episode 4325 finished after 62 timesteps. Return = -73.46866746793566
Episode 432

Episode 4431 finished after 98 timesteps. Return = -89.74321180855297
Episode 4432 finished after 79 timesteps. Return = -106.94489061040626
Episode 4433 finished after 70 timesteps. Return = -77.46409949765389
Episode 4434 finished after 69 timesteps. Return = -88.60075670950069
Episode 4435 finished after 121 timesteps. Return = -135.25547163964006
Episode 4436 finished after 97 timesteps. Return = -164.5634743058227
Episode 4437 finished after 75 timesteps. Return = -120.8424104601613
Episode 4438 finished after 73 timesteps. Return = -75.52501616103835
Episode 4439 finished after 75 timesteps. Return = -98.65922765411099
Episode 4440 finished after 79 timesteps. Return = -96.07656019652191
Episode 4441 finished after 75 timesteps. Return = -144.41360614203523
Episode 4442 finished after 99 timesteps. Return = -126.70289885350387
Episode 4443 finished after 71 timesteps. Return = -174.48217647816813
Episode 4444 finished after 90 timesteps. Return = -229.74409439209353
Episode 4445 

Episode 4551 finished after 98 timesteps. Return = -118.0058598333478
Episode 4552 finished after 84 timesteps. Return = -307.6052980532925
Episode 4553 finished after 116 timesteps. Return = -21.818017161574318
Episode 4554 finished after 70 timesteps. Return = -65.74274132828538
Episode 4555 finished after 66 timesteps. Return = -125.24048032468593
Episode 4556 finished after 74 timesteps. Return = -157.5188825968003
Episode 4557 finished after 68 timesteps. Return = -128.4704986298416
Episode 4558 finished after 109 timesteps. Return = -137.30884662662422
Episode 4559 finished after 55 timesteps. Return = -82.17292436823803
Episode 4560 finished after 56 timesteps. Return = -214.01737823135466
Episode 4561 finished after 169 timesteps. Return = -601.8355325458633
Episode 4562 finished after 94 timesteps. Return = -189.63515374012866
Episode 4563 finished after 58 timesteps. Return = -188.74239623868218
Episode 4564 finished after 88 timesteps. Return = -173.55330136228247
Episode 45

Episode 4670 finished after 104 timesteps. Return = -124.28829027579428
Episode 4671 finished after 92 timesteps. Return = -378.9272571439518
Episode 4672 finished after 112 timesteps. Return = -113.94534872298274
Episode 4673 finished after 114 timesteps. Return = -88.07295083905642
Episode 4674 finished after 101 timesteps. Return = -191.27491850455118
Episode 4675 finished after 74 timesteps. Return = -135.62865688644126
Episode 4676 finished after 94 timesteps. Return = -364.68579099526517
Episode 4677 finished after 84 timesteps. Return = -96.33822181210289
Episode 4678 finished after 81 timesteps. Return = -76.80956404010519
Episode 4679 finished after 68 timesteps. Return = -101.68064664966693
Episode 4680 finished after 100 timesteps. Return = -108.1660361532173
Episode 4681 finished after 99 timesteps. Return = -148.03521569074078
Episode 4682 finished after 64 timesteps. Return = -131.02457950597505
Episode 4683 finished after 100 timesteps. Return = -118.73892783308894
Episo

Episode 4788 finished after 97 timesteps. Return = -150.8615838377658
Episode 4789 finished after 87 timesteps. Return = -171.45823098582048
Episode 4790 finished after 75 timesteps. Return = -107.62302178985492
Episode 4791 finished after 77 timesteps. Return = -103.48373267739228
Episode 4792 finished after 95 timesteps. Return = -393.5794808122323
Episode 4793 finished after 95 timesteps. Return = -101.2243789193131
Episode 4794 finished after 72 timesteps. Return = -178.63011317316688
Episode 4795 finished after 62 timesteps. Return = -57.803465623301804
Episode 4796 finished after 74 timesteps. Return = -181.0845964572585
Episode 4797 finished after 80 timesteps. Return = -56.75833896790967
Episode 4798 finished after 75 timesteps. Return = -56.65225455509849
Episode 4799 finished after 78 timesteps. Return = 24.455190071404388
Episode 4800 finished after 98 timesteps. Return = -151.84383181979348
Episode 4801 finished after 67 timesteps. Return = -175.758157705896
Episode 4802 fi

Episode 4907 finished after 86 timesteps. Return = -330.3778233202946
Episode 4908 finished after 78 timesteps. Return = -140.7911020298737
Episode 4909 finished after 88 timesteps. Return = -111.44558321370657
Episode 4910 finished after 64 timesteps. Return = -116.82551256230815
Episode 4911 finished after 81 timesteps. Return = -118.20328195786136
Episode 4912 finished after 78 timesteps. Return = -113.33917858452469
Episode 4913 finished after 70 timesteps. Return = -85.98324131799572
Episode 4914 finished after 129 timesteps. Return = -102.34945796647308
Episode 4915 finished after 87 timesteps. Return = -41.505130863908306
Episode 4916 finished after 91 timesteps. Return = -131.1276320710717
Episode 4917 finished after 64 timesteps. Return = -181.9913605258584
Episode 4918 finished after 96 timesteps. Return = -124.6877109051655
Episode 4919 finished after 96 timesteps. Return = -214.44151850010687
Episode 4920 finished after 63 timesteps. Return = -105.09213343470685
Episode 492

Episode 5026 finished after 61 timesteps. Return = -95.13466549064563
Episode 5027 finished after 78 timesteps. Return = -85.51257966521473
Episode 5028 finished after 63 timesteps. Return = -151.9891415921013
Episode 5029 finished after 103 timesteps. Return = -111.68479038058945
Episode 5030 finished after 60 timesteps. Return = -72.9891757150116
Episode 5031 finished after 76 timesteps. Return = -104.52498918982815
Episode 5032 finished after 94 timesteps. Return = -118.06809376460312
Episode 5033 finished after 80 timesteps. Return = -122.85098402376536
Episode 5034 finished after 67 timesteps. Return = -167.0497746241677
Episode 5035 finished after 114 timesteps. Return = -82.59484914732033
Episode 5036 finished after 93 timesteps. Return = -287.97523576138053
Episode 5037 finished after 79 timesteps. Return = -96.18725201234318
Episode 5038 finished after 115 timesteps. Return = -115.34152880991877
Episode 5039 finished after 94 timesteps. Return = -110.92706886084986
Episode 504

Episode 5144 finished after 106 timesteps. Return = -102.64966534214084
Episode 5145 finished after 86 timesteps. Return = -45.61482820856135
Episode 5146 finished after 109 timesteps. Return = -161.28525566509126
Episode 5147 finished after 57 timesteps. Return = -145.53730962370594
Episode 5148 finished after 75 timesteps. Return = -95.50258702171757
Episode 5149 finished after 104 timesteps. Return = -249.5256725488996
Episode 5150 finished after 61 timesteps. Return = -82.48397708635889
Episode 5151 finished after 91 timesteps. Return = -7.904696418687337
Episode 5152 finished after 78 timesteps. Return = -110.54282472917723
Episode 5153 finished after 82 timesteps. Return = -239.77483653215407
Episode 5154 finished after 103 timesteps. Return = 8.915441949145318
Episode 5155 finished after 79 timesteps. Return = -119.00039517544295
Episode 5156 finished after 74 timesteps. Return = -84.92624668392313
Episode 5157 finished after 73 timesteps. Return = -119.30956444480238
Episode 51

Episode 5264 finished after 119 timesteps. Return = -301.3263808520542
Episode 5265 finished after 80 timesteps. Return = -89.34346240201768
Episode 5266 finished after 117 timesteps. Return = -53.636872821343495
Episode 5267 finished after 72 timesteps. Return = -135.55932895364813
Episode 5268 finished after 93 timesteps. Return = -313.42962785310795
Episode 5269 finished after 71 timesteps. Return = -230.90047157722125
Episode 5270 finished after 97 timesteps. Return = -201.75645846963138
Episode 5271 finished after 64 timesteps. Return = -86.74167428594116
Episode 5272 finished after 122 timesteps. Return = -224.6241572400856
Episode 5273 finished after 68 timesteps. Return = -57.97008163922408
Episode 5274 finished after 85 timesteps. Return = -123.2181173406348
Episode 5275 finished after 80 timesteps. Return = -146.47127339228285
Episode 5276 finished after 60 timesteps. Return = -161.76816063878584
Episode 5277 finished after 80 timesteps. Return = -105.88774905017888
Episode 5

Episode 5382 finished after 103 timesteps. Return = -83.0051252542339
Episode 5383 finished after 85 timesteps. Return = -152.64571412467114
Episode 5384 finished after 87 timesteps. Return = -135.3660909232213
Episode 5385 finished after 76 timesteps. Return = -122.11674234930979
Episode 5386 finished after 69 timesteps. Return = -65.98368702267767
Episode 5387 finished after 68 timesteps. Return = -59.31123724767694
Episode 5388 finished after 65 timesteps. Return = -108.26802716216017
Episode 5389 finished after 66 timesteps. Return = -19.315165768096335
Episode 5390 finished after 84 timesteps. Return = -54.527873837984316
Episode 5391 finished after 100 timesteps. Return = -203.94034257316326
Episode 5392 finished after 99 timesteps. Return = -446.67623179236165
Episode 5393 finished after 112 timesteps. Return = -167.17334741650143
Episode 5394 finished after 104 timesteps. Return = -199.25923659295384
Episode 5395 finished after 90 timesteps. Return = -151.13900215026754
Episode

Episode 5500 finished after 129 timesteps. Return = -195.86356026281027
Episode 5501 finished after 97 timesteps. Return = -213.52306078372214
Episode 5502 finished after 64 timesteps. Return = -104.75272349023363
Episode 5503 finished after 116 timesteps. Return = -122.12226811234784
Episode 5504 finished after 55 timesteps. Return = -172.41662370600153
Episode 5505 finished after 87 timesteps. Return = -172.27313827761185
Episode 5506 finished after 73 timesteps. Return = -257.6862654881854
Episode 5507 finished after 78 timesteps. Return = -129.99437827729403
Episode 5508 finished after 121 timesteps. Return = -155.28883070209318
Episode 5509 finished after 72 timesteps. Return = -131.71610889335312
Episode 5510 finished after 93 timesteps. Return = -226.9231762195477
Episode 5511 finished after 107 timesteps. Return = -250.4302058867489
Episode 5512 finished after 65 timesteps. Return = -32.46835074664848
Episode 5513 finished after 68 timesteps. Return = -113.9024808599425
Episode

Episode 5619 finished after 65 timesteps. Return = -61.5283447816501
Episode 5620 finished after 94 timesteps. Return = -236.0736473600889
Episode 5621 finished after 92 timesteps. Return = -225.6653009325271
Episode 5622 finished after 93 timesteps. Return = 38.73787725852148
Episode 5623 finished after 75 timesteps. Return = -78.61579513804375
Episode 5624 finished after 77 timesteps. Return = -189.59257389517344
Episode 5625 finished after 90 timesteps. Return = -205.7718968400066
Episode 5626 finished after 118 timesteps. Return = -139.7304421505689
Episode 5627 finished after 77 timesteps. Return = -103.28406261337938
Episode 5628 finished after 78 timesteps. Return = -43.07030208867022
Episode 5629 finished after 82 timesteps. Return = -249.40716254492077
Episode 5630 finished after 77 timesteps. Return = -68.5177290076243
Episode 5631 finished after 1000 timesteps. Return = 89.0298176834688
Episode 5632 finished after 61 timesteps. Return = -89.28191869697545
Episode 5633 finish

Episode 5741 finished after 105 timesteps. Return = -8.726691795320875
Episode 5742 finished after 78 timesteps. Return = -73.41634618321183
Episode 5743 finished after 74 timesteps. Return = -185.30956348358686
Episode 5744 finished after 65 timesteps. Return = -130.19471381305254
Episode 5745 finished after 76 timesteps. Return = -230.5437781729135
Episode 5746 finished after 74 timesteps. Return = -80.6174441767332
Episode 5747 finished after 75 timesteps. Return = -67.54236343850096
Episode 5748 finished after 92 timesteps. Return = -127.41615714144645
Episode 5749 finished after 91 timesteps. Return = -192.5861283419926
Episode 5750 finished after 69 timesteps. Return = -187.61667454052224
Episode 5751 finished after 93 timesteps. Return = -178.02464440100923
Episode 5752 finished after 61 timesteps. Return = -92.39646130077931
Episode 5753 finished after 112 timesteps. Return = -177.30511643402247
Episode 5754 finished after 71 timesteps. Return = -124.28508788464552
Episode 5755

Episode 5857 finished after 107 timesteps. Return = -150.7171226228597
Episode 5858 finished after 83 timesteps. Return = -70.37855144221199
Episode 5859 finished after 74 timesteps. Return = -154.7816009289554
Episode 5860 finished after 107 timesteps. Return = -211.66591496285713
Episode 5861 finished after 81 timesteps. Return = -143.84034299861793
Episode 5862 finished after 81 timesteps. Return = -118.81043747299158
Episode 5863 finished after 139 timesteps. Return = -66.18703392413991
Episode 5864 finished after 64 timesteps. Return = -159.67951160870552
Episode 5865 finished after 76 timesteps. Return = -171.53163816061777
Episode 5866 finished after 105 timesteps. Return = -128.90098695474168
Episode 5867 finished after 107 timesteps. Return = -132.48752488321472
Episode 5868 finished after 80 timesteps. Return = -193.0822576003586
Episode 5869 finished after 71 timesteps. Return = -77.77902818965089
Episode 5870 finished after 93 timesteps. Return = -112.14052365696406
Episode

Episode 5973 finished after 112 timesteps. Return = -276.74323660916605
Episode 5974 finished after 106 timesteps. Return = -376.6848987894204
Episode 5975 finished after 90 timesteps. Return = -62.82804462459515
Episode 5976 finished after 62 timesteps. Return = -109.65237883962648
Episode 5977 finished after 90 timesteps. Return = -100.11571589465134
Episode 5978 finished after 79 timesteps. Return = -54.61377907984564
Episode 5979 finished after 93 timesteps. Return = 26.063720409502295
Episode 5980 finished after 94 timesteps. Return = -231.56069177184904
Episode 5981 finished after 69 timesteps. Return = -137.2358290774315
Episode 5982 finished after 92 timesteps. Return = -216.8061494501365
Episode 5983 finished after 94 timesteps. Return = -208.14686686851383
Episode 5984 finished after 73 timesteps. Return = -97.9585935755684
Episode 5985 finished after 87 timesteps. Return = -144.6295834125666
Episode 5986 finished after 121 timesteps. Return = -185.77269100107043
Episode 5987

Episode 6089 finished after 78 timesteps. Return = -57.151655122658354
Episode 6090 finished after 74 timesteps. Return = -123.0583170746163
Episode 6091 finished after 75 timesteps. Return = -243.67883484403427
Episode 6092 finished after 70 timesteps. Return = -134.58076698931535
Episode 6093 finished after 67 timesteps. Return = -89.41188488046721
Episode 6094 finished after 81 timesteps. Return = -229.1546460406721
Episode 6095 finished after 128 timesteps. Return = -83.32552066139849
Episode 6096 finished after 103 timesteps. Return = -222.3233419222591
Episode 6097 finished after 89 timesteps. Return = -141.40404284835046
Episode 6098 finished after 64 timesteps. Return = -114.32929040169329
Episode 6099 finished after 80 timesteps. Return = -107.10682779977327
Episode 6100 finished after 66 timesteps. Return = -34.66761369051446
Episode 6101 finished after 90 timesteps. Return = -216.0935187665959
Episode 6102 finished after 90 timesteps. Return = -147.7158919895688
Episode 6103

Episode 6207 finished after 65 timesteps. Return = -89.25121110356213
Episode 6208 finished after 85 timesteps. Return = -73.25146526270322
Episode 6209 finished after 111 timesteps. Return = -349.84204295325026
Episode 6210 finished after 79 timesteps. Return = -137.16514387429675
Episode 6211 finished after 78 timesteps. Return = -135.7845885320382
Episode 6212 finished after 82 timesteps. Return = -184.9331633893167
Episode 6213 finished after 94 timesteps. Return = -236.275469234001
Episode 6214 finished after 62 timesteps. Return = -95.13241795285741
Episode 6215 finished after 92 timesteps. Return = -129.86592868962606
Episode 6216 finished after 59 timesteps. Return = 7.694760110272753
Episode 6217 finished after 65 timesteps. Return = -93.78316016679831
Episode 6218 finished after 99 timesteps. Return = -155.41796371686718
Episode 6219 finished after 71 timesteps. Return = -207.82209867673683
Episode 6220 finished after 103 timesteps. Return = -244.90721747151463
Episode 6221 f

Episode 6328 finished after 89 timesteps. Return = -149.76858532124282
Episode 6329 finished after 96 timesteps. Return = -352.30464062089925
Episode 6330 finished after 79 timesteps. Return = -129.52493130771217
Episode 6331 finished after 84 timesteps. Return = -291.56652593449815
Episode 6332 finished after 104 timesteps. Return = -238.76729539041958
Episode 6333 finished after 86 timesteps. Return = -312.7126631549148
Episode 6334 finished after 78 timesteps. Return = -112.30723211198759
Episode 6335 finished after 79 timesteps. Return = -183.10431242647516
Episode 6336 finished after 72 timesteps. Return = -106.51254022790178
Episode 6337 finished after 100 timesteps. Return = -216.38367906858676
Episode 6338 finished after 82 timesteps. Return = -168.98768374129332
Episode 6339 finished after 93 timesteps. Return = -163.1369088210398
Episode 6340 finished after 54 timesteps. Return = -141.61135179642906
Episode 6341 finished after 73 timesteps. Return = -121.21148654274137
Episod

Episode 6445 finished after 76 timesteps. Return = -113.99614073483619
Episode 6446 finished after 83 timesteps. Return = -148.6341253602491
Episode 6447 finished after 69 timesteps. Return = -77.06311138379927
Episode 6448 finished after 77 timesteps. Return = -91.84149797774032
Episode 6449 finished after 74 timesteps. Return = -139.71027993277875
Episode 6450 finished after 103 timesteps. Return = -188.23643011046084
Episode 6451 finished after 84 timesteps. Return = -138.92347587588182
Episode 6452 finished after 154 timesteps. Return = -90.73604131776665
Episode 6453 finished after 99 timesteps. Return = -120.92691773662087
Episode 6454 finished after 72 timesteps. Return = -108.92645807798209
Episode 6455 finished after 79 timesteps. Return = -216.16359272667174
Episode 6456 finished after 92 timesteps. Return = -233.93422455103695
Episode 6457 finished after 93 timesteps. Return = -14.371095080982698
Episode 6458 finished after 91 timesteps. Return = -210.75715136525326
Episode 

Episode 6565 finished after 79 timesteps. Return = -95.66770820572047
Episode 6566 finished after 99 timesteps. Return = -136.8354253569844
Episode 6567 finished after 104 timesteps. Return = -211.52693922684813
Episode 6568 finished after 90 timesteps. Return = -98.21819851895324
Episode 6569 finished after 116 timesteps. Return = -123.65859397995068
Episode 6570 finished after 96 timesteps. Return = -289.4318987139721
Episode 6571 finished after 106 timesteps. Return = -265.83123589521665
Episode 6572 finished after 70 timesteps. Return = -120.70869365311367
Episode 6573 finished after 82 timesteps. Return = -126.89711013284706
Episode 6574 finished after 97 timesteps. Return = -188.2232973685274
Episode 6575 finished after 69 timesteps. Return = -140.27332013798
Episode 6576 finished after 105 timesteps. Return = -285.44093911803543
Episode 6577 finished after 106 timesteps. Return = -203.46190041948074
Episode 6578 finished after 115 timesteps. Return = -333.6986800027821
Episode 6

Episode 6685 finished after 102 timesteps. Return = -72.6152438246027
Episode 6686 finished after 84 timesteps. Return = -147.38162154134744
Episode 6687 finished after 78 timesteps. Return = -118.84023277996833
Episode 6688 finished after 89 timesteps. Return = -258.6875125972321
Episode 6689 finished after 66 timesteps. Return = -135.59890797447747
Episode 6690 finished after 90 timesteps. Return = -60.991894705629704
Episode 6691 finished after 88 timesteps. Return = -276.3318243919126
Episode 6692 finished after 67 timesteps. Return = -191.44776836078668
Episode 6693 finished after 83 timesteps. Return = -255.49140749097583
Episode 6694 finished after 98 timesteps. Return = -52.78443284092991
Episode 6695 finished after 93 timesteps. Return = -100.33355824241006
Episode 6696 finished after 61 timesteps. Return = -110.64740588160996
Episode 6697 finished after 84 timesteps. Return = -187.56266400300845
Episode 6698 finished after 104 timesteps. Return = -155.25245693653773
Episode 6

Episode 6801 finished after 79 timesteps. Return = -13.298964887362246
Episode 6802 finished after 95 timesteps. Return = -123.47694008902792
Episode 6803 finished after 99 timesteps. Return = -356.4720373247308
Episode 6804 finished after 109 timesteps. Return = -266.4518091804186
Episode 6805 finished after 67 timesteps. Return = -132.14649866122423
Episode 6806 finished after 108 timesteps. Return = -370.79228446142423
Episode 6807 finished after 82 timesteps. Return = -177.39270076664855
Episode 6808 finished after 73 timesteps. Return = -60.21495909029163
Episode 6809 finished after 84 timesteps. Return = -159.3357381694874
Episode 6810 finished after 63 timesteps. Return = -95.2924852713851
Episode 6811 finished after 65 timesteps. Return = -79.58763134056213
Episode 6812 finished after 71 timesteps. Return = -90.56873063624035
Episode 6813 finished after 84 timesteps. Return = -121.7735649399428
Episode 6814 finished after 64 timesteps. Return = -144.16361895065467
Episode 6815 

Episode 6917 finished after 103 timesteps. Return = -114.26825488231792
Episode 6918 finished after 88 timesteps. Return = -266.4820716332828
Episode 6919 finished after 93 timesteps. Return = -269.480863412825
Episode 6920 finished after 66 timesteps. Return = -146.74661173327644
Episode 6921 finished after 82 timesteps. Return = -261.1703938610073
Episode 6922 finished after 60 timesteps. Return = -65.57074697492189
Episode 6923 finished after 76 timesteps. Return = -170.69520705350894
Episode 6924 finished after 99 timesteps. Return = -53.89797926182764
Episode 6925 finished after 95 timesteps. Return = -219.30706655539683
Episode 6926 finished after 102 timesteps. Return = -354.4348653016862
Episode 6927 finished after 109 timesteps. Return = -232.02363472721095
Episode 6928 finished after 83 timesteps. Return = -310.13871256488324
Episode 6929 finished after 65 timesteps. Return = -111.38104383312532
Episode 6930 finished after 86 timesteps. Return = -278.7742159252331
Episode 693

Episode 7035 finished after 65 timesteps. Return = -90.82883456820399
Episode 7036 finished after 110 timesteps. Return = -252.71790106745402
Episode 7037 finished after 112 timesteps. Return = -195.29246683355655
Episode 7038 finished after 88 timesteps. Return = -281.2188273516132
Episode 7039 finished after 76 timesteps. Return = -137.58894825235603
Episode 7040 finished after 68 timesteps. Return = -224.40377747887476
Episode 7041 finished after 66 timesteps. Return = -122.87016906147628
Episode 7042 finished after 56 timesteps. Return = -233.5296291867452
Episode 7043 finished after 86 timesteps. Return = -134.05900718228202
Episode 7044 finished after 100 timesteps. Return = -93.35605237462161
Episode 7045 finished after 91 timesteps. Return = -125.04950883984469
Episode 7046 finished after 75 timesteps. Return = -251.78991148515107
Episode 7047 finished after 77 timesteps. Return = -119.1788620635399
Episode 7048 finished after 89 timesteps. Return = -127.51994191072099
Episode 

Episode 7151 finished after 70 timesteps. Return = -109.62938665856088
Episode 7152 finished after 108 timesteps. Return = -519.3692716601724
Episode 7153 finished after 89 timesteps. Return = -130.64055473066134
Episode 7154 finished after 102 timesteps. Return = -119.86970904483337
Episode 7155 finished after 104 timesteps. Return = -249.1585780028101
Episode 7156 finished after 63 timesteps. Return = -203.88612591966518
Episode 7157 finished after 72 timesteps. Return = -210.93150940963008
Episode 7158 finished after 71 timesteps. Return = -78.83345166617335
Episode 7159 finished after 87 timesteps. Return = -108.33239907493933
Episode 7160 finished after 76 timesteps. Return = -259.79989326357435
Episode 7161 finished after 62 timesteps. Return = -190.59142170725056
Episode 7162 finished after 81 timesteps. Return = -66.0288203816859
Episode 7163 finished after 76 timesteps. Return = -61.05836949782628
Episode 7164 finished after 99 timesteps. Return = -434.7501767647521
Episode 71

Episode 7270 finished after 109 timesteps. Return = -114.48022765607658
Episode 7271 finished after 67 timesteps. Return = -181.9379972910636
Episode 7272 finished after 75 timesteps. Return = -46.200252062320274
Episode 7273 finished after 75 timesteps. Return = 13.065932924523878
Episode 7274 finished after 79 timesteps. Return = -321.5791122170428
Episode 7275 finished after 70 timesteps. Return = -91.50543675966776
Episode 7276 finished after 98 timesteps. Return = -189.4481497743713
Episode 7277 finished after 90 timesteps. Return = -281.0748504520974
Episode 7278 finished after 127 timesteps. Return = -50.15067687948294
Episode 7279 finished after 71 timesteps. Return = -103.71893153864993
Episode 7280 finished after 98 timesteps. Return = -0.42392752611273465
Episode 7281 finished after 108 timesteps. Return = -224.95887454539093
Episode 7282 finished after 103 timesteps. Return = -49.24986133477849
Episode 7283 finished after 112 timesteps. Return = -245.1897380212862
Episode 7

Episode 7391 finished after 70 timesteps. Return = -195.82850154275525
Episode 7392 finished after 76 timesteps. Return = -257.25521201928575
Episode 7393 finished after 81 timesteps. Return = -102.22419832086942
Episode 7394 finished after 103 timesteps. Return = -233.32628919481826
Episode 7395 finished after 105 timesteps. Return = -202.71230834939934
Episode 7396 finished after 96 timesteps. Return = -152.77587617083077
Episode 7397 finished after 104 timesteps. Return = -110.1657671818782
Episode 7398 finished after 67 timesteps. Return = -86.3984109959543
Episode 7399 finished after 92 timesteps. Return = -165.60607502661227
Episode 7400 finished after 77 timesteps. Return = -71.74535476645923
Episode 7401 finished after 81 timesteps. Return = -136.3147483408082
Episode 7402 finished after 116 timesteps. Return = -302.45074268677047
Episode 7403 finished after 104 timesteps. Return = -173.37108664077022
Episode 7404 finished after 80 timesteps. Return = 23.493647413719657
Episode

Episode 7507 finished after 95 timesteps. Return = -106.78358624080207
Episode 7508 finished after 107 timesteps. Return = -156.10916741911478
Episode 7509 finished after 107 timesteps. Return = -54.77711544692631
Episode 7510 finished after 118 timesteps. Return = -204.24115400773437
Episode 7511 finished after 77 timesteps. Return = -197.92238264292718
Episode 7512 finished after 95 timesteps. Return = -224.30862662826735
Episode 7513 finished after 63 timesteps. Return = -155.0401026167352
Episode 7514 finished after 67 timesteps. Return = -124.61057658166823
Episode 7515 finished after 77 timesteps. Return = -85.71796909174721
Episode 7516 finished after 86 timesteps. Return = -63.505956060182456
Episode 7517 finished after 84 timesteps. Return = -129.74800009778755
Episode 7518 finished after 91 timesteps. Return = -332.08207678392137
Episode 7519 finished after 86 timesteps. Return = -128.60927921284986
Episode 7520 finished after 76 timesteps. Return = -103.956695854061
Episode 

Episode 7625 finished after 62 timesteps. Return = -56.914354981227014
Episode 7626 finished after 83 timesteps. Return = -288.5110662847205
Episode 7627 finished after 102 timesteps. Return = -120.16575192024771
Episode 7628 finished after 135 timesteps. Return = -339.068232914149
Episode 7629 finished after 1000 timesteps. Return = 85.71547066698545
Episode 7630 finished after 147 timesteps. Return = -220.83167910700172
Episode 7631 finished after 61 timesteps. Return = -81.07777695360049
Episode 7632 finished after 80 timesteps. Return = -91.1310897924687
Episode 7633 finished after 102 timesteps. Return = -121.85856698529551
Episode 7634 finished after 86 timesteps. Return = -57.556546719251514
Episode 7635 finished after 99 timesteps. Return = -44.36585358272285
Episode 7636 finished after 62 timesteps. Return = -83.92062988650636
Episode 7637 finished after 85 timesteps. Return = -45.699530051854
Episode 7638 finished after 106 timesteps. Return = -141.6699128420679
Episode 7639 

Episode 7746 finished after 81 timesteps. Return = -10.167288469821798
Episode 7747 finished after 92 timesteps. Return = -221.21334380208918
Episode 7748 finished after 100 timesteps. Return = -218.7185396277879
Episode 7749 finished after 88 timesteps. Return = -288.220671104529
Episode 7750 finished after 112 timesteps. Return = -135.03186821729224
Episode 7751 finished after 85 timesteps. Return = -329.2360039163326
Episode 7752 finished after 82 timesteps. Return = -175.4858903744052
Episode 7753 finished after 73 timesteps. Return = 31.77205170295838
Episode 7754 finished after 106 timesteps. Return = -305.0183411811089
Episode 7755 finished after 82 timesteps. Return = -182.55605957652423
Episode 7756 finished after 67 timesteps. Return = -106.97560974460946
Episode 7757 finished after 91 timesteps. Return = -75.36280157421818
Episode 7758 finished after 64 timesteps. Return = -103.45895430413672
Episode 7759 finished after 67 timesteps. Return = -189.36935568753606
Episode 7760

Episode 7864 finished after 81 timesteps. Return = -129.94598658360067
Episode 7865 finished after 93 timesteps. Return = -142.38836349892517
Episode 7866 finished after 78 timesteps. Return = -107.14513339448101
Episode 7867 finished after 66 timesteps. Return = -75.77710294051482
Episode 7868 finished after 86 timesteps. Return = -297.5726871265225
Episode 7869 finished after 86 timesteps. Return = -167.2688324747018
Episode 7870 finished after 87 timesteps. Return = -274.3813198265285
Episode 7871 finished after 110 timesteps. Return = -105.7700103368636
Episode 7872 finished after 93 timesteps. Return = -173.86902113822708
Episode 7873 finished after 85 timesteps. Return = -232.59437160091426
Episode 7874 finished after 99 timesteps. Return = -211.74987311726807
Episode 7875 finished after 67 timesteps. Return = -122.86475978906338
Episode 7876 finished after 73 timesteps. Return = -177.08367096798952
Episode 7877 finished after 108 timesteps. Return = 5.020546829607298
Episode 787

Episode 7984 finished after 108 timesteps. Return = -165.85045383364553
Episode 7985 finished after 86 timesteps. Return = -113.70188069294645
Episode 7986 finished after 101 timesteps. Return = -327.77276234950784
Episode 7987 finished after 71 timesteps. Return = -218.16652553309166
Episode 7988 finished after 86 timesteps. Return = -89.39472034649586
Episode 7989 finished after 115 timesteps. Return = -349.3605362481923
Episode 7990 finished after 52 timesteps. Return = -164.24945604794505
Episode 7991 finished after 66 timesteps. Return = -118.57202958408416
Episode 7992 finished after 86 timesteps. Return = -312.63414746888
Episode 7993 finished after 98 timesteps. Return = -195.63548159471213
Episode 7994 finished after 108 timesteps. Return = -354.1497473533867
Episode 7995 finished after 68 timesteps. Return = -90.21186938360049
Episode 7996 finished after 72 timesteps. Return = -40.54747870984829
Episode 7997 finished after 76 timesteps. Return = -299.4564515376719
Episode 799

Episode 8100 finished after 68 timesteps. Return = -130.51780323061075
Episode 8101 finished after 111 timesteps. Return = -105.51391514611805
Episode 8102 finished after 71 timesteps. Return = -140.1280090361555
Episode 8103 finished after 96 timesteps. Return = -152.81653356899432
Episode 8104 finished after 81 timesteps. Return = 33.879370769094464
Episode 8105 finished after 116 timesteps. Return = -268.1383339472542
Episode 8106 finished after 82 timesteps. Return = -143.3065617268661
Episode 8107 finished after 105 timesteps. Return = -339.58542212663133
Episode 8108 finished after 88 timesteps. Return = -46.86156366539129
Episode 8109 finished after 116 timesteps. Return = -460.71063240469095
Episode 8110 finished after 90 timesteps. Return = -313.34068172373463
Episode 8111 finished after 82 timesteps. Return = -175.87474614796406
Episode 8112 finished after 96 timesteps. Return = -273.9965756229726
Episode 8113 finished after 109 timesteps. Return = -91.40001217951831
Episode 

Episode 8219 finished after 111 timesteps. Return = -220.17739695429947
Episode 8220 finished after 99 timesteps. Return = -60.635919144892895
Episode 8221 finished after 68 timesteps. Return = -135.15084513003848
Episode 8222 finished after 76 timesteps. Return = -259.47933968012165
Episode 8223 finished after 94 timesteps. Return = -85.01844273459704
Episode 8224 finished after 113 timesteps. Return = -226.2031438413032
Episode 8225 finished after 110 timesteps. Return = -214.98977216416884
Episode 8226 finished after 114 timesteps. Return = -248.67552195761004
Episode 8227 finished after 111 timesteps. Return = -320.51403425877015
Episode 8228 finished after 116 timesteps. Return = -160.7105108365243
Episode 8229 finished after 127 timesteps. Return = -155.9028643400734
Episode 8230 finished after 88 timesteps. Return = -290.56020984583154
Episode 8231 finished after 89 timesteps. Return = -233.1322519008971
Episode 8232 finished after 81 timesteps. Return = -121.94238660317387
Epis

Episode 8336 finished after 86 timesteps. Return = -7.204838295403846
Episode 8337 finished after 115 timesteps. Return = -162.88561711804826
Episode 8338 finished after 88 timesteps. Return = -142.62672407406222
Episode 8339 finished after 96 timesteps. Return = -137.81697642714923
Episode 8340 finished after 103 timesteps. Return = -266.704908593823
Episode 8341 finished after 98 timesteps. Return = -328.53279534945716
Episode 8342 finished after 93 timesteps. Return = -135.84372915631803
Episode 8343 finished after 84 timesteps. Return = -105.96545400977016
Episode 8344 finished after 118 timesteps. Return = -209.79952163639564
Episode 8345 finished after 112 timesteps. Return = -189.4788866183386
Episode 8346 finished after 78 timesteps. Return = -59.452403806477754
Episode 8347 finished after 108 timesteps. Return = -159.4304188306587
Episode 8348 finished after 100 timesteps. Return = -90.53918610993436
Episode 8349 finished after 114 timesteps. Return = -236.0312419151927
Episod

Episode 8456 finished after 88 timesteps. Return = -119.07438453019154
Episode 8457 finished after 96 timesteps. Return = -360.09684132189966
Episode 8458 finished after 106 timesteps. Return = -79.87491056408308
Episode 8459 finished after 66 timesteps. Return = -119.89952183105504
Episode 8460 finished after 85 timesteps. Return = -103.12469917643202
Episode 8461 finished after 68 timesteps. Return = -107.02372656958245
Episode 8462 finished after 85 timesteps. Return = -110.9669436879623
Episode 8463 finished after 113 timesteps. Return = -260.8265586851291
Episode 8464 finished after 86 timesteps. Return = -105.91547861894688
Episode 8465 finished after 103 timesteps. Return = -134.27630861957005
Episode 8466 finished after 71 timesteps. Return = -45.693048768243685
Episode 8467 finished after 119 timesteps. Return = -0.9981762948736446
Episode 8468 finished after 95 timesteps. Return = -308.6260696678245
Episode 8469 finished after 103 timesteps. Return = -115.19211477513677
Episo

Episode 8576 finished after 78 timesteps. Return = -144.74454415458175
Episode 8577 finished after 107 timesteps. Return = -115.7957392246855
Episode 8578 finished after 109 timesteps. Return = -320.97670941323616
Episode 8579 finished after 75 timesteps. Return = -90.52394261271468
Episode 8580 finished after 112 timesteps. Return = -153.76958977074318
Episode 8581 finished after 76 timesteps. Return = -301.4134322608382
Episode 8582 finished after 71 timesteps. Return = -98.9234905599405
Episode 8583 finished after 81 timesteps. Return = -80.99308065129568
Episode 8584 finished after 92 timesteps. Return = -78.92904591147081
Episode 8585 finished after 71 timesteps. Return = -71.58634359091108
Episode 8586 finished after 81 timesteps. Return = -291.78058331143154
Episode 8587 finished after 120 timesteps. Return = -222.60670318215517
Episode 8588 finished after 82 timesteps. Return = -46.569637587909234
Episode 8589 finished after 87 timesteps. Return = -142.09994384567221
Episode 85

Episode 8693 finished after 75 timesteps. Return = -149.9383221517701
Episode 8694 finished after 86 timesteps. Return = -198.82405917096264
Episode 8695 finished after 73 timesteps. Return = -93.94369421116066
Episode 8696 finished after 129 timesteps. Return = -26.065650900002794
Episode 8697 finished after 84 timesteps. Return = -272.3654118738863
Episode 8698 finished after 91 timesteps. Return = -247.79897612465493
Episode 8699 finished after 115 timesteps. Return = -204.9933403233618
Episode 8700 finished after 91 timesteps. Return = -255.1854641641384
Episode 8701 finished after 100 timesteps. Return = -304.81181593117043
Episode 8702 finished after 64 timesteps. Return = -67.54859991247113
Episode 8703 finished after 71 timesteps. Return = -60.8093725676073
Episode 8704 finished after 58 timesteps. Return = -150.11811278617108
Episode 8705 finished after 65 timesteps. Return = -167.59294243825224
Episode 8706 finished after 64 timesteps. Return = -134.6046670896147
Episode 8707

Episode 8813 finished after 94 timesteps. Return = -372.56115628775206
Episode 8814 finished after 74 timesteps. Return = -172.60208511159772
Episode 8815 finished after 99 timesteps. Return = -115.80261372847407
Episode 8816 finished after 83 timesteps. Return = -371.71873037046214
Episode 8817 finished after 116 timesteps. Return = -151.83960371300995
Episode 8818 finished after 82 timesteps. Return = -89.4951963508708
Episode 8819 finished after 82 timesteps. Return = -103.85674590624006
Episode 8820 finished after 84 timesteps. Return = -408.12215174069996
Episode 8821 finished after 76 timesteps. Return = -172.6389425044049
Episode 8822 finished after 89 timesteps. Return = -300.6194361277378
Episode 8823 finished after 99 timesteps. Return = -224.54003061398603
Episode 8824 finished after 77 timesteps. Return = -127.12691976427014
Episode 8825 finished after 82 timesteps. Return = -108.35134048298809
Episode 8826 finished after 114 timesteps. Return = -187.41537131579167
Episode 

Episode 8929 finished after 114 timesteps. Return = -303.69517384949535
Episode 8930 finished after 108 timesteps. Return = -106.7201468698327
Episode 8931 finished after 76 timesteps. Return = -153.49427146652133
Episode 8932 finished after 65 timesteps. Return = -238.59425791885232
Episode 8933 finished after 79 timesteps. Return = -411.05349804789995
Episode 8934 finished after 65 timesteps. Return = -151.6623086080145
Episode 8935 finished after 86 timesteps. Return = -149.17289878788552
Episode 8936 finished after 76 timesteps. Return = -266.7494451624764
Episode 8937 finished after 71 timesteps. Return = -95.66774842845689
Episode 8938 finished after 78 timesteps. Return = -167.29950654961488
Episode 8939 finished after 96 timesteps. Return = -237.24196530655237
Episode 8940 finished after 92 timesteps. Return = -88.05405621973458
Episode 8941 finished after 108 timesteps. Return = -117.15053622097781
Episode 8942 finished after 60 timesteps. Return = -95.28488868299416
Episode 8

Episode 9051 finished after 69 timesteps. Return = -149.0514054223267
Episode 9052 finished after 72 timesteps. Return = -120.38511853215589
Episode 9053 finished after 93 timesteps. Return = -126.46984273973888
Episode 9054 finished after 81 timesteps. Return = -208.11592636209156
Episode 9055 finished after 102 timesteps. Return = -255.8089346633006
Episode 9056 finished after 143 timesteps. Return = 62.38308178507492
Episode 9057 finished after 100 timesteps. Return = -179.35330030294458
Episode 9058 finished after 77 timesteps. Return = -319.1317681191654
Episode 9059 finished after 80 timesteps. Return = -244.49575369803676
Episode 9060 finished after 77 timesteps. Return = -131.84153702286915
Episode 9061 finished after 82 timesteps. Return = -271.8914436764681
Episode 9062 finished after 88 timesteps. Return = -119.59823023938752
Episode 9063 finished after 103 timesteps. Return = -232.63176669914606
Episode 9064 finished after 119 timesteps. Return = -342.3216207599368
Episode 

Episode 9168 finished after 88 timesteps. Return = -345.7105020006893
Episode 9169 finished after 72 timesteps. Return = -270.7954157015381
Episode 9170 finished after 63 timesteps. Return = -142.8543372085508
Episode 9171 finished after 91 timesteps. Return = -203.5404249604198
Episode 9172 finished after 75 timesteps. Return = -184.31924902769487
Episode 9173 finished after 102 timesteps. Return = -379.7888050195163
Episode 9174 finished after 109 timesteps. Return = -324.7159563880894
Episode 9175 finished after 100 timesteps. Return = -167.05637145847976
Episode 9176 finished after 84 timesteps. Return = -167.01981610320348
Episode 9177 finished after 101 timesteps. Return = -393.2209986356503
Episode 9178 finished after 91 timesteps. Return = -200.11210439301937
Episode 9179 finished after 108 timesteps. Return = -249.67261991568094
Episode 9180 finished after 121 timesteps. Return = -335.66865734305384
Episode 9181 finished after 86 timesteps. Return = -186.11785983818976
Episode

Episode 9284 finished after 72 timesteps. Return = -144.792015983257
Episode 9285 finished after 104 timesteps. Return = -247.4480451606168
Episode 9286 finished after 117 timesteps. Return = -201.32458015031412
Episode 9287 finished after 67 timesteps. Return = -63.35009821086292
Episode 9288 finished after 85 timesteps. Return = -380.60103070778354
Episode 9289 finished after 66 timesteps. Return = -222.03613230683385
Episode 9290 finished after 106 timesteps. Return = -294.4250582185165
Episode 9291 finished after 110 timesteps. Return = -106.78809583953782
Episode 9292 finished after 66 timesteps. Return = -75.83815825340363
Episode 9293 finished after 101 timesteps. Return = -63.321684931651745
Episode 9294 finished after 66 timesteps. Return = -91.72300889614272
Episode 9295 finished after 64 timesteps. Return = -289.8092089388805
Episode 9296 finished after 107 timesteps. Return = -331.14369257051953
Episode 9297 finished after 77 timesteps. Return = -411.87347389650745
Episode 

Episode 9401 finished after 89 timesteps. Return = -199.6259930089319
Episode 9402 finished after 101 timesteps. Return = -24.558527052783987
Episode 9403 finished after 72 timesteps. Return = -79.02540795273163
Episode 9404 finished after 127 timesteps. Return = -94.33967186200333
Episode 9405 finished after 91 timesteps. Return = -300.8583004492606
Episode 9406 finished after 63 timesteps. Return = -300.92796102671065
Episode 9407 finished after 78 timesteps. Return = 20.776425036497002
Episode 9408 finished after 71 timesteps. Return = -53.293463631889196
Episode 9409 finished after 89 timesteps. Return = -316.1723540158431
Episode 9410 finished after 97 timesteps. Return = -113.4353111496131
Episode 9411 finished after 92 timesteps. Return = -223.6988731917566
Episode 9412 finished after 70 timesteps. Return = -131.2966115886166
Episode 9413 finished after 85 timesteps. Return = -197.54657794191587
Episode 9414 finished after 122 timesteps. Return = -220.94542921811905
Episode 9415

Episode 9518 finished after 105 timesteps. Return = -297.3044002628866
Episode 9519 finished after 105 timesteps. Return = -221.62018486062877
Episode 9520 finished after 71 timesteps. Return = -21.98032167093926
Episode 9521 finished after 80 timesteps. Return = -180.84705668706906
Episode 9522 finished after 109 timesteps. Return = -228.68865129185505
Episode 9523 finished after 122 timesteps. Return = -43.580069854602876
Episode 9524 finished after 94 timesteps. Return = -96.54484436974177
Episode 9525 finished after 73 timesteps. Return = -97.61487534064207
Episode 9526 finished after 75 timesteps. Return = -204.37299806773626
Episode 9527 finished after 75 timesteps. Return = -218.9321828058094
Episode 9528 finished after 116 timesteps. Return = -238.60377020104082
Episode 9529 finished after 106 timesteps. Return = -312.3938904368941
Episode 9530 finished after 94 timesteps. Return = -328.7235551279108
Episode 9531 finished after 90 timesteps. Return = -248.81463864459474
Episode

Episode 9635 finished after 142 timesteps. Return = -241.91231385808405
Episode 9636 finished after 85 timesteps. Return = -123.47426431710505
Episode 9637 finished after 95 timesteps. Return = -50.82806359199394
Episode 9638 finished after 105 timesteps. Return = -139.6229518702737
Episode 9639 finished after 91 timesteps. Return = -297.0264624971229
Episode 9640 finished after 62 timesteps. Return = -77.62290582129717
Episode 9641 finished after 64 timesteps. Return = -17.066318800313056
Episode 9642 finished after 94 timesteps. Return = -110.19838582125792
Episode 9643 finished after 112 timesteps. Return = -128.43817506491666
Episode 9644 finished after 84 timesteps. Return = -149.0600231040704
Episode 9645 finished after 96 timesteps. Return = -172.77130701662566
Episode 9646 finished after 116 timesteps. Return = -138.50068477879609
Episode 9647 finished after 96 timesteps. Return = -226.52658206543487
Episode 9648 finished after 89 timesteps. Return = -109.33551575240304
Episode

Episode 9751 finished after 142 timesteps. Return = -360.44930964313625
Episode 9752 finished after 111 timesteps. Return = -195.75311942828208
Episode 9753 finished after 114 timesteps. Return = -310.71797779666844
Episode 9754 finished after 73 timesteps. Return = -96.58392988919073
Episode 9755 finished after 109 timesteps. Return = -402.4058521715597
Episode 9756 finished after 70 timesteps. Return = -128.03885848629437
Episode 9757 finished after 100 timesteps. Return = -198.51776296837443
Episode 9758 finished after 113 timesteps. Return = -273.5842298508689
Episode 9759 finished after 106 timesteps. Return = -445.6645253639239
Episode 9760 finished after 105 timesteps. Return = -284.8840547994311
Episode 9761 finished after 96 timesteps. Return = -254.6653615364259
Episode 9762 finished after 123 timesteps. Return = -80.44274903633713
Episode 9763 finished after 118 timesteps. Return = -108.89432966596368
Episode 9764 finished after 75 timesteps. Return = -116.07924280403756
Epi

Episode 9868 finished after 131 timesteps. Return = -143.07278581323303
Episode 9869 finished after 74 timesteps. Return = -339.4610543398355
Episode 9870 finished after 106 timesteps. Return = -225.5375282340899
Episode 9871 finished after 97 timesteps. Return = -226.76840468081497
Episode 9872 finished after 85 timesteps. Return = -137.257558096656
Episode 9873 finished after 109 timesteps. Return = -249.26557669510598
Episode 9874 finished after 122 timesteps. Return = -197.6179832976929
Episode 9875 finished after 101 timesteps. Return = 13.3980836308004
Episode 9876 finished after 141 timesteps. Return = -143.9067484106221
Episode 9877 finished after 102 timesteps. Return = -238.30662587811267
Episode 9878 finished after 69 timesteps. Return = -215.27562357094166
Episode 9879 finished after 80 timesteps. Return = -213.8965717769538
Episode 9880 finished after 81 timesteps. Return = -115.3131881237374
Episode 9881 finished after 96 timesteps. Return = -156.24183311188773
Episode 98

Episode 9985 finished after 89 timesteps. Return = -269.6874420131308
Episode 9986 finished after 103 timesteps. Return = -258.87468592452706
Episode 9987 finished after 99 timesteps. Return = -267.45285728571906
Episode 9988 finished after 116 timesteps. Return = -200.00267450500897
Episode 9989 finished after 68 timesteps. Return = -162.96908164544655
Episode 9990 finished after 96 timesteps. Return = -213.10849139579992
Episode 9991 finished after 58 timesteps. Return = -105.2295669827138
Episode 9992 finished after 67 timesteps. Return = -74.66782077195978
Episode 9993 finished after 77 timesteps. Return = -60.799820107629
Episode 9994 finished after 104 timesteps. Return = -103.97948885818066
Episode 9995 finished after 113 timesteps. Return = -122.14557350126921
Episode 9996 finished after 115 timesteps. Return = -200.33782037722818
Episode 9997 finished after 77 timesteps. Return = -72.74117889930467
Episode 9998 finished after 98 timesteps. Return = 28.336244772735967
Episode 9

Episode 10101 finished after 78 timesteps. Return = -253.9123297690235
Episode 10102 finished after 98 timesteps. Return = -108.68588536881725
Episode 10103 finished after 109 timesteps. Return = -276.3130128492782
Episode 10104 finished after 73 timesteps. Return = -137.2722507679424
Episode 10105 finished after 74 timesteps. Return = -209.2157125644
Episode 10106 finished after 86 timesteps. Return = -88.7444603670404
Episode 10107 finished after 77 timesteps. Return = -356.52679714941866
Episode 10108 finished after 138 timesteps. Return = -220.45139805186864
Episode 10109 finished after 107 timesteps. Return = -79.68661487775253
Episode 10110 finished after 121 timesteps. Return = -276.6277944031742
Episode 10111 finished after 90 timesteps. Return = -130.78561267113594
Episode 10112 finished after 67 timesteps. Return = -98.1422718651761
Episode 10113 finished after 64 timesteps. Return = -158.65167193325232
Episode 10114 finished after 72 timesteps. Return = -217.62920000370764
E

Episode 10217 finished after 103 timesteps. Return = -225.2060862938422
Episode 10218 finished after 116 timesteps. Return = -223.32965744676562
Episode 10219 finished after 73 timesteps. Return = -178.30485527539557
Episode 10220 finished after 68 timesteps. Return = -173.2401215020153
Episode 10221 finished after 102 timesteps. Return = -193.70920019630262
Episode 10222 finished after 96 timesteps. Return = -250.47192846547958
Episode 10223 finished after 85 timesteps. Return = -274.8602768315085
Episode 10224 finished after 64 timesteps. Return = -210.3944776394335
Episode 10225 finished after 93 timesteps. Return = -432.00716798769065
Episode 10226 finished after 106 timesteps. Return = -261.9569655213727
Episode 10227 finished after 120 timesteps. Return = -204.1786505505013
Episode 10228 finished after 123 timesteps. Return = -222.61933816956827
Episode 10229 finished after 102 timesteps. Return = -196.49482821373385
Episode 10230 finished after 78 timesteps. Return = -267.295428

Episode 10333 finished after 113 timesteps. Return = -260.2838687874207
Episode 10334 finished after 110 timesteps. Return = -235.0067838682809
Episode 10335 finished after 77 timesteps. Return = -96.25937692869358
Episode 10336 finished after 102 timesteps. Return = -238.38710736346502
Episode 10337 finished after 74 timesteps. Return = -223.8691308182398
Episode 10338 finished after 75 timesteps. Return = -226.56699931855235
Episode 10339 finished after 147 timesteps. Return = -243.79549509325182
Episode 10340 finished after 110 timesteps. Return = 30.37227483745673
Episode 10341 finished after 105 timesteps. Return = -119.33070727404453
Episode 10342 finished after 73 timesteps. Return = -253.1118530820678
Episode 10343 finished after 112 timesteps. Return = -173.84977284027468
Episode 10344 finished after 107 timesteps. Return = -149.31891424307548
Episode 10345 finished after 82 timesteps. Return = -231.3517291713811
Episode 10346 finished after 90 timesteps. Return = -287.0110114

Episode 10448 finished after 115 timesteps. Return = -289.59448611566364
Episode 10449 finished after 70 timesteps. Return = -85.0903009184932
Episode 10450 finished after 64 timesteps. Return = -115.96613238408956
Episode 10451 finished after 108 timesteps. Return = -176.49511329948885
Episode 10452 finished after 123 timesteps. Return = -171.17851497414946
Episode 10453 finished after 143 timesteps. Return = -123.82929111347866
Episode 10454 finished after 86 timesteps. Return = -205.61433903770478
Episode 10455 finished after 82 timesteps. Return = -193.132453877897
Episode 10456 finished after 76 timesteps. Return = -140.6871474685144
Episode 10457 finished after 93 timesteps. Return = -150.9912237001328
Episode 10458 finished after 127 timesteps. Return = -37.23500982957983
Episode 10459 finished after 75 timesteps. Return = -85.85557685041854
Episode 10460 finished after 73 timesteps. Return = -221.7324207450483
Episode 10461 finished after 108 timesteps. Return = -107.3159184794

KeyboardInterrupt: 

In [51]:
#Visualise final trained agent
visualise_agent(greedy_policy, command=[400, 400], command_scale=command_scale, n=3)

Episode 0 finished after 125 timesteps. Return = 125.0
