### This file is for testing radical changes to the codebase. Upside Down Reinforcement Learning.py is the working version.

In [166]:
import gym
env = gym.make('LunarLander-v2')

In [167]:
#command takes form [derired reward, desired horizon]
import numpy as np
def random_policy(obs, command):
    return np.random.randint(env.action_space.n)

In [168]:
import time
from copy import deepcopy
#Visualise agent function
def visualise_agent(policy, command, n=5):
    try:
        for trial_i in range(n):
            current_command = deepcopy(command)
            observation = env.reset()
            done=False
            t=0
            episode_return=0
            while not done:
                env.render()
                action = policy(torch.tensor([observation]).double(), torch.tensor([command]).double())
                observation, reward, done, info = env.step(action)
                episode_return+=reward
                current_command[0]-= reward
                current_command[1] = max(1, current_command[1]-1)
                t+=1
            env.render()
            time.sleep(1.5)
            print("Episode {} finished after {} timesteps. Return = {}".format(trial_i, t, episode_return))
        env.close()
    except KeyboardInterrupt:
        env.close()

In [169]:
#visualise_agent(random_policy, command=[500, 500], 1)

In [170]:
import torch
import torch.nn.functional as F

class FCNN_AGENT(torch.nn.Module):
    def __init__(self, command_scale):
        super().__init__()
        hidden_size=64
        self.command_scale=command_scale
        self.observation_embedding = torch.nn.Sequential(
            torch.nn.Linear(np.prod(env.observation_space.shape), hidden_size),
            torch.nn.Sigmoid()
        )
        self.command_embedding = torch.nn.Sequential(
            torch.nn.Linear(2, hidden_size),
            torch.nn.Sigmoid()
        )
        self.to_output = torch.nn.Sequential(
            torch.nn.Linear(hidden_size, hidden_size),
            torch.nn.ReLU(),
            torch.nn.Linear(hidden_size, env.action_space.n)
        )
    
    def forward(self, observation, command):
        obs_emebdding = self.observation_embedding(observation)
        cmd_embedding = self.command_embedding(command*self.command_scale)
        embedding = torch.mul(obs_emebdding, cmd_embedding)
        action_prob_logits = self.to_output(embedding)
        return action_prob_logits
    
    def create_optimizer(self, lr):
        self.optimizer = torch.optim.Adam(self.parameters(), lr=lr)

In [171]:
from copy import deepcopy
def collect_experience(policy, replay_buffer, replay_size, last_few, n_episodes=100, log_to_tensorboard=True):
    global i_episode
    init_replay_buffer = deepcopy(replay_buffer)
    try:
        for _ in range(n_episodes):
            command = sample_command(init_replay_buffer, last_few)
            writer.add_scalar('Command desired reward/Episode', command[0], i_episode)    # write loss to a graph
            writer.add_scalar('Command horizon/Episode', command[1], i_episode)    # write loss to a graph
            observation = env.reset()
            episode_mem = {'observation':[],
                           'action':[],
                           'reward':[],}
            done=False
            while not done:
                action = policy(torch.tensor([observation]).double(), torch.tensor([command]).double())
                new_observation, reward, done, info = env.step(action)
                
                episode_mem['observation'].append(observation)
                episode_mem['action'].append(action)
                episode_mem['reward'].append(reward)
                
                observation=new_observation
                command[0]-= reward
                command[1] = max(1, command[1]-1)
            episode_mem['return']=sum(episode_mem['reward'])
            episode_mem['episode_len']=len(episode_mem['observation'])
            replay_buffer.append(episode_mem)
            i_episode+=1
            if log_to_tensorboard: writer.add_scalar('Return/Episode', sum(episode_mem['reward']), i_episode)    # write loss to a graph
            print("Episode {} finished after {} timesteps. Return = {}".format(i_episode, len(episode_mem['observation']), sum(episode_mem['reward'])))
        env.close()
    except KeyboardInterrupt:
        env.close()
    replay_buffer = sorted(replay_buffer, key=lambda x:x['return'])[-replay_size:]
    return replay_buffer

def sample_command(replay_buffer, last_few):
    if len(replay_buffer)==0:
        return [1, 1]
    else:
        command_samples = replay_buffer[-last_few:]
        lengths = [mem['episode_len'] for mem in command_samples]
        returns = [mem['return'] for mem in command_samples]
        mean_return, std_return = np.mean(returns), np.std(returns)
        command_horizon = np.mean(lengths)
        desired_reward = np.random.uniform(mean_return, mean_return+std_return)
        return [desired_reward, command_horizon]

In [172]:
def train_net(policy_net, replay_buffer, n_updates=100, batch_size=64, log_to_tensorboard=True):
    global i_updates
    all_costs = []
    for i in range(n_updates):
        batch_observations = np.zeros((batch_size, np.prod(env.observation_space.shape)))
        batch_commands = np.zeros((batch_size, 2))
        batch_label = np.zeros((batch_size))
        for b in range(batch_size):
            sample_episode = np.random.randint(0, len(replay_buffer))
            sample_t1 = np.random.randint(0, len(replay_buffer[sample_episode]['observation']))
            sample_t2 = len(replay_buffer[sample_episode]['observation'])
            ##sample_t2 = np.random.randint(sample_t1+1, len(replay_buffer[sample_episode]['observation'])+1)
            sample_horizon = sample_t2-sample_t1
            sample_mem = replay_buffer[sample_episode]['observation'][sample_t1]
            sample_desired_reward = sum(replay_buffer[sample_episode]['reward'][sample_t1:sample_t2])
            network_input = np.append(sample_mem, [sample_desired_reward, sample_horizon])
            label = replay_buffer[sample_episode]['action'][sample_t1]
            batch_observations[b] = sample_mem
            batch_commands[b] = [sample_desired_reward, sample_horizon]
            batch_label[b] = label
        batch_observations = torch.tensor(batch_observations).double()
        batch_commands = torch.tensor(batch_commands).double()
        batch_label = torch.tensor(batch_label).long()
        pred = policy_net(batch_observations, batch_commands)
        cost = F.cross_entropy(pred, batch_label)
        if log_to_tensorboard: writer.add_scalar('Cost/NN update', cost.item() , i_updates)    # write loss to a graph
        all_costs.append(cost.item())
        cost.backward()
        policy_net.optimizer.step()
        policy_net.optimizer.zero_grad()
        i_updates+=1
    return np.mean(all_costs)

In [173]:
def create_greedy_policy(policy_network):
    def policy(obs, command):
        action_logits = policy_network(obs, command)
        action_probs = F.softmax(action_logits, dim=-1)
        action = np.argmax(action_probs.detach().numpy())
        return action
    return policy

def create_stochastic_policy(policy_network):
    def policy(obs, command):
        action_logits = policy_network(obs, command)
        action_probs = F.softmax(action_logits, dim=-1)
        action = torch.distributions.Categorical(action_probs).sample().item()
        return action
    return policy

In [175]:
i_episode=0
i_updates=0 #number of parameter updates to the neural network
replay_buffer = []
log_to_tensorboard = True 

replay_size = 700
last_few = 50
batch_size = 256
n_warm_up_episodes = 50
n_episodes_per_iter = 25
n_updates_per_iter = 15
command_scale = 0.02
lr = 0.001

agent = FCNN_AGENT(command_scale).double()
agent.create_optimizer(lr)

stochastic_policy = create_stochastic_policy(agent)
greedy_policy = create_greedy_policy(agent)

In [176]:
# SET UP TRAINING VISUALISATION
# SET UP TRAINING VISUALISATION
if log_to_tensorboard: from torch.utils.tensorboard import SummaryWriter
if log_to_tensorboard: writer = SummaryWriter() # we will use this to show our models performance on a graph using tensorboard

In [177]:
#Collect warm up episodes
replay_buffer = collect_experience(random_policy, replay_buffer, replay_size, last_few, n_warm_up_episodes, log_to_tensorboard)
train_net(agent, replay_buffer, n_updates_per_iter, batch_size, log_to_tensorboard)

Episode 1 finished after 64 timesteps. Return = -84.60078681057627
Episode 2 finished after 95 timesteps. Return = -120.78480124507686
Episode 3 finished after 63 timesteps. Return = -172.14825381248892
Episode 4 finished after 71 timesteps. Return = -91.4286045161926
Episode 5 finished after 88 timesteps. Return = -94.85860439292763
Episode 6 finished after 103 timesteps. Return = -243.6307219272219
Episode 7 finished after 94 timesteps. Return = -321.5253342006809
Episode 8 finished after 123 timesteps. Return = -198.62381016401844
Episode 9 finished after 71 timesteps. Return = -132.11189604460625
Episode 10 finished after 67 timesteps. Return = -124.86243874045171
Episode 11 finished after 85 timesteps. Return = -133.11606950032888
Episode 12 finished after 67 timesteps. Return = -71.45064283024958
Episode 13 finished after 92 timesteps. Return = -253.01980387275094
Episode 14 finished after 127 timesteps. Return = -106.47646839864615
Episode 15 finished after 70 timesteps. Return 

1.385637102955703

In [178]:
n_iters = 1000
for i in range(n_iters):
    replay_buffer = collect_experience(stochastic_policy, replay_buffer, replay_size, last_few, n_episodes_per_iter, log_to_tensorboard)
    train_net(agent, replay_buffer, n_updates_per_iter, batch_size, log_to_tensorboard)

Episode 51 finished after 107 timesteps. Return = -113.07739390702349
Episode 52 finished after 80 timesteps. Return = -316.08569869815676
Episode 53 finished after 66 timesteps. Return = -36.60251067892713
Episode 54 finished after 87 timesteps. Return = -69.98392383362653
Episode 55 finished after 101 timesteps. Return = -270.38410966312466
Episode 56 finished after 111 timesteps. Return = -110.95990485962744
Episode 57 finished after 105 timesteps. Return = -166.47410984245352
Episode 58 finished after 97 timesteps. Return = -156.73295510125288
Episode 59 finished after 125 timesteps. Return = -374.63088816841736
Episode 60 finished after 102 timesteps. Return = -104.08481077087355
Episode 61 finished after 77 timesteps. Return = -215.91439727583327
Episode 62 finished after 97 timesteps. Return = -186.6302828059158
Episode 63 finished after 93 timesteps. Return = -122.6390440332899
Episode 64 finished after 116 timesteps. Return = -83.1758552525774
Episode 65 finished after 108 tim

Episode 170 finished after 102 timesteps. Return = -284.4523883914275
Episode 171 finished after 117 timesteps. Return = -220.1366820679408
Episode 172 finished after 58 timesteps. Return = -186.979505323478
Episode 173 finished after 123 timesteps. Return = -484.90416413501924
Episode 174 finished after 134 timesteps. Return = -94.8135816933842
Episode 175 finished after 65 timesteps. Return = -119.26368398829123
Episode 176 finished after 74 timesteps. Return = -231.0219117660467
Episode 177 finished after 70 timesteps. Return = -182.83065146736593
Episode 178 finished after 104 timesteps. Return = -112.85654844140322
Episode 179 finished after 81 timesteps. Return = -113.5674522833551
Episode 180 finished after 76 timesteps. Return = -307.01901907722265
Episode 181 finished after 95 timesteps. Return = -164.99269739893998
Episode 182 finished after 116 timesteps. Return = -289.68585594369375
Episode 183 finished after 87 timesteps. Return = -334.6190878335255
Episode 184 finished af

Episode 291 finished after 91 timesteps. Return = -96.51374630685024
Episode 292 finished after 62 timesteps. Return = -94.54622098899897
Episode 293 finished after 69 timesteps. Return = -119.57940140864392
Episode 294 finished after 97 timesteps. Return = -290.01738334475
Episode 295 finished after 71 timesteps. Return = -115.15766398232832
Episode 296 finished after 81 timesteps. Return = -319.648145216712
Episode 297 finished after 99 timesteps. Return = -275.8158543977743
Episode 298 finished after 120 timesteps. Return = -106.11979481851704
Episode 299 finished after 86 timesteps. Return = -141.4648956851363
Episode 300 finished after 92 timesteps. Return = -177.81689133774097
Episode 301 finished after 123 timesteps. Return = -47.84569632480145
Episode 302 finished after 101 timesteps. Return = -448.66426100143093
Episode 303 finished after 105 timesteps. Return = -231.55502098310822
Episode 304 finished after 75 timesteps. Return = -131.92576576073836
Episode 305 finished after

Episode 411 finished after 90 timesteps. Return = -98.88539265925864
Episode 412 finished after 72 timesteps. Return = -320.4238494472846
Episode 413 finished after 68 timesteps. Return = -64.64364215083958
Episode 414 finished after 95 timesteps. Return = -308.6433479765385
Episode 415 finished after 68 timesteps. Return = -187.68119998959068
Episode 416 finished after 72 timesteps. Return = -106.45896196934116
Episode 417 finished after 75 timesteps. Return = -308.840500107999
Episode 418 finished after 68 timesteps. Return = -32.65245322663165
Episode 419 finished after 106 timesteps. Return = -247.75608677087263
Episode 420 finished after 79 timesteps. Return = -348.5519095235118
Episode 421 finished after 116 timesteps. Return = -116.47448039321512
Episode 422 finished after 97 timesteps. Return = -239.71960783598783
Episode 423 finished after 108 timesteps. Return = -201.62276161551995
Episode 424 finished after 100 timesteps. Return = -230.02570657887222
Episode 425 finished aft

Episode 531 finished after 107 timesteps. Return = -97.8130140539487
Episode 532 finished after 110 timesteps. Return = -113.57216581831835
Episode 533 finished after 74 timesteps. Return = -89.54221380240719
Episode 534 finished after 64 timesteps. Return = -249.3533325717778
Episode 535 finished after 129 timesteps. Return = -474.5760940135383
Episode 536 finished after 131 timesteps. Return = -184.84173486622774
Episode 537 finished after 117 timesteps. Return = -168.9296341278432
Episode 538 finished after 68 timesteps. Return = -180.42104042873754
Episode 539 finished after 79 timesteps. Return = -123.42821567763775
Episode 540 finished after 98 timesteps. Return = -121.16247434782917
Episode 541 finished after 65 timesteps. Return = -110.64232009487272
Episode 542 finished after 83 timesteps. Return = -122.15477485759845
Episode 543 finished after 117 timesteps. Return = -146.28058867364254
Episode 544 finished after 103 timesteps. Return = -152.84937556574002
Episode 545 finishe

Episode 649 finished after 68 timesteps. Return = -111.04075430229972
Episode 650 finished after 90 timesteps. Return = -111.75093292669305
Episode 651 finished after 110 timesteps. Return = -304.23757627567045
Episode 652 finished after 76 timesteps. Return = -149.13910581219056
Episode 653 finished after 78 timesteps. Return = 3.8161700299171457
Episode 654 finished after 98 timesteps. Return = -136.15317357199214
Episode 655 finished after 104 timesteps. Return = 5.830013443758304
Episode 656 finished after 97 timesteps. Return = -218.35981465152037
Episode 657 finished after 118 timesteps. Return = -211.92950728872125
Episode 658 finished after 81 timesteps. Return = -325.74411433940395
Episode 659 finished after 79 timesteps. Return = -105.21769259084934
Episode 660 finished after 118 timesteps. Return = -205.0939518333146
Episode 661 finished after 80 timesteps. Return = -137.6865961797114
Episode 662 finished after 78 timesteps. Return = -163.61835326333767
Episode 663 finished 

Episode 768 finished after 112 timesteps. Return = -280.14278364684014
Episode 769 finished after 96 timesteps. Return = -151.24323248688697
Episode 770 finished after 120 timesteps. Return = -157.3870155310363
Episode 771 finished after 106 timesteps. Return = -444.1373062540555
Episode 772 finished after 110 timesteps. Return = -252.78524104618165
Episode 773 finished after 131 timesteps. Return = -235.51101661331202
Episode 774 finished after 99 timesteps. Return = -94.25356909027796
Episode 775 finished after 138 timesteps. Return = -33.81804956446004
Episode 776 finished after 84 timesteps. Return = -137.2914435996807
Episode 777 finished after 75 timesteps. Return = -51.489446563182916
Episode 778 finished after 68 timesteps. Return = -87.38120708494503
Episode 779 finished after 111 timesteps. Return = -130.5625502553899
Episode 780 finished after 80 timesteps. Return = -204.94178618671566
Episode 781 finished after 80 timesteps. Return = -83.89594270935766
Episode 782 finished 

Episode 888 finished after 72 timesteps. Return = -80.6240304283125
Episode 889 finished after 98 timesteps. Return = -113.91710810700087
Episode 890 finished after 116 timesteps. Return = -135.0199464046617
Episode 891 finished after 106 timesteps. Return = -191.3549707326269
Episode 892 finished after 73 timesteps. Return = -112.11042135385517
Episode 893 finished after 78 timesteps. Return = -111.72210855429505
Episode 894 finished after 110 timesteps. Return = -185.91297028630225
Episode 895 finished after 97 timesteps. Return = -98.4764292216603
Episode 896 finished after 69 timesteps. Return = -80.15121804299397
Episode 897 finished after 72 timesteps. Return = -100.35673373553233
Episode 898 finished after 102 timesteps. Return = -254.20095931470988
Episode 899 finished after 88 timesteps. Return = -366.30270791024105
Episode 900 finished after 109 timesteps. Return = -128.5730643084015
Episode 901 finished after 73 timesteps. Return = -129.86557010453546
Episode 902 finished af

Episode 1008 finished after 135 timesteps. Return = -247.61538883154324
Episode 1009 finished after 137 timesteps. Return = -317.8466638468889
Episode 1010 finished after 97 timesteps. Return = -126.10278362595686
Episode 1011 finished after 106 timesteps. Return = -107.376307651242
Episode 1012 finished after 84 timesteps. Return = -141.0837231759779
Episode 1013 finished after 67 timesteps. Return = -122.26466167424888
Episode 1014 finished after 72 timesteps. Return = -99.71141293389394
Episode 1015 finished after 128 timesteps. Return = -322.76796916308115
Episode 1016 finished after 103 timesteps. Return = -45.81994594903885
Episode 1017 finished after 69 timesteps. Return = -94.81172429047389
Episode 1018 finished after 80 timesteps. Return = -171.45370351432848
Episode 1019 finished after 67 timesteps. Return = -80.53067136790939
Episode 1020 finished after 79 timesteps. Return = -91.69824088667552
Episode 1021 finished after 77 timesteps. Return = -97.57040481832776
Episode 102

Episode 1128 finished after 126 timesteps. Return = -87.0441838759431
Episode 1129 finished after 86 timesteps. Return = -44.352207601943626
Episode 1130 finished after 149 timesteps. Return = -76.16836859234056
Episode 1131 finished after 83 timesteps. Return = -105.91878318974031
Episode 1132 finished after 101 timesteps. Return = -78.35660230887765
Episode 1133 finished after 121 timesteps. Return = -156.09224689064098
Episode 1134 finished after 70 timesteps. Return = -89.43281438220599
Episode 1135 finished after 61 timesteps. Return = -98.06055305988676
Episode 1136 finished after 115 timesteps. Return = -89.21236566089365
Episode 1137 finished after 117 timesteps. Return = -85.69061592883676
Episode 1138 finished after 58 timesteps. Return = -110.73462702340652
Episode 1139 finished after 95 timesteps. Return = -88.60285884914853
Episode 1140 finished after 106 timesteps. Return = -96.91615330580467
Episode 1141 finished after 83 timesteps. Return = -108.46079589625697
Episode 1

Episode 1249 finished after 74 timesteps. Return = -60.52172424129218
Episode 1250 finished after 68 timesteps. Return = -98.96347705226678
Episode 1251 finished after 120 timesteps. Return = -109.33579915523329
Episode 1252 finished after 70 timesteps. Return = -68.90358057269668
Episode 1253 finished after 79 timesteps. Return = -82.21606228331251
Episode 1254 finished after 63 timesteps. Return = -112.55474739954587
Episode 1255 finished after 81 timesteps. Return = -81.17744416008945
Episode 1256 finished after 76 timesteps. Return = -99.81181356681107
Episode 1257 finished after 110 timesteps. Return = -56.22184518616852
Episode 1258 finished after 84 timesteps. Return = -103.10882736552807
Episode 1259 finished after 93 timesteps. Return = -143.2079281999164
Episode 1260 finished after 90 timesteps. Return = -85.2567495325857
Episode 1261 finished after 93 timesteps. Return = -121.00685744271297
Episode 1262 finished after 96 timesteps. Return = -99.22779157496754
Episode 1263 fi

Episode 1369 finished after 62 timesteps. Return = -93.58227076882999
Episode 1370 finished after 115 timesteps. Return = -12.77377527130082
Episode 1371 finished after 112 timesteps. Return = -113.42656944747682
Episode 1372 finished after 80 timesteps. Return = -53.91741947600879
Episode 1373 finished after 83 timesteps. Return = -93.56923154349536
Episode 1374 finished after 63 timesteps. Return = -82.15595830596095
Episode 1375 finished after 70 timesteps. Return = -99.90240570860522
Episode 1376 finished after 111 timesteps. Return = -139.10770010115087
Episode 1377 finished after 104 timesteps. Return = -104.18839797197863
Episode 1378 finished after 99 timesteps. Return = -75.64796241545125
Episode 1379 finished after 88 timesteps. Return = -117.33744912578746
Episode 1380 finished after 93 timesteps. Return = -95.57608758645443
Episode 1381 finished after 105 timesteps. Return = -70.49627532598033
Episode 1382 finished after 107 timesteps. Return = -96.27835762431903
Episode 13

Episode 1491 finished after 103 timesteps. Return = -88.50543120878443
Episode 1492 finished after 97 timesteps. Return = -130.47174946772958
Episode 1493 finished after 104 timesteps. Return = -78.71983091542715
Episode 1494 finished after 81 timesteps. Return = -107.69436759835045
Episode 1495 finished after 76 timesteps. Return = -117.22235329451519
Episode 1496 finished after 100 timesteps. Return = -102.30660568321588
Episode 1497 finished after 110 timesteps. Return = -126.53958474843456
Episode 1498 finished after 70 timesteps. Return = -100.52900470514224
Episode 1499 finished after 107 timesteps. Return = -148.21872704557558
Episode 1500 finished after 66 timesteps. Return = -75.97384245410404
Episode 1501 finished after 68 timesteps. Return = -93.57956185433501
Episode 1502 finished after 69 timesteps. Return = -55.71002435564756
Episode 1503 finished after 95 timesteps. Return = -86.50545331177291
Episode 1504 finished after 93 timesteps. Return = -99.58223349823254
Episode 

Episode 1611 finished after 114 timesteps. Return = -97.92371376033583
Episode 1612 finished after 60 timesteps. Return = -81.83093916597967
Episode 1613 finished after 118 timesteps. Return = -97.87153926001851
Episode 1614 finished after 122 timesteps. Return = -90.81256018454893
Episode 1615 finished after 67 timesteps. Return = -68.54003432408241
Episode 1616 finished after 80 timesteps. Return = -108.26754526416886
Episode 1617 finished after 89 timesteps. Return = -101.27934188618138
Episode 1618 finished after 71 timesteps. Return = -92.08602614080782
Episode 1619 finished after 90 timesteps. Return = -107.74568921427348
Episode 1620 finished after 63 timesteps. Return = -90.58169952488737
Episode 1621 finished after 90 timesteps. Return = -131.58718051501685
Episode 1622 finished after 106 timesteps. Return = -127.56215309668778
Episode 1623 finished after 67 timesteps. Return = -78.28557274282991
Episode 1624 finished after 65 timesteps. Return = -59.901857961941076
Episode 16

Episode 1730 finished after 105 timesteps. Return = -100.40765487253319
Episode 1731 finished after 82 timesteps. Return = -97.6370438318515
Episode 1732 finished after 104 timesteps. Return = -119.1453062366279
Episode 1733 finished after 67 timesteps. Return = -97.10087275624878
Episode 1734 finished after 63 timesteps. Return = -69.72010576445138
Episode 1735 finished after 86 timesteps. Return = -98.31347513912097
Episode 1736 finished after 79 timesteps. Return = -96.31155888526924
Episode 1737 finished after 74 timesteps. Return = -106.0959643101578
Episode 1738 finished after 73 timesteps. Return = -104.23286450741064
Episode 1739 finished after 104 timesteps. Return = -92.16704982474336
Episode 1740 finished after 91 timesteps. Return = -96.30868986865921
Episode 1741 finished after 73 timesteps. Return = -64.16532499805407
Episode 1742 finished after 114 timesteps. Return = -117.75631264590318
Episode 1743 finished after 71 timesteps. Return = -79.34088172956507
Episode 1744 f

Episode 1849 finished after 64 timesteps. Return = -48.26112996172556
Episode 1850 finished after 75 timesteps. Return = -72.27679477966457
Episode 1851 finished after 59 timesteps. Return = -95.32620798975071
Episode 1852 finished after 74 timesteps. Return = -85.02094689523064
Episode 1853 finished after 134 timesteps. Return = -134.64976805234588
Episode 1854 finished after 73 timesteps. Return = -85.05514123011537
Episode 1855 finished after 87 timesteps. Return = -86.74086051037014
Episode 1856 finished after 129 timesteps. Return = -99.53475834073826
Episode 1857 finished after 61 timesteps. Return = -69.37175722809317
Episode 1858 finished after 105 timesteps. Return = -79.15402237388795
Episode 1859 finished after 64 timesteps. Return = -77.28308223734692
Episode 1860 finished after 98 timesteps. Return = -88.8383598279425
Episode 1861 finished after 114 timesteps. Return = -71.62224689171262
Episode 1862 finished after 74 timesteps. Return = -101.48015511891907
Episode 1863 fi

Episode 1970 finished after 75 timesteps. Return = -81.77437351693341
Episode 1971 finished after 99 timesteps. Return = -108.87690650531586
Episode 1972 finished after 88 timesteps. Return = -126.9304949052055
Episode 1973 finished after 105 timesteps. Return = -77.85863811834321
Episode 1974 finished after 82 timesteps. Return = -79.88581915027503
Episode 1975 finished after 101 timesteps. Return = -133.9621851635189
Episode 1976 finished after 64 timesteps. Return = -58.99646605686264
Episode 1977 finished after 65 timesteps. Return = -80.10129087656262
Episode 1978 finished after 59 timesteps. Return = -115.10155124901561
Episode 1979 finished after 68 timesteps. Return = -108.20199149277431
Episode 1980 finished after 59 timesteps. Return = -75.24874946170621
Episode 1981 finished after 111 timesteps. Return = -124.50628293556527
Episode 1982 finished after 64 timesteps. Return = -36.49039839022166
Episode 1983 finished after 64 timesteps. Return = -65.14153771422225
Episode 1984 

Episode 2091 finished after 57 timesteps. Return = -52.85282512775733
Episode 2092 finished after 104 timesteps. Return = -78.57164911822404
Episode 2093 finished after 94 timesteps. Return = -79.38190200024582
Episode 2094 finished after 68 timesteps. Return = -60.27875952665905
Episode 2095 finished after 80 timesteps. Return = -95.45358230831911
Episode 2096 finished after 116 timesteps. Return = -102.80937650080875
Episode 2097 finished after 90 timesteps. Return = -95.61543480690143
Episode 2098 finished after 75 timesteps. Return = -108.45876009271512
Episode 2099 finished after 70 timesteps. Return = -97.92103092095883
Episode 2100 finished after 101 timesteps. Return = -121.01737550331673
Episode 2101 finished after 73 timesteps. Return = -96.4384119126624
Episode 2102 finished after 113 timesteps. Return = -101.20318862247571
Episode 2103 finished after 62 timesteps. Return = -94.12257253508426
Episode 2104 finished after 69 timesteps. Return = -69.51932569147148
Episode 2105 

Episode 2211 finished after 101 timesteps. Return = -116.38760018117362
Episode 2212 finished after 81 timesteps. Return = -93.76881022869456
Episode 2213 finished after 127 timesteps. Return = -104.39159910060877
Episode 2214 finished after 75 timesteps. Return = -95.86330913084925
Episode 2215 finished after 101 timesteps. Return = -78.27315508590566
Episode 2216 finished after 85 timesteps. Return = -111.55982065068099
Episode 2217 finished after 77 timesteps. Return = -98.05860306360343
Episode 2218 finished after 101 timesteps. Return = -81.14148917486631
Episode 2219 finished after 103 timesteps. Return = -101.41736863494809
Episode 2220 finished after 115 timesteps. Return = -1.491005550873524
Episode 2221 finished after 98 timesteps. Return = -100.43957568566378
Episode 2222 finished after 69 timesteps. Return = -75.69470441237041
Episode 2223 finished after 97 timesteps. Return = -156.43266542223756
Episode 2224 finished after 86 timesteps. Return = -108.7494935824606
Episode 

Episode 2328 finished after 76 timesteps. Return = -114.21930449040018
Episode 2329 finished after 71 timesteps. Return = -122.33538951963003
Episode 2330 finished after 82 timesteps. Return = -114.71236226274874
Episode 2331 finished after 62 timesteps. Return = -89.73360240243731
Episode 2332 finished after 59 timesteps. Return = -73.5741517595236
Episode 2333 finished after 85 timesteps. Return = -113.34301482112556
Episode 2334 finished after 69 timesteps. Return = -93.66770564393865
Episode 2335 finished after 118 timesteps. Return = -82.17439970304329
Episode 2336 finished after 81 timesteps. Return = -93.17889372713735
Episode 2337 finished after 104 timesteps. Return = -110.94550319532877
Episode 2338 finished after 93 timesteps. Return = -109.07678493178369
Episode 2339 finished after 75 timesteps. Return = -114.82996965544667
Episode 2340 finished after 104 timesteps. Return = -88.79126313050858
Episode 2341 finished after 58 timesteps. Return = -70.96769500220674
Episode 234

Episode 2448 finished after 98 timesteps. Return = -77.89114207421649
Episode 2449 finished after 77 timesteps. Return = -117.56908121248256
Episode 2450 finished after 105 timesteps. Return = 16.142626018080563
Episode 2451 finished after 63 timesteps. Return = -65.79136742942379
Episode 2452 finished after 80 timesteps. Return = -100.56644877237872
Episode 2453 finished after 71 timesteps. Return = -92.23469889303166
Episode 2454 finished after 68 timesteps. Return = -79.07753845378345
Episode 2455 finished after 62 timesteps. Return = -100.98607062945788
Episode 2456 finished after 112 timesteps. Return = -75.72530744488274
Episode 2457 finished after 105 timesteps. Return = -63.03818044107508
Episode 2458 finished after 121 timesteps. Return = -93.85320054882261
Episode 2459 finished after 95 timesteps. Return = -88.79884804820314
Episode 2460 finished after 59 timesteps. Return = -94.00912290795397
Episode 2461 finished after 61 timesteps. Return = -99.33334573626179
Episode 2462 

Episode 2569 finished after 71 timesteps. Return = -83.29703257922567
Episode 2570 finished after 98 timesteps. Return = -162.56458443476242
Episode 2571 finished after 88 timesteps. Return = -75.45157380613861
Episode 2572 finished after 109 timesteps. Return = -139.75359378375182
Episode 2573 finished after 112 timesteps. Return = -96.13181317338146
Episode 2574 finished after 56 timesteps. Return = -112.7555420116078
Episode 2575 finished after 115 timesteps. Return = -87.69701840059255
Episode 2576 finished after 84 timesteps. Return = -99.85564782920373
Episode 2577 finished after 91 timesteps. Return = -83.19227028089006
Episode 2578 finished after 71 timesteps. Return = -65.88413038240623
Episode 2579 finished after 91 timesteps. Return = -110.12957662088947
Episode 2580 finished after 151 timesteps. Return = -73.26265415376552
Episode 2581 finished after 114 timesteps. Return = -116.63038545783309
Episode 2582 finished after 79 timesteps. Return = -63.828078667926015
Episode 25

Episode 2687 finished after 90 timesteps. Return = 14.238281870852347
Episode 2688 finished after 74 timesteps. Return = -64.77495051720697
Episode 2689 finished after 111 timesteps. Return = -132.96100205454627
Episode 2690 finished after 70 timesteps. Return = -103.53998467556313
Episode 2691 finished after 85 timesteps. Return = -79.2280711061876
Episode 2692 finished after 66 timesteps. Return = -102.4713814142711
Episode 2693 finished after 111 timesteps. Return = -106.61504775245156
Episode 2694 finished after 106 timesteps. Return = -85.53640480723986
Episode 2695 finished after 73 timesteps. Return = -85.52122048138936
Episode 2696 finished after 81 timesteps. Return = -112.00184243857797
Episode 2697 finished after 87 timesteps. Return = -158.1321744720974
Episode 2698 finished after 62 timesteps. Return = -99.08006335033589
Episode 2699 finished after 71 timesteps. Return = -114.82614363753765
Episode 2700 finished after 75 timesteps. Return = -82.67740665839
Episode 2701 fin

Episode 2811 finished after 82 timesteps. Return = -91.51500215506016
Episode 2812 finished after 100 timesteps. Return = -89.8837115878348
Episode 2813 finished after 101 timesteps. Return = -73.27755476717778
Episode 2814 finished after 102 timesteps. Return = -104.9261397032167
Episode 2815 finished after 99 timesteps. Return = -105.09324184567363
Episode 2816 finished after 64 timesteps. Return = -98.11569123395624
Episode 2817 finished after 81 timesteps. Return = -85.99589988604417
Episode 2818 finished after 89 timesteps. Return = -85.31999534006516
Episode 2819 finished after 71 timesteps. Return = -88.47612121160977
Episode 2820 finished after 122 timesteps. Return = -79.89437868201551
Episode 2821 finished after 79 timesteps. Return = -99.1319398760313
Episode 2822 finished after 88 timesteps. Return = -94.46500059449149
Episode 2823 finished after 90 timesteps. Return = -77.40778868295355
Episode 2824 finished after 95 timesteps. Return = -110.617063561248
Episode 2825 finis

Episode 2929 finished after 64 timesteps. Return = -50.30837167322629
Episode 2930 finished after 108 timesteps. Return = -73.63570188237044
Episode 2931 finished after 68 timesteps. Return = -83.70536733955325
Episode 2932 finished after 77 timesteps. Return = -99.89931651244045
Episode 2933 finished after 81 timesteps. Return = -90.41720029945601
Episode 2934 finished after 83 timesteps. Return = -115.3659416063183
Episode 2935 finished after 60 timesteps. Return = -69.66382843640557
Episode 2936 finished after 119 timesteps. Return = -67.20028761363588
Episode 2937 finished after 63 timesteps. Return = -95.1108662612467
Episode 2938 finished after 112 timesteps. Return = -59.45356896205175
Episode 2939 finished after 61 timesteps. Return = -113.61635916900366
Episode 2940 finished after 88 timesteps. Return = -111.02386487106278
Episode 2941 finished after 82 timesteps. Return = -120.70051180225524
Episode 2942 finished after 113 timesteps. Return = -89.06512490122262
Episode 2943 f

Episode 3051 finished after 66 timesteps. Return = -96.04078862805366
Episode 3052 finished after 79 timesteps. Return = -95.90850672287326
Episode 3053 finished after 92 timesteps. Return = -90.18726478063395
Episode 3054 finished after 73 timesteps. Return = -109.36424897582478
Episode 3055 finished after 86 timesteps. Return = -133.45096119346545
Episode 3056 finished after 60 timesteps. Return = -98.21866410170576
Episode 3057 finished after 96 timesteps. Return = -87.2682716912926
Episode 3058 finished after 91 timesteps. Return = -106.35624574937998
Episode 3059 finished after 68 timesteps. Return = -103.43381884547263
Episode 3060 finished after 86 timesteps. Return = -110.06366656098798
Episode 3061 finished after 67 timesteps. Return = -92.29164451720816
Episode 3062 finished after 75 timesteps. Return = -105.47634146667153
Episode 3063 finished after 80 timesteps. Return = -102.34832100534007
Episode 3064 finished after 92 timesteps. Return = -92.3890612291247
Episode 3065 fi

Episode 3169 finished after 65 timesteps. Return = -87.37763654746959
Episode 3170 finished after 91 timesteps. Return = -99.8023029710999
Episode 3171 finished after 94 timesteps. Return = -98.3654731027578
Episode 3172 finished after 70 timesteps. Return = -66.91456229182063
Episode 3173 finished after 94 timesteps. Return = -123.7246079732038
Episode 3174 finished after 69 timesteps. Return = -93.64996583540426
Episode 3175 finished after 124 timesteps. Return = -89.34450910592864
Episode 3176 finished after 93 timesteps. Return = -96.69171958096473
Episode 3177 finished after 123 timesteps. Return = -83.55196303567075
Episode 3178 finished after 78 timesteps. Return = -91.23284967994326
Episode 3179 finished after 69 timesteps. Return = -70.20882375456029
Episode 3180 finished after 66 timesteps. Return = -88.61142284800337
Episode 3181 finished after 68 timesteps. Return = -55.40858891554282
Episode 3182 finished after 71 timesteps. Return = -56.047510746581715
Episode 3183 finish

Episode 3288 finished after 71 timesteps. Return = -103.29336462149817
Episode 3289 finished after 73 timesteps. Return = -73.29175340949266
Episode 3290 finished after 103 timesteps. Return = -98.1817181996719
Episode 3291 finished after 86 timesteps. Return = -108.11014168764974
Episode 3292 finished after 70 timesteps. Return = -81.6002549253815
Episode 3293 finished after 80 timesteps. Return = -81.48623516319964
Episode 3294 finished after 71 timesteps. Return = -79.65805646922294
Episode 3295 finished after 118 timesteps. Return = -66.65746267278357
Episode 3296 finished after 118 timesteps. Return = -88.41996185139551
Episode 3297 finished after 72 timesteps. Return = -73.75955910477019
Episode 3298 finished after 78 timesteps. Return = -99.73127796443735
Episode 3299 finished after 107 timesteps. Return = -90.87076842661685
Episode 3300 finished after 95 timesteps. Return = -86.81596094224456
Episode 3301 finished after 86 timesteps. Return = -119.54029776242511
Episode 3302 fi

Episode 3411 finished after 59 timesteps. Return = -96.8623716701992
Episode 3412 finished after 70 timesteps. Return = -82.93234306733542
Episode 3413 finished after 74 timesteps. Return = -76.30869013545632
Episode 3414 finished after 123 timesteps. Return = -77.12075958720314
Episode 3415 finished after 88 timesteps. Return = -80.45589820767304
Episode 3416 finished after 78 timesteps. Return = -103.1176389307301
Episode 3417 finished after 96 timesteps. Return = -124.32094367109887
Episode 3418 finished after 66 timesteps. Return = -112.34698790706936
Episode 3419 finished after 61 timesteps. Return = -125.17753653847453
Episode 3420 finished after 70 timesteps. Return = -127.11404277585928
Episode 3421 finished after 87 timesteps. Return = -138.698176258143
Episode 3422 finished after 78 timesteps. Return = -101.7601188029977
Episode 3423 finished after 63 timesteps. Return = -102.50136329409682
Episode 3424 finished after 80 timesteps. Return = -113.0388511826088
Episode 3425 fin

Episode 3531 finished after 68 timesteps. Return = -114.69531801405691
Episode 3532 finished after 102 timesteps. Return = -132.07695765759775
Episode 3533 finished after 72 timesteps. Return = -107.3172652766744
Episode 3534 finished after 86 timesteps. Return = -81.22393161777532
Episode 3535 finished after 82 timesteps. Return = -86.19599311118107
Episode 3536 finished after 127 timesteps. Return = -100.27962511937787
Episode 3537 finished after 70 timesteps. Return = -110.08904218578868
Episode 3538 finished after 76 timesteps. Return = -101.4847400796258
Episode 3539 finished after 75 timesteps. Return = -94.80408524271195
Episode 3540 finished after 67 timesteps. Return = -95.58705912317131
Episode 3541 finished after 106 timesteps. Return = -76.53313796003744
Episode 3542 finished after 107 timesteps. Return = -96.82548209163129
Episode 3543 finished after 105 timesteps. Return = -128.49302260303563
Episode 3544 finished after 75 timesteps. Return = -96.9370616838692
Episode 354

Episode 3649 finished after 65 timesteps. Return = -60.73876742075842
Episode 3650 finished after 83 timesteps. Return = -126.66784807987935
Episode 3651 finished after 108 timesteps. Return = -74.63832184247856
Episode 3652 finished after 88 timesteps. Return = -116.0618085690237
Episode 3653 finished after 78 timesteps. Return = -116.76769238661271
Episode 3654 finished after 65 timesteps. Return = -87.80512148705566
Episode 3655 finished after 70 timesteps. Return = -70.73724070465065
Episode 3656 finished after 62 timesteps. Return = -70.22677669572087
Episode 3657 finished after 89 timesteps. Return = -91.83642401989543
Episode 3658 finished after 111 timesteps. Return = -58.53057118421661
Episode 3659 finished after 62 timesteps. Return = -79.68956979624126
Episode 3660 finished after 71 timesteps. Return = -79.08229482853224
Episode 3661 finished after 101 timesteps. Return = -110.01244121420496
Episode 3662 finished after 68 timesteps. Return = -73.67348370531552
Episode 3663 f

Episode 3771 finished after 99 timesteps. Return = -90.89823066556397
Episode 3772 finished after 104 timesteps. Return = -103.51651579522527
Episode 3773 finished after 73 timesteps. Return = -46.94287736556323
Episode 3774 finished after 122 timesteps. Return = -94.33156365632487
Episode 3775 finished after 79 timesteps. Return = -70.0446353687563
Episode 3776 finished after 92 timesteps. Return = -87.86046092298537
Episode 3777 finished after 118 timesteps. Return = -120.1926205858785
Episode 3778 finished after 117 timesteps. Return = -135.84995656579827
Episode 3779 finished after 71 timesteps. Return = -113.72550989547237
Episode 3780 finished after 78 timesteps. Return = -108.56322240013246
Episode 3781 finished after 73 timesteps. Return = -130.3437584298205
Episode 3782 finished after 115 timesteps. Return = -90.41955503545176
Episode 3783 finished after 62 timesteps. Return = -108.10676867841057
Episode 3784 finished after 75 timesteps. Return = -38.38689849942935
Episode 378

Episode 3891 finished after 90 timesteps. Return = -141.02465506254856
Episode 3892 finished after 66 timesteps. Return = -35.405601179195216
Episode 3893 finished after 81 timesteps. Return = -89.79241258721547
Episode 3894 finished after 106 timesteps. Return = -75.22628534414292
Episode 3895 finished after 118 timesteps. Return = -110.86861203850789
Episode 3896 finished after 108 timesteps. Return = -86.69803592161287
Episode 3897 finished after 122 timesteps. Return = -67.36228767896418
Episode 3898 finished after 80 timesteps. Return = -69.91653661425745
Episode 3899 finished after 62 timesteps. Return = -121.62264672045733
Episode 3900 finished after 128 timesteps. Return = -103.74883954523392
Episode 3901 finished after 78 timesteps. Return = -96.78022014995467
Episode 3902 finished after 85 timesteps. Return = -88.56179545455217
Episode 3903 finished after 96 timesteps. Return = -98.4416388293545
Episode 3904 finished after 80 timesteps. Return = -100.36260764288734
Episode 39

Episode 4010 finished after 87 timesteps. Return = -78.93590116510654
Episode 4011 finished after 125 timesteps. Return = -81.33989807967005
Episode 4012 finished after 129 timesteps. Return = -115.99037536836039
Episode 4013 finished after 117 timesteps. Return = -48.91991101859212
Episode 4014 finished after 74 timesteps. Return = -61.8522699948125
Episode 4015 finished after 78 timesteps. Return = -87.96219934520467
Episode 4016 finished after 100 timesteps. Return = -44.61203336088049
Episode 4017 finished after 85 timesteps. Return = -106.0451780948942
Episode 4018 finished after 80 timesteps. Return = -76.48233892395001
Episode 4019 finished after 160 timesteps. Return = -95.31064567899226
Episode 4020 finished after 135 timesteps. Return = -84.01809038627827
Episode 4021 finished after 135 timesteps. Return = -82.78410347294943
Episode 4022 finished after 82 timesteps. Return = -124.71457030948179
Episode 4023 finished after 58 timesteps. Return = -75.59195880237999
Episode 4024

Episode 4131 finished after 81 timesteps. Return = -47.81946854491912
Episode 4132 finished after 68 timesteps. Return = -73.12273721827394
Episode 4133 finished after 86 timesteps. Return = -64.02281057971449
Episode 4134 finished after 70 timesteps. Return = -94.77072863949627
Episode 4135 finished after 93 timesteps. Return = -101.72477473220833
Episode 4136 finished after 64 timesteps. Return = -40.92586218371714
Episode 4137 finished after 107 timesteps. Return = -94.046679623467
Episode 4138 finished after 66 timesteps. Return = -75.87056959383528
Episode 4139 finished after 63 timesteps. Return = -33.52289576039661
Episode 4140 finished after 65 timesteps. Return = -95.91871966286281
Episode 4141 finished after 64 timesteps. Return = -57.52896853235814
Episode 4142 finished after 79 timesteps. Return = -118.967126789986
Episode 4143 finished after 95 timesteps. Return = -105.27383371942548
Episode 4144 finished after 107 timesteps. Return = -111.29937322726019
Episode 4145 finis

Episode 4249 finished after 122 timesteps. Return = -119.32014067402929
Episode 4250 finished after 104 timesteps. Return = -120.94168317267433
Episode 4251 finished after 129 timesteps. Return = -61.517897408447595
Episode 4252 finished after 130 timesteps. Return = -87.7757296027621
Episode 4253 finished after 142 timesteps. Return = -117.05324120863767
Episode 4254 finished after 122 timesteps. Return = -59.002561847653745
Episode 4255 finished after 62 timesteps. Return = -52.969590533640414
Episode 4256 finished after 73 timesteps. Return = -49.26997492886564
Episode 4257 finished after 67 timesteps. Return = -80.49911367809608
Episode 4258 finished after 68 timesteps. Return = -45.787406332896154
Episode 4259 finished after 94 timesteps. Return = -103.1211256588513
Episode 4260 finished after 85 timesteps. Return = -69.03215025649024
Episode 4261 finished after 114 timesteps. Return = -63.99359690382653
Episode 4262 finished after 66 timesteps. Return = -64.5025204503466
Episode 

Episode 4369 finished after 86 timesteps. Return = -126.59046213707154
Episode 4370 finished after 121 timesteps. Return = -83.51047308908021
Episode 4371 finished after 73 timesteps. Return = -104.12764436299027
Episode 4372 finished after 91 timesteps. Return = -110.64946031500698
Episode 4373 finished after 90 timesteps. Return = -117.9488012483622
Episode 4374 finished after 84 timesteps. Return = -83.75550624473888
Episode 4375 finished after 67 timesteps. Return = -96.67703239119571
Episode 4376 finished after 120 timesteps. Return = -100.58540187492797
Episode 4377 finished after 103 timesteps. Return = -126.86918535970662
Episode 4378 finished after 84 timesteps. Return = -66.43410638222814
Episode 4379 finished after 73 timesteps. Return = -91.42552511257657
Episode 4380 finished after 108 timesteps. Return = -91.61991712391377
Episode 4381 finished after 103 timesteps. Return = -99.10893410776234
Episode 4382 finished after 78 timesteps. Return = -22.850525986775978
Episode 4

Episode 4489 finished after 74 timesteps. Return = -113.13755321336784
Episode 4490 finished after 128 timesteps. Return = -82.04699835947335
Episode 4491 finished after 116 timesteps. Return = -65.60137255084516
Episode 4492 finished after 109 timesteps. Return = -115.03350800529599
Episode 4493 finished after 86 timesteps. Return = -180.6606009912167
Episode 4494 finished after 57 timesteps. Return = -62.5028658690436
Episode 4495 finished after 72 timesteps. Return = -98.08764298129451
Episode 4496 finished after 97 timesteps. Return = -98.10448208379452
Episode 4497 finished after 113 timesteps. Return = -74.48365996212964
Episode 4498 finished after 70 timesteps. Return = -96.34785871230926
Episode 4499 finished after 66 timesteps. Return = -83.34202501104599
Episode 4500 finished after 124 timesteps. Return = -64.67877866533735
Episode 4501 finished after 112 timesteps. Return = -78.10494198406056
Episode 4502 finished after 62 timesteps. Return = -84.02598364540341
Episode 4503 

Episode 4610 finished after 117 timesteps. Return = -85.73451365788384
Episode 4611 finished after 74 timesteps. Return = -102.57575286221095
Episode 4612 finished after 58 timesteps. Return = -70.41770313997773
Episode 4613 finished after 96 timesteps. Return = -91.98611100762915
Episode 4614 finished after 153 timesteps. Return = -68.3582645773838
Episode 4615 finished after 79 timesteps. Return = -68.77675511516462
Episode 4616 finished after 93 timesteps. Return = -55.11977457401194
Episode 4617 finished after 70 timesteps. Return = -71.94421267682988
Episode 4618 finished after 97 timesteps. Return = -84.53280323546647
Episode 4619 finished after 97 timesteps. Return = -119.60707447196566
Episode 4620 finished after 69 timesteps. Return = -44.33604311181501
Episode 4621 finished after 85 timesteps. Return = -96.13879279722349
Episode 4622 finished after 65 timesteps. Return = -29.641910496596253
Episode 4623 finished after 80 timesteps. Return = -97.81948487492882
Episode 4624 fin

Episode 4731 finished after 105 timesteps. Return = -96.32085209398744
Episode 4732 finished after 92 timesteps. Return = -95.08942565297181
Episode 4733 finished after 74 timesteps. Return = -100.01734137172457
Episode 4734 finished after 87 timesteps. Return = -104.78313758713331
Episode 4735 finished after 61 timesteps. Return = -88.97556275291345
Episode 4736 finished after 114 timesteps. Return = -60.69457069793937
Episode 4737 finished after 114 timesteps. Return = -70.6518220126537
Episode 4738 finished after 111 timesteps. Return = -79.83940645525188
Episode 4739 finished after 72 timesteps. Return = -99.41014936342192
Episode 4740 finished after 118 timesteps. Return = -54.8117746331074
Episode 4741 finished after 110 timesteps. Return = -89.97249309124281
Episode 4742 finished after 89 timesteps. Return = -88.86332662790448
Episode 4743 finished after 89 timesteps. Return = -91.69709065379945
Episode 4744 finished after 82 timesteps. Return = -89.69234164241004
Episode 4745 f

Episode 4848 finished after 103 timesteps. Return = -59.685478566367024
Episode 4849 finished after 76 timesteps. Return = -99.26858038885761
Episode 4850 finished after 82 timesteps. Return = -61.442984267958884
Episode 4851 finished after 105 timesteps. Return = -80.17760934814831
Episode 4852 finished after 96 timesteps. Return = -74.78065782112957
Episode 4853 finished after 73 timesteps. Return = -21.694431725226238
Episode 4854 finished after 71 timesteps. Return = -120.08012876000961
Episode 4855 finished after 102 timesteps. Return = -117.36088415070569
Episode 4856 finished after 63 timesteps. Return = -30.947215884269895
Episode 4857 finished after 150 timesteps. Return = -43.071385399211074
Episode 4858 finished after 75 timesteps. Return = -56.63342544395438
Episode 4859 finished after 120 timesteps. Return = -51.35754032629548
Episode 4860 finished after 112 timesteps. Return = -89.37741414217999
Episode 4861 finished after 70 timesteps. Return = -42.38997895767346
Episode

Episode 4968 finished after 102 timesteps. Return = -87.45438456583379
Episode 4969 finished after 139 timesteps. Return = -287.82322092128885
Episode 4970 finished after 83 timesteps. Return = -80.49256262984446
Episode 4971 finished after 73 timesteps. Return = -100.70671898106464
Episode 4972 finished after 126 timesteps. Return = -65.2175759108074
Episode 4973 finished after 78 timesteps. Return = -84.125157273365
Episode 4974 finished after 97 timesteps. Return = -79.77166096697275
Episode 4975 finished after 121 timesteps. Return = -53.27878624013077
Episode 4976 finished after 81 timesteps. Return = -85.39460908928405
Episode 4977 finished after 101 timesteps. Return = -69.10712223907085
Episode 4978 finished after 78 timesteps. Return = -19.100790836632882
Episode 4979 finished after 113 timesteps. Return = -205.8806299396369
Episode 4980 finished after 71 timesteps. Return = -51.889818122906476
Episode 4981 finished after 119 timesteps. Return = -87.37586174280092
Episode 4982

Episode 5086 finished after 134 timesteps. Return = -90.03402783648441
Episode 5087 finished after 117 timesteps. Return = -84.39698886262975
Episode 5088 finished after 144 timesteps. Return = -80.20966608923773
Episode 5089 finished after 79 timesteps. Return = -99.68760665527847
Episode 5090 finished after 94 timesteps. Return = -102.13847595225806
Episode 5091 finished after 80 timesteps. Return = -59.30193681720061
Episode 5092 finished after 118 timesteps. Return = -55.78701184488343
Episode 5093 finished after 90 timesteps. Return = -90.41827902280511
Episode 5094 finished after 85 timesteps. Return = -111.31038996365736
Episode 5095 finished after 80 timesteps. Return = -84.77463311965543
Episode 5096 finished after 79 timesteps. Return = -87.43540035570531
Episode 5097 finished after 81 timesteps. Return = -102.64391310382668
Episode 5098 finished after 134 timesteps. Return = -58.51456014391748
Episode 5099 finished after 63 timesteps. Return = -38.31508184486984
Episode 5100

Episode 5209 finished after 124 timesteps. Return = -71.0474134110769
Episode 5210 finished after 125 timesteps. Return = -62.170150964513155
Episode 5211 finished after 89 timesteps. Return = -91.38580321963813
Episode 5212 finished after 107 timesteps. Return = -100.3499900303968
Episode 5213 finished after 115 timesteps. Return = -21.661304479138437
Episode 5214 finished after 79 timesteps. Return = -49.10425342704475
Episode 5215 finished after 76 timesteps. Return = -101.26258452814942
Episode 5216 finished after 85 timesteps. Return = -88.0762210946071
Episode 5217 finished after 119 timesteps. Return = -66.33084443851308
Episode 5218 finished after 112 timesteps. Return = -82.60704382530997
Episode 5219 finished after 66 timesteps. Return = -71.75928459780398
Episode 5220 finished after 84 timesteps. Return = -105.9255307609784
Episode 5221 finished after 95 timesteps. Return = -83.21517279552373
Episode 5222 finished after 111 timesteps. Return = -84.62582726391508
Episode 5223

Episode 5329 finished after 106 timesteps. Return = -225.03031605063563
Episode 5330 finished after 102 timesteps. Return = -93.19301609542448
Episode 5331 finished after 80 timesteps. Return = -95.16186193797829
Episode 5332 finished after 118 timesteps. Return = -47.98130294020079
Episode 5333 finished after 113 timesteps. Return = -99.12245645515884
Episode 5334 finished after 136 timesteps. Return = -58.614964774229975
Episode 5335 finished after 86 timesteps. Return = -110.46127556631355
Episode 5336 finished after 74 timesteps. Return = -86.6029695382312
Episode 5337 finished after 100 timesteps. Return = -73.17085020969095
Episode 5338 finished after 94 timesteps. Return = -78.21088713642976
Episode 5339 finished after 108 timesteps. Return = -51.22360948161505
Episode 5340 finished after 90 timesteps. Return = -75.58009332077265
Episode 5341 finished after 148 timesteps. Return = -73.36818766973587
Episode 5342 finished after 98 timesteps. Return = -75.72486855206755
Episode 53

Episode 5445 finished after 118 timesteps. Return = -83.14916564125922
Episode 5446 finished after 81 timesteps. Return = -88.37164890850575
Episode 5447 finished after 102 timesteps. Return = -82.52816717427169
Episode 5448 finished after 65 timesteps. Return = -85.75033637543916
Episode 5449 finished after 106 timesteps. Return = -249.4284299360569
Episode 5450 finished after 71 timesteps. Return = -60.300372441180485
Episode 5451 finished after 129 timesteps. Return = -84.33154715575888
Episode 5452 finished after 74 timesteps. Return = -21.746721627087865
Episode 5453 finished after 81 timesteps. Return = -60.511466428636965
Episode 5454 finished after 121 timesteps. Return = -32.86401364877658
Episode 5455 finished after 108 timesteps. Return = -123.73763716500557
Episode 5456 finished after 122 timesteps. Return = -80.70564927966691
Episode 5457 finished after 146 timesteps. Return = -77.94552668705587
Episode 5458 finished after 134 timesteps. Return = -56.53688427460462
Episode

Episode 5563 finished after 75 timesteps. Return = -46.069702487115975
Episode 5564 finished after 66 timesteps. Return = -92.71118761750125
Episode 5565 finished after 101 timesteps. Return = -100.26610391848304
Episode 5566 finished after 76 timesteps. Return = -68.81461515957906
Episode 5567 finished after 102 timesteps. Return = -80.10202017093381
Episode 5568 finished after 126 timesteps. Return = -30.487788423160694
Episode 5569 finished after 95 timesteps. Return = -69.15339954867403
Episode 5570 finished after 126 timesteps. Return = -75.31588967719904
Episode 5571 finished after 94 timesteps. Return = -51.940494335127596
Episode 5572 finished after 112 timesteps. Return = -62.952675922943236
Episode 5573 finished after 88 timesteps. Return = -80.35165711757594
Episode 5574 finished after 89 timesteps. Return = -116.40109265545142
Episode 5575 finished after 77 timesteps. Return = -74.5194063831732
Episode 5576 finished after 64 timesteps. Return = -54.19953954471454
Episode 55

Episode 5683 finished after 132 timesteps. Return = -56.2790983296175
Episode 5684 finished after 92 timesteps. Return = -67.99227450281305
Episode 5685 finished after 123 timesteps. Return = -82.98727448590449
Episode 5686 finished after 131 timesteps. Return = -58.25021361273983
Episode 5687 finished after 130 timesteps. Return = -86.30328934263437
Episode 5688 finished after 114 timesteps. Return = -69.98401296169204
Episode 5689 finished after 57 timesteps. Return = -72.61972485974874
Episode 5690 finished after 100 timesteps. Return = -26.84602713661465
Episode 5691 finished after 64 timesteps. Return = -27.970223131490826
Episode 5692 finished after 76 timesteps. Return = -80.73068766751608
Episode 5693 finished after 128 timesteps. Return = -72.35474699106148
Episode 5694 finished after 86 timesteps. Return = -61.42164094694306
Episode 5695 finished after 97 timesteps. Return = -101.58243183995253
Episode 5696 finished after 104 timesteps. Return = -76.99175257898925
Episode 569

Episode 5801 finished after 89 timesteps. Return = -137.03050325599168
Episode 5802 finished after 60 timesteps. Return = -70.71853692548528
Episode 5803 finished after 99 timesteps. Return = -80.45598075215753
Episode 5804 finished after 95 timesteps. Return = -111.18302916468856
Episode 5805 finished after 92 timesteps. Return = -72.70741678901058
Episode 5806 finished after 108 timesteps. Return = -109.07272682134794
Episode 5807 finished after 104 timesteps. Return = -69.87412181947212
Episode 5808 finished after 121 timesteps. Return = -89.36723202142231
Episode 5809 finished after 115 timesteps. Return = -39.82341298720415
Episode 5810 finished after 83 timesteps. Return = -90.77727507456783
Episode 5811 finished after 138 timesteps. Return = -95.95943864599579
Episode 5812 finished after 124 timesteps. Return = -245.5980499403597
Episode 5813 finished after 117 timesteps. Return = -97.83591142285718
Episode 5814 finished after 66 timesteps. Return = -57.57302210471555
Episode 58

Episode 5920 finished after 81 timesteps. Return = -22.507130871471247
Episode 5921 finished after 129 timesteps. Return = -59.264026936765006
Episode 5922 finished after 61 timesteps. Return = -64.97521981042455
Episode 5923 finished after 99 timesteps. Return = -93.63266803257262
Episode 5924 finished after 75 timesteps. Return = -55.78218165333909
Episode 5925 finished after 110 timesteps. Return = -69.19139166328748
Episode 5926 finished after 84 timesteps. Return = -50.21887343889159
Episode 5927 finished after 91 timesteps. Return = -96.40844105264904
Episode 5928 finished after 103 timesteps. Return = -51.32752370821932
Episode 5929 finished after 139 timesteps. Return = -40.32381003637212
Episode 5930 finished after 91 timesteps. Return = -81.35316414491191
Episode 5931 finished after 64 timesteps. Return = -76.21737401826333
Episode 5932 finished after 102 timesteps. Return = -77.36007155974032
Episode 5933 finished after 86 timesteps. Return = -80.46814396443938
Episode 5934 

Episode 6039 finished after 144 timesteps. Return = -25.04063243792322
Episode 6040 finished after 137 timesteps. Return = -31.76975174224468
Episode 6041 finished after 111 timesteps. Return = -72.08857766842371
Episode 6042 finished after 114 timesteps. Return = -127.72440974267539
Episode 6043 finished after 81 timesteps. Return = -90.09565869587522
Episode 6044 finished after 157 timesteps. Return = -44.586761322265275
Episode 6045 finished after 117 timesteps. Return = -81.92555577700873
Episode 6046 finished after 109 timesteps. Return = -113.15534707653205
Episode 6047 finished after 79 timesteps. Return = -48.06756149024746
Episode 6048 finished after 138 timesteps. Return = -78.17867815331226
Episode 6049 finished after 107 timesteps. Return = -62.25521128752081
Episode 6050 finished after 111 timesteps. Return = 2.597456775597138
Episode 6051 finished after 134 timesteps. Return = -77.26722283188333
Episode 6052 finished after 70 timesteps. Return = -77.49377022371652
Episode

Episode 6156 finished after 119 timesteps. Return = -98.71562223545116
Episode 6157 finished after 117 timesteps. Return = -27.27709128773384
Episode 6158 finished after 95 timesteps. Return = -29.52274930353613
Episode 6159 finished after 81 timesteps. Return = -47.31486101239917
Episode 6160 finished after 131 timesteps. Return = 15.983802035506855
Episode 6161 finished after 99 timesteps. Return = -99.74056473731444
Episode 6162 finished after 77 timesteps. Return = -91.74260126157145
Episode 6163 finished after 104 timesteps. Return = -88.74455432018223
Episode 6164 finished after 102 timesteps. Return = -86.32044940148182
Episode 6165 finished after 123 timesteps. Return = -75.33536502081142
Episode 6166 finished after 79 timesteps. Return = -59.10053736729901
Episode 6167 finished after 79 timesteps. Return = -66.37411325836277
Episode 6168 finished after 82 timesteps. Return = -32.70780324236847
Episode 6169 finished after 99 timesteps. Return = -58.55419688641857
Episode 6170 f

Episode 6275 finished after 149 timesteps. Return = -97.09303851817937
Episode 6276 finished after 149 timesteps. Return = -70.75997887934449
Episode 6277 finished after 77 timesteps. Return = -41.32000883538733
Episode 6278 finished after 80 timesteps. Return = -50.768103901506436
Episode 6279 finished after 129 timesteps. Return = -48.805257240000934
Episode 6280 finished after 85 timesteps. Return = -51.27041424622212
Episode 6281 finished after 110 timesteps. Return = -109.95599831733148
Episode 6282 finished after 127 timesteps. Return = -86.41572232738302
Episode 6283 finished after 84 timesteps. Return = -76.45191527513956
Episode 6284 finished after 110 timesteps. Return = -36.14974127397221
Episode 6285 finished after 109 timesteps. Return = -47.2637555700824
Episode 6286 finished after 99 timesteps. Return = -8.965657113189408
Episode 6287 finished after 77 timesteps. Return = -53.20074651251829
Episode 6288 finished after 80 timesteps. Return = -92.56426467713143
Episode 628

Episode 6391 finished after 130 timesteps. Return = -91.45661644570941
Episode 6392 finished after 66 timesteps. Return = -92.43785852281087
Episode 6393 finished after 140 timesteps. Return = -4.91674239555428
Episode 6394 finished after 172 timesteps. Return = -70.19053388572263
Episode 6395 finished after 110 timesteps. Return = -115.11792352531633
Episode 6396 finished after 169 timesteps. Return = -97.53930419158709
Episode 6397 finished after 137 timesteps. Return = 15.516690132896898
Episode 6398 finished after 80 timesteps. Return = -43.48080589701898
Episode 6399 finished after 111 timesteps. Return = -222.19726858080918
Episode 6400 finished after 157 timesteps. Return = -36.99729295530974
Episode 6401 finished after 108 timesteps. Return = -68.45331558475257
Episode 6402 finished after 114 timesteps. Return = -62.1824667277886
Episode 6403 finished after 155 timesteps. Return = -73.87408604409839
Episode 6404 finished after 119 timesteps. Return = -154.71588419256267
Episode

Episode 6511 finished after 109 timesteps. Return = -16.03425233437831
Episode 6512 finished after 110 timesteps. Return = -24.052691141031474
Episode 6513 finished after 70 timesteps. Return = -27.19127725902395
Episode 6514 finished after 132 timesteps. Return = -12.920628929741994
Episode 6515 finished after 97 timesteps. Return = -68.0436277076415
Episode 6516 finished after 101 timesteps. Return = -59.79893809770786
Episode 6517 finished after 116 timesteps. Return = -33.17807657905705
Episode 6518 finished after 134 timesteps. Return = -23.480208514007288
Episode 6519 finished after 96 timesteps. Return = -46.164019125175926
Episode 6520 finished after 117 timesteps. Return = -23.63770972402311
Episode 6521 finished after 97 timesteps. Return = -35.52111406231964
Episode 6522 finished after 129 timesteps. Return = -4.329058817490662
Episode 6523 finished after 71 timesteps. Return = -69.62684833996026
Episode 6524 finished after 128 timesteps. Return = -82.72520102367133
Episode 

Episode 6628 finished after 122 timesteps. Return = -65.43277414922858
Episode 6629 finished after 145 timesteps. Return = -57.44937080513546
Episode 6630 finished after 124 timesteps. Return = -42.82025426446887
Episode 6631 finished after 88 timesteps. Return = -1.1616554550637233
Episode 6632 finished after 118 timesteps. Return = -18.31349928404643
Episode 6633 finished after 138 timesteps. Return = -64.35763356831515
Episode 6634 finished after 113 timesteps. Return = -58.463564234425654
Episode 6635 finished after 126 timesteps. Return = -76.26016715124703
Episode 6636 finished after 104 timesteps. Return = -98.96451321685122
Episode 6637 finished after 109 timesteps. Return = -28.21742543715702
Episode 6638 finished after 114 timesteps. Return = -64.15843970507525
Episode 6639 finished after 116 timesteps. Return = -55.9927697594136
Episode 6640 finished after 179 timesteps. Return = -94.49086490366811
Episode 6641 finished after 154 timesteps. Return = -43.94789448842222
Episod

Episode 6744 finished after 128 timesteps. Return = -12.202651632197984
Episode 6745 finished after 81 timesteps. Return = -111.45945274865309
Episode 6746 finished after 162 timesteps. Return = -31.126966513883332
Episode 6747 finished after 144 timesteps. Return = -44.76401137588972
Episode 6748 finished after 113 timesteps. Return = -103.77633747764584
Episode 6749 finished after 104 timesteps. Return = -46.175748725018465
Episode 6750 finished after 103 timesteps. Return = -77.14141558981805
Episode 6751 finished after 79 timesteps. Return = -67.12416090749696
Episode 6752 finished after 134 timesteps. Return = -4.767232867596064
Episode 6753 finished after 148 timesteps. Return = -6.675386500507074
Episode 6754 finished after 89 timesteps. Return = -116.15496553698095
Episode 6755 finished after 99 timesteps. Return = -58.589639933847735
Episode 6756 finished after 87 timesteps. Return = -121.69943125392132
Episode 6757 finished after 114 timesteps. Return = -234.68053274132706
Ep

Episode 6860 finished after 118 timesteps. Return = -267.1839431011716
Episode 6861 finished after 164 timesteps. Return = -56.55036140646312
Episode 6862 finished after 82 timesteps. Return = -48.85541082782218
Episode 6863 finished after 94 timesteps. Return = -96.09411800596054
Episode 6864 finished after 74 timesteps. Return = -49.30468973902695
Episode 6865 finished after 88 timesteps. Return = -39.80380793654625
Episode 6866 finished after 135 timesteps. Return = -286.0973805480285
Episode 6867 finished after 126 timesteps. Return = -46.22167467090766
Episode 6868 finished after 131 timesteps. Return = -6.118323165590084
Episode 6869 finished after 127 timesteps. Return = -29.927360763808693
Episode 6870 finished after 154 timesteps. Return = -13.489139331973192
Episode 6871 finished after 82 timesteps. Return = -48.16718113809635
Episode 6872 finished after 90 timesteps. Return = -251.01022654551392
Episode 6873 finished after 106 timesteps. Return = -53.041859542429826
Episode 

Episode 6976 finished after 305 timesteps. Return = -14.684329013023145
Episode 6977 finished after 91 timesteps. Return = -47.974648819522244
Episode 6978 finished after 154 timesteps. Return = -43.195225865668675
Episode 6979 finished after 141 timesteps. Return = -285.18670139548715
Episode 6980 finished after 126 timesteps. Return = -287.4071123203994
Episode 6981 finished after 98 timesteps. Return = -29.70537561834459
Episode 6982 finished after 187 timesteps. Return = 7.458153710292322
Episode 6983 finished after 130 timesteps. Return = -422.41644519964956
Episode 6984 finished after 93 timesteps. Return = -72.23385715754547
Episode 6985 finished after 88 timesteps. Return = -33.842928857601166
Episode 6986 finished after 124 timesteps. Return = 2.4845510508603326
Episode 6987 finished after 132 timesteps. Return = -130.17385634262592
Episode 6988 finished after 126 timesteps. Return = -134.01615697584165
Episode 6989 finished after 178 timesteps. Return = -192.2017049601722
Epi

Episode 7095 finished after 150 timesteps. Return = -36.71776208016445
Episode 7096 finished after 118 timesteps. Return = 0.30881588295845575
Episode 7097 finished after 129 timesteps. Return = -51.980069471835776
Episode 7098 finished after 161 timesteps. Return = -31.941012605067414
Episode 7099 finished after 84 timesteps. Return = -48.24458793618431
Episode 7100 finished after 66 timesteps. Return = -66.40299434638243
Episode 7101 finished after 129 timesteps. Return = 6.154348084580079
Episode 7102 finished after 159 timesteps. Return = -47.56770298729306
Episode 7103 finished after 115 timesteps. Return = 4.243186454438018
Episode 7104 finished after 110 timesteps. Return = -253.49504869267483
Episode 7105 finished after 126 timesteps. Return = -63.4593324279429
Episode 7106 finished after 135 timesteps. Return = -28.266292274048382
Episode 7107 finished after 89 timesteps. Return = -29.69670334316403
Episode 7108 finished after 158 timesteps. Return = -38.530674636360835
Episod

Episode 7211 finished after 112 timesteps. Return = -144.02803205954018
Episode 7212 finished after 116 timesteps. Return = -34.16008699571921
Episode 7213 finished after 91 timesteps. Return = -67.46236777657586
Episode 7214 finished after 144 timesteps. Return = -59.83570293121073
Episode 7215 finished after 147 timesteps. Return = -45.21274485817065
Episode 7216 finished after 168 timesteps. Return = -105.1915932017094
Episode 7217 finished after 185 timesteps. Return = -377.4061556417487
Episode 7218 finished after 102 timesteps. Return = -52.742592458013085
Episode 7219 finished after 169 timesteps. Return = -171.46976073286436
Episode 7220 finished after 161 timesteps. Return = 2.3425283890324238
Episode 7221 finished after 103 timesteps. Return = -33.38029094286986
Episode 7222 finished after 147 timesteps. Return = -28.96492616123932
Episode 7223 finished after 181 timesteps. Return = -64.79558934688504
Episode 7224 finished after 129 timesteps. Return = -31.231186668828514
Epi

Episode 7328 finished after 187 timesteps. Return = -36.18649222379008
Episode 7329 finished after 113 timesteps. Return = -192.44577671583318
Episode 7330 finished after 81 timesteps. Return = -47.68954719725083
Episode 7331 finished after 221 timesteps. Return = -41.58734341972847
Episode 7332 finished after 229 timesteps. Return = -309.30597375513554
Episode 7333 finished after 115 timesteps. Return = -35.59204076668051
Episode 7334 finished after 189 timesteps. Return = -24.54589473128489
Episode 7335 finished after 177 timesteps. Return = -112.89816742262603
Episode 7336 finished after 164 timesteps. Return = -34.70628038181896
Episode 7337 finished after 172 timesteps. Return = -8.670228669904446
Episode 7338 finished after 161 timesteps. Return = -78.51765848443728
Episode 7339 finished after 88 timesteps. Return = -38.11354267111234
Episode 7340 finished after 124 timesteps. Return = -58.63454744932956
Episode 7341 finished after 207 timesteps. Return = -264.22823658886625
Epis

Episode 7447 finished after 165 timesteps. Return = -38.40854097141164
Episode 7448 finished after 93 timesteps. Return = -53.66078106136986
Episode 7449 finished after 169 timesteps. Return = 53.258366573341306
Episode 7450 finished after 191 timesteps. Return = -26.225084801309876
Episode 7451 finished after 172 timesteps. Return = -4.709750058548423
Episode 7452 finished after 93 timesteps. Return = -33.15975524255738
Episode 7453 finished after 104 timesteps. Return = -24.10823643554147
Episode 7454 finished after 96 timesteps. Return = -5.280335147437185
Episode 7455 finished after 88 timesteps. Return = -60.60424882408872
Episode 7456 finished after 174 timesteps. Return = -123.888208510571
Episode 7457 finished after 125 timesteps. Return = 17.972552170879396
Episode 7458 finished after 142 timesteps. Return = -74.82508713012759
Episode 7459 finished after 179 timesteps. Return = -63.684053228158156
Episode 7460 finished after 130 timesteps. Return = -35.598334546361286
Episode 

Episode 7563 finished after 132 timesteps. Return = -90.29476125961719
Episode 7564 finished after 160 timesteps. Return = -54.21047090831686
Episode 7565 finished after 123 timesteps. Return = -134.0149442156831
Episode 7566 finished after 196 timesteps. Return = -26.171948776383104
Episode 7567 finished after 121 timesteps. Return = -135.99669023165956
Episode 7568 finished after 121 timesteps. Return = 4.765405996327232
Episode 7569 finished after 111 timesteps. Return = -80.24574640073612
Episode 7570 finished after 155 timesteps. Return = -85.90267795360262
Episode 7571 finished after 149 timesteps. Return = -88.46510777812429
Episode 7572 finished after 78 timesteps. Return = -80.83817417297155
Episode 7573 finished after 123 timesteps. Return = -69.60229778176478
Episode 7574 finished after 151 timesteps. Return = -40.3309833686491
Episode 7575 finished after 191 timesteps. Return = -27.153818744289097
Episode 7576 finished after 147 timesteps. Return = -44.6859364849017
Episode

Episode 7680 finished after 143 timesteps. Return = -131.68288342890207
Episode 7681 finished after 145 timesteps. Return = -129.88636544876007
Episode 7682 finished after 86 timesteps. Return = -16.223118362604282
Episode 7683 finished after 124 timesteps. Return = -53.56727071410271
Episode 7684 finished after 381 timesteps. Return = -229.6224805282134
Episode 7685 finished after 138 timesteps. Return = -98.19836803153099
Episode 7686 finished after 108 timesteps. Return = -16.693602538192124
Episode 7687 finished after 118 timesteps. Return = -48.355581927320344
Episode 7688 finished after 185 timesteps. Return = -27.361511688977487
Episode 7689 finished after 146 timesteps. Return = -108.00218893397124
Episode 7690 finished after 159 timesteps. Return = -167.924023722702
Episode 7691 finished after 122 timesteps. Return = -48.633947393346034
Episode 7692 finished after 70 timesteps. Return = -59.972043882406105
Episode 7693 finished after 166 timesteps. Return = -34.08278062462976


Episode 7798 finished after 135 timesteps. Return = -192.22655751266208
Episode 7799 finished after 141 timesteps. Return = -47.491871117274975
Episode 7800 finished after 159 timesteps. Return = -41.24180140354861
Episode 7801 finished after 159 timesteps. Return = -35.81552731263929
Episode 7802 finished after 84 timesteps. Return = -31.948279419213563
Episode 7803 finished after 125 timesteps. Return = -3.7780614234391976
Episode 7804 finished after 96 timesteps. Return = -63.9150934943981
Episode 7805 finished after 155 timesteps. Return = -135.7965230224268
Episode 7806 finished after 131 timesteps. Return = -25.148396479093023
Episode 7807 finished after 210 timesteps. Return = -60.84169948663163
Episode 7808 finished after 104 timesteps. Return = -5.243768020177669
Episode 7809 finished after 108 timesteps. Return = 0.8831081439896309
Episode 7810 finished after 88 timesteps. Return = -28.15932327487424
Episode 7811 finished after 78 timesteps. Return = -59.5461767303646
Episode

Episode 7916 finished after 135 timesteps. Return = -63.725921445190664
Episode 7917 finished after 123 timesteps. Return = -1.1135790373326557
Episode 7918 finished after 124 timesteps. Return = -29.533028619563638
Episode 7919 finished after 155 timesteps. Return = 21.774223312966953
Episode 7920 finished after 331 timesteps. Return = -262.3065293838561
Episode 7921 finished after 161 timesteps. Return = -56.19306289492107
Episode 7922 finished after 121 timesteps. Return = -64.89858807517915
Episode 7923 finished after 137 timesteps. Return = -41.359005015698294
Episode 7924 finished after 152 timesteps. Return = -42.577165471124204
Episode 7925 finished after 180 timesteps. Return = 17.530848954847897
Episode 7926 finished after 123 timesteps. Return = -43.72332023494607
Episode 7927 finished after 141 timesteps. Return = -31.38855971949259
Episode 7928 finished after 86 timesteps. Return = -65.53317373483449
Episode 7929 finished after 163 timesteps. Return = -6.416540915994972
Ep

Episode 8035 finished after 198 timesteps. Return = -18.006878661697712
Episode 8036 finished after 94 timesteps. Return = -37.40275510579977
Episode 8037 finished after 205 timesteps. Return = -76.9996022586684
Episode 8038 finished after 311 timesteps. Return = -142.06100252646294
Episode 8039 finished after 142 timesteps. Return = -34.20380833349907
Episode 8040 finished after 114 timesteps. Return = -5.471027522941114
Episode 8041 finished after 135 timesteps. Return = -21.11057766129062
Episode 8042 finished after 121 timesteps. Return = 0.09679556783883925
Episode 8043 finished after 185 timesteps. Return = -38.22586882355667
Episode 8044 finished after 138 timesteps. Return = 21.75831743680247
Episode 8045 finished after 208 timesteps. Return = -28.30070034839673
Episode 8046 finished after 173 timesteps. Return = -225.08004188371575
Episode 8047 finished after 160 timesteps. Return = -53.79963224194255
Episode 8048 finished after 300 timesteps. Return = -177.97909959115242
Epis

Episode 8151 finished after 147 timesteps. Return = -78.78361498378028
Episode 8152 finished after 188 timesteps. Return = -64.00242820476943
Episode 8153 finished after 140 timesteps. Return = -61.821922465264194
Episode 8154 finished after 184 timesteps. Return = -43.08759330913935
Episode 8155 finished after 143 timesteps. Return = -8.46580125158323
Episode 8156 finished after 138 timesteps. Return = -13.902657094720652
Episode 8157 finished after 476 timesteps. Return = -326.40281803640914
Episode 8158 finished after 402 timesteps. Return = -27.422590180256435
Episode 8159 finished after 80 timesteps. Return = -39.661001716340564
Episode 8160 finished after 389 timesteps. Return = -226.41237733319412
Episode 8161 finished after 208 timesteps. Return = -217.47505077115855
Episode 8162 finished after 78 timesteps. Return = -27.556673619314083
Episode 8163 finished after 183 timesteps. Return = -47.11068967487033
Episode 8164 finished after 145 timesteps. Return = -0.7414712968734705


Episode 8267 finished after 404 timesteps. Return = -114.8666522502749
Episode 8268 finished after 119 timesteps. Return = 15.51922041038263
Episode 8269 finished after 143 timesteps. Return = -36.08820658122801
Episode 8270 finished after 124 timesteps. Return = -154.45786398775402
Episode 8271 finished after 175 timesteps. Return = -58.681082312552164
Episode 8272 finished after 110 timesteps. Return = 5.102055545493343
Episode 8273 finished after 118 timesteps. Return = -31.49840146988565
Episode 8274 finished after 191 timesteps. Return = 3.4992454016879293
Episode 8275 finished after 157 timesteps. Return = -16.58479148653241
Episode 8276 finished after 109 timesteps. Return = -2.123004225878475
Episode 8277 finished after 138 timesteps. Return = -84.52199637414544
Episode 8278 finished after 176 timesteps. Return = -70.8586178159774
Episode 8279 finished after 197 timesteps. Return = -37.847636227804045
Episode 8280 finished after 94 timesteps. Return = -21.930788968255328
Episod

Episode 8386 finished after 187 timesteps. Return = -173.67003197960022
Episode 8387 finished after 140 timesteps. Return = -104.5766591275003
Episode 8388 finished after 165 timesteps. Return = -86.39328544309623
Episode 8389 finished after 223 timesteps. Return = 25.42268574495148
Episode 8390 finished after 122 timesteps. Return = -5.626174762215982
Episode 8391 finished after 817 timesteps. Return = -316.67966373728973
Episode 8392 finished after 178 timesteps. Return = -19.99697907549823
Episode 8393 finished after 113 timesteps. Return = 8.54111529379999
Episode 8394 finished after 1000 timesteps. Return = 13.505979930110794
Episode 8395 finished after 184 timesteps. Return = -70.3092567781513
Episode 8396 finished after 227 timesteps. Return = -159.71293124616938
Episode 8397 finished after 97 timesteps. Return = -27.315417161318265
Episode 8398 finished after 318 timesteps. Return = -222.99767730370718
Episode 8399 finished after 226 timesteps. Return = 17.802487380648927
Episo

Episode 8502 finished after 117 timesteps. Return = -265.524118433947
Episode 8503 finished after 100 timesteps. Return = 23.210438436782027
Episode 8504 finished after 180 timesteps. Return = -16.955110709485098
Episode 8505 finished after 184 timesteps. Return = -75.01307817508325
Episode 8506 finished after 190 timesteps. Return = 31.89166076833601
Episode 8507 finished after 89 timesteps. Return = 0.6778924076859454
Episode 8508 finished after 161 timesteps. Return = -11.07852164280169
Episode 8509 finished after 209 timesteps. Return = -242.8456495640638
Episode 8510 finished after 169 timesteps. Return = -6.778669096466956
Episode 8511 finished after 223 timesteps. Return = -338.06152970038954
Episode 8512 finished after 249 timesteps. Return = 4.835285244384195
Episode 8513 finished after 138 timesteps. Return = -131.66357520713623
Episode 8514 finished after 198 timesteps. Return = -15.652528579861354
Episode 8515 finished after 174 timesteps. Return = 28.30780502396317
Episode

Episode 8618 finished after 161 timesteps. Return = -52.54173614109086
Episode 8619 finished after 206 timesteps. Return = -199.6372385453841
Episode 8620 finished after 1000 timesteps. Return = 21.844044870496283
Episode 8621 finished after 132 timesteps. Return = 4.2143151974842965
Episode 8622 finished after 211 timesteps. Return = -211.05296451592224
Episode 8623 finished after 103 timesteps. Return = -69.44168978270687
Episode 8624 finished after 134 timesteps. Return = -67.14213781068008
Episode 8625 finished after 147 timesteps. Return = -62.009037583826256
Episode 8626 finished after 171 timesteps. Return = -60.27716764802173
Episode 8627 finished after 154 timesteps. Return = 9.480360662525257
Episode 8628 finished after 130 timesteps. Return = -81.45839985550171
Episode 8629 finished after 350 timesteps. Return = -271.77132275591714
Episode 8630 finished after 90 timesteps. Return = -42.38434463223067
Episode 8631 finished after 195 timesteps. Return = -84.59197445283587
Epis

Episode 8735 finished after 506 timesteps. Return = -161.1383205718563
Episode 8736 finished after 115 timesteps. Return = -185.48762465472356
Episode 8737 finished after 721 timesteps. Return = -371.33678974134807
Episode 8738 finished after 202 timesteps. Return = -256.2223584659654
Episode 8739 finished after 169 timesteps. Return = -110.10416864640837
Episode 8740 finished after 172 timesteps. Return = -183.3810988062263
Episode 8741 finished after 721 timesteps. Return = -237.9928937553892
Episode 8742 finished after 206 timesteps. Return = -325.2823007015344
Episode 8743 finished after 145 timesteps. Return = 40.6689574304248
Episode 8744 finished after 216 timesteps. Return = 14.745813516509585
Episode 8745 finished after 185 timesteps. Return = -155.09605272966223
Episode 8746 finished after 202 timesteps. Return = -30.45697276457952
Episode 8747 finished after 228 timesteps. Return = -17.380803504187355
Episode 8748 finished after 163 timesteps. Return = -78.73276860552009
Epi

Episode 8851 finished after 282 timesteps. Return = -24.896573243067053
Episode 8852 finished after 181 timesteps. Return = -218.307327278858
Episode 8853 finished after 158 timesteps. Return = -101.32295888457921
Episode 8854 finished after 195 timesteps. Return = -251.28587719835772
Episode 8855 finished after 210 timesteps. Return = -56.793936469101766
Episode 8856 finished after 399 timesteps. Return = -104.19359001963217
Episode 8857 finished after 423 timesteps. Return = -131.72949263554864
Episode 8858 finished after 1000 timesteps. Return = 10.95476459954209
Episode 8859 finished after 228 timesteps. Return = -217.65549090825473
Episode 8860 finished after 139 timesteps. Return = -60.7671004153842
Episode 8861 finished after 100 timesteps. Return = -290.7238346542273
Episode 8862 finished after 90 timesteps. Return = 2.3242850032537063
Episode 8863 finished after 142 timesteps. Return = 20.41690115152184
Episode 8864 finished after 141 timesteps. Return = -88.86333274442215
Epi

Episode 8968 finished after 343 timesteps. Return = -229.26602578026458
Episode 8969 finished after 154 timesteps. Return = -20.208299551551107
Episode 8970 finished after 216 timesteps. Return = -192.5435509999248
Episode 8971 finished after 147 timesteps. Return = -38.32707059125829
Episode 8972 finished after 220 timesteps. Return = -83.26844372157379
Episode 8973 finished after 263 timesteps. Return = -31.242997770750804
Episode 8974 finished after 186 timesteps. Return = -227.8343643308496
Episode 8975 finished after 211 timesteps. Return = 13.465454945868444
Episode 8976 finished after 229 timesteps. Return = -256.62923355653083
Episode 8977 finished after 217 timesteps. Return = -52.83825823259046
Episode 8978 finished after 190 timesteps. Return = -50.62411273910408
Episode 8979 finished after 137 timesteps. Return = -147.8090926209755
Episode 8980 finished after 480 timesteps. Return = -169.64669238514048
Episode 8981 finished after 900 timesteps. Return = -294.7852610933796
E

Episode 9083 finished after 437 timesteps. Return = -117.31102800987574
Episode 9084 finished after 135 timesteps. Return = -9.025457901673562
Episode 9085 finished after 111 timesteps. Return = -16.68614661245732
Episode 9086 finished after 89 timesteps. Return = 1.9856485680478926
Episode 9087 finished after 145 timesteps. Return = -50.48782887321121
Episode 9088 finished after 240 timesteps. Return = -6.318055411411095
Episode 9089 finished after 137 timesteps. Return = 5.711377119542405
Episode 9090 finished after 147 timesteps. Return = -11.090192226271412
Episode 9091 finished after 225 timesteps. Return = -289.2723371215561
Episode 9092 finished after 364 timesteps. Return = -148.83933207951932
Episode 9093 finished after 197 timesteps. Return = -187.76915375048878
Episode 9094 finished after 188 timesteps. Return = 26.3487548854732
Episode 9095 finished after 370 timesteps. Return = -175.85785486770496
Episode 9096 finished after 174 timesteps. Return = 22.830564507974316
Episo

Episode 9199 finished after 352 timesteps. Return = -68.86631738502267
Episode 9200 finished after 136 timesteps. Return = -23.167832546956078
Episode 9201 finished after 207 timesteps. Return = -59.78654931753027
Episode 9202 finished after 184 timesteps. Return = -292.07025600238194
Episode 9203 finished after 187 timesteps. Return = -217.91056315812392
Episode 9204 finished after 233 timesteps. Return = -44.882019620440886
Episode 9205 finished after 110 timesteps. Return = -11.554284056578695
Episode 9206 finished after 124 timesteps. Return = -53.80223047950614
Episode 9207 finished after 164 timesteps. Return = -1.9759702680998998
Episode 9208 finished after 152 timesteps. Return = -94.58751873694243
Episode 9209 finished after 155 timesteps. Return = -61.64527935599875
Episode 9210 finished after 267 timesteps. Return = -105.50360266767862
Episode 9211 finished after 125 timesteps. Return = -82.88132868695345
Episode 9212 finished after 199 timesteps. Return = -9.494436265444548

Episode 9314 finished after 305 timesteps. Return = -19.8526356847933
Episode 9315 finished after 331 timesteps. Return = -181.17122802486378
Episode 9316 finished after 176 timesteps. Return = -279.8704314078692
Episode 9317 finished after 140 timesteps. Return = -21.991891217008543
Episode 9318 finished after 163 timesteps. Return = -42.55501103296015
Episode 9319 finished after 224 timesteps. Return = -49.29463192948478
Episode 9320 finished after 185 timesteps. Return = -240.94468810800828
Episode 9321 finished after 97 timesteps. Return = -20.721394477889774
Episode 9322 finished after 136 timesteps. Return = -27.08198673678929
Episode 9323 finished after 301 timesteps. Return = -40.7856261133949
Episode 9324 finished after 203 timesteps. Return = 5.858178562804937
Episode 9325 finished after 299 timesteps. Return = -55.20588744741484
Episode 9326 finished after 683 timesteps. Return = 1.1657579583280864
Episode 9327 finished after 143 timesteps. Return = -94.05719096230305
Episod

Episode 9431 finished after 268 timesteps. Return = -238.98452782850993
Episode 9432 finished after 118 timesteps. Return = -8.110098484922247
Episode 9433 finished after 240 timesteps. Return = -134.1236560925731
Episode 9434 finished after 176 timesteps. Return = -260.80136743239666
Episode 9435 finished after 135 timesteps. Return = -14.907170003916093
Episode 9436 finished after 299 timesteps. Return = 11.146679576318263
Episode 9437 finished after 1000 timesteps. Return = 32.82148313779803
Episode 9438 finished after 230 timesteps. Return = -81.34397107092119
Episode 9439 finished after 120 timesteps. Return = -239.5326875533221
Episode 9440 finished after 266 timesteps. Return = -7.774768291086531
Episode 9441 finished after 215 timesteps. Return = -250.86266525784913
Episode 9442 finished after 1000 timesteps. Return = -27.484156101651074
Episode 9443 finished after 1000 timesteps. Return = -21.02206202761361
Episode 9444 finished after 241 timesteps. Return = -59.1640247123302


Episode 9547 finished after 169 timesteps. Return = -155.81631163742315
Episode 9548 finished after 153 timesteps. Return = 11.741413866858807
Episode 9549 finished after 140 timesteps. Return = -32.19041533899886
Episode 9550 finished after 152 timesteps. Return = -231.48380580636965
Episode 9551 finished after 113 timesteps. Return = -34.778950770294344
Episode 9552 finished after 153 timesteps. Return = -29.797947958506086
Episode 9553 finished after 230 timesteps. Return = -27.765766157521924
Episode 9554 finished after 130 timesteps. Return = -98.91038918752423
Episode 9555 finished after 1000 timesteps. Return = 33.10878901132591
Episode 9556 finished after 1000 timesteps. Return = 10.940304448556068
Episode 9557 finished after 148 timesteps. Return = 20.04979770746985
Episode 9558 finished after 357 timesteps. Return = -303.0400358147116
Episode 9559 finished after 722 timesteps. Return = -77.36638306112494
Episode 9560 finished after 883 timesteps. Return = -212.70297978330456


Episode 9663 finished after 122 timesteps. Return = 39.702475095771376
Episode 9664 finished after 164 timesteps. Return = -94.87203642753498
Episode 9665 finished after 241 timesteps. Return = -25.47404985066362
Episode 9666 finished after 122 timesteps. Return = -32.763548628265085
Episode 9667 finished after 241 timesteps. Return = -92.68923438726422
Episode 9668 finished after 281 timesteps. Return = -108.53124792684335
Episode 9669 finished after 211 timesteps. Return = -207.6933293532298
Episode 9670 finished after 1000 timesteps. Return = -54.4413177783661
Episode 9671 finished after 195 timesteps. Return = -291.2795479527738
Episode 9672 finished after 141 timesteps. Return = -26.44984730822877
Episode 9673 finished after 120 timesteps. Return = -335.7875439090145
Episode 9674 finished after 203 timesteps. Return = 11.703335782665022
Episode 9675 finished after 234 timesteps. Return = -56.49716333977382
Episode 9676 finished after 396 timesteps. Return = -302.0795440216392
Epis

Episode 9778 finished after 111 timesteps. Return = 55.31553787320428
Episode 9779 finished after 85 timesteps. Return = -28.793455108158682
Episode 9780 finished after 272 timesteps. Return = -15.518211462159655
Episode 9781 finished after 147 timesteps. Return = 27.764254361582914
Episode 9782 finished after 136 timesteps. Return = 25.765122640865172
Episode 9783 finished after 1000 timesteps. Return = 30.253485235501326
Episode 9784 finished after 190 timesteps. Return = -40.14256624653288
Episode 9785 finished after 1000 timesteps. Return = 16.239504132693707
Episode 9786 finished after 132 timesteps. Return = -261.45312114424746
Episode 9787 finished after 211 timesteps. Return = -283.095647613929
Episode 9788 finished after 247 timesteps. Return = -39.285146920791306
Episode 9789 finished after 160 timesteps. Return = -76.93936218844297
Episode 9790 finished after 119 timesteps. Return = -13.612034015875707
Episode 9791 finished after 208 timesteps. Return = -200.74444563276646
E

Episode 9894 finished after 354 timesteps. Return = -109.28340653179676
Episode 9895 finished after 154 timesteps. Return = 32.06168911402878
Episode 9896 finished after 119 timesteps. Return = -99.93612916397103
Episode 9897 finished after 128 timesteps. Return = -35.029851227326205
Episode 9898 finished after 120 timesteps. Return = 14.405037090035691
Episode 9899 finished after 168 timesteps. Return = -19.770297039421067
Episode 9900 finished after 123 timesteps. Return = -252.3187913948659
Episode 9901 finished after 244 timesteps. Return = 12.38334137695314
Episode 9902 finished after 124 timesteps. Return = -120.3339883177373
Episode 9903 finished after 118 timesteps. Return = -1.0456303583804214
Episode 9904 finished after 1000 timesteps. Return = -21.999875000017568
Episode 9905 finished after 127 timesteps. Return = -10.17884392747824
Episode 9906 finished after 247 timesteps. Return = -106.81182862251288
Episode 9907 finished after 207 timesteps. Return = -19.27188170547167
E

Episode 10011 finished after 195 timesteps. Return = -49.32966266811276
Episode 10012 finished after 260 timesteps. Return = 2.834676621940787
Episode 10013 finished after 449 timesteps. Return = -224.6963742905486
Episode 10014 finished after 137 timesteps. Return = -168.69957582164903
Episode 10015 finished after 124 timesteps. Return = -126.93797808526749
Episode 10016 finished after 86 timesteps. Return = -25.680134701243077
Episode 10017 finished after 139 timesteps. Return = -56.63625026847245
Episode 10018 finished after 145 timesteps. Return = -153.40562171083883
Episode 10019 finished after 229 timesteps. Return = -235.75057733767503
Episode 10020 finished after 147 timesteps. Return = -109.65830273874435
Episode 10021 finished after 158 timesteps. Return = -96.76655762273123
Episode 10022 finished after 154 timesteps. Return = -134.81662642322704
Episode 10023 finished after 250 timesteps. Return = -14.423028052854846
Episode 10024 finished after 404 timesteps. Return = -183.

Episode 10125 finished after 1000 timesteps. Return = 37.00787592896277
Episode 10126 finished after 205 timesteps. Return = 22.78169894615634
Episode 10127 finished after 148 timesteps. Return = -90.09993887606868
Episode 10128 finished after 1000 timesteps. Return = 18.08564758217956
Episode 10129 finished after 239 timesteps. Return = -337.2371427503602
Episode 10130 finished after 407 timesteps. Return = -247.28533799686858
Episode 10131 finished after 137 timesteps. Return = -46.26203967157272
Episode 10132 finished after 144 timesteps. Return = -142.01409920910677
Episode 10133 finished after 1000 timesteps. Return = 11.417765602781442
Episode 10134 finished after 151 timesteps. Return = -157.07480045453048
Episode 10135 finished after 232 timesteps. Return = -341.6136864698989
Episode 10136 finished after 143 timesteps. Return = 7.818454382096149
Episode 10137 finished after 169 timesteps. Return = -94.09253205767584
Episode 10138 finished after 104 timesteps. Return = -190.6237

Episode 10242 finished after 191 timesteps. Return = -199.27868827740636
Episode 10243 finished after 144 timesteps. Return = -5.05618796586414
Episode 10244 finished after 231 timesteps. Return = -260.8675788081598
Episode 10245 finished after 106 timesteps. Return = -77.30343730611834
Episode 10246 finished after 235 timesteps. Return = -61.010074099859395
Episode 10247 finished after 247 timesteps. Return = -64.03536624706669
Episode 10248 finished after 324 timesteps. Return = -80.3563679016778
Episode 10249 finished after 181 timesteps. Return = -29.72680697272132
Episode 10250 finished after 141 timesteps. Return = -88.78646982198727
Episode 10251 finished after 193 timesteps. Return = -172.97678209765277
Episode 10252 finished after 100 timesteps. Return = 9.843968757955693
Episode 10253 finished after 150 timesteps. Return = -293.6201309456121
Episode 10254 finished after 144 timesteps. Return = -120.80323166228732
Episode 10255 finished after 224 timesteps. Return = -228.09241

Episode 10357 finished after 207 timesteps. Return = 5.579506276965759
Episode 10358 finished after 105 timesteps. Return = -219.4310474438831
Episode 10359 finished after 138 timesteps. Return = 8.820623516064956
Episode 10360 finished after 224 timesteps. Return = -320.12236614422284
Episode 10361 finished after 154 timesteps. Return = -13.878191104999843
Episode 10362 finished after 152 timesteps. Return = -80.21712807530315
Episode 10363 finished after 1000 timesteps. Return = -73.10088205888859
Episode 10364 finished after 342 timesteps. Return = -318.2958532618828
Episode 10365 finished after 179 timesteps. Return = 4.856163228458854
Episode 10366 finished after 192 timesteps. Return = -186.5916970948026
Episode 10367 finished after 179 timesteps. Return = -252.74817542799835
Episode 10368 finished after 185 timesteps. Return = -255.7290885224782
Episode 10369 finished after 131 timesteps. Return = -365.54959449236685
Episode 10370 finished after 236 timesteps. Return = -75.72396

Episode 10471 finished after 299 timesteps. Return = -140.06738774054733
Episode 10472 finished after 102 timesteps. Return = -50.33538463439771
Episode 10473 finished after 133 timesteps. Return = -110.26855866864463
Episode 10474 finished after 379 timesteps. Return = -169.82637529857533
Episode 10475 finished after 136 timesteps. Return = -148.78878298786037
Episode 10476 finished after 148 timesteps. Return = -54.67376641641025
Episode 10477 finished after 288 timesteps. Return = -192.2466987530684
Episode 10478 finished after 149 timesteps. Return = -158.42211709063676
Episode 10479 finished after 270 timesteps. Return = 7.7559683711057374
Episode 10480 finished after 411 timesteps. Return = -267.91423011743734
Episode 10481 finished after 1000 timesteps. Return = 24.513110116256765
Episode 10482 finished after 200 timesteps. Return = -310.792501223393
Episode 10483 finished after 145 timesteps. Return = -12.969931361675421
Episode 10484 finished after 157 timesteps. Return = -12.

Episode 10586 finished after 133 timesteps. Return = -200.79520280227447
Episode 10587 finished after 203 timesteps. Return = 13.643840902621022
Episode 10588 finished after 112 timesteps. Return = -46.01305849486777
Episode 10589 finished after 191 timesteps. Return = -258.7866519137648
Episode 10590 finished after 148 timesteps. Return = -53.235307045992016
Episode 10591 finished after 198 timesteps. Return = 3.9884686823401694
Episode 10592 finished after 117 timesteps. Return = -82.31549791206024
Episode 10593 finished after 222 timesteps. Return = -337.49066578148484
Episode 10594 finished after 182 timesteps. Return = -285.0667152379356
Episode 10595 finished after 468 timesteps. Return = -406.9042921465247
Episode 10596 finished after 120 timesteps. Return = 32.153269543845994
Episode 10597 finished after 375 timesteps. Return = -377.21909251724026
Episode 10598 finished after 113 timesteps. Return = -110.02648934879447
Episode 10599 finished after 258 timesteps. Return = -354.8

Episode 10702 finished after 113 timesteps. Return = 0.12805684842912513
Episode 10703 finished after 208 timesteps. Return = -162.99346454846636
Episode 10704 finished after 1000 timesteps. Return = 40.48292104405859
Episode 10705 finished after 156 timesteps. Return = -86.3857077218332
Episode 10706 finished after 134 timesteps. Return = -490.9231614934044
Episode 10707 finished after 132 timesteps. Return = 2.4232792017205185
Episode 10708 finished after 249 timesteps. Return = -54.16803450673195
Episode 10709 finished after 90 timesteps. Return = -45.99478959796258
Episode 10710 finished after 214 timesteps. Return = -357.57847654478184
Episode 10711 finished after 375 timesteps. Return = -315.4521979467831
Episode 10712 finished after 119 timesteps. Return = -80.53530596334618
Episode 10713 finished after 1000 timesteps. Return = 44.75349598773145
Episode 10714 finished after 1000 timesteps. Return = 10.366916489294592
Episode 10715 finished after 175 timesteps. Return = -532.0547

Episode 10816 finished after 197 timesteps. Return = -267.32642350493995
Episode 10817 finished after 200 timesteps. Return = -48.111445745104035
Episode 10818 finished after 188 timesteps. Return = -202.15664104461638
Episode 10819 finished after 124 timesteps. Return = -39.3538058785251
Episode 10820 finished after 100 timesteps. Return = -128.39638658363577
Episode 10821 finished after 157 timesteps. Return = -28.54916461516936
Episode 10822 finished after 121 timesteps. Return = -91.65102722126895
Episode 10823 finished after 185 timesteps. Return = -118.6656151611901
Episode 10824 finished after 271 timesteps. Return = -141.52438072171947
Episode 10825 finished after 193 timesteps. Return = -59.96891808918666
Episode 10826 finished after 175 timesteps. Return = -7.8720036570733924
Episode 10827 finished after 103 timesteps. Return = -43.229556896514055
Episode 10828 finished after 224 timesteps. Return = -213.42120318104855
Episode 10829 finished after 131 timesteps. Return = -211

Episode 10931 finished after 149 timesteps. Return = -119.67947333346602
Episode 10932 finished after 218 timesteps. Return = -32.29514549279317
Episode 10933 finished after 128 timesteps. Return = 2.865080884107897
Episode 10934 finished after 118 timesteps. Return = -240.94977937454075
Episode 10935 finished after 87 timesteps. Return = -87.4656676931811
Episode 10936 finished after 79 timesteps. Return = -59.6696699153144
Episode 10937 finished after 343 timesteps. Return = -220.20947967173294
Episode 10938 finished after 184 timesteps. Return = 11.781271973349874
Episode 10939 finished after 152 timesteps. Return = -55.31195033666382
Episode 10940 finished after 127 timesteps. Return = -52.729074752262335
Episode 10941 finished after 200 timesteps. Return = -179.42001083411873
Episode 10942 finished after 225 timesteps. Return = -13.55978214127029
Episode 10943 finished after 98 timesteps. Return = -172.03120391609872
Episode 10944 finished after 1000 timesteps. Return = 80.0697747

Episode 11045 finished after 1000 timesteps. Return = 13.883811457530477
Episode 11046 finished after 452 timesteps. Return = 28.42238659202158
Episode 11047 finished after 171 timesteps. Return = 9.695696981695122
Episode 11048 finished after 216 timesteps. Return = -234.9778424177936
Episode 11049 finished after 160 timesteps. Return = -3.034044172688496
Episode 11050 finished after 93 timesteps. Return = -80.13643283904983
Episode 11051 finished after 171 timesteps. Return = -107.4436226977037
Episode 11052 finished after 1000 timesteps. Return = 48.53117383470499
Episode 11053 finished after 201 timesteps. Return = -188.29213508592545
Episode 11054 finished after 1000 timesteps. Return = 47.92492571243597
Episode 11055 finished after 172 timesteps. Return = -14.403837581524684
Episode 11056 finished after 383 timesteps. Return = -122.3302662692706
Episode 11057 finished after 129 timesteps. Return = -87.62862483228585
Episode 11058 finished after 422 timesteps. Return = -19.8515051

Episode 11162 finished after 171 timesteps. Return = -70.05332113182223
Episode 11163 finished after 235 timesteps. Return = -448.9502978875612
Episode 11164 finished after 272 timesteps. Return = -132.1309459719843
Episode 11165 finished after 194 timesteps. Return = -247.72908223939544
Episode 11166 finished after 193 timesteps. Return = -322.1108619745196
Episode 11167 finished after 103 timesteps. Return = 35.73546962226612
Episode 11168 finished after 178 timesteps. Return = -248.2819413555159
Episode 11169 finished after 202 timesteps. Return = -337.09769651120223
Episode 11170 finished after 134 timesteps. Return = -15.225192643096207
Episode 11171 finished after 173 timesteps. Return = -45.77731162585177
Episode 11172 finished after 360 timesteps. Return = -279.64325433746353
Episode 11173 finished after 183 timesteps. Return = -50.44279807461532
Episode 11174 finished after 200 timesteps. Return = 58.329924803527234
Episode 11175 finished after 280 timesteps. Return = -210.483

Episode 11277 finished after 1000 timesteps. Return = 87.29993951060645
Episode 11278 finished after 277 timesteps. Return = -239.17967276864601
Episode 11279 finished after 153 timesteps. Return = -12.050732887320876
Episode 11280 finished after 1000 timesteps. Return = 35.55028101378137
Episode 11281 finished after 244 timesteps. Return = -38.88386523942778
Episode 11282 finished after 154 timesteps. Return = -95.49658068540563
Episode 11283 finished after 268 timesteps. Return = -81.13878880075978
Episode 11284 finished after 132 timesteps. Return = 19.77884033414459
Episode 11285 finished after 139 timesteps. Return = 4.8310111033047605
Episode 11286 finished after 112 timesteps. Return = -218.95178085531768
Episode 11287 finished after 313 timesteps. Return = -90.12395235545588
Episode 11288 finished after 268 timesteps. Return = -174.86515291487802
Episode 11289 finished after 248 timesteps. Return = -289.6857428917332
Episode 11290 finished after 179 timesteps. Return = 35.64490

Episode 11391 finished after 1000 timesteps. Return = -22.029877008293436
Episode 11392 finished after 463 timesteps. Return = -228.05844868573655
Episode 11393 finished after 315 timesteps. Return = -197.25164967439312
Episode 11394 finished after 127 timesteps. Return = -77.74552221673447
Episode 11395 finished after 423 timesteps. Return = -305.8333233380075
Episode 11396 finished after 230 timesteps. Return = -112.64408110185671
Episode 11397 finished after 129 timesteps. Return = 16.51762577847923
Episode 11398 finished after 143 timesteps. Return = -190.51081874497157
Episode 11399 finished after 721 timesteps. Return = -105.36864133237123
Episode 11400 finished after 224 timesteps. Return = -327.40762051541304
Episode 11401 finished after 240 timesteps. Return = -68.5058130746587
Episode 11402 finished after 162 timesteps. Return = -44.71403452393163
Episode 11403 finished after 168 timesteps. Return = 14.186968925828594
Episode 11404 finished after 142 timesteps. Return = -241.

Episode 11505 finished after 313 timesteps. Return = -136.33898168264676
Episode 11506 finished after 140 timesteps. Return = -45.17345937483921
Episode 11507 finished after 220 timesteps. Return = -297.96484173792544
Episode 11508 finished after 121 timesteps. Return = -67.77888159059621
Episode 11509 finished after 1000 timesteps. Return = 20.594261842009963
Episode 11510 finished after 145 timesteps. Return = -85.78878136301464
Episode 11511 finished after 76 timesteps. Return = -70.92879646576984
Episode 11512 finished after 1000 timesteps. Return = 60.14517085563274
Episode 11513 finished after 216 timesteps. Return = -62.75520562435385
Episode 11514 finished after 136 timesteps. Return = -320.9553939540309
Episode 11515 finished after 402 timesteps. Return = -141.56531556600638
Episode 11516 finished after 290 timesteps. Return = -294.86802466668496
Episode 11517 finished after 278 timesteps. Return = -181.9859096872814
Episode 11518 finished after 142 timesteps. Return = -7.8899

Episode 11620 finished after 85 timesteps. Return = -44.16833765921671
Episode 11621 finished after 249 timesteps. Return = -16.560974574195342
Episode 11622 finished after 202 timesteps. Return = -15.554617685715058
Episode 11623 finished after 1000 timesteps. Return = 54.488205246882195
Episode 11624 finished after 168 timesteps. Return = -113.00007516422667
Episode 11625 finished after 1000 timesteps. Return = 68.68761075247521
Episode 11626 finished after 323 timesteps. Return = -132.13430091549822
Episode 11627 finished after 205 timesteps. Return = -67.9573682063458
Episode 11628 finished after 175 timesteps. Return = -21.8854196056988
Episode 11629 finished after 151 timesteps. Return = -9.87687118260898
Episode 11630 finished after 116 timesteps. Return = -1.1202283866808074
Episode 11631 finished after 140 timesteps. Return = 10.32146145026951
Episode 11632 finished after 135 timesteps. Return = -52.12125341690762
Episode 11633 finished after 173 timesteps. Return = -30.985094

Episode 11736 finished after 1000 timesteps. Return = 41.67262383217256
Episode 11737 finished after 224 timesteps. Return = -248.33019471790615
Episode 11738 finished after 146 timesteps. Return = -75.84205915217854
Episode 11739 finished after 75 timesteps. Return = -50.112537708250585
Episode 11740 finished after 357 timesteps. Return = -229.1906337199605
Episode 11741 finished after 240 timesteps. Return = 27.18715167551794
Episode 11742 finished after 158 timesteps. Return = 11.38938034139305
Episode 11743 finished after 168 timesteps. Return = -122.29044778298032
Episode 11744 finished after 124 timesteps. Return = 11.23957072928448
Episode 11745 finished after 101 timesteps. Return = -16.4661009661273
Episode 11746 finished after 246 timesteps. Return = -280.1982281260282
Episode 11747 finished after 366 timesteps. Return = -84.28336341823967
Episode 11748 finished after 1000 timesteps. Return = 2.1683349706870225
Episode 11749 finished after 194 timesteps. Return = -243.7730079

Episode 11850 finished after 328 timesteps. Return = 9.524935315913126
Episode 11851 finished after 130 timesteps. Return = 5.213960324135897
Episode 11852 finished after 1000 timesteps. Return = 67.25015583245077
Episode 11853 finished after 211 timesteps. Return = -182.13021911801007
Episode 11854 finished after 269 timesteps. Return = -17.575657900401865
Episode 11855 finished after 243 timesteps. Return = -170.9079491143929
Episode 11856 finished after 155 timesteps. Return = -21.096531629028703
Episode 11857 finished after 1000 timesteps. Return = 47.94798665193515
Episode 11858 finished after 137 timesteps. Return = -50.0758426232636
Episode 11859 finished after 216 timesteps. Return = -223.333840206612
Episode 11860 finished after 254 timesteps. Return = -39.57066861895197
Episode 11861 finished after 175 timesteps. Return = -62.459108240238116
Episode 11862 finished after 1000 timesteps. Return = 33.54029165495655
Episode 11863 finished after 260 timesteps. Return = -180.170018

Episode 11966 finished after 1000 timesteps. Return = 66.12107629898443
Episode 11967 finished after 204 timesteps. Return = -66.40123733574123
Episode 11968 finished after 1000 timesteps. Return = 78.35249183538536
Episode 11969 finished after 182 timesteps. Return = 47.64819947533499
Episode 11970 finished after 130 timesteps. Return = -47.71536231069345
Episode 11971 finished after 1000 timesteps. Return = 3.920853747584692
Episode 11972 finished after 266 timesteps. Return = -53.21077028254213
Episode 11973 finished after 153 timesteps. Return = -13.07524529311766
Episode 11974 finished after 91 timesteps. Return = -10.650623123548186
Episode 11975 finished after 138 timesteps. Return = -68.75149845303602
Episode 11976 finished after 1000 timesteps. Return = -1.1317229571653575
Episode 11977 finished after 242 timesteps. Return = -103.21116845420372
Episode 11978 finished after 133 timesteps. Return = 59.55269715166543
Episode 11979 finished after 257 timesteps. Return = -113.37170

Episode 12080 finished after 216 timesteps. Return = -264.55884650171936
Episode 12081 finished after 231 timesteps. Return = -28.010180039376323
Episode 12082 finished after 155 timesteps. Return = -21.351251091509454
Episode 12083 finished after 207 timesteps. Return = -145.35541073800414
Episode 12084 finished after 172 timesteps. Return = 2.2333003664036397
Episode 12085 finished after 113 timesteps. Return = -65.70559421083486
Episode 12086 finished after 244 timesteps. Return = -1.4387178100567155
Episode 12087 finished after 166 timesteps. Return = -36.80414353490781
Episode 12088 finished after 207 timesteps. Return = -213.95354719710434
Episode 12089 finished after 120 timesteps. Return = -82.79408775459709
Episode 12090 finished after 129 timesteps. Return = 13.84134906200849
Episode 12091 finished after 169 timesteps. Return = -22.217642595814425
Episode 12092 finished after 90 timesteps. Return = -35.57735183828602
Episode 12093 finished after 376 timesteps. Return = -27.38

Episode 12195 finished after 149 timesteps. Return = -68.0924912229955
Episode 12196 finished after 356 timesteps. Return = -294.3799486501576
Episode 12197 finished after 544 timesteps. Return = -46.8086028883841
Episode 12198 finished after 168 timesteps. Return = 7.168048835837595
Episode 12199 finished after 217 timesteps. Return = -258.5309478717986
Episode 12200 finished after 284 timesteps. Return = -286.3127715801607
Episode 12201 finished after 306 timesteps. Return = -106.22185696341367
Episode 12202 finished after 197 timesteps. Return = -121.50498377979517
Episode 12203 finished after 700 timesteps. Return = -25.189420759093366
Episode 12204 finished after 380 timesteps. Return = -284.53953336328647
Episode 12205 finished after 321 timesteps. Return = -30.740147814128548
Episode 12206 finished after 211 timesteps. Return = -270.1909661814119
Episode 12207 finished after 136 timesteps. Return = -195.30107341158038
Episode 12208 finished after 208 timesteps. Return = -1.51223

Episode 12309 finished after 567 timesteps. Return = 2.7775136984234763
Episode 12310 finished after 197 timesteps. Return = -16.35176239052501
Episode 12311 finished after 499 timesteps. Return = -214.93660721358964
Episode 12312 finished after 203 timesteps. Return = -207.6833807692267
Episode 12313 finished after 200 timesteps. Return = -18.947692202876468
Episode 12314 finished after 100 timesteps. Return = -125.27863801439388
Episode 12315 finished after 212 timesteps. Return = 19.05895637654926
Episode 12316 finished after 210 timesteps. Return = -66.37313872424627
Episode 12317 finished after 190 timesteps. Return = 35.32420544059559
Episode 12318 finished after 199 timesteps. Return = -63.64284802396155
Episode 12319 finished after 1000 timesteps. Return = -49.369888526756796
Episode 12320 finished after 83 timesteps. Return = -59.142425913875854
Episode 12321 finished after 272 timesteps. Return = -76.54949413203951
Episode 12322 finished after 178 timesteps. Return = -182.281

Episode 12423 finished after 1000 timesteps. Return = -25.94401274899615
Episode 12424 finished after 1000 timesteps. Return = 53.47061874280765
Episode 12425 finished after 156 timesteps. Return = -47.83422614342453
Episode 12426 finished after 212 timesteps. Return = -284.62696967572174
Episode 12427 finished after 309 timesteps. Return = -84.12725593313598
Episode 12428 finished after 137 timesteps. Return = 39.32219788272576
Episode 12429 finished after 182 timesteps. Return = -10.304672727215134
Episode 12430 finished after 126 timesteps. Return = 8.435936900323512
Episode 12431 finished after 92 timesteps. Return = 10.137504567033801
Episode 12432 finished after 1000 timesteps. Return = 6.3151726931412515
Episode 12433 finished after 246 timesteps. Return = 6.816328878232937
Episode 12434 finished after 173 timesteps. Return = -55.33939105325925
Episode 12435 finished after 240 timesteps. Return = -94.96929961185066
Episode 12436 finished after 666 timesteps. Return = -109.822728

Episode 12537 finished after 311 timesteps. Return = -122.14122674659052
Episode 12538 finished after 276 timesteps. Return = -41.581996184569206
Episode 12539 finished after 221 timesteps. Return = -159.97103389944317
Episode 12540 finished after 255 timesteps. Return = -129.90843982997998
Episode 12541 finished after 180 timesteps. Return = -91.76768306137365
Episode 12542 finished after 156 timesteps. Return = -368.0539547318671
Episode 12543 finished after 136 timesteps. Return = -137.36706449970893
Episode 12544 finished after 194 timesteps. Return = -112.02780037456532
Episode 12545 finished after 136 timesteps. Return = -86.05627023099612
Episode 12546 finished after 1000 timesteps. Return = 86.32005331826431
Episode 12547 finished after 175 timesteps. Return = 11.310078162586962
Episode 12548 finished after 335 timesteps. Return = -252.32581567551227
Episode 12549 finished after 192 timesteps. Return = 12.286635659066135
Episode 12550 finished after 125 timesteps. Return = -110

Episode 12651 finished after 217 timesteps. Return = -44.63163378985695
Episode 12652 finished after 181 timesteps. Return = -165.6796104396829
Episode 12653 finished after 179 timesteps. Return = -37.26306183208824
Episode 12654 finished after 1000 timesteps. Return = -74.28133302617218
Episode 12655 finished after 316 timesteps. Return = -23.008065076354796
Episode 12656 finished after 141 timesteps. Return = -27.020379630104273
Episode 12657 finished after 121 timesteps. Return = -53.422899611626654
Episode 12658 finished after 224 timesteps. Return = -257.78854117151013
Episode 12659 finished after 224 timesteps. Return = -57.32043744706002
Episode 12660 finished after 120 timesteps. Return = -90.88749533813514
Episode 12661 finished after 139 timesteps. Return = -4.825459081514552
Episode 12662 finished after 127 timesteps. Return = 38.81032140818246
Episode 12663 finished after 177 timesteps. Return = -66.80297541437125
Episode 12664 finished after 190 timesteps. Return = -360.45

Episode 12765 finished after 1000 timesteps. Return = -13.49232235243168
Episode 12766 finished after 187 timesteps. Return = -198.39004238175028
Episode 12767 finished after 117 timesteps. Return = -181.68399519240205
Episode 12768 finished after 1000 timesteps. Return = -31.95525486573882
Episode 12769 finished after 90 timesteps. Return = 13.0096068697069
Episode 12770 finished after 1000 timesteps. Return = 61.758311902274656
Episode 12771 finished after 832 timesteps. Return = -346.2493477973999
Episode 12772 finished after 189 timesteps. Return = 8.990752813933227
Episode 12773 finished after 224 timesteps. Return = -24.1627773381787
Episode 12774 finished after 111 timesteps. Return = 18.984365961448574
Episode 12775 finished after 235 timesteps. Return = -301.6637769003769
Episode 12776 finished after 1000 timesteps. Return = 47.374987507612026
Episode 12777 finished after 1000 timesteps. Return = 32.5590670579157
Episode 12778 finished after 165 timesteps. Return = -13.6011150

Episode 12879 finished after 1000 timesteps. Return = 46.904479357329905
Episode 12880 finished after 349 timesteps. Return = -57.770172508346505
Episode 12881 finished after 240 timesteps. Return = 48.284963406160244
Episode 12882 finished after 153 timesteps. Return = -9.849373897393221
Episode 12883 finished after 270 timesteps. Return = -90.41969138304871
Episode 12884 finished after 146 timesteps. Return = -39.39347236609039
Episode 12885 finished after 218 timesteps. Return = -27.602592992632978
Episode 12886 finished after 224 timesteps. Return = 21.57920739155476
Episode 12887 finished after 132 timesteps. Return = 27.747964457624263
Episode 12888 finished after 231 timesteps. Return = -287.4009899712572
Episode 12889 finished after 618 timesteps. Return = -272.8991786416726
Episode 12890 finished after 314 timesteps. Return = -12.948258754848908
Episode 12891 finished after 1000 timesteps. Return = -13.699054970525152
Episode 12892 finished after 161 timesteps. Return = -49.26

Episode 12995 finished after 286 timesteps. Return = -10.76087307292228
Episode 12996 finished after 1000 timesteps. Return = 82.05356861179311
Episode 12997 finished after 139 timesteps. Return = -85.5342649374561
Episode 12998 finished after 71 timesteps. Return = -58.796150622537056
Episode 12999 finished after 130 timesteps. Return = -87.28988573556478
Episode 13000 finished after 200 timesteps. Return = -0.9434286636882661
Episode 13001 finished after 161 timesteps. Return = -90.39913869086963
Episode 13002 finished after 165 timesteps. Return = 23.306999167802502
Episode 13003 finished after 111 timesteps. Return = -48.79781058946459
Episode 13004 finished after 151 timesteps. Return = -11.057675358142703
Episode 13005 finished after 1000 timesteps. Return = -21.71486953748378
Episode 13006 finished after 82 timesteps. Return = -40.187954829652924
Episode 13007 finished after 177 timesteps. Return = 14.571924055747033
Episode 13008 finished after 73 timesteps. Return = -46.056805

Episode 13110 finished after 244 timesteps. Return = -55.59425920737777
Episode 13111 finished after 169 timesteps. Return = -62.0810606340551
Episode 13112 finished after 106 timesteps. Return = -98.25566072545564
Episode 13113 finished after 180 timesteps. Return = -246.79152996175642
Episode 13114 finished after 137 timesteps. Return = 3.4062428516612044
Episode 13115 finished after 277 timesteps. Return = 8.517217534677428
Episode 13116 finished after 193 timesteps. Return = -8.58015646477638
Episode 13117 finished after 206 timesteps. Return = 6.488254218198733
Episode 13118 finished after 326 timesteps. Return = -127.57399707095206
Episode 13119 finished after 191 timesteps. Return = -25.535666194733068
Episode 13120 finished after 178 timesteps. Return = -204.84336899343657
Episode 13121 finished after 102 timesteps. Return = -41.53455513450349
Episode 13122 finished after 217 timesteps. Return = -48.924244252702835
Episode 13123 finished after 1000 timesteps. Return = 33.752273

Episode 13224 finished after 139 timesteps. Return = -52.211308791741736
Episode 13225 finished after 166 timesteps. Return = 47.449618474622014
Episode 13226 finished after 168 timesteps. Return = -215.31771796586838
Episode 13227 finished after 1000 timesteps. Return = 62.58668986264074
Episode 13228 finished after 161 timesteps. Return = 1.7922617986019844
Episode 13229 finished after 183 timesteps. Return = 35.778489494695634
Episode 13230 finished after 282 timesteps. Return = -130.9304571275874
Episode 13231 finished after 322 timesteps. Return = -183.43920767276353
Episode 13232 finished after 176 timesteps. Return = -210.64565700065424
Episode 13233 finished after 349 timesteps. Return = -143.52737738189546
Episode 13234 finished after 1000 timesteps. Return = 63.859380935494165
Episode 13235 finished after 381 timesteps. Return = -241.62026555441074
Episode 13236 finished after 428 timesteps. Return = -283.8399748825692
Episode 13237 finished after 116 timesteps. Return = -194

Episode 13338 finished after 364 timesteps. Return = -73.48426904034902
Episode 13339 finished after 327 timesteps. Return = -66.08895054592605
Episode 13340 finished after 134 timesteps. Return = -1.2340332879179385
Episode 13341 finished after 367 timesteps. Return = 2.469222540372897
Episode 13342 finished after 494 timesteps. Return = -331.29312406331553
Episode 13343 finished after 217 timesteps. Return = -289.05033782507365
Episode 13344 finished after 378 timesteps. Return = -286.96253727202725
Episode 13345 finished after 1000 timesteps. Return = 5.1774652223272435
Episode 13346 finished after 324 timesteps. Return = -278.1611866936795
Episode 13347 finished after 313 timesteps. Return = -187.39718949317756
Episode 13348 finished after 1000 timesteps. Return = 34.10608135609095
Episode 13349 finished after 1000 timesteps. Return = -83.2139800997053
Episode 13350 finished after 339 timesteps. Return = -30.50109993465918
Episode 13351 finished after 142 timesteps. Return = -127.7

Episode 13452 finished after 1000 timesteps. Return = 36.662364846276205
Episode 13453 finished after 413 timesteps. Return = -244.95721721987556
Episode 13454 finished after 933 timesteps. Return = -82.57619448358498
Episode 13455 finished after 271 timesteps. Return = -217.37730404141854
Episode 13456 finished after 331 timesteps. Return = -51.12404031334121
Episode 13457 finished after 729 timesteps. Return = -361.5596556041469
Episode 13458 finished after 214 timesteps. Return = -73.58437321054936
Episode 13459 finished after 336 timesteps. Return = -62.48639572970879
Episode 13460 finished after 260 timesteps. Return = -107.72504941520377
Episode 13461 finished after 438 timesteps. Return = -224.1788379173133
Episode 13462 finished after 1000 timesteps. Return = 50.40452404667814
Episode 13463 finished after 176 timesteps. Return = 15.530009214060101
Episode 13464 finished after 251 timesteps. Return = 0.4010762512880319
Episode 13465 finished after 314 timesteps. Return = -14.673

Episode 13567 finished after 1000 timesteps. Return = 59.592291761581755
Episode 13568 finished after 125 timesteps. Return = 11.86945633352866
Episode 13569 finished after 109 timesteps. Return = -51.57075153018763
Episode 13570 finished after 169 timesteps. Return = -28.109056957589274
Episode 13571 finished after 244 timesteps. Return = -35.71616680279915
Episode 13572 finished after 148 timesteps. Return = 17.14385887052002
Episode 13573 finished after 88 timesteps. Return = -81.2435902161008
Episode 13574 finished after 257 timesteps. Return = -105.90145572507241
Episode 13575 finished after 211 timesteps. Return = -4.931169500428325
Episode 13576 finished after 315 timesteps. Return = -158.48335185442676
Episode 13577 finished after 206 timesteps. Return = -149.24611327491286
Episode 13578 finished after 206 timesteps. Return = -67.80791462039862
Episode 13579 finished after 366 timesteps. Return = -108.11532664875422
Episode 13580 finished after 1000 timesteps. Return = 92.38549

Episode 13682 finished after 1000 timesteps. Return = -28.088881275828772
Episode 13683 finished after 216 timesteps. Return = -95.8995010966041
Episode 13684 finished after 237 timesteps. Return = -313.5889948950784
Episode 13685 finished after 109 timesteps. Return = -39.22681927544332
Episode 13686 finished after 220 timesteps. Return = -214.53360548555483
Episode 13687 finished after 95 timesteps. Return = 0.08850848355459107
Episode 13688 finished after 145 timesteps. Return = -18.45931307789462
Episode 13689 finished after 203 timesteps. Return = 14.540938953188686
Episode 13690 finished after 197 timesteps. Return = -194.30368000821747
Episode 13691 finished after 167 timesteps. Return = -82.01860260580737
Episode 13692 finished after 166 timesteps. Return = -162.76366496881954
Episode 13693 finished after 231 timesteps. Return = -146.4324757750494
Episode 13694 finished after 227 timesteps. Return = -12.206070356152836
Episode 13695 finished after 137 timesteps. Return = -83.74

Episode 13797 finished after 203 timesteps. Return = -88.05651802118103
Episode 13798 finished after 271 timesteps. Return = -49.72913041338275
Episode 13799 finished after 155 timesteps. Return = -23.877887525985102
Episode 13800 finished after 592 timesteps. Return = -87.21038312213976
Episode 13801 finished after 149 timesteps. Return = 11.353493509857032
Episode 13802 finished after 1000 timesteps. Return = 53.64770099748131
Episode 13803 finished after 152 timesteps. Return = 3.0496528966628915
Episode 13804 finished after 562 timesteps. Return = -197.99617838265033
Episode 13805 finished after 1000 timesteps. Return = 49.41991143906033
Episode 13806 finished after 136 timesteps. Return = 1.8623805870382029
Episode 13807 finished after 1000 timesteps. Return = -21.94827026887917
Episode 13808 finished after 85 timesteps. Return = -24.355066091682772
Episode 13809 finished after 211 timesteps. Return = -5.772267008142421
Episode 13810 finished after 1000 timesteps. Return = 15.4689

Episode 13911 finished after 257 timesteps. Return = -17.145733929092884
Episode 13912 finished after 446 timesteps. Return = -354.3770910025004
Episode 13913 finished after 170 timesteps. Return = 9.008864393688711
Episode 13914 finished after 235 timesteps. Return = -281.82578010820873
Episode 13915 finished after 433 timesteps. Return = -185.1951329831355
Episode 13916 finished after 226 timesteps. Return = -221.4462815472857
Episode 13917 finished after 145 timesteps. Return = -33.409263944313494
Episode 13918 finished after 323 timesteps. Return = -272.7428578707651
Episode 13919 finished after 561 timesteps. Return = -221.35541382752774
Episode 13920 finished after 1000 timesteps. Return = 72.08400262531524
Episode 13921 finished after 1000 timesteps. Return = -130.84130646491008
Episode 13922 finished after 101 timesteps. Return = -56.780998336080884
Episode 13923 finished after 381 timesteps. Return = -87.80676656837795
Episode 13924 finished after 315 timesteps. Return = -294.

Episode 14027 finished after 1000 timesteps. Return = 27.420032029137783
Episode 14028 finished after 160 timesteps. Return = 11.789763579852774
Episode 14029 finished after 1000 timesteps. Return = 21.546541093785468
Episode 14030 finished after 1000 timesteps. Return = 32.932927118442336
Episode 14031 finished after 1000 timesteps. Return = 85.73497460043667
Episode 14032 finished after 139 timesteps. Return = -66.09753659532659
Episode 14033 finished after 1000 timesteps. Return = 35.768052864985734
Episode 14034 finished after 188 timesteps. Return = 3.0484117502397083
Episode 14035 finished after 189 timesteps. Return = 1.6403492959141062
Episode 14036 finished after 272 timesteps. Return = -213.68856696254943
Episode 14037 finished after 179 timesteps. Return = -54.136849309429444
Episode 14038 finished after 152 timesteps. Return = 40.86046842609673
Episode 14039 finished after 1000 timesteps. Return = 1.6377676219109425
Episode 14040 finished after 144 timesteps. Return = -24.4

Episode 14141 finished after 1000 timesteps. Return = 28.177150723265566
Episode 14142 finished after 215 timesteps. Return = -237.7322474724381
Episode 14143 finished after 154 timesteps. Return = -22.99598144743291
Episode 14144 finished after 333 timesteps. Return = -124.27578158321303
Episode 14145 finished after 96 timesteps. Return = -27.41297707199469
Episode 14146 finished after 862 timesteps. Return = -274.5157420269868
Episode 14147 finished after 190 timesteps. Return = -202.933290586007
Episode 14148 finished after 150 timesteps. Return = -60.99160531025795
Episode 14149 finished after 1000 timesteps. Return = 68.23046171669914
Episode 14150 finished after 235 timesteps. Return = -26.097403846391572
Episode 14151 finished after 264 timesteps. Return = -44.65867141870555
Episode 14152 finished after 154 timesteps. Return = -7.797306725085335
Episode 14153 finished after 1000 timesteps. Return = 74.37026716751512
Episode 14154 finished after 1000 timesteps. Return = 56.667604

Episode 14256 finished after 352 timesteps. Return = -190.0886482691917
Episode 14257 finished after 270 timesteps. Return = -30.312410354287152
Episode 14258 finished after 141 timesteps. Return = -3.107112523993081
Episode 14259 finished after 454 timesteps. Return = -71.0116106226069
Episode 14260 finished after 119 timesteps. Return = -71.67768811801645
Episode 14261 finished after 187 timesteps. Return = -25.347079208303782
Episode 14262 finished after 163 timesteps. Return = 16.616576280390106
Episode 14263 finished after 343 timesteps. Return = -50.67443489430673
Episode 14264 finished after 298 timesteps. Return = -211.15635020027213
Episode 14265 finished after 171 timesteps. Return = -251.7321722033467
Episode 14266 finished after 162 timesteps. Return = -130.48241476553633
Episode 14267 finished after 449 timesteps. Return = -59.80341862402282
Episode 14268 finished after 230 timesteps. Return = -225.4070213387168
Episode 14269 finished after 88 timesteps. Return = -9.627474

Episode 14370 finished after 391 timesteps. Return = -282.8201267446243
Episode 14371 finished after 212 timesteps. Return = -41.94767707647422
Episode 14372 finished after 243 timesteps. Return = -62.69803594002259
Episode 14373 finished after 566 timesteps. Return = -272.8466461770446
Episode 14374 finished after 290 timesteps. Return = -105.9839718060366
Episode 14375 finished after 417 timesteps. Return = -73.7484592862564
Episode 14376 finished after 165 timesteps. Return = -77.53036957209459
Episode 14377 finished after 369 timesteps. Return = -215.9635675514757
Episode 14378 finished after 217 timesteps. Return = -209.30602466950552
Episode 14379 finished after 266 timesteps. Return = -46.21889133687053
Episode 14380 finished after 82 timesteps. Return = -47.35380208646441
Episode 14381 finished after 568 timesteps. Return = -272.41596912008987
Episode 14382 finished after 216 timesteps. Return = -22.37796784424576
Episode 14383 finished after 244 timesteps. Return = -72.6307531

Episode 14484 finished after 1000 timesteps. Return = 67.81107881771767
Episode 14485 finished after 187 timesteps. Return = -44.03052916718754
Episode 14486 finished after 387 timesteps. Return = -246.0846776164875
Episode 14487 finished after 1000 timesteps. Return = 97.56940890957898
Episode 14488 finished after 220 timesteps. Return = -257.5508038926918
Episode 14489 finished after 161 timesteps. Return = -186.32928336697574
Episode 14490 finished after 315 timesteps. Return = -227.10650540917652
Episode 14491 finished after 456 timesteps. Return = -99.79731926113281
Episode 14492 finished after 282 timesteps. Return = -54.52326727143211
Episode 14493 finished after 663 timesteps. Return = -167.5954528250519
Episode 14494 finished after 103 timesteps. Return = -41.145407965462795
Episode 14495 finished after 374 timesteps. Return = -62.00360965619168
Episode 14496 finished after 342 timesteps. Return = -31.23210419222393
Episode 14497 finished after 318 timesteps. Return = -322.626

Episode 14600 finished after 143 timesteps. Return = -97.20761733667447
Episode 14601 finished after 235 timesteps. Return = -297.4869012985265
Episode 14602 finished after 1000 timesteps. Return = 7.046949168412072
Episode 14603 finished after 269 timesteps. Return = -84.56754767768949
Episode 14604 finished after 398 timesteps. Return = -267.73263850459017
Episode 14605 finished after 229 timesteps. Return = -66.70820991350853
Episode 14606 finished after 267 timesteps. Return = -224.92837499718493
Episode 14607 finished after 130 timesteps. Return = -48.251379320530354
Episode 14608 finished after 1000 timesteps. Return = 49.287475779211995
Episode 14609 finished after 150 timesteps. Return = -99.03058022874305
Episode 14610 finished after 160 timesteps. Return = -118.2558163974299
Episode 14611 finished after 145 timesteps. Return = -27.008754968516726
Episode 14612 finished after 301 timesteps. Return = -46.057688706864894
Episode 14613 finished after 250 timesteps. Return = -228.

Episode 14715 finished after 164 timesteps. Return = -116.93499361349596
Episode 14716 finished after 310 timesteps. Return = -128.18634584724575
Episode 14717 finished after 1000 timesteps. Return = 24.199448182495242
Episode 14718 finished after 161 timesteps. Return = 39.27219447062055
Episode 14719 finished after 186 timesteps. Return = -236.8534201827
Episode 14720 finished after 181 timesteps. Return = -93.78940937592262
Episode 14721 finished after 1000 timesteps. Return = 39.82496195390716
Episode 14722 finished after 1000 timesteps. Return = -17.74171431653617
Episode 14723 finished after 99 timesteps. Return = 17.113322797811847
Episode 14724 finished after 311 timesteps. Return = -200.20332286964702
Episode 14725 finished after 261 timesteps. Return = -215.26229860231211
Episode 14726 finished after 307 timesteps. Return = -205.26227360086722
Episode 14727 finished after 257 timesteps. Return = -112.90756887412628
Episode 14728 finished after 242 timesteps. Return = 18.97765

Episode 14829 finished after 359 timesteps. Return = -17.867969007862825
Episode 14830 finished after 795 timesteps. Return = -329.4305734492858
Episode 14831 finished after 621 timesteps. Return = -209.9376325751096
Episode 14832 finished after 297 timesteps. Return = -6.560374462902388
Episode 14833 finished after 118 timesteps. Return = -11.97852653866255
Episode 14834 finished after 281 timesteps. Return = -122.92915058682301
Episode 14835 finished after 193 timesteps. Return = -14.519995540166036
Episode 14836 finished after 1000 timesteps. Return = 66.71776323579448
Episode 14837 finished after 129 timesteps. Return = -97.00527637072186
Episode 14838 finished after 164 timesteps. Return = -49.1804070377225
Episode 14839 finished after 998 timesteps. Return = -115.66308353408782
Episode 14840 finished after 1000 timesteps. Return = 88.76893344117744
Episode 14841 finished after 119 timesteps. Return = -74.21827445178383
Episode 14842 finished after 544 timesteps. Return = -289.779

Episode 14943 finished after 202 timesteps. Return = 19.60105079250802
Episode 14944 finished after 260 timesteps. Return = 32.50884451177072
Episode 14945 finished after 1000 timesteps. Return = 14.284944019444124
Episode 14946 finished after 171 timesteps. Return = 42.45814816891715
Episode 14947 finished after 244 timesteps. Return = -91.53552215231072
Episode 14948 finished after 218 timesteps. Return = 19.91112086560429
Episode 14949 finished after 255 timesteps. Return = 24.9519894271167
Episode 14950 finished after 1000 timesteps. Return = 78.43321974075458
Episode 14951 finished after 250 timesteps. Return = -171.6216463938995
Episode 14952 finished after 205 timesteps. Return = -269.44275836886106
Episode 14953 finished after 1000 timesteps. Return = 24.403051257671287
Episode 14954 finished after 213 timesteps. Return = 8.529517098062058
Episode 14955 finished after 148 timesteps. Return = -46.41606244261733
Episode 14956 finished after 77 timesteps. Return = -20.990829936994

Episode 15057 finished after 271 timesteps. Return = -71.36408794289639
Episode 15058 finished after 352 timesteps. Return = -237.4183802348981
Episode 15059 finished after 154 timesteps. Return = -52.19226458536083
Episode 15060 finished after 1000 timesteps. Return = 64.81556697871076
Episode 15061 finished after 90 timesteps. Return = -43.77649799885931
Episode 15062 finished after 644 timesteps. Return = -122.20695239656385
Episode 15063 finished after 1000 timesteps. Return = 94.38816815228196
Episode 15064 finished after 228 timesteps. Return = -189.7949257845856
Episode 15065 finished after 158 timesteps. Return = -46.29389785936748
Episode 15066 finished after 315 timesteps. Return = 16.478528717781188
Episode 15067 finished after 768 timesteps. Return = -125.45715108408491
Episode 15068 finished after 1000 timesteps. Return = 79.57174946440527
Episode 15069 finished after 124 timesteps. Return = -51.869020197294006
Episode 15070 finished after 193 timesteps. Return = -48.92917

Episode 15171 finished after 211 timesteps. Return = -173.78284558082774
Episode 15172 finished after 368 timesteps. Return = -62.32800154888709
Episode 15173 finished after 1000 timesteps. Return = -25.526961994514835
Episode 15174 finished after 175 timesteps. Return = -64.3074836937467
Episode 15175 finished after 415 timesteps. Return = -308.83337069335244
Episode 15176 finished after 169 timesteps. Return = -25.362328983757834
Episode 15177 finished after 164 timesteps. Return = -37.146264724699684
Episode 15178 finished after 143 timesteps. Return = -156.917228416301
Episode 15179 finished after 215 timesteps. Return = 2.0317432708668264
Episode 15180 finished after 237 timesteps. Return = -23.380024418226114
Episode 15181 finished after 407 timesteps. Return = -219.6042116296606
Episode 15182 finished after 186 timesteps. Return = -13.865103897825477
Episode 15183 finished after 1000 timesteps. Return = -20.814546253177987
Episode 15184 finished after 1000 timesteps. Return = 87

Episode 15285 finished after 1000 timesteps. Return = 76.03194494531259
Episode 15286 finished after 185 timesteps. Return = -14.679400623735091
Episode 15287 finished after 142 timesteps. Return = -57.46407582094538
Episode 15288 finished after 321 timesteps. Return = -57.148763429943386
Episode 15289 finished after 130 timesteps. Return = 48.82506861646246
Episode 15290 finished after 1000 timesteps. Return = -153.82123142105178
Episode 15291 finished after 1000 timesteps. Return = -101.42622392593302
Episode 15292 finished after 145 timesteps. Return = 1.9579904887104078
Episode 15293 finished after 205 timesteps. Return = -179.6088930826146
Episode 15294 finished after 264 timesteps. Return = 19.58100424406946
Episode 15295 finished after 323 timesteps. Return = -43.51447369604081
Episode 15296 finished after 638 timesteps. Return = -301.533605012586
Episode 15297 finished after 106 timesteps. Return = 18.935654701074924
Episode 15298 finished after 110 timesteps. Return = -22.5455

Episode 15401 finished after 154 timesteps. Return = -28.91885560803928
Episode 15402 finished after 1000 timesteps. Return = 0.852557776038229
Episode 15403 finished after 291 timesteps. Return = -96.76387985611639
Episode 15404 finished after 1000 timesteps. Return = 37.33880438633264
Episode 15405 finished after 216 timesteps. Return = 36.20788620985928
Episode 15406 finished after 1000 timesteps. Return = 53.16678767785574
Episode 15407 finished after 258 timesteps. Return = -74.11232064079186
Episode 15408 finished after 275 timesteps. Return = -195.90402094387971
Episode 15409 finished after 87 timesteps. Return = -45.96228659442487
Episode 15410 finished after 392 timesteps. Return = -238.8916765078303
Episode 15411 finished after 166 timesteps. Return = -55.07563086827773
Episode 15412 finished after 223 timesteps. Return = 8.589837680206003
Episode 15413 finished after 122 timesteps. Return = -66.94852114346654
Episode 15414 finished after 232 timesteps. Return = -77.345023990

Episode 15515 finished after 235 timesteps. Return = -63.70221490487096
Episode 15516 finished after 441 timesteps. Return = -258.81878596867574
Episode 15517 finished after 1000 timesteps. Return = 76.30901823142042
Episode 15518 finished after 1000 timesteps. Return = 58.2008109328591
Episode 15519 finished after 1000 timesteps. Return = 45.85557641816482
Episode 15520 finished after 253 timesteps. Return = -114.61850965499397
Episode 15521 finished after 133 timesteps. Return = -17.30266400786364
Episode 15522 finished after 302 timesteps. Return = -36.22943340658689
Episode 15523 finished after 166 timesteps. Return = 23.84331568113251
Episode 15524 finished after 169 timesteps. Return = 13.67513241759228
Episode 15525 finished after 1000 timesteps. Return = 70.44612687548354
Episode 15526 finished after 210 timesteps. Return = 12.32164823768845
Episode 15527 finished after 237 timesteps. Return = -65.3661539829638
Episode 15528 finished after 146 timesteps. Return = -10.6806005003

Episode 15629 finished after 225 timesteps. Return = 2.978317332846075
Episode 15630 finished after 114 timesteps. Return = 27.146545781757496
Episode 15631 finished after 196 timesteps. Return = 41.04556496379274
Episode 15632 finished after 438 timesteps. Return = -197.13948801509088
Episode 15633 finished after 1000 timesteps. Return = 38.768661999799825
Episode 15634 finished after 1000 timesteps. Return = 8.042889170924672
Episode 15635 finished after 355 timesteps. Return = -65.68308446242943
Episode 15636 finished after 110 timesteps. Return = -13.82940446101638
Episode 15637 finished after 257 timesteps. Return = -44.168428035818636
Episode 15638 finished after 1000 timesteps. Return = -5.928995694221818
Episode 15639 finished after 406 timesteps. Return = -170.75960696415467
Episode 15640 finished after 166 timesteps. Return = 29.90291979839583
Episode 15641 finished after 348 timesteps. Return = -64.50851167300293
Episode 15642 finished after 212 timesteps. Return = -185.2970

Episode 15745 finished after 132 timesteps. Return = 28.948251598913544
Episode 15746 finished after 220 timesteps. Return = -22.623083398351
Episode 15747 finished after 108 timesteps. Return = -97.03610881085062
Episode 15748 finished after 223 timesteps. Return = -237.61409374630426
Episode 15749 finished after 312 timesteps. Return = -155.27178717024248
Episode 15750 finished after 152 timesteps. Return = 4.696104101692555
Episode 15751 finished after 87 timesteps. Return = -2.895871584851406
Episode 15752 finished after 1000 timesteps. Return = 60.23053892030853
Episode 15753 finished after 1000 timesteps. Return = 92.3998718683747
Episode 15754 finished after 1000 timesteps. Return = 70.99190151255728
Episode 15755 finished after 133 timesteps. Return = 65.61935770606476
Episode 15756 finished after 180 timesteps. Return = -7.550926154036318
Episode 15757 finished after 153 timesteps. Return = 30.250263924380505
Episode 15758 finished after 1000 timesteps. Return = 109.7041209892

Episode 15859 finished after 178 timesteps. Return = 33.852387177913585
Episode 15860 finished after 545 timesteps. Return = -133.6723757097315
Episode 15861 finished after 139 timesteps. Return = 15.582217206444284
Episode 15862 finished after 1000 timesteps. Return = 89.49049558673202
Episode 15863 finished after 294 timesteps. Return = -17.725586021996776
Episode 15864 finished after 187 timesteps. Return = -22.497007081147217
Episode 15865 finished after 121 timesteps. Return = 12.698478150416548
Episode 15866 finished after 113 timesteps. Return = 34.931564824649485
Episode 15867 finished after 261 timesteps. Return = 2.5421735957444582
Episode 15868 finished after 1000 timesteps. Return = 75.73316877208542
Episode 15869 finished after 203 timesteps. Return = -63.43595294955348
Episode 15870 finished after 351 timesteps. Return = -103.1766319440722
Episode 15871 finished after 324 timesteps. Return = -1.9280603105817988
Episode 15872 finished after 1000 timesteps. Return = 16.5865

Episode 15975 finished after 350 timesteps. Return = -300.20126690028025
Episode 15976 finished after 1000 timesteps. Return = 61.224287113954304
Episode 15977 finished after 1000 timesteps. Return = 45.73295614822393
Episode 15978 finished after 148 timesteps. Return = -19.43026166583452
Episode 15979 finished after 155 timesteps. Return = -105.30048059806849
Episode 15980 finished after 242 timesteps. Return = -4.415638192593377
Episode 15981 finished after 371 timesteps. Return = -224.7944131827852
Episode 15982 finished after 189 timesteps. Return = -1.0527898215217135
Episode 15983 finished after 1000 timesteps. Return = 111.08748127031416
Episode 15984 finished after 137 timesteps. Return = -30.615808795795544
Episode 15985 finished after 1000 timesteps. Return = 70.42480922820236
Episode 15986 finished after 149 timesteps. Return = -95.93445314111536
Episode 15987 finished after 153 timesteps. Return = 27.372626184541133
Episode 15988 finished after 146 timesteps. Return = -29.1

Episode 16090 finished after 228 timesteps. Return = 15.166895074869217
Episode 16091 finished after 240 timesteps. Return = -195.1790094145988
Episode 16092 finished after 292 timesteps. Return = 35.358823489990186
Episode 16093 finished after 193 timesteps. Return = 15.490150969150633
Episode 16094 finished after 302 timesteps. Return = -46.74432524078908
Episode 16095 finished after 194 timesteps. Return = -30.34700258674394
Episode 16096 finished after 639 timesteps. Return = -314.86180238004727
Episode 16097 finished after 265 timesteps. Return = -37.72430457064725
Episode 16098 finished after 1000 timesteps. Return = 82.04950770202868
Episode 16099 finished after 209 timesteps. Return = -48.662923181564054
Episode 16100 finished after 372 timesteps. Return = -77.90875571757182
Episode 16101 finished after 83 timesteps. Return = -40.533193235113686
Episode 16102 finished after 1000 timesteps. Return = 76.30122846893177
Episode 16103 finished after 263 timesteps. Return = -101.3502

Episode 16204 finished after 1000 timesteps. Return = 95.06199416010953
Episode 16205 finished after 122 timesteps. Return = 19.544058829432856
Episode 16206 finished after 426 timesteps. Return = -158.05414080063548
Episode 16207 finished after 1000 timesteps. Return = 54.02236875652933
Episode 16208 finished after 1000 timesteps. Return = 70.43743371753008
Episode 16209 finished after 1000 timesteps. Return = -14.16307428180659
Episode 16210 finished after 279 timesteps. Return = -166.74423704566752
Episode 16211 finished after 1000 timesteps. Return = 80.03416143936944
Episode 16212 finished after 193 timesteps. Return = 72.41319688211578
Episode 16213 finished after 1000 timesteps. Return = 15.589740475837093
Episode 16214 finished after 1000 timesteps. Return = 47.409443311346884
Episode 16215 finished after 208 timesteps. Return = 28.82263169315337
Episode 16216 finished after 260 timesteps. Return = -81.6964448840917
Episode 16217 finished after 294 timesteps. Return = -62.74139

Episode 16320 finished after 128 timesteps. Return = 1.9844226876792135
Episode 16321 finished after 1000 timesteps. Return = 20.55193301666919
Episode 16322 finished after 597 timesteps. Return = -179.16746508670758
Episode 16323 finished after 321 timesteps. Return = -256.27585783587267
Episode 16324 finished after 1000 timesteps. Return = -38.4228554532811
Episode 16325 finished after 1000 timesteps. Return = 34.23406462242485
Episode 16326 finished after 176 timesteps. Return = 1.1200725692305298
Episode 16327 finished after 1000 timesteps. Return = -0.6634182693067068
Episode 16328 finished after 142 timesteps. Return = -35.48356325442937
Episode 16329 finished after 1000 timesteps. Return = 29.380187094490452
Episode 16330 finished after 181 timesteps. Return = -11.688905277188198
Episode 16331 finished after 156 timesteps. Return = -200.54780959098287
Episode 16332 finished after 201 timesteps. Return = 6.308940903754504
Episode 16333 finished after 1000 timesteps. Return = 66.2

Episode 16434 finished after 206 timesteps. Return = -19.056703735084284
Episode 16435 finished after 547 timesteps. Return = -191.39232223694998
Episode 16436 finished after 1000 timesteps. Return = 8.408833596172363
Episode 16437 finished after 314 timesteps. Return = 0.8998614020284919
Episode 16438 finished after 135 timesteps. Return = 0.5306441344415447
Episode 16439 finished after 1000 timesteps. Return = 50.87913524332673
Episode 16440 finished after 179 timesteps. Return = -50.950282698834485
Episode 16441 finished after 197 timesteps. Return = -201.005094436155
Episode 16442 finished after 267 timesteps. Return = -161.84190254534712
Episode 16443 finished after 914 timesteps. Return = -70.9727700413393
Episode 16444 finished after 710 timesteps. Return = -304.67244289632436
Episode 16445 finished after 197 timesteps. Return = -68.8251014931638
Episode 16446 finished after 1000 timesteps. Return = 82.3001068443066
Episode 16447 finished after 190 timesteps. Return = -16.924858

Episode 16548 finished after 1000 timesteps. Return = 11.872726144850313
Episode 16549 finished after 288 timesteps. Return = -138.24124047545686
Episode 16550 finished after 309 timesteps. Return = -203.5352434660553
Episode 16551 finished after 1000 timesteps. Return = 90.11106202364803
Episode 16552 finished after 384 timesteps. Return = -315.63966357748836
Episode 16553 finished after 353 timesteps. Return = -57.15066320602871
Episode 16554 finished after 339 timesteps. Return = -219.22233920332695
Episode 16555 finished after 297 timesteps. Return = -108.56901554499967
Episode 16556 finished after 481 timesteps. Return = -214.34154347132127
Episode 16557 finished after 160 timesteps. Return = -38.189171960452896
Episode 16558 finished after 116 timesteps. Return = 5.8491675176783815
Episode 16559 finished after 1000 timesteps. Return = 68.74533959743147
Episode 16560 finished after 139 timesteps. Return = -45.900907638380914
Episode 16561 finished after 1000 timesteps. Return = 16

Episode 16663 finished after 1000 timesteps. Return = 20.870873131728935
Episode 16664 finished after 1000 timesteps. Return = 97.05917592108824
Episode 16665 finished after 1000 timesteps. Return = 43.0757857896474
Episode 16666 finished after 214 timesteps. Return = 41.906816931654475
Episode 16667 finished after 407 timesteps. Return = -213.2133021504835
Episode 16668 finished after 227 timesteps. Return = 15.046716156965658
Episode 16669 finished after 1000 timesteps. Return = -34.16687628324426
Episode 16670 finished after 1000 timesteps. Return = 52.091599834957734
Episode 16671 finished after 1000 timesteps. Return = 49.17607726133817
Episode 16672 finished after 431 timesteps. Return = -243.59294927686983
Episode 16673 finished after 311 timesteps. Return = -19.707717638671767
Episode 16674 finished after 121 timesteps. Return = -210.94760786999217
Episode 16675 finished after 255 timesteps. Return = -24.363976224490173
Episode 16676 finished after 1000 timesteps. Return = -12.

Episode 16778 finished after 1000 timesteps. Return = 11.204351276098727
Episode 16779 finished after 172 timesteps. Return = -226.3979764176315
Episode 16780 finished after 460 timesteps. Return = -155.6186861563275
Episode 16781 finished after 1000 timesteps. Return = 57.347584058539354
Episode 16782 finished after 278 timesteps. Return = -193.8135603792819
Episode 16783 finished after 146 timesteps. Return = -32.95153408022972
Episode 16784 finished after 117 timesteps. Return = -11.128195708100634
Episode 16785 finished after 146 timesteps. Return = -22.711844921103733
Episode 16786 finished after 117 timesteps. Return = -7.608850503107661
Episode 16787 finished after 266 timesteps. Return = -5.3553090770264475
Episode 16788 finished after 573 timesteps. Return = -315.4027777892768
Episode 16789 finished after 291 timesteps. Return = -68.79736422955526
Episode 16790 finished after 1000 timesteps. Return = 29.986114086858038
Episode 16791 finished after 176 timesteps. Return = -37.0

Episode 16894 finished after 1000 timesteps. Return = -27.965267082768808
Episode 16895 finished after 1000 timesteps. Return = 72.67919866108545
Episode 16896 finished after 164 timesteps. Return = -13.046058855768962
Episode 16897 finished after 1000 timesteps. Return = 60.68191971229822
Episode 16898 finished after 244 timesteps. Return = 5.516789997268134
Episode 16899 finished after 148 timesteps. Return = -49.442421856259415
Episode 16900 finished after 1000 timesteps. Return = 120.2185178309333
Episode 16901 finished after 220 timesteps. Return = -191.5379969663774
Episode 16902 finished after 236 timesteps. Return = -3.953349383819372
Episode 16903 finished after 1000 timesteps. Return = 13.248010405060173
Episode 16904 finished after 1000 timesteps. Return = 65.4143620726178
Episode 16905 finished after 112 timesteps. Return = 0.220154353468061
Episode 16906 finished after 148 timesteps. Return = 32.45336064047419
Episode 16907 finished after 235 timesteps. Return = -145.29051

Episode 17008 finished after 1000 timesteps. Return = 33.2682170200301
Episode 17009 finished after 295 timesteps. Return = -317.37582718440717
Episode 17010 finished after 185 timesteps. Return = -191.86573877212996
Episode 17011 finished after 191 timesteps. Return = 39.48466346334098
Episode 17012 finished after 245 timesteps. Return = -44.61351713746484
Episode 17013 finished after 165 timesteps. Return = 7.950560416078815
Episode 17014 finished after 163 timesteps. Return = -8.557870387763884
Episode 17015 finished after 170 timesteps. Return = 15.81676600071033
Episode 17016 finished after 152 timesteps. Return = -85.79140783034178
Episode 17017 finished after 219 timesteps. Return = -170.41680677544727
Episode 17018 finished after 466 timesteps. Return = -48.32548164295373
Episode 17019 finished after 122 timesteps. Return = -20.717010732123782
Episode 17020 finished after 177 timesteps. Return = -21.25564008934016
Episode 17021 finished after 1000 timesteps. Return = -5.7879747

Episode 17123 finished after 1000 timesteps. Return = 82.15952760574294
Episode 17124 finished after 118 timesteps. Return = -46.90971123382694
Episode 17125 finished after 1000 timesteps. Return = 55.61355095495385
Episode 17126 finished after 1000 timesteps. Return = 83.2493997526002
Episode 17127 finished after 191 timesteps. Return = 46.97590852801903
Episode 17128 finished after 206 timesteps. Return = -252.25329408618987
Episode 17129 finished after 607 timesteps. Return = -290.2904469379545
Episode 17130 finished after 161 timesteps. Return = -51.92523828105753
Episode 17131 finished after 1000 timesteps. Return = 74.5842052463166
Episode 17132 finished after 1000 timesteps. Return = 56.58250029514787
Episode 17133 finished after 90 timesteps. Return = 13.316078685975256
Episode 17134 finished after 1000 timesteps. Return = 85.29037530956649
Episode 17135 finished after 135 timesteps. Return = -23.262220015054794
Episode 17136 finished after 195 timesteps. Return = -18.554694126

Episode 17238 finished after 1000 timesteps. Return = 46.76929699285987
Episode 17239 finished after 220 timesteps. Return = -5.816935088383403
Episode 17240 finished after 146 timesteps. Return = 21.26800343413106
Episode 17241 finished after 257 timesteps. Return = 38.559742369204855
Episode 17242 finished after 188 timesteps. Return = -29.390322660038734
Episode 17243 finished after 200 timesteps. Return = -6.68568647509079
Episode 17244 finished after 1000 timesteps. Return = 81.26077077719191
Episode 17245 finished after 99 timesteps. Return = -13.582420946059855
Episode 17246 finished after 245 timesteps. Return = -176.96483412299716
Episode 17247 finished after 98 timesteps. Return = -182.59557169024606
Episode 17248 finished after 231 timesteps. Return = -76.47022433074252
Episode 17249 finished after 178 timesteps. Return = 54.70078752430197
Episode 17250 finished after 1000 timesteps. Return = 42.115062859972525
Episode 17251 finished after 214 timesteps. Return = -47.3375828

Episode 17352 finished after 429 timesteps. Return = -237.60949459110623
Episode 17353 finished after 1000 timesteps. Return = -0.5526227251524722
Episode 17354 finished after 195 timesteps. Return = -37.283172321569296
Episode 17355 finished after 261 timesteps. Return = -40.89153894645801
Episode 17356 finished after 1000 timesteps. Return = 73.26419398166986
Episode 17357 finished after 1000 timesteps. Return = 48.49925684759008
Episode 17358 finished after 1000 timesteps. Return = 66.39651003328716
Episode 17359 finished after 1000 timesteps. Return = 66.90946976237903
Episode 17360 finished after 1000 timesteps. Return = -10.963668893435129
Episode 17361 finished after 459 timesteps. Return = -253.75716612472863
Episode 17362 finished after 234 timesteps. Return = -64.59450439778226
Episode 17363 finished after 880 timesteps. Return = -406.95817104744214
Episode 17364 finished after 370 timesteps. Return = -196.74642707937363
Episode 17365 finished after 1000 timesteps. Return = 6

Episode 17466 finished after 1000 timesteps. Return = 81.25164361456387
Episode 17467 finished after 1000 timesteps. Return = 34.54635563429803
Episode 17468 finished after 258 timesteps. Return = -15.983726675290143
Episode 17469 finished after 271 timesteps. Return = 25.603836994260448
Episode 17470 finished after 127 timesteps. Return = -194.75019407550144
Episode 17471 finished after 702 timesteps. Return = -182.66218695591795
Episode 17472 finished after 240 timesteps. Return = 18.31108145734929
Episode 17473 finished after 1000 timesteps. Return = 25.95850552525931
Episode 17474 finished after 87 timesteps. Return = -19.05381440810261
Episode 17475 finished after 907 timesteps. Return = -388.42911309051084
Episode 17476 finished after 323 timesteps. Return = -225.04347426143423
Episode 17477 finished after 1000 timesteps. Return = -13.411568025462639
Episode 17478 finished after 1000 timesteps. Return = 85.268462544637
Episode 17479 finished after 121 timesteps. Return = -7.97666

Episode 17580 finished after 149 timesteps. Return = -94.86104688854249
Episode 17581 finished after 191 timesteps. Return = 22.558784666798275
Episode 17582 finished after 186 timesteps. Return = 38.57139655981965
Episode 17583 finished after 223 timesteps. Return = -253.373327997064
Episode 17584 finished after 647 timesteps. Return = -159.31725856885308
Episode 17585 finished after 169 timesteps. Return = -37.006719099872974
Episode 17586 finished after 1000 timesteps. Return = 60.41182313395224
Episode 17587 finished after 1000 timesteps. Return = 59.81594427578259
Episode 17588 finished after 1000 timesteps. Return = 125.75275231420055
Episode 17589 finished after 156 timesteps. Return = -36.432203035191215
Episode 17590 finished after 269 timesteps. Return = -45.89607742407945
Episode 17591 finished after 190 timesteps. Return = -64.18033836591997
Episode 17592 finished after 173 timesteps. Return = -148.3467137013834
Episode 17593 finished after 212 timesteps. Return = -145.9985

KeyboardInterrupt: 

In [165]:
#torch.save(agent.state_dict(), 'checkpoints/lunar_lander_64x64_checkpoint_0.pt')
#agent.load_state_dict(torch.load('checkpoints/lunar_lander_32x32_checkpoint_0.pt'))

In [179]:
visualise_agent(greedy_policy, command=[150, 400], n=5)

Episode 0 finished after 833 timesteps. Return = -360.375805006002


In [181]:
visualise_agent(stochastic_policy, command=[150, 400], n=5)

Episode 0 finished after 213 timesteps. Return = -27.11786348432311
Episode 1 finished after 271 timesteps. Return = -226.95918413035955
Episode 2 finished after 325 timesteps. Return = -26.1805768711521


In [130]:
print([mem['return'] for mem in replay_buffer])

# Previous Code

In [None]:
def train_net(policy_net, replay_buffer, n_updates=100, batch_size=64):
    all_costs = []
    for i in range(n_updates):
        batch_input = np.zeros((batch_size, np.prod(env.observation_space.shape)+2))
        batch_label = np.zeros((batch_size))
        for b in range(batch_size):
            sample_episode = np.random.randint(0, len(replay_buffer))
            sample_horizon = np.random.randint(1, len(replay_buffer[sample_episode]['observation'])+1)
            sample_mem_idx = np.random.randint(0, len(replay_buffer[sample_episode]['observation'])+1-sample_horizon)
            sample_mem = replay_buffer[sample_episode]['observation'][sample_mem_idx]
            sample_desired_reward = sum(replay_buffer[sample_episode]['reward'][sample_mem_idx:sample_mem_idx+sample_horizon])
            network_input = np.append(sample_mem, [sample_desired_reward, sample_horizon])
            label = replay_buffer[sample_episode]['action'][sample_mem_idx]
            batch_input[b] = network_input
            batch_label[b] = label
        batch_input = torch.tensor(batch_input).double()
        batch_label = torch.tensor(batch_label).long()
        pred = policy_net(batch_input)
        cost = F.cross_entropy(pred, batch_label)
        all_costs.append(cost.item())
        cost.backward()
        policy_net.optimizer.step()
        policy_net.optimizer.zero_grad()
    return np.mean(all_costs)

In [8]:
def train_net(policy_net, episode_mem, n_samples = 5): #stochastic gradient descent
    all_costs = []
    for i in range(n_samples):
        sample_horizon = np.random.randint(1, len(episode_mem['observation'])+1)
        sample_mem_idx = np.random.randint(0, len(episode_mem['observation'])+1-sample_horizon)
        sample_mem = episode_mem['observation'][sample_mem_idx]
        sample_desired_reward = sum(episode_mem['reward'][sample_mem_idx:sample_mem_idx+sample_horizon])
        network_input = torch.tensor(np.append(sample_mem, [sample_desired_reward, sample_horizon])).double()
        label = torch.tensor([episode_mem['action'][sample_mem_idx]]).double()
        
        pred = policy_net(network_input)
        cost = F.binary_cross_entropy(pred, label)
        all_costs.append(cost.item())
        cost.backward()
        policy_net.optimizer.step()
        policy_net.optimizer.zero_grad()
    return np.mean(all_costs)
    

In [33]:
def train(policy_net, n_episodes=100):
    global i_episode
    global epsilon
    try:
        for _ in range(n_episodes):
            observation = env.reset()
            episode_mem = {'observation':[],
                            'action':[],
                            'reward':[],
                            'done':[]}
            done=False
            while not done:
                network_input = torch.tensor(np.append(observation, [desired_reward, command_horizon])).double()
                action_prob = policy_net(network_input)
                action = np.random.binomial(1, action_prob.item())
                #action = int(action_prob.item()>0.5)
                if np.random.rand()<epsilon: action = np.random.randint(0, 2)
                new_observation, reward, done, info = env.step(action)
                
                episode_mem['observation'].append(observation)
                episode_mem['action'].append(action)
                episode_mem['reward'].append(reward)
                episode_mem['done'].append(done)
                
                observation=new_observation
                epsilon*=0.999
            episode_mem['return']=sum(episode_mem['reward'])
            episode_mem['episode_len']=len(episode_mem['observation'])
            mean_cost = train_net(policy_net, episode_mem)
            
            i_episode+=1
            print("Episode {} finished after {} timesteps. Epsilon={} Mean Cost={}".format(i_episode, len(episode_mem['observation']), epsilon, mean_cost))
        env.close()
    except KeyboardInterrupt:
        env.close()