In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import torch
import time
import gym
import numpy as np

# Run on the first CUDA GPU when one is available; otherwise fall back to the CPU.
# The TD3 agent below places every network and training batch on this device.
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")


In [None]:
class ReplayBuffer:
    """Bounded store of transitions with uniform random sampling.

    Each transition is a tuple ``(state, action, reward, next_state, done)``.
    Once the number of stored transitions exceeds ``max_size``, the oldest
    fifth of the buffer is discarded in one go.

    Attributes:
        buffer: list of stored transitions, oldest first.
        max_size: capacity threshold (converted to ``int``).
        size: number of transitions currently stored.
    """

    def __init__(self, max_size=5e5):
        self.buffer = []
        self.max_size = int(max_size)
        self.size = 0

    def _evict_if_full(self):
        """Drop the oldest fifth of the buffer once it holds more than ``max_size`` items."""
        self.size = len(self.buffer)
        if self.size > self.max_size:
            # Always evict at least one item so very small capacities stay bounded.
            n_drop = max(1, self.size // 5)
            del self.buffer[0:n_drop]
            self.size = len(self.buffer)

    def add(self, transition):
        """Append one transition and evict old data if the buffer overflowed.

        Args:
            transition: tuple of (state, action, reward, next_state, done).
        """
        self.buffer.append(transition)
        # Evict here (not only in ``sample``) so memory stays bounded even when
        # many transitions are added between two sampling calls.
        self._evict_if_full()

    def sample(self, batch_size):
        """Draw ``batch_size`` transitions uniformly at random, with replacement.

        Args:
            batch_size: number of transitions to draw.

        Returns:
            Tuple ``(state, action, reward, next_state, done)`` of NumPy arrays,
            each with ``batch_size`` entries along the first axis.

        Raises:
            ValueError: if the buffer is empty.
        """
        self._evict_if_full()
        if not self.buffer:
            raise ValueError("cannot sample from an empty replay buffer")

        indexes = np.random.randint(0, len(self.buffer), size=batch_size)
        state, action, reward, next_state, done = [], [], [], [], []

        for i in indexes:
            s, a, r, s_, d = self.buffer[i]
            # np.asarray avoids a copy when possible but, unlike
            # np.array(..., copy=False) on NumPy 2.x, still accepts plain
            # Python scalars and lists that must be copied.
            state.append(np.asarray(s))
            action.append(np.asarray(a))
            reward.append(np.asarray(r))
            next_state.append(np.asarray(s_))
            done.append(np.asarray(d))

        return np.array(state), np.array(action), np.array(reward), np.array(next_state), np.array(done)
    

In [None]:
class Actor(nn.Module):
    """Policy network mapping a batch of states to bounded actions.

    Two ReLU hidden layers (400 and 300 units) feed a tanh output layer whose
    result is scaled by ``max_action``.
    """

    def __init__(self, state_dim, action_dim, max_action):
        super().__init__()

        # The attribute names l1/l2/l3 are also the state_dict keys.
        self.l1 = nn.Linear(state_dim, 400)
        self.l2 = nn.Linear(400, 300)
        self.l3 = nn.Linear(300, action_dim)

        self.max_action = max_action

    def forward(self, state):
        """Return the action for ``state``; tanh output is scaled by ``max_action``."""
        hidden = state
        for layer in (self.l1, self.l2):
            hidden = F.relu(layer(hidden))
        squashed = torch.tanh(self.l3(hidden))
        return squashed * self.max_action
        
class Critic(nn.Module):
    """Q-network scoring a (state, action) pair with a single scalar per row.

    The state and action are concatenated along dimension 1 and passed through
    two ReLU hidden layers (400 and 300 units) and a linear output of width 1.
    """

    def __init__(self, state_dim, action_dim):
        super().__init__()

        # The attribute names l1/l2/l3 are also the state_dict keys.
        self.l1 = nn.Linear(state_dim + action_dim, 400)
        self.l2 = nn.Linear(400, 300)
        self.l3 = nn.Linear(300, 1)

    def forward(self, state, action):
        """Return the Q-value estimate for each (state, action) row."""
        joined = torch.cat((state, action), dim=1)
        first_hidden = F.relu(self.l1(joined))
        second_hidden = F.relu(self.l2(first_hidden))
        return self.l3(second_hidden)
    
class TD3:
    """TD3 agent: one actor, two critics, and a slowly-updated target copy of each.

    All six networks live on the module-level ``device``. Each trainable
    network has its own Adam optimizer using the shared learning rate ``lr``.
    """

    # (attribute name, checkpoint filename suffix) for every network.
    # NOTE: the 'crtic_1' / 'crtic_2' spellings are intentional. They match the
    # filenames written by earlier versions, so existing checkpoints still load.
    _CHECKPOINT_SUFFIXES = (
        ('actor', 'actor'),
        ('actor_target', 'actor_target'),
        ('critic_1', 'crtic_1'),
        ('critic_1_target', 'critic_1_target'),
        ('critic_2', 'crtic_2'),
        ('critic_2_target', 'critic_2_target'),
    )

    def __init__(self, lr, state_dim, action_dim, max_action):
        """Build the networks, copy their weights into the targets, create optimizers.

        Args:
            lr: learning rate for all three Adam optimizers.
            state_dim: size of a state vector.
            action_dim: size of an action vector.
            max_action: bound used to scale the actor output and clamp target actions.
        """
        self.actor = Actor(state_dim, action_dim, max_action).to(device)
        self.actor_target = Actor(state_dim, action_dim, max_action).to(device)
        self.actor_target.load_state_dict(self.actor.state_dict())
        self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=lr)

        self.critic_1 = Critic(state_dim, action_dim).to(device)
        self.critic_1_target = Critic(state_dim, action_dim).to(device)
        self.critic_1_target.load_state_dict(self.critic_1.state_dict())
        self.critic_1_optimizer = optim.Adam(self.critic_1.parameters(), lr=lr)

        self.critic_2 = Critic(state_dim, action_dim).to(device)
        self.critic_2_target = Critic(state_dim, action_dim).to(device)
        self.critic_2_target.load_state_dict(self.critic_2.state_dict())
        self.critic_2_optimizer = optim.Adam(self.critic_2.parameters(), lr=lr)

        self.max_action = max_action

    def select_action(self, state):
        """Return the actor's action for a single state as a flat NumPy array.

        Args:
            state: array-like with a ``reshape`` method; it is reshaped to one row.
        """
        state = torch.as_tensor(state.reshape(1, -1), dtype=torch.float32, device=device)
        # Inference only: no autograd graph is needed.
        with torch.no_grad():
            return self.actor(state).cpu().numpy().flatten()

    @staticmethod
    def _to_tensor(array):
        """Convert a sampled NumPy batch to a float32 tensor on ``device``."""
        return torch.as_tensor(array, dtype=torch.float32, device=device)

    @staticmethod
    def _soft_update(source, target, polyak):
        """Polyak-average ``source`` into ``target``: t <- polyak*t + (1-polyak)*s."""
        with torch.no_grad():
            for param, target_param in zip(source.parameters(), target.parameters()):
                target_param.copy_((polyak * target_param) + ((1 - polyak) * param))

    def update(self, replay_buffer, n_iter, batch_size, gamma, polyak, policy_noise, noise_clip, policy_delay):
        """Run ``n_iter`` gradient steps on batches drawn from ``replay_buffer``.

        Args:
            replay_buffer: object whose ``sample(batch_size)`` returns
                ``(state, action, reward, next_state, done)`` arrays.
            n_iter: number of update iterations.
            batch_size: number of transitions per iteration.
            gamma: discount factor applied to the bootstrapped target value.
            polyak: weight kept on the old target parameters in the soft update.
            policy_noise: standard deviation of the Gaussian noise added to the
                target action.
            noise_clip: absolute bound applied to that noise.
            policy_delay: the actor and all targets are updated only on
                iterations where ``i % policy_delay == 0``.
        """
        critics = (
            (self.critic_1, self.critic_1_optimizer),
            (self.critic_2, self.critic_2_optimizer),
        )

        for i in range(n_iter):
            # Sample a batch of transitions from replay buffer:
            state, action, reward, next_state, done = replay_buffer.sample(batch_size)
            state = self._to_tensor(state)
            action = self._to_tensor(action)
            reward = self._to_tensor(reward).reshape((batch_size, 1))
            next_state = self._to_tensor(next_state)
            done = self._to_tensor(done).reshape((batch_size, 1))

            # The TD target is a constant for the critic losses, so build it
            # without tracking gradients through the target networks.
            with torch.no_grad():
                # Select next action according to target policy (with clipped noise):
                noise = (torch.randn_like(action) * policy_noise).clamp(-noise_clip, noise_clip)
                next_action = self.actor_target(next_state) + noise
                next_action = next_action.clamp(-self.max_action, self.max_action)

                # Compute target Q-value from the smaller of the two target critics;
                # (1 - done) removes the bootstrap term on terminal transitions.
                target_Q1 = self.critic_1_target(next_state, next_action)
                target_Q2 = self.critic_2_target(next_state, next_action)
                target_Q = torch.min(target_Q1, target_Q2)
                target_Q = reward + ((1 - done) * gamma * target_Q)

            # Optimize both critics against the same target:
            for critic, critic_optimizer in critics:
                critic_loss = F.mse_loss(critic(state, action), target_Q)
                critic_optimizer.zero_grad()
                critic_loss.backward()
                critic_optimizer.step()

            # Delayed policy updates:
            if i % policy_delay == 0:
                # Actor loss: maximize critic_1's value of the actor's action.
                # Gradients this leaves on critic_1 are cleared by zero_grad()
                # before the next critic step.
                actor_loss = -self.critic_1(state, self.actor(state)).mean()

                self.actor_optimizer.zero_grad()
                actor_loss.backward()
                self.actor_optimizer.step()

                # Polyak averaging update of every target network:
                self._soft_update(self.actor, self.actor_target, polyak)
                self._soft_update(self.critic_1, self.critic_1_target, polyak)
                self._soft_update(self.critic_2, self.critic_2_target, polyak)

    @staticmethod
    def _checkpoint_path(directory, name, suffix):
        """Return the checkpoint path ``<directory>/<name>_<suffix>.pth``."""
        return '%s/%s_%s.pth' % (directory, name, suffix)

    def _load_networks(self, directory, name, entries):
        """Load the state dict of each (attribute, suffix) pair in ``entries``."""
        for attr, suffix in entries:
            # Deserialize on the CPU; load_state_dict copies the values into the
            # network's existing parameters.
            weights = torch.load(self._checkpoint_path(directory, name, suffix), map_location='cpu')
            getattr(self, attr).load_state_dict(weights)

    def save(self, directory, name):
        """Write the state dict of all six networks to ``directory``.

        Files are named ``<name>_<suffix>.pth``; ``directory`` must already exist.
        """
        for attr, suffix in self._CHECKPOINT_SUFFIXES:
            torch.save(getattr(self, attr).state_dict(), self._checkpoint_path(directory, name, suffix))

    def load(self, directory, name):
        """Restore all six networks from files written by ``save``."""
        self._load_networks(directory, name, self._CHECKPOINT_SUFFIXES)

    def load_actor(self, directory, name):
        """Restore only the actor and its target from files written by ``save``."""
        self._load_networks(directory, name, self._CHECKPOINT_SUFFIXES[:2])
        
        
        
      
        

episode: 2256
episode length: 85
episode reward: 27.62357543348556
current timestep: 30399

episode: 2257
episode length: 86
episode reward: 28.253464308683494
current timestep: 30485

episode: 2258
episode length: 98
episode reward: 39.36948244703435
current timestep: 30583

episode: 2259
episode length: 89
episode reward: 31.415976916624807
current timestep: 30672

episode: 2260
episode length: 95
episode reward: 37.160364489841804
current timestep: 30767

episode: 2261
episode length: 84
episode reward: 27.21930539056115
current timestep: 30851

episode: 2262
episode length: 94
episode reward: 35.907474491769044
current timestep: 30945

episode: 2263
episode length: 81
episode reward: 24.82054050297668
current timestep: 31026

episode: 2264
episode length: 84
episode reward: 26.78520929929127
current timestep: 31110

episode: 2265
episode length: 85
episode reward: 27.333834718536455
current timestep: 31195

episode: 2266
episode length: 85
episode reward: 28.004934086562447
current

episode: 2344
episode length: 52
episode reward: -32.22569746458371
current timestep: 35979

episode: 2345
episode length: 52
episode reward: -33.08132891006094
current timestep: 36031

episode: 2346
episode length: 52
episode reward: -31.739953598425434
current timestep: 36083

episode: 2347
episode length: 51
episode reward: -29.888409257032
current timestep: 36134

episode: 2348
episode length: 53
episode reward: -39.533894372969606
current timestep: 36187

episode: 2349
episode length: 52
episode reward: -34.13455691440129
current timestep: 36239

episode: 2350
episode length: 49
episode reward: -27.358318452447637
current timestep: 36288

episode: 2351
episode length: 53
episode reward: -34.9330278587643
current timestep: 36341

episode: 2352
episode length: 52
episode reward: -33.0599857960234
current timestep: 36393

episode: 2353
episode length: 53
episode reward: -34.81001439014153
current timestep: 36446

episode: 2354
episode length: 51
episode reward: -30.21366771750571
cur

episode: 2432
episode length: 40
episode reward: -4.811576550462265
current timestep: 39621

episode: 2433
episode length: 34
episode reward: -5.074755141324133
current timestep: 39655

episode: 2434
episode length: 32
episode reward: -3.1455664038305056
current timestep: 39687

episode: 2435
episode length: 38
episode reward: -1.2438885044655252
current timestep: 39725

episode: 2436
episode length: 36
episode reward: -4.810783156109407
current timestep: 39761

episode: 2437
episode length: 34
episode reward: -0.7087613775379771
current timestep: 39795

episode: 2438
episode length: 34
episode reward: -4.125682355152814
current timestep: 39829

episode: 2439
episode length: 39
episode reward: -0.7738957333457873
current timestep: 39868

episode: 2440
episode length: 35
episode reward: -2.478669308536079
current timestep: 39903

episode: 2441
episode length: 35
episode reward: -2.193078309594314
current timestep: 39938

episode: 2442
episode length: 40
episode reward: -9.23576850357077

episode: 2521
episode length: 30
episode reward: -4.3668069098738105
current timestep: 42607

episode: 2522
episode length: 29
episode reward: -4.000957852700101
current timestep: 42636

episode: 2523
episode length: 29
episode reward: -5.54577382886571
current timestep: 42665

episode: 2524
episode length: 31
episode reward: -3.192056236724817
current timestep: 42696

episode: 2525
episode length: 29
episode reward: -4.558469964669623
current timestep: 42725

episode: 2526
episode length: 31
episode reward: -4.872890466984833
current timestep: 42756

episode: 2527
episode length: 30
episode reward: -3.822149375900315
current timestep: 42786

episode: 2528
episode length: 31
episode reward: -4.805949479812241
current timestep: 42817

episode: 2529
episode length: 30
episode reward: -3.617534897054562
current timestep: 42847

episode: 2530
episode length: 31
episode reward: -4.689415319977422
current timestep: 42878

episode: 2531
episode length: 31
episode reward: -3.8770426948023955
c

episode: 2609
episode length: 29
episode reward: -4.385732441274925
current timestep: 45192

episode: 2610
episode length: 28
episode reward: -5.057448160932712
current timestep: 45220

episode: 2611
episode length: 30
episode reward: -4.998414241078807
current timestep: 45250

episode: 2612
episode length: 29
episode reward: -5.679814149253946
current timestep: 45279

episode: 2613
episode length: 28
episode reward: -5.27663668584759
current timestep: 45307

episode: 2614
episode length: 27
episode reward: -4.712179378339327
current timestep: 45334

episode: 2615
episode length: 30
episode reward: -4.77369645407697
current timestep: 45364

episode: 2616
episode length: 30
episode reward: -4.010817007473582
current timestep: 45394

episode: 2617
episode length: 28
episode reward: -5.067401877192664
current timestep: 45422

episode: 2618
episode length: 29
episode reward: -5.697915187768609
current timestep: 45451

episode: 2619
episode length: 28
episode reward: -5.489834077580864
curr

episode: 2699
episode length: 25
episode reward: -5.75843567207172
current timestep: 47666

episode: 2700
episode length: 29
episode reward: -4.065836973744619
current timestep: 47695

avg. reward: -4.739836700194522

episode: 2701
episode length: 23
episode reward: -3.8535944821006596
current timestep: 47718

episode: 2702
episode length: 28
episode reward: -3.981101628146825
current timestep: 47746

episode: 2703
episode length: 23
episode reward: -4.6175681671811555
current timestep: 47769

episode: 2704
episode length: 27
episode reward: -6.191800920358948
current timestep: 47796

episode: 2705
episode length: 25
episode reward: -1.9350974575044395
current timestep: 47821

episode: 2706
episode length: 24
episode reward: -5.505759657125475
current timestep: 47845

episode: 2707
episode length: 25
episode reward: -0.9711086206260133
current timestep: 47870

episode: 2708
episode length: 26
episode reward: -6.009392880158588
current timestep: 47896

episode: 2709
episode length: 23
e

episode: 2787
episode length: 33
episode reward: -4.918701890966361
current timestep: 50054

episode: 2788
episode length: 29
episode reward: -4.558251280279514
current timestep: 50083

episode: 2789
episode length: 34
episode reward: -4.44907680009807
current timestep: 50117

episode: 2790
episode length: 35
episode reward: -3.48805486437609
current timestep: 50152

episode: 2791
episode length: 34
episode reward: -3.8984729989282574
current timestep: 50186

episode: 2792
episode length: 35
episode reward: -4.197751393572208
current timestep: 50221

episode: 2793
episode length: 35
episode reward: -2.8425112580522574
current timestep: 50256

episode: 2794
episode length: 34
episode reward: -4.467474212762003
current timestep: 50290

episode: 2795
episode length: 35
episode reward: -4.084717991363735
current timestep: 50325

episode: 2796
episode length: 34
episode reward: -3.7531456115824318
current timestep: 50359

episode: 2797
episode length: 35
episode reward: -6.261188219102531
c

episode: 2875
episode length: 29
episode reward: -5.309076744823786
current timestep: 52837

episode: 2876
episode length: 30
episode reward: -4.936816030076516
current timestep: 52867

episode: 2877
episode length: 34
episode reward: -3.4455119903417777
current timestep: 52901

episode: 2878
episode length: 28
episode reward: -6.738547941257331
current timestep: 52929

episode: 2879
episode length: 30
episode reward: -5.752210674512663
current timestep: 52959

episode: 2880
episode length: 36
episode reward: -2.2221024130694387
current timestep: 52995

episode: 2881
episode length: 37
episode reward: -2.0662860635893843
current timestep: 53032

episode: 2882
episode length: 32
episode reward: -4.345529862330339
current timestep: 53064

episode: 2883
episode length: 30
episode reward: -4.692071551149336
current timestep: 53094

episode: 2884
episode length: 30
episode reward: -5.0099463300305045
current timestep: 53124

episode: 2885
episode length: 29
episode reward: -4.54485171230087

episode: 2963
episode length: 36
episode reward: -7.34301827670196
current timestep: 55679

episode: 2964
episode length: 36
episode reward: -3.5051554792624318
current timestep: 55715

episode: 2965
episode length: 39
episode reward: -16.219253194863093
current timestep: 55754

episode: 2966
episode length: 35
episode reward: -6.5081873339506515
current timestep: 55789

episode: 2967
episode length: 50
episode reward: -15.939201681069882
current timestep: 55839

episode: 2968
episode length: 40
episode reward: -16.22895960914573
current timestep: 55879

episode: 2969
episode length: 34
episode reward: -3.3488485418813183
current timestep: 55913

episode: 2970
episode length: 42
episode reward: -19.418413682983307
current timestep: 55955

episode: 2971
episode length: 33
episode reward: -4.289659073274965
current timestep: 55988

episode: 2972
episode length: 44
episode reward: -16.73623080280545
current timestep: 56032

episode: 2973
episode length: 39
episode reward: -14.297486228080

episode: 3052
episode length: 25
episode reward: -0.628276113474873
current timestep: 59266

episode: 3053
episode length: 13
episode reward: -7.526547372120226
current timestep: 59279

episode: 3054
episode length: 24
episode reward: -0.914375316553315
current timestep: 59303

episode: 3055
episode length: 23
episode reward: -1.5925700460135939
current timestep: 59326

episode: 3056
episode length: 13
episode reward: -7.811694006130498
current timestep: 59339

episode: 3057
episode length: 13
episode reward: -7.239671790017655
current timestep: 59352

episode: 3058
episode length: 12
episode reward: -7.576575222195639
current timestep: 59364

episode: 3059
episode length: 13
episode reward: -7.581563828605649
current timestep: 59377

episode: 3060
episode length: 13
episode reward: -7.453050858356361
current timestep: 59390

episode: 3061
episode length: 14
episode reward: -7.882966422183602
current timestep: 59404

episode: 3062
episode length: 13
episode reward: -7.8891382833488395


episode: 3140
episode length: 23
episode reward: -3.584224051897233
current timestep: 61379

episode: 3141
episode length: 24
episode reward: -1.620694522756474
current timestep: 61403

episode: 3142
episode length: 20
episode reward: -3.135361827701563
current timestep: 61423

episode: 3143
episode length: 20
episode reward: -4.081704545302327
current timestep: 61443

episode: 3144
episode length: 21
episode reward: -3.311309472166135
current timestep: 61464

episode: 3145
episode length: 20
episode reward: -1.7587927211146943
current timestep: 61484

episode: 3146
episode length: 20
episode reward: 0.3929496793262339
current timestep: 61504

episode: 3147
episode length: 19
episode reward: -2.8057509471796105
current timestep: 61523

episode: 3148
episode length: 20
episode reward: -3.026377591238575
current timestep: 61543

episode: 3149
episode length: 20
episode reward: -2.0774359118507406
current timestep: 61563

episode: 3150
episode length: 20
episode reward: -2.052048087347167

episode: 3228
episode length: 19
episode reward: -4.105649086624975
current timestep: 63135

episode: 3229
episode length: 20
episode reward: -2.7334959186739924
current timestep: 63155

episode: 3230
episode length: 19
episode reward: -2.5311784618553888
current timestep: 63174

episode: 3231
episode length: 20
episode reward: -2.9570265438107906
current timestep: 63194

episode: 3232
episode length: 20
episode reward: -1.254908323174928
current timestep: 63214

episode: 3233
episode length: 20
episode reward: -1.7474938307670067
current timestep: 63234

episode: 3234
episode length: 20
episode reward: -2.35180335483183
current timestep: 63254

episode: 3235
episode length: 19
episode reward: -1.7073762568144724
current timestep: 63273

episode: 3236
episode length: 20
episode reward: -1.2442372332297835
current timestep: 63293

episode: 3237
episode length: 20
episode reward: -1.2674556178514298
current timestep: 63313

episode: 3238
episode length: 21
episode reward: -2.386299463020

episode: 3316
episode length: 21
episode reward: -5.1071351811343595
current timestep: 65040

episode: 3317
episode length: 21
episode reward: -2.7810844140344138
current timestep: 65061

episode: 3318
episode length: 22
episode reward: -0.8544315994842434
current timestep: 65083

episode: 3319
episode length: 22
episode reward: -0.8209280796800692
current timestep: 65105

episode: 3320
episode length: 20
episode reward: -3.5559365823109417
current timestep: 65125

episode: 3321
episode length: 22
episode reward: -0.8506481440463396
current timestep: 65147

episode: 3322
episode length: 21
episode reward: -4.036562257609955
current timestep: 65168

episode: 3323
episode length: 21
episode reward: -2.5375799685757214
current timestep: 65189

episode: 3324
episode length: 22
episode reward: -1.6280358162356319
current timestep: 65211

episode: 3325
episode length: 22
episode reward: -2.0119158824609844
current timestep: 65233

episode: 3326
episode length: 22
episode reward: -3.560012847

episode: 3404
episode length: 19
episode reward: -7.1367130868806825
current timestep: 66918

episode: 3405
episode length: 20
episode reward: -6.914896008648139
current timestep: 66938

episode: 3406
episode length: 19
episode reward: -7.23882247719354
current timestep: 66957

episode: 3407
episode length: 20
episode reward: -6.522579501497755
current timestep: 66977

episode: 3408
episode length: 20
episode reward: -4.008491485673977
current timestep: 66997

episode: 3409
episode length: 18
episode reward: -8.549817480738717
current timestep: 67015

episode: 3410
episode length: 20
episode reward: -6.612612115496459
current timestep: 67035

episode: 3411
episode length: 21
episode reward: -5.268634959203539
current timestep: 67056

episode: 3412
episode length: 19
episode reward: -7.5243795284962465
current timestep: 67075

episode: 3413
episode length: 19
episode reward: -7.896640054807153
current timestep: 67094

episode: 3414
episode length: 20
episode reward: -5.824595630268241
c

episode: 3492
episode length: 33
episode reward: 2.168144969424059
current timestep: 68722

episode: 3493
episode length: 35
episode reward: 1.4053015685347265
current timestep: 68757

episode: 3494
episode length: 34
episode reward: 2.697243781373055
current timestep: 68791

episode: 3495
episode length: 30
episode reward: 1.1035996797968886
current timestep: 68821

episode: 3496
episode length: 35
episode reward: 1.8460796624581102
current timestep: 68856

episode: 3497
episode length: 29
episode reward: 0.5890994935731706
current timestep: 68885

episode: 3498
episode length: 30
episode reward: 0.03755815042075206
current timestep: 68915

episode: 3499
episode length: 32
episode reward: 3.0063430216686666
current timestep: 68947

episode: 3500
episode length: 30
episode reward: 1.6871134645099155
current timestep: 68977

avg. reward: -5.500271167932575

episode: 3501
episode length: 29
episode reward: 1.9328355149938803
current timestep: 69006

episode: 3502
episode length: 30
episo

episode: 3581
episode length: 28
episode reward: -2.8900406099288296
current timestep: 71126

episode: 3582
episode length: 33
episode reward: -1.6183629097949968
current timestep: 71159

episode: 3583
episode length: 31
episode reward: -2.302978979966754
current timestep: 71190

episode: 3584
episode length: 28
episode reward: -2.3451715905662582
current timestep: 71218

episode: 3585
episode length: 30
episode reward: -0.3364217578935993
current timestep: 71248

episode: 3586
episode length: 31
episode reward: -1.5403208505379422
current timestep: 71279

episode: 3587
episode length: 31
episode reward: -1.9811528262964813
current timestep: 71310

episode: 3588
episode length: 31
episode reward: -1.311389326835686
current timestep: 71341

episode: 3589
episode length: 34
episode reward: -0.6283657534119268
current timestep: 71375

episode: 3590
episode length: 31
episode reward: -3.1261181965460003
current timestep: 71406

episode: 3591
episode length: 32
episode reward: -0.8886069155

episode: 3670
episode length: 30
episode reward: -3.5429367380751864
current timestep: 74213

episode: 3671
episode length: 45
episode reward: -13.310538349559849
current timestep: 74258

episode: 3672
episode length: 31
episode reward: -3.8053642490251374
current timestep: 74289

episode: 3673
episode length: 38
episode reward: -0.5224766647786503
current timestep: 74327

episode: 3674
episode length: 28
episode reward: -2.0873987365398246
current timestep: 74355

episode: 3675
episode length: 31
episode reward: -3.7079862741901817
current timestep: 74386

episode: 3676
episode length: 27
episode reward: -2.579573194367777
current timestep: 74413

episode: 3677
episode length: 30
episode reward: -3.1065749831736844
current timestep: 74443

episode: 3678
episode length: 31
episode reward: -1.7025455521800137
current timestep: 74474

episode: 3679
episode length: 31
episode reward: -3.393814950644295
current timestep: 74505

episode: 3680
episode length: 30
episode reward: -1.7048997098

episode: 3759
episode length: 25
episode reward: 0.22325945764445027
current timestep: 77318

episode: 3760
episode length: 26
episode reward: -0.45325324820543234
current timestep: 77344

episode: 3761
episode length: 26
episode reward: -1.7967726266593815
current timestep: 77370

episode: 3762
episode length: 29
episode reward: 1.9413818788013102
current timestep: 77399

episode: 3763
episode length: 28
episode reward: -0.1944527768724757
current timestep: 77427

episode: 3764
episode length: 25
episode reward: -2.276346309847128
current timestep: 77452

episode: 3765
episode length: 31
episode reward: 3.948086342678393
current timestep: 77483

episode: 3766
episode length: 26
episode reward: 0.9876650429303402
current timestep: 77509

episode: 3767
episode length: 27
episode reward: 0.2736770491940204
current timestep: 77536

episode: 3768
episode length: 26
episode reward: 1.4841768320775617
current timestep: 77562

episode: 3769
episode length: 24
episode reward: -0.58156877756109

episode: 3847
episode length: 32
episode reward: 4.992440450244294
current timestep: 80411

episode: 3848
episode length: 32
episode reward: 5.295338019197019
current timestep: 80443

episode: 3849
episode length: 32
episode reward: 2.9999948142884585
current timestep: 80475

episode: 3850
episode length: 33
episode reward: 5.391626256504207
current timestep: 80508

episode: 3851
episode length: 32
episode reward: 3.9882104048820226
current timestep: 80540

episode: 3852
episode length: 49
episode reward: 27.102254399869455
current timestep: 80589

episode: 3853
episode length: 82
episode reward: 85.7760475852036
current timestep: 80671

episode: 3854
episode length: 92
episode reward: 70.33356251170872
current timestep: 80763

episode: 3855
episode length: 33
episode reward: 3.4048554459784968
current timestep: 80796

episode: 3856
episode length: 32
episode reward: 5.406549625024738
current timestep: 80828

episode: 3857
episode length: 36
episode reward: 5.305316844609804
current ti

episode: 3939
episode length: 9
episode reward: -8.916334739529319
current timestep: 85049

episode: 3940
episode length: 9
episode reward: -8.362153927310738
current timestep: 85058

episode: 3941
episode length: 9
episode reward: -8.840144072817639
current timestep: 85067

episode: 3942
episode length: 9
episode reward: -8.350935805355839
current timestep: 85076

episode: 3943
episode length: 9
episode reward: -8.475732309291987
current timestep: 85085

episode: 3944
episode length: 9
episode reward: -8.574122762844052
current timestep: 85094

episode: 3945
episode length: 9
episode reward: -8.292335442681473
current timestep: 85103

episode: 3946
episode length: 9
episode reward: -8.88623580136032
current timestep: 85112

episode: 3947
episode length: 9
episode reward: -8.892470933828333
current timestep: 85121

episode: 3948
episode length: 9
episode reward: -8.296126210426355
current timestep: 85130

episode: 3949
episode length: 9
episode reward: -8.292442661306247
current timest

episode: 4031
episode length: 10
episode reward: -9.063259445551976
current timestep: 85893

episode: 4032
episode length: 9
episode reward: -8.174307805500975
current timestep: 85902

episode: 4033
episode length: 9
episode reward: -8.194604350634581
current timestep: 85911

episode: 4034
episode length: 9
episode reward: -8.49824360553875
current timestep: 85920

episode: 4035
episode length: 9
episode reward: -8.525866914423597
current timestep: 85929

episode: 4036
episode length: 10
episode reward: -8.992413857741603
current timestep: 85939

episode: 4037
episode length: 9
episode reward: -8.283943028421035
current timestep: 85948

episode: 4038
episode length: 10
episode reward: -9.000523781439426
current timestep: 85958

episode: 4039
episode length: 10
episode reward: -8.668697714955254
current timestep: 85968

episode: 4040
episode length: 9
episode reward: -8.325866028936737
current timestep: 85977

episode: 4041
episode length: 10
episode reward: -9.080168268672137
current t

episode: 4123
episode length: 9
episode reward: -8.383277449089052
current timestep: 86750

episode: 4124
episode length: 9
episode reward: -8.347449253076226
current timestep: 86759

episode: 4125
episode length: 10
episode reward: -8.744927360079945
current timestep: 86769

episode: 4126
episode length: 9
episode reward: -7.9838431932394585
current timestep: 86778

episode: 4127
episode length: 9
episode reward: -8.38752406408784
current timestep: 86787

episode: 4128
episode length: 10
episode reward: -9.127273342784845
current timestep: 86797

episode: 4129
episode length: 9
episode reward: -8.237632884605935
current timestep: 86806

episode: 4130
episode length: 9
episode reward: -8.192030232809227
current timestep: 86815

episode: 4131
episode length: 10
episode reward: -9.289741832520674
current timestep: 86825

episode: 4132
episode length: 10
episode reward: -8.664652416366318
current timestep: 86835

episode: 4133
episode length: 9
episode reward: -8.119521943244958
current t

episode: 4212
episode length: 35
episode reward: 30.70795776933363
current timestep: 88810

episode: 4213
episode length: 46
episode reward: 34.73996212312029
current timestep: 88856

episode: 4214
episode length: 81
episode reward: 88.65011147587916
current timestep: 88937

episode: 4215
episode length: 46
episode reward: 30.23656550099848
current timestep: 88983

episode: 4216
episode length: 45
episode reward: 49.85438823692045
current timestep: 89028

episode: 4217
episode length: 43
episode reward: 32.25667499022777
current timestep: 89071

episode: 4218
episode length: 47
episode reward: 30.7795472109373
current timestep: 89118

episode: 4219
episode length: 68
episode reward: 56.128429622436336
current timestep: 89186

episode: 4220
episode length: 45
episode reward: 32.6549590129536
current timestep: 89231

episode: 4221
episode length: 45
episode reward: 28.953984663075975
current timestep: 89276

episode: 4222
episode length: 47
episode reward: 23.20355373439172
current times

episode: 4301
episode length: 46
episode reward: 37.114989518969026
current timestep: 93071

episode: 4302
episode length: 65
episode reward: 22.541361412065175
current timestep: 93136

episode: 4303
episode length: 47
episode reward: 3.075066558729741
current timestep: 93183

episode: 4304
episode length: 45
episode reward: -8.354492366539207
current timestep: 93228

episode: 4305
episode length: 36
episode reward: 9.140621106399134
current timestep: 93264

episode: 4306
episode length: 33
episode reward: 2.6366433895875283
current timestep: 93297

episode: 4307
episode length: 32
episode reward: 3.7424155321973678
current timestep: 93329

episode: 4308
episode length: 32
episode reward: 6.137663252036246
current timestep: 93361

episode: 4309
episode length: 47
episode reward: 26.137537555591145
current timestep: 93408

episode: 4310
episode length: 78
episode reward: 36.00798720735649
current timestep: 93486

episode: 4311
episode length: 49
episode reward: 8.789436836745931
current

episode: 4390
episode length: 69
episode reward: 66.70388505963435
current timestep: 98678

episode: 4391
episode length: 64
episode reward: -29.531411369143257
current timestep: 98742

episode: 4392
episode length: 65
episode reward: -33.51020830041046
current timestep: 98807

episode: 4393
episode length: 65
episode reward: -27.820221118485716
current timestep: 98872

episode: 4394
episode length: 70
episode reward: -28.867004948760915
current timestep: 98942

episode: 4395
episode length: 68
episode reward: -29.829873808634886
current timestep: 99010

episode: 4396
episode length: 71
episode reward: -35.26570555552587
current timestep: 99081

episode: 4397
episode length: 56
episode reward: -13.929500519004058
current timestep: 99137

episode: 4398
episode length: 59
episode reward: 46.56856096302576
current timestep: 99196

episode: 4399
episode length: 67
episode reward: -34.87726198605369
current timestep: 99263

episode: 4400
episode length: 64
episode reward: -27.68925324775362

episode: 4477
episode length: 59
episode reward: -34.256196661919226
current timestep: 104102

episode: 4478
episode length: 62
episode reward: -34.77125347464553
current timestep: 104164

episode: 4479
episode length: 54
episode reward: -24.118348991848663
current timestep: 104218

episode: 4480
episode length: 67
episode reward: -31.802338229675208
current timestep: 104285

episode: 4481
episode length: 70
episode reward: -33.31514481074863
current timestep: 104355

episode: 4482
episode length: 56
episode reward: -31.399762961558093
current timestep: 104411

episode: 4483
episode length: 60
episode reward: -33.86535655971166
current timestep: 104471

episode: 4484
episode length: 59
episode reward: -30.540575658812887
current timestep: 104530

episode: 4485
episode length: 67
episode reward: -28.686270007192544
current timestep: 104597

episode: 4486
episode length: 58
episode reward: -30.229410202213018
current timestep: 104655

episode: 4487
episode length: 68
episode reward: -28.

episode: 4564
episode length: 60
episode reward: -33.12204145345453
current timestep: 109714

episode: 4565
episode length: 59
episode reward: -31.09280415458418
current timestep: 109773

episode: 4566
episode length: 57
episode reward: -27.625373287533954
current timestep: 109830

episode: 4567
episode length: 58
episode reward: -25.126948587168823
current timestep: 109888

episode: 4568
episode length: 66
episode reward: -41.13113233461058
current timestep: 109954

episode: 4569
episode length: 63
episode reward: -36.36902245181994
current timestep: 110017

episode: 4570
episode length: 58
episode reward: -32.864547996704815
current timestep: 110075

episode: 4571
episode length: 63
episode reward: -36.94272759839317
current timestep: 110138

episode: 4572
episode length: 60
episode reward: -34.58976097515373
current timestep: 110198

episode: 4573
episode length: 53
episode reward: -22.236634075181293
current timestep: 110251

episode: 4574
episode length: 64
episode reward: -36.576

episode: 4651
episode length: 58
episode reward: -34.76048269860671
current timestep: 114567

episode: 4652
episode length: 54
episode reward: -37.182278360992086
current timestep: 114621

episode: 4653
episode length: 56
episode reward: -34.51780519403289
current timestep: 114677

episode: 4654
episode length: 56
episode reward: -39.81768809164244
current timestep: 114733

episode: 4655
episode length: 54
episode reward: -31.524753590493717
current timestep: 114787

episode: 4656
episode length: 56
episode reward: -35.974013506393675
current timestep: 114843

episode: 4657
episode length: 57
episode reward: -33.436708278588156
current timestep: 114900

episode: 4658
episode length: 59
episode reward: -33.52944187132842
current timestep: 114959

episode: 4659
episode length: 54
episode reward: -31.557638563669595
current timestep: 115013

episode: 4660
episode length: 55
episode reward: -36.13356635454603
current timestep: 115068

episode: 4661
episode length: 56
episode reward: -36.55

episode: 4738
episode length: 56
episode reward: -31.486048186764183
current timestep: 119412

episode: 4739
episode length: 55
episode reward: -34.26414457184027
current timestep: 119467

episode: 4740
episode length: 55
episode reward: -38.62102056458812
current timestep: 119522

episode: 4741
episode length: 54
episode reward: -31.40381034442757
current timestep: 119576

episode: 4742
episode length: 56
episode reward: -35.90518152040175
current timestep: 119632

episode: 4743
episode length: 56
episode reward: -33.759289859323744
current timestep: 119688

episode: 4744
episode length: 58
episode reward: -39.86266557097828
current timestep: 119746

episode: 4745
episode length: 56
episode reward: -37.6992165234134
current timestep: 119802

episode: 4746
episode length: 55
episode reward: -29.77852152021768
current timestep: 119857

episode: 4747
episode length: 54
episode reward: -35.69980719856452
current timestep: 119911

episode: 4748
episode length: 54
episode reward: -36.296361

episode: 4825
episode length: 53
episode reward: -26.38757437264686
current timestep: 124096

episode: 4826
episode length: 53
episode reward: -23.505554859242114
current timestep: 124149

episode: 4827
episode length: 54
episode reward: -26.41636827916912
current timestep: 124203

episode: 4828
episode length: 56
episode reward: -31.503566622045426
current timestep: 124259

episode: 4829
episode length: 55
episode reward: -24.245700434232806
current timestep: 124314

episode: 4830
episode length: 54
episode reward: -20.421249593654572
current timestep: 124368

episode: 4831
episode length: 53
episode reward: -20.81301549704888
current timestep: 124421

episode: 4832
episode length: 56
episode reward: -30.918769742924912
current timestep: 124477

episode: 4833
episode length: 55
episode reward: -21.96621029634854
current timestep: 124532

episode: 4834
episode length: 61
episode reward: -33.256636854302776
current timestep: 124593

episode: 4835
episode length: 74
episode reward: -28.0

episode: 4912
episode length: 69
episode reward: -30.135111455895753
current timestep: 129485

episode: 4913
episode length: 74
episode reward: -23.058317391611453
current timestep: 129559

episode: 4914
episode length: 72
episode reward: -27.326624288529146
current timestep: 129631

episode: 4915
episode length: 87
episode reward: -24.027120190637795
current timestep: 129718

episode: 4916
episode length: 69
episode reward: -33.65056295860422
current timestep: 129787

episode: 4917
episode length: 63
episode reward: -21.29497849388126
current timestep: 129850

episode: 4918
episode length: 102
episode reward: 89.47033025365444
current timestep: 129952

episode: 4919
episode length: 61
episode reward: -12.701323818527612
current timestep: 130013

episode: 4920
episode length: 112
episode reward: 80.035789764841
current timestep: 130125

episode: 4921
episode length: 62
episode reward: -23.01011166113612
current timestep: 130187

episode: 4922
episode length: 56
episode reward: -10.9238

episode: 4999
episode length: 62
episode reward: -14.789834629215001
current timestep: 136113

episode: 5000
episode length: 72
episode reward: -27.697135898795516
current timestep: 136185

avg. reward: -0.7143496919500757

episode: 5001
episode length: 75
episode reward: -31.068352558380028
current timestep: 136260

episode: 5002
episode length: 119
episode reward: 104.99311204700751
current timestep: 136379

episode: 5003
episode length: 66
episode reward: -24.210778756783302
current timestep: 136445

episode: 5004
episode length: 70
episode reward: -26.82464502809643
current timestep: 136515

episode: 5005
episode length: 61
episode reward: -12.964849203373022
current timestep: 136576

episode: 5006
episode length: 78
episode reward: -32.68226636214418
current timestep: 136654

episode: 5007
episode length: 62
episode reward: -19.32626703584162
current timestep: 136716

episode: 5008
episode length: 83
episode reward: -25.478116410151866
current timestep: 136799

episode: 5009
episo

episode: 5086
episode length: 69
episode reward: -26.256870361145847
current timestep: 142499

episode: 5087
episode length: 89
episode reward: -29.557873231747692
current timestep: 142588

episode: 5088
episode length: 87
episode reward: 61.562133132149526
current timestep: 142675

episode: 5089
episode length: 60
episode reward: -13.560692202137927
current timestep: 142735

episode: 5090
episode length: 92
episode reward: -10.796681515073562
current timestep: 142827

episode: 5091
episode length: 71
episode reward: -25.141470291479166
current timestep: 142898

episode: 5092
episode length: 70
episode reward: -32.16260366447878
current timestep: 142968

episode: 5093
episode length: 61
episode reward: -11.441202827414493
current timestep: 143029

episode: 5094
episode length: 50
episode reward: -10.907088339928073
current timestep: 143079

episode: 5095
episode length: 84
episode reward: -37.23334995646668
current timestep: 143163

episode: 5096
episode length: 58
episode reward: -15.

episode: 5173
episode length: 62
episode reward: -15.019132326499216
current timestep: 148602

episode: 5174
episode length: 76
episode reward: -21.575340279265074
current timestep: 148678

episode: 5175
episode length: 76
episode reward: -31.33263309770235
current timestep: 148754

episode: 5176
episode length: 57
episode reward: -2.1514824660503895
current timestep: 148811

episode: 5177
episode length: 68
episode reward: -52.148641253801145
current timestep: 148879

episode: 5178
episode length: 60
episode reward: -10.383008490246475
current timestep: 148939

episode: 5179
episode length: 67
episode reward: -30.663141946516483
current timestep: 149006

episode: 5180
episode length: 73
episode reward: -35.08169896155537
current timestep: 149079

episode: 5181
episode length: 73
episode reward: -35.35257802469251
current timestep: 149152

episode: 5182
episode length: 132
episode reward: 4.1220264374535045
current timestep: 149284

episode: 5183
episode length: 134
episode reward: -4.

episode: 5260
episode length: 111
episode reward: 90.82666733537165
current timestep: 155191

episode: 5261
episode length: 76
episode reward: -40.35691619189525
current timestep: 155267

episode: 5262
episode length: 74
episode reward: -30.719445121286597
current timestep: 155341

episode: 5263
episode length: 61
episode reward: -54.504031077976656
current timestep: 155402

episode: 5264
episode length: 83
episode reward: -31.79907623062274
current timestep: 155485

episode: 5265
episode length: 148
episode reward: 111.82884066527733
current timestep: 155633

episode: 5266
episode length: 60
episode reward: -6.20352169998976
current timestep: 155693

episode: 5267
episode length: 79
episode reward: -39.89740786477046
current timestep: 155772

episode: 5268
episode length: 79
episode reward: -53.40658859134322
current timestep: 155851

episode: 5269
episode length: 58
episode reward: -47.37363507798193
current timestep: 155909

episode: 5270
episode length: 99
episode reward: -45.50463

episode: 5347
episode length: 67
episode reward: -46.170984108938
current timestep: 161433

episode: 5348
episode length: 60
episode reward: -9.79209496575664
current timestep: 161493

episode: 5349
episode length: 82
episode reward: 42.752300523416864
current timestep: 161575

episode: 5350
episode length: 62
episode reward: -29.350145518127967
current timestep: 161637

episode: 5351
episode length: 51
episode reward: -9.7106644725707
current timestep: 161688

episode: 5352
episode length: 79
episode reward: -52.49820633962027
current timestep: 161767

episode: 5353
episode length: 74
episode reward: -35.79066071021148
current timestep: 161841

episode: 5354
episode length: 70
episode reward: -34.62604326669557
current timestep: 161911

episode: 5355
episode length: 70
episode reward: -41.22228233091803
current timestep: 161981

episode: 5356
episode length: 60
episode reward: -39.986958189472304
current timestep: 162041

episode: 5357
episode length: 84
episode reward: 54.58578877187

episode: 5434
episode length: 75
episode reward: -49.78954963814957
current timestep: 168149

episode: 5435
episode length: 65
episode reward: -50.06385854092353
current timestep: 168214

episode: 5436
episode length: 72
episode reward: -45.44704032642063
current timestep: 168286

episode: 5437
episode length: 97
episode reward: -47.09110262845118
current timestep: 168383

episode: 5438
episode length: 75
episode reward: -49.802987407804544
current timestep: 168458

episode: 5439
episode length: 81
episode reward: -45.85908726386282
current timestep: 168539

episode: 5440
episode length: 97
episode reward: -40.62503997686948
current timestep: 168636

episode: 5441
episode length: 93
episode reward: -46.91565419207693
current timestep: 168729

episode: 5442
episode length: 129
episode reward: -18.33048170530386
current timestep: 168858

episode: 5443
episode length: 88
episode reward: -51.68442357194037
current timestep: 168946

episode: 5444
episode length: 69
episode reward: -38.06845

episode: 5521
episode length: 67
episode reward: -42.75588193255477
current timestep: 174640

episode: 5522
episode length: 75
episode reward: -21.91189224891017
current timestep: 174715

episode: 5523
episode length: 71
episode reward: -35.16943354893775
current timestep: 174786

episode: 5524
episode length: 65
episode reward: 27.02426447200822
current timestep: 174851

episode: 5525
episode length: 71
episode reward: -29.352422351762666
current timestep: 174922

episode: 5526
episode length: 80
episode reward: -25.398687135356663
current timestep: 175002

episode: 5527
episode length: 70
episode reward: -37.229197126620775
current timestep: 175072

episode: 5528
episode length: 86
episode reward: -34.94495501570242
current timestep: 175158

episode: 5529
episode length: 63
episode reward: -20.983771746387255
current timestep: 175221

episode: 5530
episode length: 57
episode reward: -8.47946742960083
current timestep: 175278

episode: 5531
episode length: 66
episode reward: -17.64584

episode: 5608
episode length: 74
episode reward: -26.678004095163644
current timestep: 180985

episode: 5609
episode length: 64
episode reward: -18.61342440409971
current timestep: 181049

episode: 5610
episode length: 64
episode reward: -18.70330557999784
current timestep: 181113

episode: 5611
episode length: 67
episode reward: -27.877258115021668
current timestep: 181180

episode: 5612
episode length: 64
episode reward: -17.252934372705713
current timestep: 181244

episode: 5613
episode length: 85
episode reward: -27.5652851929308
current timestep: 181329

episode: 5614
episode length: 87
episode reward: -21.571700744912103
current timestep: 181416

episode: 5615
episode length: 54
episode reward: 27.05596334817444
current timestep: 181470

episode: 5616
episode length: 78
episode reward: -37.10967019534984
current timestep: 181548

episode: 5617
episode length: 74
episode reward: 58.02528029004801
current timestep: 181622

episode: 5618
episode length: 80
episode reward: -22.156707

episode: 5695
episode length: 56
episode reward: 22.6310706756779
current timestep: 187428

episode: 5696
episode length: 59
episode reward: 31.12113202099368
current timestep: 187487

episode: 5697
episode length: 58
episode reward: 1.2702458232986573
current timestep: 187545

episode: 5698
episode length: 95
episode reward: -28.056298351861567
current timestep: 187640

episode: 5699
episode length: 69
episode reward: -23.59353183006862
current timestep: 187709

episode: 5700
episode length: 89
episode reward: -29.041053199587523
current timestep: 187798

avg. reward: 1.8128476450428923

episode: 5701
episode length: 81
episode reward: -16.235798116618408
current timestep: 187879

episode: 5702
episode length: 47
episode reward: -8.947586477847402
current timestep: 187926

episode: 5703
episode length: 77
episode reward: -23.841612348989777
current timestep: 188003

episode: 5704
episode length: 108
episode reward: 113.93502995252689
current timestep: 188111

episode: 5705
episode len

episode: 5782
episode length: 104
episode reward: -10.777652872025893
current timestep: 193055

episode: 5783
episode length: 58
episode reward: -14.737768288636264
current timestep: 193113

episode: 5784
episode length: 58
episode reward: -8.236287947369492
current timestep: 193171

episode: 5785
episode length: 65
episode reward: -25.394903224342617
current timestep: 193236

episode: 5786
episode length: 49
episode reward: -11.635039224218062
current timestep: 193285

episode: 5787
episode length: 77
episode reward: -20.658029314820357
current timestep: 193362

episode: 5788
episode length: 57
episode reward: -8.017792774363652
current timestep: 193419

episode: 5789
episode length: 58
episode reward: -7.653820213449426
current timestep: 193477

episode: 5790
episode length: 57
episode reward: -7.9581229812849585
current timestep: 193534

episode: 5791
episode length: 157
episode reward: 167.26568546492982
current timestep: 193691

episode: 5792
episode length: 57
episode reward: -8.

episode: 5869
episode length: 65
episode reward: -23.359372143713788
current timestep: 198738

episode: 5870
episode length: 123
episode reward: -1.8663102906318572
current timestep: 198861

episode: 5871
episode length: 72
episode reward: -21.858247812018945
current timestep: 198933

episode: 5872
episode length: 63
episode reward: -12.689631746736334
current timestep: 198996

episode: 5873
episode length: 100
episode reward: -9.959801896546352
current timestep: 199096

episode: 5874
episode length: 55
episode reward: 2.694011421853694
current timestep: 199151

episode: 5875
episode length: 53
episode reward: -1.545968715303761
current timestep: 199204

episode: 5876
episode length: 47
episode reward: -7.4754354803285255
current timestep: 199251

episode: 5877
episode length: 60
episode reward: -8.156736630304888
current timestep: 199311

episode: 5878
episode length: 55
episode reward: -2.3231885481339676
current timestep: 199366

episode: 5879
episode length: 51
episode reward: -4.1

episode: 5956
episode length: 56
episode reward: 18.38942552430773
current timestep: 205049

episode: 5957
episode length: 89
episode reward: -15.860295596526345
current timestep: 205138

episode: 5958
episode length: 48
episode reward: -7.126036094406015
current timestep: 205186

episode: 5959
episode length: 125
episode reward: 11.181231182648114
current timestep: 205311

episode: 5960
episode length: 58
episode reward: -2.8827105861569597
current timestep: 205369

episode: 5961
episode length: 91
episode reward: -29.268215663540968
current timestep: 205460

episode: 5962
episode length: 61
episode reward: -23.339192900268685
current timestep: 205521

episode: 5963
episode length: 82
episode reward: -19.419169231447462
current timestep: 205603

episode: 5964
episode length: 62
episode reward: -22.25948746724586
current timestep: 205665

episode: 5965
episode length: 49
episode reward: -1.9770064132466896
current timestep: 205714

episode: 5966
episode length: 80
episode reward: -40.2

episode: 6043
episode length: 58
episode reward: -18.506125486145393
current timestep: 211437

episode: 6044
episode length: 57
episode reward: -14.116130566946453
current timestep: 211494

episode: 6045
episode length: 56
episode reward: -16.845339879472146
current timestep: 211550

episode: 6046
episode length: 94
episode reward: -21.02875006226449
current timestep: 211644

episode: 6047
episode length: 63
episode reward: -31.570846276439887
current timestep: 211707

episode: 6048
episode length: 73
episode reward: 30.576733025622435
current timestep: 211780

episode: 6049
episode length: 59
episode reward: -21.87814079840512
current timestep: 211839

episode: 6050
episode length: 64
episode reward: 41.59372417297354
current timestep: 211903

episode: 6051
episode length: 57
episode reward: -15.641536488068192
current timestep: 211960

episode: 6052
episode length: 80
episode reward: -41.45559968956562
current timestep: 212040

episode: 6053
episode length: 56
episode reward: -16.797

episode: 6130
episode length: 73
episode reward: -31.2282433202073
current timestep: 217552

episode: 6131
episode length: 119
episode reward: 115.61902041508135
current timestep: 217671

episode: 6132
episode length: 63
episode reward: -28.501917099095476
current timestep: 217734

episode: 6133
episode length: 83
episode reward: -26.13704969901547
current timestep: 217817

episode: 6134
episode length: 67
episode reward: -27.383072372135196
current timestep: 217884

episode: 6135
episode length: 96
episode reward: -20.37511080681987
current timestep: 217980

episode: 6136
episode length: 59
episode reward: -19.265060262578164
current timestep: 218039

episode: 6137
episode length: 136
episode reward: 10.733741085497293
current timestep: 218175

episode: 6138
episode length: 54
episode reward: -16.51123289783405
current timestep: 218229

episode: 6139
episode length: 58
episode reward: -15.927971771084838
current timestep: 218287

episode: 6140
episode length: 59
episode reward: -22.17

episode: 6217
episode length: 71
episode reward: -28.575743918824823
current timestep: 223801

episode: 6218
episode length: 69
episode reward: -51.976820691289994
current timestep: 223870

episode: 6219
episode length: 61
episode reward: -25.759341192595897
current timestep: 223931

episode: 6220
episode length: 59
episode reward: -45.38848130008505
current timestep: 223990

episode: 6221
episode length: 59
episode reward: -17.183431257925605
current timestep: 224049

episode: 6222
episode length: 63
episode reward: -32.77056158369621
current timestep: 224112

episode: 6223
episode length: 61
episode reward: -23.530543522884663
current timestep: 224173

episode: 6224
episode length: 65
episode reward: -29.070284052351436
current timestep: 224238

episode: 6225
episode length: 57
episode reward: -14.176210244390921
current timestep: 224295

episode: 6226
episode length: 86
episode reward: -26.563076697519392
current timestep: 224381

episode: 6227
episode length: 67
episode reward: -46

episode: 6304
episode length: 47
episode reward: -27.657756728048682
current timestep: 229965

episode: 6305
episode length: 68
episode reward: -48.15749169535838
current timestep: 230033

episode: 6306
episode length: 62
episode reward: -42.9445942100389
current timestep: 230095

episode: 6307
episode length: 57
episode reward: -48.18208910668116
current timestep: 230152

episode: 6308
episode length: 75
episode reward: -33.831620751885794
current timestep: 230227

episode: 6309
episode length: 59
episode reward: -45.048170309989914
current timestep: 230286

episode: 6310
episode length: 68
episode reward: -46.38929590529268
current timestep: 230354

episode: 6311
episode length: 59
episode reward: -37.57900557898536
current timestep: 230413

episode: 6312
episode length: 60
episode reward: -34.94731781056549
current timestep: 230473

episode: 6313
episode length: 62
episode reward: -35.60819792609556
current timestep: 230535

episode: 6314
episode length: 79
episode reward: -13.20474

episode: 6391
episode length: 55
episode reward: -42.6066580016351
current timestep: 234952

episode: 6392
episode length: 55
episode reward: -40.7691716621058
current timestep: 235007

episode: 6393
episode length: 52
episode reward: -30.45098411463427
current timestep: 235059

episode: 6394
episode length: 56
episode reward: -43.322318432834855
current timestep: 235115

episode: 6395
episode length: 55
episode reward: -40.677579693845175
current timestep: 235170

episode: 6396
episode length: 52
episode reward: -34.57901405248037
current timestep: 235222

episode: 6397
episode length: 56
episode reward: -43.28085468176189
current timestep: 235278

episode: 6398
episode length: 54
episode reward: -34.21729130686774
current timestep: 235332

episode: 6399
episode length: 52
episode reward: -31.256590905954248
current timestep: 235384

episode: 6400
episode length: 54
episode reward: -37.19523098144684
current timestep: 235438

avg. reward: -39.05061196436941

episode: 6401
episode leng

episode: 6478
episode length: 53
episode reward: -34.277236065283006
current timestep: 239590

episode: 6479
episode length: 54
episode reward: -36.21240430224827
current timestep: 239644

episode: 6480
episode length: 53
episode reward: -34.699388511821894
current timestep: 239697

episode: 6481
episode length: 54
episode reward: -34.7291049567807
current timestep: 239751

episode: 6482
episode length: 51
episode reward: -28.72462422939705
current timestep: 239802

episode: 6483
episode length: 56
episode reward: -44.77569700623627
current timestep: 239858

episode: 6484
episode length: 54
episode reward: -34.7596402141691
current timestep: 239912

episode: 6485
episode length: 52
episode reward: -30.976289524570053
current timestep: 239964

episode: 6486
episode length: 55
episode reward: -40.53527884778698
current timestep: 240019

episode: 6487
episode length: 55
episode reward: -31.799580136600653
current timestep: 240074

episode: 6488
episode length: 52
episode reward: -29.06276

episode: 6565
episode length: 54
episode reward: -34.203593022047194
current timestep: 244243

episode: 6566
episode length: 57
episode reward: -47.108645956314064
current timestep: 244300

episode: 6567
episode length: 52
episode reward: -29.587599009377303
current timestep: 244352

episode: 6568
episode length: 56
episode reward: -43.281187122039434
current timestep: 244408

episode: 6569
episode length: 55
episode reward: -41.21578277231749
current timestep: 244463

episode: 6570
episode length: 50
episode reward: -26.607467495331
current timestep: 244513

episode: 6571
episode length: 53
episode reward: -35.46074029393769
current timestep: 244566

episode: 6572
episode length: 55
episode reward: -40.15159337587183
current timestep: 244621

episode: 6573
episode length: 52
episode reward: -32.27110479020303
current timestep: 244673

episode: 6574
episode length: 55
episode reward: -48.82331059090282
current timestep: 244728

episode: 6575
episode length: 54
episode reward: -37.77177

episode: 6652
episode length: 55
episode reward: -39.89573998770877
current timestep: 249090

episode: 6653
episode length: 55
episode reward: -41.01988348790912
current timestep: 249145

episode: 6654
episode length: 51
episode reward: -29.921832123568787
current timestep: 249196

episode: 6655
episode length: 57
episode reward: -49.56442659192635
current timestep: 249253

episode: 6656
episode length: 52
episode reward: -31.147286626540108
current timestep: 249305

episode: 6657
episode length: 54
episode reward: -38.660689675753936
current timestep: 249359

episode: 6658
episode length: 56
episode reward: -42.286447293694195
current timestep: 249415

episode: 6659
episode length: 53
episode reward: -33.18491771446925
current timestep: 249468

episode: 6660
episode length: 55
episode reward: -37.61697816234011
current timestep: 249523

episode: 6661
episode length: 55
episode reward: -38.157298284475765
current timestep: 249578

episode: 6662
episode length: 55
episode reward: -41.26

episode: 6739
episode length: 53
episode reward: -32.84911340891146
current timestep: 253765

episode: 6740
episode length: 50
episode reward: -28.05906862906259
current timestep: 253815

episode: 6741
episode length: 53
episode reward: -34.13911544607688
current timestep: 253868

episode: 6742
episode length: 54
episode reward: -38.59380246577719
current timestep: 253922

episode: 6743
episode length: 52
episode reward: -32.0172513177471
current timestep: 253974

episode: 6744
episode length: 53
episode reward: -36.04034601871051
current timestep: 254027

episode: 6745
episode length: 54
episode reward: -39.268111670735735
current timestep: 254081

episode: 6746
episode length: 54
episode reward: -34.3505872642377
current timestep: 254135

episode: 6747
episode length: 56
episode reward: -39.70193746561116
current timestep: 254191

episode: 6748
episode length: 55
episode reward: -42.37088936183066
current timestep: 254246

episode: 6749
episode length: 56
episode reward: -42.01637367

episode: 6826
episode length: 54
episode reward: -36.5726821799365
current timestep: 258443

episode: 6827
episode length: 53
episode reward: -34.215414069922666
current timestep: 258496

episode: 6828
episode length: 52
episode reward: -33.909577706985715
current timestep: 258548

episode: 6829
episode length: 54
episode reward: -35.28367224039839
current timestep: 258602

episode: 6830
episode length: 54
episode reward: -36.56645363250016
current timestep: 258656

episode: 6831
episode length: 52
episode reward: -29.497714934430327
current timestep: 258708

episode: 6832
episode length: 55
episode reward: -42.422520960955076
current timestep: 258763

episode: 6833
episode length: 54
episode reward: -40.78471130094923
current timestep: 258817

episode: 6834
episode length: 55
episode reward: -43.580316199655904
current timestep: 258872

episode: 6835
episode length: 54
episode reward: -37.995827986445924
current timestep: 258926

episode: 6836
episode length: 55
episode reward: -39.51

episode: 6913
episode length: 53
episode reward: -37.05429528681058
current timestep: 263151

episode: 6914
episode length: 55
episode reward: -42.047959168789696
current timestep: 263206

episode: 6915
episode length: 55
episode reward: -40.1797543905138
current timestep: 263261

episode: 6916
episode length: 52
episode reward: -33.01938867131611
current timestep: 263313

episode: 6917
episode length: 49
episode reward: -28.07855820446564
current timestep: 263362

episode: 6918
episode length: 55
episode reward: -41.09025918316647
current timestep: 263417

episode: 6919
episode length: 55
episode reward: -41.77727589187219
current timestep: 263472

episode: 6920
episode length: 55
episode reward: -42.350407607660486
current timestep: 263527

episode: 6921
episode length: 54
episode reward: -39.44650718023077
current timestep: 263581

episode: 6922
episode length: 55
episode reward: -43.339052032444116
current timestep: 263636

episode: 6923
episode length: 52
episode reward: -32.22984

episode: 7000
episode length: 56
episode reward: -41.15819776190678
current timestep: 267876

avg. reward: -38.922358514786545

episode: 7001
episode length: 54
episode reward: -39.293472052264335
current timestep: 267930

episode: 7002
episode length: 54
episode reward: -39.52955107963234
current timestep: 267984

episode: 7003
episode length: 53
episode reward: -32.79719129026629
current timestep: 268037

episode: 7004
episode length: 52
episode reward: -35.86981130908883
current timestep: 268089

episode: 7005
episode length: 54
episode reward: -35.96971487121295
current timestep: 268143

episode: 7006
episode length: 54
episode reward: -39.544829857680206
current timestep: 268197

episode: 7007
episode length: 56
episode reward: -43.09882878650028
current timestep: 268253

episode: 7008
episode length: 55
episode reward: -42.175538812114844
current timestep: 268308

episode: 7009
episode length: 55
episode reward: -41.86004215697204
current timestep: 268363

episode: 7010
episode l

episode: 7087
episode length: 55
episode reward: -40.413254593937914
current timestep: 272588

episode: 7088
episode length: 48
episode reward: -27.488229571384693
current timestep: 272636

episode: 7089
episode length: 52
episode reward: -30.0558576661574
current timestep: 272688

episode: 7090
episode length: 55
episode reward: -37.29406712477808
current timestep: 272743

episode: 7091
episode length: 56
episode reward: -40.89488840315949
current timestep: 272799

episode: 7092
episode length: 52
episode reward: -30.72069342280145
current timestep: 272851

episode: 7093
episode length: 54
episode reward: -34.80327991318597
current timestep: 272905

episode: 7094
episode length: 55
episode reward: -40.017785836114946
current timestep: 272960

episode: 7095
episode length: 53
episode reward: -31.176393851623317
current timestep: 273013

episode: 7096
episode length: 52
episode reward: -29.272135186600345
current timestep: 273065

episode: 7097
episode length: 52
episode reward: -31.719

episode: 7174
episode length: 55
episode reward: -39.60615812487003
current timestep: 277184

episode: 7175
episode length: 54
episode reward: -36.660813534528685
current timestep: 277238

episode: 7176
episode length: 53
episode reward: -35.11980650216929
current timestep: 277291

episode: 7177
episode length: 53
episode reward: -31.699757428370624
current timestep: 277344

episode: 7178
episode length: 52
episode reward: -29.420696737868624
current timestep: 277396

episode: 7179
episode length: 54
episode reward: -34.9325135918548
current timestep: 277450

episode: 7180
episode length: 54
episode reward: -41.44419018362844
current timestep: 277504

episode: 7181
episode length: 54
episode reward: -38.51301585623042
current timestep: 277558

episode: 7182
episode length: 53
episode reward: -35.8681617943071
current timestep: 277611

episode: 7183
episode length: 52
episode reward: -31.132507187252507
current timestep: 277663

episode: 7184
episode length: 53
episode reward: -33.66086

episode: 7261
episode length: 55
episode reward: -40.12066026842514
current timestep: 281749

episode: 7262
episode length: 56
episode reward: -39.81969321784512
current timestep: 281805

episode: 7263
episode length: 52
episode reward: -29.447840144082086
current timestep: 281857

episode: 7264
episode length: 51
episode reward: -27.325963210728354
current timestep: 281908

episode: 7265
episode length: 52
episode reward: -33.13689167890443
current timestep: 281960

episode: 7266
episode length: 53
episode reward: -31.292453593647085
current timestep: 282013

episode: 7267
episode length: 55
episode reward: -39.57908074860621
current timestep: 282068

episode: 7268
episode length: 51
episode reward: -25.88754081137724
current timestep: 282119

episode: 7269
episode length: 54
episode reward: -36.390074883230554
current timestep: 282173

episode: 7270
episode length: 51
episode reward: -29.339521952643807
current timestep: 282224

episode: 7271
episode length: 54
episode reward: -36.74

episode: 7348
episode length: 53
episode reward: -35.006137848650866
current timestep: 286346

episode: 7349
episode length: 53
episode reward: -28.349316468137598
current timestep: 286399

episode: 7350
episode length: 50
episode reward: -27.04600440720034
current timestep: 286449

episode: 7351
episode length: 53
episode reward: -32.575693365909245
current timestep: 286502

episode: 7352
episode length: 53
episode reward: -31.931068247229256
current timestep: 286555

episode: 7353
episode length: 53
episode reward: -30.09208255926637
current timestep: 286608

episode: 7354
episode length: 57
episode reward: -43.75284927729953
current timestep: 286665

episode: 7355
episode length: 52
episode reward: -28.811904201899758
current timestep: 286717

episode: 7356
episode length: 55
episode reward: -39.153710051977015
current timestep: 286772

episode: 7357
episode length: 54
episode reward: -34.412021119191486
current timestep: 286826

episode: 7358
episode length: 53
episode reward: -31.

episode: 7435
episode length: 54
episode reward: -34.625288303717156
current timestep: 290924

episode: 7436
episode length: 52
episode reward: -29.296202998014387
current timestep: 290976

episode: 7437
episode length: 52
episode reward: -28.195619490844823
current timestep: 291028

episode: 7438
episode length: 54
episode reward: -32.77883719173731
current timestep: 291082

episode: 7439
episode length: 54
episode reward: -33.45127764745565
current timestep: 291136

episode: 7440
episode length: 53
episode reward: -32.84588470525175
current timestep: 291189

episode: 7441
episode length: 57
episode reward: -42.2521983765521
current timestep: 291246

episode: 7442
episode length: 52
episode reward: -27.52462777601059
current timestep: 291298

episode: 7443
episode length: 48
episode reward: -26.951181490038483
current timestep: 291346

episode: 7444
episode length: 53
episode reward: -35.73525993808393
current timestep: 291399

episode: 7445
episode length: 55
episode reward: -41.7040

episode: 7522
episode length: 54
episode reward: -38.37260021539054
current timestep: 295524

episode: 7523
episode length: 55
episode reward: -37.33808833554193
current timestep: 295579

episode: 7524
episode length: 52
episode reward: -31.199737706809877
current timestep: 295631

episode: 7525
episode length: 55
episode reward: -36.27698321410323
current timestep: 295686

episode: 7526
episode length: 57
episode reward: -40.33030597007236
current timestep: 295743

episode: 7527
episode length: 51
episode reward: -25.422931046376295
current timestep: 295794

episode: 7528
episode length: 55
episode reward: -38.26667649698664
current timestep: 295849

episode: 7529
episode length: 51
episode reward: -29.279256392157638
current timestep: 295900

episode: 7530
episode length: 51
episode reward: -26.650804161642064
current timestep: 295951

episode: 7531
episode length: 54
episode reward: -34.314124008872454
current timestep: 296005

episode: 7532
episode length: 55
episode reward: -36.85

episode: 7609
episode length: 53
episode reward: -30.316097116253875
current timestep: 300109

episode: 7610
episode length: 55
episode reward: -37.66808264388696
current timestep: 300164

episode: 7611
episode length: 53
episode reward: -31.33490643791115
current timestep: 300217

episode: 7612
episode length: 53
episode reward: -34.337292291106074
current timestep: 300270

episode: 7613
episode length: 52
episode reward: -31.621321587174666
current timestep: 300322

episode: 7614
episode length: 55
episode reward: -40.068606728253776
current timestep: 300377

episode: 7615
episode length: 54
episode reward: -37.788847326536526
current timestep: 300431

episode: 7616
episode length: 51
episode reward: -27.491294886799654
current timestep: 300482

episode: 7617
episode length: 50
episode reward: -27.218544741897787
current timestep: 300532

episode: 7618
episode length: 52
episode reward: -29.994553208092135
current timestep: 300584

episode: 7619
episode length: 49
episode reward: -26

episode: 7696
episode length: 53
episode reward: -33.569461067613595
current timestep: 304544

episode: 7697
episode length: 50
episode reward: -26.955692548816724
current timestep: 304594

episode: 7698
episode length: 52
episode reward: -28.320768142881683
current timestep: 304646

episode: 7699
episode length: 47
episode reward: -24.372516720291266
current timestep: 304693

episode: 7700
episode length: 52
episode reward: -28.969829157004817
current timestep: 304745

avg. reward: -30.218591999720253

episode: 7701
episode length: 52
episode reward: -32.27757840904299
current timestep: 304797

episode: 7702
episode length: 48
episode reward: -22.82780106715702
current timestep: 304845

episode: 7703
episode length: 54
episode reward: -37.4243593149594
current timestep: 304899

episode: 7704
episode length: 31
episode reward: -12.929201186592394
current timestep: 304930

episode: 7705
episode length: 53
episode reward: -31.874759635992667
current timestep: 304983

episode: 7706
episod

episode: 7783
episode length: 31
episode reward: -11.358233254215575
current timestep: 308007

episode: 7784
episode length: 31
episode reward: -10.206288164338952
current timestep: 308038

episode: 7785
episode length: 40
episode reward: -9.47055752018938
current timestep: 308078

episode: 7786
episode length: 43
episode reward: -14.839203702371329
current timestep: 308121

episode: 7787
episode length: 41
episode reward: -9.800637448442464
current timestep: 308162

episode: 7788
episode length: 39
episode reward: -9.368817841761432
current timestep: 308201

episode: 7789
episode length: 39
episode reward: -8.485891080612694
current timestep: 308240

episode: 7790
episode length: 41
episode reward: -9.674116593854098
current timestep: 308281

episode: 7791
episode length: 44
episode reward: -21.547683727041164
current timestep: 308325

episode: 7792
episode length: 44
episode reward: -24.964312571329035
current timestep: 308369

episode: 7793
episode length: 53
episode reward: -20.823

episode: 7870
episode length: 46
episode reward: -27.181758954777436
current timestep: 312017

episode: 7871
episode length: 45
episode reward: -24.04600099057117
current timestep: 312062

episode: 7872
episode length: 45
episode reward: -24.999632663175827
current timestep: 312107

episode: 7873
episode length: 45
episode reward: -23.883025516708354
current timestep: 312152

episode: 7874
episode length: 47
episode reward: -26.84484266694774
current timestep: 312199

episode: 7875
episode length: 46
episode reward: -23.93619860946007
current timestep: 312245

episode: 7876
episode length: 40
episode reward: -7.97987735205963
current timestep: 312285

episode: 7877
episode length: 43
episode reward: -23.600949731527294
current timestep: 312328

episode: 7878
episode length: 48
episode reward: -24.35328543591304
current timestep: 312376

episode: 7879
episode length: 45
episode reward: -23.30479774814144
current timestep: 312421

episode: 7880
episode length: 57
episode reward: -25.5371

episode: 7957
episode length: 39
episode reward: -7.874736224325674
current timestep: 315683

episode: 7958
episode length: 39
episode reward: -8.089970206418249
current timestep: 315722

episode: 7959
episode length: 39
episode reward: -8.41427467950885
current timestep: 315761

episode: 7960
episode length: 39
episode reward: -9.44940611606047
current timestep: 315800

episode: 7961
episode length: 40
episode reward: -7.937107290789723
current timestep: 315840

episode: 7962
episode length: 39
episode reward: -7.7677893010415815
current timestep: 315879

episode: 7963
episode length: 39
episode reward: -8.219758490606845
current timestep: 315918

episode: 7964
episode length: 39
episode reward: -8.47288686533801
current timestep: 315957

episode: 7965
episode length: 40
episode reward: -8.799284922915005
current timestep: 315997

episode: 7966
episode length: 39
episode reward: -8.19775093496597
current timestep: 316036

episode: 7967
episode length: 39
episode reward: -7.91777065348

episode: 8044
episode length: 39
episode reward: -8.754471685912623
current timestep: 319106

episode: 8045
episode length: 39
episode reward: -10.911311063197111
current timestep: 319145

episode: 8046
episode length: 39
episode reward: -8.018995914761026
current timestep: 319184

episode: 8047
episode length: 39
episode reward: -8.976193166474435
current timestep: 319223

episode: 8048
episode length: 39
episode reward: -7.792579469175397
current timestep: 319262

episode: 8049
episode length: 40
episode reward: -9.489720384616007
current timestep: 319302

episode: 8050
episode length: 39
episode reward: -9.815932750170257
current timestep: 319341

episode: 8051
episode length: 39
episode reward: -7.44860074283112
current timestep: 319380

episode: 8052
episode length: 41
episode reward: -10.44381617098809
current timestep: 319421

episode: 8053
episode length: 40
episode reward: -10.475611958167189
current timestep: 319461

episode: 8054
episode length: 39
episode reward: -8.5930165

episode: 8131
episode length: 39
episode reward: -8.29329759954267
current timestep: 322574

episode: 8132
episode length: 38
episode reward: -7.971385856271933
current timestep: 322612

episode: 8133
episode length: 40
episode reward: -8.606040774302707
current timestep: 322652

episode: 8134
episode length: 40
episode reward: -8.29347135721187
current timestep: 322692

episode: 8135
episode length: 42
episode reward: -15.615257684917175
current timestep: 322734

episode: 8136
episode length: 40
episode reward: -9.603457092753494
current timestep: 322774

episode: 8137
episode length: 41
episode reward: -8.819446593096966
current timestep: 322815

episode: 8138
episode length: 41
episode reward: -7.160622527934574
current timestep: 322856

episode: 8139
episode length: 41
episode reward: -12.219338408642118
current timestep: 322897

episode: 8140
episode length: 40
episode reward: -13.131458218878826
current timestep: 322937

episode: 8141
episode length: 41
episode reward: -10.904000

episode: 8218
episode length: 41
episode reward: -8.267308991900457
current timestep: 326067

episode: 8219
episode length: 41
episode reward: -7.316545555532545
current timestep: 326108

episode: 8220
episode length: 32
episode reward: -12.284245441964343
current timestep: 326140

episode: 8221
episode length: 41
episode reward: -7.1060605150921505
current timestep: 326181

episode: 8222
episode length: 31
episode reward: -10.828460433279963
current timestep: 326212

episode: 8223
episode length: 32
episode reward: -10.446505979454411
current timestep: 326244

episode: 8224
episode length: 40
episode reward: -12.264404225358984
current timestep: 326284

episode: 8225
episode length: 42
episode reward: -12.472970869804094
current timestep: 326326

episode: 8226
episode length: 42
episode reward: -18.017768077951075
current timestep: 326368

episode: 8227
episode length: 30
episode reward: -10.829079755327257
current timestep: 326398

episode: 8228
episode length: 41
episode reward: -6.

episode: 8305
episode length: 40
episode reward: -10.084277865702958
current timestep: 329229

episode: 8306
episode length: 30
episode reward: -11.659715112479951
current timestep: 329259

episode: 8307
episode length: 32
episode reward: -11.265822768155681
current timestep: 329291

episode: 8308
episode length: 43
episode reward: -13.918044454699244
current timestep: 329334

episode: 8309
episode length: 30
episode reward: -11.086390713048967
current timestep: 329364

episode: 8310
episode length: 31
episode reward: -11.554010125389198
current timestep: 329395

episode: 8311
episode length: 40
episode reward: -6.636361506219814
current timestep: 329435

episode: 8312
episode length: 31
episode reward: -10.962057800429411
current timestep: 329466

episode: 8313
episode length: 31
episode reward: -10.920170410007305
current timestep: 329497

episode: 8314
episode length: 42
episode reward: -10.168521880740986
current timestep: 329539

episode: 8315
episode length: 31
episode reward: -1

episode: 8392
episode length: 32
episode reward: -11.469321943128282
current timestep: 332041

episode: 8393
episode length: 31
episode reward: -10.374502490941033
current timestep: 332072

episode: 8394
episode length: 33
episode reward: -10.880463551581418
current timestep: 332105

episode: 8395
episode length: 32
episode reward: -11.13175984736179
current timestep: 332137

episode: 8396
episode length: 32
episode reward: -11.157509032268385
current timestep: 332169

episode: 8397
episode length: 31
episode reward: -12.325908945863045
current timestep: 332200

episode: 8398
episode length: 31
episode reward: -11.184595502830904
current timestep: 332231

episode: 8399
episode length: 31
episode reward: -12.6570136160476
current timestep: 332262

episode: 8400
episode length: 33
episode reward: -10.637846227327792
current timestep: 332295

avg. reward: -11.495828018082324

episode: 8401
episode length: 31
episode reward: -10.390688506762942
current timestep: 332326

episode: 8402
episo

episode: 8479
episode length: 31
episode reward: -11.049285709398191
current timestep: 334796

episode: 8480
episode length: 31
episode reward: -11.283707395084752
current timestep: 334827

episode: 8481
episode length: 31
episode reward: -10.831100522924668
current timestep: 334858

episode: 8482
episode length: 31
episode reward: -11.005346378885953
current timestep: 334889

episode: 8483
episode length: 31
episode reward: -11.47919476893887
current timestep: 334920

episode: 8484
episode length: 33
episode reward: -11.815660744757226
current timestep: 334953

episode: 8485
episode length: 31
episode reward: -10.191591125796336
current timestep: 334984

episode: 8486
episode length: 31
episode reward: -11.182006047385386
current timestep: 335015

episode: 8487
episode length: 31
episode reward: -13.304659828895575
current timestep: 335046

episode: 8488
episode length: 30
episode reward: -12.05756935415056
current timestep: 335076

episode: 8489
episode length: 31
episode reward: -11

episode: 8566
episode length: 31
episode reward: -12.731259692435627
current timestep: 337510

episode: 8567
episode length: 30
episode reward: -11.580824694852042
current timestep: 337540

episode: 8568
episode length: 31
episode reward: -11.587688093723989
current timestep: 337571

episode: 8569
episode length: 31
episode reward: -12.684191501441495
current timestep: 337602

episode: 8570
episode length: 32
episode reward: -11.416497407832361
current timestep: 337634

episode: 8571
episode length: 31
episode reward: -12.166334355679613
current timestep: 337665

episode: 8572
episode length: 32
episode reward: -12.629906555092143
current timestep: 337697

episode: 8573
episode length: 31
episode reward: -12.189094830906082
current timestep: 337728

episode: 8574
episode length: 32
episode reward: -11.003604487327936
current timestep: 337760

episode: 8575
episode length: 31
episode reward: -12.291788486611596
current timestep: 337791

episode: 8576
episode length: 32
episode reward: -

episode: 8653
episode length: 31
episode reward: -10.651670635003597
current timestep: 340233

episode: 8654
episode length: 32
episode reward: -13.09842961154721
current timestep: 340265

episode: 8655
episode length: 31
episode reward: -12.528849823023563
current timestep: 340296

episode: 8656
episode length: 31
episode reward: -10.768405447900479
current timestep: 340327

episode: 8657
episode length: 32
episode reward: -11.263753559549919
current timestep: 340359

episode: 8658
episode length: 31
episode reward: -12.182896924898683
current timestep: 340390

episode: 8659
episode length: 31
episode reward: -11.03964613913056
current timestep: 340421

episode: 8660
episode length: 31
episode reward: -11.991419493164543
current timestep: 340452

episode: 8661
episode length: 31
episode reward: -12.385852139837159
current timestep: 340483

episode: 8662
episode length: 30
episode reward: -11.57310720916435
current timestep: 340513

episode: 8663
episode length: 32
episode reward: -11.

episode: 8740
episode length: 31
episode reward: -11.332902887429483
current timestep: 342951

episode: 8741
episode length: 31
episode reward: -12.30164964556019
current timestep: 342982

episode: 8742
episode length: 32
episode reward: -11.030941294838307
current timestep: 343014

episode: 8743
episode length: 31
episode reward: -12.792858327848446
current timestep: 343045

episode: 8744
episode length: 45
episode reward: -16.089924994551588
current timestep: 343090

episode: 8745
episode length: 31
episode reward: -12.431708778190407
current timestep: 343121

episode: 8746
episode length: 33
episode reward: -13.096049931731805
current timestep: 343154

episode: 8747
episode length: 31
episode reward: -11.316370380143109
current timestep: 343185

episode: 8748
episode length: 31
episode reward: -12.675292376738977
current timestep: 343216

episode: 8749
episode length: 31
episode reward: -12.427836985366474
current timestep: 343247

episode: 8750
episode length: 31
episode reward: -1

episode: 8827
episode length: 31
episode reward: -12.328543576308917
current timestep: 345723

episode: 8828
episode length: 32
episode reward: -12.525869462394278
current timestep: 345755

episode: 8829
episode length: 31
episode reward: -11.23577318360259
current timestep: 345786

episode: 8830
episode length: 32
episode reward: -12.253210280093615
current timestep: 345818

episode: 8831
episode length: 31
episode reward: -11.752359768530717
current timestep: 345849

episode: 8832
episode length: 31
episode reward: -11.611203861493344
current timestep: 345880

episode: 8833
episode length: 31
episode reward: -10.265711642295908
current timestep: 345911

episode: 8834
episode length: 31
episode reward: -11.65183048619084
current timestep: 345942

episode: 8835
episode length: 31
episode reward: -11.216613566830844
current timestep: 345973

episode: 8836
episode length: 31
episode reward: -12.55154693204835
current timestep: 346004

episode: 8837
episode length: 31
episode reward: -11.

episode: 8914
episode length: 31
episode reward: -12.817950239642572
current timestep: 348456

episode: 8915
episode length: 31
episode reward: -13.04043998968087
current timestep: 348487

episode: 8916
episode length: 31
episode reward: -10.082595944805497
current timestep: 348518

episode: 8917
episode length: 32
episode reward: -13.36727227501971
current timestep: 348550

episode: 8918
episode length: 31
episode reward: -11.926858474540005
current timestep: 348581

episode: 8919
episode length: 31
episode reward: -11.20124230641106
current timestep: 348612

episode: 8920
episode length: 31
episode reward: -10.385016012905883
current timestep: 348643

episode: 8921
episode length: 32
episode reward: -10.380869582496283
current timestep: 348675

episode: 8922
episode length: 31
episode reward: -12.795682793756654
current timestep: 348706

episode: 8923
episode length: 31
episode reward: -11.765070503544814
current timestep: 348737

episode: 8924
episode length: 31
episode reward: -12.

episode: 9001
episode length: 31
episode reward: -11.098239256663586
current timestep: 351188

episode: 9002
episode length: 31
episode reward: -11.912502520294634
current timestep: 351219

episode: 9003
episode length: 31
episode reward: -11.28132495901782
current timestep: 351250

episode: 9004
episode length: 31
episode reward: -13.890383720651569
current timestep: 351281

episode: 9005
episode length: 33
episode reward: -13.63181668538656
current timestep: 351314

episode: 9006
episode length: 33
episode reward: -12.409255330795558
current timestep: 351347

episode: 9007
episode length: 32
episode reward: -10.991536403279955
current timestep: 351379

episode: 9008
episode length: 31
episode reward: -11.30494740745842
current timestep: 351410

episode: 9009
episode length: 31
episode reward: -11.120181482252187
current timestep: 351441

episode: 9010
episode length: 31
episode reward: -11.548646369306601
current timestep: 351472

episode: 9011
episode length: 31
episode reward: -11.

episode: 9088
episode length: 32
episode reward: -13.951546158881847
current timestep: 353915

episode: 9089
episode length: 32
episode reward: -11.589436186155883
current timestep: 353947

episode: 9090
episode length: 31
episode reward: -10.484030856897919
current timestep: 353978

episode: 9091
episode length: 31
episode reward: -13.747226080225868
current timestep: 354009

episode: 9092
episode length: 30
episode reward: -11.608835410886204
current timestep: 354039

episode: 9093
episode length: 31
episode reward: -12.76767996695162
current timestep: 354070

episode: 9094
episode length: 31
episode reward: -12.7105664791804
current timestep: 354101

episode: 9095
episode length: 32
episode reward: -11.9447568950274
current timestep: 354133

episode: 9096
episode length: 32
episode reward: -11.095294688666078
current timestep: 354165

episode: 9097
episode length: 31
episode reward: -13.240252611563314
current timestep: 354196

episode: 9098
episode length: 32
episode reward: -12.64

episode: 9175
episode length: 31
episode reward: -10.598618596344535
current timestep: 356652

episode: 9176
episode length: 31
episode reward: -10.846798904302679
current timestep: 356683

episode: 9177
episode length: 32
episode reward: -10.77342408364833
current timestep: 356715

episode: 9178
episode length: 31
episode reward: -13.327146636903246
current timestep: 356746

episode: 9179
episode length: 31
episode reward: -12.562219469251396
current timestep: 356777

episode: 9180
episode length: 31
episode reward: -12.041719620722745
current timestep: 356808

episode: 9181
episode length: 31
episode reward: -10.55481464324735
current timestep: 356839

episode: 9182
episode length: 31
episode reward: -10.58810565709473
current timestep: 356870

episode: 9183
episode length: 31
episode reward: -12.559813616769915
current timestep: 356901

episode: 9184
episode length: 31
episode reward: -12.463394671984247
current timestep: 356932

episode: 9185
episode length: 31
episode reward: -10.

episode: 9262
episode length: 31
episode reward: -11.097442925136574
current timestep: 359368

episode: 9263
episode length: 31
episode reward: -13.081410773167246
current timestep: 359399

episode: 9264
episode length: 31
episode reward: -12.85376644904387
current timestep: 359430

episode: 9265
episode length: 31
episode reward: -11.798289491011609
current timestep: 359461

episode: 9266
episode length: 31
episode reward: -12.704908514024412
current timestep: 359492

episode: 9267
episode length: 32
episode reward: -10.493981070426752
current timestep: 359524

episode: 9268
episode length: 32
episode reward: -10.754308703577802
current timestep: 359556

episode: 9269
episode length: 31
episode reward: -13.485557311289746
current timestep: 359587

episode: 9270
episode length: 31
episode reward: -10.930836438252744
current timestep: 359618

episode: 9271
episode length: 32
episode reward: -12.515123975261995
current timestep: 359650

episode: 9272
episode length: 31
episode reward: -1

episode: 9349
episode length: 31
episode reward: -11.750713813086863
current timestep: 362100

episode: 9350
episode length: 30
episode reward: -9.881688565104335
current timestep: 362130

episode: 9351
episode length: 32
episode reward: -13.200373704913835
current timestep: 362162

episode: 9352
episode length: 31
episode reward: -10.602503900938087
current timestep: 362193

episode: 9353
episode length: 31
episode reward: -12.106565584424898
current timestep: 362224

episode: 9354
episode length: 31
episode reward: -10.91635326600949
current timestep: 362255

episode: 9355
episode length: 31
episode reward: -11.191328756807492
current timestep: 362286

episode: 9356
episode length: 32
episode reward: -12.993687521643183
current timestep: 362318

episode: 9357
episode length: 31
episode reward: -11.00668790915257
current timestep: 362349

episode: 9358
episode length: 31
episode reward: -12.41120839703737
current timestep: 362380

episode: 9359
episode length: 31
episode reward: -13.1

episode: 9436
episode length: 31
episode reward: -11.675103058317987
current timestep: 364815

episode: 9437
episode length: 31
episode reward: -10.871162553200502
current timestep: 364846

episode: 9438
episode length: 31
episode reward: -12.291779514594515
current timestep: 364877

episode: 9439
episode length: 31
episode reward: -10.733164612441591
current timestep: 364908

episode: 9440
episode length: 32
episode reward: -12.315524965118366
current timestep: 364940

episode: 9441
episode length: 31
episode reward: -9.755634671140957
current timestep: 364971

episode: 9442
episode length: 31
episode reward: -12.963938841275375
current timestep: 365002

episode: 9443
episode length: 31
episode reward: -12.453521665235362
current timestep: 365033

episode: 9444
episode length: 31
episode reward: -12.337818230941568
current timestep: 365064

episode: 9445
episode length: 32
episode reward: -10.813546180163788
current timestep: 365096

episode: 9446
episode length: 32
episode reward: -1

episode: 9523
episode length: 31
episode reward: -11.08475256007005
current timestep: 367528

episode: 9524
episode length: 31
episode reward: -12.641545364434101
current timestep: 367559

episode: 9525
episode length: 31
episode reward: -12.650545876644633
current timestep: 367590

episode: 9526
episode length: 31
episode reward: -11.430699235169078
current timestep: 367621

episode: 9527
episode length: 31
episode reward: -11.458025790131467
current timestep: 367652

episode: 9528
episode length: 31
episode reward: -11.115571304568837
current timestep: 367683

episode: 9529
episode length: 30
episode reward: -10.634946396675284
current timestep: 367713

episode: 9530
episode length: 30
episode reward: -12.873624988942888
current timestep: 367743

episode: 9531
episode length: 32
episode reward: -12.001848479946172
current timestep: 367775

episode: 9532
episode length: 31
episode reward: -10.891975378533472
current timestep: 367806

episode: 9533
episode length: 30
episode reward: -1

episode: 9610
episode length: 32
episode reward: -11.160860233786897
current timestep: 370252

episode: 9611
episode length: 31
episode reward: -11.264895693780513
current timestep: 370283

episode: 9612
episode length: 32
episode reward: -10.994420189758685
current timestep: 370315

episode: 9613
episode length: 31
episode reward: -11.470275515545493
current timestep: 370346

episode: 9614
episode length: 32
episode reward: -12.132161850288826
current timestep: 370378

episode: 9615
episode length: 31
episode reward: -11.697845281518049
current timestep: 370409

episode: 9616
episode length: 32
episode reward: -12.800586845286823
current timestep: 370441

episode: 9617
episode length: 32
episode reward: -11.496319648595318
current timestep: 370473

episode: 9618
episode length: 32
episode reward: -12.835137156314753
current timestep: 370505

episode: 9619
episode length: 31
episode reward: -12.808817842557664
current timestep: 370536

episode: 9620
episode length: 31
episode reward: -

episode: 9697
episode length: 33
episode reward: -12.994941657378103
current timestep: 372992

episode: 9698
episode length: 31
episode reward: -11.203701504269551
current timestep: 373023

episode: 9699
episode length: 31
episode reward: -10.778208136131159
current timestep: 373054

episode: 9700
episode length: 31
episode reward: -12.654657322676897
current timestep: 373085

avg. reward: -11.918628095050394

episode: 9701
episode length: 32
episode reward: -12.10939501779624
current timestep: 373117

episode: 9702
episode length: 48
episode reward: -25.846993629306645
current timestep: 373165

episode: 9703
episode length: 31
episode reward: -11.078495943182432
current timestep: 373196

episode: 9704
episode length: 32
episode reward: -12.067943766638775
current timestep: 373228

episode: 9705
episode length: 31
episode reward: -11.112451215057192
current timestep: 373259

episode: 9706
episode length: 31
episode reward: -11.317403011632628
current timestep: 373290

episode: 9707
epi

episode: 9784
episode length: 32
episode reward: -13.496552033200043
current timestep: 375836

episode: 9785
episode length: 47
episode reward: -19.892100404294833
current timestep: 375883

episode: 9786
episode length: 46
episode reward: -15.901696025924444
current timestep: 375929

episode: 9787
episode length: 31
episode reward: -13.968027541412454
current timestep: 375960

episode: 9788
episode length: 33
episode reward: -10.52018353991394
current timestep: 375993

episode: 9789
episode length: 47
episode reward: -18.162937046253095
current timestep: 376040

episode: 9790
episode length: 48
episode reward: -24.770589790790954
current timestep: 376088

episode: 9791
episode length: 51
episode reward: -26.880918985746938
current timestep: 376139

episode: 9792
episode length: 32
episode reward: -12.689222017580033
current timestep: 376171

episode: 9793
episode length: 51
episode reward: -31.102547155955808
current timestep: 376222

episode: 9794
episode length: 49
episode reward: -2

episode: 9871
episode length: 54
episode reward: -32.233645531088044
current timestep: 380164

episode: 9872
episode length: 51
episode reward: -28.12592674862427
current timestep: 380215

episode: 9873
episode length: 51
episode reward: -32.910451551031926
current timestep: 380266

episode: 9874
episode length: 50
episode reward: -25.654887754045934
current timestep: 380316

episode: 9875
episode length: 54
episode reward: -31.474253221358747
current timestep: 380370

episode: 9876
episode length: 50
episode reward: -31.154835614928366
current timestep: 380420

episode: 9877
episode length: 51
episode reward: -33.1350802159507
current timestep: 380471

episode: 9878
episode length: 54
episode reward: -35.4555485100123
current timestep: 380525

episode: 9879
episode length: 53
episode reward: -33.711265745424846
current timestep: 380578

episode: 9880
episode length: 53
episode reward: -32.75823226082728
current timestep: 380631

episode: 9881
episode length: 54
episode reward: -31.518

episode: 9958
episode length: 52
episode reward: -28.2680967053208
current timestep: 384564

episode: 9959
episode length: 53
episode reward: -34.309917349909604
current timestep: 384617

episode: 9960
episode length: 53
episode reward: -33.25304980292153
current timestep: 384670

episode: 9961
episode length: 44
episode reward: -11.970236300699012
current timestep: 384714

episode: 9962
episode length: 47
episode reward: -14.371999572778652
current timestep: 384761

episode: 9963
episode length: 48
episode reward: -17.355692967307615
current timestep: 384809

episode: 9964
episode length: 46
episode reward: -15.843675766103726
current timestep: 384855

episode: 9965
episode length: 47
episode reward: -17.956035482153464
current timestep: 384902

episode: 9966
episode length: 47
episode reward: -17.95328207910203
current timestep: 384949

episode: 9967
episode length: 44
episode reward: -12.544395510401788
current timestep: 384993

episode: 9968
episode length: 46
episode reward: -16.8

episode: 10044
episode length: 41
episode reward: -10.693128347590404
current timestep: 388494

episode: 10045
episode length: 42
episode reward: -11.098468521039266
current timestep: 388536

episode: 10046
episode length: 40
episode reward: -8.6725800787028
current timestep: 388576

episode: 10047
episode length: 41
episode reward: -8.884141354888433
current timestep: 388617

episode: 10048
episode length: 40
episode reward: -9.525925639326843
current timestep: 388657

episode: 10049
episode length: 41
episode reward: -9.523560146611565
current timestep: 388698

episode: 10050
episode length: 43
episode reward: -10.337256923342874
current timestep: 388741

episode: 10051
episode length: 39
episode reward: -9.87678202788431
current timestep: 388780

episode: 10052
episode length: 40
episode reward: -8.125801169689929
current timestep: 388820

episode: 10053
episode length: 41
episode reward: -10.139034668705092
current timestep: 388861

episode: 10054
episode length: 39
episode reward:

episode: 10130
episode length: 33
episode reward: -7.016641752215556
current timestep: 391604

episode: 10131
episode length: 32
episode reward: -7.919158111846579
current timestep: 391636

episode: 10132
episode length: 31
episode reward: -8.531380309597536
current timestep: 391667

episode: 10133
episode length: 27
episode reward: -7.933974999992941
current timestep: 391694

episode: 10134
episode length: 28
episode reward: -6.602543810119887
current timestep: 391722

episode: 10135
episode length: 28
episode reward: -6.817230093434896
current timestep: 391750

episode: 10136
episode length: 33
episode reward: -9.036793824900204
current timestep: 391783

episode: 10137
episode length: 31
episode reward: -9.567053754314907
current timestep: 391814

episode: 10138
episode length: 35
episode reward: -7.941810379366197
current timestep: 391849

episode: 10139
episode length: 32
episode reward: -8.550143367842095
current timestep: 391881

episode: 10140
episode length: 29
episode reward: 

episode: 10217
episode length: 27
episode reward: -7.395957488646331
current timestep: 394074

episode: 10218
episode length: 27
episode reward: -7.486004881522106
current timestep: 394101

episode: 10219
episode length: 27
episode reward: -8.864464107715309
current timestep: 394128

episode: 10220
episode length: 28
episode reward: -7.0399760615876055
current timestep: 394156

episode: 10221
episode length: 25
episode reward: -6.961658417532071
current timestep: 394181

episode: 10222
episode length: 30
episode reward: -7.8660700819877
current timestep: 394211

episode: 10223
episode length: 24
episode reward: -7.601826258421717
current timestep: 394235

episode: 10224
episode length: 23
episode reward: -8.257566995440142
current timestep: 394258

episode: 10225
episode length: 30
episode reward: -7.880718131242522
current timestep: 394288

episode: 10226
episode length: 28
episode reward: -6.778905846648201
current timestep: 394316

episode: 10227
episode length: 28
episode reward: -

episode: 10303
episode length: 46
episode reward: -26.58856471088184
current timestep: 396735

episode: 10304
episode length: 38
episode reward: -7.612588469536695
current timestep: 396773

episode: 10305
episode length: 36
episode reward: -7.723245773961862
current timestep: 396809

episode: 10306
episode length: 42
episode reward: -18.822400092784417
current timestep: 396851

episode: 10307
episode length: 34
episode reward: -7.998496194705078
current timestep: 396885

episode: 10308
episode length: 33
episode reward: -9.576440470755328
current timestep: 396918

episode: 10309
episode length: 37
episode reward: -7.512313649534285
current timestep: 396955

episode: 10310
episode length: 31
episode reward: -8.302435558616246
current timestep: 396986

episode: 10311
episode length: 38
episode reward: -7.142260338853103
current timestep: 397024

episode: 10312
episode length: 37
episode reward: -7.997192654243142
current timestep: 397061

episode: 10313
episode length: 47
episode reward:

episode: 10390
episode length: 39
episode reward: -6.00925568423161
current timestep: 400208

episode: 10391
episode length: 43
episode reward: -10.161964010478762
current timestep: 400251

episode: 10392
episode length: 39
episode reward: -7.016056271630471
current timestep: 400290

episode: 10393
episode length: 39
episode reward: -6.009663513553717
current timestep: 400329

episode: 10394
episode length: 41
episode reward: -6.966389667646848
current timestep: 400370

episode: 10395
episode length: 35
episode reward: -6.282162204473903
current timestep: 400405

episode: 10396
episode length: 41
episode reward: -6.701640192893034
current timestep: 400446

episode: 10397
episode length: 43
episode reward: -10.728480800675667
current timestep: 400489

episode: 10398
episode length: 42
episode reward: -9.35880135888188
current timestep: 400531

episode: 10399
episode length: 40
episode reward: -8.790680844421152
current timestep: 400571

episode: 10400
episode length: 45
episode reward: 

episode: 10476
episode length: 46
episode reward: -15.871394811842654
current timestep: 403986

episode: 10477
episode length: 46
episode reward: -17.486789929511303
current timestep: 404032

episode: 10478
episode length: 45
episode reward: -14.521767277947722
current timestep: 404077

episode: 10479
episode length: 47
episode reward: -17.307344889803673
current timestep: 404124

episode: 10480
episode length: 49
episode reward: -23.14307918128066
current timestep: 404173

episode: 10481
episode length: 44
episode reward: -13.783694064210945
current timestep: 404217

episode: 10482
episode length: 48
episode reward: -19.384872020211457
current timestep: 404265

episode: 10483
episode length: 46
episode reward: -15.970476517621337
current timestep: 404311

episode: 10484
episode length: 47
episode reward: -15.580792163683888
current timestep: 404358

episode: 10485
episode length: 43
episode reward: -11.280281978065226
current timestep: 404401

episode: 10486
episode length: 46
episode

episode: 10562
episode length: 49
episode reward: -14.63255338085812
current timestep: 407952

episode: 10563
episode length: 50
episode reward: -20.597699566684057
current timestep: 408002

episode: 10564
episode length: 49
episode reward: -14.909543414160622
current timestep: 408051

episode: 10565
episode length: 55
episode reward: -29.383205291427114
current timestep: 408106

episode: 10566
episode length: 47
episode reward: -15.431056953095213
current timestep: 408153

episode: 10567
episode length: 47
episode reward: -10.856964056726406
current timestep: 408200

episode: 10568
episode length: 50
episode reward: -18.297186759567822
current timestep: 408250

episode: 10569
episode length: 47
episode reward: -12.589855450660387
current timestep: 408297

episode: 10570
episode length: 51
episode reward: -19.83218256841332
current timestep: 408348

episode: 10571
episode length: 44
episode reward: -10.919470971131831
current timestep: 408392

episode: 10572
episode length: 49
episode 

KeyboardInterrupt: 

In [None]:
######### Hyperparameters #########
env_name = "Walker2d-v2"
log_interval = 100           # print avg reward after interval
gamma = 0.99                # discount for future rewards
batch_size = 100            # num of transitions sampled from replay buffer
lr = 0.001
exploration_noise = 0.2     # std of the Gaussian noise added to every selected action
polyak = 0.995              # target policy update parameter (1-tau)
policy_noise = 0.2          # target policy smoothing noise
noise_clip = 0.5
policy_delay = 2            # delayed policy updates parameter
max_episodes = 1000         # max num of episodes
max_timesteps = 5000        # max timesteps in one episode
# NOTE(review): absolute, machine-specific path; consider a path relative to the notebook.
directory = "/home/imnotrachit/Documents/Collage_stuff/sem_6/rl/endsem/td3/preTrained/" # save trained models
filename = "TD3_{}".format(env_name)
###################################

# Make sure the checkpoint directory exists *before* training starts, so a
# missing or unwritable path fails immediately instead of after the full run.
import os
os.makedirs(directory, exist_ok=True)

env = gym.make(env_name)
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0]
max_action = float(env.action_space.high[0])

policy = TD3(lr, state_dim, action_dim, max_action)
replay_buffer = ReplayBuffer()

rewards = []      # total reward collected in each episode
times = []        # wall-clock seconds spent on each episode (rollout + policy update)
avg_reward = 0    # reward summed over the current logging window

# training procedure:
for episode in range(1, max_episodes+1):
    start_time = time.time()
    state = env.reset()
    ep_reward = 0
    for t in range(max_timesteps):
        # select action and add exploration noise:
        action = policy.select_action(state)
        action = action + np.random.normal(0, exploration_noise, size=action_dim)
        action = action.clip(env.action_space.low, env.action_space.high)

        # take action in env:
        next_state, reward, done, _ = env.step(action)
        # NOTE(review): `done` is stored as-is; if the environment also sets it on a
        # time-limit cut-off, those transitions are recorded as terminal - confirm
        # that this is what the update rule expects.
        replay_buffer.add((state, action, reward, next_state, float(done)))
        state = next_state

        avg_reward += reward
        ep_reward += reward

        # if episode is done then update policy:
        if done or t==(max_timesteps-1):
            # `t` is the zero-based index of the episode's last step; presumably
            # TD3.update uses it as the number of gradient iterations - TODO confirm.
            policy.update(replay_buffer, t, batch_size, gamma, polyak, policy_noise, noise_clip, policy_delay)
            break

    # Measured after the inner loop so the value is always bound for this episode
    # (previously it was only assigned inside the terminating branch).
    episode_time = time.time() - start_time

    # logging updates:
    rewards.append(ep_reward)
    times.append(episode_time)

    if episode % log_interval == 0:
        # int() truncates toward zero, so the printed average is a whole number.
        avg_reward = int(avg_reward / log_interval)
        print("Episode: {}\tAverage Reward: {}".format(episode, avg_reward))
        avg_reward = 0

name = filename + '_solved'
policy.save(directory, name)


# NOTE(review): `plt` is not imported in the notebook's visible import cell;
# confirm `matplotlib.pyplot` is imported as `plt` before this cell runs.
plt.figure(figsize=(12, 6))
plt.plot(rewards)
plt.xlabel('Episodes')
plt.ylabel('Rewards')
plt.title('Episode vs Reward')
plt.show()

In [None]:
# Persist the per-episode reward and wall-clock time series collected by the
# training cell as .npy files in the current working directory.
for out_path, series in (('td3_reward.npy', rewards), ('td3_time.npy', times)):
    np.save(out_path, series)