In [1]:
import os
import torch
import gym
import numpy as np
from TD3_torch.TD3 import TD3
from PIL import Image
from TD3_torch.utils import ReplayBuffer

# ---- run identity ---------------------------------------------------------
env_name = 'BipedalWalkerHardcore-v2'
random_seed = 0              # train() only applies the seed when it is truthy

# ---- TD3 hyperparameters --------------------------------------------------
gamma = 0.99                 # discount applied to future rewards
batch_size = 128             # transitions sampled from the replay buffer per update
exploration_noise = 0.1      # std-dev of Gaussian noise added to selected actions
policy_noise = 0.2           # target policy smoothing noise
noise_clip = 0.5
policy_delay = 2             # delayed policy updates parameter
# Polyak (1 - tau) bounds for target updates; train() reads the first entry.
polyak_int = [0.995, 0.999999]

# ---- training loop limits -------------------------------------------------
max_episodes = 50000         # upper bound on the number of episodes
max_timesteps = 2000         # upper bound on the steps within one episode
max_buffer_length = 300000   # max_length handed to ReplayBuffer
log_interval = 100           # a progress line is printed every this many episodes

# ---- outputs --------------------------------------------------------------
directory = "./preTrained/td3_torch/{}".format(env_name)    # where models are saved
filename = "TD3_torch_{}_{}".format(env_name, random_seed)  # base name of saved models
reward_history = []          # per-episode total rewards, appended to by train()



def train():
    """Train a TD3 policy on ``env_name`` using the module-level settings.

    Each episode the agent acts with Gaussian exploration noise, every
    transition is stored in the replay buffer, and ``policy.update`` is called
    once when the episode ends (or the ``max_timesteps`` cap is hit).

    Side effects:
      * ``reward_history`` is cleared, then receives one total reward per episode.
      * ``log.txt`` is overwritten with one ``episode,reward`` line per episode.
      * After episode 500 the policy is saved to ``directory`` every episode;
        on success it is saved once more under ``filename + '_solved'``.

    Training stops early when the mean of the last (up to) 100 episode rewards
    reaches ``env.spec.reward_threshold``; otherwise it runs ``max_episodes``
    episodes. The log file and the environment are released on every exit
    path, including exceptions and keyboard interrupts.
    """
    # Start from an empty history so that re-running this function (e.g.
    # re-executing the notebook cell) does not mix rewards from an earlier run
    # into the running average / solved check.
    del reward_history[:]

    # policy.save() writes into `directory`; make sure it exists up front so a
    # missing folder fails now rather than after hundreds of episodes.
    if not os.path.isdir(directory):
        os.makedirs(directory)

    env = gym.make(env_name)
    try:
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        max_action = float(env.action_space.high[0])
        polyak = polyak_int[0]

        print("action_space={}".format(env.action_space))
        print("obs_space={}".format(env.observation_space))
        print("threshold={} \n".format(env.spec.reward_threshold))

        # NOTE: a seed of 0 is falsy, so the default configuration runs unseeded.
        if random_seed:
            print("Random Seed: {}".format(random_seed))
            env.seed(random_seed)
            torch.manual_seed(random_seed)
            np.random.seed(random_seed)

        # Built after seeding so that any random initialisation performed by
        # these constructors is covered by the seed (assumes TD3 initialises
        # its networks via torch's RNG -- TODO confirm).
        policy = TD3(state_dim, action_dim, max_action)
        replay_buffer = ReplayBuffer(max_length=max_buffer_length)

        # The context manager closes the log on every exit path.
        with open("log.txt", "w+") as log_f:
            for episode in range(1, max_episodes+1):
                ep_reward = 0
                state = env.reset()

                for t in range(max_timesteps):
                    # select action and add exploration noise:
                    action = policy.select_action(state)
                    action = action + np.random.normal(0, exploration_noise, size=action_dim)
                    action = action.clip(env.action_space.low, env.action_space.high)

                    # take action in env:
                    next_state, reward, done, _ = env.step(action)
                    replay_buffer.add((state, action, reward, next_state, float(done)))
                    state = next_state

                    ep_reward += reward

                    # episode finished (or step cap reached): update the policy,
                    # passing the index of the last step as the second argument.
                    if done or t == (max_timesteps-1):
                        policy.update(replay_buffer, t, batch_size, gamma, polyak,
                                      policy_noise, noise_clip, policy_delay)
                        break

                reward_history.append(ep_reward)
                avg_reward = np.mean(reward_history[-100:])

                # logging updates:
                log_f.write('{},{}\n'.format(episode, ep_reward))
                log_f.flush()

                # Solved: the running average reached the environment's reward
                # threshold, so save the final model and stop training.
                if avg_reward >= env.spec.reward_threshold:
                    print("########## Solved! ###########")
                    name = filename + '_solved'
                    policy.save(directory, name)
                    break

                # Disabled adaptive polyak schedule (polyak stays at polyak_int[0]):
                #part = (env.spec.reward_threshold - avg_reward) / (env.spec.reward_threshold + 150)
                #if part > 1:
                #    part = 1
                #polyak = polyak_int[0] + (1 - part) * (polyak_int[1] - polyak_int[0])

                # Checkpoint every episode once past episode 500.
                if episode > 500:
                    policy.save(directory, filename)

                # print avg reward every log interval:
                if episode % log_interval == 0:
                    print("Ep: {}   Rew: {:3.2f}   Avg Rew: {:3.2f}   Polyak: {:6.6f}   Buffer: {:3.2f}   Loss: {:6.6f}  {:6.6f}  {:6.6f}".format(
                        episode, ep_reward, avg_reward, polyak, replay_buffer.get_fill(), policy.actor_loss, policy.loss_Q1, policy.loss_Q2))
    finally:
        # Release the environment's resources whether training finished,
        # failed, or was interrupted.
        env.close()

# Run training with the settings defined above; progress is printed below
# and per-episode rewards are written to log.txt.
train()


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
action_space=Box(4,)
obs_space=Box(24,)
threshold=300 

Ep: 1   Rew: -93.89   Avg Rew: -93.89   Polyak: 0.995000   Buffer: 0.03   Loss: -0.049667  0.653095  0.499643
Ep: 2   Rew: -102.07   Avg Rew: -97.98   Polyak: 0.995000   Buffer: 0.05   Loss: 0.811679  5.742258  4.799824
Ep: 3   Rew: -106.70   Avg Rew: -100.89   Polyak: 0.995000   Buffer: 0.07   Loss: 0.152485  129.301544  131.299652
Ep: 4   Rew: -216.20   Avg Rew: -129.71   Polyak: 0.995000   Buffer: 0.73   Loss: 1.689378  1.940202  1.534711
Ep: 5   Rew: -180.06   Avg Rew: -139.78   Polyak: 0.995000   Buffer: 1.40   Loss: -0.110850  2.879950  4.398229
Ep: 6   Rew: -109.45   Avg Rew: -134.73   Polyak: 0.995000   Buffer: 1.43   Loss: -0.182587  1.206853  0.881050
Ep: 7   Rew: -108.71   Avg Rew: -131.01   Polyak: 0.995000

Ep: 72   Rew: -99.90   Avg Rew: -108.01   Polyak: 0.995000   Buffer: 3.91   Loss: 10.444903  5.820513  6.067129
Ep: 73   Rew: -112.78   Avg Rew: -108.08   Polyak: 0.995000   Buffer: 3.93   Loss: 8.105766  6.760255  6.717532
Ep: 74   Rew: -119.13   Avg Rew: -108.23   Polyak: 0.995000   Buffer: 3.95   Loss: 10.438009  7.168272  3.984973
Ep: 75   Rew: -98.47   Avg Rew: -108.10   Polyak: 0.995000   Buffer: 3.98   Loss: 9.493410  46.886211  51.052864
Ep: 76   Rew: -113.08   Avg Rew: -108.16   Polyak: 0.995000   Buffer: 4.00   Loss: 6.605355  3.912993  4.482635
Ep: 77   Rew: -80.28   Avg Rew: -107.80   Polyak: 0.995000   Buffer: 4.67   Loss: 10.597369  4.402636  5.667572
Ep: 78   Rew: -113.65   Avg Rew: -107.87   Polyak: 0.995000   Buffer: 4.69   Loss: 12.132183  17.143040  16.486115
Ep: 79   Rew: -101.35   Avg Rew: -107.79   Polyak: 0.995000   Buffer: 4.72   Loss: 13.081518  8.796448  12.347109
Ep: 80   Rew: -101.43   Avg Rew: -107.71   Polyak: 0.995000   Buffer: 4.75   Loss: 10.712696  5.2

Ep: 144   Rew: -135.72   Avg Rew: -118.41   Polyak: 0.995000   Buffer: 36.69   Loss: 105.292252  16.050594  18.231659
Ep: 145   Rew: -133.98   Avg Rew: -118.76   Polyak: 0.995000   Buffer: 36.74   Loss: 104.647514  20.565275  16.852905
Ep: 146   Rew: -137.15   Avg Rew: -119.11   Polyak: 0.995000   Buffer: 36.82   Loss: 104.643211  24.415766  22.192429
Ep: 147   Rew: -104.27   Avg Rew: -119.15   Polyak: 0.995000   Buffer: 36.85   Loss: 105.787819  18.793816  16.952572
Ep: 148   Rew: -131.50   Avg Rew: -119.45   Polyak: 0.995000   Buffer: 36.91   Loss: 104.678719  20.464457  18.539234
Ep: 149   Rew: -127.91   Avg Rew: -119.72   Polyak: 0.995000   Buffer: 36.96   Loss: 104.291763  14.936949  17.209238
Ep: 150   Rew: -123.01   Avg Rew: -119.93   Polyak: 0.995000   Buffer: 37.00   Loss: 105.637001  19.166443  16.024929
Ep: 151   Rew: -140.34   Avg Rew: -120.33   Polyak: 0.995000   Buffer: 37.67   Loss: 108.887810  18.269855  20.093084
Ep: 152   Rew: -107.79   Avg Rew: -120.41   Polyak: 0.99

Ep: 214   Rew: -100.28   Avg Rew: -116.46   Polyak: 0.995000   Buffer: 40.52   Loss: 118.236862  23.475193  23.056852
Ep: 215   Rew: -103.11   Avg Rew: -116.28   Polyak: 0.995000   Buffer: 40.53   Loss: 120.475182  25.644625  23.051168
Ep: 216   Rew: -102.61   Avg Rew: -116.01   Polyak: 0.995000   Buffer: 40.55   Loss: 119.902901  24.066017  25.666777
Ep: 217   Rew: -103.04   Avg Rew: -115.90   Polyak: 0.995000   Buffer: 40.57   Loss: 120.901855  27.903450  28.013779
Ep: 218   Rew: -99.86   Avg Rew: -115.80   Polyak: 0.995000   Buffer: 40.58   Loss: 120.488556  19.186399  19.134037
Ep: 219   Rew: -104.99   Avg Rew: -115.70   Polyak: 0.995000   Buffer: 40.60   Loss: 120.054024  23.694767  25.266155
Ep: 220   Rew: -100.18   Avg Rew: -115.30   Polyak: 0.995000   Buffer: 40.62   Loss: 121.324265  24.744286  24.608900
Ep: 221   Rew: -99.21   Avg Rew: -114.90   Polyak: 0.995000   Buffer: 40.64   Loss: 121.308212  26.982824  24.897024
Ep: 222   Rew: -100.08   Avg Rew: -114.48   Polyak: 0.9950

Ep: 284   Rew: -105.85   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 41.96   Loss: 126.098587  27.198757  27.141512
Ep: 285   Rew: -102.92   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 41.97   Loss: 125.529839  28.350739  24.628132
Ep: 286   Rew: -97.56   Avg Rew: -102.17   Polyak: 0.995000   Buffer: 42.00   Loss: 126.281685  25.062733  32.384502
Ep: 287   Rew: -97.72   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 42.03   Loss: 125.725853  24.991394  29.677088
Ep: 288   Rew: -105.73   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 42.04   Loss: 127.033180  24.602636  27.427910
Ep: 289   Rew: -100.09   Avg Rew: -102.14   Polyak: 0.995000   Buffer: 42.06   Loss: 125.833893  27.159954  22.481808
Ep: 290   Rew: -99.75   Avg Rew: -102.14   Polyak: 0.995000   Buffer: 42.09   Loss: 128.423615  49.242580  42.115501
Ep: 291   Rew: -102.00   Avg Rew: -102.15   Polyak: 0.995000   Buffer: 42.11   Loss: 125.270348  22.135513  26.019474
Ep: 292   Rew: -98.51   Avg Rew: -102.14   Polyak: 0.995000

Ep: 354   Rew: -101.29   Avg Rew: -101.56   Polyak: 0.995000   Buffer: 43.38   Loss: 130.946030  170.327744  189.129837
Ep: 355   Rew: -100.83   Avg Rew: -101.54   Polyak: 0.995000   Buffer: 43.40   Loss: 132.105087  27.857447  26.238541
Ep: 356   Rew: -99.92   Avg Rew: -101.54   Polyak: 0.995000   Buffer: 43.42   Loss: 131.128281  25.579849  23.830219
Ep: 357   Rew: -106.11   Avg Rew: -101.61   Polyak: 0.995000   Buffer: 43.44   Loss: 131.223145  23.964016  22.182041
Ep: 358   Rew: -99.60   Avg Rew: -101.55   Polyak: 0.995000   Buffer: 43.46   Loss: 131.575272  24.949596  21.952435
Ep: 359   Rew: -103.83   Avg Rew: -101.59   Polyak: 0.995000   Buffer: 43.48   Loss: 129.675507  27.318165  27.175148
Ep: 360   Rew: -100.63   Avg Rew: -101.56   Polyak: 0.995000   Buffer: 43.49   Loss: 130.775375  31.653210  28.620302
Ep: 361   Rew: -99.83   Avg Rew: -101.57   Polyak: 0.995000   Buffer: 43.51   Loss: 132.206055  26.773029  34.022003
Ep: 362   Rew: -100.72   Avg Rew: -101.55   Polyak: 0.995

Ep: 424   Rew: -101.12   Avg Rew: -101.32   Polyak: 0.995000   Buffer: 44.71   Loss: 132.366913  26.843391  26.710415
Ep: 425   Rew: -105.64   Avg Rew: -101.40   Polyak: 0.995000   Buffer: 44.73   Loss: 133.508713  30.775921  32.461876
Ep: 426   Rew: -99.81   Avg Rew: -101.37   Polyak: 0.995000   Buffer: 44.74   Loss: 133.411530  28.072994  34.210159
Ep: 427   Rew: -102.83   Avg Rew: -101.36   Polyak: 0.995000   Buffer: 44.76   Loss: 131.002869  28.588820  26.346058
Ep: 428   Rew: -100.47   Avg Rew: -101.38   Polyak: 0.995000   Buffer: 44.78   Loss: 133.970367  24.124929  28.350124
Ep: 429   Rew: -104.79   Avg Rew: -101.42   Polyak: 0.995000   Buffer: 44.80   Loss: 133.576248  30.804121  32.364697
Ep: 430   Rew: -100.34   Avg Rew: -101.43   Polyak: 0.995000   Buffer: 44.82   Loss: 133.873306  31.105755  38.150505
Ep: 431   Rew: -105.15   Avg Rew: -101.48   Polyak: 0.995000   Buffer: 44.83   Loss: 133.308411  23.306923  31.568920
Ep: 432   Rew: -99.49   Avg Rew: -101.46   Polyak: 0.9950

Ep: 494   Rew: -100.69   Avg Rew: -101.87   Polyak: 0.995000   Buffer: 46.01   Loss: 133.852386  23.862263  35.832996
Ep: 495   Rew: -100.72   Avg Rew: -101.86   Polyak: 0.995000   Buffer: 46.03   Loss: 135.533478  32.127579  29.284601
Ep: 496   Rew: -100.73   Avg Rew: -101.87   Polyak: 0.995000   Buffer: 46.05   Loss: 135.590744  24.749397  29.454987
Ep: 497   Rew: -100.60   Avg Rew: -101.89   Polyak: 0.995000   Buffer: 46.07   Loss: 136.168182  31.696135  27.773064
Ep: 498   Rew: -100.62   Avg Rew: -101.89   Polyak: 0.995000   Buffer: 46.09   Loss: 133.389328  33.783119  25.381247
Ep: 499   Rew: -102.09   Avg Rew: -101.86   Polyak: 0.995000   Buffer: 46.11   Loss: 136.124451  27.589417  32.991367
Ep: 500   Rew: -101.41   Avg Rew: -101.82   Polyak: 0.995000   Buffer: 46.15   Loss: 136.619720  36.947182  31.839550
Ep: 501   Rew: -103.65   Avg Rew: -101.80   Polyak: 0.995000   Buffer: 46.16   Loss: 134.917847  32.156975  28.793049
Ep: 502   Rew: -101.33   Avg Rew: -101.81   Polyak: 0.99

Ep: 564   Rew: -101.69   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 47.35   Loss: 138.111649  23.981434  25.038612
Ep: 565   Rew: -101.99   Avg Rew: -102.09   Polyak: 0.995000   Buffer: 47.37   Loss: 136.880676  35.859524  32.180618
Ep: 566   Rew: -100.27   Avg Rew: -102.09   Polyak: 0.995000   Buffer: 47.39   Loss: 139.221848  35.276611  32.004707
Ep: 567   Rew: -100.76   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 47.41   Loss: 138.781097  35.080311  31.863739
Ep: 568   Rew: -101.60   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 47.43   Loss: 137.003235  34.772770  30.778830
Ep: 569   Rew: -105.52   Avg Rew: -102.14   Polyak: 0.995000   Buffer: 47.45   Loss: 137.808426  63.552086  50.723953
Ep: 570   Rew: -101.90   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 47.47   Loss: 137.437958  30.806215  26.410154
Ep: 571   Rew: -102.98   Avg Rew: -102.14   Polyak: 0.995000   Buffer: 47.48   Loss: 136.024292  35.177811  33.164490
Ep: 572   Rew: -102.82   Avg Rew: -102.15   Polyak: 0.99

Ep: 634   Rew: -100.97   Avg Rew: -101.78   Polyak: 0.995000   Buffer: 48.73   Loss: 136.927704  30.187906  28.652039
Ep: 635   Rew: -105.68   Avg Rew: -101.78   Polyak: 0.995000   Buffer: 48.74   Loss: 138.037216  37.446514  26.694508
Ep: 636   Rew: -100.35   Avg Rew: -101.75   Polyak: 0.995000   Buffer: 48.77   Loss: 138.520782  31.814297  21.017632
Ep: 637   Rew: -100.88   Avg Rew: -101.75   Polyak: 0.995000   Buffer: 48.79   Loss: 136.490036  31.138113  26.661757
Ep: 638   Rew: -102.75   Avg Rew: -101.76   Polyak: 0.995000   Buffer: 48.81   Loss: 136.473755  29.007523  30.115852
Ep: 639   Rew: -100.24   Avg Rew: -101.75   Polyak: 0.995000   Buffer: 48.83   Loss: 138.458466  29.727398  30.296116
Ep: 640   Rew: -104.23   Avg Rew: -101.79   Polyak: 0.995000   Buffer: 48.85   Loss: 139.717209  31.066814  32.054783
Ep: 641   Rew: -100.53   Avg Rew: -101.79   Polyak: 0.995000   Buffer: 48.87   Loss: 136.740204  24.585796  35.711269
Ep: 642   Rew: -104.88   Avg Rew: -101.84   Polyak: 0.99

Ep: 704   Rew: -105.23   Avg Rew: -101.56   Polyak: 0.995000   Buffer: 50.10   Loss: 138.842819  32.971199  30.520504
Ep: 705   Rew: -105.75   Avg Rew: -101.56   Polyak: 0.995000   Buffer: 50.12   Loss: 139.444336  27.035944  31.411610
Ep: 706   Rew: -99.29   Avg Rew: -101.54   Polyak: 0.995000   Buffer: 50.14   Loss: 135.500092  31.718134  28.405659
Ep: 707   Rew: -100.14   Avg Rew: -101.50   Polyak: 0.995000   Buffer: 50.16   Loss: 136.969955  179.049591  199.273056
Ep: 708   Rew: -99.47   Avg Rew: -101.48   Polyak: 0.995000   Buffer: 50.18   Loss: 134.505554  28.343399  28.487875
Ep: 709   Rew: -99.69   Avg Rew: -101.47   Polyak: 0.995000   Buffer: 50.20   Loss: 138.665756  21.383858  25.270077
Ep: 710   Rew: -100.53   Avg Rew: -101.46   Polyak: 0.995000   Buffer: 50.22   Loss: 134.634628  25.511456  20.919437
Ep: 711   Rew: -99.97   Avg Rew: -101.41   Polyak: 0.995000   Buffer: 50.24   Loss: 135.513123  37.171181  28.000042
Ep: 712   Rew: -100.16   Avg Rew: -101.40   Polyak: 0.9950

Ep: 774   Rew: -100.53   Avg Rew: -101.15   Polyak: 0.995000   Buffer: 51.50   Loss: 135.253891  31.653240  32.876106
Ep: 775   Rew: -100.02   Avg Rew: -101.15   Polyak: 0.995000   Buffer: 51.52   Loss: 138.532471  31.057344  28.704823
Ep: 776   Rew: -101.20   Avg Rew: -101.15   Polyak: 0.995000   Buffer: 51.54   Loss: 138.450348  33.387146  30.424129
Ep: 777   Rew: -102.12   Avg Rew: -101.16   Polyak: 0.995000   Buffer: 51.55   Loss: 134.744049  32.370964  26.709957
Ep: 778   Rew: -101.03   Avg Rew: -101.17   Polyak: 0.995000   Buffer: 51.57   Loss: 136.106598  26.393284  27.755821
Ep: 779   Rew: -104.29   Avg Rew: -101.21   Polyak: 0.995000   Buffer: 51.59   Loss: 139.182373  29.025288  33.633301
Ep: 780   Rew: -100.41   Avg Rew: -101.19   Polyak: 0.995000   Buffer: 51.61   Loss: 135.203323  26.437765  27.975769
Ep: 781   Rew: -101.98   Avg Rew: -101.17   Polyak: 0.995000   Buffer: 51.62   Loss: 138.151138  30.897507  30.273712
Ep: 782   Rew: -105.22   Avg Rew: -101.21   Polyak: 0.99

Ep: 844   Rew: -100.26   Avg Rew: -101.38   Polyak: 0.995000   Buffer: 52.85   Loss: 136.709625  36.986935  39.263493
Ep: 845   Rew: -100.75   Avg Rew: -101.39   Polyak: 0.995000   Buffer: 52.87   Loss: 135.382919  30.804415  30.297537
Ep: 846   Rew: -99.86   Avg Rew: -101.39   Polyak: 0.995000   Buffer: 52.89   Loss: 139.004822  34.840084  28.311378
Ep: 847   Rew: -99.48   Avg Rew: -101.33   Polyak: 0.995000   Buffer: 52.91   Loss: 135.488373  25.302740  28.686100
Ep: 848   Rew: -101.55   Avg Rew: -101.29   Polyak: 0.995000   Buffer: 52.93   Loss: 137.880585  30.411423  30.801741
Ep: 849   Rew: -99.81   Avg Rew: -101.24   Polyak: 0.995000   Buffer: 52.95   Loss: 139.551804  24.219286  24.905622
Ep: 850   Rew: -99.62   Avg Rew: -101.24   Polyak: 0.995000   Buffer: 52.97   Loss: 133.605576  33.058407  23.593353
Ep: 851   Rew: -100.25   Avg Rew: -101.23   Polyak: 0.995000   Buffer: 52.99   Loss: 135.538696  28.513084  25.465300
Ep: 852   Rew: -105.29   Avg Rew: -101.27   Polyak: 0.995000

Ep: 914   Rew: -99.52   Avg Rew: -101.74   Polyak: 0.995000   Buffer: 54.16   Loss: 140.736267  26.569046  34.364624
Ep: 915   Rew: -99.43   Avg Rew: -101.68   Polyak: 0.995000   Buffer: 54.18   Loss: 138.361069  34.483719  30.899834
Ep: 916   Rew: -102.10   Avg Rew: -101.69   Polyak: 0.995000   Buffer: 54.20   Loss: 138.064148  37.437759  25.064827
Ep: 917   Rew: -99.95   Avg Rew: -101.70   Polyak: 0.995000   Buffer: 54.22   Loss: 138.527222  33.485825  28.342510
Ep: 918   Rew: -100.81   Avg Rew: -101.65   Polyak: 0.995000   Buffer: 54.23   Loss: 138.232834  26.192722  27.680782
Ep: 919   Rew: -100.58   Avg Rew: -101.65   Polyak: 0.995000   Buffer: 54.25   Loss: 135.852798  27.948700  28.742596
Ep: 920   Rew: -99.78   Avg Rew: -101.65   Polyak: 0.995000   Buffer: 54.27   Loss: 138.584549  38.037994  33.745979
Ep: 921   Rew: -102.49   Avg Rew: -101.67   Polyak: 0.995000   Buffer: 54.29   Loss: 135.797485  24.723068  23.053894
Ep: 922   Rew: -99.93   Avg Rew: -101.63   Polyak: 0.995000 

Ep: 984   Rew: -104.73   Avg Rew: -101.09   Polyak: 0.995000   Buffer: 55.63   Loss: 137.826477  29.953712  31.454563
Ep: 985   Rew: -102.42   Avg Rew: -101.06   Polyak: 0.995000   Buffer: 55.65   Loss: 135.866150  24.291929  28.826908
Ep: 986   Rew: -100.78   Avg Rew: -101.05   Polyak: 0.995000   Buffer: 55.67   Loss: 136.797089  29.881525  28.415810
Ep: 987   Rew: -99.46   Avg Rew: -101.04   Polyak: 0.995000   Buffer: 55.69   Loss: 137.576279  33.539165  31.615299
Ep: 988   Rew: -102.50   Avg Rew: -101.05   Polyak: 0.995000   Buffer: 55.70   Loss: 137.541931  35.696953  35.929276
Ep: 989   Rew: -99.78   Avg Rew: -101.05   Polyak: 0.995000   Buffer: 55.72   Loss: 137.472763  24.871721  30.243607
Ep: 990   Rew: -99.85   Avg Rew: -101.05   Polyak: 0.995000   Buffer: 55.74   Loss: 137.430740  25.544376  35.131096
Ep: 991   Rew: -102.36   Avg Rew: -101.08   Polyak: 0.995000   Buffer: 55.76   Loss: 134.041138  25.196049  26.938328
Ep: 992   Rew: -100.51   Avg Rew: -101.08   Polyak: 0.99500

Ep: 1054   Rew: -102.43   Avg Rew: -101.32   Polyak: 0.995000   Buffer: 57.00   Loss: 135.524017  29.207314  19.723513
Ep: 1055   Rew: -105.30   Avg Rew: -101.36   Polyak: 0.995000   Buffer: 57.02   Loss: 135.768768  25.873808  24.363192
Ep: 1056   Rew: -100.67   Avg Rew: -101.35   Polyak: 0.995000   Buffer: 57.03   Loss: 133.926575  199.980896  187.757370
Ep: 1057   Rew: -100.01   Avg Rew: -101.36   Polyak: 0.995000   Buffer: 57.05   Loss: 135.273880  31.918194  30.828587
Ep: 1058   Rew: -100.76   Avg Rew: -101.35   Polyak: 0.995000   Buffer: 57.08   Loss: 137.758438  26.712315  29.657902
Ep: 1059   Rew: -104.97   Avg Rew: -101.41   Polyak: 0.995000   Buffer: 57.10   Loss: 138.028351  53.790188  41.216545
Ep: 1060   Rew: -101.87   Avg Rew: -101.42   Polyak: 0.995000   Buffer: 57.12   Loss: 136.406860  28.219967  29.713947
Ep: 1061   Rew: -100.33   Avg Rew: -101.36   Polyak: 0.995000   Buffer: 57.14   Loss: 137.535095  25.625546  27.676502
Ep: 1062   Rew: -105.78   Avg Rew: -101.41   P

Ep: 1123   Rew: -106.25   Avg Rew: -101.75   Polyak: 0.995000   Buffer: 58.31   Loss: 132.204346  24.234585  27.550705
Ep: 1124   Rew: -102.00   Avg Rew: -101.76   Polyak: 0.995000   Buffer: 58.33   Loss: 134.076340  21.780897  23.637800
Ep: 1125   Rew: -100.01   Avg Rew: -101.73   Polyak: 0.995000   Buffer: 58.34   Loss: 133.839539  25.940454  26.262230
Ep: 1126   Rew: -100.38   Avg Rew: -101.73   Polyak: 0.995000   Buffer: 58.37   Loss: 131.465714  22.151480  28.640688
Ep: 1127   Rew: -100.71   Avg Rew: -101.74   Polyak: 0.995000   Buffer: 58.39   Loss: 137.756683  24.883213  22.649788
Ep: 1128   Rew: -99.75   Avg Rew: -101.73   Polyak: 0.995000   Buffer: 58.41   Loss: 135.935928  21.297388  27.247576
Ep: 1129   Rew: -101.25   Avg Rew: -101.75   Polyak: 0.995000   Buffer: 58.43   Loss: 137.571915  31.900299  26.578377
Ep: 1130   Rew: -105.95   Avg Rew: -101.75   Polyak: 0.995000   Buffer: 58.44   Loss: 136.242950  185.642120  193.598969
Ep: 1131   Rew: -101.87   Avg Rew: -101.73   Po

Ep: 1192   Rew: -101.46   Avg Rew: -101.47   Polyak: 0.995000   Buffer: 59.69   Loss: 134.360336  22.560266  18.619638
Ep: 1193   Rew: -100.04   Avg Rew: -101.46   Polyak: 0.995000   Buffer: 59.72   Loss: 131.074722  25.633072  26.205246
Ep: 1194   Rew: -104.98   Avg Rew: -101.52   Polyak: 0.995000   Buffer: 59.73   Loss: 136.901306  24.694981  21.691256
Ep: 1195   Rew: -100.26   Avg Rew: -101.51   Polyak: 0.995000   Buffer: 59.76   Loss: 132.093948  29.312477  25.429407
Ep: 1196   Rew: -99.38   Avg Rew: -101.50   Polyak: 0.995000   Buffer: 59.77   Loss: 134.134918  30.105892  24.768841
Ep: 1197   Rew: -99.39   Avg Rew: -101.47   Polyak: 0.995000   Buffer: 59.79   Loss: 132.376633  21.988600  25.628288
Ep: 1198   Rew: -99.79   Avg Rew: -101.41   Polyak: 0.995000   Buffer: 59.81   Loss: 135.919174  22.541164  24.826757
Ep: 1199   Rew: -99.21   Avg Rew: -101.34   Polyak: 0.995000   Buffer: 59.83   Loss: 132.183563  27.265568  26.212831
Ep: 1200   Rew: -99.52   Avg Rew: -101.33   Polyak: 

Ep: 1262   Rew: -100.64   Avg Rew: -101.28   Polyak: 0.995000   Buffer: 61.06   Loss: 131.324478  28.011936  28.283209
Ep: 1263   Rew: -101.14   Avg Rew: -101.22   Polyak: 0.995000   Buffer: 61.08   Loss: 130.882248  21.290644  22.749428
Ep: 1264   Rew: -100.03   Avg Rew: -101.22   Polyak: 0.995000   Buffer: 61.11   Loss: 131.773132  22.986414  19.222050
Ep: 1265   Rew: -99.98   Avg Rew: -101.21   Polyak: 0.995000   Buffer: 61.12   Loss: 128.386658  18.514120  22.678869
Ep: 1266   Rew: -100.00   Avg Rew: -101.21   Polyak: 0.995000   Buffer: 61.14   Loss: 131.028961  23.428928  24.002087
Ep: 1267   Rew: -100.54   Avg Rew: -101.21   Polyak: 0.995000   Buffer: 61.16   Loss: 131.171112  21.597315  20.405020
Ep: 1268   Rew: -100.39   Avg Rew: -101.21   Polyak: 0.995000   Buffer: 61.18   Loss: 129.709091  22.920298  22.676033
Ep: 1269   Rew: -100.25   Avg Rew: -101.19   Polyak: 0.995000   Buffer: 61.20   Loss: 128.450394  30.477249  28.658535
Ep: 1270   Rew: -100.66   Avg Rew: -101.21   Poly

Ep: 1331   Rew: -99.52   Avg Rew: -101.42   Polyak: 0.995000   Buffer: 62.40   Loss: 131.958801  22.451075  24.754616
Ep: 1332   Rew: -99.61   Avg Rew: -101.36   Polyak: 0.995000   Buffer: 62.42   Loss: 132.124588  27.812477  18.582615
Ep: 1333   Rew: -99.90   Avg Rew: -101.31   Polyak: 0.995000   Buffer: 62.45   Loss: 132.060394  20.870083  19.702873
Ep: 1334   Rew: -100.19   Avg Rew: -101.26   Polyak: 0.995000   Buffer: 62.47   Loss: 132.559982  20.254364  26.873810
Ep: 1335   Rew: -105.44   Avg Rew: -101.32   Polyak: 0.995000   Buffer: 62.49   Loss: 133.735947  29.450127  21.366243
Ep: 1336   Rew: -100.08   Avg Rew: -101.32   Polyak: 0.995000   Buffer: 62.51   Loss: 130.558456  25.696529  20.593300
Ep: 1337   Rew: -100.44   Avg Rew: -101.32   Polyak: 0.995000   Buffer: 62.53   Loss: 130.497375  19.632927  22.262501
Ep: 1338   Rew: -101.32   Avg Rew: -101.31   Polyak: 0.995000   Buffer: 62.55   Loss: 128.406219  22.642046  20.131239
Ep: 1339   Rew: -100.15   Avg Rew: -101.30   Polyak

Ep: 1401   Rew: -102.63   Avg Rew: -101.11   Polyak: 0.995000   Buffer: 63.80   Loss: 132.530136  26.302315  28.996626
Ep: 1402   Rew: -99.23   Avg Rew: -101.10   Polyak: 0.995000   Buffer: 63.82   Loss: 131.452896  24.343216  22.674520
Ep: 1403   Rew: -100.79   Avg Rew: -101.07   Polyak: 0.995000   Buffer: 63.84   Loss: 130.836380  33.059437  41.026981
Ep: 1404   Rew: -103.03   Avg Rew: -101.03   Polyak: 0.995000   Buffer: 63.86   Loss: 128.733429  26.532654  27.272379
Ep: 1405   Rew: -99.46   Avg Rew: -100.96   Polyak: 0.995000   Buffer: 63.88   Loss: 131.660233  37.299263  37.374722
Ep: 1406   Rew: -99.80   Avg Rew: -100.94   Polyak: 0.995000   Buffer: 63.90   Loss: 129.818481  24.099007  22.510479
Ep: 1407   Rew: -104.97   Avg Rew: -100.98   Polyak: 0.995000   Buffer: 63.92   Loss: 132.868378  26.582922  25.139536
Ep: 1408   Rew: -105.25   Avg Rew: -101.03   Polyak: 0.995000   Buffer: 63.94   Loss: 130.017807  24.839256  19.998367
Ep: 1409   Rew: -100.13   Avg Rew: -101.03   Polyak

Ep: 1470   Rew: -100.39   Avg Rew: -101.22   Polyak: 0.995000   Buffer: 65.16   Loss: 131.631470  26.032021  26.986290
Ep: 1471   Rew: -100.12   Avg Rew: -101.22   Polyak: 0.995000   Buffer: 65.19   Loss: 128.890915  24.135818  22.781792
Ep: 1472   Rew: -100.93   Avg Rew: -101.23   Polyak: 0.995000   Buffer: 65.21   Loss: 132.473877  24.795712  21.074757
Ep: 1473   Rew: -102.44   Avg Rew: -101.26   Polyak: 0.995000   Buffer: 65.22   Loss: 132.136230  18.620148  27.260550
Ep: 1474   Rew: -103.94   Avg Rew: -101.30   Polyak: 0.995000   Buffer: 65.24   Loss: 130.388062  26.960720  23.685085
Ep: 1475   Rew: -100.54   Avg Rew: -101.31   Polyak: 0.995000   Buffer: 65.26   Loss: 130.235260  24.867304  28.688742
Ep: 1476   Rew: -100.57   Avg Rew: -101.31   Polyak: 0.995000   Buffer: 65.28   Loss: 128.187607  21.583673  24.379189
Ep: 1477   Rew: -99.43   Avg Rew: -101.29   Polyak: 0.995000   Buffer: 65.30   Loss: 128.081741  27.352934  27.550291
Ep: 1478   Rew: -102.71   Avg Rew: -101.33   Poly

Ep: 1539   Rew: -104.92   Avg Rew: -101.64   Polyak: 0.995000   Buffer: 66.49   Loss: 128.620041  23.844692  21.387985
Ep: 1540   Rew: -99.30   Avg Rew: -101.63   Polyak: 0.995000   Buffer: 66.51   Loss: 127.277054  21.964128  23.681080
Ep: 1541   Rew: -104.57   Avg Rew: -101.67   Polyak: 0.995000   Buffer: 66.52   Loss: 130.278473  25.625957  21.119743
Ep: 1542   Rew: -99.83   Avg Rew: -101.64   Polyak: 0.995000   Buffer: 66.55   Loss: 128.748856  22.820744  24.925097
Ep: 1543   Rew: -102.66   Avg Rew: -101.67   Polyak: 0.995000   Buffer: 66.56   Loss: 128.481033  24.353067  22.877581
Ep: 1544   Rew: -100.03   Avg Rew: -101.66   Polyak: 0.995000   Buffer: 66.58   Loss: 126.600609  193.365265  184.860718
Ep: 1545   Rew: -107.05   Avg Rew: -101.67   Polyak: 0.995000   Buffer: 66.60   Loss: 127.654678  22.895182  18.668068
Ep: 1546   Rew: -104.63   Avg Rew: -101.71   Polyak: 0.995000   Buffer: 66.61   Loss: 127.727264  26.568678  23.001486
Ep: 1547   Rew: -103.34   Avg Rew: -101.73   Pol

Ep: 1608   Rew: -99.42   Avg Rew: -101.72   Polyak: 0.995000   Buffer: 67.75   Loss: 126.040199  21.704025  22.782646
Ep: 1609   Rew: -100.33   Avg Rew: -101.73   Polyak: 0.995000   Buffer: 67.77   Loss: 125.467087  22.304464  23.793974
Ep: 1610   Rew: -101.73   Avg Rew: -101.71   Polyak: 0.995000   Buffer: 67.81   Loss: 125.288078  21.218307  17.377554
Ep: 1611   Rew: -100.89   Avg Rew: -101.66   Polyak: 0.995000   Buffer: 67.83   Loss: 126.330635  22.413200  25.363344
Ep: 1612   Rew: -99.72   Avg Rew: -101.66   Polyak: 0.995000   Buffer: 67.85   Loss: 124.253143  19.669773  17.004826
Ep: 1613   Rew: -101.18   Avg Rew: -101.67   Polyak: 0.995000   Buffer: 67.87   Loss: 127.833267  17.781416  19.760805
Ep: 1614   Rew: -101.37   Avg Rew: -101.63   Polyak: 0.995000   Buffer: 67.91   Loss: 125.674812  17.260460  21.625530
Ep: 1615   Rew: -100.00   Avg Rew: -101.64   Polyak: 0.995000   Buffer: 67.93   Loss: 123.517326  21.383423  22.142635
Ep: 1616   Rew: -100.71   Avg Rew: -101.64   Polya

Ep: 1678   Rew: -100.42   Avg Rew: -101.30   Polyak: 0.995000   Buffer: 69.17   Loss: 121.745224  17.827944  15.327959
Ep: 1679   Rew: -98.80   Avg Rew: -101.26   Polyak: 0.995000   Buffer: 69.18   Loss: 122.352409  162.668182  157.430374
Ep: 1680   Rew: -105.69   Avg Rew: -101.27   Polyak: 0.995000   Buffer: 69.20   Loss: 123.193916  21.339506  17.839264
Ep: 1681   Rew: -99.21   Avg Rew: -101.24   Polyak: 0.995000   Buffer: 69.22   Loss: 122.930244  19.253714  18.686148
Ep: 1682   Rew: -98.55   Avg Rew: -101.23   Polyak: 0.995000   Buffer: 69.25   Loss: 124.305382  21.854828  14.954004
Ep: 1683   Rew: -100.38   Avg Rew: -101.21   Polyak: 0.995000   Buffer: 69.27   Loss: 122.778351  20.004713  21.891714
Ep: 1684   Rew: -101.17   Avg Rew: -101.16   Polyak: 0.995000   Buffer: 69.29   Loss: 122.827553  22.823772  22.705761
Ep: 1685   Rew: -100.66   Avg Rew: -101.17   Polyak: 0.995000   Buffer: 69.31   Loss: 124.430023  20.048323  15.671598
Ep: 1686   Rew: -100.43   Avg Rew: -101.17   Poly

Ep: 1748   Rew: -105.14   Avg Rew: -101.55   Polyak: 0.995000   Buffer: 70.52   Loss: 122.429268  17.791416  20.091516
Ep: 1749   Rew: -99.62   Avg Rew: -101.54   Polyak: 0.995000   Buffer: 70.54   Loss: 121.024170  18.971348  13.362152
Ep: 1750   Rew: -99.79   Avg Rew: -101.54   Polyak: 0.995000   Buffer: 70.56   Loss: 122.419395  16.082132  17.346214
Ep: 1751   Rew: -100.25   Avg Rew: -101.54   Polyak: 0.995000   Buffer: 70.60   Loss: 123.146469  21.669254  16.990210
Ep: 1752   Rew: -100.24   Avg Rew: -101.54   Polyak: 0.995000   Buffer: 70.62   Loss: 125.430229  14.921175  23.802738
Ep: 1753   Rew: -100.53   Avg Rew: -101.49   Polyak: 0.995000   Buffer: 70.63   Loss: 123.059067  23.374201  21.115486
Ep: 1754   Rew: -105.27   Avg Rew: -101.49   Polyak: 0.995000   Buffer: 70.65   Loss: 123.346710  16.225281  16.368668
Ep: 1755   Rew: -105.68   Avg Rew: -101.55   Polyak: 0.995000   Buffer: 70.67   Loss: 125.006569  17.911215  21.181829
Ep: 1756   Rew: -99.62   Avg Rew: -101.50   Polyak

Ep: 1818   Rew: -105.30   Avg Rew: -101.57   Polyak: 0.995000   Buffer: 71.86   Loss: 123.236923  16.646589  18.159519
Ep: 1819   Rew: -105.85   Avg Rew: -101.63   Polyak: 0.995000   Buffer: 71.88   Loss: 121.273415  16.044868  17.613327
Ep: 1820   Rew: -100.22   Avg Rew: -101.63   Polyak: 0.995000   Buffer: 71.90   Loss: 121.082458  17.134087  14.963844
Ep: 1821   Rew: -101.55   Avg Rew: -101.64   Polyak: 0.995000   Buffer: 71.92   Loss: 124.685654  19.741381  16.554039
Ep: 1822   Rew: -100.08   Avg Rew: -101.64   Polyak: 0.995000   Buffer: 71.94   Loss: 120.393478  20.834179  17.657213
Ep: 1823   Rew: -105.07   Avg Rew: -101.68   Polyak: 0.995000   Buffer: 71.95   Loss: 123.597572  18.367596  16.131218
Ep: 1824   Rew: -100.72   Avg Rew: -101.63   Polyak: 0.995000   Buffer: 71.97   Loss: 121.629807  16.260344  15.033314
Ep: 1825   Rew: -100.55   Avg Rew: -101.57   Polyak: 0.995000   Buffer: 71.99   Loss: 121.864342  16.832281  19.208824
Ep: 1826   Rew: -105.59   Avg Rew: -101.61   Pol

Ep: 1888   Rew: -101.68   Avg Rew: -101.30   Polyak: 0.995000   Buffer: 73.24   Loss: 123.161987  22.887684  17.313553
Ep: 1889   Rew: -106.25   Avg Rew: -101.36   Polyak: 0.995000   Buffer: 73.26   Loss: 120.310585  15.306837  17.996660
Ep: 1890   Rew: -100.62   Avg Rew: -101.34   Polyak: 0.995000   Buffer: 73.28   Loss: 123.507835  18.502037  20.029194
Ep: 1891   Rew: -99.75   Avg Rew: -101.31   Polyak: 0.995000   Buffer: 73.30   Loss: 124.669884  18.505144  21.149727
Ep: 1892   Rew: -99.95   Avg Rew: -101.27   Polyak: 0.995000   Buffer: 73.32   Loss: 122.207832  19.416786  16.776485
Ep: 1893   Rew: -102.86   Avg Rew: -101.23   Polyak: 0.995000   Buffer: 73.34   Loss: 122.693497  16.209248  16.239090
Ep: 1894   Rew: -100.25   Avg Rew: -101.23   Polyak: 0.995000   Buffer: 73.37   Loss: 125.146210  19.042271  16.064583
Ep: 1895   Rew: -99.29   Avg Rew: -101.24   Polyak: 0.995000   Buffer: 73.39   Loss: 123.993172  15.364086  17.727989
Ep: 1896   Rew: -99.69   Avg Rew: -101.24   Polyak:

Ep: 1957   Rew: -101.00   Avg Rew: -101.05   Polyak: 0.995000   Buffer: 74.60   Loss: 123.008110  19.204920  18.174944
Ep: 1958   Rew: -100.17   Avg Rew: -101.04   Polyak: 0.995000   Buffer: 74.62   Loss: 122.413765  17.674536  14.952978
Ep: 1959   Rew: -99.98   Avg Rew: -101.03   Polyak: 0.995000   Buffer: 74.64   Loss: 123.371674  19.850189  16.682537
Ep: 1960   Rew: -100.80   Avg Rew: -101.04   Polyak: 0.995000   Buffer: 74.66   Loss: 122.979294  16.286797  14.797539
Ep: 1961   Rew: -99.68   Avg Rew: -101.01   Polyak: 0.995000   Buffer: 74.68   Loss: 123.052490  18.091042  18.696293
Ep: 1962   Rew: -99.68   Avg Rew: -101.00   Polyak: 0.995000   Buffer: 74.71   Loss: 120.974777  17.548403  20.450411
Ep: 1963   Rew: -100.08   Avg Rew: -101.01   Polyak: 0.995000   Buffer: 74.73   Loss: 120.401283  20.750166  15.182695
Ep: 1964   Rew: -100.15   Avg Rew: -101.02   Polyak: 0.995000   Buffer: 74.75   Loss: 122.722397  19.459354  12.772517
Ep: 1965   Rew: -105.65   Avg Rew: -101.08   Polyak

Ep: 2027   Rew: -100.12   Avg Rew: -100.95   Polyak: 0.995000   Buffer: 76.03   Loss: 120.139671  14.977959  13.746842
Ep: 2028   Rew: -101.17   Avg Rew: -100.96   Polyak: 0.995000   Buffer: 76.05   Loss: 121.172081  15.982466  17.706007
Ep: 2029   Rew: -99.82   Avg Rew: -100.94   Polyak: 0.995000   Buffer: 76.08   Loss: 119.551636  16.194042  16.776192
Ep: 2030   Rew: -99.64   Avg Rew: -100.91   Polyak: 0.995000   Buffer: 76.10   Loss: 121.077583  17.808821  19.501101
Ep: 2031   Rew: -105.43   Avg Rew: -100.96   Polyak: 0.995000   Buffer: 76.11   Loss: 120.415398  14.440672  13.656879
Ep: 2032   Rew: -103.17   Avg Rew: -100.99   Polyak: 0.995000   Buffer: 76.13   Loss: 120.460114  17.465570  15.533981
Ep: 2033   Rew: -99.78   Avg Rew: -100.97   Polyak: 0.995000   Buffer: 76.15   Loss: 118.864708  11.972116  13.860695
Ep: 2034   Rew: -101.30   Avg Rew: -100.93   Polyak: 0.995000   Buffer: 76.17   Loss: 120.623367  15.250759  14.562758
Ep: 2035   Rew: -99.30   Avg Rew: -100.92   Polyak:

Ep: 2097   Rew: -99.89   Avg Rew: -100.65   Polyak: 0.995000   Buffer: 77.47   Loss: 119.096375  145.770355  143.669174
Ep: 2098   Rew: -99.56   Avg Rew: -100.65   Polyak: 0.995000   Buffer: 77.49   Loss: 120.030869  15.973284  18.942446
Ep: 2099   Rew: -98.95   Avg Rew: -100.65   Polyak: 0.995000   Buffer: 77.51   Loss: 118.598991  15.485589  17.320112
Ep: 2100   Rew: -99.12   Avg Rew: -100.65   Polyak: 0.995000   Buffer: 77.53   Loss: 119.067245  14.603333  17.728460
Ep: 2101   Rew: -102.58   Avg Rew: -100.67   Polyak: 0.995000   Buffer: 77.54   Loss: 119.362915  14.531963  13.387711
Ep: 2102   Rew: -102.34   Avg Rew: -100.69   Polyak: 0.995000   Buffer: 77.56   Loss: 119.380562  12.601379  15.472905
Ep: 2103   Rew: -106.73   Avg Rew: -100.71   Polyak: 0.995000   Buffer: 77.58   Loss: 118.303070  22.270065  16.833668
Ep: 2104   Rew: -105.26   Avg Rew: -100.71   Polyak: 0.995000   Buffer: 77.60   Loss: 119.808403  13.578711  15.006881
Ep: 2105   Rew: -100.18   Avg Rew: -100.72   Polya

Ep: 2166   Rew: -104.33   Avg Rew: -101.24   Polyak: 0.995000   Buffer: 78.79   Loss: 119.542130  16.086483  18.047386
Ep: 2167   Rew: -100.19   Avg Rew: -101.21   Polyak: 0.995000   Buffer: 78.81   Loss: 120.127609  14.695210  14.567889
Ep: 2168   Rew: -99.06   Avg Rew: -101.20   Polyak: 0.995000   Buffer: 78.84   Loss: 118.408951  13.964970  13.812300
Ep: 2169   Rew: -100.54   Avg Rew: -101.15   Polyak: 0.995000   Buffer: 78.86   Loss: 120.289421  13.451594  13.944389
Ep: 2170   Rew: -100.49   Avg Rew: -101.15   Polyak: 0.995000   Buffer: 78.88   Loss: 121.294617  14.836721  16.605177
Ep: 2171   Rew: -99.87   Avg Rew: -101.14   Polyak: 0.995000   Buffer: 78.90   Loss: 118.441200  15.905377  13.431124
Ep: 2172   Rew: -100.81   Avg Rew: -101.14   Polyak: 0.995000   Buffer: 78.92   Loss: 120.254204  16.131863  17.726662
Ep: 2173   Rew: -103.20   Avg Rew: -101.18   Polyak: 0.995000   Buffer: 78.93   Loss: 120.407562  15.136559  15.852273
Ep: 2174   Rew: -100.36   Avg Rew: -101.19   Polya

Ep: 2235   Rew: -99.87   Avg Rew: -101.29   Polyak: 0.995000   Buffer: 80.15   Loss: 119.764679  12.038260  15.972880
Ep: 2236   Rew: -99.72   Avg Rew: -101.28   Polyak: 0.995000   Buffer: 80.17   Loss: 119.236832  18.343243  18.297836
Ep: 2237   Rew: -99.37   Avg Rew: -101.23   Polyak: 0.995000   Buffer: 80.19   Loss: 119.854103  16.238218  16.674332
Ep: 2238   Rew: -105.50   Avg Rew: -101.27   Polyak: 0.995000   Buffer: 80.21   Loss: 121.234261  17.127502  15.039009
Ep: 2239   Rew: -105.21   Avg Rew: -101.31   Polyak: 0.995000   Buffer: 80.22   Loss: 120.228279  14.790727  14.185987
Ep: 2240   Rew: -100.86   Avg Rew: -101.30   Polyak: 0.995000   Buffer: 80.24   Loss: 119.814819  16.145264  13.280663
Ep: 2241   Rew: -100.38   Avg Rew: -101.29   Polyak: 0.995000   Buffer: 80.26   Loss: 118.804253  16.791044  9.694847
Ep: 2242   Rew: -101.05   Avg Rew: -101.29   Polyak: 0.995000   Buffer: 80.28   Loss: 120.036972  16.084412  16.171186
Ep: 2243   Rew: -105.09   Avg Rew: -101.33   Polyak:

Ep: 2304   Rew: -101.49   Avg Rew: -101.68   Polyak: 0.995000   Buffer: 81.44   Loss: 118.200836  12.818523  15.259159
Ep: 2305   Rew: -102.47   Avg Rew: -101.71   Polyak: 0.995000   Buffer: 81.45   Loss: 118.828293  16.958710  12.492320
Ep: 2306   Rew: -99.09   Avg Rew: -101.65   Polyak: 0.995000   Buffer: 81.47   Loss: 118.559258  13.990995  15.949396
Ep: 2307   Rew: -100.67   Avg Rew: -101.65   Polyak: 0.995000   Buffer: 81.49   Loss: 120.080368  14.876739  17.724281
Ep: 2308   Rew: -103.68   Avg Rew: -101.64   Polyak: 0.995000   Buffer: 81.51   Loss: 118.464432  12.293772  16.135616
Ep: 2309   Rew: -101.09   Avg Rew: -101.60   Polyak: 0.995000   Buffer: 81.53   Loss: 120.424927  15.633201  14.414732
Ep: 2310   Rew: -100.31   Avg Rew: -101.60   Polyak: 0.995000   Buffer: 81.55   Loss: 118.968719  16.517107  14.157482
Ep: 2311   Rew: -104.24   Avg Rew: -101.65   Polyak: 0.995000   Buffer: 81.57   Loss: 119.267952  14.341159  15.611925
Ep: 2312   Rew: -100.03   Avg Rew: -101.66   Poly

Ep: 2373   Rew: -101.03   Avg Rew: -102.05   Polyak: 0.995000   Buffer: 82.81   Loss: 116.536385  16.770920  17.490364
Ep: 2374   Rew: -106.50   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 82.82   Loss: 119.049934  16.755722  14.607260
Ep: 2375   Rew: -107.01   Avg Rew: -102.18   Polyak: 0.995000   Buffer: 82.84   Loss: 117.414467  15.729063  13.115885
Ep: 2376   Rew: -101.39   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 82.86   Loss: 117.303551  17.310129  13.659506
Ep: 2377   Rew: -102.93   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 82.88   Loss: 119.133965  15.445293  14.950653
Ep: 2378   Rew: -100.64   Avg Rew: -102.18   Polyak: 0.995000   Buffer: 82.90   Loss: 117.261017  17.491653  16.142788
Ep: 2379   Rew: -103.39   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 82.91   Loss: 116.273071  17.288477  14.433925
Ep: 2380   Rew: -100.78   Avg Rew: -102.22   Polyak: 0.995000   Buffer: 82.94   Loss: 117.233803  14.600210  14.482121
Ep: 2381   Rew: -105.47   Avg Rew: -102.23   Pol

Ep: 2442   Rew: -104.69   Avg Rew: -103.61   Polyak: 0.995000   Buffer: 84.01   Loss: 115.875511  16.351543  15.590120
Ep: 2443   Rew: -103.79   Avg Rew: -103.64   Polyak: 0.995000   Buffer: 84.03   Loss: 118.095276  15.958344  14.562196
Ep: 2444   Rew: -100.89   Avg Rew: -103.65   Polyak: 0.995000   Buffer: 84.05   Loss: 114.535461  17.937496  13.296703
Ep: 2445   Rew: -108.61   Avg Rew: -103.70   Polyak: 0.995000   Buffer: 84.07   Loss: 115.811653  15.204492  15.361420
Ep: 2446   Rew: -106.69   Avg Rew: -103.77   Polyak: 0.995000   Buffer: 84.08   Loss: 117.247704  16.585346  14.226295
Ep: 2447   Rew: -104.06   Avg Rew: -103.43   Polyak: 0.995000   Buffer: 84.10   Loss: 115.889381  14.370419  15.343184
Ep: 2448   Rew: -103.74   Avg Rew: -103.46   Polyak: 0.995000   Buffer: 84.11   Loss: 116.154259  15.909981  13.123415
Ep: 2449   Rew: -105.56   Avg Rew: -103.50   Polyak: 0.995000   Buffer: 84.13   Loss: 114.596794  17.601761  11.678984
Ep: 2450   Rew: -105.00   Avg Rew: -103.54   Pol

Ep: 2511   Rew: -104.48   Avg Rew: -104.08   Polyak: 0.995000   Buffer: 85.19   Loss: 115.082542  15.441519  15.814948
Ep: 2512   Rew: -103.81   Avg Rew: -104.04   Polyak: 0.995000   Buffer: 85.21   Loss: 114.438675  14.433119  16.571192
Ep: 2513   Rew: -105.10   Avg Rew: -104.06   Polyak: 0.995000   Buffer: 85.23   Loss: 113.629967  13.927580  15.822460
Ep: 2514   Rew: -116.64   Avg Rew: -104.22   Polyak: 0.995000   Buffer: 85.25   Loss: 114.659386  16.728024  12.914549
Ep: 2515   Rew: -105.58   Avg Rew: -104.23   Polyak: 0.995000   Buffer: 85.26   Loss: 113.259529  14.251666  14.972574
Ep: 2516   Rew: -108.38   Avg Rew: -104.31   Polyak: 0.995000   Buffer: 85.28   Loss: 114.315826  16.041821  13.850266
Ep: 2517   Rew: -115.42   Avg Rew: -104.46   Polyak: 0.995000   Buffer: 85.30   Loss: 113.569344  120.329102  132.262222
Ep: 2518   Rew: -105.35   Avg Rew: -104.47   Polyak: 0.995000   Buffer: 85.31   Loss: 114.159805  15.048189  16.599625
Ep: 2519   Rew: -110.47   Avg Rew: -104.57   P

Ep: 2580   Rew: -110.36   Avg Rew: -103.74   Polyak: 0.995000   Buffer: 86.38   Loss: 112.412956  12.839302  13.784785
Ep: 2581   Rew: -113.59   Avg Rew: -103.87   Polyak: 0.995000   Buffer: 86.40   Loss: 112.266678  18.744377  13.598707
Ep: 2582   Rew: -107.33   Avg Rew: -103.90   Polyak: 0.995000   Buffer: 86.42   Loss: 112.410423  13.718831  12.307899
Ep: 2583   Rew: -110.71   Avg Rew: -103.96   Polyak: 0.995000   Buffer: 86.45   Loss: 111.398872  11.976138  13.088946
Ep: 2584   Rew: -112.69   Avg Rew: -104.03   Polyak: 0.995000   Buffer: 86.47   Loss: 110.950401  15.649017  12.550667
Ep: 2585   Rew: -112.17   Avg Rew: -104.14   Polyak: 0.995000   Buffer: 86.49   Loss: 112.818748  13.444036  11.657553
Ep: 2586   Rew: -115.76   Avg Rew: -104.21   Polyak: 0.995000   Buffer: 86.51   Loss: 111.897362  11.120710  11.468445
Ep: 2587   Rew: -113.29   Avg Rew: -104.29   Polyak: 0.995000   Buffer: 86.54   Loss: 111.300774  12.712470  16.280880
Ep: 2588   Rew: -113.19   Avg Rew: -104.41   Pol

Ep: 2649   Rew: -104.25   Avg Rew: -107.31   Polyak: 0.995000   Buffer: 87.84   Loss: 109.012794  12.558600  12.600856
Ep: 2650   Rew: -102.08   Avg Rew: -107.31   Polyak: 0.995000   Buffer: 87.86   Loss: 107.572792  12.016839  15.841018
Ep: 2651   Rew: -102.31   Avg Rew: -107.32   Polyak: 0.995000   Buffer: 87.88   Loss: 108.851501  10.039671  12.356098
Ep: 2652   Rew: -105.37   Avg Rew: -107.32   Polyak: 0.995000   Buffer: 87.90   Loss: 108.064651  11.171448  12.510830
Ep: 2653   Rew: -103.32   Avg Rew: -107.31   Polyak: 0.995000   Buffer: 87.92   Loss: 107.925491  10.177683  12.480519
Ep: 2654   Rew: -104.29   Avg Rew: -107.30   Polyak: 0.995000   Buffer: 87.94   Loss: 108.944519  10.482042  11.372972
Ep: 2655   Rew: -105.84   Avg Rew: -107.28   Polyak: 0.995000   Buffer: 87.96   Loss: 107.946167  10.765818  11.450518
Ep: 2656   Rew: -104.54   Avg Rew: -107.31   Polyak: 0.995000   Buffer: 87.98   Loss: 107.467880  11.745821  11.336160
Ep: 2657   Rew: -103.08   Avg Rew: -107.33   Pol

Ep: 2718   Rew: -100.06   Avg Rew: -104.78   Polyak: 0.995000   Buffer: 89.22   Loss: 106.335220  10.094798  11.155870
Ep: 2719   Rew: -130.85   Avg Rew: -105.04   Polyak: 0.995000   Buffer: 89.28   Loss: 107.052605  10.426222  13.034557
Ep: 2720   Rew: -100.45   Avg Rew: -104.96   Polyak: 0.995000   Buffer: 89.30   Loss: 106.745888  13.185667  12.482280
Ep: 2721   Rew: -112.03   Avg Rew: -105.04   Polyak: 0.995000   Buffer: 89.34   Loss: 106.550018  12.838173  14.518807
Ep: 2722   Rew: -100.58   Avg Rew: -104.93   Polyak: 0.995000   Buffer: 89.38   Loss: 106.990364  13.407207  11.579242
Ep: 2723   Rew: -102.34   Avg Rew: -104.86   Polyak: 0.995000   Buffer: 89.39   Loss: 107.383408  12.840967  13.099795
Ep: 2724   Rew: -103.57   Avg Rew: -104.84   Polyak: 0.995000   Buffer: 89.41   Loss: 106.658394  14.751451  13.216980
Ep: 2725   Rew: -117.36   Avg Rew: -104.92   Polyak: 0.995000   Buffer: 89.46   Loss: 106.578903  11.520025  10.775452
Ep: 2726   Rew: -120.51   Avg Rew: -105.06   Pol

Ep: 2787   Rew: -100.59   Avg Rew: -110.49   Polyak: 0.995000   Buffer: 92.64   Loss: 110.890831  12.483585  13.096937
Ep: 2788   Rew: -101.50   Avg Rew: -110.42   Polyak: 0.995000   Buffer: 92.65   Loss: 110.999611  13.667872  12.295081
Ep: 2789   Rew: -102.60   Avg Rew: -110.43   Polyak: 0.995000   Buffer: 92.67   Loss: 111.276070  11.788269  13.237492
Ep: 2790   Rew: -100.88   Avg Rew: -110.36   Polyak: 0.995000   Buffer: 92.69   Loss: 110.575684  12.164120  8.365005
Ep: 2791   Rew: -102.26   Avg Rew: -110.30   Polyak: 0.995000   Buffer: 92.71   Loss: 110.765472  13.269826  11.874791
Ep: 2792   Rew: -103.75   Avg Rew: -110.27   Polyak: 0.995000   Buffer: 92.72   Loss: 111.977890  113.865891  108.339333
Ep: 2793   Rew: -103.06   Avg Rew: -110.25   Polyak: 0.995000   Buffer: 92.74   Loss: 110.943665  10.805256  11.505414
Ep: 2794   Rew: -101.46   Avg Rew: -110.21   Polyak: 0.995000   Buffer: 92.76   Loss: 110.412781  15.031374  12.780880
Ep: 2795   Rew: -104.01   Avg Rew: -110.22   Po

Ep: 2856   Rew: -104.32   Avg Rew: -105.74   Polyak: 0.995000   Buffer: 94.47   Loss: 113.061478  13.816967  11.944445
Ep: 2857   Rew: -102.98   Avg Rew: -105.74   Polyak: 0.995000   Buffer: 94.49   Loss: 112.685074  14.121413  13.936845
Ep: 2858   Rew: -103.98   Avg Rew: -105.74   Polyak: 0.995000   Buffer: 94.50   Loss: 112.672615  14.034567  15.010777
Ep: 2859   Rew: -100.95   Avg Rew: -105.64   Polyak: 0.995000   Buffer: 94.52   Loss: 111.864502  9.799862  12.306570
Ep: 2860   Rew: -103.38   Avg Rew: -105.59   Polyak: 0.995000   Buffer: 94.54   Loss: 112.039139  17.176720  12.193970
Ep: 2861   Rew: -102.62   Avg Rew: -105.60   Polyak: 0.995000   Buffer: 94.55   Loss: 112.117897  13.425480  11.352037
Ep: 2862   Rew: -101.63   Avg Rew: -105.62   Polyak: 0.995000   Buffer: 94.57   Loss: 112.203781  10.840580  12.633637
Ep: 2863   Rew: -100.75   Avg Rew: -105.61   Polyak: 0.995000   Buffer: 94.59   Loss: 113.163277  14.632995  14.756330
Ep: 2864   Rew: -101.30   Avg Rew: -105.60   Poly

Ep: 2925   Rew: -106.08   Avg Rew: -102.41   Polyak: 0.995000   Buffer: 95.69   Loss: 112.367256  13.957911  11.482374
Ep: 2926   Rew: -101.44   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 95.71   Loss: 112.802292  11.742352  11.035852
Ep: 2927   Rew: -99.99   Avg Rew: -102.36   Polyak: 0.995000   Buffer: 95.72   Loss: 112.670761  13.800848  13.559607
Ep: 2928   Rew: -100.60   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 95.74   Loss: 112.087372  16.052359  15.459389
Ep: 2929   Rew: -101.30   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 95.76   Loss: 111.836372  13.643623  14.572145
Ep: 2930   Rew: -102.20   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 95.78   Loss: 112.493965  12.744347  14.388202
Ep: 2931   Rew: -102.94   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 95.79   Loss: 112.310684  11.147945  11.516010
Ep: 2932   Rew: -104.20   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 95.81   Loss: 112.673119  12.412987  13.104671
Ep: 2933   Rew: -101.36   Avg Rew: -102.33   Poly

Ep: 2994   Rew: -100.61   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 96.89   Loss: 112.748856  12.696081  11.365831
Ep: 2995   Rew: -102.75   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 96.91   Loss: 112.920769  12.366364  14.810891
Ep: 2996   Rew: -104.78   Avg Rew: -102.41   Polyak: 0.995000   Buffer: 96.92   Loss: 113.614784  16.858315  12.422850
Ep: 2997   Rew: -101.79   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 96.94   Loss: 113.504402  12.405905  13.264237
Ep: 2998   Rew: -101.55   Avg Rew: -102.38   Polyak: 0.995000   Buffer: 96.96   Loss: 113.336884  12.975754  12.745388
Ep: 2999   Rew: -103.66   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 96.97   Loss: 112.632729  13.295286  17.017031
Ep: 3000   Rew: -101.75   Avg Rew: -102.42   Polyak: 0.995000   Buffer: 96.99   Loss: 112.817062  13.241780  13.031839
Ep: 3001   Rew: -101.57   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 97.01   Loss: 112.391312  14.455649  13.163564
Ep: 3002   Rew: -100.53   Avg Rew: -102.36   Pol

Ep: 3063   Rew: -101.05   Avg Rew: -103.05   Polyak: 0.995000   Buffer: 98.20   Loss: 113.370804  12.622346  10.412277
Ep: 3064   Rew: -100.97   Avg Rew: -103.01   Polyak: 0.995000   Buffer: 98.22   Loss: 112.253868  15.152466  12.996139
Ep: 3065   Rew: -100.46   Avg Rew: -103.00   Polyak: 0.995000   Buffer: 98.24   Loss: 113.334793  11.763213  11.071766
Ep: 3066   Rew: -99.73   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 98.26   Loss: 112.504814  12.854670  13.814266
Ep: 3067   Rew: -99.58   Avg Rew: -102.93   Polyak: 0.995000   Buffer: 98.28   Loss: 113.938065  14.003839  11.552822
Ep: 3068   Rew: -102.25   Avg Rew: -102.89   Polyak: 0.995000   Buffer: 98.30   Loss: 112.474960  12.240358  12.107750
Ep: 3069   Rew: -100.76   Avg Rew: -102.86   Polyak: 0.995000   Buffer: 98.32   Loss: 112.900330  14.556161  13.479626
Ep: 3070   Rew: -99.92   Avg Rew: -102.85   Polyak: 0.995000   Buffer: 98.34   Loss: 113.229637  11.942922  13.905767
Ep: 3071   Rew: -101.26   Avg Rew: -102.85   Polyak

Ep: 3132   Rew: -105.06   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 112.827797  11.113688  12.621853
Ep: 3133   Rew: -104.43   Avg Rew: -102.97   Polyak: 0.995000   Buffer: 100.00   Loss: 114.055557  11.846613  15.771837
Ep: 3134   Rew: -100.62   Avg Rew: -102.93   Polyak: 0.995000   Buffer: 100.00   Loss: 112.839806  15.849558  12.842505
Ep: 3135   Rew: -102.35   Avg Rew: -102.91   Polyak: 0.995000   Buffer: 100.00   Loss: 113.989471  9.985081  12.763899
Ep: 3136   Rew: -100.30   Avg Rew: -102.90   Polyak: 0.995000   Buffer: 100.00   Loss: 113.638184  14.246345  12.389629
Ep: 3137   Rew: -105.79   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 113.103012  12.203424  9.848765
Ep: 3138   Rew: -104.07   Avg Rew: -102.97   Polyak: 0.995000   Buffer: 100.00   Loss: 113.150673  15.494545  13.011609
Ep: 3139   Rew: -102.50   Avg Rew: -102.97   Polyak: 0.995000   Buffer: 100.00   Loss: 113.522591  16.912010  15.706562
Ep: 3140   Rew: -104.18   Avg Rew: -102.99

Ep: 3201   Rew: -102.23   Avg Rew: -104.74   Polyak: 0.995000   Buffer: 100.00   Loss: 113.592079  12.052009  12.176121
Ep: 3202   Rew: -104.71   Avg Rew: -104.78   Polyak: 0.995000   Buffer: 100.00   Loss: 114.348099  15.161953  10.888738
Ep: 3203   Rew: -102.28   Avg Rew: -104.80   Polyak: 0.995000   Buffer: 100.00   Loss: 113.570694  13.859190  14.883166
Ep: 3204   Rew: -104.85   Avg Rew: -104.83   Polyak: 0.995000   Buffer: 100.00   Loss: 113.811653  12.878889  14.971834
Ep: 3205   Rew: -102.67   Avg Rew: -104.85   Polyak: 0.995000   Buffer: 100.00   Loss: 112.838638  10.460393  14.595781
Ep: 3206   Rew: -104.79   Avg Rew: -104.89   Polyak: 0.995000   Buffer: 100.00   Loss: 113.881966  12.988226  12.617535
Ep: 3207   Rew: -107.12   Avg Rew: -104.95   Polyak: 0.995000   Buffer: 100.00   Loss: 112.823097  15.832148  14.981873
Ep: 3208   Rew: -104.69   Avg Rew: -104.98   Polyak: 0.995000   Buffer: 100.00   Loss: 114.189369  11.255267  10.955402
Ep: 3209   Rew: -102.28   Avg Rew: -104.

Ep: 3270   Rew: -105.84   Avg Rew: -103.70   Polyak: 0.995000   Buffer: 100.00   Loss: 112.909279  13.408762  13.062077
Ep: 3271   Rew: -103.78   Avg Rew: -103.68   Polyak: 0.995000   Buffer: 100.00   Loss: 110.657326  15.906972  13.686163
Ep: 3272   Rew: -102.54   Avg Rew: -103.68   Polyak: 0.995000   Buffer: 100.00   Loss: 113.345894  13.303389  16.562235
Ep: 3273   Rew: -100.51   Avg Rew: -103.65   Polyak: 0.995000   Buffer: 100.00   Loss: 113.892532  13.672848  13.092018
Ep: 3274   Rew: -105.13   Avg Rew: -103.66   Polyak: 0.995000   Buffer: 100.00   Loss: 112.979652  13.354151  13.143332
Ep: 3275   Rew: -102.33   Avg Rew: -103.63   Polyak: 0.995000   Buffer: 100.00   Loss: 114.028328  12.156689  15.145082
Ep: 3276   Rew: -103.22   Avg Rew: -103.62   Polyak: 0.995000   Buffer: 100.00   Loss: 112.527351  12.989962  12.306229
Ep: 3277   Rew: -104.45   Avg Rew: -103.63   Polyak: 0.995000   Buffer: 100.00   Loss: 113.701096  13.519405  10.493913
Ep: 3278   Rew: -99.99   Avg Rew: -103.5

Ep: 3339   Rew: -102.18   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 113.977837  14.145329  13.706144
Ep: 3340   Rew: -100.50   Avg Rew: -102.37   Polyak: 0.995000   Buffer: 100.00   Loss: 111.574867  11.953908  11.380802
Ep: 3341   Rew: -100.65   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 112.635765  14.444899  13.224982
Ep: 3342   Rew: -105.00   Avg Rew: -102.38   Polyak: 0.995000   Buffer: 100.00   Loss: 112.633453  15.540785  13.544325
Ep: 3343   Rew: -100.98   Avg Rew: -102.36   Polyak: 0.995000   Buffer: 100.00   Loss: 112.663284  13.682523  14.644218
Ep: 3344   Rew: -101.66   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 111.841751  14.371577  13.548461
Ep: 3345   Rew: -101.67   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 112.495544  12.683833  12.394857
Ep: 3346   Rew: -103.01   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 100.00   Loss: 112.523232  13.593343  11.227357
Ep: 3347   Rew: -100.87   Avg Rew: -102.

Ep: 3408   Rew: -101.88   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 113.699173  10.177843  12.102449
Ep: 3409   Rew: -102.00   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 111.326698  10.333744  11.895031
Ep: 3410   Rew: -102.71   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 112.894699  13.242634  12.446522
Ep: 3411   Rew: -100.51   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 111.778557  14.393209  12.696217
Ep: 3412   Rew: -100.94   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 111.490318  11.519539  11.901091
Ep: 3413   Rew: -101.73   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 112.197792  15.393314  13.878551
Ep: 3414   Rew: -101.83   Avg Rew: -102.23   Polyak: 0.995000   Buffer: 100.00   Loss: 112.099258  12.445892  8.847118
Ep: 3415   Rew: -100.35   Avg Rew: -102.23   Polyak: 0.995000   Buffer: 100.00   Loss: 111.965981  13.852386  11.975470
Ep: 3416   Rew: -100.41   Avg Rew: -102.1

Ep: 3477   Rew: -102.14   Avg Rew: -102.26   Polyak: 0.995000   Buffer: 100.00   Loss: 111.582321  14.895645  15.981944
Ep: 3478   Rew: -102.24   Avg Rew: -102.26   Polyak: 0.995000   Buffer: 100.00   Loss: 110.965279  13.312294  11.684258
Ep: 3479   Rew: -102.21   Avg Rew: -102.26   Polyak: 0.995000   Buffer: 100.00   Loss: 112.966377  13.300990  11.873313
Ep: 3480   Rew: -101.35   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 111.605560  11.498869  10.476207
Ep: 3481   Rew: -100.94   Avg Rew: -102.22   Polyak: 0.995000   Buffer: 100.00   Loss: 112.611900  10.033699  12.357939
Ep: 3482   Rew: -101.62   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 111.678673  11.904327  12.432892
Ep: 3483   Rew: -100.76   Avg Rew: -102.16   Polyak: 0.995000   Buffer: 100.00   Loss: 111.956894  8.964413  11.571743
Ep: 3484   Rew: -103.68   Avg Rew: -102.17   Polyak: 0.995000   Buffer: 100.00   Loss: 112.250351  12.757957  11.174207
Ep: 3485   Rew: -100.84   Avg Rew: -102.1

Ep: 3546   Rew: -101.76   Avg Rew: -102.48   Polyak: 0.995000   Buffer: 100.00   Loss: 111.667809  10.765022  12.337425
Ep: 3547   Rew: -101.72   Avg Rew: -102.48   Polyak: 0.995000   Buffer: 100.00   Loss: 111.478119  13.519415  11.676962
Ep: 3548   Rew: -102.51   Avg Rew: -102.48   Polyak: 0.995000   Buffer: 100.00   Loss: 111.389427  14.384392  13.030182
Ep: 3549   Rew: -103.05   Avg Rew: -102.49   Polyak: 0.995000   Buffer: 100.00   Loss: 111.643257  13.884049  16.412399
Ep: 3550   Rew: -102.77   Avg Rew: -102.48   Polyak: 0.995000   Buffer: 100.00   Loss: 111.780510  12.247680  15.411893
Ep: 3551   Rew: -104.33   Avg Rew: -102.49   Polyak: 0.995000   Buffer: 100.00   Loss: 112.028282  12.746897  11.582892
Ep: 3552   Rew: -102.90   Avg Rew: -102.49   Polyak: 0.995000   Buffer: 100.00   Loss: 111.366539  13.764709  11.633756
Ep: 3553   Rew: -104.20   Avg Rew: -102.49   Polyak: 0.995000   Buffer: 100.00   Loss: 112.259178  10.201797  11.472782
Ep: 3554   Rew: -103.81   Avg Rew: -102.

Ep: 3615   Rew: -103.85   Avg Rew: -102.81   Polyak: 0.995000   Buffer: 100.00   Loss: 112.398064  14.420656  10.961562
Ep: 3616   Rew: -104.98   Avg Rew: -102.85   Polyak: 0.995000   Buffer: 100.00   Loss: 112.165634  13.289936  10.768353
Ep: 3617   Rew: -102.24   Avg Rew: -102.85   Polyak: 0.995000   Buffer: 100.00   Loss: 111.212860  11.584075  13.362703
Ep: 3618   Rew: -103.24   Avg Rew: -102.84   Polyak: 0.995000   Buffer: 100.00   Loss: 111.540810  11.512828  14.466995
Ep: 3619   Rew: -102.77   Avg Rew: -102.85   Polyak: 0.995000   Buffer: 100.00   Loss: 111.753853  9.344625  14.678771
Ep: 3620   Rew: -102.64   Avg Rew: -102.87   Polyak: 0.995000   Buffer: 100.00   Loss: 110.754204  11.394464  11.968256
Ep: 3621   Rew: -105.40   Avg Rew: -102.89   Polyak: 0.995000   Buffer: 100.00   Loss: 110.914024  11.755903  12.563997
Ep: 3622   Rew: -104.56   Avg Rew: -102.90   Polyak: 0.995000   Buffer: 100.00   Loss: 110.145508  8.971162  9.406793
Ep: 3623   Rew: -101.33   Avg Rew: -102.89 

Ep: 3684   Rew: -102.48   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 111.438652  11.164963  13.739012
Ep: 3685   Rew: -102.95   Avg Rew: -102.64   Polyak: 0.995000   Buffer: 100.00   Loss: 111.071457  14.081848  11.304112
Ep: 3686   Rew: -105.67   Avg Rew: -102.68   Polyak: 0.995000   Buffer: 100.00   Loss: 110.854492  10.162045  11.047917
Ep: 3687   Rew: -105.55   Avg Rew: -102.73   Polyak: 0.995000   Buffer: 100.00   Loss: 110.434807  13.736188  10.962836
Ep: 3688   Rew: -104.32   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 111.592293  13.490808  12.677334
Ep: 3689   Rew: -103.32   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 110.765251  10.769950  10.849418
Ep: 3690   Rew: -103.36   Avg Rew: -102.74   Polyak: 0.995000   Buffer: 100.00   Loss: 110.787796  13.028419  13.105084
Ep: 3691   Rew: -103.25   Avg Rew: -102.74   Polyak: 0.995000   Buffer: 100.00   Loss: 111.346649  13.898557  11.059653
Ep: 3692   Rew: -105.08   Avg Rew: -102.

Ep: 3753   Rew: -105.36   Avg Rew: -103.13   Polyak: 0.995000   Buffer: 100.00   Loss: 110.720215  10.910096  11.851528
Ep: 3754   Rew: -101.22   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 111.018265  10.549317  11.913319
Ep: 3755   Rew: -99.97   Avg Rew: -103.08   Polyak: 0.995000   Buffer: 100.00   Loss: 111.944374  10.108066  11.330594
Ep: 3756   Rew: -101.35   Avg Rew: -103.06   Polyak: 0.995000   Buffer: 100.00   Loss: 111.004295  12.358883  11.308012
Ep: 3757   Rew: -105.11   Avg Rew: -103.09   Polyak: 0.995000   Buffer: 100.00   Loss: 110.850143  13.782872  12.021741
Ep: 3758   Rew: -102.29   Avg Rew: -103.10   Polyak: 0.995000   Buffer: 100.00   Loss: 110.730347  13.554937  14.290727
Ep: 3759   Rew: -100.14   Avg Rew: -103.07   Polyak: 0.995000   Buffer: 100.00   Loss: 110.933601  14.044714  13.353483
Ep: 3760   Rew: -101.00   Avg Rew: -103.05   Polyak: 0.995000   Buffer: 100.00   Loss: 111.298195  11.961081  11.480890
Ep: 3761   Rew: -103.75   Avg Rew: -103.0

Ep: 3822   Rew: -101.68   Avg Rew: -102.23   Polyak: 0.995000   Buffer: 100.00   Loss: 110.550278  11.134485  9.865931
Ep: 3823   Rew: -102.15   Avg Rew: -102.22   Polyak: 0.995000   Buffer: 100.00   Loss: 110.373825  10.687881  12.234757
Ep: 3824   Rew: -101.98   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 110.741814  12.551873  13.031775
Ep: 3825   Rew: -101.18   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 111.373474  13.636673  11.937188
Ep: 3826   Rew: -101.09   Avg Rew: -102.18   Polyak: 0.995000   Buffer: 100.00   Loss: 110.315186  12.014136  12.380768
Ep: 3827   Rew: -104.20   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 110.673187  17.253626  12.031845
Ep: 3828   Rew: -104.26   Avg Rew: -102.22   Polyak: 0.995000   Buffer: 100.00   Loss: 110.452827  13.304884  14.305777
Ep: 3829   Rew: -100.73   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 110.706741  10.345171  11.733702
Ep: 3830   Rew: -102.30   Avg Rew: -102.1

Ep: 3891   Rew: -105.94   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 110.641922  12.141200  10.108158
Ep: 3892   Rew: -104.24   Avg Rew: -102.98   Polyak: 0.995000   Buffer: 100.00   Loss: 111.267410  11.056505  11.188745
Ep: 3893   Rew: -105.72   Avg Rew: -103.02   Polyak: 0.995000   Buffer: 100.00   Loss: 110.173409  13.517819  13.269997
Ep: 3894   Rew: -105.81   Avg Rew: -103.07   Polyak: 0.995000   Buffer: 100.00   Loss: 110.082771  11.102474  13.623154
Ep: 3895   Rew: -104.80   Avg Rew: -103.10   Polyak: 0.995000   Buffer: 100.00   Loss: 109.506615  11.138687  12.108814
Ep: 3896   Rew: -104.99   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 110.479050  11.402931  11.427958
Ep: 3897   Rew: -105.55   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 111.278511  13.052383  12.680680
Ep: 3898   Rew: -102.97   Avg Rew: -103.18   Polyak: 0.995000   Buffer: 100.00   Loss: 110.209999  12.615013  13.675745
Ep: 3899   Rew: -101.51   Avg Rew: -103.

Ep: 3960   Rew: -102.22   Avg Rew: -103.60   Polyak: 0.995000   Buffer: 100.00   Loss: 111.706909  15.681070  13.076395
Ep: 3961   Rew: -102.53   Avg Rew: -103.58   Polyak: 0.995000   Buffer: 100.00   Loss: 111.378510  10.969336  10.769644
Ep: 3962   Rew: -101.93   Avg Rew: -103.56   Polyak: 0.995000   Buffer: 100.00   Loss: 110.143478  13.049522  13.779797
Ep: 3963   Rew: -103.23   Avg Rew: -103.56   Polyak: 0.995000   Buffer: 100.00   Loss: 111.552162  14.484228  11.331159
Ep: 3964   Rew: -101.73   Avg Rew: -103.55   Polyak: 0.995000   Buffer: 100.00   Loss: 110.814308  12.415187  13.121699
Ep: 3965   Rew: -101.29   Avg Rew: -103.53   Polyak: 0.995000   Buffer: 100.00   Loss: 110.437790  12.909219  10.743935
Ep: 3966   Rew: -100.87   Avg Rew: -103.50   Polyak: 0.995000   Buffer: 100.00   Loss: 110.013107  14.131441  11.638302
Ep: 3967   Rew: -102.84   Avg Rew: -103.49   Polyak: 0.995000   Buffer: 100.00   Loss: 111.418022  13.307909  12.032647
Ep: 3968   Rew: -103.18   Avg Rew: -103.

Ep: 4029   Rew: -101.78   Avg Rew: -102.42   Polyak: 0.995000   Buffer: 100.00   Loss: 110.291634  13.987541  13.226427
Ep: 4030   Rew: -101.66   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 110.471283  12.354034  12.533974
Ep: 4031   Rew: -104.36   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 109.760201  11.990664  13.328750
Ep: 4032   Rew: -105.40   Avg Rew: -102.41   Polyak: 0.995000   Buffer: 100.00   Loss: 110.482559  11.390238  10.824085
Ep: 4033   Rew: -103.33   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 109.911995  9.890331  10.005251
Ep: 4034   Rew: -102.80   Avg Rew: -102.38   Polyak: 0.995000   Buffer: 100.00   Loss: 110.065239  10.140832  11.408559
Ep: 4035   Rew: -101.73   Avg Rew: -102.36   Polyak: 0.995000   Buffer: 100.00   Loss: 109.993324  13.163958  11.395489
Ep: 4036   Rew: -101.20   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 109.351936  15.677852  12.164800
Ep: 4037   Rew: -107.30   Avg Rew: -102.3

Ep: 4098   Rew: -102.51   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 110.850243  14.490667  11.206158
Ep: 4099   Rew: -102.19   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 109.806656  11.083985  11.775284
Ep: 4100   Rew: -103.44   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 110.844902  10.240367  10.172077
Ep: 4101   Rew: -102.75   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 110.179916  11.477604  13.810743
Ep: 4102   Rew: -101.74   Avg Rew: -102.10   Polyak: 0.995000   Buffer: 100.00   Loss: 108.500443  9.957806  10.703063
Ep: 4103   Rew: -102.37   Avg Rew: -102.09   Polyak: 0.995000   Buffer: 100.00   Loss: 110.068062  10.778276  12.975567
Ep: 4104   Rew: -103.40   Avg Rew: -102.09   Polyak: 0.995000   Buffer: 100.00   Loss: 109.455956  13.853846  11.294926
Ep: 4105   Rew: -102.92   Avg Rew: -102.09   Polyak: 0.995000   Buffer: 100.00   Loss: 109.670982  10.635787  11.266596
Ep: 4106   Rew: -103.94   Avg Rew: -102.0

Ep: 4167   Rew: -101.65   Avg Rew: -102.57   Polyak: 0.995000   Buffer: 100.00   Loss: 110.185997  10.637230  13.022490
Ep: 4168   Rew: -101.92   Avg Rew: -102.57   Polyak: 0.995000   Buffer: 100.00   Loss: 110.464981  13.827723  10.193817
Ep: 4169   Rew: -102.38   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 109.838852  16.526672  12.182349
Ep: 4170   Rew: -103.71   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 109.366257  10.465282  14.327111
Ep: 4171   Rew: -103.67   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 109.916641  11.763924  12.320078
Ep: 4172   Rew: -102.26   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 110.604881  10.544029  14.150044
Ep: 4173   Rew: -103.13   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 110.360588  11.793269  12.941652
Ep: 4174   Rew: -102.63   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 110.174828  11.920382  9.500296
Ep: 4175   Rew: -102.37   Avg Rew: -102.5

Ep: 4236   Rew: -101.98   Avg Rew: -102.45   Polyak: 0.995000   Buffer: 100.00   Loss: 109.775909  14.509024  11.289105
Ep: 4237   Rew: -101.53   Avg Rew: -102.46   Polyak: 0.995000   Buffer: 100.00   Loss: 109.481331  13.233814  12.088441
Ep: 4238   Rew: -100.74   Avg Rew: -102.44   Polyak: 0.995000   Buffer: 100.00   Loss: 110.797836  12.901849  13.166597
Ep: 4239   Rew: -102.14   Avg Rew: -102.45   Polyak: 0.995000   Buffer: 100.00   Loss: 109.941589  12.531521  13.672914
Ep: 4240   Rew: -101.27   Avg Rew: -102.45   Polyak: 0.995000   Buffer: 100.00   Loss: 109.506683  15.194521  13.341383
Ep: 4241   Rew: -101.05   Avg Rew: -102.44   Polyak: 0.995000   Buffer: 100.00   Loss: 109.905449  14.226362  13.081772
Ep: 4242   Rew: -101.10   Avg Rew: -102.42   Polyak: 0.995000   Buffer: 100.00   Loss: 109.770912  12.224244  12.401071
Ep: 4243   Rew: -100.94   Avg Rew: -102.41   Polyak: 0.995000   Buffer: 100.00   Loss: 110.283058  8.850228  8.931234
Ep: 4244   Rew: -101.38   Avg Rew: -102.40

Ep: 4305   Rew: -104.05   Avg Rew: -102.99   Polyak: 0.995000   Buffer: 100.00   Loss: 109.237961  12.162542  9.591190
Ep: 4306   Rew: -103.36   Avg Rew: -103.02   Polyak: 0.995000   Buffer: 100.00   Loss: 110.122696  14.766880  14.030712
Ep: 4307   Rew: -103.90   Avg Rew: -103.07   Polyak: 0.995000   Buffer: 100.00   Loss: 109.085686  10.986639  12.956596
Ep: 4308   Rew: -103.93   Avg Rew: -103.09   Polyak: 0.995000   Buffer: 100.00   Loss: 109.240982  11.527191  10.561905
Ep: 4309   Rew: -103.25   Avg Rew: -103.10   Polyak: 0.995000   Buffer: 100.00   Loss: 108.906326  11.887009  9.358084
Ep: 4310   Rew: -104.38   Avg Rew: -103.12   Polyak: 0.995000   Buffer: 100.00   Loss: 110.458664  12.202336  14.093059
Ep: 4311   Rew: -106.28   Avg Rew: -103.13   Polyak: 0.995000   Buffer: 100.00   Loss: 109.921425  10.800274  11.707300
Ep: 4312   Rew: -105.79   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 109.737808  11.243533  10.936935
Ep: 4313   Rew: -103.98   Avg Rew: -103.19

Ep: 4374   Rew: -102.44   Avg Rew: -103.65   Polyak: 0.995000   Buffer: 100.00   Loss: 109.283134  11.195223  12.771112
Ep: 4375   Rew: -102.25   Avg Rew: -103.66   Polyak: 0.995000   Buffer: 100.00   Loss: 109.939598  11.197140  13.707891
Ep: 4376   Rew: -103.48   Avg Rew: -103.67   Polyak: 0.995000   Buffer: 100.00   Loss: 109.281548  10.448074  9.705424
Ep: 4377   Rew: -102.65   Avg Rew: -103.66   Polyak: 0.995000   Buffer: 100.00   Loss: 109.387474  11.718584  10.376299
Ep: 4378   Rew: -102.50   Avg Rew: -103.66   Polyak: 0.995000   Buffer: 100.00   Loss: 109.998459  11.837999  10.748919
Ep: 4379   Rew: -104.02   Avg Rew: -103.69   Polyak: 0.995000   Buffer: 100.00   Loss: 109.599129  12.891321  9.857824
Ep: 4380   Rew: -103.37   Avg Rew: -103.70   Polyak: 0.995000   Buffer: 100.00   Loss: 108.919144  11.761909  11.107435
Ep: 4381   Rew: -101.32   Avg Rew: -103.68   Polyak: 0.995000   Buffer: 100.00   Loss: 109.120277  114.251350  118.156906
Ep: 4382   Rew: -102.35   Avg Rew: -103.

Ep: 4443   Rew: -104.32   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 110.123360  12.902129  12.114529
Ep: 4444   Rew: -102.93   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 110.574638  10.730169  10.668678
Ep: 4445   Rew: -101.75   Avg Rew: -102.58   Polyak: 0.995000   Buffer: 100.00   Loss: 109.739014  9.517517  14.669353
Ep: 4446   Rew: -103.67   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 110.030724  12.775669  9.517797
Ep: 4447   Rew: -103.67   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 109.715233  12.899200  10.181984
Ep: 4448   Rew: -102.24   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 110.008995  12.122816  12.038999
Ep: 4449   Rew: -103.34   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 108.658875  12.142095  10.873521
Ep: 4450   Rew: -102.81   Avg Rew: -102.65   Polyak: 0.995000   Buffer: 100.00   Loss: 109.395508  12.153074  11.558212
Ep: 4451   Rew: -102.77   Avg Rew: -102.68

Ep: 4512   Rew: -101.16   Avg Rew: -104.02   Polyak: 0.995000   Buffer: 100.00   Loss: 109.653214  12.917106  13.256281
Ep: 4513   Rew: -104.24   Avg Rew: -104.04   Polyak: 0.995000   Buffer: 100.00   Loss: 109.918259  12.543825  11.125885
Ep: 4514   Rew: -104.64   Avg Rew: -104.06   Polyak: 0.995000   Buffer: 100.00   Loss: 109.246391  10.608257  12.456032
Ep: 4515   Rew: -103.44   Avg Rew: -104.05   Polyak: 0.995000   Buffer: 100.00   Loss: 109.764336  10.455383  13.588634
Ep: 4516   Rew: -105.01   Avg Rew: -104.07   Polyak: 0.995000   Buffer: 100.00   Loss: 109.534447  9.860331  11.773480
Ep: 4517   Rew: -105.69   Avg Rew: -104.08   Polyak: 0.995000   Buffer: 100.00   Loss: 108.450058  13.047916  13.689140
Ep: 4518   Rew: -105.52   Avg Rew: -104.10   Polyak: 0.995000   Buffer: 100.00   Loss: 109.533180  11.081650  10.339826
Ep: 4519   Rew: -104.64   Avg Rew: -104.12   Polyak: 0.995000   Buffer: 100.00   Loss: 108.955727  11.711025  9.671354
Ep: 4520   Rew: -103.59   Avg Rew: -104.15

Ep: 4581   Rew: -103.81   Avg Rew: -103.60   Polyak: 0.995000   Buffer: 100.00   Loss: 108.822701  12.635670  11.773550
Ep: 4582   Rew: -104.72   Avg Rew: -103.59   Polyak: 0.995000   Buffer: 100.00   Loss: 108.759239  11.961702  13.461893
Ep: 4583   Rew: -105.24   Avg Rew: -103.59   Polyak: 0.995000   Buffer: 100.00   Loss: 109.494324  9.108277  12.089767
Ep: 4584   Rew: -104.23   Avg Rew: -103.58   Polyak: 0.995000   Buffer: 100.00   Loss: 109.546371  11.362744  14.699765
Ep: 4585   Rew: -105.48   Avg Rew: -103.58   Polyak: 0.995000   Buffer: 100.00   Loss: 110.043907  11.328596  10.372486
Ep: 4586   Rew: -104.63   Avg Rew: -103.58   Polyak: 0.995000   Buffer: 100.00   Loss: 109.363579  10.741259  12.328176
Ep: 4587   Rew: -104.72   Avg Rew: -103.58   Polyak: 0.995000   Buffer: 100.00   Loss: 110.472672  13.473053  10.175945
Ep: 4588   Rew: -102.96   Avg Rew: -103.56   Polyak: 0.995000   Buffer: 100.00   Loss: 109.872963  12.424271  11.507888
Ep: 4589   Rew: -101.04   Avg Rew: -103.5

Ep: 4650   Rew: -103.41   Avg Rew: -102.64   Polyak: 0.995000   Buffer: 100.00   Loss: 109.198944  10.481871  10.276367
Ep: 4651   Rew: -100.37   Avg Rew: -102.58   Polyak: 0.995000   Buffer: 100.00   Loss: 108.938293  13.925563  14.200144
Ep: 4652   Rew: -101.75   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 109.297478  10.683372  13.236269
Ep: 4653   Rew: -101.19   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 110.311264  12.236649  9.602958
Ep: 4654   Rew: -102.16   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 109.599464  11.508987  10.936298
Ep: 4655   Rew: -104.01   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 109.145729  13.797411  10.464478
Ep: 4656   Rew: -101.99   Avg Rew: -102.54   Polyak: 0.995000   Buffer: 100.00   Loss: 108.739151  9.194670  10.629504
Ep: 4657   Rew: -103.24   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 108.969368  9.041991  9.572546
Ep: 4658   Rew: -103.24   Avg Rew: -102.57  

Ep: 4719   Rew: -105.41   Avg Rew: -102.92   Polyak: 0.995000   Buffer: 100.00   Loss: 109.152901  17.240524  13.934949
Ep: 4720   Rew: -101.60   Avg Rew: -102.92   Polyak: 0.995000   Buffer: 100.00   Loss: 108.591209  13.531877  11.843336
Ep: 4721   Rew: -105.00   Avg Rew: -102.93   Polyak: 0.995000   Buffer: 100.00   Loss: 108.373314  11.757285  12.135450
Ep: 4722   Rew: -100.31   Avg Rew: -102.90   Polyak: 0.995000   Buffer: 100.00   Loss: 109.013077  11.310316  12.859602
Ep: 4723   Rew: -100.94   Avg Rew: -102.87   Polyak: 0.995000   Buffer: 100.00   Loss: 108.467262  15.912559  13.388857
Ep: 4724   Rew: -101.70   Avg Rew: -102.86   Polyak: 0.995000   Buffer: 100.00   Loss: 107.983582  14.816686  13.199177
Ep: 4725   Rew: -100.99   Avg Rew: -102.85   Polyak: 0.995000   Buffer: 100.00   Loss: 108.983734  10.882864  10.142949
Ep: 4726   Rew: -104.99   Avg Rew: -102.87   Polyak: 0.995000   Buffer: 100.00   Loss: 108.466148  12.467913  10.798842
Ep: 4727   Rew: -101.02   Avg Rew: -102.

Ep: 4788   Rew: -102.30   Avg Rew: -103.03   Polyak: 0.995000   Buffer: 100.00   Loss: 107.962059  11.569366  9.087475
Ep: 4789   Rew: -102.42   Avg Rew: -103.02   Polyak: 0.995000   Buffer: 100.00   Loss: 108.060883  12.229572  12.186665
Ep: 4790   Rew: -105.99   Avg Rew: -103.02   Polyak: 0.995000   Buffer: 100.00   Loss: 108.801094  10.848080  12.477077
Ep: 4791   Rew: -102.71   Avg Rew: -103.03   Polyak: 0.995000   Buffer: 100.00   Loss: 109.479561  12.078298  12.312084
Ep: 4792   Rew: -102.22   Avg Rew: -103.03   Polyak: 0.995000   Buffer: 100.00   Loss: 108.430397  12.393493  11.284925
Ep: 4793   Rew: -102.12   Avg Rew: -103.00   Polyak: 0.995000   Buffer: 100.00   Loss: 108.991905  11.824896  11.577342
Ep: 4794   Rew: -102.20   Avg Rew: -102.88   Polyak: 0.995000   Buffer: 100.00   Loss: 108.943787  13.046497  11.138425
Ep: 4795   Rew: -105.29   Avg Rew: -102.90   Polyak: 0.995000   Buffer: 100.00   Loss: 109.078384  10.233820  11.996532
Ep: 4796   Rew: -99.65   Avg Rew: -102.87

Ep: 4857   Rew: -102.04   Avg Rew: -102.37   Polyak: 0.995000   Buffer: 100.00   Loss: 109.074852  11.103325  12.110395
Ep: 4858   Rew: -100.52   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 109.770668  10.306516  12.118769
Ep: 4859   Rew: -102.69   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 107.942108  10.722631  10.430567
Ep: 4860   Rew: -102.48   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 108.360909  10.542027  13.053556
Ep: 4861   Rew: -101.70   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 108.998909  16.503033  13.759984
Ep: 4862   Rew: -101.02   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 100.00   Loss: 109.924736  11.151708  11.179725
Ep: 4863   Rew: -100.76   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 109.247734  12.609473  11.239362
Ep: 4864   Rew: -101.58   Avg Rew: -102.26   Polyak: 0.995000   Buffer: 100.00   Loss: 108.632751  8.474104  9.919367
Ep: 4865   Rew: -102.65   Avg Rew: -102.22

Ep: 4926   Rew: -101.93   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 108.334412  10.074021  9.152195
Ep: 4927   Rew: -100.32   Avg Rew: -102.23   Polyak: 0.995000   Buffer: 100.00   Loss: 109.114899  14.139994  12.182087
Ep: 4928   Rew: -101.40   Avg Rew: -102.23   Polyak: 0.995000   Buffer: 100.00   Loss: 108.017632  9.604851  10.157518
Ep: 4929   Rew: -100.19   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 109.694916  8.035573  12.330384
Ep: 4930   Rew: -103.19   Avg Rew: -102.18   Polyak: 0.995000   Buffer: 100.00   Loss: 109.135567  12.326513  9.940733
Ep: 4931   Rew: -103.02   Avg Rew: -102.18   Polyak: 0.995000   Buffer: 100.00   Loss: 109.098961  9.989380  13.730581
Ep: 4932   Rew: -101.64   Avg Rew: -102.15   Polyak: 0.995000   Buffer: 100.00   Loss: 107.745850  9.597223  11.493384
Ep: 4933   Rew: -102.21   Avg Rew: -102.15   Polyak: 0.995000   Buffer: 100.00   Loss: 108.343781  13.099010  11.015985
Ep: 4934   Rew: -102.00   Avg Rew: -102.16   P

Ep: 4995   Rew: -101.55   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 100.00   Loss: 108.171478  13.675783  10.857845
Ep: 4996   Rew: -101.72   Avg Rew: -102.31   Polyak: 0.995000   Buffer: 100.00   Loss: 108.017288  10.439710  10.255619
Ep: 4997   Rew: -101.94   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 107.408966  9.380247  8.885442
Ep: 4998   Rew: -101.69   Avg Rew: -102.31   Polyak: 0.995000   Buffer: 100.00   Loss: 108.487892  11.786791  10.905875
Ep: 4999   Rew: -102.74   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 108.686577  13.046341  11.209649
Ep: 5000   Rew: -102.40   Avg Rew: -102.29   Polyak: 0.995000   Buffer: 100.00   Loss: 108.391449  12.642332  9.152628
Ep: 5001   Rew: -101.74   Avg Rew: -102.29   Polyak: 0.995000   Buffer: 100.00   Loss: 107.564568  10.108844  12.036070
Ep: 5002   Rew: -103.30   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 107.435516  9.663551  8.367529
Ep: 5003   Rew: -102.12   Avg Rew: -102.32   

Ep: 5064   Rew: -102.05   Avg Rew: -102.78   Polyak: 0.995000   Buffer: 100.00   Loss: 108.571724  12.254813  9.311383
Ep: 5065   Rew: -104.30   Avg Rew: -102.80   Polyak: 0.995000   Buffer: 100.00   Loss: 107.550964  12.249828  10.751626
Ep: 5066   Rew: -102.42   Avg Rew: -102.80   Polyak: 0.995000   Buffer: 100.00   Loss: 108.881500  10.518536  9.301527
Ep: 5067   Rew: -101.74   Avg Rew: -102.79   Polyak: 0.995000   Buffer: 100.00   Loss: 107.281967  12.213298  14.267137
Ep: 5068   Rew: -103.42   Avg Rew: -102.79   Polyak: 0.995000   Buffer: 100.00   Loss: 108.198189  10.662905  11.400504
Ep: 5069   Rew: -103.94   Avg Rew: -102.80   Polyak: 0.995000   Buffer: 100.00   Loss: 108.136848  11.076908  11.820129
Ep: 5070   Rew: -104.04   Avg Rew: -102.82   Polyak: 0.995000   Buffer: 100.00   Loss: 107.876923  13.087548  12.936937
Ep: 5071   Rew: -103.72   Avg Rew: -102.84   Polyak: 0.995000   Buffer: 100.00   Loss: 108.010391  105.881065  115.507568
Ep: 5072   Rew: -104.00   Avg Rew: -102.

Ep: 5133   Rew: -101.83   Avg Rew: -103.46   Polyak: 0.995000   Buffer: 100.00   Loss: 108.253220  8.723501  10.834810
Ep: 5134   Rew: -104.11   Avg Rew: -103.47   Polyak: 0.995000   Buffer: 100.00   Loss: 108.113953  9.655794  8.612839
Ep: 5135   Rew: -103.85   Avg Rew: -103.48   Polyak: 0.995000   Buffer: 100.00   Loss: 108.041916  14.003792  10.448666
Ep: 5136   Rew: -102.36   Avg Rew: -103.47   Polyak: 0.995000   Buffer: 100.00   Loss: 108.356216  10.738178  10.208745
Ep: 5137   Rew: -104.18   Avg Rew: -103.50   Polyak: 0.995000   Buffer: 100.00   Loss: 108.921822  10.134123  9.590595
Ep: 5138   Rew: -105.04   Avg Rew: -103.53   Polyak: 0.995000   Buffer: 100.00   Loss: 108.425194  10.803310  11.381929
Ep: 5139   Rew: -104.57   Avg Rew: -103.56   Polyak: 0.995000   Buffer: 100.00   Loss: 107.744827  11.670645  10.858201
Ep: 5140   Rew: -102.26   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 108.049408  14.085129  13.896236
Ep: 5141   Rew: -102.50   Avg Rew: -103.54  

Ep: 5202   Rew: -101.49   Avg Rew: -103.24   Polyak: 0.995000   Buffer: 100.00   Loss: 107.356293  12.435696  12.088296
Ep: 5203   Rew: -101.46   Avg Rew: -103.24   Polyak: 0.995000   Buffer: 100.00   Loss: 107.332825  10.876184  12.541080
Ep: 5204   Rew: -103.06   Avg Rew: -103.24   Polyak: 0.995000   Buffer: 100.00   Loss: 108.133888  14.521544  9.510599
Ep: 5205   Rew: -102.97   Avg Rew: -103.24   Polyak: 0.995000   Buffer: 100.00   Loss: 107.476013  10.887365  11.831884
Ep: 5206   Rew: -104.03   Avg Rew: -103.24   Polyak: 0.995000   Buffer: 100.00   Loss: 107.953796  9.756845  11.613716
Ep: 5207   Rew: -101.47   Avg Rew: -103.23   Polyak: 0.995000   Buffer: 100.00   Loss: 108.365433  11.251718  10.858123
Ep: 5208   Rew: -103.11   Avg Rew: -103.24   Polyak: 0.995000   Buffer: 100.00   Loss: 107.974434  13.404239  9.278851
Ep: 5209   Rew: -103.82   Avg Rew: -103.24   Polyak: 0.995000   Buffer: 100.00   Loss: 106.106888  13.137689  12.043198
Ep: 5210   Rew: -104.51   Avg Rew: -103.28 

Ep: 5271   Rew: -102.29   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 100.00   Loss: 107.612175  13.133348  12.066738
Ep: 5272   Rew: -101.98   Avg Rew: -102.31   Polyak: 0.995000   Buffer: 100.00   Loss: 107.186058  10.617897  10.283392
Ep: 5273   Rew: -102.87   Avg Rew: -102.31   Polyak: 0.995000   Buffer: 100.00   Loss: 107.396500  12.782228  8.116821
Ep: 5274   Rew: -103.19   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 100.00   Loss: 107.546059  10.441874  11.338100
Ep: 5275   Rew: -103.53   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 107.814438  10.434072  12.421069
Ep: 5276   Rew: -102.27   Avg Rew: -102.27   Polyak: 0.995000   Buffer: 100.00   Loss: 107.327377  10.105379  10.851593
Ep: 5277   Rew: -101.60   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 107.598526  10.831475  11.699056
Ep: 5278   Rew: -103.84   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 107.578636  11.806594  11.624841
Ep: 5279   Rew: -102.17   Avg Rew: -102.1

Ep: 5340   Rew: -100.85   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 108.233620  13.306170  11.191914
Ep: 5341   Rew: -101.43   Avg Rew: -102.41   Polyak: 0.995000   Buffer: 100.00   Loss: 107.710007  12.008399  12.446716
Ep: 5342   Rew: -104.78   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 106.900909  12.565746  10.405617
Ep: 5343   Rew: -102.01   Avg Rew: -102.41   Polyak: 0.995000   Buffer: 100.00   Loss: 107.980034  11.324755  11.825199
Ep: 5344   Rew: -105.27   Avg Rew: -102.45   Polyak: 0.995000   Buffer: 100.00   Loss: 107.734833  10.856016  10.173608
Ep: 5345   Rew: -102.56   Avg Rew: -102.45   Polyak: 0.995000   Buffer: 100.00   Loss: 106.663544  11.730277  9.485773
Ep: 5346   Rew: -103.43   Avg Rew: -102.46   Polyak: 0.995000   Buffer: 100.00   Loss: 107.385986  11.066581  10.048985
Ep: 5347   Rew: -100.16   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 107.444733  12.241563  11.334598
Ep: 5348   Rew: -102.11   Avg Rew: -102.4

Ep: 5409   Rew: -101.31   Avg Rew: -102.06   Polyak: 0.995000   Buffer: 100.00   Loss: 106.580994  10.634797  10.217274
Ep: 5410   Rew: -100.71   Avg Rew: -102.03   Polyak: 0.995000   Buffer: 100.00   Loss: 107.229996  11.283968  12.159365
Ep: 5411   Rew: -100.55   Avg Rew: -101.97   Polyak: 0.995000   Buffer: 100.00   Loss: 107.432808  9.949897  11.918167
Ep: 5412   Rew: -103.34   Avg Rew: -102.00   Polyak: 0.995000   Buffer: 100.00   Loss: 106.884254  10.134782  12.306134
Ep: 5413   Rew: -102.44   Avg Rew: -102.01   Polyak: 0.995000   Buffer: 100.00   Loss: 107.869743  12.227457  11.239020
Ep: 5414   Rew: -104.49   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 107.677223  11.669472  10.396093
Ep: 5415   Rew: -101.10   Avg Rew: -102.03   Polyak: 0.995000   Buffer: 100.00   Loss: 107.665802  10.232447  11.746368
Ep: 5416   Rew: -103.00   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 107.921913  10.537468  11.836069
Ep: 5417   Rew: -101.80   Avg Rew: -102.0

Ep: 5478   Rew: -101.82   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 107.736443  11.034637  10.946136
Ep: 5479   Rew: -102.62   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 106.848602  12.746622  12.926098
Ep: 5480   Rew: -102.79   Avg Rew: -102.31   Polyak: 0.995000   Buffer: 100.00   Loss: 106.996941  13.413625  8.288813
Ep: 5481   Rew: -101.83   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 107.508301  12.109984  12.403425
Ep: 5482   Rew: -102.52   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 107.714729  9.754945  11.781590
Ep: 5483   Rew: -101.54   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 100.00   Loss: 106.645531  9.757908  12.841073
Ep: 5484   Rew: -102.33   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 107.198456  11.149166  9.704646
Ep: 5485   Rew: -103.45   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 107.209068  13.057700  10.787223
Ep: 5486   Rew: -102.92   Avg Rew: -102.37  

Ep: 5547   Rew: -102.09   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 106.992653  11.703362  9.793964
Ep: 5548   Rew: -102.79   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 107.225647  9.350971  10.354872
Ep: 5549   Rew: -102.15   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 107.114449  12.024811  11.264527
Ep: 5550   Rew: -103.42   Avg Rew: -102.64   Polyak: 0.995000   Buffer: 100.00   Loss: 107.599892  9.602211  9.224897
Ep: 5551   Rew: -103.78   Avg Rew: -102.65   Polyak: 0.995000   Buffer: 100.00   Loss: 107.079971  11.248837  13.401046
Ep: 5552   Rew: -102.55   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 106.546768  11.628928  10.274284
Ep: 5553   Rew: -103.66   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 106.269661  10.132904  9.457351
Ep: 5554   Rew: -103.74   Avg Rew: -102.59   Polyak: 0.995000   Buffer: 100.00   Loss: 107.318504  12.149402  10.967819
Ep: 5555   Rew: -104.55   Avg Rew: -102.58   

Ep: 5616   Rew: -101.59   Avg Rew: -102.77   Polyak: 0.995000   Buffer: 100.00   Loss: 107.168114  10.666943  10.490841
Ep: 5617   Rew: -100.91   Avg Rew: -102.78   Polyak: 0.995000   Buffer: 100.00   Loss: 107.421265  11.285490  9.856940
Ep: 5618   Rew: -100.49   Avg Rew: -102.78   Polyak: 0.995000   Buffer: 100.00   Loss: 107.297295  9.617023  9.185609
Ep: 5619   Rew: -103.01   Avg Rew: -102.78   Polyak: 0.995000   Buffer: 100.00   Loss: 107.182030  10.937407  10.155294
Ep: 5620   Rew: -99.81   Avg Rew: -102.78   Polyak: 0.995000   Buffer: 100.00   Loss: 107.839424  11.843633  11.277672
Ep: 5621   Rew: -104.62   Avg Rew: -102.78   Polyak: 0.995000   Buffer: 100.00   Loss: 106.937683  10.705627  9.006373
Ep: 5622   Rew: -102.77   Avg Rew: -102.78   Polyak: 0.995000   Buffer: 100.00   Loss: 107.146324  11.659975  11.594694
Ep: 5623   Rew: -104.25   Avg Rew: -102.79   Polyak: 0.995000   Buffer: 100.00   Loss: 106.795288  9.907773  9.015700
Ep: 5624   Rew: -102.70   Avg Rew: -102.80   Po

Ep: 5685   Rew: -103.64   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 107.231857  13.010158  10.599817
Ep: 5686   Rew: -103.03   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 106.944923  8.628262  12.046205
Ep: 5687   Rew: -101.81   Avg Rew: -102.73   Polyak: 0.995000   Buffer: 100.00   Loss: 107.620438  10.759539  11.825400
Ep: 5688   Rew: -102.67   Avg Rew: -102.73   Polyak: 0.995000   Buffer: 100.00   Loss: 107.520500  13.781565  12.033203
Ep: 5689   Rew: -100.46   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 106.632187  12.118380  10.080206
Ep: 5690   Rew: -102.66   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 107.175156  9.086736  10.077649
Ep: 5691   Rew: -102.49   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 107.330963  10.280140  12.004531
Ep: 5692   Rew: -101.78   Avg Rew: -102.70   Polyak: 0.995000   Buffer: 100.00   Loss: 106.522987  8.826004  10.547441
Ep: 5693   Rew: -101.81   Avg Rew: -102.69 

Ep: 5754   Rew: -102.62   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 107.103775  10.536508  11.845666
Ep: 5755   Rew: -103.15   Avg Rew: -102.71   Polyak: 0.995000   Buffer: 100.00   Loss: 106.095627  11.631325  10.292109
Ep: 5756   Rew: -103.96   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 106.615417  10.863869  11.637056
Ep: 5757   Rew: -103.35   Avg Rew: -102.75   Polyak: 0.995000   Buffer: 100.00   Loss: 106.172127  10.750851  10.465726
Ep: 5758   Rew: -103.81   Avg Rew: -102.78   Polyak: 0.995000   Buffer: 100.00   Loss: 106.825264  8.452258  8.529172
Ep: 5759   Rew: -100.49   Avg Rew: -102.77   Polyak: 0.995000   Buffer: 100.00   Loss: 107.205498  10.457537  9.038885
Ep: 5760   Rew: -104.54   Avg Rew: -102.77   Polyak: 0.995000   Buffer: 100.00   Loss: 106.910927  12.499382  11.110036
Ep: 5761   Rew: -99.37   Avg Rew: -102.73   Polyak: 0.995000   Buffer: 100.00   Loss: 107.906876  10.592966  9.664803
Ep: 5762   Rew: -99.94   Avg Rew: -102.72   P

Ep: 5823   Rew: -103.52   Avg Rew: -102.57   Polyak: 0.995000   Buffer: 100.00   Loss: 107.512604  11.055750  10.468495
Ep: 5824   Rew: -102.39   Avg Rew: -102.57   Polyak: 0.995000   Buffer: 100.00   Loss: 107.872658  12.539470  10.831984
Ep: 5825   Rew: -103.66   Avg Rew: -102.58   Polyak: 0.995000   Buffer: 100.00   Loss: 107.338684  11.236755  9.451399
Ep: 5826   Rew: -102.66   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 107.412056  7.952779  10.369788
Ep: 5827   Rew: -103.16   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 106.949432  10.285077  10.657488
Ep: 5828   Rew: -101.74   Avg Rew: -102.58   Polyak: 0.995000   Buffer: 100.00   Loss: 107.425896  9.517664  9.523971
Ep: 5829   Rew: -100.23   Avg Rew: -102.54   Polyak: 0.995000   Buffer: 100.00   Loss: 107.209915  9.538902  9.906032
Ep: 5830   Rew: -101.09   Avg Rew: -102.51   Polyak: 0.995000   Buffer: 100.00   Loss: 105.832993  7.854783  9.965944
Ep: 5831   Rew: -101.25   Avg Rew: -102.49   Pol

Ep: 5892   Rew: -103.15   Avg Rew: -102.59   Polyak: 0.995000   Buffer: 100.00   Loss: 107.387856  9.968822  10.772469
Ep: 5893   Rew: -103.25   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 106.989799  12.135878  12.651940
Ep: 5894   Rew: -102.23   Avg Rew: -102.59   Polyak: 0.995000   Buffer: 100.00   Loss: 106.082573  10.260591  11.085465
Ep: 5895   Rew: -103.85   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 106.557915  9.152884  9.623380
Ep: 5896   Rew: -103.48   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 106.462891  10.067383  12.025737
Ep: 5897   Rew: -103.08   Avg Rew: -102.64   Polyak: 0.995000   Buffer: 100.00   Loss: 106.997253  10.684808  10.044844
Ep: 5898   Rew: -102.51   Avg Rew: -102.65   Polyak: 0.995000   Buffer: 100.00   Loss: 106.431412  11.465801  10.141144
Ep: 5899   Rew: -102.33   Avg Rew: -102.65   Polyak: 0.995000   Buffer: 100.00   Loss: 106.213463  11.908333  8.730782
Ep: 5900   Rew: -103.86   Avg Rew: -102.68  

Ep: 5961   Rew: -100.30   Avg Rew: -102.69   Polyak: 0.995000   Buffer: 100.00   Loss: 106.553070  11.261001  10.137835
Ep: 5962   Rew: -102.90   Avg Rew: -102.69   Polyak: 0.995000   Buffer: 100.00   Loss: 107.281677  10.758614  10.225468
Ep: 5963   Rew: -104.74   Avg Rew: -102.71   Polyak: 0.995000   Buffer: 100.00   Loss: 106.003609  9.992751  10.130585
Ep: 5964   Rew: -105.12   Avg Rew: -102.73   Polyak: 0.995000   Buffer: 100.00   Loss: 106.853935  8.179780  10.688314
Ep: 5965   Rew: -104.71   Avg Rew: -102.75   Polyak: 0.995000   Buffer: 100.00   Loss: 106.583321  12.664194  9.494594
Ep: 5966   Rew: -104.61   Avg Rew: -102.77   Polyak: 0.995000   Buffer: 100.00   Loss: 106.221359  12.119431  8.988305
Ep: 5967   Rew: -102.94   Avg Rew: -102.78   Polyak: 0.995000   Buffer: 100.00   Loss: 106.469025  10.106934  9.176877
Ep: 5968   Rew: -103.96   Avg Rew: -102.80   Polyak: 0.995000   Buffer: 100.00   Loss: 107.634903  9.895061  9.178719
Ep: 5969   Rew: -103.84   Avg Rew: -102.81   Po

Ep: 6030   Rew: -105.33   Avg Rew: -102.97   Polyak: 0.995000   Buffer: 100.00   Loss: 106.557808  11.291805  9.949650
Ep: 6031   Rew: -105.57   Avg Rew: -102.99   Polyak: 0.995000   Buffer: 100.00   Loss: 107.320747  14.281381  11.304388
Ep: 6032   Rew: -105.95   Avg Rew: -103.02   Polyak: 0.995000   Buffer: 100.00   Loss: 107.341232  12.903054  11.090977
Ep: 6033   Rew: -106.28   Avg Rew: -103.03   Polyak: 0.995000   Buffer: 100.00   Loss: 106.752419  8.975806  10.678929
Ep: 6034   Rew: -105.94   Avg Rew: -103.03   Polyak: 0.995000   Buffer: 100.00   Loss: 106.347054  10.386087  11.063915
Ep: 6035   Rew: -105.82   Avg Rew: -103.04   Polyak: 0.995000   Buffer: 100.00   Loss: 107.506744  10.667835  8.384741
Ep: 6036   Rew: -106.61   Avg Rew: -103.06   Polyak: 0.995000   Buffer: 100.00   Loss: 106.769821  10.951914  11.294903
Ep: 6037   Rew: -105.89   Avg Rew: -103.05   Polyak: 0.995000   Buffer: 100.00   Loss: 106.684761  99.129890  98.550255
Ep: 6038   Rew: -107.63   Avg Rew: -103.09 

Ep: 6099   Rew: -104.31   Avg Rew: -104.05   Polyak: 0.995000   Buffer: 100.00   Loss: 107.202042  10.334106  10.626189
Ep: 6100   Rew: -102.11   Avg Rew: -104.05   Polyak: 0.995000   Buffer: 100.00   Loss: 107.242241  10.318075  10.469030
Ep: 6101   Rew: -104.34   Avg Rew: -104.09   Polyak: 0.995000   Buffer: 100.00   Loss: 106.361870  9.448571  9.840898
Ep: 6102   Rew: -101.17   Avg Rew: -104.07   Polyak: 0.995000   Buffer: 100.00   Loss: 107.047142  13.215200  10.711966
Ep: 6103   Rew: -104.73   Avg Rew: -104.09   Polyak: 0.995000   Buffer: 100.00   Loss: 106.898613  11.329227  13.006643
Ep: 6104   Rew: -103.72   Avg Rew: -104.10   Polyak: 0.995000   Buffer: 100.00   Loss: 107.089104  10.132576  12.075235
Ep: 6105   Rew: -103.95   Avg Rew: -104.12   Polyak: 0.995000   Buffer: 100.00   Loss: 106.882858  10.514404  8.592628
Ep: 6106   Rew: -105.03   Avg Rew: -104.14   Polyak: 0.995000   Buffer: 100.00   Loss: 106.507729  10.580132  8.917493
Ep: 6107   Rew: -103.52   Avg Rew: -104.13  

Ep: 6168   Rew: -102.86   Avg Rew: -103.42   Polyak: 0.995000   Buffer: 100.00   Loss: 106.526962  11.424613  11.612025
Ep: 6169   Rew: -103.35   Avg Rew: -103.41   Polyak: 0.995000   Buffer: 100.00   Loss: 107.056839  9.923531  9.189497
Ep: 6170   Rew: -103.28   Avg Rew: -103.40   Polyak: 0.995000   Buffer: 100.00   Loss: 107.704544  9.399469  10.612450
Ep: 6171   Rew: -104.57   Avg Rew: -103.39   Polyak: 0.995000   Buffer: 100.00   Loss: 106.684494  10.491825  11.746048
Ep: 6172   Rew: -101.29   Avg Rew: -103.34   Polyak: 0.995000   Buffer: 100.00   Loss: 106.613220  12.638894  7.908710
Ep: 6173   Rew: -103.16   Avg Rew: -103.31   Polyak: 0.995000   Buffer: 100.00   Loss: 107.881981  9.195860  12.337483
Ep: 6174   Rew: -104.88   Avg Rew: -103.31   Polyak: 0.995000   Buffer: 100.00   Loss: 106.466400  11.839466  12.075308
Ep: 6175   Rew: -103.33   Avg Rew: -103.30   Polyak: 0.995000   Buffer: 100.00   Loss: 107.325356  12.073691  12.086485
Ep: 6176   Rew: -102.08   Avg Rew: -103.26   

Ep: 6237   Rew: -103.41   Avg Rew: -103.56   Polyak: 0.995000   Buffer: 100.00   Loss: 107.074356  9.950041  13.172462
Ep: 6238   Rew: -103.56   Avg Rew: -103.58   Polyak: 0.995000   Buffer: 100.00   Loss: 106.078140  10.381730  10.287615
Ep: 6239   Rew: -102.70   Avg Rew: -103.58   Polyak: 0.995000   Buffer: 100.00   Loss: 106.706963  7.195292  9.914726
Ep: 6240   Rew: -101.97   Avg Rew: -103.60   Polyak: 0.995000   Buffer: 100.00   Loss: 106.730247  10.710741  10.956088
Ep: 6241   Rew: -104.86   Avg Rew: -103.63   Polyak: 0.995000   Buffer: 100.00   Loss: 106.661934  10.896190  9.693926
Ep: 6242   Rew: -102.56   Avg Rew: -103.62   Polyak: 0.995000   Buffer: 100.00   Loss: 107.204506  10.124745  10.535493
Ep: 6243   Rew: -103.21   Avg Rew: -103.63   Polyak: 0.995000   Buffer: 100.00   Loss: 106.612961  9.510012  10.289396
Ep: 6244   Rew: -103.85   Avg Rew: -103.64   Polyak: 0.995000   Buffer: 100.00   Loss: 106.860794  9.541916  10.114225
Ep: 6245   Rew: -103.82   Avg Rew: -103.64   P

Ep: 6306   Rew: -102.47   Avg Rew: -105.07   Polyak: 0.995000   Buffer: 100.00   Loss: 106.776199  9.538433  9.582437
Ep: 6307   Rew: -103.14   Avg Rew: -105.06   Polyak: 0.995000   Buffer: 100.00   Loss: 107.225220  11.098298  9.803545
Ep: 6308   Rew: -104.98   Avg Rew: -105.08   Polyak: 0.995000   Buffer: 100.00   Loss: 106.616241  11.949291  10.000553
Ep: 6309   Rew: -101.74   Avg Rew: -105.06   Polyak: 0.995000   Buffer: 100.00   Loss: 106.537132  11.374807  10.247354
Ep: 6310   Rew: -104.70   Avg Rew: -105.07   Polyak: 0.995000   Buffer: 100.00   Loss: 107.529106  7.678415  9.407301
Ep: 6311   Rew: -104.94   Avg Rew: -105.08   Polyak: 0.995000   Buffer: 100.00   Loss: 106.596024  7.919681  9.388079
Ep: 6312   Rew: -104.59   Avg Rew: -105.07   Polyak: 0.995000   Buffer: 100.00   Loss: 105.964287  8.747641  11.136752
Ep: 6313   Rew: -104.63   Avg Rew: -105.06   Polyak: 0.995000   Buffer: 100.00   Loss: 106.859596  10.878825  9.453045
Ep: 6314   Rew: -106.18   Avg Rew: -105.09   Poly

Ep: 6375   Rew: -107.45   Avg Rew: -105.39   Polyak: 0.995000   Buffer: 100.00   Loss: 106.931648  11.334361  10.115829
Ep: 6376   Rew: -107.42   Avg Rew: -105.40   Polyak: 0.995000   Buffer: 100.00   Loss: 106.582314  10.214326  10.933151
Ep: 6377   Rew: -105.75   Avg Rew: -105.40   Polyak: 0.995000   Buffer: 100.00   Loss: 106.432800  11.646962  11.492340
Ep: 6378   Rew: -106.69   Avg Rew: -105.41   Polyak: 0.995000   Buffer: 100.00   Loss: 106.853851  8.696373  10.211913
Ep: 6379   Rew: -104.53   Avg Rew: -105.41   Polyak: 0.995000   Buffer: 100.00   Loss: 107.050056  10.586027  9.502173
Ep: 6380   Rew: -104.15   Avg Rew: -105.41   Polyak: 0.995000   Buffer: 100.00   Loss: 107.682022  10.986140  8.313266
Ep: 6381   Rew: -103.45   Avg Rew: -105.40   Polyak: 0.995000   Buffer: 100.00   Loss: 106.256554  13.725366  13.966979
Ep: 6382   Rew: -104.33   Avg Rew: -105.40   Polyak: 0.995000   Buffer: 100.00   Loss: 106.564224  12.290256  12.551196
Ep: 6383   Rew: -104.93   Avg Rew: -105.39 

Ep: 6444   Rew: -104.31   Avg Rew: -106.32   Polyak: 0.995000   Buffer: 100.00   Loss: 106.539810  12.393815  11.670685
Ep: 6445   Rew: -103.83   Avg Rew: -106.30   Polyak: 0.995000   Buffer: 100.00   Loss: 106.932381  10.638881  10.250126
Ep: 6446   Rew: -105.35   Avg Rew: -106.29   Polyak: 0.995000   Buffer: 100.00   Loss: 107.026070  10.594461  10.278641
Ep: 6447   Rew: -104.61   Avg Rew: -106.28   Polyak: 0.995000   Buffer: 100.00   Loss: 106.381958  10.210399  11.326637
Ep: 6448   Rew: -108.58   Avg Rew: -106.30   Polyak: 0.995000   Buffer: 100.00   Loss: 106.507469  10.867067  11.788385
Ep: 6449   Rew: -101.92   Avg Rew: -106.28   Polyak: 0.995000   Buffer: 100.00   Loss: 107.697212  10.519240  14.377979
Ep: 6450   Rew: -101.06   Avg Rew: -106.24   Polyak: 0.995000   Buffer: 100.00   Loss: 106.980904  10.847584  9.741033
Ep: 6451   Rew: -103.31   Avg Rew: -106.23   Polyak: 0.995000   Buffer: 100.00   Loss: 107.398972  12.328272  11.073627
Ep: 6452   Rew: -101.26   Avg Rew: -106.2

Ep: 6513   Rew: -104.02   Avg Rew: -103.30   Polyak: 0.995000   Buffer: 100.00   Loss: 107.282791  10.800500  7.982532
Ep: 6514   Rew: -103.64   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 106.936203  11.157512  9.771439
Ep: 6515   Rew: -103.24   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 106.329391  11.525359  9.693151
Ep: 6516   Rew: -102.47   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 106.410721  11.136273  8.824885
Ep: 6517   Rew: -103.44   Avg Rew: -103.26   Polyak: 0.995000   Buffer: 100.00   Loss: 107.063606  10.845716  10.310820
Ep: 6518   Rew: -103.27   Avg Rew: -103.28   Polyak: 0.995000   Buffer: 100.00   Loss: 106.799713  8.758071  9.720545
Ep: 6519   Rew: -103.52   Avg Rew: -103.28   Polyak: 0.995000   Buffer: 100.00   Loss: 106.469307  11.985414  11.530962
Ep: 6520   Rew: -103.06   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 106.451340  9.385571  9.337912
Ep: 6521   Rew: -103.33   Avg Rew: -103.27   Pol

Ep: 6582   Rew: -103.54   Avg Rew: -103.24   Polyak: 0.995000   Buffer: 100.00   Loss: 106.969894  9.410179  12.424303
Ep: 6583   Rew: -102.69   Avg Rew: -103.26   Polyak: 0.995000   Buffer: 100.00   Loss: 106.466095  9.694490  11.211692
Ep: 6584   Rew: -103.19   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 106.872345  13.203333  10.450194
Ep: 6585   Rew: -103.08   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 107.146523  10.384301  10.781097
Ep: 6586   Rew: -102.91   Avg Rew: -103.28   Polyak: 0.995000   Buffer: 100.00   Loss: 106.721428  9.293065  9.413198
Ep: 6587   Rew: -103.45   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 106.467628  11.807191  11.546902
Ep: 6588   Rew: -103.06   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 106.414162  8.892677  7.875548
Ep: 6589   Rew: -102.59   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 106.935646  12.657393  9.656672
Ep: 6590   Rew: -103.20   Avg Rew: -103.28   Po

Ep: 6651   Rew: -105.12   Avg Rew: -103.47   Polyak: 0.995000   Buffer: 100.00   Loss: 106.276413  9.706234  11.555605
Ep: 6652   Rew: -102.58   Avg Rew: -103.46   Polyak: 0.995000   Buffer: 100.00   Loss: 106.181343  10.520214  12.579839
Ep: 6653   Rew: -104.50   Avg Rew: -103.46   Polyak: 0.995000   Buffer: 100.00   Loss: 106.753174  9.118176  9.723855
Ep: 6654   Rew: -104.73   Avg Rew: -103.48   Polyak: 0.995000   Buffer: 100.00   Loss: 107.009300  10.976033  9.713619
Ep: 6655   Rew: -104.57   Avg Rew: -103.50   Polyak: 0.995000   Buffer: 100.00   Loss: 105.866951  10.133617  9.089442
Ep: 6656   Rew: -103.81   Avg Rew: -103.50   Polyak: 0.995000   Buffer: 100.00   Loss: 107.329445  11.469678  11.636367
Ep: 6657   Rew: -103.29   Avg Rew: -103.50   Polyak: 0.995000   Buffer: 100.00   Loss: 106.422966  11.782255  9.591239
Ep: 6658   Rew: -103.60   Avg Rew: -103.48   Polyak: 0.995000   Buffer: 100.00   Loss: 107.488487  10.907316  10.261821
Ep: 6659   Rew: -103.44   Avg Rew: -103.48   P

Ep: 6720   Rew: -105.12   Avg Rew: -103.52   Polyak: 0.995000   Buffer: 100.00   Loss: 106.766685  12.670295  10.749311
Ep: 6721   Rew: -104.00   Avg Rew: -103.53   Polyak: 0.995000   Buffer: 100.00   Loss: 106.917740  12.186821  9.793371
Ep: 6722   Rew: -103.98   Avg Rew: -103.55   Polyak: 0.995000   Buffer: 100.00   Loss: 106.807098  12.475244  11.031841
Ep: 6723   Rew: -104.64   Avg Rew: -103.56   Polyak: 0.995000   Buffer: 100.00   Loss: 106.634476  9.052912  11.391491
Ep: 6724   Rew: -103.88   Avg Rew: -103.56   Polyak: 0.995000   Buffer: 100.00   Loss: 106.920387  9.707067  12.922982
Ep: 6725   Rew: -102.74   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 107.457016  10.809870  9.717593
Ep: 6726   Rew: -104.18   Avg Rew: -103.57   Polyak: 0.995000   Buffer: 100.00   Loss: 106.664917  11.310202  10.788106
Ep: 6727   Rew: -104.31   Avg Rew: -103.58   Polyak: 0.995000   Buffer: 100.00   Loss: 106.583015  9.868576  11.377850
Ep: 6728   Rew: -105.27   Avg Rew: -103.60   

Ep: 6789   Rew: -104.13   Avg Rew: -103.85   Polyak: 0.995000   Buffer: 100.00   Loss: 106.852699  10.035726  7.954486
Ep: 6790   Rew: -104.10   Avg Rew: -103.87   Polyak: 0.995000   Buffer: 100.00   Loss: 107.335213  11.768784  8.878315
Ep: 6791   Rew: -101.77   Avg Rew: -103.85   Polyak: 0.995000   Buffer: 100.00   Loss: 106.282974  11.161269  9.813021
Ep: 6792   Rew: -103.23   Avg Rew: -103.86   Polyak: 0.995000   Buffer: 100.00   Loss: 107.455750  10.505083  9.939425
Ep: 6793   Rew: -102.57   Avg Rew: -103.87   Polyak: 0.995000   Buffer: 100.00   Loss: 107.088188  9.506183  8.710017
Ep: 6794   Rew: -101.94   Avg Rew: -103.88   Polyak: 0.995000   Buffer: 100.00   Loss: 106.685677  9.397821  9.820429
Ep: 6795   Rew: -102.42   Avg Rew: -103.88   Polyak: 0.995000   Buffer: 100.00   Loss: 107.081909  10.899048  12.377081
Ep: 6796   Rew: -102.19   Avg Rew: -103.87   Polyak: 0.995000   Buffer: 100.00   Loss: 107.515793  10.218730  11.453465
Ep: 6797   Rew: -99.89   Avg Rew: -103.82   Poly

Ep: 6858   Rew: -104.29   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 106.355507  12.988205  12.955251
Ep: 6859   Rew: -103.70   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 105.803139  11.720648  9.949078
Ep: 6860   Rew: -104.05   Avg Rew: -103.53   Polyak: 0.995000   Buffer: 100.00   Loss: 106.987892  11.376078  13.555443
Ep: 6861   Rew: -104.00   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 107.069305  12.626524  11.465373
Ep: 6862   Rew: -104.04   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 107.229370  11.308064  9.333387
Ep: 6863   Rew: -104.63   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 106.866180  9.550894  10.636786
Ep: 6864   Rew: -104.05   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 107.006859  10.293800  10.046017
Ep: 6865   Rew: -103.90   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 106.236740  12.948048  9.455615
Ep: 6866   Rew: -104.53   Avg Rew: -103.54  

Ep: 6927   Rew: -104.29   Avg Rew: -103.78   Polyak: 0.995000   Buffer: 100.00   Loss: 106.695518  12.978193  10.527333
Ep: 6928   Rew: -102.90   Avg Rew: -103.78   Polyak: 0.995000   Buffer: 100.00   Loss: 107.354996  9.601625  7.677514
Ep: 6929   Rew: -104.32   Avg Rew: -103.79   Polyak: 0.995000   Buffer: 100.00   Loss: 107.520378  10.400496  11.980793
Ep: 6930   Rew: -102.82   Avg Rew: -103.79   Polyak: 0.995000   Buffer: 100.00   Loss: 107.156219  8.847789  9.579659
Ep: 6931   Rew: -103.62   Avg Rew: -103.78   Polyak: 0.995000   Buffer: 100.00   Loss: 106.773788  12.078180  8.661576
Ep: 6932   Rew: -103.37   Avg Rew: -103.78   Polyak: 0.995000   Buffer: 100.00   Loss: 106.805099  10.975505  11.108543
Ep: 6933   Rew: -103.27   Avg Rew: -103.79   Polyak: 0.995000   Buffer: 100.00   Loss: 106.799850  10.995186  10.809877
Ep: 6934   Rew: -104.36   Avg Rew: -103.79   Polyak: 0.995000   Buffer: 100.00   Loss: 107.160149  12.079713  10.277422
Ep: 6935   Rew: -110.98   Avg Rew: -103.86   

Ep: 6996   Rew: -109.30   Avg Rew: -103.71   Polyak: 0.995000   Buffer: 100.00   Loss: 106.152412  11.717888  10.256808
Ep: 6997   Rew: -102.37   Avg Rew: -103.70   Polyak: 0.995000   Buffer: 100.00   Loss: 106.176346  12.697189  10.022775
Ep: 6998   Rew: -103.78   Avg Rew: -103.71   Polyak: 0.995000   Buffer: 100.00   Loss: 106.414780  9.286944  11.306353
Ep: 6999   Rew: -101.11   Avg Rew: -103.68   Polyak: 0.995000   Buffer: 100.00   Loss: 106.840210  12.250819  9.564219
Ep: 7000   Rew: -104.12   Avg Rew: -103.68   Polyak: 0.995000   Buffer: 100.00   Loss: 107.283020  14.588900  9.690132
Ep: 7001   Rew: -109.52   Avg Rew: -103.74   Polyak: 0.995000   Buffer: 100.00   Loss: 106.595367  9.827951  9.389463
Ep: 7002   Rew: -103.10   Avg Rew: -103.73   Polyak: 0.995000   Buffer: 100.00   Loss: 106.744293  10.246701  8.891871
Ep: 7003   Rew: -102.72   Avg Rew: -103.72   Polyak: 0.995000   Buffer: 100.00   Loss: 107.459641  11.853451  10.667584
Ep: 7004   Rew: -102.74   Avg Rew: -103.72   P

Ep: 7065   Rew: -104.23   Avg Rew: -103.30   Polyak: 0.995000   Buffer: 100.00   Loss: 106.205467  10.376893  10.210191
Ep: 7066   Rew: -103.09   Avg Rew: -103.30   Polyak: 0.995000   Buffer: 100.00   Loss: 106.567444  9.941118  9.206107
Ep: 7067   Rew: -104.25   Avg Rew: -103.29   Polyak: 0.995000   Buffer: 100.00   Loss: 106.025131  8.891287  10.202520
Ep: 7068   Rew: -103.81   Avg Rew: -103.29   Polyak: 0.995000   Buffer: 100.00   Loss: 105.649658  7.798845  10.119590
Ep: 7069   Rew: -104.27   Avg Rew: -103.28   Polyak: 0.995000   Buffer: 100.00   Loss: 106.385216  9.630892  11.568316
Ep: 7070   Rew: -103.54   Avg Rew: -103.29   Polyak: 0.995000   Buffer: 100.00   Loss: 107.383583  9.519627  10.129615
Ep: 7071   Rew: -102.09   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 106.374214  11.604692  10.195514
Ep: 7072   Rew: -103.01   Avg Rew: -103.25   Polyak: 0.995000   Buffer: 100.00   Loss: 106.936432  10.999228  9.458490
Ep: 7073   Rew: -102.66   Avg Rew: -103.25   Po

Ep: 7134   Rew: -103.21   Avg Rew: -103.15   Polyak: 0.995000   Buffer: 100.00   Loss: 106.088425  10.529577  10.640790
Ep: 7135   Rew: -102.71   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 106.367760  8.873619  9.598446
Ep: 7136   Rew: -103.84   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 106.825455  9.852268  11.104733
Ep: 7137   Rew: -102.93   Avg Rew: -103.15   Polyak: 0.995000   Buffer: 100.00   Loss: 106.758690  9.080307  9.417320
Ep: 7138   Rew: -103.00   Avg Rew: -103.15   Polyak: 0.995000   Buffer: 100.00   Loss: 106.349106  8.485568  9.312934
Ep: 7139   Rew: -102.79   Avg Rew: -103.16   Polyak: 0.995000   Buffer: 100.00   Loss: 106.671394  11.019395  8.935390
Ep: 7140   Rew: -103.47   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 106.535728  11.135956  8.618447
Ep: 7141   Rew: -102.48   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 106.760147  9.204647  11.369103
Ep: 7142   Rew: -102.81   Avg Rew: -103.16   Polya

Ep: 7203   Rew: -102.69   Avg Rew: -103.42   Polyak: 0.995000   Buffer: 100.00   Loss: 106.378677  11.053595  11.644938
Ep: 7204   Rew: -103.12   Avg Rew: -103.43   Polyak: 0.995000   Buffer: 100.00   Loss: 106.657684  8.955731  7.193042
Ep: 7205   Rew: -104.24   Avg Rew: -103.45   Polyak: 0.995000   Buffer: 100.00   Loss: 105.883492  9.946498  8.746607
Ep: 7206   Rew: -103.05   Avg Rew: -103.45   Polyak: 0.995000   Buffer: 100.00   Loss: 106.646973  9.573723  10.875305
Ep: 7207   Rew: -102.57   Avg Rew: -103.45   Polyak: 0.995000   Buffer: 100.00   Loss: 106.737572  10.161291  11.054234
Ep: 7208   Rew: -102.67   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 106.266769  14.305804  10.791384
Ep: 7209   Rew: -103.60   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 106.224693  12.780027  10.782721
Ep: 7210   Rew: -103.15   Avg Rew: -103.43   Polyak: 0.995000   Buffer: 100.00   Loss: 106.015434  12.447254  9.913715
Ep: 7211   Rew: -103.02   Avg Rew: -103.42   P

Ep: 7272   Rew: -103.11   Avg Rew: -103.45   Polyak: 0.995000   Buffer: 100.00   Loss: 105.980499  10.373665  9.233079
Ep: 7273   Rew: -103.77   Avg Rew: -103.47   Polyak: 0.995000   Buffer: 100.00   Loss: 105.890633  9.984735  9.661211
Ep: 7274   Rew: -103.64   Avg Rew: -103.47   Polyak: 0.995000   Buffer: 100.00   Loss: 105.891357  9.875919  9.065553
Ep: 7275   Rew: -103.57   Avg Rew: -103.45   Polyak: 0.995000   Buffer: 100.00   Loss: 105.839706  8.444554  9.974270
Ep: 7276   Rew: -103.41   Avg Rew: -103.45   Polyak: 0.995000   Buffer: 100.00   Loss: 107.034660  11.221169  11.044842
Ep: 7277   Rew: -101.67   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 106.710358  8.137485  9.043311
Ep: 7278   Rew: -103.51   Avg Rew: -103.43   Polyak: 0.995000   Buffer: 100.00   Loss: 106.605118  9.378684  10.501976
Ep: 7279   Rew: -103.69   Avg Rew: -103.43   Polyak: 0.995000   Buffer: 100.00   Loss: 106.453926  12.623368  10.951830
Ep: 7280   Rew: -103.92   Avg Rew: -103.43   Polya

Ep: 7341   Rew: -102.67   Avg Rew: -103.85   Polyak: 0.995000   Buffer: 100.00   Loss: 105.711060  10.928400  8.657483
Ep: 7342   Rew: -104.59   Avg Rew: -103.87   Polyak: 0.995000   Buffer: 100.00   Loss: 105.537727  11.064034  10.617996
Ep: 7343   Rew: -103.83   Avg Rew: -103.86   Polyak: 0.995000   Buffer: 100.00   Loss: 106.130699  11.595283  10.750983
Ep: 7344   Rew: -103.39   Avg Rew: -103.87   Polyak: 0.995000   Buffer: 100.00   Loss: 105.964790  11.639845  10.943020
Ep: 7345   Rew: -102.78   Avg Rew: -103.86   Polyak: 0.995000   Buffer: 100.00   Loss: 105.903259  11.054973  11.513646
Ep: 7346   Rew: -103.70   Avg Rew: -103.86   Polyak: 0.995000   Buffer: 100.00   Loss: 106.553093  9.447180  10.651129
Ep: 7347   Rew: -104.13   Avg Rew: -103.86   Polyak: 0.995000   Buffer: 100.00   Loss: 105.410614  10.640673  12.007252
Ep: 7348   Rew: -103.06   Avg Rew: -103.86   Polyak: 0.995000   Buffer: 100.00   Loss: 106.418068  10.076488  7.023937
Ep: 7349   Rew: -101.62   Avg Rew: -103.85 

Ep: 7410   Rew: -103.23   Avg Rew: -103.43   Polyak: 0.995000   Buffer: 100.00   Loss: 106.783257  9.642833  10.624106
Ep: 7411   Rew: -103.61   Avg Rew: -103.41   Polyak: 0.995000   Buffer: 100.00   Loss: 106.225502  9.071480  8.122805
Ep: 7412   Rew: -103.65   Avg Rew: -103.41   Polyak: 0.995000   Buffer: 100.00   Loss: 106.263359  11.069116  12.311760
Ep: 7413   Rew: -103.21   Avg Rew: -103.40   Polyak: 0.995000   Buffer: 100.00   Loss: 105.604118  8.939357  9.210719
Ep: 7414   Rew: -103.25   Avg Rew: -103.39   Polyak: 0.995000   Buffer: 100.00   Loss: 106.132790  11.562051  10.889004
Ep: 7415   Rew: -102.02   Avg Rew: -103.38   Polyak: 0.995000   Buffer: 100.00   Loss: 105.427521  9.816521  10.282083
Ep: 7416   Rew: -101.59   Avg Rew: -103.36   Polyak: 0.995000   Buffer: 100.00   Loss: 106.284805  12.302567  11.118466
Ep: 7417   Rew: -102.29   Avg Rew: -103.34   Polyak: 0.995000   Buffer: 100.00   Loss: 106.536079  11.663794  11.677335
Ep: 7418   Rew: -103.63   Avg Rew: -103.34   P

Ep: 7479   Rew: -103.03   Avg Rew: -103.21   Polyak: 0.995000   Buffer: 100.00   Loss: 105.575653  9.207735  9.448214
Ep: 7480   Rew: -101.69   Avg Rew: -103.20   Polyak: 0.995000   Buffer: 100.00   Loss: 106.844177  12.257181  9.411980
Ep: 7481   Rew: -102.26   Avg Rew: -103.21   Polyak: 0.995000   Buffer: 100.00   Loss: 105.350563  11.888796  9.252024
Ep: 7482   Rew: -102.76   Avg Rew: -103.22   Polyak: 0.995000   Buffer: 100.00   Loss: 106.312981  11.695051  9.193775
Ep: 7483   Rew: -102.78   Avg Rew: -103.21   Polyak: 0.995000   Buffer: 100.00   Loss: 106.444519  8.492449  9.178675
Ep: 7484   Rew: -103.33   Avg Rew: -103.20   Polyak: 0.995000   Buffer: 100.00   Loss: 106.824738  9.624582  12.350786
Ep: 7485   Rew: -103.54   Avg Rew: -103.22   Polyak: 0.995000   Buffer: 100.00   Loss: 105.730125  9.868765  9.880437
Ep: 7486   Rew: -103.59   Avg Rew: -103.22   Polyak: 0.995000   Buffer: 100.00   Loss: 106.922348  11.424809  11.673556
Ep: 7487   Rew: -102.53   Avg Rew: -103.21   Polya

Ep: 7548   Rew: -103.38   Avg Rew: -103.19   Polyak: 0.995000   Buffer: 100.00   Loss: 106.034576  10.352359  9.912624
Ep: 7549   Rew: -102.75   Avg Rew: -103.18   Polyak: 0.995000   Buffer: 100.00   Loss: 105.579773  9.795679  9.643023
Ep: 7550   Rew: -103.27   Avg Rew: -103.18   Polyak: 0.995000   Buffer: 100.00   Loss: 106.365364  9.151125  11.625881
Ep: 7551   Rew: -102.62   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 105.997719  12.629639  8.188341
Ep: 7552   Rew: -102.17   Avg Rew: -103.16   Polyak: 0.995000   Buffer: 100.00   Loss: 105.809547  8.844420  10.896309
Ep: 7553   Rew: -102.98   Avg Rew: -103.15   Polyak: 0.995000   Buffer: 100.00   Loss: 106.279030  11.254442  8.959341
Ep: 7554   Rew: -102.99   Avg Rew: -103.16   Polyak: 0.995000   Buffer: 100.00   Loss: 105.308716  9.204670  10.043609
Ep: 7555   Rew: -102.49   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 105.921051  9.191706  9.570619
Ep: 7556   Rew: -102.70   Avg Rew: -103.15   Polya

Ep: 7617   Rew: -104.47   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 105.799721  9.715856  9.781130
Ep: 7618   Rew: -104.17   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 106.329247  7.949073  7.637285
Ep: 7619   Rew: -103.60   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 106.301659  11.954219  9.728627
Ep: 7620   Rew: -102.85   Avg Rew: -103.30   Polyak: 0.995000   Buffer: 100.00   Loss: 106.733704  8.774740  9.571863
Ep: 7621   Rew: -104.44   Avg Rew: -103.31   Polyak: 0.995000   Buffer: 100.00   Loss: 107.127350  8.418318  8.996645
Ep: 7622   Rew: -103.07   Avg Rew: -103.29   Polyak: 0.995000   Buffer: 100.00   Loss: 106.416618  9.758429  11.122207
Ep: 7623   Rew: -103.21   Avg Rew: -103.30   Polyak: 0.995000   Buffer: 100.00   Loss: 106.051865  9.007335  7.979348
Ep: 7624   Rew: -103.14   Avg Rew: -103.30   Polyak: 0.995000   Buffer: 100.00   Loss: 106.550171  10.312612  9.468301
Ep: 7625   Rew: -100.96   Avg Rew: -103.28   Polyak: 

Ep: 7686   Rew: -102.43   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 105.755417  11.150833  10.129690
Ep: 7687   Rew: -102.83   Avg Rew: -103.13   Polyak: 0.995000   Buffer: 100.00   Loss: 106.733505  11.622358  10.821540
Ep: 7688   Rew: -103.36   Avg Rew: -103.13   Polyak: 0.995000   Buffer: 100.00   Loss: 106.569725  10.261068  10.221128
Ep: 7689   Rew: -103.05   Avg Rew: -103.13   Polyak: 0.995000   Buffer: 100.00   Loss: 106.896736  11.990165  10.013250
Ep: 7690   Rew: -103.67   Avg Rew: -103.12   Polyak: 0.995000   Buffer: 100.00   Loss: 106.817101  10.404857  9.579688
Ep: 7691   Rew: -102.48   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 106.104309  9.544171  9.494221
Ep: 7692   Rew: -102.84   Avg Rew: -103.12   Polyak: 0.995000   Buffer: 100.00   Loss: 105.885117  11.688315  9.742634
Ep: 7693   Rew: -102.53   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 106.605713  9.778816  9.518208
Ep: 7694   Rew: -103.45   Avg Rew: -103.11   P

Ep: 7755   Rew: -102.86   Avg Rew: -103.03   Polyak: 0.995000   Buffer: 100.00   Loss: 106.179649  10.028066  10.436516
Ep: 7756   Rew: -102.58   Avg Rew: -103.03   Polyak: 0.995000   Buffer: 100.00   Loss: 107.603630  9.552042  8.549868
Ep: 7757   Rew: -102.88   Avg Rew: -103.03   Polyak: 0.995000   Buffer: 100.00   Loss: 106.953178  11.484485  9.149437
Ep: 7758   Rew: -102.79   Avg Rew: -103.03   Polyak: 0.995000   Buffer: 100.00   Loss: 105.979118  8.403402  9.756502
Ep: 7759   Rew: -103.57   Avg Rew: -103.03   Polyak: 0.995000   Buffer: 100.00   Loss: 105.844322  10.494143  11.315296
Ep: 7760   Rew: -103.67   Avg Rew: -103.03   Polyak: 0.995000   Buffer: 100.00   Loss: 107.480782  10.651407  11.095337
Ep: 7761   Rew: -104.37   Avg Rew: -103.05   Polyak: 0.995000   Buffer: 100.00   Loss: 106.699249  12.069752  9.912381
Ep: 7762   Rew: -103.72   Avg Rew: -103.06   Polyak: 0.995000   Buffer: 100.00   Loss: 105.753021  10.886808  9.754478
Ep: 7763   Rew: -103.59   Avg Rew: -103.06   Po

Ep: 7824   Rew: -103.10   Avg Rew: -103.12   Polyak: 0.995000   Buffer: 100.00   Loss: 106.256996  13.168129  11.225049
Ep: 7825   Rew: -103.54   Avg Rew: -103.12   Polyak: 0.995000   Buffer: 100.00   Loss: 106.123260  10.738533  9.585365
Ep: 7826   Rew: -102.76   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 106.360680  9.172894  10.784212
Ep: 7827   Rew: -103.79   Avg Rew: -103.12   Polyak: 0.995000   Buffer: 100.00   Loss: 106.550438  12.778305  11.213006
Ep: 7828   Rew: -103.97   Avg Rew: -103.13   Polyak: 0.995000   Buffer: 100.00   Loss: 105.905853  11.805192  11.293175
Ep: 7829   Rew: -104.25   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 106.241020  11.089981  9.212103
Ep: 7830   Rew: -103.05   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 106.296661  11.199400  10.549758
Ep: 7831   Rew: -103.35   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 106.199272  9.736502  9.496538
Ep: 7832   Rew: -103.41   Avg Rew: -103.15   

Ep: 7893   Rew: -103.28   Avg Rew: -103.32   Polyak: 0.995000   Buffer: 100.00   Loss: 105.996132  11.065655  11.806369
Ep: 7894   Rew: -104.44   Avg Rew: -103.33   Polyak: 0.995000   Buffer: 100.00   Loss: 105.301308  8.244638  10.785676
Ep: 7895   Rew: -105.00   Avg Rew: -103.34   Polyak: 0.995000   Buffer: 100.00   Loss: 106.045006  12.079142  10.980543
Ep: 7896   Rew: -104.84   Avg Rew: -103.37   Polyak: 0.995000   Buffer: 100.00   Loss: 106.468895  11.597072  9.600445
Ep: 7897   Rew: -103.83   Avg Rew: -103.39   Polyak: 0.995000   Buffer: 100.00   Loss: 105.740952  10.141089  9.028395
Ep: 7898   Rew: -103.94   Avg Rew: -103.39   Polyak: 0.995000   Buffer: 100.00   Loss: 106.079391  10.907665  10.916737
Ep: 7899   Rew: -103.99   Avg Rew: -103.40   Polyak: 0.995000   Buffer: 100.00   Loss: 106.478233  9.680529  10.075077
Ep: 7900   Rew: -102.11   Avg Rew: -103.39   Polyak: 0.995000   Buffer: 100.00   Loss: 106.360931  12.809422  9.957219
Ep: 7901   Rew: -103.32   Avg Rew: -103.40   

Ep: 7962   Rew: -103.25   Avg Rew: -103.22   Polyak: 0.995000   Buffer: 100.00   Loss: 106.783035  12.120590  14.716398
Ep: 7963   Rew: -103.09   Avg Rew: -103.22   Polyak: 0.995000   Buffer: 100.00   Loss: 107.030319  12.352669  11.696530
Ep: 7964   Rew: -103.32   Avg Rew: -103.22   Polyak: 0.995000   Buffer: 100.00   Loss: 106.035278  10.938867  10.272787
Ep: 7965   Rew: -103.88   Avg Rew: -103.23   Polyak: 0.995000   Buffer: 100.00   Loss: 106.447487  8.385491  10.803450
Ep: 7966   Rew: -102.36   Avg Rew: -103.22   Polyak: 0.995000   Buffer: 100.00   Loss: 106.322441  7.653122  8.592693
Ep: 7967   Rew: -103.73   Avg Rew: -103.22   Polyak: 0.995000   Buffer: 100.00   Loss: 105.941559  10.513021  11.812660
Ep: 7968   Rew: -104.83   Avg Rew: -103.24   Polyak: 0.995000   Buffer: 100.00   Loss: 106.156807  9.230947  9.119212
Ep: 7969   Rew: -103.87   Avg Rew: -103.25   Polyak: 0.995000   Buffer: 100.00   Loss: 106.673172  10.783423  10.530108
Ep: 7970   Rew: -104.12   Avg Rew: -103.26   

Ep: 8031   Rew: -102.36   Avg Rew: -103.57   Polyak: 0.995000   Buffer: 100.00   Loss: 106.248177  8.525187  10.351733
Ep: 8032   Rew: -104.13   Avg Rew: -103.59   Polyak: 0.995000   Buffer: 100.00   Loss: 106.033592  11.111563  9.777528
Ep: 8033   Rew: -104.28   Avg Rew: -103.60   Polyak: 0.995000   Buffer: 100.00   Loss: 106.161346  9.465220  10.825536
Ep: 8034   Rew: -104.44   Avg Rew: -103.61   Polyak: 0.995000   Buffer: 100.00   Loss: 105.678925  12.915526  10.604521
Ep: 8035   Rew: -103.78   Avg Rew: -103.63   Polyak: 0.995000   Buffer: 100.00   Loss: 106.476608  9.218138  7.894754
Ep: 8036   Rew: -104.91   Avg Rew: -103.68   Polyak: 0.995000   Buffer: 100.00   Loss: 106.404625  12.960033  9.646530
Ep: 8037   Rew: -104.79   Avg Rew: -103.73   Polyak: 0.995000   Buffer: 100.00   Loss: 105.804298  8.602841  12.064443
Ep: 8038   Rew: -105.01   Avg Rew: -103.75   Polyak: 0.995000   Buffer: 100.00   Loss: 106.039955  11.914633  10.960136
Ep: 8039   Rew: -104.49   Avg Rew: -103.77   Po

Ep: 8100   Rew: -104.40   Avg Rew: -103.91   Polyak: 0.995000   Buffer: 100.00   Loss: 106.514740  10.774020  8.818012
Ep: 8101   Rew: -105.08   Avg Rew: -103.92   Polyak: 0.995000   Buffer: 100.00   Loss: 105.919746  9.585224  11.369413
Ep: 8102   Rew: -104.71   Avg Rew: -103.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.731842  11.526013  9.718418
Ep: 8103   Rew: -103.71   Avg Rew: -103.94   Polyak: 0.995000   Buffer: 100.00   Loss: 106.078842  9.058201  9.821327
Ep: 8104   Rew: -104.47   Avg Rew: -103.96   Polyak: 0.995000   Buffer: 100.00   Loss: 105.701233  10.692190  10.237419
Ep: 8105   Rew: -105.50   Avg Rew: -103.97   Polyak: 0.995000   Buffer: 100.00   Loss: 106.425568  11.721246  10.702879
Ep: 8106   Rew: -104.59   Avg Rew: -103.98   Polyak: 0.995000   Buffer: 100.00   Loss: 106.956940  10.479377  10.242051
Ep: 8107   Rew: -106.97   Avg Rew: -104.01   Polyak: 0.995000   Buffer: 100.00   Loss: 105.553894  9.164827  9.064787
Ep: 8108   Rew: -104.40   Avg Rew: -104.01   Po

Ep: 8169   Rew: -104.03   Avg Rew: -103.82   Polyak: 0.995000   Buffer: 100.00   Loss: 106.419846  8.832887  10.285220
Ep: 8170   Rew: -103.49   Avg Rew: -103.82   Polyak: 0.995000   Buffer: 100.00   Loss: 105.662186  10.046256  9.338464
Ep: 8171   Rew: -103.58   Avg Rew: -103.82   Polyak: 0.995000   Buffer: 100.00   Loss: 105.708885  8.921380  9.803243
Ep: 8172   Rew: -105.10   Avg Rew: -103.83   Polyak: 0.995000   Buffer: 100.00   Loss: 105.521149  11.871803  10.870186
Ep: 8173   Rew: -104.38   Avg Rew: -103.83   Polyak: 0.995000   Buffer: 100.00   Loss: 105.771027  9.943816  9.108683
Ep: 8174   Rew: -104.90   Avg Rew: -103.83   Polyak: 0.995000   Buffer: 100.00   Loss: 106.493408  12.076000  11.809669
Ep: 8175   Rew: -104.89   Avg Rew: -103.84   Polyak: 0.995000   Buffer: 100.00   Loss: 106.160240  8.780326  10.555355
Ep: 8176   Rew: -103.87   Avg Rew: -103.83   Polyak: 0.995000   Buffer: 100.00   Loss: 106.526833  9.139931  10.645030
Ep: 8177   Rew: -103.96   Avg Rew: -103.82   Pol

Ep: 8238   Rew: -102.92   Avg Rew: -103.87   Polyak: 0.995000   Buffer: 100.00   Loss: 106.536560  9.381791  10.103016
Ep: 8239   Rew: -103.61   Avg Rew: -103.88   Polyak: 0.995000   Buffer: 100.00   Loss: 106.057289  8.897966  8.648859
Ep: 8240   Rew: -103.95   Avg Rew: -103.88   Polyak: 0.995000   Buffer: 100.00   Loss: 105.534813  11.402928  9.269743
Ep: 8241   Rew: -102.94   Avg Rew: -103.87   Polyak: 0.995000   Buffer: 100.00   Loss: 106.846863  9.412285  9.184461
Ep: 8242   Rew: -104.34   Avg Rew: -103.87   Polyak: 0.995000   Buffer: 100.00   Loss: 106.544617  9.340284  9.204281
Ep: 8243   Rew: -102.51   Avg Rew: -103.87   Polyak: 0.995000   Buffer: 100.00   Loss: 107.112679  10.059242  6.677605
Ep: 8244   Rew: -103.71   Avg Rew: -103.87   Polyak: 0.995000   Buffer: 100.00   Loss: 106.823967  9.629894  10.058880
Ep: 8245   Rew: -102.76   Avg Rew: -103.87   Polyak: 0.995000   Buffer: 100.00   Loss: 106.240578  7.632481  10.669553
Ep: 8246   Rew: -104.69   Avg Rew: -103.89   Polyak

Ep: 8307   Rew: -104.06   Avg Rew: -103.53   Polyak: 0.995000   Buffer: 100.00   Loss: 106.364212  10.081120  10.769630
Ep: 8308   Rew: -102.64   Avg Rew: -103.51   Polyak: 0.995000   Buffer: 100.00   Loss: 105.843315  12.040785  9.285340
Ep: 8309   Rew: -104.38   Avg Rew: -103.51   Polyak: 0.995000   Buffer: 100.00   Loss: 106.724403  10.591269  9.352823
Ep: 8310   Rew: -105.32   Avg Rew: -103.51   Polyak: 0.995000   Buffer: 100.00   Loss: 106.280121  7.880582  10.787191
Ep: 8311   Rew: -104.16   Avg Rew: -103.52   Polyak: 0.995000   Buffer: 100.00   Loss: 105.926247  8.895957  11.477570
Ep: 8312   Rew: -104.54   Avg Rew: -103.52   Polyak: 0.995000   Buffer: 100.00   Loss: 106.646927  9.627750  10.664370
Ep: 8313   Rew: -104.49   Avg Rew: -103.52   Polyak: 0.995000   Buffer: 100.00   Loss: 105.996567  11.421801  9.403301
Ep: 8314   Rew: -103.25   Avg Rew: -103.51   Polyak: 0.995000   Buffer: 100.00   Loss: 106.199913  12.798767  10.234221
Ep: 8315   Rew: -102.52   Avg Rew: -103.51   P

Ep: 8376   Rew: -103.05   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 105.814751  11.484644  9.490301
Ep: 8377   Rew: -102.47   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 106.715904  10.192898  11.335391
Ep: 8378   Rew: -105.09   Avg Rew: -103.46   Polyak: 0.995000   Buffer: 100.00   Loss: 105.913910  10.549835  9.212316
Ep: 8379   Rew: -102.88   Avg Rew: -103.45   Polyak: 0.995000   Buffer: 100.00   Loss: 106.113632  7.641088  8.614854
Ep: 8380   Rew: -103.48   Avg Rew: -103.46   Polyak: 0.995000   Buffer: 100.00   Loss: 106.231697  10.145804  9.833697
Ep: 8381   Rew: -103.21   Avg Rew: -103.46   Polyak: 0.995000   Buffer: 100.00   Loss: 105.950996  8.849229  11.997864
Ep: 8382   Rew: -101.60   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 105.979012  10.248948  10.936779
Ep: 8383   Rew: -103.14   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 106.897491  11.212025  11.721628
Ep: 8384   Rew: -102.47   Avg Rew: -103.44   P

Ep: 8445   Rew: -104.09   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 105.310013  10.874431  10.455081
Ep: 8446   Rew: -104.79   Avg Rew: -103.19   Polyak: 0.995000   Buffer: 100.00   Loss: 106.601654  12.239601  8.791520
Ep: 8447   Rew: -104.39   Avg Rew: -103.20   Polyak: 0.995000   Buffer: 100.00   Loss: 107.075378  9.844740  10.526454
Ep: 8448   Rew: -103.02   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 106.403152  9.838266  9.892844
Ep: 8449   Rew: -102.07   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 105.772034  8.626473  9.388882
Ep: 8450   Rew: -102.00   Avg Rew: -103.16   Polyak: 0.995000   Buffer: 100.00   Loss: 106.424477  9.905920  11.062925
Ep: 8451   Rew: -103.87   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 106.490631  8.865398  10.086845
Ep: 8452   Rew: -103.18   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 106.391342  12.030672  10.361941
Ep: 8453   Rew: -102.31   Avg Rew: -103.16   Pol

Ep: 8514   Rew: -103.04   Avg Rew: -103.43   Polyak: 0.995000   Buffer: 100.00   Loss: 105.581665  11.889095  10.401175
Ep: 8515   Rew: -104.57   Avg Rew: -103.45   Polyak: 0.995000   Buffer: 100.00   Loss: 105.420876  11.805605  9.247166
Ep: 8516   Rew: -103.20   Avg Rew: -103.46   Polyak: 0.995000   Buffer: 100.00   Loss: 105.573456  8.300545  10.404325
Ep: 8517   Rew: -104.13   Avg Rew: -103.47   Polyak: 0.995000   Buffer: 100.00   Loss: 106.257698  9.769368  11.288653
Ep: 8518   Rew: -103.05   Avg Rew: -103.46   Polyak: 0.995000   Buffer: 100.00   Loss: 105.809212  10.364485  9.886904
Ep: 8519   Rew: -104.61   Avg Rew: -103.47   Polyak: 0.995000   Buffer: 100.00   Loss: 106.404884  10.750864  9.303436
Ep: 8520   Rew: -102.67   Avg Rew: -103.47   Polyak: 0.995000   Buffer: 100.00   Loss: 105.563576  7.713021  10.701259
Ep: 8521   Rew: -102.82   Avg Rew: -103.47   Polyak: 0.995000   Buffer: 100.00   Loss: 105.642441  9.075784  9.748074
Ep: 8522   Rew: -102.55   Avg Rew: -103.46   Pol

Ep: 8583   Rew: -104.79   Avg Rew: -103.48   Polyak: 0.995000   Buffer: 100.00   Loss: 106.463715  10.968224  9.352984
Ep: 8584   Rew: -104.40   Avg Rew: -103.49   Polyak: 0.995000   Buffer: 100.00   Loss: 106.052116  10.021578  8.154177
Ep: 8585   Rew: -103.78   Avg Rew: -103.50   Polyak: 0.995000   Buffer: 100.00   Loss: 105.449371  10.857418  9.073357
Ep: 8586   Rew: -104.13   Avg Rew: -103.51   Polyak: 0.995000   Buffer: 100.00   Loss: 106.186447  10.433920  11.440222
Ep: 8587   Rew: -104.22   Avg Rew: -103.52   Polyak: 0.995000   Buffer: 100.00   Loss: 106.044052  10.210083  8.785160
Ep: 8588   Rew: -103.91   Avg Rew: -103.53   Polyak: 0.995000   Buffer: 100.00   Loss: 106.241180  11.689030  10.918262
Ep: 8589   Rew: -104.45   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 106.413528  9.268872  10.541727
Ep: 8590   Rew: -102.56   Avg Rew: -103.53   Polyak: 0.995000   Buffer: 100.00   Loss: 105.398720  10.651512  9.422033
Ep: 8591   Rew: -102.81   Avg Rew: -103.53   P

Ep: 8652   Rew: -102.77   Avg Rew: -103.55   Polyak: 0.995000   Buffer: 100.00   Loss: 105.681297  11.706892  10.319492
Ep: 8653   Rew: -105.14   Avg Rew: -103.56   Polyak: 0.995000   Buffer: 100.00   Loss: 105.960838  9.999449  10.782665
Ep: 8654   Rew: -102.72   Avg Rew: -103.56   Polyak: 0.995000   Buffer: 100.00   Loss: 106.390381  11.115785  10.222720
Ep: 8655   Rew: -102.71   Avg Rew: -103.56   Polyak: 0.995000   Buffer: 100.00   Loss: 106.693016  9.903363  7.421175
Ep: 8656   Rew: -104.29   Avg Rew: -103.58   Polyak: 0.995000   Buffer: 100.00   Loss: 105.771492  10.282973  10.489813
Ep: 8657   Rew: -102.75   Avg Rew: -103.58   Polyak: 0.995000   Buffer: 100.00   Loss: 105.933899  9.971524  10.773705
Ep: 8658   Rew: -104.06   Avg Rew: -103.58   Polyak: 0.995000   Buffer: 100.00   Loss: 106.416634  11.198715  8.384023
Ep: 8659   Rew: -104.03   Avg Rew: -103.57   Polyak: 0.995000   Buffer: 100.00   Loss: 105.960464  9.842098  11.045991
Ep: 8660   Rew: -103.35   Avg Rew: -103.56   P

Ep: 8721   Rew: -104.23   Avg Rew: -103.43   Polyak: 0.995000   Buffer: 100.00   Loss: 105.420189  11.441785  9.697876
Ep: 8722   Rew: -101.62   Avg Rew: -103.40   Polyak: 0.995000   Buffer: 100.00   Loss: 106.664337  8.345318  12.184734
Ep: 8723   Rew: -102.60   Avg Rew: -103.40   Polyak: 0.995000   Buffer: 100.00   Loss: 105.875488  14.130091  9.391074
Ep: 8724   Rew: -104.05   Avg Rew: -103.39   Polyak: 0.995000   Buffer: 100.00   Loss: 106.130295  10.497602  11.329910
Ep: 8725   Rew: -102.57   Avg Rew: -103.37   Polyak: 0.995000   Buffer: 100.00   Loss: 106.433853  11.771081  10.816225
Ep: 8726   Rew: -104.83   Avg Rew: -103.38   Polyak: 0.995000   Buffer: 100.00   Loss: 106.356270  9.457444  9.339561
Ep: 8727   Rew: -103.75   Avg Rew: -103.38   Polyak: 0.995000   Buffer: 100.00   Loss: 105.838150  10.948338  9.663432
Ep: 8728   Rew: -103.04   Avg Rew: -103.37   Polyak: 0.995000   Buffer: 100.00   Loss: 105.752342  12.309212  10.154379
Ep: 8729   Rew: -103.70   Avg Rew: -103.36   P

Ep: 8790   Rew: -104.41   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 107.132584  10.428657  9.620018
Ep: 8791   Rew: -103.58   Avg Rew: -103.45   Polyak: 0.995000   Buffer: 100.00   Loss: 105.362129  9.730165  9.420029
Ep: 8792   Rew: -104.32   Avg Rew: -103.46   Polyak: 0.995000   Buffer: 100.00   Loss: 105.627594  9.218804  9.251122
Ep: 8793   Rew: -103.14   Avg Rew: -103.48   Polyak: 0.995000   Buffer: 100.00   Loss: 106.594589  7.868362  7.512343
Ep: 8794   Rew: -103.64   Avg Rew: -103.50   Polyak: 0.995000   Buffer: 100.00   Loss: 106.684311  9.913567  9.971824
Ep: 8795   Rew: -103.71   Avg Rew: -103.50   Polyak: 0.995000   Buffer: 100.00   Loss: 105.509552  9.375688  8.867466
Ep: 8796   Rew: -103.63   Avg Rew: -103.52   Polyak: 0.995000   Buffer: 100.00   Loss: 105.687988  9.830574  9.096584
Ep: 8797   Rew: -103.63   Avg Rew: -103.53   Polyak: 0.995000   Buffer: 100.00   Loss: 106.248108  11.592689  10.341093
Ep: 8798   Rew: -103.67   Avg Rew: -103.53   Polyak: 

Ep: 8859   Rew: -99.12   Avg Rew: -103.66   Polyak: 0.995000   Buffer: 100.00   Loss: 105.965538  10.587412  12.059172
Ep: 8860   Rew: -100.56   Avg Rew: -103.63   Polyak: 0.995000   Buffer: 100.00   Loss: 105.899811  8.987546  6.934412
Ep: 8861   Rew: -99.79   Avg Rew: -103.60   Polyak: 0.995000   Buffer: 100.00   Loss: 105.752045  10.199453  10.397829
Ep: 8862   Rew: -101.93   Avg Rew: -103.59   Polyak: 0.995000   Buffer: 100.00   Loss: 106.047249  11.377431  7.207105
Ep: 8863   Rew: -103.60   Avg Rew: -103.59   Polyak: 0.995000   Buffer: 100.00   Loss: 105.821388  9.482833  12.054613
Ep: 8864   Rew: -104.19   Avg Rew: -103.60   Polyak: 0.995000   Buffer: 100.00   Loss: 105.449898  9.115123  9.903616
Ep: 8865   Rew: -103.66   Avg Rew: -103.60   Polyak: 0.995000   Buffer: 100.00   Loss: 105.754990  10.310564  12.834327
Ep: 8866   Rew: -102.24   Avg Rew: -103.58   Polyak: 0.995000   Buffer: 100.00   Loss: 105.633507  9.494725  8.900347
Ep: 8867   Rew: -99.28   Avg Rew: -103.55   Polyak

Ep: 8928   Rew: -102.99   Avg Rew: -103.33   Polyak: 0.995000   Buffer: 100.00   Loss: 105.543541  13.267347  8.125393
Ep: 8929   Rew: -104.20   Avg Rew: -103.32   Polyak: 0.995000   Buffer: 100.00   Loss: 105.099556  9.881862  10.663690
Ep: 8930   Rew: -104.33   Avg Rew: -103.31   Polyak: 0.995000   Buffer: 100.00   Loss: 105.540298  9.927719  9.325058
Ep: 8931   Rew: -103.70   Avg Rew: -103.30   Polyak: 0.995000   Buffer: 100.00   Loss: 105.892242  10.910130  9.491012
Ep: 8932   Rew: -103.30   Avg Rew: -103.30   Polyak: 0.995000   Buffer: 100.00   Loss: 106.333038  8.985789  10.666494
Ep: 8933   Rew: -103.59   Avg Rew: -103.30   Polyak: 0.995000   Buffer: 100.00   Loss: 105.880074  10.133923  12.332519
Ep: 8934   Rew: -103.80   Avg Rew: -103.31   Polyak: 0.995000   Buffer: 100.00   Loss: 106.261604  10.931982  11.491521
Ep: 8935   Rew: -103.37   Avg Rew: -103.32   Polyak: 0.995000   Buffer: 100.00   Loss: 106.091179  9.366798  9.448239
Ep: 8936   Rew: -104.04   Avg Rew: -103.33   Pol

Ep: 8997   Rew: -102.83   Avg Rew: -103.60   Polyak: 0.995000   Buffer: 100.00   Loss: 106.052567  10.533072  9.574104
Ep: 8998   Rew: -102.36   Avg Rew: -103.58   Polyak: 0.995000   Buffer: 100.00   Loss: 105.325401  10.841527  9.468231
Ep: 8999   Rew: -103.60   Avg Rew: -103.59   Polyak: 0.995000   Buffer: 100.00   Loss: 106.163307  10.074655  8.806149
Ep: 9000   Rew: -101.50   Avg Rew: -103.55   Polyak: 0.995000   Buffer: 100.00   Loss: 106.711555  9.603312  7.760935
Ep: 9001   Rew: -102.55   Avg Rew: -103.55   Polyak: 0.995000   Buffer: 100.00   Loss: 106.359634  10.382481  6.997513
Ep: 9002   Rew: -104.17   Avg Rew: -103.56   Polyak: 0.995000   Buffer: 100.00   Loss: 106.541168  9.609018  12.428221
Ep: 9003   Rew: -100.42   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 106.235237  10.558956  8.913749
Ep: 9004   Rew: -102.25   Avg Rew: -103.52   Polyak: 0.995000   Buffer: 100.00   Loss: 106.394539  10.362140  8.815149
Ep: 9005   Rew: -102.56   Avg Rew: -103.52   Poly

Ep: 9066   Rew: -103.09   Avg Rew: -103.31   Polyak: 0.995000   Buffer: 100.00   Loss: 105.854118  11.310815  11.489726
Ep: 9067   Rew: -102.80   Avg Rew: -103.31   Polyak: 0.995000   Buffer: 100.00   Loss: 106.349625  9.297109  8.561502
Ep: 9068   Rew: -104.35   Avg Rew: -103.32   Polyak: 0.995000   Buffer: 100.00   Loss: 106.521713  8.263052  7.166952
Ep: 9069   Rew: -104.85   Avg Rew: -103.33   Polyak: 0.995000   Buffer: 100.00   Loss: 106.119720  7.760496  10.348334
Ep: 9070   Rew: -104.27   Avg Rew: -103.33   Polyak: 0.995000   Buffer: 100.00   Loss: 106.745392  10.241517  7.857864
Ep: 9071   Rew: -104.37   Avg Rew: -103.33   Polyak: 0.995000   Buffer: 100.00   Loss: 105.530876  8.899334  8.170756
Ep: 9072   Rew: -104.37   Avg Rew: -103.35   Polyak: 0.995000   Buffer: 100.00   Loss: 105.863319  9.784605  8.376686
Ep: 9073   Rew: -104.14   Avg Rew: -103.36   Polyak: 0.995000   Buffer: 100.00   Loss: 106.231789  10.602100  10.368038
Ep: 9074   Rew: -103.09   Avg Rew: -103.35   Polya

Ep: 9135   Rew: -103.47   Avg Rew: -103.31   Polyak: 0.995000   Buffer: 100.00   Loss: 106.122681  9.838441  8.422743
Ep: 9136   Rew: -102.69   Avg Rew: -103.32   Polyak: 0.995000   Buffer: 100.00   Loss: 106.794563  9.357578  7.728750
Ep: 9137   Rew: -102.76   Avg Rew: -103.31   Polyak: 0.995000   Buffer: 100.00   Loss: 105.992752  10.634384  9.604424
Ep: 9138   Rew: -102.99   Avg Rew: -103.30   Polyak: 0.995000   Buffer: 100.00   Loss: 106.105446  10.156265  9.421399
Ep: 9139   Rew: -104.16   Avg Rew: -103.32   Polyak: 0.995000   Buffer: 100.00   Loss: 105.338211  7.963894  8.591148
Ep: 9140   Rew: -104.18   Avg Rew: -103.32   Polyak: 0.995000   Buffer: 100.00   Loss: 105.469986  10.290033  9.345566
Ep: 9141   Rew: -104.12   Avg Rew: -103.35   Polyak: 0.995000   Buffer: 100.00   Loss: 106.147141  9.407764  7.682508
Ep: 9142   Rew: -104.02   Avg Rew: -103.36   Polyak: 0.995000   Buffer: 100.00   Loss: 106.429756  11.502342  10.990185
Ep: 9143   Rew: -104.20   Avg Rew: -103.36   Polyak

Ep: 9204   Rew: -104.16   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 105.569061  9.888750  8.513291
Ep: 9205   Rew: -103.97   Avg Rew: -103.55   Polyak: 0.995000   Buffer: 100.00   Loss: 105.938782  8.332344  9.488968
Ep: 9206   Rew: -102.59   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 105.573700  10.591306  9.754830
Ep: 9207   Rew: -103.38   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 106.228745  9.438897  9.936753
Ep: 9208   Rew: -103.91   Avg Rew: -103.55   Polyak: 0.995000   Buffer: 100.00   Loss: 106.396271  11.926445  10.674364
Ep: 9209   Rew: -103.26   Avg Rew: -103.55   Polyak: 0.995000   Buffer: 100.00   Loss: 105.836105  11.711632  10.378078
Ep: 9210   Rew: -103.93   Avg Rew: -103.56   Polyak: 0.995000   Buffer: 100.00   Loss: 105.932716  9.589852  8.054655
Ep: 9211   Rew: -103.65   Avg Rew: -103.56   Polyak: 0.995000   Buffer: 100.00   Loss: 105.369987  10.354809  10.597980
Ep: 9212   Rew: -102.57   Avg Rew: -103.56   Poly

Ep: 9273   Rew: -103.64   Avg Rew: -103.70   Polyak: 0.995000   Buffer: 100.00   Loss: 105.892784  9.987913  9.346771
Ep: 9274   Rew: -103.26   Avg Rew: -103.71   Polyak: 0.995000   Buffer: 100.00   Loss: 106.006233  9.704872  10.077324
Ep: 9275   Rew: -103.41   Avg Rew: -103.72   Polyak: 0.995000   Buffer: 100.00   Loss: 106.046638  11.177965  9.727787
Ep: 9276   Rew: -104.95   Avg Rew: -103.72   Polyak: 0.995000   Buffer: 100.00   Loss: 106.193451  9.754433  11.863955
Ep: 9277   Rew: -103.71   Avg Rew: -103.71   Polyak: 0.995000   Buffer: 100.00   Loss: 106.232468  8.884169  11.387240
Ep: 9278   Rew: -103.34   Avg Rew: -103.70   Polyak: 0.995000   Buffer: 100.00   Loss: 105.989723  11.374327  12.456686
Ep: 9279   Rew: -102.43   Avg Rew: -103.69   Polyak: 0.995000   Buffer: 100.00   Loss: 106.027718  11.065919  11.826103
Ep: 9280   Rew: -103.16   Avg Rew: -103.67   Polyak: 0.995000   Buffer: 100.00   Loss: 106.366280  10.504868  9.767670
Ep: 9281   Rew: -104.40   Avg Rew: -103.67   Po

Ep: 9342   Rew: -104.38   Avg Rew: -103.51   Polyak: 0.995000   Buffer: 100.00   Loss: 105.833031  9.171177  11.363461
Ep: 9343   Rew: -102.46   Avg Rew: -103.50   Polyak: 0.995000   Buffer: 100.00   Loss: 106.096359  10.025865  11.112974
Ep: 9344   Rew: -102.22   Avg Rew: -103.48   Polyak: 0.995000   Buffer: 100.00   Loss: 104.968491  9.351435  8.999565
Ep: 9345   Rew: -104.26   Avg Rew: -103.48   Polyak: 0.995000   Buffer: 100.00   Loss: 105.627045  10.478785  13.759638
Ep: 9346   Rew: -102.46   Avg Rew: -103.48   Polyak: 0.995000   Buffer: 100.00   Loss: 105.575310  8.201581  8.922132
Ep: 9347   Rew: -104.16   Avg Rew: -103.48   Polyak: 0.995000   Buffer: 100.00   Loss: 106.188148  10.941832  11.710075
Ep: 9348   Rew: -104.20   Avg Rew: -103.48   Polyak: 0.995000   Buffer: 100.00   Loss: 106.142632  9.784672  7.965698
Ep: 9349   Rew: -103.62   Avg Rew: -103.47   Polyak: 0.995000   Buffer: 100.00   Loss: 105.350273  11.187905  9.686475
Ep: 9350   Rew: -103.58   Avg Rew: -103.48   Pol

Ep: 9411   Rew: -102.90   Avg Rew: -103.45   Polyak: 0.995000   Buffer: 100.00   Loss: 105.334488  8.781485  10.193314
Ep: 9412   Rew: -103.00   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 105.166771  8.303525  7.591225
Ep: 9413   Rew: -103.17   Avg Rew: -103.42   Polyak: 0.995000   Buffer: 100.00   Loss: 105.428802  10.814629  9.758214
Ep: 9414   Rew: -103.15   Avg Rew: -103.43   Polyak: 0.995000   Buffer: 100.00   Loss: 105.652771  11.398858  9.988277
Ep: 9415   Rew: -102.02   Avg Rew: -103.42   Polyak: 0.995000   Buffer: 100.00   Loss: 105.546150  9.065783  12.539315
Ep: 9416   Rew: -102.42   Avg Rew: -103.42   Polyak: 0.995000   Buffer: 100.00   Loss: 105.878166  9.408927  9.082937
Ep: 9417   Rew: -102.05   Avg Rew: -103.40   Polyak: 0.995000   Buffer: 100.00   Loss: 105.902458  10.156531  10.758867
Ep: 9418   Rew: -101.26   Avg Rew: -103.38   Polyak: 0.995000   Buffer: 100.00   Loss: 106.259293  10.794990  10.663099
Ep: 9419   Rew: -103.12   Avg Rew: -103.37   Pol

Ep: 9480   Rew: -103.07   Avg Rew: -103.21   Polyak: 0.995000   Buffer: 100.00   Loss: 105.126778  11.651731  9.867519
Ep: 9481   Rew: -101.97   Avg Rew: -103.19   Polyak: 0.995000   Buffer: 100.00   Loss: 105.241142  13.762933  10.267818
Ep: 9482   Rew: -102.88   Avg Rew: -103.18   Polyak: 0.995000   Buffer: 100.00   Loss: 105.706963  11.348247  10.674319
Ep: 9483   Rew: -102.63   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 105.742493  7.759892  7.973520
Ep: 9484   Rew: -102.60   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 104.868095  9.409416  9.264355
Ep: 9485   Rew: -100.87   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 105.776756  8.738269  8.798109
Ep: 9486   Rew: -103.19   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 105.764900  9.296512  7.822519
Ep: 9487   Rew: -103.18   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 105.685417  8.680207  9.595670
Ep: 9488   Rew: -102.21   Avg Rew: -103.13   Polyak

Ep: 9549   Rew: -104.32   Avg Rew: -102.69   Polyak: 0.995000   Buffer: 100.00   Loss: 105.514107  9.415292  9.911355
Ep: 9550   Rew: -104.73   Avg Rew: -102.71   Polyak: 0.995000   Buffer: 100.00   Loss: 105.502899  10.661993  10.169926
Ep: 9551   Rew: -103.43   Avg Rew: -102.71   Polyak: 0.995000   Buffer: 100.00   Loss: 105.219955  10.817316  8.794390
Ep: 9552   Rew: -103.74   Avg Rew: -102.73   Polyak: 0.995000   Buffer: 100.00   Loss: 105.728592  10.066847  10.182239
Ep: 9553   Rew: -104.45   Avg Rew: -102.75   Polyak: 0.995000   Buffer: 100.00   Loss: 106.011696  12.085835  8.574706
Ep: 9554   Rew: -104.55   Avg Rew: -102.77   Polyak: 0.995000   Buffer: 100.00   Loss: 105.748131  10.554357  10.652015
Ep: 9555   Rew: -104.70   Avg Rew: -102.80   Polyak: 0.995000   Buffer: 100.00   Loss: 105.213280  10.591444  10.930920
Ep: 9556   Rew: -104.02   Avg Rew: -102.80   Polyak: 0.995000   Buffer: 100.00   Loss: 106.334641  10.924091  8.672183
Ep: 9557   Rew: -104.20   Avg Rew: -102.83   

Ep: 9618   Rew: -104.52   Avg Rew: -103.34   Polyak: 0.995000   Buffer: 100.00   Loss: 105.242065  9.725751  10.793104
Ep: 9619   Rew: -103.43   Avg Rew: -103.34   Polyak: 0.995000   Buffer: 100.00   Loss: 105.783302  9.302799  9.132963
Ep: 9620   Rew: -104.57   Avg Rew: -103.36   Polyak: 0.995000   Buffer: 100.00   Loss: 105.759590  9.761085  11.387060
Ep: 9621   Rew: -103.98   Avg Rew: -103.38   Polyak: 0.995000   Buffer: 100.00   Loss: 105.861679  9.598477  10.468207
Ep: 9622   Rew: -103.78   Avg Rew: -103.39   Polyak: 0.995000   Buffer: 100.00   Loss: 105.244843  9.370739  9.367647
Ep: 9623   Rew: -103.94   Avg Rew: -103.41   Polyak: 0.995000   Buffer: 100.00   Loss: 105.578758  9.667179  8.558624
Ep: 9624   Rew: -102.08   Avg Rew: -103.42   Polyak: 0.995000   Buffer: 100.00   Loss: 105.580933  9.353167  10.801429
Ep: 9625   Rew: -103.74   Avg Rew: -103.42   Polyak: 0.995000   Buffer: 100.00   Loss: 105.764252  10.609287  8.073653
Ep: 9626   Rew: -102.53   Avg Rew: -103.42   Polyak

Ep: 9687   Rew: -102.90   Avg Rew: -103.45   Polyak: 0.995000   Buffer: 100.00   Loss: 106.214981  11.264090  10.847168
Ep: 9688   Rew: -102.58   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 105.049522  8.471799  10.837673
Ep: 9689   Rew: -103.00   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 104.992973  9.873416  9.554855
Ep: 9690   Rew: -103.21   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 105.158012  11.572512  8.419221
Ep: 9691   Rew: -103.49   Avg Rew: -103.45   Polyak: 0.995000   Buffer: 100.00   Loss: 105.466827  10.314287  9.194094
Ep: 9692   Rew: -105.01   Avg Rew: -103.48   Polyak: 0.995000   Buffer: 100.00   Loss: 105.679115  8.350419  9.533303
Ep: 9693   Rew: -103.90   Avg Rew: -103.49   Polyak: 0.995000   Buffer: 100.00   Loss: 105.798088  11.947803  12.275848
Ep: 9694   Rew: -103.80   Avg Rew: -103.49   Polyak: 0.995000   Buffer: 100.00   Loss: 105.448013  8.581811  12.212831
Ep: 9695   Rew: -103.93   Avg Rew: -103.51   Pol

Ep: 9757   Rew: -102.12   Avg Rew: -103.35   Polyak: 0.995000   Buffer: 100.00   Loss: 105.589226  10.680650  9.389066
Ep: 9758   Rew: -102.61   Avg Rew: -103.34   Polyak: 0.995000   Buffer: 100.00   Loss: 105.155815  10.112735  12.642296
Ep: 9759   Rew: -101.35   Avg Rew: -103.32   Polyak: 0.995000   Buffer: 100.00   Loss: 104.538361  8.075496  8.618628
Ep: 9760   Rew: -102.46   Avg Rew: -103.31   Polyak: 0.995000   Buffer: 100.00   Loss: 105.634140  9.344786  11.799439
Ep: 9761   Rew: -101.97   Avg Rew: -103.29   Polyak: 0.995000   Buffer: 100.00   Loss: 105.317406  10.546273  10.253222
Ep: 9762   Rew: -101.62   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 105.625343  9.378059  7.447642
Ep: 9763   Rew: -102.24   Avg Rew: -103.26   Polyak: 0.995000   Buffer: 100.00   Loss: 105.678368  9.336133  9.268615
Ep: 9764   Rew: -101.97   Avg Rew: -103.24   Polyak: 0.995000   Buffer: 100.00   Loss: 105.466110  12.004490  10.551081
Ep: 9765   Rew: -102.45   Avg Rew: -103.24   Pol

Ep: 9826   Rew: -103.07   Avg Rew: -102.99   Polyak: 0.995000   Buffer: 100.00   Loss: 105.287498  12.184020  9.453014
Ep: 9827   Rew: -103.18   Avg Rew: -102.99   Polyak: 0.995000   Buffer: 100.00   Loss: 105.195839  8.288733  9.186880
Ep: 9828   Rew: -103.03   Avg Rew: -102.99   Polyak: 0.995000   Buffer: 100.00   Loss: 105.623283  9.849449  8.938061
Ep: 9829   Rew: -102.83   Avg Rew: -102.99   Polyak: 0.995000   Buffer: 100.00   Loss: 105.044342  10.500118  9.424446
Ep: 9830   Rew: -102.25   Avg Rew: -102.98   Polyak: 0.995000   Buffer: 100.00   Loss: 106.053253  8.127967  11.828717
Ep: 9831   Rew: -102.37   Avg Rew: -102.97   Polyak: 0.995000   Buffer: 100.00   Loss: 105.737465  8.555182  10.983252
Ep: 9832   Rew: -101.91   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 105.292419  9.812369  8.594726
Ep: 9833   Rew: -102.61   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.104614  7.931534  8.105680
Ep: 9834   Rew: -101.31   Avg Rew: -102.92   Polyak:

Ep: 9896   Rew: -100.48   Avg Rew: -102.31   Polyak: 0.995000   Buffer: 100.00   Loss: 104.855179  8.310677  9.970951
Ep: 9897   Rew: -100.86   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 105.965080  8.603436  12.575675
Ep: 9898   Rew: -102.25   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 105.464500  11.763190  8.555556
Ep: 9899   Rew: -102.21   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 106.275749  8.158068  10.448762
Ep: 9900   Rew: -101.60   Avg Rew: -102.28   Polyak: 0.995000   Buffer: 100.00   Loss: 105.927330  9.028772  11.380078
Ep: 9901   Rew: -101.07   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 105.311089  9.454741  9.208498
Ep: 9902   Rew: -101.44   Avg Rew: -102.23   Polyak: 0.995000   Buffer: 100.00   Loss: 105.943474  11.582020  9.898001
Ep: 9903   Rew: -101.48   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 104.998604  9.608079  10.108801
Ep: 9904   Rew: -101.71   Avg Rew: -102.19   Polya

Ep: 9965   Rew: -101.86   Avg Rew: -102.06   Polyak: 0.995000   Buffer: 100.00   Loss: 105.734932  11.785524  10.133550
Ep: 9966   Rew: -101.37   Avg Rew: -102.05   Polyak: 0.995000   Buffer: 100.00   Loss: 105.542084  9.957662  10.098221
Ep: 9967   Rew: -103.10   Avg Rew: -102.05   Polyak: 0.995000   Buffer: 100.00   Loss: 105.241562  12.008331  10.023867
Ep: 9968   Rew: -102.02   Avg Rew: -102.05   Polyak: 0.995000   Buffer: 100.00   Loss: 105.791641  9.489116  8.624628
Ep: 9969   Rew: -101.79   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 105.904823  8.382888  10.372081
Ep: 9970   Rew: -101.48   Avg Rew: -102.06   Polyak: 0.995000   Buffer: 100.00   Loss: 105.757332  9.089013  9.106186
Ep: 9971   Rew: -102.21   Avg Rew: -102.06   Polyak: 0.995000   Buffer: 100.00   Loss: 105.871170  10.068339  10.067436
Ep: 9972   Rew: -102.47   Avg Rew: -102.06   Polyak: 0.995000   Buffer: 100.00   Loss: 105.270096  9.719596  8.524007
Ep: 9973   Rew: -102.00   Avg Rew: -102.06   Pol

Ep: 10034   Rew: -103.38   Avg Rew: -102.79   Polyak: 0.995000   Buffer: 100.00   Loss: 105.022148  8.806275  7.562934
Ep: 10035   Rew: -102.67   Avg Rew: -102.79   Polyak: 0.995000   Buffer: 100.00   Loss: 105.273880  8.566561  9.496351
Ep: 10036   Rew: -102.95   Avg Rew: -102.76   Polyak: 0.995000   Buffer: 100.00   Loss: 106.008614  8.831150  9.814936
Ep: 10037   Rew: -101.23   Avg Rew: -102.76   Polyak: 0.995000   Buffer: 100.00   Loss: 105.509354  9.832216  11.976290
Ep: 10038   Rew: -100.17   Avg Rew: -102.71   Polyak: 0.995000   Buffer: 100.00   Loss: 105.593132  9.737672  8.710938
Ep: 10039   Rew: -99.36   Avg Rew: -102.68   Polyak: 0.995000   Buffer: 100.00   Loss: 105.756340  10.737914  9.825325
Ep: 10040   Rew: -101.52   Avg Rew: -102.67   Polyak: 0.995000   Buffer: 100.00   Loss: 105.743744  11.431728  9.430224
Ep: 10041   Rew: -100.98   Avg Rew: -102.66   Polyak: 0.995000   Buffer: 100.00   Loss: 105.959900  9.856010  11.193678
Ep: 10042   Rew: -103.14   Avg Rew: -102.66  

Ep: 10103   Rew: -103.51   Avg Rew: -102.91   Polyak: 0.995000   Buffer: 100.00   Loss: 105.523369  7.675235  8.787537
Ep: 10104   Rew: -103.84   Avg Rew: -102.91   Polyak: 0.995000   Buffer: 100.00   Loss: 105.639008  9.428274  9.281082
Ep: 10105   Rew: -104.25   Avg Rew: -102.92   Polyak: 0.995000   Buffer: 100.00   Loss: 105.487968  8.486632  9.917035
Ep: 10106   Rew: -102.39   Avg Rew: -102.91   Polyak: 0.995000   Buffer: 100.00   Loss: 105.694420  11.628751  8.613711
Ep: 10107   Rew: -103.59   Avg Rew: -102.91   Polyak: 0.995000   Buffer: 100.00   Loss: 105.570854  8.914380  12.302874
Ep: 10108   Rew: -103.45   Avg Rew: -102.90   Polyak: 0.995000   Buffer: 100.00   Loss: 105.472824  9.093111  9.056485
Ep: 10109   Rew: -102.63   Avg Rew: -102.89   Polyak: 0.995000   Buffer: 100.00   Loss: 105.638054  9.576195  8.092299
Ep: 10110   Rew: -103.65   Avg Rew: -102.89   Polyak: 0.995000   Buffer: 100.00   Loss: 105.892105  9.559324  13.181018
Ep: 10111   Rew: -103.64   Avg Rew: -102.90  

Ep: 10172   Rew: -103.13   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 105.940964  9.740004  9.661212
Ep: 10173   Rew: -103.18   Avg Rew: -103.15   Polyak: 0.995000   Buffer: 100.00   Loss: 105.850388  9.901669  9.096904
Ep: 10174   Rew: -103.28   Avg Rew: -103.15   Polyak: 0.995000   Buffer: 100.00   Loss: 105.312004  8.476165  11.934017
Ep: 10175   Rew: -103.15   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 104.962074  7.471381  10.071760
Ep: 10176   Rew: -102.83   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 105.391258  9.956026  10.348083
Ep: 10177   Rew: -102.38   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 105.316971  7.758309  9.732635
Ep: 10178   Rew: -103.02   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 105.747078  7.896151  8.990100
Ep: 10179   Rew: -102.85   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 105.363724  10.079174  10.103274
Ep: 10180   Rew: -103.81   Avg Rew: -103.16

Ep: 10241   Rew: -102.75   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 105.246994  10.754402  9.327248
Ep: 10242   Rew: -103.41   Avg Rew: -103.13   Polyak: 0.995000   Buffer: 100.00   Loss: 106.218628  10.031509  11.344162
Ep: 10243   Rew: -102.15   Avg Rew: -103.12   Polyak: 0.995000   Buffer: 100.00   Loss: 105.975830  9.286416  7.841002
Ep: 10244   Rew: -103.82   Avg Rew: -103.12   Polyak: 0.995000   Buffer: 100.00   Loss: 105.695168  12.063823  8.651188
Ep: 10245   Rew: -103.14   Avg Rew: -103.12   Polyak: 0.995000   Buffer: 100.00   Loss: 105.244019  10.889412  11.492104
Ep: 10246   Rew: -102.48   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 105.175804  8.579008  9.744688
Ep: 10247   Rew: -103.52   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 105.335449  11.351989  8.923790
Ep: 10248   Rew: -104.59   Avg Rew: -103.13   Polyak: 0.995000   Buffer: 100.00   Loss: 105.295471  7.976574  10.077140
Ep: 10249   Rew: -104.04   Avg Rew: -103

Ep: 10310   Rew: -102.32   Avg Rew: -103.45   Polyak: 0.995000   Buffer: 100.00   Loss: 104.840523  9.592608  9.991074
Ep: 10311   Rew: -102.48   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 105.225929  9.419686  11.481430
Ep: 10312   Rew: -103.59   Avg Rew: -103.44   Polyak: 0.995000   Buffer: 100.00   Loss: 104.983681  7.839377  10.173161
Ep: 10313   Rew: -102.33   Avg Rew: -103.43   Polyak: 0.995000   Buffer: 100.00   Loss: 105.207802  8.975156  10.844164
Ep: 10314   Rew: -102.10   Avg Rew: -103.41   Polyak: 0.995000   Buffer: 100.00   Loss: 105.049370  11.960773  8.606878
Ep: 10315   Rew: -103.50   Avg Rew: -103.41   Polyak: 0.995000   Buffer: 100.00   Loss: 104.840942  9.531841  9.691502
Ep: 10316   Rew: -103.24   Avg Rew: -103.42   Polyak: 0.995000   Buffer: 100.00   Loss: 105.688889  8.789150  10.350333
Ep: 10317   Rew: -103.95   Avg Rew: -103.42   Polyak: 0.995000   Buffer: 100.00   Loss: 105.362015  8.426702  8.680874
Ep: 10318   Rew: -103.41   Avg Rew: -103.41

Ep: 10379   Rew: -101.59   Avg Rew: -103.57   Polyak: 0.995000   Buffer: 100.00   Loss: 105.075897  11.308423  11.090378
Ep: 10380   Rew: -103.55   Avg Rew: -103.57   Polyak: 0.995000   Buffer: 100.00   Loss: 104.280396  10.503488  8.393230
Ep: 10381   Rew: -102.75   Avg Rew: -103.55   Polyak: 0.995000   Buffer: 100.00   Loss: 104.887489  8.882256  8.893862
Ep: 10382   Rew: -102.46   Avg Rew: -103.54   Polyak: 0.995000   Buffer: 100.00   Loss: 105.495522  13.793932  11.984106
Ep: 10383   Rew: -102.12   Avg Rew: -103.52   Polyak: 0.995000   Buffer: 100.00   Loss: 105.250511  7.615741  11.435676
Ep: 10384   Rew: -102.48   Avg Rew: -103.51   Polyak: 0.995000   Buffer: 100.00   Loss: 105.474266  9.443626  9.291147
Ep: 10385   Rew: -101.74   Avg Rew: -103.49   Polyak: 0.995000   Buffer: 100.00   Loss: 104.988091  8.992155  11.679310
Ep: 10386   Rew: -102.89   Avg Rew: -103.48   Polyak: 0.995000   Buffer: 100.00   Loss: 105.323227  9.890331  7.100317
Ep: 10387   Rew: -102.60   Avg Rew: -103.

Ep: 10448   Rew: -103.50   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 105.181114  9.301904  11.418552
Ep: 10449   Rew: -103.48   Avg Rew: -103.12   Polyak: 0.995000   Buffer: 100.00   Loss: 105.699478  8.382906  8.398962
Ep: 10450   Rew: -103.35   Avg Rew: -103.09   Polyak: 0.995000   Buffer: 100.00   Loss: 105.284416  11.599324  10.445637
Ep: 10451   Rew: -103.14   Avg Rew: -103.04   Polyak: 0.995000   Buffer: 100.00   Loss: 105.385826  8.550605  9.654737
Ep: 10452   Rew: -103.42   Avg Rew: -102.92   Polyak: 0.995000   Buffer: 100.00   Loss: 104.784073  10.328752  8.348980
Ep: 10453   Rew: -103.01   Avg Rew: -102.86   Polyak: 0.995000   Buffer: 100.00   Loss: 105.062752  11.892863  10.581549
Ep: 10454   Rew: -102.29   Avg Rew: -102.85   Polyak: 0.995000   Buffer: 100.00   Loss: 105.134895  7.693476  8.421600
Ep: 10455   Rew: -102.33   Avg Rew: -102.84   Polyak: 0.995000   Buffer: 100.00   Loss: 105.698296  9.682188  8.151836
Ep: 10456   Rew: -102.83   Avg Rew: -102.8

Ep: 10517   Rew: -101.39   Avg Rew: -102.86   Polyak: 0.995000   Buffer: 100.00   Loss: 104.693855  9.626203  10.120634
Ep: 10518   Rew: -105.46   Avg Rew: -102.90   Polyak: 0.995000   Buffer: 100.00   Loss: 105.733521  9.479609  8.471149
Ep: 10519   Rew: -102.25   Avg Rew: -102.88   Polyak: 0.995000   Buffer: 100.00   Loss: 106.124115  9.442049  10.043588
Ep: 10520   Rew: -104.58   Avg Rew: -102.90   Polyak: 0.995000   Buffer: 100.00   Loss: 105.138092  8.865607  8.633372
Ep: 10521   Rew: -101.75   Avg Rew: -102.88   Polyak: 0.995000   Buffer: 100.00   Loss: 105.835197  12.166794  8.428490
Ep: 10522   Rew: -103.50   Avg Rew: -102.88   Polyak: 0.995000   Buffer: 100.00   Loss: 105.048866  7.443115  13.137919
Ep: 10523   Rew: -104.16   Avg Rew: -102.89   Polyak: 0.995000   Buffer: 100.00   Loss: 105.727478  8.401648  9.908724
Ep: 10524   Rew: -103.93   Avg Rew: -102.90   Polyak: 0.995000   Buffer: 100.00   Loss: 105.465179  9.801001  7.181665
Ep: 10525   Rew: -103.08   Avg Rew: -102.90 

Ep: 10586   Rew: -103.43   Avg Rew: -103.09   Polyak: 0.995000   Buffer: 100.00   Loss: 104.855171  6.397650  9.206316
Ep: 10587   Rew: -103.89   Avg Rew: -103.10   Polyak: 0.995000   Buffer: 100.00   Loss: 104.915466  7.325200  8.596293
Ep: 10588   Rew: -103.29   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 104.851013  8.628102  8.059989
Ep: 10589   Rew: -102.93   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 104.856453  9.210011  8.824671
Ep: 10590   Rew: -103.37   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 105.396202  11.181105  9.837283
Ep: 10591   Rew: -103.52   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 105.363251  9.880131  9.095238
Ep: 10592   Rew: -103.41   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 105.954117  12.384430  10.132095
Ep: 10593   Rew: -103.03   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 104.940750  10.207369  10.201978
Ep: 10594   Rew: -102.53   Avg Rew: -103.12

Ep: 10655   Rew: -102.63   Avg Rew: -103.06   Polyak: 0.995000   Buffer: 100.00   Loss: 105.402359  11.867354  9.013328
Ep: 10656   Rew: -103.16   Avg Rew: -103.07   Polyak: 0.995000   Buffer: 100.00   Loss: 104.988075  8.591655  10.216694
Ep: 10657   Rew: -102.03   Avg Rew: -103.07   Polyak: 0.995000   Buffer: 100.00   Loss: 104.973984  8.702852  8.367459
Ep: 10658   Rew: -103.21   Avg Rew: -103.08   Polyak: 0.995000   Buffer: 100.00   Loss: 105.265984  11.650789  9.320547
Ep: 10659   Rew: -103.63   Avg Rew: -103.09   Polyak: 0.995000   Buffer: 100.00   Loss: 105.547409  8.192931  7.409808
Ep: 10660   Rew: -103.30   Avg Rew: -103.10   Polyak: 0.995000   Buffer: 100.00   Loss: 106.540039  9.624644  10.591973
Ep: 10661   Rew: -102.56   Avg Rew: -103.10   Polyak: 0.995000   Buffer: 100.00   Loss: 104.900467  11.716675  9.408298
Ep: 10662   Rew: -102.16   Avg Rew: -103.09   Polyak: 0.995000   Buffer: 100.00   Loss: 105.141525  7.324808  10.162901
Ep: 10663   Rew: -102.56   Avg Rew: -103.0

Ep: 10724   Rew: -102.77   Avg Rew: -102.92   Polyak: 0.995000   Buffer: 100.00   Loss: 105.535858  8.875238  10.082013
Ep: 10725   Rew: -102.33   Avg Rew: -102.92   Polyak: 0.995000   Buffer: 100.00   Loss: 105.615639  8.644615  8.929471
Ep: 10726   Rew: -102.74   Avg Rew: -102.92   Polyak: 0.995000   Buffer: 100.00   Loss: 105.904785  7.295514  9.530875
Ep: 10727   Rew: -102.11   Avg Rew: -102.93   Polyak: 0.995000   Buffer: 100.00   Loss: 105.436165  10.687782  9.389364
Ep: 10728   Rew: -102.93   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.223457  12.049100  6.759203
Ep: 10729   Rew: -102.56   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 105.196823  8.647266  9.469702
Ep: 10730   Rew: -102.21   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 105.314850  8.074203  9.471840
Ep: 10731   Rew: -101.99   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.255737  8.888063  9.449167
Ep: 10732   Rew: -102.51   Avg Rew: -102.96  

Ep: 10793   Rew: -102.60   Avg Rew: -103.20   Polyak: 0.995000   Buffer: 100.00   Loss: 105.158188  9.328947  8.951788
Ep: 10794   Rew: -103.46   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 104.728081  8.203562  9.791825
Ep: 10795   Rew: -104.19   Avg Rew: -103.16   Polyak: 0.995000   Buffer: 100.00   Loss: 105.542038  7.956381  11.977916
Ep: 10796   Rew: -103.84   Avg Rew: -103.16   Polyak: 0.995000   Buffer: 100.00   Loss: 104.803932  7.766452  7.759717
Ep: 10797   Rew: -103.84   Avg Rew: -103.15   Polyak: 0.995000   Buffer: 100.00   Loss: 105.883125  10.351849  9.086718
Ep: 10798   Rew: -103.29   Avg Rew: -103.15   Polyak: 0.995000   Buffer: 100.00   Loss: 105.155029  8.815941  9.008620
Ep: 10799   Rew: -103.45   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 105.079765  10.405705  10.206286
Ep: 10800   Rew: -103.69   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 105.726639  7.813887  9.993084
Ep: 10801   Rew: -103.74   Avg Rew: -103.15 

Ep: 10862   Rew: -102.07   Avg Rew: -103.28   Polyak: 0.995000   Buffer: 100.00   Loss: 105.046684  9.370852  8.505239
Ep: 10863   Rew: -103.58   Avg Rew: -103.29   Polyak: 0.995000   Buffer: 100.00   Loss: 105.409203  8.349359  8.433724
Ep: 10864   Rew: -103.51   Avg Rew: -103.29   Polyak: 0.995000   Buffer: 100.00   Loss: 105.303238  10.167866  10.926283
Ep: 10865   Rew: -103.32   Avg Rew: -103.29   Polyak: 0.995000   Buffer: 100.00   Loss: 104.872543  8.021212  7.934325
Ep: 10866   Rew: -102.49   Avg Rew: -103.29   Polyak: 0.995000   Buffer: 100.00   Loss: 105.573273  8.074121  7.540915
Ep: 10867   Rew: -102.03   Avg Rew: -103.27   Polyak: 0.995000   Buffer: 100.00   Loss: 105.739082  7.840858  8.225823
Ep: 10868   Rew: -102.46   Avg Rew: -103.26   Polyak: 0.995000   Buffer: 100.00   Loss: 105.029175  9.937122  9.104177
Ep: 10869   Rew: -103.16   Avg Rew: -103.26   Polyak: 0.995000   Buffer: 100.00   Loss: 105.306816  10.440760  9.302055
Ep: 10870   Rew: -104.30   Avg Rew: -103.27  

Ep: 10931   Rew: -101.25   Avg Rew: -102.98   Polyak: 0.995000   Buffer: 100.00   Loss: 105.883217  8.547604  11.456978
Ep: 10932   Rew: -101.69   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 104.650078  8.395664  11.204637
Ep: 10933   Rew: -102.56   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 104.548386  10.084610  7.798048
Ep: 10934   Rew: -102.64   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.474854  9.767958  7.918952
Ep: 10935   Rew: -102.71   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.823341  8.372460  8.853607
Ep: 10936   Rew: -102.89   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.857849  10.971119  12.495315
Ep: 10937   Rew: -103.51   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.482727  9.830298  9.678697
Ep: 10938   Rew: -103.35   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.158325  10.813292  9.207958
Ep: 10939   Rew: -103.53   Avg Rew: -102.9

Ep: 11000   Rew: -102.93   Avg Rew: -102.64   Polyak: 0.995000   Buffer: 100.00   Loss: 105.133270  9.785998  10.159798
Ep: 11001   Rew: -102.60   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 104.567009  9.596453  10.323408
Ep: 11002   Rew: -100.43   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 105.184067  8.456538  9.459812
Ep: 11003   Rew: -101.34   Avg Rew: -102.58   Polyak: 0.995000   Buffer: 100.00   Loss: 105.036407  10.174434  10.514380
Ep: 11004   Rew: -101.57   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 105.218201  9.269701  9.689174
Ep: 11005   Rew: -102.52   Avg Rew: -102.54   Polyak: 0.995000   Buffer: 100.00   Loss: 104.849915  9.926243  8.325379
Ep: 11006   Rew: -102.95   Avg Rew: -102.52   Polyak: 0.995000   Buffer: 100.00   Loss: 105.412994  9.000156  7.317638
Ep: 11007   Rew: -103.30   Avg Rew: -102.52   Polyak: 0.995000   Buffer: 100.00   Loss: 104.805092  8.330484  10.291548
Ep: 11008   Rew: -103.43   Avg Rew: -102.52

Ep: 11069   Rew: -102.96   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 100.00   Loss: 105.483475  10.759567  8.372994
Ep: 11070   Rew: -103.36   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 100.00   Loss: 105.701744  7.729471  9.009082
Ep: 11071   Rew: -102.50   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 100.00   Loss: 105.400139  8.895142  10.146031
Ep: 11072   Rew: -102.12   Avg Rew: -102.38   Polyak: 0.995000   Buffer: 100.00   Loss: 105.292183  8.696146  8.222915
Ep: 11073   Rew: -102.52   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 105.375694  8.688735  8.922751
Ep: 11074   Rew: -101.74   Avg Rew: -102.38   Polyak: 0.995000   Buffer: 100.00   Loss: 104.803864  11.296604  9.763358
Ep: 11075   Rew: -102.20   Avg Rew: -102.37   Polyak: 0.995000   Buffer: 100.00   Loss: 105.449944  9.616179  8.869979
Ep: 11076   Rew: -102.95   Avg Rew: -102.37   Polyak: 0.995000   Buffer: 100.00   Loss: 105.519875  6.880901  6.722571
Ep: 11077   Rew: -102.68   Avg Rew: -102.37  

Ep: 11138   Rew: -103.69   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.399414  9.006528  7.672828
Ep: 11139   Rew: -103.27   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 104.782059  9.938265  9.019453
Ep: 11140   Rew: -103.20   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 105.006950  9.903626  8.615240
Ep: 11141   Rew: -103.05   Avg Rew: -102.97   Polyak: 0.995000   Buffer: 100.00   Loss: 104.990433  8.871072  8.913379
Ep: 11142   Rew: -103.48   Avg Rew: -102.97   Polyak: 0.995000   Buffer: 100.00   Loss: 105.469604  7.333858  9.661873
Ep: 11143   Rew: -102.36   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 104.630028  10.739913  8.962868
Ep: 11144   Rew: -101.83   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.256607  9.342596  7.560058
Ep: 11145   Rew: -103.73   Avg Rew: -102.97   Polyak: 0.995000   Buffer: 100.00   Loss: 105.184456  7.252850  10.046415
Ep: 11146   Rew: -101.58   Avg Rew: -102.96   

Ep: 11207   Rew: -103.03   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 105.362564  9.930690  7.732444
Ep: 11208   Rew: -103.26   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 105.247299  10.803841  9.861483
Ep: 11209   Rew: -103.24   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 104.612808  8.631491  9.944016
Ep: 11210   Rew: -103.59   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 105.207909  11.538254  8.465778
Ep: 11211   Rew: -103.44   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 105.298927  8.758289  8.793588
Ep: 11212   Rew: -103.36   Avg Rew: -102.64   Polyak: 0.995000   Buffer: 100.00   Loss: 105.073059  11.346606  10.661108
Ep: 11213   Rew: -102.04   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 105.358452  7.627958  9.198888
Ep: 11214   Rew: -103.09   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 105.188179  8.920538  8.192877
Ep: 11215   Rew: -102.63   Avg Rew: -102.63 

Ep: 11276   Rew: -102.81   Avg Rew: -102.82   Polyak: 0.995000   Buffer: 100.00   Loss: 105.345093  8.756003  9.556649
Ep: 11277   Rew: -101.83   Avg Rew: -102.81   Polyak: 0.995000   Buffer: 100.00   Loss: 105.891792  11.057141  10.325510
Ep: 11278   Rew: -103.00   Avg Rew: -102.81   Polyak: 0.995000   Buffer: 100.00   Loss: 105.633713  9.449952  9.960158
Ep: 11279   Rew: -103.39   Avg Rew: -102.84   Polyak: 0.995000   Buffer: 100.00   Loss: 104.808357  9.299053  9.201031
Ep: 11280   Rew: -102.80   Avg Rew: -102.84   Polyak: 0.995000   Buffer: 100.00   Loss: 105.129204  10.274846  9.357439
Ep: 11281   Rew: -103.51   Avg Rew: -102.86   Polyak: 0.995000   Buffer: 100.00   Loss: 104.984749  10.047167  9.210239
Ep: 11282   Rew: -102.18   Avg Rew: -102.86   Polyak: 0.995000   Buffer: 100.00   Loss: 105.310326  11.032166  9.210848
Ep: 11283   Rew: -103.34   Avg Rew: -102.87   Polyak: 0.995000   Buffer: 100.00   Loss: 105.308929  8.385112  8.342262
Ep: 11284   Rew: -102.89   Avg Rew: -102.87

Ep: 11345   Rew: -103.67   Avg Rew: -103.05   Polyak: 0.995000   Buffer: 100.00   Loss: 105.006142  9.253227  9.116255
Ep: 11346   Rew: -102.20   Avg Rew: -103.04   Polyak: 0.995000   Buffer: 100.00   Loss: 105.171562  9.213911  9.332034
Ep: 11347   Rew: -102.65   Avg Rew: -103.06   Polyak: 0.995000   Buffer: 100.00   Loss: 105.092072  7.887154  8.273129
Ep: 11348   Rew: -102.23   Avg Rew: -103.04   Polyak: 0.995000   Buffer: 100.00   Loss: 104.912163  8.023707  10.393692
Ep: 11349   Rew: -103.10   Avg Rew: -103.05   Polyak: 0.995000   Buffer: 100.00   Loss: 105.470726  11.617108  7.437026
Ep: 11350   Rew: -103.19   Avg Rew: -103.05   Polyak: 0.995000   Buffer: 100.00   Loss: 105.216316  11.741949  12.374640
Ep: 11351   Rew: -103.71   Avg Rew: -103.05   Polyak: 0.995000   Buffer: 100.00   Loss: 105.322617  8.173452  10.820770
Ep: 11352   Rew: -103.51   Avg Rew: -103.07   Polyak: 0.995000   Buffer: 100.00   Loss: 105.158188  8.145828  8.067566
Ep: 11353   Rew: -103.36   Avg Rew: -103.07

Ep: 11414   Rew: -102.39   Avg Rew: -103.08   Polyak: 0.995000   Buffer: 100.00   Loss: 104.938034  9.915640  7.651910
Ep: 11415   Rew: -102.17   Avg Rew: -103.07   Polyak: 0.995000   Buffer: 100.00   Loss: 105.677498  9.278854  8.951594
Ep: 11416   Rew: -102.83   Avg Rew: -103.07   Polyak: 0.995000   Buffer: 100.00   Loss: 105.248665  8.457304  10.925295
Ep: 11417   Rew: -102.66   Avg Rew: -103.06   Polyak: 0.995000   Buffer: 100.00   Loss: 104.678574  7.838772  10.692049
Ep: 11418   Rew: -103.00   Avg Rew: -103.07   Polyak: 0.995000   Buffer: 100.00   Loss: 104.626709  11.043472  9.202063
Ep: 11419   Rew: -102.11   Avg Rew: -103.06   Polyak: 0.995000   Buffer: 100.00   Loss: 105.153839  10.177807  10.073277
Ep: 11420   Rew: -101.39   Avg Rew: -103.03   Polyak: 0.995000   Buffer: 100.00   Loss: 104.751343  11.839228  10.012561
Ep: 11421   Rew: -102.26   Avg Rew: -103.02   Polyak: 0.995000   Buffer: 100.00   Loss: 106.072090  8.725348  6.299280
Ep: 11422   Rew: -101.89   Avg Rew: -103.

Ep: 11483   Rew: -103.13   Avg Rew: -102.99   Polyak: 0.995000   Buffer: 100.00   Loss: 105.434952  9.430817  10.206014
Ep: 11484   Rew: -103.13   Avg Rew: -102.97   Polyak: 0.995000   Buffer: 100.00   Loss: 104.376129  7.409648  10.740323
Ep: 11485   Rew: -103.18   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 105.239311  9.343714  10.324335
Ep: 11486   Rew: -103.36   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.625183  11.932030  8.922085
Ep: 11487   Rew: -103.86   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.216225  9.227204  6.783751
Ep: 11488   Rew: -103.49   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 104.880096  10.640944  10.156372
Ep: 11489   Rew: -102.52   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 105.048958  11.464710  9.921371
Ep: 11490   Rew: -103.80   Avg Rew: -102.97   Polyak: 0.995000   Buffer: 100.00   Loss: 105.481674  10.239354  7.171744
Ep: 11491   Rew: -103.69   Avg Rew: -102

Ep: 11552   Rew: -103.52   Avg Rew: -103.24   Polyak: 0.995000   Buffer: 100.00   Loss: 104.708076  10.641108  9.587528
Ep: 11553   Rew: -102.79   Avg Rew: -103.24   Polyak: 0.995000   Buffer: 100.00   Loss: 104.828049  10.532911  8.991520
Ep: 11554   Rew: -102.47   Avg Rew: -103.22   Polyak: 0.995000   Buffer: 100.00   Loss: 104.714569  8.993506  8.766336
Ep: 11555   Rew: -102.17   Avg Rew: -103.21   Polyak: 0.995000   Buffer: 100.00   Loss: 104.921204  8.692932  9.523075
Ep: 11556   Rew: -103.28   Avg Rew: -103.20   Polyak: 0.995000   Buffer: 100.00   Loss: 105.240150  10.823858  10.088032
Ep: 11557   Rew: -102.09   Avg Rew: -103.19   Polyak: 0.995000   Buffer: 100.00   Loss: 105.431747  9.355682  9.838562
Ep: 11558   Rew: -103.41   Avg Rew: -103.19   Polyak: 0.995000   Buffer: 100.00   Loss: 104.590645  9.852699  9.874466
Ep: 11559   Rew: -103.07   Avg Rew: -103.19   Polyak: 0.995000   Buffer: 100.00   Loss: 105.287376  9.057242  10.031399
Ep: 11560   Rew: -102.81   Avg Rew: -103.18

Ep: 11621   Rew: -101.72   Avg Rew: -102.71   Polyak: 0.995000   Buffer: 100.00   Loss: 105.116554  7.885447  8.868373
Ep: 11622   Rew: -102.25   Avg Rew: -102.70   Polyak: 0.995000   Buffer: 100.00   Loss: 104.312477  7.482456  9.266156
Ep: 11623   Rew: -102.04   Avg Rew: -102.71   Polyak: 0.995000   Buffer: 100.00   Loss: 104.953644  9.617243  8.863967
Ep: 11624   Rew: -101.65   Avg Rew: -102.70   Polyak: 0.995000   Buffer: 100.00   Loss: 105.563599  10.501131  10.735134
Ep: 11625   Rew: -103.57   Avg Rew: -102.70   Polyak: 0.995000   Buffer: 100.00   Loss: 105.168457  11.071067  8.470079
Ep: 11626   Rew: -106.28   Avg Rew: -102.73   Polyak: 0.995000   Buffer: 100.00   Loss: 105.308540  9.081034  8.389448
Ep: 11627   Rew: -100.87   Avg Rew: -102.70   Polyak: 0.995000   Buffer: 100.00   Loss: 105.118210  9.030401  8.945210
Ep: 11628   Rew: -102.29   Avg Rew: -102.69   Polyak: 0.995000   Buffer: 100.00   Loss: 104.777847  12.849579  11.037229
Ep: 11629   Rew: -102.83   Avg Rew: -102.69

Ep: 11690   Rew: -102.65   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 104.696571  8.899302  9.239902
Ep: 11691   Rew: -103.22   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 105.057571  10.653094  10.733370
Ep: 11692   Rew: -102.40   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 104.409943  10.636822  10.057306
Ep: 11693   Rew: -103.37   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 105.311119  11.571841  9.045389
Ep: 11694   Rew: -102.81   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 105.272362  10.222735  9.982099
Ep: 11695   Rew: -102.98   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 104.807014  8.929343  9.251390
Ep: 11696   Rew: -102.36   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 104.737259  8.278466  8.446123
Ep: 11697   Rew: -102.51   Avg Rew: -102.54   Polyak: 0.995000   Buffer: 100.00   Loss: 104.788338  8.058184  9.802822
Ep: 11698   Rew: -102.92   Avg Rew: -102.5

Ep: 11759   Rew: -103.63   Avg Rew: -102.49   Polyak: 0.995000   Buffer: 100.00   Loss: 104.681717  8.946087  8.108491
Ep: 11760   Rew: -103.93   Avg Rew: -102.51   Polyak: 0.995000   Buffer: 100.00   Loss: 105.173004  8.213151  7.924824
Ep: 11761   Rew: -103.70   Avg Rew: -102.51   Polyak: 0.995000   Buffer: 100.00   Loss: 105.081879  7.926296  10.422905
Ep: 11762   Rew: -103.37   Avg Rew: -102.52   Polyak: 0.995000   Buffer: 100.00   Loss: 104.907669  8.354654  8.952012
Ep: 11763   Rew: -103.21   Avg Rew: -102.52   Polyak: 0.995000   Buffer: 100.00   Loss: 104.341934  9.920572  7.892109
Ep: 11764   Rew: -102.97   Avg Rew: -102.52   Polyak: 0.995000   Buffer: 100.00   Loss: 104.748833  8.704058  9.461389
Ep: 11765   Rew: -102.80   Avg Rew: -102.52   Polyak: 0.995000   Buffer: 100.00   Loss: 105.345932  8.690643  7.677902
Ep: 11766   Rew: -102.41   Avg Rew: -102.52   Polyak: 0.995000   Buffer: 100.00   Loss: 105.305626  7.750878  7.945616
Ep: 11767   Rew: -103.13   Avg Rew: -102.51   P

Ep: 11828   Rew: -102.48   Avg Rew: -102.81   Polyak: 0.995000   Buffer: 100.00   Loss: 104.967155  8.548557  8.382393
Ep: 11829   Rew: -102.59   Avg Rew: -102.81   Polyak: 0.995000   Buffer: 100.00   Loss: 104.933067  8.311933  7.576555
Ep: 11830   Rew: -101.97   Avg Rew: -102.81   Polyak: 0.995000   Buffer: 100.00   Loss: 105.282021  7.347981  9.901316
Ep: 11831   Rew: -100.90   Avg Rew: -102.79   Polyak: 0.995000   Buffer: 100.00   Loss: 105.193245  10.711042  9.122087
Ep: 11832   Rew: -102.79   Avg Rew: -102.81   Polyak: 0.995000   Buffer: 100.00   Loss: 105.384514  10.612900  9.626969
Ep: 11833   Rew: -102.78   Avg Rew: -102.81   Polyak: 0.995000   Buffer: 100.00   Loss: 105.374687  9.481093  9.903778
Ep: 11834   Rew: -102.77   Avg Rew: -102.80   Polyak: 0.995000   Buffer: 100.00   Loss: 104.752289  9.432764  8.872869
Ep: 11835   Rew: -102.78   Avg Rew: -102.80   Polyak: 0.995000   Buffer: 100.00   Loss: 105.334648  10.790512  9.493268
Ep: 11836   Rew: -103.12   Avg Rew: -102.80  

Ep: 11897   Rew: -103.17   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.229912  11.942108  9.011758
Ep: 11898   Rew: -103.03   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.089432  10.098548  7.802671
Ep: 11899   Rew: -103.27   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.039734  10.983659  9.656169
Ep: 11900   Rew: -103.22   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.180069  9.973632  9.726360
Ep: 11901   Rew: -103.23   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.210938  10.027264  10.823233
Ep: 11902   Rew: -102.52   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 104.735657  11.472842  9.311477
Ep: 11903   Rew: -103.37   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 104.737213  10.150379  9.296246
Ep: 11904   Rew: -102.54   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.291473  8.494930  9.671544
Ep: 11905   Rew: -102.49   Avg Rew: -102.

Ep: 11966   Rew: -103.19   Avg Rew: -102.87   Polyak: 0.995000   Buffer: 100.00   Loss: 104.795135  10.667398  8.880221
Ep: 11967   Rew: -103.24   Avg Rew: -102.88   Polyak: 0.995000   Buffer: 100.00   Loss: 104.946739  8.172247  7.688996
Ep: 11968   Rew: -102.61   Avg Rew: -102.88   Polyak: 0.995000   Buffer: 100.00   Loss: 104.630608  9.385658  8.198895
Ep: 11969   Rew: -103.16   Avg Rew: -102.87   Polyak: 0.995000   Buffer: 100.00   Loss: 105.044044  9.505690  10.102837
Ep: 11970   Rew: -103.10   Avg Rew: -102.88   Polyak: 0.995000   Buffer: 100.00   Loss: 105.117081  11.151802  9.512138
Ep: 11971   Rew: -103.03   Avg Rew: -102.88   Polyak: 0.995000   Buffer: 100.00   Loss: 104.431946  8.195105  8.975019
Ep: 11972   Rew: -102.54   Avg Rew: -102.87   Polyak: 0.995000   Buffer: 100.00   Loss: 104.892746  9.019127  8.514093
Ep: 11973   Rew: -102.26   Avg Rew: -102.86   Polyak: 0.995000   Buffer: 100.00   Loss: 104.714272  11.445761  8.489408
Ep: 11974   Rew: -102.73   Avg Rew: -102.86 

Ep: 12035   Rew: -102.28   Avg Rew: -102.50   Polyak: 0.995000   Buffer: 100.00   Loss: 105.023987  6.505198  8.328160
Ep: 12036   Rew: -101.38   Avg Rew: -102.48   Polyak: 0.995000   Buffer: 100.00   Loss: 104.325439  9.475368  7.789433
Ep: 12037   Rew: -102.37   Avg Rew: -102.46   Polyak: 0.995000   Buffer: 100.00   Loss: 104.261482  10.275819  9.152269
Ep: 12038   Rew: -102.45   Avg Rew: -102.44   Polyak: 0.995000   Buffer: 100.00   Loss: 105.536484  8.769182  7.750137
Ep: 12039   Rew: -102.81   Avg Rew: -102.44   Polyak: 0.995000   Buffer: 100.00   Loss: 105.174034  9.717520  10.119421
Ep: 12040   Rew: -102.74   Avg Rew: -102.42   Polyak: 0.995000   Buffer: 100.00   Loss: 105.138481  9.529097  10.184359
Ep: 12041   Rew: -101.82   Avg Rew: -102.41   Polyak: 0.995000   Buffer: 100.00   Loss: 105.271477  9.043313  10.477049
Ep: 12042   Rew: -102.33   Avg Rew: -102.41   Polyak: 0.995000   Buffer: 100.00   Loss: 105.227539  12.292463  9.467463
Ep: 12043   Rew: -103.02   Avg Rew: -102.41

Ep: 12104   Rew: -101.40   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 100.00   Loss: 104.873276  10.396002  8.472559
Ep: 12105   Rew: -101.45   Avg Rew: -102.10   Polyak: 0.995000   Buffer: 100.00   Loss: 104.742538  8.801951  9.590616
Ep: 12106   Rew: -101.78   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 100.00   Loss: 104.605827  8.766073  9.177137
Ep: 12107   Rew: -101.43   Avg Rew: -102.10   Polyak: 0.995000   Buffer: 100.00   Loss: 104.991707  9.968305  8.827724
Ep: 12108   Rew: -101.18   Avg Rew: -102.09   Polyak: 0.995000   Buffer: 100.00   Loss: 104.441933  7.904675  9.484951
Ep: 12109   Rew: -102.00   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 104.700500  8.945945  9.058444
Ep: 12110   Rew: -101.45   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 104.596977  7.279035  8.788742
Ep: 12111   Rew: -101.51   Avg Rew: -102.07   Polyak: 0.995000   Buffer: 100.00   Loss: 104.887794  10.104782  11.116573
Ep: 12112   Rew: -101.59   Avg Rew: -102.07  

Ep: 12173   Rew: -102.51   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 105.311050  10.866360  9.266106
Ep: 12174   Rew: -102.85   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 104.836090  7.713513  8.649479
Ep: 12175   Rew: -102.88   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 104.949295  10.215304  7.544445
Ep: 12176   Rew: -103.02   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 104.679657  8.043310  8.585552
Ep: 12177   Rew: -103.39   Avg Rew: -102.64   Polyak: 0.995000   Buffer: 100.00   Loss: 104.458435  9.424040  9.842248
Ep: 12178   Rew: -103.51   Avg Rew: -102.65   Polyak: 0.995000   Buffer: 100.00   Loss: 104.673706  10.053890  11.264118
Ep: 12179   Rew: -102.53   Avg Rew: -102.67   Polyak: 0.995000   Buffer: 100.00   Loss: 104.597664  7.054096  7.654977
Ep: 12180   Rew: -102.72   Avg Rew: -102.67   Polyak: 0.995000   Buffer: 100.00   Loss: 104.240768  8.263674  9.581325
Ep: 12181   Rew: -103.23   Avg Rew: -102.69 

Ep: 12242   Rew: -101.67   Avg Rew: -102.75   Polyak: 0.995000   Buffer: 100.00   Loss: 105.202400  8.794128  8.220739
Ep: 12243   Rew: -102.86   Avg Rew: -102.74   Polyak: 0.995000   Buffer: 100.00   Loss: 104.946609  7.838911  7.842411
Ep: 12244   Rew: -102.90   Avg Rew: -102.74   Polyak: 0.995000   Buffer: 100.00   Loss: 104.866966  8.885530  8.133793
Ep: 12245   Rew: -102.98   Avg Rew: -102.73   Polyak: 0.995000   Buffer: 100.00   Loss: 104.889595  8.591704  10.265759
Ep: 12246   Rew: -102.97   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 105.431351  6.600138  9.535533
Ep: 12247   Rew: -103.16   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 104.920143  8.674425  10.048543
Ep: 12248   Rew: -102.38   Avg Rew: -102.71   Polyak: 0.995000   Buffer: 100.00   Loss: 105.216423  7.134830  11.540341
Ep: 12249   Rew: -102.63   Avg Rew: -102.69   Polyak: 0.995000   Buffer: 100.00   Loss: 104.755409  9.653868  8.978877
Ep: 12250   Rew: -102.34   Avg Rew: -102.68  

Ep: 12311   Rew: -100.89   Avg Rew: -102.01   Polyak: 0.995000   Buffer: 100.00   Loss: 104.962814  11.065655  9.371021
Ep: 12312   Rew: -101.70   Avg Rew: -102.01   Polyak: 0.995000   Buffer: 100.00   Loss: 104.822098  10.583416  7.722174
Ep: 12313   Rew: -101.43   Avg Rew: -102.00   Polyak: 0.995000   Buffer: 100.00   Loss: 105.219955  9.777408  8.246233
Ep: 12314   Rew: -101.93   Avg Rew: -102.02   Polyak: 0.995000   Buffer: 100.00   Loss: 104.268135  9.305682  9.607840
Ep: 12315   Rew: -101.58   Avg Rew: -102.02   Polyak: 0.995000   Buffer: 100.00   Loss: 104.042953  9.403811  10.413148
Ep: 12316   Rew: -100.71   Avg Rew: -102.01   Polyak: 0.995000   Buffer: 100.00   Loss: 104.417702  8.499234  7.868171
Ep: 12317   Rew: -100.82   Avg Rew: -102.00   Polyak: 0.995000   Buffer: 100.00   Loss: 105.160889  10.336718  8.471493
Ep: 12318   Rew: -99.78   Avg Rew: -101.99   Polyak: 0.995000   Buffer: 100.00   Loss: 104.978622  9.153229  10.706242
Ep: 12319   Rew: -102.81   Avg Rew: -102.01 

Ep: 12380   Rew: -101.39   Avg Rew: -101.76   Polyak: 0.995000   Buffer: 100.00   Loss: 105.387337  8.589992  8.816586
Ep: 12381   Rew: -102.63   Avg Rew: -101.78   Polyak: 0.995000   Buffer: 100.00   Loss: 105.386009  8.845211  8.512594
Ep: 12382   Rew: -102.54   Avg Rew: -101.80   Polyak: 0.995000   Buffer: 100.00   Loss: 104.741516  7.805284  9.568623
Ep: 12383   Rew: -101.63   Avg Rew: -101.78   Polyak: 0.995000   Buffer: 100.00   Loss: 104.005074  9.297337  7.941210
Ep: 12384   Rew: -102.76   Avg Rew: -101.79   Polyak: 0.995000   Buffer: 100.00   Loss: 104.939209  9.038149  9.922350
Ep: 12385   Rew: -102.67   Avg Rew: -101.80   Polyak: 0.995000   Buffer: 100.00   Loss: 104.860626  7.564798  7.316547
Ep: 12386   Rew: -103.22   Avg Rew: -101.81   Polyak: 0.995000   Buffer: 100.00   Loss: 104.987144  9.843001  7.537716
Ep: 12387   Rew: -102.85   Avg Rew: -101.81   Polyak: 0.995000   Buffer: 100.00   Loss: 104.720894  10.957237  7.197855
Ep: 12388   Rew: -102.21   Avg Rew: -101.81   P

Ep: 12449   Rew: -102.31   Avg Rew: -102.14   Polyak: 0.995000   Buffer: 100.00   Loss: 104.475960  9.707685  9.220591
Ep: 12450   Rew: -103.15   Avg Rew: -102.15   Polyak: 0.995000   Buffer: 100.00   Loss: 104.734222  8.470209  9.918591
Ep: 12451   Rew: -98.33   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 104.669624  9.953361  9.520639
Ep: 12452   Rew: -102.86   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 104.287796  8.594772  8.009120
Ep: 12453   Rew: -101.66   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 104.968575  9.710993  9.273135
Ep: 12454   Rew: -103.73   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 104.741791  8.078074  11.042302
Ep: 12455   Rew: -100.72   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 105.147049  9.880911  8.079908
Ep: 12456   Rew: -102.55   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 105.087700  9.902603  8.138150
Ep: 12457   Rew: -103.51   Avg Rew: -102.13   Po

Ep: 12518   Rew: -101.45   Avg Rew: -102.76   Polyak: 0.995000   Buffer: 100.00   Loss: 104.253372  9.176295  5.411557
Ep: 12519   Rew: -102.23   Avg Rew: -102.76   Polyak: 0.995000   Buffer: 100.00   Loss: 104.362877  9.662251  9.051001
Ep: 12520   Rew: -102.71   Avg Rew: -102.77   Polyak: 0.995000   Buffer: 100.00   Loss: 105.198914  9.433916  10.434578
Ep: 12521   Rew: -102.29   Avg Rew: -102.79   Polyak: 0.995000   Buffer: 100.00   Loss: 104.921608  9.082411  8.023021
Ep: 12522   Rew: -103.22   Avg Rew: -102.88   Polyak: 0.995000   Buffer: 100.00   Loss: 105.007652  8.539532  9.560243
Ep: 12523   Rew: -102.37   Avg Rew: -102.86   Polyak: 0.995000   Buffer: 100.00   Loss: 104.813538  8.374286  8.009346
Ep: 12524   Rew: -102.70   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.093918  8.516573  9.260950
Ep: 12525   Rew: -102.81   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 104.721672  8.964325  8.318148
Ep: 12526   Rew: -101.07   Avg Rew: -102.94   P

Ep: 12587   Rew: -101.93   Avg Rew: -103.29   Polyak: 0.995000   Buffer: 100.00   Loss: 104.945938  9.153245  9.867380
Ep: 12588   Rew: -105.91   Avg Rew: -103.31   Polyak: 0.995000   Buffer: 100.00   Loss: 104.988701  6.183823  9.404573
Ep: 12589   Rew: -241.38   Avg Rew: -104.70   Polyak: 0.995000   Buffer: 100.00   Loss: 104.967010  7.214498  7.521183
Ep: 12590   Rew: -101.66   Avg Rew: -104.71   Polyak: 0.995000   Buffer: 100.00   Loss: 105.079300  7.834120  8.715199
Ep: 12591   Rew: -101.51   Avg Rew: -104.70   Polyak: 0.995000   Buffer: 100.00   Loss: 105.511948  9.483730  9.330311
Ep: 12592   Rew: -101.82   Avg Rew: -104.69   Polyak: 0.995000   Buffer: 100.00   Loss: 104.992760  10.214650  7.715370
Ep: 12593   Rew: -102.54   Avg Rew: -104.67   Polyak: 0.995000   Buffer: 100.00   Loss: 105.601440  10.347602  10.216424
Ep: 12594   Rew: -102.44   Avg Rew: -104.67   Polyak: 0.995000   Buffer: 100.00   Loss: 104.674248  8.488783  9.376001
Ep: 12595   Rew: -102.78   Avg Rew: -104.70  

Ep: 12656   Rew: -102.75   Avg Rew: -103.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.321907  9.555246  8.516949
Ep: 12657   Rew: -102.34   Avg Rew: -103.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.101166  7.829659  7.628297
Ep: 12658   Rew: -103.24   Avg Rew: -103.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.141739  8.269616  8.331123
Ep: 12659   Rew: -102.38   Avg Rew: -103.93   Polyak: 0.995000   Buffer: 100.00   Loss: 104.885681  8.424238  7.993988
Ep: 12660   Rew: -102.67   Avg Rew: -103.94   Polyak: 0.995000   Buffer: 100.00   Loss: 104.808060  11.204617  9.185267
Ep: 12661   Rew: -102.61   Avg Rew: -103.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.147285  8.893593  8.308487
Ep: 12662   Rew: -102.61   Avg Rew: -103.93   Polyak: 0.995000   Buffer: 100.00   Loss: 105.168892  8.738425  6.892036
Ep: 12663   Rew: -101.79   Avg Rew: -103.93   Polyak: 0.995000   Buffer: 100.00   Loss: 105.523697  8.148110  9.930317
Ep: 12664   Rew: -102.59   Avg Rew: -103.92   P

Ep: 12725   Rew: -101.11   Avg Rew: -102.24   Polyak: 0.995000   Buffer: 100.00   Loss: 105.312714  7.999049  10.702537
Ep: 12726   Rew: -101.26   Avg Rew: -102.22   Polyak: 0.995000   Buffer: 100.00   Loss: 105.103821  11.485243  9.516817
Ep: 12727   Rew: -100.50   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 104.346985  8.165547  8.420280
Ep: 12728   Rew: -101.05   Avg Rew: -102.17   Polyak: 0.995000   Buffer: 100.00   Loss: 105.151100  7.623824  8.340157
Ep: 12729   Rew: -101.61   Avg Rew: -102.16   Polyak: 0.995000   Buffer: 100.00   Loss: 104.959320  10.140910  9.415899
Ep: 12730   Rew: -100.69   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 105.225128  9.317102  7.363979
Ep: 12731   Rew: -100.58   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 104.515045  9.936966  9.082524
Ep: 12732   Rew: -101.23   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 105.336212  9.823945  7.899960
Ep: 12733   Rew: -100.98   Avg Rew: -102.12  

Ep: 12794   Rew: -102.82   Avg Rew: -101.90   Polyak: 0.995000   Buffer: 100.00   Loss: 105.341850  9.389325  10.397882
Ep: 12795   Rew: -102.63   Avg Rew: -101.92   Polyak: 0.995000   Buffer: 100.00   Loss: 105.145714  8.542282  9.177605
Ep: 12796   Rew: -102.54   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.321518  8.773565  9.121092
Ep: 12797   Rew: -102.47   Avg Rew: -101.95   Polyak: 0.995000   Buffer: 100.00   Loss: 104.942276  8.273756  9.116290
Ep: 12798   Rew: -103.27   Avg Rew: -101.95   Polyak: 0.995000   Buffer: 100.00   Loss: 104.635666  8.733778  9.833652
Ep: 12799   Rew: -103.06   Avg Rew: -101.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.311241  9.162325  10.503019
Ep: 12800   Rew: -103.70   Avg Rew: -101.96   Polyak: 0.995000   Buffer: 100.00   Loss: 105.021355  8.253300  6.692845
Ep: 12801   Rew: -103.07   Avg Rew: -101.97   Polyak: 0.995000   Buffer: 100.00   Loss: 105.421135  9.625790  9.397209
Ep: 12802   Rew: -103.34   Avg Rew: -101.98   

Ep: 12863   Rew: -102.63   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 100.00   Loss: 104.597519  8.753269  9.619936
Ep: 12864   Rew: -103.03   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 100.00   Loss: 104.918343  8.147216  8.417016
Ep: 12865   Rew: -101.87   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 105.252464  7.893077  8.407709
Ep: 12866   Rew: -102.89   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 104.424156  10.669473  9.134624
Ep: 12867   Rew: -102.57   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 104.576164  9.821797  9.725390
Ep: 12868   Rew: -102.81   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 104.570351  10.511520  9.259987
Ep: 12869   Rew: -102.90   Avg Rew: -102.36   Polyak: 0.995000   Buffer: 100.00   Loss: 105.303421  7.809546  8.125776
Ep: 12870   Rew: -102.09   Avg Rew: -102.37   Polyak: 0.995000   Buffer: 100.00   Loss: 105.769814  10.635484  8.257854
Ep: 12871   Rew: -103.04   Avg Rew: -102.37  

Ep: 12932   Rew: -101.77   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 104.926376  9.255086  9.592579
Ep: 12933   Rew: -102.40   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 105.484528  11.389619  7.494330
Ep: 12934   Rew: -101.93   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 105.079994  9.769300  7.221163
Ep: 12935   Rew: -101.98   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 105.262390  9.485551  8.628898
Ep: 12936   Rew: -102.57   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 104.732048  8.229256  6.688998
Ep: 12937   Rew: -101.85   Avg Rew: -102.18   Polyak: 0.995000   Buffer: 100.00   Loss: 104.811707  12.609301  11.834934
Ep: 12938   Rew: -101.56   Avg Rew: -102.17   Polyak: 0.995000   Buffer: 100.00   Loss: 105.340004  11.483415  9.268941
Ep: 12939   Rew: -101.49   Avg Rew: -102.15   Polyak: 0.995000   Buffer: 100.00   Loss: 105.388092  10.002894  10.362532
Ep: 12940   Rew: -103.53   Avg Rew: -102.1

Ep: 13001   Rew: -103.23   Avg Rew: -102.41   Polyak: 0.995000   Buffer: 100.00   Loss: 104.819244  10.192737  9.151910
Ep: 13002   Rew: -103.76   Avg Rew: -102.44   Polyak: 0.995000   Buffer: 100.00   Loss: 105.101486  9.591192  9.628530
Ep: 13003   Rew: -103.62   Avg Rew: -102.46   Polyak: 0.995000   Buffer: 100.00   Loss: 104.904243  8.800350  7.279142
Ep: 13004   Rew: -103.09   Avg Rew: -102.47   Polyak: 0.995000   Buffer: 100.00   Loss: 105.871841  9.387618  8.499882
Ep: 13005   Rew: -102.52   Avg Rew: -102.47   Polyak: 0.995000   Buffer: 100.00   Loss: 105.169548  9.703414  8.587254
Ep: 13006   Rew: -103.76   Avg Rew: -102.48   Polyak: 0.995000   Buffer: 100.00   Loss: 105.152039  8.400146  9.795379
Ep: 13007   Rew: -101.88   Avg Rew: -102.49   Polyak: 0.995000   Buffer: 100.00   Loss: 105.242241  10.114996  9.650779
Ep: 13008   Rew: -101.83   Avg Rew: -102.48   Polyak: 0.995000   Buffer: 100.00   Loss: 105.345222  10.174700  9.031475
Ep: 13009   Rew: -101.69   Avg Rew: -102.48  

Ep: 13070   Rew: -103.40   Avg Rew: -103.91   Polyak: 0.995000   Buffer: 100.00   Loss: 104.830360  9.198803  8.981116
Ep: 13071   Rew: -103.84   Avg Rew: -103.92   Polyak: 0.995000   Buffer: 100.00   Loss: 104.692039  8.960887  8.322749
Ep: 13072   Rew: -104.56   Avg Rew: -103.93   Polyak: 0.995000   Buffer: 100.00   Loss: 105.562332  8.313948  8.141229
Ep: 13073   Rew: -103.71   Avg Rew: -103.93   Polyak: 0.995000   Buffer: 100.00   Loss: 105.391380  10.527330  8.722748
Ep: 13074   Rew: -103.61   Avg Rew: -103.94   Polyak: 0.995000   Buffer: 100.00   Loss: 104.266808  8.393061  8.393918
Ep: 13075   Rew: -103.31   Avg Rew: -103.95   Polyak: 0.995000   Buffer: 100.00   Loss: 105.543350  11.349416  8.782921
Ep: 13076   Rew: -103.67   Avg Rew: -103.96   Polyak: 0.995000   Buffer: 100.00   Loss: 104.334305  9.341719  7.371776
Ep: 13077   Rew: -103.59   Avg Rew: -103.97   Polyak: 0.995000   Buffer: 100.00   Loss: 104.851120  12.695696  10.174848
Ep: 13078   Rew: -102.38   Avg Rew: -103.96 

Ep: 13139   Rew: -102.17   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 104.952644  9.191201  7.790470
Ep: 13140   Rew: -101.49   Avg Rew: -102.87   Polyak: 0.995000   Buffer: 100.00   Loss: 104.829025  9.657491  7.607051
Ep: 13141   Rew: -102.77   Avg Rew: -102.81   Polyak: 0.995000   Buffer: 100.00   Loss: 104.919632  10.787034  7.856176
Ep: 13142   Rew: -102.70   Avg Rew: -102.76   Polyak: 0.995000   Buffer: 100.00   Loss: 104.989510  8.476053  8.985023
Ep: 13143   Rew: -102.39   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 104.674484  8.052258  9.171268
Ep: 13144   Rew: -101.89   Avg Rew: -102.67   Polyak: 0.995000   Buffer: 100.00   Loss: 105.094620  8.416191  10.896684
Ep: 13145   Rew: -101.95   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 105.333565  11.221029  10.327268
Ep: 13146   Rew: -101.89   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 104.827950  8.602942  8.209248
Ep: 13147   Rew: -102.09   Avg Rew: -102.63 

Ep: 13208   Rew: -101.14   Avg Rew: -102.24   Polyak: 0.995000   Buffer: 100.00   Loss: 104.762581  9.471898  8.929283
Ep: 13209   Rew: -102.09   Avg Rew: -102.24   Polyak: 0.995000   Buffer: 100.00   Loss: 105.221169  9.896412  9.227797
Ep: 13210   Rew: -101.53   Avg Rew: -102.23   Polyak: 0.995000   Buffer: 100.00   Loss: 105.691040  9.522246  8.432846
Ep: 13211   Rew: -100.38   Avg Rew: -102.23   Polyak: 0.995000   Buffer: 100.00   Loss: 105.019707  8.241121  9.879003
Ep: 13212   Rew: -100.26   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 104.538704  6.516407  9.042413
Ep: 13213   Rew: -101.73   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 104.647354  11.400331  9.836900
Ep: 13214   Rew: -101.55   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 105.391655  9.325438  8.325163
Ep: 13215   Rew: -102.20   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 104.919327  9.786548  8.602064
Ep: 13216   Rew: -101.73   Avg Rew: -102.18   P

Ep: 13277   Rew: -101.43   Avg Rew: -102.10   Polyak: 0.995000   Buffer: 100.00   Loss: 105.368309  9.435227  7.737766
Ep: 13278   Rew: -101.91   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 105.297165  7.138668  8.755295
Ep: 13279   Rew: -102.48   Avg Rew: -102.09   Polyak: 0.995000   Buffer: 100.00   Loss: 104.791756  10.849002  8.501654
Ep: 13280   Rew: -101.75   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 105.090225  8.610332  8.757001
Ep: 13281   Rew: -101.46   Avg Rew: -102.07   Polyak: 0.995000   Buffer: 100.00   Loss: 104.709724  7.800629  7.937733
Ep: 13282   Rew: -101.69   Avg Rew: -102.06   Polyak: 0.995000   Buffer: 100.00   Loss: 105.089539  9.043930  9.349135
Ep: 13283   Rew: -101.89   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 104.974541  8.927122  8.142235
Ep: 13284   Rew: -102.18   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 104.875404  9.614325  9.286726
Ep: 13285   Rew: -103.18   Avg Rew: -102.03   P

Ep: 13346   Rew: -101.79   Avg Rew: -102.06   Polyak: 0.995000   Buffer: 100.00   Loss: 105.138443  9.193862  8.049403
Ep: 13347   Rew: -101.97   Avg Rew: -102.06   Polyak: 0.995000   Buffer: 100.00   Loss: 105.398689  7.942294  8.069517
Ep: 13348   Rew: -101.75   Avg Rew: -102.07   Polyak: 0.995000   Buffer: 100.00   Loss: 104.992653  9.752845  8.175782
Ep: 13349   Rew: -102.01   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 104.348442  8.350954  7.999966
Ep: 13350   Rew: -101.40   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 105.095222  8.713631  8.927006
Ep: 13351   Rew: -101.77   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 105.006302  10.388197  8.668792
Ep: 13352   Rew: -102.03   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 105.028450  8.672908  6.996327
Ep: 13353   Rew: -101.33   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 105.041153  7.889208  8.449120
Ep: 13354   Rew: -101.54   Avg Rew: -102.09   P

Ep: 13415   Rew: -102.48   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 104.561951  9.285477  8.458615
Ep: 13416   Rew: -101.70   Avg Rew: -102.03   Polyak: 0.995000   Buffer: 100.00   Loss: 104.727463  8.197882  8.182430
Ep: 13417   Rew: -103.22   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 105.178413  9.157024  7.669261
Ep: 13418   Rew: -102.85   Avg Rew: -102.06   Polyak: 0.995000   Buffer: 100.00   Loss: 105.135941  10.122631  9.266879
Ep: 13419   Rew: -101.08   Avg Rew: -102.05   Polyak: 0.995000   Buffer: 100.00   Loss: 104.316673  8.407717  9.059052
Ep: 13420   Rew: -103.14   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 104.607841  9.905674  10.435459
Ep: 13421   Rew: -102.37   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 104.531784  9.022806  10.772507
Ep: 13422   Rew: -102.17   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 104.249886  8.553588  8.336627
Ep: 13423   Rew: -101.16   Avg Rew: -102.08  

Ep: 13484   Rew: -102.25   Avg Rew: -102.26   Polyak: 0.995000   Buffer: 100.00   Loss: 104.772934  10.621463  8.962427
Ep: 13485   Rew: -101.47   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.585167  9.199321  8.218740
Ep: 13486   Rew: -102.42   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.559593  7.343894  7.148254
Ep: 13487   Rew: -101.79   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.689819  9.422789  10.239977
Ep: 13488   Rew: -102.16   Avg Rew: -102.24   Polyak: 0.995000   Buffer: 100.00   Loss: 104.916351  10.908928  10.569042
Ep: 13489   Rew: -101.50   Avg Rew: -102.24   Polyak: 0.995000   Buffer: 100.00   Loss: 104.671646  8.176786  9.077999
Ep: 13490   Rew: -102.14   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.859505  8.548447  9.051601
Ep: 13491   Rew: -101.90   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.729393  9.538793  8.321849
Ep: 13492   Rew: -102.36   Avg Rew: -102.25 

Ep: 13553   Rew: -103.19   Avg Rew: -102.24   Polyak: 0.995000   Buffer: 100.00   Loss: 105.006477  11.087849  9.787577
Ep: 13554   Rew: -102.52   Avg Rew: -102.24   Polyak: 0.995000   Buffer: 100.00   Loss: 104.704811  9.521053  8.227524
Ep: 13555   Rew: -102.50   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.897789  8.439017  7.707685
Ep: 13556   Rew: -102.56   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 105.053711  9.375914  7.247318
Ep: 13557   Rew: -102.47   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.202034  8.140353  9.296110
Ep: 13558   Rew: -103.09   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.557953  7.942200  8.013421
Ep: 13559   Rew: -102.34   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.591820  10.927559  7.380374
Ep: 13560   Rew: -102.85   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.214661  7.347412  8.387552
Ep: 13561   Rew: -101.99   Avg Rew: -102.24   

Ep: 13622   Rew: -102.40   Avg Rew: -102.31   Polyak: 0.995000   Buffer: 100.00   Loss: 104.863617  8.777912  7.784831
Ep: 13623   Rew: -101.93   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 104.534592  8.623841  7.247137
Ep: 13624   Rew: -101.86   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 104.485214  8.939719  6.992514
Ep: 13625   Rew: -103.17   Avg Rew: -102.31   Polyak: 0.995000   Buffer: 100.00   Loss: 103.890289  7.286925  8.293637
Ep: 13626   Rew: -102.16   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 104.080788  7.543412  7.835696
Ep: 13627   Rew: -102.63   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 104.613953  9.177545  9.027806
Ep: 13628   Rew: -102.18   Avg Rew: -102.31   Polyak: 0.995000   Buffer: 100.00   Loss: 104.826622  8.769465  8.596321
Ep: 13629   Rew: -102.76   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 100.00   Loss: 104.656937  8.709921  7.600260
Ep: 13630   Rew: -102.74   Avg Rew: -102.33   Po

Ep: 13691   Rew: -102.35   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 104.316025  8.641343  8.852619
Ep: 13692   Rew: -102.35   Avg Rew: -102.42   Polyak: 0.995000   Buffer: 100.00   Loss: 105.060219  8.568235  7.879277
Ep: 13693   Rew: -102.76   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 104.938393  8.929268  9.923767
Ep: 13694   Rew: -102.00   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 104.261604  8.723444  7.892731
Ep: 13695   Rew: -102.23   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 104.724998  8.885561  9.019226
Ep: 13696   Rew: -102.38   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 104.453201  8.277126  7.830462
Ep: 13697   Rew: -102.04   Avg Rew: -102.45   Polyak: 0.995000   Buffer: 100.00   Loss: 104.360931  11.458208  8.855070
Ep: 13698   Rew: -102.37   Avg Rew: -102.45   Polyak: 0.995000   Buffer: 100.00   Loss: 104.080589  8.981718  9.725992
Ep: 13699   Rew: -102.52   Avg Rew: -102.45   P

Ep: 13760   Rew: -102.67   Avg Rew: -102.66   Polyak: 0.995000   Buffer: 100.00   Loss: 104.551308  7.909527  8.688007
Ep: 13761   Rew: -102.25   Avg Rew: -102.65   Polyak: 0.995000   Buffer: 100.00   Loss: 104.689796  10.295491  8.955702
Ep: 13762   Rew: -102.45   Avg Rew: -102.65   Polyak: 0.995000   Buffer: 100.00   Loss: 104.700996  9.261386  8.283337
Ep: 13763   Rew: -102.03   Avg Rew: -102.64   Polyak: 0.995000   Buffer: 100.00   Loss: 104.503784  7.427373  8.185880
Ep: 13764   Rew: -101.78   Avg Rew: -102.64   Polyak: 0.995000   Buffer: 100.00   Loss: 104.320564  8.844368  9.637159
Ep: 13765   Rew: -102.19   Avg Rew: -102.64   Polyak: 0.995000   Buffer: 100.00   Loss: 104.104294  9.954705  8.532916
Ep: 13766   Rew: -102.91   Avg Rew: -102.64   Polyak: 0.995000   Buffer: 100.00   Loss: 104.931091  7.958599  9.857322
Ep: 13767   Rew: -102.42   Avg Rew: -102.64   Polyak: 0.995000   Buffer: 100.00   Loss: 105.235107  10.571386  9.370983
Ep: 13768   Rew: -102.53   Avg Rew: -102.64   

Ep: 13829   Rew: -103.40   Avg Rew: -102.71   Polyak: 0.995000   Buffer: 100.00   Loss: 104.432930  8.343120  8.704697
Ep: 13830   Rew: -103.07   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 105.024307  8.453796  7.926371
Ep: 13831   Rew: -103.15   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 104.722916  9.287856  9.517071
Ep: 13832   Rew: -103.25   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 104.800232  10.390750  9.530461
Ep: 13833   Rew: -103.34   Avg Rew: -102.73   Polyak: 0.995000   Buffer: 100.00   Loss: 104.963593  10.385286  6.483670
Ep: 13834   Rew: -103.35   Avg Rew: -102.74   Polyak: 0.995000   Buffer: 100.00   Loss: 105.134727  10.320129  9.895235
Ep: 13835   Rew: -102.47   Avg Rew: -102.73   Polyak: 0.995000   Buffer: 100.00   Loss: 104.084801  8.702417  6.350144
Ep: 13836   Rew: -103.45   Avg Rew: -102.74   Polyak: 0.995000   Buffer: 100.00   Loss: 104.953896  9.135553  8.412569
Ep: 13837   Rew: -103.57   Avg Rew: -102.75  

Ep: 13898   Rew: -102.15   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 105.112396  8.168688  8.038409
Ep: 13899   Rew: -101.71   Avg Rew: -102.59   Polyak: 0.995000   Buffer: 100.00   Loss: 104.525108  8.060113  7.766905
Ep: 13900   Rew: -102.38   Avg Rew: -102.59   Polyak: 0.995000   Buffer: 100.00   Loss: 104.405609  9.794678  8.414801
Ep: 13901   Rew: -101.80   Avg Rew: -102.58   Polyak: 0.995000   Buffer: 100.00   Loss: 104.252258  10.540430  8.406295
Ep: 13902   Rew: -102.43   Avg Rew: -102.59   Polyak: 0.995000   Buffer: 100.00   Loss: 104.800209  10.946766  9.448085
Ep: 13903   Rew: -102.15   Avg Rew: -102.58   Polyak: 0.995000   Buffer: 100.00   Loss: 104.399811  10.341646  8.057764
Ep: 13904   Rew: -101.97   Avg Rew: -102.57   Polyak: 0.995000   Buffer: 100.00   Loss: 105.174911  7.134408  8.962046
Ep: 13905   Rew: -101.56   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 104.800682  8.553058  8.164234
Ep: 13906   Rew: -102.28   Avg Rew: -102.55  

Ep: 13967   Rew: -102.62   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 104.539543  8.627831  8.266174
Ep: 13968   Rew: -102.50   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 104.493431  8.467149  7.819788
Ep: 13969   Rew: -102.06   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 104.882095  9.880369  9.454428
Ep: 13970   Rew: -104.09   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 104.479401  8.250227  6.980905
Ep: 13971   Rew: -103.13   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 105.204941  10.272856  8.997269
Ep: 13972   Rew: -103.26   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 104.721237  10.318731  10.416766
Ep: 13973   Rew: -102.52   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 104.739151  8.167338  8.624432
Ep: 13974   Rew: -102.18   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 104.310448  9.842169  8.866967
Ep: 13975   Rew: -102.72   Avg Rew: -102.62  

Ep: 14036   Rew: -102.25   Avg Rew: -102.45   Polyak: 0.995000   Buffer: 100.00   Loss: 105.112869  8.818976  9.955661
Ep: 14037   Rew: -102.45   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 104.433014  9.783391  7.997636
Ep: 14038   Rew: -102.75   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 104.366158  7.882463  8.956323
Ep: 14039   Rew: -102.76   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 103.681816  8.077148  7.098225
Ep: 14040   Rew: -102.19   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 104.175644  11.595658  8.220175
Ep: 14041   Rew: -102.48   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 104.233803  10.117701  9.287228
Ep: 14042   Rew: -102.60   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 104.190788  7.723767  8.635667
Ep: 14043   Rew: -102.32   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 104.933441  10.116396  10.002766
Ep: 14044   Rew: -102.15   Avg Rew: -102.43 

Ep: 14105   Rew: -102.06   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 104.444389  9.175835  10.244161
Ep: 14106   Rew: -101.53   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 104.161224  10.896533  8.750796
Ep: 14107   Rew: -101.90   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 105.012611  10.818159  7.709912
Ep: 14108   Rew: -102.47   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 104.178940  9.074135  7.924392
Ep: 14109   Rew: -101.36   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 104.885414  9.082317  8.364287
Ep: 14110   Rew: -101.06   Avg Rew: -102.59   Polyak: 0.995000   Buffer: 100.00   Loss: 104.027191  8.078870  7.557929
Ep: 14111   Rew: -102.87   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 104.424286  7.067041  11.173442
Ep: 14112   Rew: -101.88   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 104.243004  9.802305  9.537920
Ep: 14113   Rew: -101.81   Avg Rew: -102.60 

Ep: 14174   Rew: -100.62   Avg Rew: -101.85   Polyak: 0.995000   Buffer: 100.00   Loss: 104.505775  7.921002  7.469578
Ep: 14175   Rew: -100.99   Avg Rew: -101.83   Polyak: 0.995000   Buffer: 100.00   Loss: 104.498131  8.495764  9.360054
Ep: 14176   Rew: -101.06   Avg Rew: -101.82   Polyak: 0.995000   Buffer: 100.00   Loss: 104.224731  8.149006  7.779023
Ep: 14177   Rew: -101.27   Avg Rew: -101.80   Polyak: 0.995000   Buffer: 100.00   Loss: 104.756493  6.392989  7.423704
Ep: 14178   Rew: -100.95   Avg Rew: -101.80   Polyak: 0.995000   Buffer: 100.00   Loss: 104.619400  10.581903  10.502789
Ep: 14179   Rew: -101.79   Avg Rew: -101.80   Polyak: 0.995000   Buffer: 100.00   Loss: 104.774734  9.820489  8.746651
Ep: 14180   Rew: -102.14   Avg Rew: -101.81   Polyak: 0.995000   Buffer: 100.00   Loss: 104.591591  9.398190  7.995135
Ep: 14181   Rew: -102.59   Avg Rew: -101.81   Polyak: 0.995000   Buffer: 100.00   Loss: 104.170952  10.137136  8.655280
Ep: 14182   Rew: -102.07   Avg Rew: -101.82  

Ep: 14243   Rew: -102.38   Avg Rew: -101.86   Polyak: 0.995000   Buffer: 100.00   Loss: 104.362953  11.007662  8.273402
Ep: 14244   Rew: -101.86   Avg Rew: -101.82   Polyak: 0.995000   Buffer: 100.00   Loss: 104.301811  10.371356  7.055945
Ep: 14245   Rew: -102.38   Avg Rew: -101.80   Polyak: 0.995000   Buffer: 100.00   Loss: 104.685997  6.952714  10.579638
Ep: 14246   Rew: -103.20   Avg Rew: -101.82   Polyak: 0.995000   Buffer: 100.00   Loss: 104.145821  7.532376  7.873975
Ep: 14247   Rew: -103.42   Avg Rew: -101.88   Polyak: 0.995000   Buffer: 100.00   Loss: 104.649902  7.302705  8.635779
Ep: 14248   Rew: -101.37   Avg Rew: -101.88   Polyak: 0.995000   Buffer: 100.00   Loss: 104.226341  9.722696  9.696496
Ep: 14249   Rew: -102.40   Avg Rew: -101.89   Polyak: 0.995000   Buffer: 100.00   Loss: 105.027206  9.180342  7.968296
Ep: 14250   Rew: -102.11   Avg Rew: -101.89   Polyak: 0.995000   Buffer: 100.00   Loss: 104.537064  7.751737  8.361013
Ep: 14251   Rew: -102.05   Avg Rew: -101.89  

Ep: 14312   Rew: -102.94   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 100.00   Loss: 104.213249  9.389458  7.440891
Ep: 14313   Rew: -102.43   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 100.00   Loss: 105.136330  7.336328  8.770953
Ep: 14314   Rew: -102.48   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 104.982407  9.421731  8.688930
Ep: 14315   Rew: -101.20   Avg Rew: -102.10   Polyak: 0.995000   Buffer: 100.00   Loss: 104.437820  6.624912  8.543525
Ep: 14316   Rew: -102.21   Avg Rew: -102.10   Polyak: 0.995000   Buffer: 100.00   Loss: 104.422485  9.908354  7.797028
Ep: 14317   Rew: -101.90   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 104.065117  10.002088  7.484918
Ep: 14318   Rew: -102.32   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 103.908470  9.606515  7.003353
Ep: 14319   Rew: -102.52   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 104.492760  11.287295  8.872511
Ep: 14320   Rew: -101.40   Avg Rew: -102.12   

Ep: 14381   Rew: -100.99   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 104.142448  9.131128  9.099411
Ep: 14382   Rew: -102.16   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 104.211845  9.184795  9.389749
Ep: 14383   Rew: -100.60   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 104.394623  7.338962  8.601475
Ep: 14384   Rew: -102.97   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 103.877190  7.667113  8.723925
Ep: 14385   Rew: -103.12   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 104.604820  10.030689  8.578410
Ep: 14386   Rew: -103.83   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 104.755943  7.138893  7.812602
Ep: 14387   Rew: -103.52   Avg Rew: -102.37   Polyak: 0.995000   Buffer: 100.00   Loss: 104.654137  9.658951  8.121765
Ep: 14388   Rew: -104.40   Avg Rew: -102.37   Polyak: 0.995000   Buffer: 100.00   Loss: 104.383644  9.933829  11.234632
Ep: 14389   Rew: -102.42   Avg Rew: -102.37   

Ep: 14450   Rew: -101.92   Avg Rew: -102.22   Polyak: 0.995000   Buffer: 100.00   Loss: 103.848022  8.376026  8.357971
Ep: 14451   Rew: -101.87   Avg Rew: -102.22   Polyak: 0.995000   Buffer: 100.00   Loss: 104.642334  7.007838  10.987179
Ep: 14452   Rew: -101.66   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 105.464211  7.268383  6.959805
Ep: 14453   Rew: -102.58   Avg Rew: -102.22   Polyak: 0.995000   Buffer: 100.00   Loss: 105.009567  7.083248  7.986123
Ep: 14454   Rew: -102.80   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 105.053123  7.785295  7.743197
Ep: 14455   Rew: -102.41   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 104.476669  7.142307  7.660117
Ep: 14456   Rew: -103.06   Avg Rew: -102.22   Polyak: 0.995000   Buffer: 100.00   Loss: 104.603493  7.980910  8.909957
Ep: 14457   Rew: -101.86   Avg Rew: -102.22   Polyak: 0.995000   Buffer: 100.00   Loss: 104.369377  7.429852  9.771391
Ep: 14458   Rew: -101.84   Avg Rew: -102.19   P

Ep: 14519   Rew: -102.92   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 104.427841  8.027296  8.416609
Ep: 14520   Rew: -102.77   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 104.362251  8.715014  6.912153
Ep: 14521   Rew: -102.60   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 104.902824  7.041637  9.950434
Ep: 14522   Rew: -102.28   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 105.045113  7.374934  8.996903
Ep: 14523   Rew: -101.87   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 104.495872  10.306770  8.905884
Ep: 14524   Rew: -100.73   Avg Rew: -101.93   Polyak: 0.995000   Buffer: 100.00   Loss: 104.938698  8.310808  7.342595
Ep: 14525   Rew: -101.84   Avg Rew: -101.92   Polyak: 0.995000   Buffer: 100.00   Loss: 103.954742  9.288378  10.681281
Ep: 14526   Rew: -100.59   Avg Rew: -101.91   Polyak: 0.995000   Buffer: 100.00   Loss: 103.925827  8.989754  9.155023
Ep: 14527   Rew: -101.02   Avg Rew: -101.89   

Ep: 14588   Rew: -102.94   Avg Rew: -101.82   Polyak: 0.995000   Buffer: 100.00   Loss: 104.642723  9.438723  8.096254
Ep: 14589   Rew: -102.33   Avg Rew: -101.82   Polyak: 0.995000   Buffer: 100.00   Loss: 103.900200  7.285770  6.563363
Ep: 14590   Rew: -101.77   Avg Rew: -101.82   Polyak: 0.995000   Buffer: 100.00   Loss: 104.647644  9.828552  8.484117
Ep: 14591   Rew: -101.86   Avg Rew: -101.82   Polyak: 0.995000   Buffer: 100.00   Loss: 103.922401  8.863069  6.733417
Ep: 14592   Rew: -102.57   Avg Rew: -101.83   Polyak: 0.995000   Buffer: 100.00   Loss: 103.960625  8.040426  7.117780
Ep: 14593   Rew: -102.91   Avg Rew: -101.83   Polyak: 0.995000   Buffer: 100.00   Loss: 104.508011  7.941395  7.675491
Ep: 14594   Rew: -101.65   Avg Rew: -101.84   Polyak: 0.995000   Buffer: 100.00   Loss: 104.442863  10.214802  8.439962
Ep: 14595   Rew: -101.91   Avg Rew: -101.84   Polyak: 0.995000   Buffer: 100.00   Loss: 103.990448  10.442163  7.256740
Ep: 14596   Rew: -101.72   Avg Rew: -101.84   

Ep: 14657   Rew: -101.62   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 100.00   Loss: 104.527908  9.482366  7.344281
Ep: 14658   Rew: -101.78   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 104.176254  9.673408  8.717612
Ep: 14659   Rew: -103.90   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 104.328918  9.095297  10.284540
Ep: 14660   Rew: -102.86   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 104.822243  8.381598  9.696829
Ep: 14661   Rew: -102.06   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 103.866707  9.759418  8.133342
Ep: 14662   Rew: -102.77   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 104.478889  8.397808  7.828023
Ep: 14663   Rew: -102.60   Avg Rew: -102.36   Polyak: 0.995000   Buffer: 100.00   Loss: 104.325554  7.747945  7.671947
Ep: 14664   Rew: -102.33   Avg Rew: -102.36   Polyak: 0.995000   Buffer: 100.00   Loss: 104.625366  7.029518  7.183947
Ep: 14665   Rew: -102.29   Avg Rew: -102.34   P

Ep: 14726   Rew: -102.29   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 104.898499  7.649872  8.969830
Ep: 14727   Rew: -102.84   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 105.046501  8.973827  8.935130
Ep: 14728   Rew: -102.87   Avg Rew: -102.36   Polyak: 0.995000   Buffer: 100.00   Loss: 104.288399  10.569327  8.459643
Ep: 14729   Rew: -101.88   Avg Rew: -102.36   Polyak: 0.995000   Buffer: 100.00   Loss: 103.667351  8.778918  8.457030
Ep: 14730   Rew: -103.86   Avg Rew: -102.36   Polyak: 0.995000   Buffer: 100.00   Loss: 104.187614  7.585998  9.664233
Ep: 14731   Rew: -103.55   Avg Rew: -102.37   Polyak: 0.995000   Buffer: 100.00   Loss: 104.437820  8.484522  9.820538
Ep: 14732   Rew: -102.63   Avg Rew: -102.38   Polyak: 0.995000   Buffer: 100.00   Loss: 104.327454  8.286139  7.436312
Ep: 14733   Rew: -102.83   Avg Rew: -102.38   Polyak: 0.995000   Buffer: 100.00   Loss: 104.835533  8.870745  8.547113
Ep: 14734   Rew: -103.04   Avg Rew: -102.39   P

Ep: 14795   Rew: -102.63   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.119804  7.482390  8.887388
Ep: 14796   Rew: -103.02   Avg Rew: -102.27   Polyak: 0.995000   Buffer: 100.00   Loss: 104.359581  9.496263  10.507470
Ep: 14797   Rew: -102.59   Avg Rew: -102.26   Polyak: 0.995000   Buffer: 100.00   Loss: 104.357613  8.721111  8.133947
Ep: 14798   Rew: -101.92   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.592606  8.815137  6.544768
Ep: 14799   Rew: -102.96   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.580406  9.168921  7.263368
Ep: 14800   Rew: -102.44   Avg Rew: -102.24   Polyak: 0.995000   Buffer: 100.00   Loss: 105.198486  8.330532  8.812136
Ep: 14801   Rew: -102.87   Avg Rew: -102.24   Polyak: 0.995000   Buffer: 100.00   Loss: 104.465179  10.067613  8.743090
Ep: 14802   Rew: -102.30   Avg Rew: -102.24   Polyak: 0.995000   Buffer: 100.00   Loss: 104.341515  10.320190  7.870000
Ep: 14803   Rew: -102.53   Avg Rew: -102.23  

Ep: 14864   Rew: -102.39   Avg Rew: -102.28   Polyak: 0.995000   Buffer: 100.00   Loss: 104.483643  9.251810  7.952224
Ep: 14865   Rew: -101.74   Avg Rew: -102.28   Polyak: 0.995000   Buffer: 100.00   Loss: 104.520615  8.584704  10.177369
Ep: 14866   Rew: -101.60   Avg Rew: -102.27   Polyak: 0.995000   Buffer: 100.00   Loss: 103.662582  7.951643  9.160917
Ep: 14867   Rew: -101.51   Avg Rew: -102.28   Polyak: 0.995000   Buffer: 100.00   Loss: 104.306503  8.578806  7.520521
Ep: 14868   Rew: -101.88   Avg Rew: -102.27   Polyak: 0.995000   Buffer: 100.00   Loss: 104.006454  8.285326  9.364739
Ep: 14869   Rew: -100.93   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.198723  7.683970  7.942556
Ep: 14870   Rew: -101.62   Avg Rew: -102.28   Polyak: 0.995000   Buffer: 100.00   Loss: 103.764854  8.759237  7.331268
Ep: 14871   Rew: -102.00   Avg Rew: -102.28   Polyak: 0.995000   Buffer: 100.00   Loss: 104.042068  6.908547  8.836393
Ep: 14872   Rew: -100.68   Avg Rew: -102.27   P

Ep: 14933   Rew: -102.95   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 104.422493  7.984449  8.889849
Ep: 14934   Rew: -102.09   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 104.322739  7.655580  8.445707
Ep: 14935   Rew: -101.45   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 104.289719  10.687743  10.100119
Ep: 14936   Rew: -101.91   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 104.658684  8.075857  8.880836
Ep: 14937   Rew: -101.78   Avg Rew: -102.18   Polyak: 0.995000   Buffer: 100.00   Loss: 103.907669  9.213308  9.014924
Ep: 14938   Rew: -102.63   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 104.556015  9.647631  8.676379
Ep: 14939   Rew: -101.67   Avg Rew: -102.18   Polyak: 0.995000   Buffer: 100.00   Loss: 104.370499  9.249260  8.028132
Ep: 14940   Rew: -102.30   Avg Rew: -102.18   Polyak: 0.995000   Buffer: 100.00   Loss: 104.382362  8.386325  7.057730
Ep: 14941   Rew: -102.38   Avg Rew: -102.18   

Ep: 15002   Rew: -101.11   Avg Rew: -102.23   Polyak: 0.995000   Buffer: 100.00   Loss: 104.629578  8.850919  8.779503
Ep: 15003   Rew: -102.31   Avg Rew: -102.23   Polyak: 0.995000   Buffer: 100.00   Loss: 104.423965  8.123535  8.616524
Ep: 15004   Rew: -101.57   Avg Rew: -102.23   Polyak: 0.995000   Buffer: 100.00   Loss: 105.036873  11.153943  8.833921
Ep: 15005   Rew: -100.83   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 104.401733  9.390835  7.834409
Ep: 15006   Rew: -101.23   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 104.522720  10.224412  8.568755
Ep: 15007   Rew: -103.37   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 103.960907  8.815579  6.648667
Ep: 15008   Rew: -102.11   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 104.261696  8.701883  7.500335
Ep: 15009   Rew: -101.52   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 104.769875  6.800702  5.370801
Ep: 15010   Rew: -103.23   Avg Rew: -102.21   

Ep: 15071   Rew: -102.51   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 104.525764  9.542109  8.318830
Ep: 15072   Rew: -103.25   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 100.00   Loss: 104.284882  8.753792  8.308849
Ep: 15073   Rew: -102.44   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 104.578636  7.437773  8.084997
Ep: 15074   Rew: -103.25   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 104.647629  8.034740  7.488713
Ep: 15075   Rew: -102.46   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 104.219650  8.487999  8.856926
Ep: 15076   Rew: -102.45   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 104.672897  8.713981  7.783519
Ep: 15077   Rew: -102.95   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 105.029228  7.802388  9.869398
Ep: 15078   Rew: -102.53   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 104.461708  8.814020  8.104329
Ep: 15079   Rew: -103.26   Avg Rew: -102.35   Po

Ep: 15140   Rew: -103.64   Avg Rew: -102.87   Polyak: 0.995000   Buffer: 100.00   Loss: 103.570930  9.239305  7.379968
Ep: 15141   Rew: -103.79   Avg Rew: -102.88   Polyak: 0.995000   Buffer: 100.00   Loss: 103.818748  8.472731  7.716394
Ep: 15142   Rew: -103.08   Avg Rew: -102.90   Polyak: 0.995000   Buffer: 100.00   Loss: 104.017487  10.133822  8.251184
Ep: 15143   Rew: -103.46   Avg Rew: -102.90   Polyak: 0.995000   Buffer: 100.00   Loss: 104.475929  9.254646  9.675833
Ep: 15144   Rew: -103.45   Avg Rew: -102.91   Polyak: 0.995000   Buffer: 100.00   Loss: 104.285828  7.996235  9.402940
Ep: 15145   Rew: -102.71   Avg Rew: -102.92   Polyak: 0.995000   Buffer: 100.00   Loss: 104.660576  7.228238  7.371076
Ep: 15146   Rew: -102.90   Avg Rew: -102.93   Polyak: 0.995000   Buffer: 100.00   Loss: 104.167526  8.678402  8.023718
Ep: 15147   Rew: -101.31   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 104.622276  7.794763  8.758657
Ep: 15148   Rew: -103.39   Avg Rew: -102.95   P

Ep: 15209   Rew: -102.62   Avg Rew: -102.31   Polyak: 0.995000   Buffer: 100.00   Loss: 104.169548  10.021037  8.023077
Ep: 15210   Rew: -102.57   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 103.819229  10.383869  7.175662
Ep: 15211   Rew: -101.66   Avg Rew: -102.29   Polyak: 0.995000   Buffer: 100.00   Loss: 104.346123  8.752319  9.209546
Ep: 15212   Rew: -101.42   Avg Rew: -102.27   Polyak: 0.995000   Buffer: 100.00   Loss: 104.728127  8.407980  6.978502
Ep: 15213   Rew: -101.87   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.522820  9.042252  10.819837
Ep: 15214   Rew: -99.90   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 104.274086  7.825444  7.367770
Ep: 15215   Rew: -102.62   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 103.985031  8.086077  8.786879
Ep: 15216   Rew: -102.38   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 104.274048  7.901284  8.734808
Ep: 15217   Rew: -100.83   Avg Rew: -102.18   

Ep: 15278   Rew: -101.73   Avg Rew: -101.82   Polyak: 0.995000   Buffer: 100.00   Loss: 103.742104  9.298683  6.527966
Ep: 15279   Rew: -102.11   Avg Rew: -101.82   Polyak: 0.995000   Buffer: 100.00   Loss: 103.949867  7.651756  8.087679
Ep: 15280   Rew: -102.11   Avg Rew: -101.82   Polyak: 0.995000   Buffer: 100.00   Loss: 104.664505  10.590007  8.393562
Ep: 15281   Rew: -101.70   Avg Rew: -101.81   Polyak: 0.995000   Buffer: 100.00   Loss: 104.191177  8.599279  7.873188
Ep: 15282   Rew: -101.79   Avg Rew: -101.80   Polyak: 0.995000   Buffer: 100.00   Loss: 103.766136  9.764647  9.057201
Ep: 15283   Rew: -99.65   Avg Rew: -101.77   Polyak: 0.995000   Buffer: 100.00   Loss: 103.515709  8.889181  8.454697
Ep: 15284   Rew: -102.03   Avg Rew: -101.77   Polyak: 0.995000   Buffer: 100.00   Loss: 104.420647  9.201832  10.082547
Ep: 15285   Rew: -102.10   Avg Rew: -101.77   Polyak: 0.995000   Buffer: 100.00   Loss: 104.695000  7.224690  8.725377
Ep: 15286   Rew: -101.85   Avg Rew: -101.76   P

Ep: 15347   Rew: -101.44   Avg Rew: -102.26   Polyak: 0.995000   Buffer: 100.00   Loss: 104.329819  8.192808  7.607270
Ep: 15348   Rew: -103.13   Avg Rew: -102.27   Polyak: 0.995000   Buffer: 100.00   Loss: 104.169762  8.458260  7.728364
Ep: 15349   Rew: -102.67   Avg Rew: -102.26   Polyak: 0.995000   Buffer: 100.00   Loss: 103.615524  8.345976  8.360744
Ep: 15350   Rew: -103.38   Avg Rew: -102.27   Polyak: 0.995000   Buffer: 100.00   Loss: 104.718765  10.173412  7.844957
Ep: 15351   Rew: -101.50   Avg Rew: -102.27   Polyak: 0.995000   Buffer: 100.00   Loss: 104.231926  8.026985  9.240709
Ep: 15352   Rew: -100.71   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 103.934967  8.145369  9.374568
Ep: 15353   Rew: -101.55   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.314995  10.705748  8.114529
Ep: 15354   Rew: -102.30   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.473763  8.447649  7.624521
Ep: 15355   Rew: -102.22   Avg Rew: -102.25   

Ep: 15416   Rew: -101.50   Avg Rew: -102.10   Polyak: 0.995000   Buffer: 100.00   Loss: 103.715164  9.886297  8.365782
Ep: 15417   Rew: -102.60   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 100.00   Loss: 104.289482  7.545237  9.220007
Ep: 15418   Rew: -102.48   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 100.00   Loss: 104.171021  11.314408  9.103479
Ep: 15419   Rew: -102.85   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 104.027580  7.966576  7.337036
Ep: 15420   Rew: -102.90   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 104.326805  8.625350  7.857460
Ep: 15421   Rew: -102.63   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 100.00   Loss: 104.131027  7.935373  7.948961
Ep: 15422   Rew: -102.94   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 100.00   Loss: 104.459122  8.758613  7.511448
Ep: 15423   Rew: -102.73   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 104.168015  8.781973  7.638624
Ep: 15424   Rew: -101.06   Avg Rew: -102.11   P

Ep: 15485   Rew: -97.69   Avg Rew: -102.16   Polyak: 0.995000   Buffer: 100.00   Loss: 104.018234  9.307934  10.169203
Ep: 15486   Rew: -98.50   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 103.985161  8.490950  9.731773
Ep: 15487   Rew: -98.89   Avg Rew: -102.09   Polyak: 0.995000   Buffer: 100.00   Loss: 104.207657  7.051682  8.398286
Ep: 15488   Rew: -99.23   Avg Rew: -102.06   Polyak: 0.995000   Buffer: 100.00   Loss: 104.223396  11.158264  9.121625
Ep: 15489   Rew: -99.37   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 104.080818  7.402721  8.204449
Ep: 15490   Rew: -101.23   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 103.857933  7.782720  8.649105
Ep: 15491   Rew: -100.16   Avg Rew: -102.02   Polyak: 0.995000   Buffer: 100.00   Loss: 103.669212  7.833104  7.645888
Ep: 15492   Rew: -100.49   Avg Rew: -101.99   Polyak: 0.995000   Buffer: 100.00   Loss: 103.926025  9.046465  10.000393
Ep: 15493   Rew: -101.24   Avg Rew: -101.98   Poly

Ep: 15554   Rew: -101.46   Avg Rew: -101.54   Polyak: 0.995000   Buffer: 100.00   Loss: 104.069206  8.175058  7.717486
Ep: 15555   Rew: -102.88   Avg Rew: -101.56   Polyak: 0.995000   Buffer: 100.00   Loss: 104.192223  7.511266  8.022173
Ep: 15556   Rew: -102.26   Avg Rew: -101.56   Polyak: 0.995000   Buffer: 100.00   Loss: 103.641998  7.311401  8.299782
Ep: 15557   Rew: -104.13   Avg Rew: -101.59   Polyak: 0.995000   Buffer: 100.00   Loss: 103.741013  8.437302  8.092426
Ep: 15558   Rew: -102.46   Avg Rew: -101.59   Polyak: 0.995000   Buffer: 100.00   Loss: 104.038208  9.005024  7.989908
Ep: 15559   Rew: -101.93   Avg Rew: -101.60   Polyak: 0.995000   Buffer: 100.00   Loss: 104.366768  7.876656  7.390570
Ep: 15560   Rew: -101.62   Avg Rew: -101.61   Polyak: 0.995000   Buffer: 100.00   Loss: 104.030487  8.364281  7.706570
Ep: 15561   Rew: -101.98   Avg Rew: -101.61   Polyak: 0.995000   Buffer: 100.00   Loss: 104.472603  7.536790  7.986095
Ep: 15562   Rew: -103.70   Avg Rew: -101.64   Po

Ep: 15623   Rew: -102.34   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 103.892548  8.333340  7.451128
Ep: 15624   Rew: -102.65   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 103.659271  9.224375  7.989547
Ep: 15625   Rew: -103.75   Avg Rew: -102.22   Polyak: 0.995000   Buffer: 100.00   Loss: 104.002266  9.066307  10.126895
Ep: 15626   Rew: -103.23   Avg Rew: -102.23   Polyak: 0.995000   Buffer: 100.00   Loss: 104.218178  8.434704  7.934529
Ep: 15627   Rew: -104.08   Avg Rew: -102.24   Polyak: 0.995000   Buffer: 100.00   Loss: 104.015579  7.227192  7.770648
Ep: 15628   Rew: -107.72   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 104.505676  9.315687  8.243768
Ep: 15629   Rew: -102.43   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 103.907745  6.609245  7.824858
Ep: 15630   Rew: -104.51   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 104.069412  7.918040  7.017542
Ep: 15631   Rew: -105.51   Avg Rew: -102.36   P

Ep: 15692   Rew: -103.00   Avg Rew: -102.82   Polyak: 0.995000   Buffer: 100.00   Loss: 104.057594  8.608959  7.281174
Ep: 15693   Rew: -102.33   Avg Rew: -102.83   Polyak: 0.995000   Buffer: 100.00   Loss: 103.571526  9.397894  7.428638
Ep: 15694   Rew: -103.12   Avg Rew: -102.84   Polyak: 0.995000   Buffer: 100.00   Loss: 104.011757  7.344934  8.601631
Ep: 15695   Rew: -103.21   Avg Rew: -102.87   Polyak: 0.995000   Buffer: 100.00   Loss: 103.783043  8.273150  9.784604
Ep: 15696   Rew: -102.93   Avg Rew: -102.86   Polyak: 0.995000   Buffer: 100.00   Loss: 103.854660  8.564660  8.495204
Ep: 15697   Rew: -102.91   Avg Rew: -102.87   Polyak: 0.995000   Buffer: 100.00   Loss: 104.513901  9.281200  9.060404
Ep: 15698   Rew: -102.83   Avg Rew: -102.88   Polyak: 0.995000   Buffer: 100.00   Loss: 104.128426  9.179773  6.391977
Ep: 15699   Rew: -102.79   Avg Rew: -102.89   Polyak: 0.995000   Buffer: 100.00   Loss: 104.221458  7.781880  7.919945
Ep: 15700   Rew: -102.78   Avg Rew: -102.90   Po

Ep: 15761   Rew: -101.74   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 104.100647  8.320235  9.319770
Ep: 15762   Rew: -101.17   Avg Rew: -102.58   Polyak: 0.995000   Buffer: 100.00   Loss: 103.846359  9.455423  7.659942
Ep: 15763   Rew: -102.84   Avg Rew: -102.57   Polyak: 0.995000   Buffer: 100.00   Loss: 104.375702  10.128500  6.820192
Ep: 15764   Rew: -101.73   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 103.976250  7.861669  6.665177
Ep: 15765   Rew: -102.80   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 104.223953  7.563454  9.872895
Ep: 15766   Rew: -101.45   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 104.437828  9.789232  9.550949
Ep: 15767   Rew: -102.42   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 104.048149  6.574832  7.756591
Ep: 15768   Rew: -102.10   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 104.097458  7.604377  9.104704
Ep: 15769   Rew: -101.94   Avg Rew: -102.54   P

Ep: 15830   Rew: -103.84   Avg Rew: -102.51   Polyak: 0.995000   Buffer: 100.00   Loss: 103.936630  7.727670  8.354922
Ep: 15831   Rew: -103.61   Avg Rew: -102.52   Polyak: 0.995000   Buffer: 100.00   Loss: 104.143700  9.358381  8.978922
Ep: 15832   Rew: -102.73   Avg Rew: -102.52   Polyak: 0.995000   Buffer: 100.00   Loss: 103.982368  8.219413  7.722134
Ep: 15833   Rew: -103.71   Avg Rew: -102.52   Polyak: 0.995000   Buffer: 100.00   Loss: 103.953705  8.065092  6.429575
Ep: 15834   Rew: -103.24   Avg Rew: -102.53   Polyak: 0.995000   Buffer: 100.00   Loss: 104.078011  7.805784  8.309278
Ep: 15835   Rew: -102.99   Avg Rew: -102.52   Polyak: 0.995000   Buffer: 100.00   Loss: 104.192451  9.412353  7.613183
Ep: 15836   Rew: -102.80   Avg Rew: -102.52   Polyak: 0.995000   Buffer: 100.00   Loss: 104.158089  7.680335  9.059282
Ep: 15837   Rew: -102.76   Avg Rew: -102.51   Polyak: 0.995000   Buffer: 100.00   Loss: 104.288628  8.897174  9.695289
Ep: 15838   Rew: -102.83   Avg Rew: -102.52   Po

Ep: 15899   Rew: -102.56   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 104.137398  8.477556  8.498822
Ep: 15900   Rew: -101.08   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 103.935303  10.489048  10.664909
Ep: 15901   Rew: -102.59   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 104.657227  9.261110  6.668041
Ep: 15902   Rew: -102.20   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 103.712769  7.650361  9.355479
Ep: 15903   Rew: -101.05   Avg Rew: -102.59   Polyak: 0.995000   Buffer: 100.00   Loss: 104.164810  7.528820  8.798361
Ep: 15904   Rew: -101.93   Avg Rew: -102.57   Polyak: 0.995000   Buffer: 100.00   Loss: 103.982483  6.735656  7.724811
Ep: 15905   Rew: -101.93   Avg Rew: -102.56   Polyak: 0.995000   Buffer: 100.00   Loss: 104.353851  7.237082  9.726076
Ep: 15906   Rew: -101.70   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 103.703087  8.458216  9.248071
Ep: 15907   Rew: -102.51   Avg Rew: -102.54   

Ep: 15968   Rew: -102.27   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 103.787354  7.743711  7.225754
Ep: 15969   Rew: -103.34   Avg Rew: -102.44   Polyak: 0.995000   Buffer: 100.00   Loss: 103.993149  6.273323  8.586111
Ep: 15970   Rew: -103.51   Avg Rew: -102.45   Polyak: 0.995000   Buffer: 100.00   Loss: 104.262032  8.416946  12.193527
Ep: 15971   Rew: -104.26   Avg Rew: -102.48   Polyak: 0.995000   Buffer: 100.00   Loss: 104.313332  10.617863  7.970890
Ep: 15972   Rew: -102.91   Avg Rew: -102.49   Polyak: 0.995000   Buffer: 100.00   Loss: 104.142387  8.825233  7.103590
Ep: 15973   Rew: -101.86   Avg Rew: -102.49   Polyak: 0.995000   Buffer: 100.00   Loss: 104.137779  9.715987  7.881277
Ep: 15974   Rew: -102.20   Avg Rew: -102.50   Polyak: 0.995000   Buffer: 100.00   Loss: 104.116646  9.990774  7.728702
Ep: 15975   Rew: -101.67   Avg Rew: -102.50   Polyak: 0.995000   Buffer: 100.00   Loss: 104.475098  9.137658  9.437717
Ep: 15976   Rew: -101.09   Avg Rew: -102.49   

Ep: 16037   Rew: -101.42   Avg Rew: -102.36   Polyak: 0.995000   Buffer: 100.00   Loss: 103.725037  10.147246  9.378757
Ep: 16038   Rew: -101.91   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 103.995926  8.558006  7.582861
Ep: 16039   Rew: -101.74   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 104.315025  8.051302  8.080436
Ep: 16040   Rew: -100.70   Avg Rew: -102.31   Polyak: 0.995000   Buffer: 100.00   Loss: 103.951187  8.373005  8.130816
Ep: 16041   Rew: -100.98   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 103.922607  8.923908  7.070562
Ep: 16042   Rew: -101.03   Avg Rew: -102.29   Polyak: 0.995000   Buffer: 100.00   Loss: 104.312164  7.229745  6.994574
Ep: 16043   Rew: -101.62   Avg Rew: -102.29   Polyak: 0.995000   Buffer: 100.00   Loss: 103.890923  7.926063  10.063989
Ep: 16044   Rew: -100.09   Avg Rew: -102.24   Polyak: 0.995000   Buffer: 100.00   Loss: 103.807877  6.796407  8.515341
Ep: 16045   Rew: -100.68   Avg Rew: -102.23   

Ep: 16106   Rew: -101.48   Avg Rew: -101.93   Polyak: 0.995000   Buffer: 100.00   Loss: 103.521042  8.223210  9.133591
Ep: 16107   Rew: -102.93   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 103.274696  9.941725  9.121456
Ep: 16108   Rew: -102.14   Avg Rew: -101.95   Polyak: 0.995000   Buffer: 100.00   Loss: 104.301270  11.188085  9.059310
Ep: 16109   Rew: -103.03   Avg Rew: -101.96   Polyak: 0.995000   Buffer: 100.00   Loss: 104.057404  7.631330  6.219769
Ep: 16110   Rew: -102.18   Avg Rew: -101.97   Polyak: 0.995000   Buffer: 100.00   Loss: 104.287560  9.955529  9.626690
Ep: 16111   Rew: -102.31   Avg Rew: -101.98   Polyak: 0.995000   Buffer: 100.00   Loss: 103.184166  8.191644  8.950232
Ep: 16112   Rew: -101.22   Avg Rew: -101.97   Polyak: 0.995000   Buffer: 100.00   Loss: 104.092850  8.596046  8.214452
Ep: 16113   Rew: -101.68   Avg Rew: -101.97   Polyak: 0.995000   Buffer: 100.00   Loss: 104.118637  7.279931  8.078684
Ep: 16114   Rew: -101.58   Avg Rew: -101.97   P

Ep: 16175   Rew: -101.84   Avg Rew: -102.10   Polyak: 0.995000   Buffer: 100.00   Loss: 104.411469  6.434656  7.179510
Ep: 16176   Rew: -102.76   Avg Rew: -102.10   Polyak: 0.995000   Buffer: 100.00   Loss: 103.959557  9.718476  8.446439
Ep: 16177   Rew: -102.26   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 104.110229  8.156925  9.098108
Ep: 16178   Rew: -102.92   Avg Rew: -102.09   Polyak: 0.995000   Buffer: 100.00   Loss: 103.806206  8.174888  8.087658
Ep: 16179   Rew: -103.15   Avg Rew: -102.10   Polyak: 0.995000   Buffer: 100.00   Loss: 103.972214  8.847495  7.994459
Ep: 16180   Rew: -102.14   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 103.483315  7.709272  9.316298
Ep: 16181   Rew: -102.09   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 104.171753  7.207392  6.450126
Ep: 16182   Rew: -102.57   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 104.338173  8.832130  8.094325
Ep: 16183   Rew: -102.23   Avg Rew: -102.07   Po

Ep: 16244   Rew: -102.77   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 104.630486  10.165353  9.218503
Ep: 16245   Rew: -102.24   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 103.893044  7.430681  6.443717
Ep: 16246   Rew: -101.73   Avg Rew: -102.14   Polyak: 0.995000   Buffer: 100.00   Loss: 104.096649  9.418470  6.931510
Ep: 16247   Rew: -101.75   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 103.677185  8.159307  7.107865
Ep: 16248   Rew: -101.54   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 103.828529  9.318836  10.554024
Ep: 16249   Rew: -101.95   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 103.609779  8.066478  7.685976
Ep: 16250   Rew: -102.36   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 103.683067  8.223383  8.136690
Ep: 16251   Rew: -101.75   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 103.927826  6.827415  8.396623
Ep: 16252   Rew: -102.37   Avg Rew: -102.12   

Ep: 16313   Rew: -102.75   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 104.320381  7.469632  7.064023
Ep: 16314   Rew: -102.74   Avg Rew: -102.43   Polyak: 0.995000   Buffer: 100.00   Loss: 103.947823  7.995461  8.214918
Ep: 16315   Rew: -102.31   Avg Rew: -102.44   Polyak: 0.995000   Buffer: 100.00   Loss: 104.132912  7.775486  7.630868
Ep: 16316   Rew: -101.00   Avg Rew: -102.42   Polyak: 0.995000   Buffer: 100.00   Loss: 104.470085  8.061565  8.234881
Ep: 16317   Rew: -101.32   Avg Rew: -102.41   Polyak: 0.995000   Buffer: 100.00   Loss: 103.697540  9.776735  10.098152
Ep: 16318   Rew: -102.07   Avg Rew: -102.41   Polyak: 0.995000   Buffer: 100.00   Loss: 104.428101  9.148197  8.744665
Ep: 16319   Rew: -102.77   Avg Rew: -102.41   Polyak: 0.995000   Buffer: 100.00   Loss: 103.778633  6.338217  8.161172
Ep: 16320   Rew: -102.10   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 104.248421  10.164206  10.736301
Ep: 16321   Rew: -102.41   Avg Rew: -102.41  

Ep: 16382   Rew: -103.78   Avg Rew: -102.57   Polyak: 0.995000   Buffer: 100.00   Loss: 103.776215  10.080532  9.227535
Ep: 16383   Rew: -104.60   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 104.410713  7.990399  10.299843
Ep: 16384   Rew: -103.09   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 104.574463  9.405676  8.701639
Ep: 16385   Rew: -103.91   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 103.718033  8.067803  9.290193
Ep: 16386   Rew: -103.19   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 104.194740  10.207909  7.182230
Ep: 16387   Rew: -103.18   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 104.123581  8.822696  7.782900
Ep: 16388   Rew: -103.62   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 104.038536  8.103745  8.891460
Ep: 16389   Rew: -102.91   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 103.744804  5.912538  9.582988
Ep: 16390   Rew: -104.23   Avg Rew: -102.65  

Ep: 16451   Rew: -101.67   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 100.00   Loss: 103.717049  8.205239  7.673466
Ep: 16452   Rew: -101.93   Avg Rew: -102.38   Polyak: 0.995000   Buffer: 100.00   Loss: 103.829803  7.948148  8.890796
Ep: 16453   Rew: -100.95   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 104.108055  7.583147  9.034942
Ep: 16454   Rew: -101.82   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 103.410873  8.869062  9.227347
Ep: 16455   Rew: -103.44   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 104.089615  11.484877  9.498013
Ep: 16456   Rew: -102.17   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 103.779099  9.598667  8.627384
Ep: 16457   Rew: -101.68   Avg Rew: -102.31   Polyak: 0.995000   Buffer: 100.00   Loss: 104.140755  7.404554  9.805162
Ep: 16458   Rew: -102.22   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 104.138702  7.301368  6.790472
Ep: 16459   Rew: -101.75   Avg Rew: -102.28   P

Ep: 16520   Rew: -102.38   Avg Rew: -101.73   Polyak: 0.995000   Buffer: 100.00   Loss: 104.516266  8.228601  8.452260
Ep: 16521   Rew: -102.93   Avg Rew: -101.71   Polyak: 0.995000   Buffer: 100.00   Loss: 103.800949  11.070074  9.393103
Ep: 16522   Rew: -102.73   Avg Rew: -101.73   Polyak: 0.995000   Buffer: 100.00   Loss: 103.729683  7.516686  6.138535
Ep: 16523   Rew: -103.12   Avg Rew: -101.74   Polyak: 0.995000   Buffer: 100.00   Loss: 104.005684  7.126130  7.129557
Ep: 16524   Rew: -103.11   Avg Rew: -101.76   Polyak: 0.995000   Buffer: 100.00   Loss: 104.175873  8.460608  7.373266
Ep: 16525   Rew: -102.95   Avg Rew: -101.77   Polyak: 0.995000   Buffer: 100.00   Loss: 103.851128  8.033679  7.179117
Ep: 16526   Rew: -102.70   Avg Rew: -101.77   Polyak: 0.995000   Buffer: 100.00   Loss: 103.755203  11.207368  9.653439
Ep: 16527   Rew: -102.98   Avg Rew: -101.78   Polyak: 0.995000   Buffer: 100.00   Loss: 103.519714  8.611963  6.870214
Ep: 16528   Rew: -101.84   Avg Rew: -101.78   

Ep: 16589   Rew: -102.19   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 104.076454  9.060947  7.178927
Ep: 16590   Rew: -100.27   Avg Rew: -102.06   Polyak: 0.995000   Buffer: 100.00   Loss: 103.689316  8.518201  7.640823
Ep: 16591   Rew: -102.22   Avg Rew: -102.06   Polyak: 0.995000   Buffer: 100.00   Loss: 103.823662  8.293224  6.947040
Ep: 16592   Rew: -101.69   Avg Rew: -102.05   Polyak: 0.995000   Buffer: 100.00   Loss: 103.759415  9.188986  7.103504
Ep: 16593   Rew: -102.29   Avg Rew: -102.05   Polyak: 0.995000   Buffer: 100.00   Loss: 103.694572  10.182596  7.666796
Ep: 16594   Rew: -101.22   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 104.045609  8.518848  7.804338
Ep: 16595   Rew: -102.13   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 103.948097  8.299386  7.776407
Ep: 16596   Rew: -102.22   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 103.881714  9.007627  6.498010
Ep: 16597   Rew: -102.11   Avg Rew: -102.06   P

Ep: 16658   Rew: -102.30   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 103.537109  8.199426  5.515782
Ep: 16659   Rew: -102.18   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 103.668327  7.376616  8.890574
Ep: 16660   Rew: -100.73   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 103.810844  9.298538  9.420483
Ep: 16661   Rew: -101.20   Avg Rew: -102.18   Polyak: 0.995000   Buffer: 100.00   Loss: 103.262245  8.365968  8.023607
Ep: 16662   Rew: -102.90   Avg Rew: -102.18   Polyak: 0.995000   Buffer: 100.00   Loss: 104.009552  8.736489  9.279499
Ep: 16663   Rew: -102.10   Avg Rew: -102.19   Polyak: 0.995000   Buffer: 100.00   Loss: 103.453720  7.003922  8.138577
Ep: 16664   Rew: -102.98   Avg Rew: -102.21   Polyak: 0.995000   Buffer: 100.00   Loss: 103.707642  8.339191  7.993243
Ep: 16665   Rew: -101.69   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 104.416718  8.675664  7.987034
Ep: 16666   Rew: -102.72   Avg Rew: -102.21   Po

Ep: 16727   Rew: -100.93   Avg Rew: -102.17   Polyak: 0.995000   Buffer: 100.00   Loss: 103.356476  8.579315  7.690906
Ep: 16728   Rew: -100.75   Avg Rew: -102.15   Polyak: 0.995000   Buffer: 100.00   Loss: 102.926743  9.959349  6.297796
Ep: 16729   Rew: -101.15   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 103.547180  10.632879  11.416416
Ep: 16730   Rew: -102.00   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 100.00   Loss: 103.823944  8.804686  7.741276
Ep: 16731   Rew: -102.46   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 100.00   Loss: 103.951202  9.656655  6.536299
Ep: 16732   Rew: -102.18   Avg Rew: -102.10   Polyak: 0.995000   Buffer: 100.00   Loss: 104.282669  8.520697  7.775878
Ep: 16733   Rew: -101.72   Avg Rew: -102.08   Polyak: 0.995000   Buffer: 100.00   Loss: 103.850777  8.156714  6.809605
Ep: 16734   Rew: -101.91   Avg Rew: -102.07   Polyak: 0.995000   Buffer: 100.00   Loss: 103.972588  7.355425  7.002169
Ep: 16735   Rew: -101.83   Avg Rew: -102.05   

Ep: 16796   Rew: -101.50   Avg Rew: -102.03   Polyak: 0.995000   Buffer: 100.00   Loss: 103.402794  7.649798  7.067974
Ep: 16797   Rew: -101.96   Avg Rew: -102.03   Polyak: 0.995000   Buffer: 100.00   Loss: 103.751076  7.300261  8.708247
Ep: 16798   Rew: -101.84   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 103.560509  7.771187  6.787011
Ep: 16799   Rew: -101.69   Avg Rew: -102.05   Polyak: 0.995000   Buffer: 100.00   Loss: 103.553879  8.197318  8.329114
Ep: 16800   Rew: -102.27   Avg Rew: -102.05   Polyak: 0.995000   Buffer: 100.00   Loss: 103.897926  7.732152  7.728622
Ep: 16801   Rew: -101.51   Avg Rew: -102.05   Polyak: 0.995000   Buffer: 100.00   Loss: 103.457306  9.559483  10.605200
Ep: 16802   Rew: -102.43   Avg Rew: -102.06   Polyak: 0.995000   Buffer: 100.00   Loss: 103.277138  7.595095  6.417736
Ep: 16803   Rew: -102.06   Avg Rew: -102.07   Polyak: 0.995000   Buffer: 100.00   Loss: 103.439957  7.913311  8.255930
Ep: 16804   Rew: -102.11   Avg Rew: -102.08   P

Ep: 16865   Rew: -102.48   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 103.902870  7.022601  8.171531
Ep: 16866   Rew: -102.34   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 103.901466  6.382644  7.244527
Ep: 16867   Rew: -102.47   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 100.00   Loss: 103.338852  9.402311  9.117101
Ep: 16868   Rew: -102.74   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 100.00   Loss: 103.772438  6.183931  8.370401
Ep: 16869   Rew: -102.11   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 100.00   Loss: 103.639824  8.596251  7.952871
Ep: 16870   Rew: -102.80   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 100.00   Loss: 103.909668  8.726718  7.762704
Ep: 16871   Rew: -102.18   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 100.00   Loss: 103.969185  10.004198  7.979224
Ep: 16872   Rew: -102.82   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 103.748604  7.903968  7.284866
Ep: 16873   Rew: -102.91   Avg Rew: -102.40   P

Ep: 16934   Rew: -102.78   Avg Rew: -102.53   Polyak: 0.995000   Buffer: 100.00   Loss: 104.501068  8.168499  6.988217
Ep: 16935   Rew: -103.04   Avg Rew: -102.54   Polyak: 0.995000   Buffer: 100.00   Loss: 103.357071  9.151565  6.747379
Ep: 16936   Rew: -103.10   Avg Rew: -102.54   Polyak: 0.995000   Buffer: 100.00   Loss: 103.608238  8.415027  8.031875
Ep: 16937   Rew: -102.89   Avg Rew: -102.55   Polyak: 0.995000   Buffer: 100.00   Loss: 103.682953  7.702748  6.939620
Ep: 16938   Rew: -102.35   Avg Rew: -102.54   Polyak: 0.995000   Buffer: 100.00   Loss: 103.876122  8.430583  8.737940
Ep: 16939   Rew: -103.12   Avg Rew: -102.54   Polyak: 0.995000   Buffer: 100.00   Loss: 103.996254  8.183712  10.488484
Ep: 16940   Rew: -101.98   Avg Rew: -102.53   Polyak: 0.995000   Buffer: 100.00   Loss: 103.825394  7.799776  9.148252
Ep: 16941   Rew: -102.44   Avg Rew: -102.53   Polyak: 0.995000   Buffer: 100.00   Loss: 104.003578  8.646461  7.176908
Ep: 16942   Rew: -102.55   Avg Rew: -102.52   P

Ep: 17003   Rew: -101.88   Avg Rew: -101.98   Polyak: 0.995000   Buffer: 100.00   Loss: 103.659409  8.517255  10.200329
Ep: 17004   Rew: -101.04   Avg Rew: -101.95   Polyak: 0.995000   Buffer: 100.00   Loss: 103.465355  9.154425  10.370158
Ep: 17005   Rew: -102.06   Avg Rew: -101.95   Polyak: 0.995000   Buffer: 100.00   Loss: 103.855446  8.376885  7.409817
Ep: 17006   Rew: -101.45   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 103.020004  10.057060  9.155643
Ep: 17007   Rew: -101.76   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 103.463844  7.735894  7.578995
Ep: 17008   Rew: -100.33   Avg Rew: -101.93   Polyak: 0.995000   Buffer: 100.00   Loss: 103.259369  6.834029  7.484253
Ep: 17009   Rew: -100.40   Avg Rew: -101.91   Polyak: 0.995000   Buffer: 100.00   Loss: 102.934677  8.559754  7.952661
Ep: 17010   Rew: -100.81   Avg Rew: -101.90   Polyak: 0.995000   Buffer: 100.00   Loss: 103.881256  8.631489  7.359830
Ep: 17011   Rew: -102.72   Avg Rew: -101.90  

Ep: 17072   Rew: -101.36   Avg Rew: -101.84   Polyak: 0.995000   Buffer: 100.00   Loss: 103.831688  7.251278  7.631454
Ep: 17073   Rew: -100.57   Avg Rew: -101.83   Polyak: 0.995000   Buffer: 100.00   Loss: 103.575539  9.542221  8.147065
Ep: 17074   Rew: -101.19   Avg Rew: -101.83   Polyak: 0.995000   Buffer: 100.00   Loss: 103.580772  7.407735  7.619688
Ep: 17075   Rew: -101.57   Avg Rew: -101.83   Polyak: 0.995000   Buffer: 100.00   Loss: 103.103149  7.803737  8.747005
Ep: 17076   Rew: -101.62   Avg Rew: -101.84   Polyak: 0.995000   Buffer: 100.00   Loss: 103.470528  8.599804  8.877912
Ep: 17077   Rew: -102.09   Avg Rew: -101.84   Polyak: 0.995000   Buffer: 100.00   Loss: 103.369537  8.685181  8.738400
Ep: 17078   Rew: -102.29   Avg Rew: -101.85   Polyak: 0.995000   Buffer: 100.00   Loss: 103.613747  5.451327  6.919389
Ep: 17079   Rew: -102.00   Avg Rew: -101.85   Polyak: 0.995000   Buffer: 100.00   Loss: 103.466270  7.511406  8.439134
Ep: 17080   Rew: -102.09   Avg Rew: -101.86   Po

Ep: 17141   Rew: -102.51   Avg Rew: -101.87   Polyak: 0.995000   Buffer: 100.00   Loss: 104.035629  7.357934  8.472317
Ep: 17142   Rew: -102.40   Avg Rew: -101.88   Polyak: 0.995000   Buffer: 100.00   Loss: 103.966820  9.627308  7.142557
Ep: 17143   Rew: -102.16   Avg Rew: -101.89   Polyak: 0.995000   Buffer: 100.00   Loss: 103.417435  7.256169  8.604517
Ep: 17144   Rew: -102.30   Avg Rew: -101.90   Polyak: 0.995000   Buffer: 100.00   Loss: 104.003983  7.284618  7.440428
Ep: 17145   Rew: -102.95   Avg Rew: -101.91   Polyak: 0.995000   Buffer: 100.00   Loss: 103.773766  8.605484  7.781226
Ep: 17146   Rew: -103.57   Avg Rew: -101.92   Polyak: 0.995000   Buffer: 100.00   Loss: 103.564598  8.868534  6.963600
Ep: 17147   Rew: -103.50   Avg Rew: -101.93   Polyak: 0.995000   Buffer: 100.00   Loss: 103.369530  7.359707  7.043596
Ep: 17148   Rew: -103.71   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 104.132057  8.640238  8.418899
Ep: 17149   Rew: -103.60   Avg Rew: -101.95   Po

Ep: 17210   Rew: -102.86   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 103.212349  6.994832  8.176208
Ep: 17211   Rew: -100.72   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 103.269981  8.450507  8.011278
Ep: 17212   Rew: -101.64   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 103.669388  8.520571  7.573495
Ep: 17213   Rew: -101.30   Avg Rew: -102.32   Polyak: 0.995000   Buffer: 100.00   Loss: 103.679535  8.516643  8.593024
Ep: 17214   Rew: -101.44   Avg Rew: -102.30   Polyak: 0.995000   Buffer: 100.00   Loss: 103.753410  8.033052  7.347831
Ep: 17215   Rew: -100.86   Avg Rew: -102.28   Polyak: 0.995000   Buffer: 100.00   Loss: 103.428421  8.662379  8.300023
Ep: 17216   Rew: -101.13   Avg Rew: -102.26   Polyak: 0.995000   Buffer: 100.00   Loss: 103.606224  8.984999  8.842505
Ep: 17217   Rew: -101.17   Avg Rew: -102.26   Polyak: 0.995000   Buffer: 100.00   Loss: 103.182495  8.451224  9.062588
Ep: 17218   Rew: -101.14   Avg Rew: -102.26   Po

Ep: 17279   Rew: -99.40   Avg Rew: -102.77   Polyak: 0.995000   Buffer: 100.00   Loss: 103.870613  8.071446  7.719861
Ep: 17280   Rew: -100.28   Avg Rew: -102.76   Polyak: 0.995000   Buffer: 100.00   Loss: 103.341286  8.449331  10.505966
Ep: 17281   Rew: -101.78   Avg Rew: -102.76   Polyak: 0.995000   Buffer: 100.00   Loss: 104.269585  8.763900  8.337674
Ep: 17282   Rew: -99.02   Avg Rew: -102.75   Polyak: 0.995000   Buffer: 100.00   Loss: 103.215210  9.108330  8.476521
Ep: 17283   Rew: -101.26   Avg Rew: -102.76   Polyak: 0.995000   Buffer: 100.00   Loss: 103.564415  9.646687  7.281816
Ep: 17284   Rew: -101.84   Avg Rew: -102.76   Polyak: 0.995000   Buffer: 100.00   Loss: 103.807732  8.200768  6.631379
Ep: 17285   Rew: -98.94   Avg Rew: -102.74   Polyak: 0.995000   Buffer: 100.00   Loss: 103.475029  8.453495  6.954753
Ep: 17286   Rew: -100.72   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 103.731255  7.767004  6.297001
Ep: 17287   Rew: -100.70   Avg Rew: -102.70   Poly

Ep: 17348   Rew: -103.21   Avg Rew: -103.20   Polyak: 0.995000   Buffer: 100.00   Loss: 103.678146  10.826402  6.985150
Ep: 17349   Rew: -103.62   Avg Rew: -103.20   Polyak: 0.995000   Buffer: 100.00   Loss: 103.308586  7.954897  7.394970
Ep: 17350   Rew: -103.83   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 103.615028  9.020771  11.007839
Ep: 17351   Rew: -103.61   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 103.959724  8.012165  6.475301
Ep: 17352   Rew: -102.20   Avg Rew: -102.92   Polyak: 0.995000   Buffer: 100.00   Loss: 103.718307  8.111839  6.993586
Ep: 17353   Rew: -103.46   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 103.861816  8.656261  7.038078
Ep: 17354   Rew: -103.54   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 103.628189  8.584093  6.660830
Ep: 17355   Rew: -104.26   Avg Rew: -102.97   Polyak: 0.995000   Buffer: 100.00   Loss: 103.502502  7.303745  7.441059
Ep: 17356   Rew: -104.07   Avg Rew: -103.01   

Ep: 17417   Rew: -101.90   Avg Rew: -102.97   Polyak: 0.995000   Buffer: 100.00   Loss: 102.864578  8.799510  7.920322
Ep: 17418   Rew: -101.14   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 103.962883  8.854251  6.985496
Ep: 17419   Rew: -103.24   Avg Rew: -102.97   Polyak: 0.995000   Buffer: 100.00   Loss: 103.907272  8.490466  7.222393
Ep: 17420   Rew: -103.06   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 103.915848  7.832960  7.697850
Ep: 17421   Rew: -102.02   Avg Rew: -102.95   Polyak: 0.995000   Buffer: 100.00   Loss: 103.379852  8.952245  9.249851
Ep: 17422   Rew: -101.99   Avg Rew: -102.93   Polyak: 0.995000   Buffer: 100.00   Loss: 103.467857  7.859955  8.808650
Ep: 17423   Rew: -102.28   Avg Rew: -102.93   Polyak: 0.995000   Buffer: 100.00   Loss: 103.580132  9.313314  9.423770
Ep: 17424   Rew: -102.19   Avg Rew: -102.90   Polyak: 0.995000   Buffer: 100.00   Loss: 103.502182  7.695195  7.971982
Ep: 17425   Rew: -103.23   Avg Rew: -102.89   Po

Ep: 17486   Rew: -103.44   Avg Rew: -103.83   Polyak: 0.995000   Buffer: 100.00   Loss: 103.890785  7.872361  7.010356
Ep: 17487   Rew: -105.06   Avg Rew: -103.85   Polyak: 0.995000   Buffer: 100.00   Loss: 103.621964  8.356594  6.426280
Ep: 17488   Rew: -103.79   Avg Rew: -103.84   Polyak: 0.995000   Buffer: 100.00   Loss: 103.086281  5.572223  8.358593
Ep: 17489   Rew: -103.73   Avg Rew: -103.86   Polyak: 0.995000   Buffer: 100.00   Loss: 103.963722  8.415084  6.875221
Ep: 17490   Rew: -104.01   Avg Rew: -103.85   Polyak: 0.995000   Buffer: 100.00   Loss: 103.624138  7.570191  6.355168
Ep: 17491   Rew: -102.76   Avg Rew: -103.81   Polyak: 0.995000   Buffer: 100.00   Loss: 103.787689  7.347085  8.489147
Ep: 17492   Rew: -103.57   Avg Rew: -103.79   Polyak: 0.995000   Buffer: 100.00   Loss: 103.755318  8.057728  8.075878
Ep: 17493   Rew: -103.33   Avg Rew: -103.78   Polyak: 0.995000   Buffer: 100.00   Loss: 103.661758  6.938992  7.792912
Ep: 17494   Rew: -102.87   Avg Rew: -103.80   Po

Ep: 17555   Rew: -102.75   Avg Rew: -102.80   Polyak: 0.995000   Buffer: 100.00   Loss: 103.877930  10.391741  7.394851
Ep: 17556   Rew: -102.60   Avg Rew: -102.82   Polyak: 0.995000   Buffer: 100.00   Loss: 103.614815  7.637863  9.656492
Ep: 17557   Rew: -102.76   Avg Rew: -102.81   Polyak: 0.995000   Buffer: 100.00   Loss: 103.668182  6.641221  8.541512
Ep: 17558   Rew: -102.36   Avg Rew: -102.82   Polyak: 0.995000   Buffer: 100.00   Loss: 103.297340  8.445167  7.629441
Ep: 17559   Rew: -103.04   Avg Rew: -102.82   Polyak: 0.995000   Buffer: 100.00   Loss: 103.598663  7.959486  7.601169
Ep: 17560   Rew: -102.26   Avg Rew: -102.83   Polyak: 0.995000   Buffer: 100.00   Loss: 103.167488  6.682956  7.015801
Ep: 17561   Rew: -102.20   Avg Rew: -102.82   Polyak: 0.995000   Buffer: 100.00   Loss: 103.485191  6.483462  8.216700
Ep: 17562   Rew: -102.07   Avg Rew: -102.81   Polyak: 0.995000   Buffer: 100.00   Loss: 103.717903  8.381713  7.968159
Ep: 17563   Rew: -102.51   Avg Rew: -102.80   P

Ep: 17624   Rew: -103.15   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 103.735245  9.238316  9.273251
Ep: 17625   Rew: -102.89   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 103.667740  8.649523  6.718024
Ep: 17626   Rew: -101.49   Avg Rew: -102.12   Polyak: 0.995000   Buffer: 100.00   Loss: 103.692589  7.603645  8.857491
Ep: 17627   Rew: -100.69   Avg Rew: -102.10   Polyak: 0.995000   Buffer: 100.00   Loss: 104.139519  7.382451  7.782848
Ep: 17628   Rew: -102.97   Avg Rew: -102.11   Polyak: 0.995000   Buffer: 100.00   Loss: 103.915085  6.775763  8.502029
Ep: 17629   Rew: -101.88   Avg Rew: -102.10   Polyak: 0.995000   Buffer: 100.00   Loss: 103.482903  8.790819  7.809683
Ep: 17630   Rew: -102.18   Avg Rew: -102.09   Polyak: 0.995000   Buffer: 100.00   Loss: 103.509354  10.789881  8.888662
Ep: 17631   Rew: -185.92   Avg Rew: -102.92   Polyak: 0.995000   Buffer: 100.00   Loss: 103.411911  9.216625  8.905047
Ep: 17632   Rew: -102.77   Avg Rew: -102.92   P

Ep: 17693   Rew: -102.44   Avg Rew: -102.94   Polyak: 0.995000   Buffer: 100.00   Loss: 103.760590  6.920157  7.153244
Ep: 17694   Rew: -103.88   Avg Rew: -102.96   Polyak: 0.995000   Buffer: 100.00   Loss: 103.135544  9.427380  8.920825
Ep: 17695   Rew: -103.33   Avg Rew: -103.00   Polyak: 0.995000   Buffer: 100.00   Loss: 103.375648  8.171389  7.277245
Ep: 17696   Rew: -102.46   Avg Rew: -103.00   Polyak: 0.995000   Buffer: 100.00   Loss: 103.515244  9.281444  8.568356
Ep: 17697   Rew: -101.69   Avg Rew: -102.99   Polyak: 0.995000   Buffer: 100.00   Loss: 104.049507  8.675500  8.874977
Ep: 17698   Rew: -101.44   Avg Rew: -102.99   Polyak: 0.995000   Buffer: 100.00   Loss: 103.913589  8.087561  6.840104
Ep: 17699   Rew: -102.74   Avg Rew: -103.00   Polyak: 0.995000   Buffer: 100.00   Loss: 103.231125  10.253078  8.728559
Ep: 17700   Rew: -100.94   Avg Rew: -103.00   Polyak: 0.995000   Buffer: 100.00   Loss: 103.465096  6.793894  7.034099
Ep: 17701   Rew: -100.53   Avg Rew: -102.98   P

Ep: 17762   Rew: -103.20   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 103.271538  9.723354  8.967751
Ep: 17763   Rew: -102.93   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 103.648926  9.338366  9.491989
Ep: 17764   Rew: -102.48   Avg Rew: -102.14   Polyak: 0.995000   Buffer: 100.00   Loss: 103.832405  8.619229  7.886669
Ep: 17765   Rew: -102.84   Avg Rew: -102.14   Polyak: 0.995000   Buffer: 100.00   Loss: 103.618942  6.467650  6.744278
Ep: 17766   Rew: -102.86   Avg Rew: -102.14   Polyak: 0.995000   Buffer: 100.00   Loss: 103.439140  8.710167  8.253464
Ep: 17767   Rew: -102.04   Avg Rew: -102.14   Polyak: 0.995000   Buffer: 100.00   Loss: 103.950279  7.371086  7.231439
Ep: 17768   Rew: -101.78   Avg Rew: -102.14   Polyak: 0.995000   Buffer: 100.00   Loss: 103.624977  7.607877  8.780692
Ep: 17769   Rew: -102.73   Avg Rew: -102.13   Polyak: 0.995000   Buffer: 100.00   Loss: 103.505196  6.576347  6.307674
Ep: 17770   Rew: -102.57   Avg Rew: -102.13   Po

Ep: 17831   Rew: -102.57   Avg Rew: -102.25   Polyak: 0.995000   Buffer: 100.00   Loss: 104.249435  8.675757  9.489826
Ep: 17832   Rew: -102.93   Avg Rew: -102.26   Polyak: 0.995000   Buffer: 100.00   Loss: 103.886833  7.801028  7.492387
Ep: 17833   Rew: -102.63   Avg Rew: -102.27   Polyak: 0.995000   Buffer: 100.00   Loss: 103.989174  7.950552  7.966105
Ep: 17834   Rew: -102.12   Avg Rew: -102.27   Polyak: 0.995000   Buffer: 100.00   Loss: 103.348183  7.027809  8.586642
Ep: 17835   Rew: -101.84   Avg Rew: -102.27   Polyak: 0.995000   Buffer: 100.00   Loss: 103.775940  9.043474  6.800746
Ep: 17836   Rew: -102.18   Avg Rew: -102.27   Polyak: 0.995000   Buffer: 100.00   Loss: 103.591095  6.370939  6.956535
Ep: 17837   Rew: -102.31   Avg Rew: -102.28   Polyak: 0.995000   Buffer: 100.00   Loss: 103.741806  8.487836  8.356175
Ep: 17838   Rew: -102.72   Avg Rew: -102.28   Polyak: 0.995000   Buffer: 100.00   Loss: 103.627403  8.293305  6.303454
Ep: 17839   Rew: -102.67   Avg Rew: -102.28   Po

Ep: 17900   Rew: -103.20   Avg Rew: -102.38   Polyak: 0.995000   Buffer: 100.00   Loss: 103.824234  7.235678  9.837111
Ep: 17901   Rew: -102.70   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 100.00   Loss: 104.058060  7.047840  8.357172
Ep: 17902   Rew: -103.00   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 103.476547  8.247773  6.866784
Ep: 17903   Rew: -102.42   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 103.688057  9.099194  7.265854
Ep: 17904   Rew: -101.94   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 103.663628  7.305130  6.828368
Ep: 17905   Rew: -102.25   Avg Rew: -102.41   Polyak: 0.995000   Buffer: 100.00   Loss: 103.404915  6.193602  8.216430
Ep: 17906   Rew: -102.06   Avg Rew: -102.42   Polyak: 0.995000   Buffer: 100.00   Loss: 103.844147  8.305156  9.042000
Ep: 17907   Rew: -101.45   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 103.752693  8.663756  7.579545
Ep: 17908   Rew: -101.05   Avg Rew: -102.40   Po

Ep: 17969   Rew: -102.53   Avg Rew: -102.37   Polyak: 0.995000   Buffer: 100.00   Loss: 103.877769  9.024098  7.289359
Ep: 17970   Rew: -102.52   Avg Rew: -102.38   Polyak: 0.995000   Buffer: 100.00   Loss: 103.455383  6.833354  6.579099
Ep: 17971   Rew: -102.14   Avg Rew: -102.36   Polyak: 0.995000   Buffer: 100.00   Loss: 103.975227  8.209957  8.755830
Ep: 17972   Rew: -102.16   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 103.267387  6.875248  7.551007
Ep: 17973   Rew: -102.07   Avg Rew: -102.35   Polyak: 0.995000   Buffer: 100.00   Loss: 104.109291  8.471290  7.737627
Ep: 17974   Rew: -102.02   Avg Rew: -102.34   Polyak: 0.995000   Buffer: 100.00   Loss: 103.552010  6.917982  8.626368
Ep: 17975   Rew: -102.27   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 103.246849  7.739193  5.875108
Ep: 17976   Rew: -102.11   Avg Rew: -102.33   Polyak: 0.995000   Buffer: 100.00   Loss: 103.449257  5.850785  7.726048
Ep: 17977   Rew: -101.88   Avg Rew: -102.33   Po

Ep: 18038   Rew: -102.59   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 103.675812  9.284766  8.366423
Ep: 18039   Rew: -103.36   Avg Rew: -102.64   Polyak: 0.995000   Buffer: 100.00   Loss: 103.781670  10.881098  8.402130
Ep: 18040   Rew: -102.83   Avg Rew: -102.63   Polyak: 0.995000   Buffer: 100.00   Loss: 103.629425  6.699347  7.190785
Ep: 18041   Rew: -102.63   Avg Rew: -102.62   Polyak: 0.995000   Buffer: 100.00   Loss: 103.541946  6.975304  8.449236
Ep: 18042   Rew: -102.35   Avg Rew: -102.61   Polyak: 0.995000   Buffer: 100.00   Loss: 103.484390  8.533249  7.550855
Ep: 18043   Rew: -102.76   Avg Rew: -102.60   Polyak: 0.995000   Buffer: 100.00   Loss: 103.468651  9.015216  8.285203
Ep: 18044   Rew: -102.29   Avg Rew: -102.59   Polyak: 0.995000   Buffer: 100.00   Loss: 103.211533  7.955895  7.870739
Ep: 18045   Rew: -102.55   Avg Rew: -102.59   Polyak: 0.995000   Buffer: 100.00   Loss: 102.927338  8.348936  8.185398
Ep: 18046   Rew: -102.27   Avg Rew: -102.58   P

Ep: 18107   Rew: -100.49   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 100.00   Loss: 103.648506  8.423539  6.842695
Ep: 18108   Rew: -100.50   Avg Rew: -102.38   Polyak: 0.995000   Buffer: 100.00   Loss: 103.314651  7.778549  8.566372
Ep: 18109   Rew: -102.93   Avg Rew: -102.39   Polyak: 0.995000   Buffer: 100.00   Loss: 103.472145  8.919950  8.870049
Ep: 18110   Rew: -103.76   Avg Rew: -102.40   Polyak: 0.995000   Buffer: 100.00   Loss: 103.607613  8.650496  8.839277
Ep: 18111   Rew: -100.96   Avg Rew: -102.38   Polyak: 0.995000   Buffer: 100.00   Loss: 103.445137  8.390348  8.610546
Ep: 18112   Rew: -111.73   Avg Rew: -102.47   Polyak: 0.995000   Buffer: 100.00   Loss: 103.570412  7.250460  8.193514
Ep: 18113   Rew: -103.17   Avg Rew: -102.47   Polyak: 0.995000   Buffer: 100.00   Loss: 103.593758  7.151209  6.754554
Ep: 18114   Rew: -102.15   Avg Rew: -102.46   Polyak: 0.995000   Buffer: 100.00   Loss: 103.568840  8.426989  7.504899
Ep: 18115   Rew: -101.70   Avg Rew: -102.46   Po

Ep: 18176   Rew: -102.09   Avg Rew: -102.74   Polyak: 0.995000   Buffer: 100.00   Loss: 103.274490  7.915927  6.547092
Ep: 18177   Rew: -101.07   Avg Rew: -102.74   Polyak: 0.995000   Buffer: 100.00   Loss: 103.505287  8.981096  7.053767
Ep: 18178   Rew: -103.26   Avg Rew: -102.75   Polyak: 0.995000   Buffer: 100.00   Loss: 103.115814  7.134988  8.865883
Ep: 18179   Rew: -102.61   Avg Rew: -102.75   Polyak: 0.995000   Buffer: 100.00   Loss: 102.774681  7.332003  5.912420
Ep: 18180   Rew: -100.41   Avg Rew: -102.72   Polyak: 0.995000   Buffer: 100.00   Loss: 103.577850  7.763359  8.413507
Ep: 18181   Rew: -101.50   Avg Rew: -102.71   Polyak: 0.995000   Buffer: 100.00   Loss: 102.826729  7.040505  8.491940
Ep: 18182   Rew: -99.55   Avg Rew: -102.68   Polyak: 0.995000   Buffer: 100.00   Loss: 102.736824  8.759557  7.453908
Ep: 18183   Rew: -101.38   Avg Rew: -102.68   Polyak: 0.995000   Buffer: 100.00   Loss: 103.112244  10.496922  7.896828
Ep: 18184   Rew: -100.81   Avg Rew: -102.68   Po

Ep: 18245   Rew: -98.14   Avg Rew: -107.08   Polyak: 0.995000   Buffer: 100.00   Loss: 103.047409  8.809973  8.670712
Ep: 18246   Rew: -99.47   Avg Rew: -107.06   Polyak: 0.995000   Buffer: 100.00   Loss: 102.908394  7.668671  5.546339
Ep: 18247   Rew: -99.75   Avg Rew: -107.03   Polyak: 0.995000   Buffer: 100.00   Loss: 102.827309  7.870140  8.849240
Ep: 18248   Rew: -102.45   Avg Rew: -107.03   Polyak: 0.995000   Buffer: 100.00   Loss: 102.764137  8.946906  7.313761
Ep: 18249   Rew: -106.52   Avg Rew: -107.08   Polyak: 0.995000   Buffer: 100.00   Loss: 103.570869  9.154190  7.673128
Ep: 18250   Rew: -105.42   Avg Rew: -107.11   Polyak: 0.995000   Buffer: 100.00   Loss: 102.840675  7.578791  8.093051
Ep: 18251   Rew: -100.19   Avg Rew: -107.09   Polyak: 0.995000   Buffer: 100.00   Loss: 103.049896  6.978842  7.061298
Ep: 18252   Rew: -97.16   Avg Rew: -107.04   Polyak: 0.995000   Buffer: 100.00   Loss: 103.188904  8.449300  8.873162
Ep: 18253   Rew: -102.63   Avg Rew: -107.03   Polyak

Ep: 18314   Rew: -102.49   Avg Rew: -106.53   Polyak: 0.995000   Buffer: 100.00   Loss: 102.781097  8.616690  8.389535
Ep: 18315   Rew: -102.00   Avg Rew: -106.52   Polyak: 0.995000   Buffer: 100.00   Loss: 103.015450  7.890450  7.290205
Ep: 18316   Rew: -102.23   Avg Rew: -106.50   Polyak: 0.995000   Buffer: 100.00   Loss: 103.304512  6.664312  9.621264
Ep: 18317   Rew: -102.73   Avg Rew: -106.51   Polyak: 0.995000   Buffer: 100.00   Loss: 103.456467  8.069613  7.223329
Ep: 18318   Rew: -100.84   Avg Rew: -106.49   Polyak: 0.995000   Buffer: 100.00   Loss: 102.760162  5.630054  7.636725
Ep: 18319   Rew: -101.49   Avg Rew: -106.50   Polyak: 0.995000   Buffer: 100.00   Loss: 103.477493  6.490542  7.013539
Ep: 18320   Rew: -102.82   Avg Rew: -106.52   Polyak: 0.995000   Buffer: 100.00   Loss: 102.712234  8.044113  7.289549
Ep: 18321   Rew: -102.47   Avg Rew: -106.55   Polyak: 0.995000   Buffer: 100.00   Loss: 102.999771  6.710264  7.397708
Ep: 18322   Rew: -102.66   Avg Rew: -106.57   Po

Ep: 18383   Rew: -100.80   Avg Rew: -101.95   Polyak: 0.995000   Buffer: 100.00   Loss: 102.966034  7.669728  8.010235
Ep: 18384   Rew: -101.55   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 103.209213  9.168846  7.517651
Ep: 18385   Rew: -102.03   Avg Rew: -101.93   Polyak: 0.995000   Buffer: 100.00   Loss: 103.341751  7.277287  7.221305
Ep: 18386   Rew: -100.81   Avg Rew: -101.89   Polyak: 0.995000   Buffer: 100.00   Loss: 103.263336  7.301446  7.576948
Ep: 18387   Rew: -102.38   Avg Rew: -101.90   Polyak: 0.995000   Buffer: 100.00   Loss: 103.149261  7.656328  7.115535
Ep: 18388   Rew: -101.48   Avg Rew: -101.90   Polyak: 0.995000   Buffer: 100.00   Loss: 103.203773  8.044798  7.621336
Ep: 18389   Rew: -100.36   Avg Rew: -101.90   Polyak: 0.995000   Buffer: 100.00   Loss: 103.476151  7.349751  8.200971
Ep: 18390   Rew: -102.19   Avg Rew: -101.91   Polyak: 0.995000   Buffer: 100.00   Loss: 102.978882  6.404211  7.254783
Ep: 18391   Rew: -100.70   Avg Rew: -101.91   Po

Ep: 18452   Rew: -96.89   Avg Rew: -103.84   Polyak: 0.995000   Buffer: 100.00   Loss: 103.443314  7.743410  7.431988
Ep: 18453   Rew: -98.87   Avg Rew: -103.32   Polyak: 0.995000   Buffer: 100.00   Loss: 103.468468  6.655466  6.677629
Ep: 18454   Rew: -105.06   Avg Rew: -103.39   Polyak: 0.995000   Buffer: 100.00   Loss: 103.648773  7.022445  8.308583
Ep: 18455   Rew: -95.65   Avg Rew: -103.33   Polyak: 0.995000   Buffer: 100.00   Loss: 103.055305  7.673037  7.416453
Ep: 18456   Rew: -96.10   Avg Rew: -103.26   Polyak: 0.995000   Buffer: 100.00   Loss: 103.548264  7.292659  7.835480
Ep: 18457   Rew: -98.30   Avg Rew: -103.22   Polyak: 0.995000   Buffer: 100.00   Loss: 103.010002  6.483377  7.477351
Ep: 18458   Rew: -96.39   Avg Rew: -103.20   Polyak: 0.995000   Buffer: 100.00   Loss: 103.885681  9.549761  6.127067
Ep: 18459   Rew: -106.31   Avg Rew: -103.22   Polyak: 0.995000   Buffer: 100.00   Loss: 103.179741  9.091187  7.416113
Ep: 18460   Rew: -101.58   Avg Rew: -103.21   Polyak: 

Ep: 18521   Rew: -104.78   Avg Rew: -103.12   Polyak: 0.995000   Buffer: 100.00   Loss: 103.243591  9.225813  8.557089
Ep: 18522   Rew: -103.97   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 103.097672  6.655249  6.085138
Ep: 18523   Rew: -102.94   Avg Rew: -103.15   Polyak: 0.995000   Buffer: 100.00   Loss: 103.161278  7.722639  6.477339
Ep: 18524   Rew: -101.80   Avg Rew: -103.16   Polyak: 0.995000   Buffer: 100.00   Loss: 103.358749  7.631800  6.473187
Ep: 18525   Rew: -102.11   Avg Rew: -103.15   Polyak: 0.995000   Buffer: 100.00   Loss: 103.330872  7.925680  6.711213
Ep: 18526   Rew: -102.22   Avg Rew: -103.14   Polyak: 0.995000   Buffer: 100.00   Loss: 103.664146  8.011683  6.870738
Ep: 18527   Rew: -100.63   Avg Rew: -103.13   Polyak: 0.995000   Buffer: 100.00   Loss: 103.518265  7.058048  10.622446
Ep: 18528   Rew: -100.33   Avg Rew: -103.11   Polyak: 0.995000   Buffer: 100.00   Loss: 102.918663  7.445242  8.394920
Ep: 18529   Rew: -87.28   Avg Rew: -102.99   Po

Ep: 18590   Rew: -100.94   Avg Rew: -102.02   Polyak: 0.995000   Buffer: 100.00   Loss: 103.516083  7.214031  6.764901
Ep: 18591   Rew: -102.16   Avg Rew: -102.01   Polyak: 0.995000   Buffer: 100.00   Loss: 102.811523  8.824596  7.763076
Ep: 18592   Rew: -102.63   Avg Rew: -102.00   Polyak: 0.995000   Buffer: 100.00   Loss: 102.904846  8.377499  8.689625
Ep: 18593   Rew: -102.21   Avg Rew: -101.99   Polyak: 0.995000   Buffer: 100.00   Loss: 103.324867  7.051020  7.430556
Ep: 18594   Rew: -101.72   Avg Rew: -101.95   Polyak: 0.995000   Buffer: 100.00   Loss: 103.833679  7.581531  7.304746
Ep: 18595   Rew: -101.91   Avg Rew: -101.92   Polyak: 0.995000   Buffer: 100.00   Loss: 102.865555  8.098206  7.569301
Ep: 18596   Rew: -102.04   Avg Rew: -101.90   Polyak: 0.995000   Buffer: 100.00   Loss: 103.485718  8.388881  9.207751
Ep: 18597   Rew: -99.23   Avg Rew: -101.86   Polyak: 0.995000   Buffer: 100.00   Loss: 103.156944  7.378625  7.697873
Ep: 18598   Rew: -103.07   Avg Rew: -101.84   Pol

Ep: 18659   Rew: -99.72   Avg Rew: -101.69   Polyak: 0.995000   Buffer: 100.00   Loss: 102.725540  7.853367  8.123154
Ep: 18660   Rew: -100.06   Avg Rew: -101.67   Polyak: 0.995000   Buffer: 100.00   Loss: 103.658157  8.840981  6.945693
Ep: 18661   Rew: -98.67   Avg Rew: -101.63   Polyak: 0.995000   Buffer: 100.00   Loss: 103.094696  8.280915  6.224524
Ep: 18662   Rew: -100.85   Avg Rew: -101.62   Polyak: 0.995000   Buffer: 100.00   Loss: 102.855453  8.909178  8.030389
Ep: 18663   Rew: -100.92   Avg Rew: -101.61   Polyak: 0.995000   Buffer: 100.00   Loss: 103.158852  8.495181  7.266607
Ep: 18664   Rew: -102.42   Avg Rew: -101.61   Polyak: 0.995000   Buffer: 100.00   Loss: 103.582008  7.657182  8.888966
Ep: 18665   Rew: -103.24   Avg Rew: -101.62   Polyak: 0.995000   Buffer: 100.00   Loss: 103.394783  8.572689  6.192490
Ep: 18666   Rew: -100.40   Avg Rew: -101.60   Polyak: 0.995000   Buffer: 100.00   Loss: 103.596352  8.220591  8.770835
Ep: 18667   Rew: -102.61   Avg Rew: -101.61   Poly

Ep: 18728   Rew: -101.54   Avg Rew: -101.09   Polyak: 0.995000   Buffer: 100.00   Loss: 102.992523  9.010995  5.932866
Ep: 18729   Rew: -101.62   Avg Rew: -101.08   Polyak: 0.995000   Buffer: 100.00   Loss: 103.052025  9.017229  9.271462
Ep: 18730   Rew: -101.95   Avg Rew: -101.08   Polyak: 0.995000   Buffer: 100.00   Loss: 103.287537  7.448135  8.511973
Ep: 18731   Rew: -101.08   Avg Rew: -101.07   Polyak: 0.995000   Buffer: 100.00   Loss: 103.587738  7.897606  8.913867
Ep: 18732   Rew: -101.71   Avg Rew: -101.07   Polyak: 0.995000   Buffer: 100.00   Loss: 103.550171  7.671587  8.606381
Ep: 18733   Rew: -99.26   Avg Rew: -101.05   Polyak: 0.995000   Buffer: 100.00   Loss: 102.550797  7.694980  5.878058
Ep: 18734   Rew: -101.41   Avg Rew: -101.03   Polyak: 0.995000   Buffer: 100.00   Loss: 103.135620  7.472128  6.897216
Ep: 18735   Rew: -102.00   Avg Rew: -101.03   Polyak: 0.995000   Buffer: 100.00   Loss: 103.049683  7.939861  7.173882
Ep: 18736   Rew: -100.56   Avg Rew: -101.01   Pol

Ep: 18798   Rew: -100.90   Avg Rew: -100.38   Polyak: 0.995000   Buffer: 100.00   Loss: 103.490623  9.271722  8.378488
Ep: 18799   Rew: -99.01   Avg Rew: -100.37   Polyak: 0.995000   Buffer: 100.00   Loss: 103.351990  7.589630  9.345566
Ep: 18800   Rew: -100.47   Avg Rew: -100.36   Polyak: 0.995000   Buffer: 100.00   Loss: 103.100784  5.941146  6.676658
Ep: 18801   Rew: -100.65   Avg Rew: -100.36   Polyak: 0.995000   Buffer: 100.00   Loss: 103.593445  7.957307  7.386392
Ep: 18802   Rew: -101.22   Avg Rew: -100.37   Polyak: 0.995000   Buffer: 100.00   Loss: 103.186340  6.675543  7.592650
Ep: 18803   Rew: -97.76   Avg Rew: -100.34   Polyak: 0.995000   Buffer: 100.00   Loss: 103.161324  8.269634  8.952848
Ep: 18804   Rew: -98.22   Avg Rew: -100.32   Polyak: 0.995000   Buffer: 100.00   Loss: 102.899040  7.094213  7.232417
Ep: 18805   Rew: -99.86   Avg Rew: -100.32   Polyak: 0.995000   Buffer: 100.00   Loss: 102.960976  7.701221  7.506051
Ep: 18806   Rew: -101.69   Avg Rew: -100.34   Polyak

Ep: 18867   Rew: -101.60   Avg Rew: -100.67   Polyak: 0.995000   Buffer: 100.00   Loss: 102.994354  6.656913  6.739658
Ep: 18868   Rew: -102.38   Avg Rew: -100.71   Polyak: 0.995000   Buffer: 100.00   Loss: 103.063965  6.951792  6.715378
Ep: 18869   Rew: -100.63   Avg Rew: -100.70   Polyak: 0.995000   Buffer: 100.00   Loss: 103.114449  7.296086  7.506089
Ep: 18870   Rew: -102.28   Avg Rew: -100.72   Polyak: 0.995000   Buffer: 100.00   Loss: 103.239372  9.013960  8.353759
Ep: 18871   Rew: -101.43   Avg Rew: -100.74   Polyak: 0.995000   Buffer: 100.00   Loss: 102.666672  8.615316  7.560407
Ep: 18872   Rew: -100.06   Avg Rew: -100.75   Polyak: 0.995000   Buffer: 100.00   Loss: 103.090500  8.478518  10.382442
Ep: 18873   Rew: -100.71   Avg Rew: -100.75   Polyak: 0.995000   Buffer: 100.00   Loss: 103.155830  6.717951  8.594689
Ep: 18874   Rew: -99.94   Avg Rew: -100.74   Polyak: 0.995000   Buffer: 100.00   Loss: 102.931137  11.077445  6.889720
Ep: 18875   Rew: -101.36   Avg Rew: -100.75   P

Ep: 18936   Rew: -99.24   Avg Rew: -100.91   Polyak: 0.995000   Buffer: 100.00   Loss: 103.173965  6.445538  7.877707
Ep: 18937   Rew: -101.91   Avg Rew: -100.92   Polyak: 0.995000   Buffer: 100.00   Loss: 102.972511  6.102658  5.380382
Ep: 18938   Rew: -99.94   Avg Rew: -100.92   Polyak: 0.995000   Buffer: 100.00   Loss: 103.187897  8.119515  8.818518
Ep: 18939   Rew: -97.32   Avg Rew: -100.88   Polyak: 0.995000   Buffer: 100.00   Loss: 102.849106  7.772544  7.044696
Ep: 18940   Rew: -101.50   Avg Rew: -100.88   Polyak: 0.995000   Buffer: 100.00   Loss: 102.996719  6.295531  6.895209
Ep: 18941   Rew: -102.07   Avg Rew: -100.89   Polyak: 0.995000   Buffer: 100.00   Loss: 103.031281  7.886469  7.103742
Ep: 18942   Rew: -98.41   Avg Rew: -100.86   Polyak: 0.995000   Buffer: 100.00   Loss: 103.698212  7.124897  8.050342
Ep: 18943   Rew: -101.56   Avg Rew: -100.86   Polyak: 0.995000   Buffer: 100.00   Loss: 103.408669  6.949187  7.407818
Ep: 18944   Rew: -101.78   Avg Rew: -100.86   Polyak

Ep: 19006   Rew: -104.47   Avg Rew: -100.05   Polyak: 0.995000   Buffer: 100.00   Loss: 102.853012  8.534624  8.201324
Ep: 19007   Rew: -104.41   Avg Rew: -100.10   Polyak: 0.995000   Buffer: 100.00   Loss: 102.996170  6.995533  8.547996
Ep: 19008   Rew: -104.23   Avg Rew: -100.13   Polyak: 0.995000   Buffer: 100.00   Loss: 103.031998  6.997444  6.181646
Ep: 19009   Rew: -106.34   Avg Rew: -100.18   Polyak: 0.995000   Buffer: 100.00   Loss: 102.923599  8.610130  8.355257
Ep: 19010   Rew: -103.78   Avg Rew: -100.20   Polyak: 0.995000   Buffer: 100.00   Loss: 103.045807  6.517426  7.918060
Ep: 19011   Rew: -104.90   Avg Rew: -100.28   Polyak: 0.995000   Buffer: 100.00   Loss: 103.142853  8.205633  6.252254
Ep: 19012   Rew: -103.73   Avg Rew: -100.35   Polyak: 0.995000   Buffer: 100.00   Loss: 102.944603  8.254762  7.490164
Ep: 19013   Rew: -103.65   Avg Rew: -100.40   Polyak: 0.995000   Buffer: 100.00   Loss: 103.357353  7.396453  8.644596
Ep: 19014   Rew: -102.78   Avg Rew: -100.48   Po

Ep: 19075   Rew: -105.30   Avg Rew: -101.32   Polyak: 0.995000   Buffer: 100.00   Loss: 102.699715  8.079610  5.409116
Ep: 19076   Rew: -104.27   Avg Rew: -101.37   Polyak: 0.995000   Buffer: 100.00   Loss: 102.570755  7.585840  8.172511
Ep: 19077   Rew: -105.05   Avg Rew: -101.40   Polyak: 0.995000   Buffer: 100.00   Loss: 103.235771  5.907191  7.339420
Ep: 19078   Rew: -102.10   Avg Rew: -101.43   Polyak: 0.995000   Buffer: 100.00   Loss: 102.830948  7.254982  7.494432
Ep: 19079   Rew: -101.53   Avg Rew: -101.45   Polyak: 0.995000   Buffer: 100.00   Loss: 102.826019  6.659415  8.155424
Ep: 19080   Rew: -106.68   Avg Rew: -101.53   Polyak: 0.995000   Buffer: 100.00   Loss: 103.178177  7.480472  8.187508
Ep: 19081   Rew: -104.20   Avg Rew: -101.59   Polyak: 0.995000   Buffer: 100.00   Loss: 102.565201  5.807032  6.706089
Ep: 19082   Rew: -102.74   Avg Rew: -101.61   Polyak: 0.995000   Buffer: 100.00   Loss: 103.021164  7.013433  7.269468
Ep: 19083   Rew: -102.54   Avg Rew: -101.62   Po

Ep: 19144   Rew: -102.21   Avg Rew: -101.93   Polyak: 0.995000   Buffer: 100.00   Loss: 103.356499  7.364657  7.853013
Ep: 19145   Rew: -102.77   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 103.184029  8.249145  7.916761
Ep: 19146   Rew: -102.08   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 103.436096  8.895496  8.317411
Ep: 19147   Rew: -102.49   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 103.043274  7.164296  6.207526
Ep: 19148   Rew: -102.44   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 103.757919  6.663390  6.877924
Ep: 19149   Rew: -101.73   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 103.283302  8.101266  6.823057
Ep: 19150   Rew: -103.22   Avg Rew: -101.95   Polyak: 0.995000   Buffer: 100.00   Loss: 103.115273  8.194222  8.502929
Ep: 19151   Rew: -102.26   Avg Rew: -101.95   Polyak: 0.995000   Buffer: 100.00   Loss: 103.350243  8.356906  8.649309
Ep: 19152   Rew: -102.51   Avg Rew: -101.95   Po

Ep: 19214   Rew: -97.95   Avg Rew: -100.68   Polyak: 0.995000   Buffer: 100.00   Loss: 103.236839  8.718427  7.769819
Ep: 19215   Rew: -97.81   Avg Rew: -100.68   Polyak: 0.995000   Buffer: 100.00   Loss: 102.771660  8.161449  8.683344
Ep: 19216   Rew: -97.44   Avg Rew: -100.64   Polyak: 0.995000   Buffer: 100.00   Loss: 102.752808  7.039231  7.333530
Ep: 19217   Rew: -97.35   Avg Rew: -100.59   Polyak: 0.995000   Buffer: 100.00   Loss: 102.854630  7.375296  8.194697
Ep: 19218   Rew: -97.22   Avg Rew: -100.53   Polyak: 0.995000   Buffer: 100.00   Loss: 103.147026  6.580941  7.473874
Ep: 19219   Rew: -98.51   Avg Rew: -100.48   Polyak: 0.995000   Buffer: 100.00   Loss: 103.167862  7.933434  8.658942
Ep: 19220   Rew: -96.38   Avg Rew: -100.42   Polyak: 0.995000   Buffer: 100.00   Loss: 103.371010  6.688891  7.486234
Ep: 19221   Rew: -98.32   Avg Rew: -100.39   Polyak: 0.995000   Buffer: 100.00   Loss: 103.532875  6.743616  5.571091
Ep: 19222   Rew: -99.76   Avg Rew: -100.36   Polyak: 0.9

Ep: 19283   Rew: -101.75   Avg Rew: -100.75   Polyak: 0.995000   Buffer: 100.00   Loss: 102.931610  8.553259  7.875305
Ep: 19284   Rew: -102.39   Avg Rew: -100.78   Polyak: 0.995000   Buffer: 100.00   Loss: 102.771729  6.851213  8.784471
Ep: 19285   Rew: -101.04   Avg Rew: -100.81   Polyak: 0.995000   Buffer: 100.00   Loss: 102.807014  8.121723  6.634362
Ep: 19286   Rew: -101.38   Avg Rew: -100.84   Polyak: 0.995000   Buffer: 100.00   Loss: 103.250771  6.419249  8.592169
Ep: 19287   Rew: -100.05   Avg Rew: -100.85   Polyak: 0.995000   Buffer: 100.00   Loss: 102.957733  9.155180  8.958156
Ep: 19288   Rew: -100.88   Avg Rew: -100.84   Polyak: 0.995000   Buffer: 100.00   Loss: 103.584991  7.133211  8.030930
Ep: 19289   Rew: -101.92   Avg Rew: -100.85   Polyak: 0.995000   Buffer: 100.00   Loss: 103.467331  6.673613  7.355732
Ep: 19290   Rew: -99.63   Avg Rew: -100.83   Polyak: 0.995000   Buffer: 100.00   Loss: 102.563332  7.377541  6.876818
Ep: 19291   Rew: -98.46   Avg Rew: -100.79   Poly

Ep: 19352   Rew: -102.02   Avg Rew: -100.87   Polyak: 0.995000   Buffer: 100.00   Loss: 102.852722  7.543248  7.370014
Ep: 19353   Rew: -100.96   Avg Rew: -100.86   Polyak: 0.995000   Buffer: 100.00   Loss: 102.986267  6.575708  7.750474
Ep: 19354   Rew: -101.18   Avg Rew: -100.87   Polyak: 0.995000   Buffer: 100.00   Loss: 103.241966  6.530549  7.083931
Ep: 19355   Rew: -102.00   Avg Rew: -100.87   Polyak: 0.995000   Buffer: 100.00   Loss: 102.948387  7.340827  7.810132
Ep: 19356   Rew: -102.47   Avg Rew: -100.87   Polyak: 0.995000   Buffer: 100.00   Loss: 103.316917  7.375538  7.110782
Ep: 19357   Rew: -102.56   Avg Rew: -100.89   Polyak: 0.995000   Buffer: 100.00   Loss: 103.344505  6.818269  7.395314
Ep: 19358   Rew: -102.63   Avg Rew: -100.88   Polyak: 0.995000   Buffer: 100.00   Loss: 102.973335  7.147860  7.265521
Ep: 19359   Rew: -103.02   Avg Rew: -100.89   Polyak: 0.995000   Buffer: 100.00   Loss: 103.615288  7.151432  6.278341
Ep: 19360   Rew: -101.60   Avg Rew: -100.89   Po

Ep: 19421   Rew: -101.89   Avg Rew: -101.25   Polyak: 0.995000   Buffer: 100.00   Loss: 102.774940  7.491286  7.101665
Ep: 19422   Rew: -101.67   Avg Rew: -101.32   Polyak: 0.995000   Buffer: 100.00   Loss: 102.705780  8.603034  7.317761
Ep: 19423   Rew: -102.23   Avg Rew: -101.38   Polyak: 0.995000   Buffer: 100.00   Loss: 102.947990  6.413290  7.479487
Ep: 19424   Rew: -101.57   Avg Rew: -101.44   Polyak: 0.995000   Buffer: 100.00   Loss: 103.037788  7.311465  6.761264
Ep: 19425   Rew: -102.90   Avg Rew: -101.51   Polyak: 0.995000   Buffer: 100.00   Loss: 102.731773  6.642368  7.833076
Ep: 19426   Rew: -100.04   Avg Rew: -101.55   Polyak: 0.995000   Buffer: 100.00   Loss: 102.562538  7.293849  7.799844
Ep: 19427   Rew: -100.77   Avg Rew: -101.57   Polyak: 0.995000   Buffer: 100.00   Loss: 103.020721  6.518560  6.521158
Ep: 19428   Rew: -99.59   Avg Rew: -101.55   Polyak: 0.995000   Buffer: 100.00   Loss: 103.094627  7.143553  7.368035
Ep: 19429   Rew: -101.72   Avg Rew: -101.56   Pol

Ep: 19490   Rew: -102.41   Avg Rew: -100.83   Polyak: 0.995000   Buffer: 100.00   Loss: 102.627289  6.981100  6.460787
Ep: 19491   Rew: -101.55   Avg Rew: -100.82   Polyak: 0.995000   Buffer: 100.00   Loss: 102.213974  6.879786  7.489650
Ep: 19492   Rew: -102.22   Avg Rew: -100.83   Polyak: 0.995000   Buffer: 100.00   Loss: 102.994995  7.095587  7.142253
Ep: 19493   Rew: -101.58   Avg Rew: -100.82   Polyak: 0.995000   Buffer: 100.00   Loss: 102.476723  7.870193  7.376643
Ep: 19494   Rew: -101.25   Avg Rew: -100.81   Polyak: 0.995000   Buffer: 100.00   Loss: 103.203400  8.268626  6.713987
Ep: 19495   Rew: -101.08   Avg Rew: -100.81   Polyak: 0.995000   Buffer: 100.00   Loss: 102.956902  7.391340  7.767209
Ep: 19496   Rew: -100.93   Avg Rew: -100.80   Polyak: 0.995000   Buffer: 100.00   Loss: 102.712036  7.011871  7.895184
Ep: 19497   Rew: -101.69   Avg Rew: -100.80   Polyak: 0.995000   Buffer: 100.00   Loss: 102.649200  6.711985  7.097437
Ep: 19498   Rew: -101.70   Avg Rew: -100.80   Po

Ep: 19559   Rew: -99.66   Avg Rew: -101.99   Polyak: 0.995000   Buffer: 100.00   Loss: 102.657265  8.338812  9.399811
Ep: 19560   Rew: -97.81   Avg Rew: -101.94   Polyak: 0.995000   Buffer: 100.00   Loss: 102.461929  6.991713  6.351357
Ep: 19561   Rew: -97.87   Avg Rew: -101.89   Polyak: 0.995000   Buffer: 100.00   Loss: 102.466209  7.966714  8.347639
Ep: 19562   Rew: -97.75   Avg Rew: -101.85   Polyak: 0.995000   Buffer: 100.00   Loss: 102.607697  7.221808  6.582690
Ep: 19563   Rew: -97.23   Avg Rew: -101.80   Polyak: 0.995000   Buffer: 100.00   Loss: 101.969177  7.488422  6.582853
Ep: 19564   Rew: -96.29   Avg Rew: -101.74   Polyak: 0.995000   Buffer: 100.00   Loss: 102.741737  5.294246  6.242384
Ep: 19565   Rew: -96.10   Avg Rew: -101.68   Polyak: 0.995000   Buffer: 100.00   Loss: 102.307114  7.545388  7.538610
Ep: 19566   Rew: -94.87   Avg Rew: -101.61   Polyak: 0.995000   Buffer: 100.00   Loss: 102.767517  5.902890  5.866409
Ep: 19567   Rew: -97.11   Avg Rew: -101.55   Polyak: 0.9

Ep: 19629   Rew: -101.35   Avg Rew: -100.11   Polyak: 0.995000   Buffer: 100.00   Loss: 102.591454  6.701575  5.182873
Ep: 19630   Rew: -101.90   Avg Rew: -100.10   Polyak: 0.995000   Buffer: 100.00   Loss: 102.651100  6.914414  6.728127
Ep: 19631   Rew: -101.67   Avg Rew: -100.09   Polyak: 0.995000   Buffer: 100.00   Loss: 102.675262  6.701831  6.208392
Ep: 19632   Rew: -100.21   Avg Rew: -100.08   Polyak: 0.995000   Buffer: 100.00   Loss: 102.459969  7.147890  6.324068
Ep: 19633   Rew: -100.84   Avg Rew: -100.07   Polyak: 0.995000   Buffer: 100.00   Loss: 102.163895  7.680925  8.779255
Ep: 19634   Rew: -97.19   Avg Rew: -100.03   Polyak: 0.995000   Buffer: 100.00   Loss: 102.729820  7.244802  6.714498
Ep: 19635   Rew: -97.24   Avg Rew: -99.56   Polyak: 0.995000   Buffer: 100.00   Loss: 102.447594  6.530371  7.030947
Ep: 19636   Rew: -95.43   Avg Rew: -99.50   Polyak: 0.995000   Buffer: 100.00   Loss: 102.349342  7.638817  7.565911
Ep: 19637   Rew: -94.58   Avg Rew: -99.42   Polyak: 0

Ep: 19699   Rew: -101.36   Avg Rew: -99.84   Polyak: 0.995000   Buffer: 100.00   Loss: 102.510376  7.404974  7.561199
Ep: 19700   Rew: -100.16   Avg Rew: -99.85   Polyak: 0.995000   Buffer: 100.00   Loss: 102.426003  9.487833  7.503892
Ep: 19701   Rew: -98.87   Avg Rew: -99.82   Polyak: 0.995000   Buffer: 100.00   Loss: 102.203239  6.869330  7.040681
Ep: 19702   Rew: -98.74   Avg Rew: -99.81   Polyak: 0.995000   Buffer: 100.00   Loss: 102.580666  8.685060  8.687120
Ep: 19703   Rew: -100.57   Avg Rew: -99.84   Polyak: 0.995000   Buffer: 100.00   Loss: 102.255638  8.301647  6.491662
Ep: 19704   Rew: -98.56   Avg Rew: -99.87   Polyak: 0.995000   Buffer: 100.00   Loss: 102.237198  7.384570  7.358656
Ep: 19705   Rew: -97.47   Avg Rew: -99.85   Polyak: 0.995000   Buffer: 100.00   Loss: 102.747620  7.078631  7.316011
Ep: 19706   Rew: -125.04   Avg Rew: -100.10   Polyak: 0.995000   Buffer: 100.00   Loss: 102.125153  7.017622  6.868299
Ep: 19707   Rew: -100.80   Avg Rew: -100.11   Polyak: 0.995

Ep: 19769   Rew: -98.66   Avg Rew: -103.06   Polyak: 0.995000   Buffer: 100.00   Loss: 102.602379  6.730566  6.916941
Ep: 19770   Rew: -95.79   Avg Rew: -103.05   Polyak: 0.995000   Buffer: 100.00   Loss: 102.392120  7.261277  7.243294
Ep: 19771   Rew: -105.61   Avg Rew: -103.15   Polyak: 0.995000   Buffer: 100.00   Loss: 102.515533  8.527825  7.068798
Ep: 19772   Rew: -98.90   Avg Rew: -103.17   Polyak: 0.995000   Buffer: 100.00   Loss: 102.270264  8.156783  8.123409
Ep: 19773   Rew: -103.36   Avg Rew: -103.22   Polyak: 0.995000   Buffer: 100.00   Loss: 102.066383  6.865443  6.486563
Ep: 19774   Rew: -102.61   Avg Rew: -103.25   Polyak: 0.995000   Buffer: 100.00   Loss: 102.518105  8.278902  7.102966
Ep: 19775   Rew: -100.37   Avg Rew: -103.28   Polyak: 0.995000   Buffer: 100.00   Loss: 102.711861  8.448352  6.693412
Ep: 19776   Rew: -101.95   Avg Rew: -103.32   Polyak: 0.995000   Buffer: 100.00   Loss: 102.409134  6.798138  5.884807
Ep: 19777   Rew: -101.21   Avg Rew: -103.38   Polya

Ep: 19838   Rew: -99.30   Avg Rew: -104.64   Polyak: 0.995000   Buffer: 100.00   Loss: 102.249664  7.728908  6.582072
Ep: 19839   Rew: -99.52   Avg Rew: -104.64   Polyak: 0.995000   Buffer: 100.00   Loss: 102.166466  7.103230  7.669024
Ep: 19840   Rew: -101.12   Avg Rew: -104.69   Polyak: 0.995000   Buffer: 100.00   Loss: 102.402229  8.111901  5.903474
Ep: 19841   Rew: -98.47   Avg Rew: -104.01   Polyak: 0.995000   Buffer: 100.00   Loss: 102.295860  7.324262  6.838769
Ep: 19842   Rew: -100.18   Avg Rew: -104.06   Polyak: 0.995000   Buffer: 100.00   Loss: 102.325638  92.736290  92.934692
Ep: 19843   Rew: -96.24   Avg Rew: -104.09   Polyak: 0.995000   Buffer: 100.00   Loss: 102.160202  7.391750  8.186632
Ep: 19844   Rew: -98.07   Avg Rew: -104.13   Polyak: 0.995000   Buffer: 100.00   Loss: 102.549271  6.722447  6.739637
Ep: 19845   Rew: -96.66   Avg Rew: -104.14   Polyak: 0.995000   Buffer: 100.00   Loss: 102.273613  6.990314  6.593715
Ep: 19846   Rew: -97.28   Avg Rew: -104.15   Polyak:

Ep: 19908   Rew: -100.91   Avg Rew: -101.07   Polyak: 0.995000   Buffer: 100.00   Loss: 103.087715  8.531376  6.212152
Ep: 19909   Rew: -100.87   Avg Rew: -101.05   Polyak: 0.995000   Buffer: 100.00   Loss: 103.081123  8.419279  7.353767
Ep: 19910   Rew: -100.94   Avg Rew: -101.03   Polyak: 0.995000   Buffer: 100.00   Loss: 102.513649  7.137208  7.825958
Ep: 19911   Rew: -99.51   Avg Rew: -101.00   Polyak: 0.995000   Buffer: 100.00   Loss: 102.516602  6.537067  5.487440
Ep: 19912   Rew: -100.84   Avg Rew: -100.99   Polyak: 0.995000   Buffer: 100.00   Loss: 102.599823  6.931911  7.601642
Ep: 19913   Rew: -100.53   Avg Rew: -100.97   Polyak: 0.995000   Buffer: 100.00   Loss: 102.250511  8.602825  7.404129
Ep: 19914   Rew: -100.21   Avg Rew: -100.94   Polyak: 0.995000   Buffer: 100.00   Loss: 102.685356  6.245760  6.638257
Ep: 19915   Rew: -99.57   Avg Rew: -100.91   Polyak: 0.995000   Buffer: 100.00   Loss: 102.366455  8.650734  6.996130
Ep: 19916   Rew: -98.59   Avg Rew: -100.87   Polya

Ep: 19978   Rew: -97.37   Avg Rew: -100.50   Polyak: 0.995000   Buffer: 100.00   Loss: 102.570503  7.049295  8.096130
Ep: 19979   Rew: -97.46   Avg Rew: -100.47   Polyak: 0.995000   Buffer: 100.00   Loss: 102.472496  7.094512  7.411236
Ep: 19980   Rew: -101.11   Avg Rew: -100.44   Polyak: 0.995000   Buffer: 100.00   Loss: 102.266090  6.554714  7.244631
Ep: 19981   Rew: -99.83   Avg Rew: -100.43   Polyak: 0.995000   Buffer: 100.00   Loss: 102.511879  7.641935  8.650028
Ep: 19982   Rew: -99.11   Avg Rew: -100.43   Polyak: 0.995000   Buffer: 100.00   Loss: 102.903214  9.439790  8.214553
Ep: 19983   Rew: -97.27   Avg Rew: -100.41   Polyak: 0.995000   Buffer: 100.00   Loss: 102.811813  10.302967  8.205008
Ep: 19984   Rew: -97.81   Avg Rew: -100.38   Polyak: 0.995000   Buffer: 100.00   Loss: 102.512680  8.513178  6.626090
Ep: 19985   Rew: -99.90   Avg Rew: -100.39   Polyak: 0.995000   Buffer: 100.00   Loss: 102.720535  7.232641  7.750299
Ep: 19986   Rew: -99.21   Avg Rew: -100.35   Polyak: 0

Ep: 20048   Rew: -101.07   Avg Rew: -100.23   Polyak: 0.995000   Buffer: 100.00   Loss: 102.001160  6.696594  6.994626
Ep: 20049   Rew: -102.30   Avg Rew: -100.24   Polyak: 0.995000   Buffer: 100.00   Loss: 102.350632  7.676959  5.658930
Ep: 20050   Rew: -101.04   Avg Rew: -100.24   Polyak: 0.995000   Buffer: 100.00   Loss: 102.558525  6.149752  7.314454
Ep: 20051   Rew: -99.02   Avg Rew: -100.23   Polyak: 0.995000   Buffer: 100.00   Loss: 102.667686  6.332138  8.960457
Ep: 20052   Rew: -101.29   Avg Rew: -100.23   Polyak: 0.995000   Buffer: 100.00   Loss: 102.957611  6.637691  6.147451
Ep: 20053   Rew: -96.37   Avg Rew: -100.18   Polyak: 0.995000   Buffer: 100.00   Loss: 102.831345  5.485512  6.517295
Ep: 20054   Rew: -96.82   Avg Rew: -100.15   Polyak: 0.995000   Buffer: 100.00   Loss: 102.713234  7.373585  6.511910
Ep: 20055   Rew: -99.67   Avg Rew: -100.15   Polyak: 0.995000   Buffer: 100.00   Loss: 102.333244  7.312898  7.842130
Ep: 20056   Rew: -98.65   Avg Rew: -100.12   Polyak:

Ep: 20118   Rew: -103.12   Avg Rew: -100.63   Polyak: 0.995000   Buffer: 100.00   Loss: 102.647537  6.145036  7.160663
Ep: 20119   Rew: -105.42   Avg Rew: -100.70   Polyak: 0.995000   Buffer: 100.00   Loss: 102.312653  8.186077  6.150432
Ep: 20120   Rew: -100.65   Avg Rew: -100.73   Polyak: 0.995000   Buffer: 100.00   Loss: 102.626564  6.904254  6.314688
Ep: 20121   Rew: -103.54   Avg Rew: -100.79   Polyak: 0.995000   Buffer: 100.00   Loss: 102.888550  6.565251  8.014199
Ep: 20122   Rew: -96.55   Avg Rew: -100.79   Polyak: 0.995000   Buffer: 100.00   Loss: 102.522675  7.218416  7.362991
Ep: 20123   Rew: -96.95   Avg Rew: -100.79   Polyak: 0.995000   Buffer: 100.00   Loss: 102.436531  6.631622  6.745869
Ep: 20124   Rew: -96.46   Avg Rew: -100.77   Polyak: 0.995000   Buffer: 100.00   Loss: 102.515015  6.527220  6.136751
Ep: 20125   Rew: -102.53   Avg Rew: -100.79   Polyak: 0.995000   Buffer: 100.00   Loss: 102.648087  7.167449  6.897858
Ep: 20126   Rew: -100.94   Avg Rew: -100.82   Polya

Ep: 20188   Rew: -98.20   Avg Rew: -102.28   Polyak: 0.995000   Buffer: 100.00   Loss: 102.314537  8.256292  7.917093
Ep: 20189   Rew: -97.33   Avg Rew: -102.28   Polyak: 0.995000   Buffer: 100.00   Loss: 102.761612  8.817120  8.226406
Ep: 20190   Rew: -103.20   Avg Rew: -102.28   Polyak: 0.995000   Buffer: 100.00   Loss: 102.681351  7.518062  6.716166
Ep: 20191   Rew: -99.41   Avg Rew: -102.26   Polyak: 0.995000   Buffer: 100.00   Loss: 102.903694  8.787807  7.096005
Ep: 20192   Rew: -97.52   Avg Rew: -102.20   Polyak: 0.995000   Buffer: 100.00   Loss: 102.172340  8.231234  7.326124
Ep: 20193   Rew: -94.92   Avg Rew: -102.14   Polyak: 0.995000   Buffer: 100.00   Loss: 102.497299  6.974593  7.197003
Ep: 20194   Rew: -94.71   Avg Rew: -102.07   Polyak: 0.995000   Buffer: 100.00   Loss: 102.899834  7.424868  6.402951
Ep: 20195   Rew: -96.83   Avg Rew: -102.04   Polyak: 0.995000   Buffer: 100.00   Loss: 102.646461  7.001694  6.322040
Ep: 20196   Rew: -95.65   Avg Rew: -102.00   Polyak: 0.

Ep: 20258   Rew: -97.34   Avg Rew: -100.14   Polyak: 0.995000   Buffer: 100.00   Loss: 102.471344  6.508967  7.849081
Ep: 20259   Rew: -99.79   Avg Rew: -99.99   Polyak: 0.995000   Buffer: 100.00   Loss: 102.161255  9.305071  7.009012
Ep: 20260   Rew: -102.47   Avg Rew: -99.96   Polyak: 0.995000   Buffer: 100.00   Loss: 102.767807  7.906990  7.709912
Ep: 20261   Rew: -100.74   Avg Rew: -99.75   Polyak: 0.995000   Buffer: 100.00   Loss: 102.939995  8.985396  6.672543
Ep: 20262   Rew: -99.75   Avg Rew: -99.46   Polyak: 0.995000   Buffer: 100.00   Loss: 102.565010  8.017121  6.595237
Ep: 20263   Rew: -97.83   Avg Rew: -99.41   Polyak: 0.995000   Buffer: 100.00   Loss: 102.707794  7.828412  6.957971
Ep: 20264   Rew: -100.52   Avg Rew: -99.26   Polyak: 0.995000   Buffer: 100.00   Loss: 102.677780  7.559174  6.977651
Ep: 20265   Rew: -100.28   Avg Rew: -99.18   Polyak: 0.995000   Buffer: 100.00   Loss: 102.766563  7.271339  7.197725
Ep: 20266   Rew: -101.87   Avg Rew: -99.09   Polyak: 0.9950

Ep: 20328   Rew: -104.36   Avg Rew: -101.69   Polyak: 0.995000   Buffer: 100.00   Loss: 102.455063  7.426009  6.194606
Ep: 20329   Rew: -98.99   Avg Rew: -101.68   Polyak: 0.995000   Buffer: 100.00   Loss: 102.398514  6.769287  7.560966
Ep: 20330   Rew: -101.09   Avg Rew: -101.70   Polyak: 0.995000   Buffer: 100.00   Loss: 102.729134  8.922908  6.612081
Ep: 20331   Rew: -104.55   Avg Rew: -101.76   Polyak: 0.995000   Buffer: 100.00   Loss: 102.310928  5.912825  6.674870
Ep: 20332   Rew: -98.34   Avg Rew: -101.74   Polyak: 0.995000   Buffer: 100.00   Loss: 102.789757  6.395433  5.478828
Ep: 20333   Rew: -98.80   Avg Rew: -101.76   Polyak: 0.995000   Buffer: 100.00   Loss: 102.777321  8.025893  6.955501
Ep: 20334   Rew: -96.32   Avg Rew: -101.74   Polyak: 0.995000   Buffer: 100.00   Loss: 102.905510  8.352893  6.814654
Ep: 20335   Rew: -101.51   Avg Rew: -101.77   Polyak: 0.995000   Buffer: 100.00   Loss: 102.940926  7.050650  7.344828
Ep: 20336   Rew: -105.51   Avg Rew: -101.82   Polyak

Ep: 20398   Rew: -96.63   Avg Rew: -100.18   Polyak: 0.995000   Buffer: 100.00   Loss: 102.559731  6.764334  7.592956
Ep: 20399   Rew: -95.50   Avg Rew: -100.13   Polyak: 0.995000   Buffer: 100.00   Loss: 102.527191  6.086709  7.631080
Ep: 20400   Rew: -98.17   Avg Rew: -100.06   Polyak: 0.995000   Buffer: 100.00   Loss: 102.369217  6.055714  6.880425
Ep: 20401   Rew: -97.21   Avg Rew: -100.04   Polyak: 0.995000   Buffer: 100.00   Loss: 102.270813  6.769110  5.487355
Ep: 20402   Rew: -94.81   Avg Rew: -99.97   Polyak: 0.995000   Buffer: 100.00   Loss: 102.838165  7.558067  6.601092
Ep: 20403   Rew: -98.84   Avg Rew: -99.45   Polyak: 0.995000   Buffer: 100.00   Loss: 102.673782  6.490533  6.468339
Ep: 20404   Rew: -98.98   Avg Rew: -99.42   Polyak: 0.995000   Buffer: 100.00   Loss: 102.987335  6.812794  7.074653
Ep: 20405   Rew: -94.44   Avg Rew: -99.35   Polyak: 0.995000   Buffer: 100.00   Loss: 102.418968  6.905838  7.356437
Ep: 20406   Rew: -95.24   Avg Rew: -99.29   Polyak: 0.995000

Ep: 20468   Rew: -101.26   Avg Rew: -97.25   Polyak: 0.995000   Buffer: 100.00   Loss: 102.627274  6.427162  6.014286
Ep: 20469   Rew: -99.46   Avg Rew: -97.25   Polyak: 0.995000   Buffer: 100.00   Loss: 102.901314  6.367128  6.707126
Ep: 20470   Rew: -98.33   Avg Rew: -97.22   Polyak: 0.995000   Buffer: 100.00   Loss: 102.478806  5.997922  6.322578
Ep: 20471   Rew: -99.01   Avg Rew: -97.25   Polyak: 0.995000   Buffer: 100.00   Loss: 102.010269  7.221517  7.135220
Ep: 20472   Rew: -101.17   Avg Rew: -97.31   Polyak: 0.995000   Buffer: 100.00   Loss: 103.019135  7.939984  6.386951
Ep: 20473   Rew: -100.53   Avg Rew: -97.35   Polyak: 0.995000   Buffer: 100.00   Loss: 102.434593  7.397487  7.927999
Ep: 20474   Rew: -98.60   Avg Rew: -97.38   Polyak: 0.995000   Buffer: 100.00   Loss: 102.364845  7.424864  8.826559
Ep: 20475   Rew: -97.71   Avg Rew: -97.43   Polyak: 0.995000   Buffer: 100.00   Loss: 102.494614  6.820357  6.179246
Ep: 20476   Rew: -101.74   Avg Rew: -97.52   Polyak: 0.995000

Ep: 20538   Rew: -102.39   Avg Rew: -98.32   Polyak: 0.995000   Buffer: 100.00   Loss: 102.455544  9.345145  6.638347
Ep: 20539   Rew: -100.91   Avg Rew: -98.30   Polyak: 0.995000   Buffer: 100.00   Loss: 102.656837  6.673766  6.301714
Ep: 20540   Rew: -102.75   Avg Rew: -98.31   Polyak: 0.995000   Buffer: 100.00   Loss: 102.853157  8.186067  7.980987
Ep: 20541   Rew: -101.24   Avg Rew: -98.32   Polyak: 0.995000   Buffer: 100.00   Loss: 102.504700  7.157140  6.190711
Ep: 20542   Rew: -101.25   Avg Rew: -98.36   Polyak: 0.995000   Buffer: 100.00   Loss: 102.426430  7.752947  6.893023
Ep: 20543   Rew: -100.20   Avg Rew: -98.38   Polyak: 0.995000   Buffer: 100.00   Loss: 103.322380  6.008057  8.042364
Ep: 20544   Rew: -99.55   Avg Rew: -98.41   Polyak: 0.995000   Buffer: 100.00   Loss: 102.640175  8.266432  5.373065
Ep: 20545   Rew: -100.18   Avg Rew: -98.42   Polyak: 0.995000   Buffer: 100.00   Loss: 103.294907  7.791750  7.993746
Ep: 20546   Rew: -97.17   Avg Rew: -98.46   Polyak: 0.995

Ep: 20608   Rew: -97.77   Avg Rew: -99.63   Polyak: 0.995000   Buffer: 100.00   Loss: 102.393059  7.080540  5.737353
Ep: 20609   Rew: -98.28   Avg Rew: -99.65   Polyak: 0.995000   Buffer: 100.00   Loss: 102.608559  7.165797  8.199012
Ep: 20610   Rew: -100.38   Avg Rew: -99.70   Polyak: 0.995000   Buffer: 100.00   Loss: 102.712700  8.142516  6.558113
Ep: 20611   Rew: -97.38   Avg Rew: -99.68   Polyak: 0.995000   Buffer: 100.00   Loss: 102.694313  5.619564  4.997116
Ep: 20612   Rew: -97.94   Avg Rew: -99.73   Polyak: 0.995000   Buffer: 100.00   Loss: 102.336861  6.918110  7.569993
Ep: 20613   Rew: -100.62   Avg Rew: -99.73   Polyak: 0.995000   Buffer: 100.00   Loss: 102.571999  7.366223  6.699327
Ep: 20614   Rew: -98.70   Avg Rew: -99.76   Polyak: 0.995000   Buffer: 100.00   Loss: 102.760315  6.815028  7.628852
Ep: 20615   Rew: -100.53   Avg Rew: -99.77   Polyak: 0.995000   Buffer: 100.00   Loss: 102.486023  6.763471  6.592882
Ep: 20616   Rew: -99.90   Avg Rew: -99.75   Polyak: 0.995000 

Ep: 20678   Rew: -99.19   Avg Rew: -99.66   Polyak: 0.995000   Buffer: 100.00   Loss: 102.143410  7.575830  8.412331
Ep: 20679   Rew: -99.19   Avg Rew: -99.68   Polyak: 0.995000   Buffer: 100.00   Loss: 102.815933  8.020737  7.487212
Ep: 20680   Rew: -101.80   Avg Rew: -99.68   Polyak: 0.995000   Buffer: 100.00   Loss: 102.235382  6.661146  8.173672
Ep: 20681   Rew: -101.67   Avg Rew: -99.73   Polyak: 0.995000   Buffer: 100.00   Loss: 102.421707  9.267166  6.330893
Ep: 20682   Rew: -99.87   Avg Rew: -99.74   Polyak: 0.995000   Buffer: 100.00   Loss: 103.065407  7.235804  7.491567
Ep: 20683   Rew: -101.16   Avg Rew: -99.75   Polyak: 0.995000   Buffer: 100.00   Loss: 102.656364  8.024224  6.862815
Ep: 20684   Rew: -101.61   Avg Rew: -99.79   Polyak: 0.995000   Buffer: 100.00   Loss: 102.564026  7.662964  7.102553
Ep: 20685   Rew: -102.39   Avg Rew: -99.80   Polyak: 0.995000   Buffer: 100.00   Loss: 101.785408  6.824069  7.240031
Ep: 20686   Rew: -101.36   Avg Rew: -99.79   Polyak: 0.9950

Ep: 20748   Rew: -95.00   Avg Rew: -99.21   Polyak: 0.995000   Buffer: 100.00   Loss: 102.637062  7.009574  8.060650
Ep: 20749   Rew: -97.16   Avg Rew: -99.19   Polyak: 0.995000   Buffer: 100.00   Loss: 102.213173  6.776380  5.920877
Ep: 20750   Rew: -97.74   Avg Rew: -99.27   Polyak: 0.995000   Buffer: 100.00   Loss: 102.649719  6.019697  6.978405
Ep: 20751   Rew: -97.40   Avg Rew: -99.27   Polyak: 0.995000   Buffer: 100.00   Loss: 102.844521  6.781827  6.919555
Ep: 20752   Rew: -97.26   Avg Rew: -99.26   Polyak: 0.995000   Buffer: 100.00   Loss: 102.696381  7.475768  7.670904
Ep: 20753   Rew: -101.96   Avg Rew: -99.31   Polyak: 0.995000   Buffer: 100.00   Loss: 102.905273  7.125192  6.635312
Ep: 20754   Rew: -100.08   Avg Rew: -99.30   Polyak: 0.995000   Buffer: 100.00   Loss: 103.023842  8.177054  7.573030
Ep: 20755   Rew: -100.75   Avg Rew: -99.35   Polyak: 0.995000   Buffer: 100.00   Loss: 102.549988  6.932022  6.911273
Ep: 20756   Rew: -99.65   Avg Rew: -99.40   Polyak: 0.995000 

Ep: 20818   Rew: -100.32   Avg Rew: -99.70   Polyak: 0.995000   Buffer: 100.00   Loss: 102.365303  98.451317  97.046028
Ep: 20819   Rew: -99.73   Avg Rew: -99.69   Polyak: 0.995000   Buffer: 100.00   Loss: 102.458488  7.024400  6.694852
Ep: 20820   Rew: -98.30   Avg Rew: -99.68   Polyak: 0.995000   Buffer: 100.00   Loss: 101.991753  5.947895  7.374721
Ep: 20821   Rew: -100.96   Avg Rew: -99.69   Polyak: 0.995000   Buffer: 100.00   Loss: 101.977478  6.115589  8.016113
Ep: 20822   Rew: -101.38   Avg Rew: -99.68   Polyak: 0.995000   Buffer: 100.00   Loss: 102.681335  7.024115  8.094384
Ep: 20823   Rew: -98.75   Avg Rew: -99.67   Polyak: 0.995000   Buffer: 100.00   Loss: 102.100319  8.221973  7.905081
Ep: 20824   Rew: -97.41   Avg Rew: -99.67   Polyak: 0.995000   Buffer: 100.00   Loss: 102.422333  9.172954  8.654754
Ep: 20825   Rew: -96.89   Avg Rew: -99.64   Polyak: 0.995000   Buffer: 100.00   Loss: 102.465599  7.997372  7.604418
Ep: 20826   Rew: -97.85   Avg Rew: -99.61   Polyak: 0.99500

Ep: 20888   Rew: -97.59   Avg Rew: -99.58   Polyak: 0.995000   Buffer: 100.00   Loss: 102.502762  91.215942  94.071083
Ep: 20889   Rew: -98.62   Avg Rew: -99.58   Polyak: 0.995000   Buffer: 100.00   Loss: 102.293816  6.232508  6.757388
Ep: 20890   Rew: -95.16   Avg Rew: -99.55   Polyak: 0.995000   Buffer: 100.00   Loss: 103.236732  6.134529  6.107831
Ep: 20891   Rew: -98.20   Avg Rew: -99.53   Polyak: 0.995000   Buffer: 100.00   Loss: 102.274040  7.751887  8.250237
Ep: 20892   Rew: -96.31   Avg Rew: -99.51   Polyak: 0.995000   Buffer: 100.00   Loss: 101.819641  6.843731  8.031464
Ep: 20893   Rew: -97.40   Avg Rew: -99.49   Polyak: 0.995000   Buffer: 100.00   Loss: 101.949173  6.921762  7.100765
Ep: 20894   Rew: -95.96   Avg Rew: -99.47   Polyak: 0.995000   Buffer: 100.00   Loss: 101.814980  6.247770  6.356865
Ep: 20895   Rew: -93.54   Avg Rew: -99.42   Polyak: 0.995000   Buffer: 100.00   Loss: 101.264473  6.326500  7.278615
Ep: 20896   Rew: -97.16   Avg Rew: -99.40   Polyak: 0.995000  

Ep: 20958   Rew: -101.12   Avg Rew: -99.23   Polyak: 0.995000   Buffer: 100.00   Loss: 101.548019  7.119477  6.515864
Ep: 20959   Rew: -101.21   Avg Rew: -99.22   Polyak: 0.995000   Buffer: 100.00   Loss: 102.192284  7.002814  6.445781
Ep: 20960   Rew: -101.27   Avg Rew: -99.22   Polyak: 0.995000   Buffer: 100.00   Loss: 102.563957  6.505011  5.806780
Ep: 20961   Rew: -100.18   Avg Rew: -99.22   Polyak: 0.995000   Buffer: 100.00   Loss: 102.170792  6.937324  7.167982
Ep: 20962   Rew: -99.72   Avg Rew: -99.20   Polyak: 0.995000   Buffer: 100.00   Loss: 102.162254  9.319263  7.612694
Ep: 20963   Rew: -99.34   Avg Rew: -99.20   Polyak: 0.995000   Buffer: 100.00   Loss: 102.392220  6.779548  9.286533
Ep: 20964   Rew: -98.52   Avg Rew: -99.18   Polyak: 0.995000   Buffer: 100.00   Loss: 101.882118  6.608696  7.341486
Ep: 20965   Rew: -101.11   Avg Rew: -99.18   Polyak: 0.995000   Buffer: 100.00   Loss: 101.650856  6.617597  7.292682
Ep: 20966   Rew: -100.03   Avg Rew: -99.16   Polyak: 0.9950

Ep: 21028   Rew: -98.11   Avg Rew: -101.06   Polyak: 0.995000   Buffer: 100.00   Loss: 101.208542  6.886664  5.915780
Ep: 21029   Rew: -95.60   Avg Rew: -101.03   Polyak: 0.995000   Buffer: 100.00   Loss: 102.023262  6.781580  6.866560
Ep: 21030   Rew: -97.14   Avg Rew: -101.03   Polyak: 0.995000   Buffer: 100.00   Loss: 102.012962  7.310360  5.186779
Ep: 21031   Rew: -96.60   Avg Rew: -100.98   Polyak: 0.995000   Buffer: 100.00   Loss: 101.645775  8.131628  7.968122
Ep: 21032   Rew: -94.53   Avg Rew: -100.93   Polyak: 0.995000   Buffer: 100.00   Loss: 101.673607  8.956498  6.933626
Ep: 21033   Rew: -94.32   Avg Rew: -100.91   Polyak: 0.995000   Buffer: 100.00   Loss: 101.019600  7.940659  6.680355
Ep: 21034   Rew: -97.09   Avg Rew: -100.89   Polyak: 0.995000   Buffer: 100.00   Loss: 102.461548  6.861835  7.336942
Ep: 21035   Rew: -96.99   Avg Rew: -100.88   Polyak: 0.995000   Buffer: 100.00   Loss: 101.720543  6.909974  6.820953
Ep: 21036   Rew: -98.02   Avg Rew: -100.88   Polyak: 0.9

Ep: 21098   Rew: -97.80   Avg Rew: -100.19   Polyak: 0.995000   Buffer: 100.00   Loss: 101.819458  6.669809  5.994393
Ep: 21099   Rew: -95.34   Avg Rew: -100.18   Polyak: 0.995000   Buffer: 100.00   Loss: 100.964096  6.566749  6.788701
Ep: 21100   Rew: -93.82   Avg Rew: -100.19   Polyak: 0.995000   Buffer: 100.00   Loss: 101.918137  6.981449  6.311000
Ep: 21101   Rew: -93.65   Avg Rew: -100.15   Polyak: 0.995000   Buffer: 100.00   Loss: 101.397217  6.673727  5.774372
Ep: 21102   Rew: -94.94   Avg Rew: -100.13   Polyak: 0.995000   Buffer: 100.00   Loss: 101.460358  89.088951  106.340599
Ep: 21103   Rew: -96.55   Avg Rew: -100.09   Polyak: 0.995000   Buffer: 100.00   Loss: 101.671211  6.712972  5.929594
Ep: 21104   Rew: -96.99   Avg Rew: -99.68   Polyak: 0.995000   Buffer: 100.00   Loss: 101.620377  6.842308  5.742410
Ep: 21105   Rew: -96.82   Avg Rew: -99.37   Polyak: 0.995000   Buffer: 100.00   Loss: 101.622993  6.646957  8.285426
Ep: 21106   Rew: -95.84   Avg Rew: -99.05   Polyak: 0.9

Ep: 21168   Rew: -99.93   Avg Rew: -98.68   Polyak: 0.995000   Buffer: 100.00   Loss: 101.648804  6.561566  7.047203
Ep: 21169   Rew: -99.89   Avg Rew: -98.66   Polyak: 0.995000   Buffer: 100.00   Loss: 100.965614  6.237731  5.961655
Ep: 21170   Rew: -99.80   Avg Rew: -98.70   Polyak: 0.995000   Buffer: 100.00   Loss: 101.265999  6.664399  5.032561
Ep: 21171   Rew: -100.51   Avg Rew: -98.74   Polyak: 0.995000   Buffer: 100.00   Loss: 101.503532  6.115256  6.989388
Ep: 21172   Rew: -100.78   Avg Rew: -98.77   Polyak: 0.995000   Buffer: 100.00   Loss: 100.674057  7.395945  7.497459
Ep: 21173   Rew: -101.67   Avg Rew: -98.83   Polyak: 0.995000   Buffer: 100.00   Loss: 100.724098  6.783034  5.938997
Ep: 21174   Rew: -100.99   Avg Rew: -98.90   Polyak: 0.995000   Buffer: 100.00   Loss: 101.447502  5.926284  5.983438
Ep: 21175   Rew: -97.20   Avg Rew: -98.93   Polyak: 0.995000   Buffer: 100.00   Loss: 100.876175  7.290062  6.213674
Ep: 21176   Rew: -101.37   Avg Rew: -99.00   Polyak: 0.99500

Ep: 21238   Rew: -100.01   Avg Rew: -100.19   Polyak: 0.995000   Buffer: 100.00   Loss: 100.640114  6.389748  7.893347
Ep: 21239   Rew: -100.63   Avg Rew: -100.20   Polyak: 0.995000   Buffer: 100.00   Loss: 101.053268  6.067190  7.283593
Ep: 21240   Rew: -101.42   Avg Rew: -100.15   Polyak: 0.995000   Buffer: 100.00   Loss: 100.991631  6.795866  6.466214
Ep: 21241   Rew: -99.15   Avg Rew: -100.15   Polyak: 0.995000   Buffer: 100.00   Loss: 100.920097  6.760366  7.065242
Ep: 21242   Rew: -99.29   Avg Rew: -100.15   Polyak: 0.995000   Buffer: 100.00   Loss: 100.713753  6.736527  6.644804
Ep: 21243   Rew: -95.98   Avg Rew: -100.14   Polyak: 0.995000   Buffer: 100.00   Loss: 100.987610  5.665638  4.836099
Ep: 21244   Rew: -100.56   Avg Rew: -100.15   Polyak: 0.995000   Buffer: 100.00   Loss: 100.544144  5.525734  6.666410
Ep: 21245   Rew: -99.79   Avg Rew: -100.17   Polyak: 0.995000   Buffer: 100.00   Loss: 101.177567  7.824667  7.539563
Ep: 21246   Rew: -98.25   Avg Rew: -100.15   Polyak:

Ep: 21308   Rew: -99.22   Avg Rew: -99.76   Polyak: 0.995000   Buffer: 100.00   Loss: 101.240334  6.239163  6.081788
Ep: 21309   Rew: -99.43   Avg Rew: -99.78   Polyak: 0.995000   Buffer: 100.00   Loss: 100.476608  5.229715  7.128484
Ep: 21310   Rew: -98.29   Avg Rew: -99.79   Polyak: 0.995000   Buffer: 100.00   Loss: 100.258904  6.429622  6.335650
Ep: 21311   Rew: -99.54   Avg Rew: -99.80   Polyak: 0.995000   Buffer: 100.00   Loss: 100.680153  7.711993  7.446046
Ep: 21312   Rew: -99.22   Avg Rew: -99.82   Polyak: 0.995000   Buffer: 100.00   Loss: 101.235596  7.516411  6.418953
Ep: 21313   Rew: -98.27   Avg Rew: -99.82   Polyak: 0.995000   Buffer: 100.00   Loss: 100.616455  7.676216  5.769525
Ep: 21314   Rew: -98.11   Avg Rew: -99.81   Polyak: 0.995000   Buffer: 100.00   Loss: 100.404442  5.758731  6.684762
Ep: 21315   Rew: -98.76   Avg Rew: -99.81   Polyak: 0.995000   Buffer: 100.00   Loss: 100.675194  7.407092  6.104203
Ep: 21316   Rew: -99.85   Avg Rew: -99.80   Polyak: 0.995000   B

Ep: 21378   Rew: -101.00   Avg Rew: -98.90   Polyak: 0.995000   Buffer: 100.00   Loss: 100.784584  7.239206  6.506835
Ep: 21379   Rew: -97.82   Avg Rew: -98.88   Polyak: 0.995000   Buffer: 100.00   Loss: 101.202423  6.614197  7.139744
Ep: 21380   Rew: -98.89   Avg Rew: -98.86   Polyak: 0.995000   Buffer: 100.00   Loss: 100.258133  8.006811  6.123272
Ep: 21381   Rew: -98.89   Avg Rew: -98.84   Polyak: 0.995000   Buffer: 100.00   Loss: 100.726044  6.674926  6.620113
Ep: 21382   Rew: -101.04   Avg Rew: -98.83   Polyak: 0.995000   Buffer: 100.00   Loss: 100.267067  6.774737  7.158290
Ep: 21383   Rew: -103.39   Avg Rew: -98.87   Polyak: 0.995000   Buffer: 100.00   Loss: 99.942726  7.000465  5.410633
Ep: 21384   Rew: -99.09   Avg Rew: -98.86   Polyak: 0.995000   Buffer: 100.00   Loss: 101.176041  6.472327  7.369814
Ep: 21385   Rew: -99.92   Avg Rew: -98.86   Polyak: 0.995000   Buffer: 100.00   Loss: 100.911743  6.995629  6.915912
Ep: 21386   Rew: -99.90   Avg Rew: -98.85   Polyak: 0.995000  

Ep: 21448   Rew: -99.66   Avg Rew: -99.06   Polyak: 0.995000   Buffer: 100.00   Loss: 100.705650  5.533195  7.851823
Ep: 21449   Rew: -100.88   Avg Rew: -99.09   Polyak: 0.995000   Buffer: 100.00   Loss: 100.389130  6.492172  7.858964
Ep: 21450   Rew: -100.11   Avg Rew: -99.10   Polyak: 0.995000   Buffer: 100.00   Loss: 100.449539  6.005507  5.772878
Ep: 21451   Rew: -101.18   Avg Rew: -99.19   Polyak: 0.995000   Buffer: 100.00   Loss: 100.605278  5.949739  6.356290
Ep: 21452   Rew: -100.07   Avg Rew: -99.23   Polyak: 0.995000   Buffer: 100.00   Loss: 100.050812  7.203341  7.421616
Ep: 21453   Rew: -100.29   Avg Rew: -99.27   Polyak: 0.995000   Buffer: 100.00   Loss: 99.411530  6.241954  5.809277
Ep: 21454   Rew: -99.52   Avg Rew: -99.30   Polyak: 0.995000   Buffer: 100.00   Loss: 99.930084  8.545149  5.137632
Ep: 21455   Rew: -98.65   Avg Rew: -99.23   Polyak: 0.995000   Buffer: 100.00   Loss: 100.041168  7.823794  7.003048
Ep: 21456   Rew: -101.58   Avg Rew: -99.29   Polyak: 0.995000

Ep: 21518   Rew: -97.68   Avg Rew: -99.91   Polyak: 0.995000   Buffer: 100.00   Loss: 99.860222  7.990758  6.042465
Ep: 21519   Rew: -99.09   Avg Rew: -99.90   Polyak: 0.995000   Buffer: 100.00   Loss: 99.864067  6.589775  6.488899
Ep: 21520   Rew: -99.44   Avg Rew: -99.91   Polyak: 0.995000   Buffer: 100.00   Loss: 99.073944  6.017025  6.492578
Ep: 21521   Rew: -98.52   Avg Rew: -99.90   Polyak: 0.995000   Buffer: 100.00   Loss: 99.788445  6.854815  6.744768
Ep: 21522   Rew: -96.69   Avg Rew: -99.86   Polyak: 0.995000   Buffer: 100.00   Loss: 100.248894  6.213942  7.025038
Ep: 21523   Rew: -100.19   Avg Rew: -99.88   Polyak: 0.995000   Buffer: 100.00   Loss: 99.574844  5.336423  7.406844
Ep: 21524   Rew: -99.12   Avg Rew: -99.86   Polyak: 0.995000   Buffer: 100.00   Loss: 99.908081  6.861356  6.393312
Ep: 21525   Rew: -96.26   Avg Rew: -99.83   Polyak: 0.995000   Buffer: 100.00   Loss: 99.577255  7.083715  6.572571
Ep: 21526   Rew: -101.86   Avg Rew: -99.86   Polyak: 0.995000   Buffer

Ep: 21589   Rew: -98.71   Avg Rew: -99.28   Polyak: 0.995000   Buffer: 100.00   Loss: 98.812897  8.319329  6.625876
Ep: 21590   Rew: -103.46   Avg Rew: -99.31   Polyak: 0.995000   Buffer: 100.00   Loss: 99.396278  6.391196  5.002051
Ep: 21591   Rew: -151.20   Avg Rew: -99.83   Polyak: 0.995000   Buffer: 100.00   Loss: 99.254158  5.461264  5.301366
Ep: 21592   Rew: -94.37   Avg Rew: -99.78   Polyak: 0.995000   Buffer: 100.00   Loss: 99.792793  6.478899  6.243684
Ep: 21593   Rew: -98.89   Avg Rew: -99.75   Polyak: 0.995000   Buffer: 100.00   Loss: 99.666298  6.049134  6.778498
Ep: 21594   Rew: -101.82   Avg Rew: -99.79   Polyak: 0.995000   Buffer: 100.00   Loss: 99.473030  6.871577  4.680747
Ep: 21595   Rew: -97.45   Avg Rew: -99.78   Polyak: 0.995000   Buffer: 100.00   Loss: 99.663254  6.951288  7.454630
Ep: 21596   Rew: -100.73   Avg Rew: -99.80   Polyak: 0.995000   Buffer: 100.00   Loss: 99.362061  7.958138  6.776803
Ep: 21597   Rew: -101.49   Avg Rew: -99.82   Polyak: 0.995000   Buff

Ep: 21660   Rew: -92.34   Avg Rew: -99.07   Polyak: 0.995000   Buffer: 100.00   Loss: 99.501465  6.771767  7.093923
Ep: 21661   Rew: -97.51   Avg Rew: -99.07   Polyak: 0.995000   Buffer: 100.00   Loss: 99.502289  6.647717  6.734734
Ep: 21662   Rew: -97.36   Avg Rew: -99.04   Polyak: 0.995000   Buffer: 100.00   Loss: 98.959015  7.422769  6.271243
Ep: 21663   Rew: -93.45   Avg Rew: -98.97   Polyak: 0.995000   Buffer: 100.00   Loss: 99.437378  6.620282  6.321423
Ep: 21664   Rew: -97.67   Avg Rew: -98.95   Polyak: 0.995000   Buffer: 100.00   Loss: 99.443237  5.786386  5.948380
Ep: 21665   Rew: -97.18   Avg Rew: -98.92   Polyak: 0.995000   Buffer: 100.00   Loss: 99.017754  5.903958  6.366456
Ep: 21666   Rew: -95.01   Avg Rew: -98.87   Polyak: 0.995000   Buffer: 100.00   Loss: 99.242630  4.679273  6.418405
Ep: 21667   Rew: -94.00   Avg Rew: -98.81   Polyak: 0.995000   Buffer: 100.00   Loss: 99.171669  6.375699  6.654786
Ep: 21668   Rew: -96.15   Avg Rew: -98.77   Polyak: 0.995000   Buffer: 1

Ep: 21731   Rew: -97.01   Avg Rew: -96.45   Polyak: 0.995000   Buffer: 100.00   Loss: 98.229836  6.522055  6.033997
Ep: 21732   Rew: -95.96   Avg Rew: -96.42   Polyak: 0.995000   Buffer: 100.00   Loss: 97.930008  6.993181  7.628468
Ep: 21733   Rew: -93.74   Avg Rew: -96.36   Polyak: 0.995000   Buffer: 100.00   Loss: 98.251549  6.048950  5.748427
Ep: 21734   Rew: -99.37   Avg Rew: -96.39   Polyak: 0.995000   Buffer: 100.00   Loss: 97.524231  6.016455  4.836157
Ep: 21735   Rew: -97.54   Avg Rew: -96.41   Polyak: 0.995000   Buffer: 100.00   Loss: 97.547798  6.752029  5.949169
Ep: 21736   Rew: -94.72   Avg Rew: -96.41   Polyak: 0.995000   Buffer: 100.00   Loss: 97.930611  5.785279  6.713276
Ep: 21737   Rew: -95.98   Avg Rew: -96.40   Polyak: 0.995000   Buffer: 100.00   Loss: 98.065231  7.209198  6.809196
Ep: 21738   Rew: -107.84   Avg Rew: -96.52   Polyak: 0.995000   Buffer: 100.00   Loss: 97.435577  7.343796  6.369138
Ep: 21739   Rew: -87.76   Avg Rew: -96.38   Polyak: 0.995000   Buffer: 

Ep: 21802   Rew: -86.98   Avg Rew: -95.08   Polyak: 0.995000   Buffer: 100.00   Loss: 96.663322  5.760306  5.780226
Ep: 21803   Rew: -142.80   Avg Rew: -95.60   Polyak: 0.995000   Buffer: 100.00   Loss: 96.792580  5.655265  6.846240
Ep: 21804   Rew: -81.02   Avg Rew: -95.50   Polyak: 0.995000   Buffer: 100.00   Loss: 96.364052  6.243814  5.828260
Ep: 21805   Rew: -74.25   Avg Rew: -95.42   Polyak: 0.995000   Buffer: 100.00   Loss: 96.308739  4.720820  6.032094
Ep: 21806   Rew: -64.60   Avg Rew: -95.05   Polyak: 0.995000   Buffer: 100.00   Loss: 96.508568  5.803560  6.025872
Ep: 21807   Rew: -90.09   Avg Rew: -95.04   Polyak: 0.995000   Buffer: 100.00   Loss: 96.254288  5.543702  7.382255
Ep: 21808   Rew: -96.10   Avg Rew: -95.14   Polyak: 0.995000   Buffer: 100.00   Loss: 96.227859  5.087183  5.245415
Ep: 21809   Rew: -95.88   Avg Rew: -95.22   Polyak: 0.995000   Buffer: 100.00   Loss: 96.424393  5.795980  5.228065
Ep: 21810   Rew: -95.77   Avg Rew: -95.27   Polyak: 0.995000   Buffer: 

Ep: 21873   Rew: -96.26   Avg Rew: -97.54   Polyak: 0.995000   Buffer: 100.00   Loss: 95.313614  6.144610  6.490539
Ep: 21874   Rew: -87.94   Avg Rew: -97.51   Polyak: 0.995000   Buffer: 100.00   Loss: 95.523361  6.919614  5.894191
Ep: 21875   Rew: -105.76   Avg Rew: -97.62   Polyak: 0.995000   Buffer: 100.00   Loss: 95.046143  5.883385  4.882405
Ep: 21876   Rew: -130.83   Avg Rew: -97.96   Polyak: 0.995000   Buffer: 100.00   Loss: 95.107117  5.442665  5.585502
Ep: 21877   Rew: -115.84   Avg Rew: -98.15   Polyak: 0.995000   Buffer: 100.00   Loss: 95.378548  5.689421  5.948789
Ep: 21878   Rew: -95.05   Avg Rew: -98.13   Polyak: 0.995000   Buffer: 100.00   Loss: 95.134018  7.236766  4.545000
Ep: 21879   Rew: -88.57   Avg Rew: -98.05   Polyak: 0.995000   Buffer: 100.00   Loss: 95.734261  6.016055  6.385053
Ep: 21880   Rew: -95.19   Avg Rew: -98.00   Polyak: 0.995000   Buffer: 100.00   Loss: 94.785088  7.101687  5.867052
Ep: 21881   Rew: -92.95   Avg Rew: -98.10   Polyak: 0.995000   Buffer

Ep: 21943   Rew: -118.81   Avg Rew: -105.33   Polyak: 0.995000   Buffer: 100.00   Loss: 89.453651  4.438499  4.093429
Ep: 21944   Rew: -117.36   Avg Rew: -105.56   Polyak: 0.995000   Buffer: 100.00   Loss: 89.487061  3.932272  4.591733
Ep: 21945   Rew: -146.61   Avg Rew: -106.06   Polyak: 0.995000   Buffer: 100.00   Loss: 89.809326  4.750795  4.424232
Ep: 21946   Rew: -110.75   Avg Rew: -106.19   Polyak: 0.995000   Buffer: 100.00   Loss: 88.732117  5.539369  5.198632
Ep: 21947   Rew: -103.03   Avg Rew: -106.27   Polyak: 0.995000   Buffer: 100.00   Loss: 89.036911  3.673307  5.014296
Ep: 21948   Rew: -100.95   Avg Rew: -106.35   Polyak: 0.995000   Buffer: 100.00   Loss: 88.807190  4.888956  3.960042
Ep: 21949   Rew: -158.22   Avg Rew: -107.01   Polyak: 0.995000   Buffer: 100.00   Loss: 88.396919  4.957626  4.129749
Ep: 21950   Rew: -154.06   Avg Rew: -107.49   Polyak: 0.995000   Buffer: 100.00   Loss: 88.960686  3.984184  6.069220
Ep: 21951   Rew: -166.73   Avg Rew: -108.04   Polyak: 0.

Ep: 22013   Rew: -97.58   Avg Rew: -110.27   Polyak: 0.995000   Buffer: 100.00   Loss: 72.596756  2.311414  2.729021
Ep: 22014   Rew: -84.32   Avg Rew: -110.13   Polyak: 0.995000   Buffer: 100.00   Loss: 73.415016  3.533248  3.793023
Ep: 22015   Rew: -81.65   Avg Rew: -110.07   Polyak: 0.995000   Buffer: 100.00   Loss: 72.546516  3.063385  4.135662
Ep: 22016   Rew: -87.28   Avg Rew: -109.81   Polyak: 0.995000   Buffer: 100.00   Loss: 72.537498  3.681035  4.780702
Ep: 22017   Rew: -95.68   Avg Rew: -109.71   Polyak: 0.995000   Buffer: 100.00   Loss: 71.829033  4.239944  3.000246
Ep: 22018   Rew: -111.71   Avg Rew: -109.74   Polyak: 0.995000   Buffer: 100.00   Loss: 70.804329  2.872843  2.243536
Ep: 22019   Rew: -102.85   Avg Rew: -109.15   Polyak: 0.995000   Buffer: 100.00   Loss: 72.275948  3.492692  3.854861
Ep: 22020   Rew: -57.55   Avg Rew: -108.44   Polyak: 0.995000   Buffer: 100.00   Loss: 70.301262  3.709035  2.888665
Ep: 22021   Rew: -70.89   Avg Rew: -108.02   Polyak: 0.995000 

Ep: 22083   Rew: -134.81   Avg Rew: -113.26   Polyak: 0.995000   Buffer: 100.00   Loss: 28.589825  0.304241  0.377810
Ep: 22084   Rew: -146.97   Avg Rew: -113.37   Polyak: 0.995000   Buffer: 100.00   Loss: 27.038034  1.680502  1.421509
Ep: 22085   Rew: -181.20   Avg Rew: -114.49   Polyak: 0.995000   Buffer: 100.00   Loss: 27.183569  0.523911  0.294700
Ep: 22086   Rew: -150.04   Avg Rew: -114.70   Polyak: 0.995000   Buffer: 100.00   Loss: 29.336355  1.422566  2.304315
Ep: 22087   Rew: -136.64   Avg Rew: -115.06   Polyak: 0.995000   Buffer: 100.00   Loss: 26.347317  0.434417  0.135957
Ep: 22088   Rew: -174.08   Avg Rew: -115.50   Polyak: 0.995000   Buffer: 100.00   Loss: 26.049202  0.358796  0.221461
Ep: 22089   Rew: -148.78   Avg Rew: -115.95   Polyak: 0.995000   Buffer: 100.00   Loss: 27.097597  0.386345  0.373681
Ep: 22090   Rew: -181.92   Avg Rew: -116.86   Polyak: 0.995000   Buffer: 100.00   Loss: 27.588148  1.977728  1.598748
Ep: 22091   Rew: -191.11   Avg Rew: -117.85   Polyak: 0.

Ep: 22153   Rew: -88.85   Avg Rew: -126.03   Polyak: 0.995000   Buffer: 100.00   Loss: 23.662228  0.995232  1.733137
Ep: 22154   Rew: -179.08   Avg Rew: -126.97   Polyak: 0.995000   Buffer: 100.00   Loss: 22.952059  0.230251  0.312620
Ep: 22155   Rew: -127.24   Avg Rew: -126.92   Polyak: 0.995000   Buffer: 100.00   Loss: 23.890095  0.427613  0.504723
Ep: 22156   Rew: -120.27   Avg Rew: -126.92   Polyak: 0.995000   Buffer: 100.00   Loss: 22.976395  1.598611  3.179683
Ep: 22157   Rew: -108.44   Avg Rew: -126.97   Polyak: 0.995000   Buffer: 100.00   Loss: 25.051254  1.022519  1.343038
Ep: 22158   Rew: -97.96   Avg Rew: -126.59   Polyak: 0.995000   Buffer: 100.00   Loss: 25.439102  5.188645  2.509675
Ep: 22159   Rew: -115.96   Avg Rew: -126.43   Polyak: 0.995000   Buffer: 100.00   Loss: 23.610182  1.141517  0.672640
Ep: 22160   Rew: -111.84   Avg Rew: -126.31   Polyak: 0.995000   Buffer: 100.00   Loss: 22.830650  0.225049  0.238732
Ep: 22161   Rew: -80.51   Avg Rew: -125.92   Polyak: 0.995

Ep: 22223   Rew: -106.39   Avg Rew: -103.04   Polyak: 0.995000   Buffer: 100.00   Loss: 20.525473  3.337919  3.095534
Ep: 22224   Rew: -163.67   Avg Rew: -103.68   Polyak: 0.995000   Buffer: 100.00   Loss: 21.435127  0.284812  0.494643
Ep: 22225   Rew: -183.62   Avg Rew: -104.54   Polyak: 0.995000   Buffer: 100.00   Loss: 20.465946  2.697789  0.550033
Ep: 22226   Rew: -121.08   Avg Rew: -104.31   Polyak: 0.995000   Buffer: 100.00   Loss: 20.622599  0.339300  0.562461
Ep: 22227   Rew: -164.45   Avg Rew: -105.19   Polyak: 0.995000   Buffer: 100.00   Loss: 21.237261  0.478649  1.380416
Ep: 22228   Rew: -196.42   Avg Rew: -105.64   Polyak: 0.995000   Buffer: 100.00   Loss: 20.088352  0.288416  0.281467
Ep: 22229   Rew: -94.76   Avg Rew: -105.83   Polyak: 0.995000   Buffer: 100.00   Loss: 20.571482  0.583015  0.346855
Ep: 22230   Rew: -159.09   Avg Rew: -106.52   Polyak: 0.995000   Buffer: 100.00   Loss: 20.843971  2.990237  0.394843
Ep: 22231   Rew: -97.28   Avg Rew: -106.22   Polyak: 0.99

Ep: 22293   Rew: -69.03   Avg Rew: -104.13   Polyak: 0.995000   Buffer: 100.00   Loss: 17.416670  0.086798  0.107327
Ep: 22294   Rew: -105.39   Avg Rew: -104.02   Polyak: 0.995000   Buffer: 100.00   Loss: 17.795542  0.177012  0.121956
Ep: 22295   Rew: -107.47   Avg Rew: -104.12   Polyak: 0.995000   Buffer: 100.00   Loss: 16.940121  0.345868  0.135636
Ep: 22296   Rew: -66.62   Avg Rew: -103.31   Polyak: 0.995000   Buffer: 100.00   Loss: 17.284203  0.198691  0.160835
Ep: 22297   Rew: -131.59   Avg Rew: -103.55   Polyak: 0.995000   Buffer: 100.00   Loss: 17.463499  0.187077  0.160052
Ep: 22298   Rew: -53.17   Avg Rew: -103.07   Polyak: 0.995000   Buffer: 100.00   Loss: 17.518259  0.110382  0.111118
Ep: 22299   Rew: -114.78   Avg Rew: -103.47   Polyak: 0.995000   Buffer: 100.00   Loss: 17.157198  0.146124  0.168765
Ep: 22300   Rew: -125.85   Avg Rew: -103.75   Polyak: 0.995000   Buffer: 100.00   Loss: 18.398270  0.220294  0.135268
Ep: 22301   Rew: -80.88   Avg Rew: -103.51   Polyak: 0.9950

Ep: 22363   Rew: -109.45   Avg Rew: -110.24   Polyak: 0.995000   Buffer: 100.00   Loss: 15.593583  0.256598  0.223343
Ep: 22364   Rew: -120.49   Avg Rew: -110.73   Polyak: 0.995000   Buffer: 100.00   Loss: 16.059610  0.360519  0.517053
Ep: 22365   Rew: -131.76   Avg Rew: -111.48   Polyak: 0.995000   Buffer: 100.00   Loss: 16.203224  1.824953  2.419069
Ep: 22366   Rew: -123.05   Avg Rew: -112.06   Polyak: 0.995000   Buffer: 100.00   Loss: 16.346348  0.880855  0.173762
Ep: 22367   Rew: -123.68   Avg Rew: -112.28   Polyak: 0.995000   Buffer: 100.00   Loss: 17.066957  0.190228  0.443494
Ep: 22368   Rew: -113.05   Avg Rew: -112.60   Polyak: 0.995000   Buffer: 100.00   Loss: 16.897982  0.353789  0.362928
Ep: 22369   Rew: -112.68   Avg Rew: -112.97   Polyak: 0.995000   Buffer: 100.00   Loss: 16.610685  0.711811  1.419264
Ep: 22370   Rew: -108.01   Avg Rew: -113.27   Polyak: 0.995000   Buffer: 100.00   Loss: 15.463792  0.083257  0.093739
Ep: 22371   Rew: -127.68   Avg Rew: -113.59   Polyak: 0.

Ep: 22433   Rew: -87.60   Avg Rew: -109.87   Polyak: 0.995000   Buffer: 100.00   Loss: 13.499596  0.062526  0.077391
Ep: 22434   Rew: -90.85   Avg Rew: -109.14   Polyak: 0.995000   Buffer: 100.00   Loss: 13.410141  0.059977  0.046743
Ep: 22435   Rew: -94.73   Avg Rew: -108.66   Polyak: 0.995000   Buffer: 100.00   Loss: 13.561503  0.181453  0.137553
Ep: 22436   Rew: -101.06   Avg Rew: -108.12   Polyak: 0.995000   Buffer: 100.00   Loss: 13.678153  0.103428  0.062480
Ep: 22437   Rew: -128.41   Avg Rew: -108.15   Polyak: 0.995000   Buffer: 100.00   Loss: 13.279230  0.089232  0.089351
Ep: 22438   Rew: -101.09   Avg Rew: -108.07   Polyak: 0.995000   Buffer: 100.00   Loss: 13.099152  0.069027  0.120997
Ep: 22439   Rew: -95.18   Avg Rew: -108.14   Polyak: 0.995000   Buffer: 100.00   Loss: 13.231863  0.062941  0.118406
Ep: 22440   Rew: -113.74   Avg Rew: -108.27   Polyak: 0.995000   Buffer: 100.00   Loss: 13.328839  0.058734  0.073336
Ep: 22441   Rew: -82.48   Avg Rew: -107.92   Polyak: 0.99500

Ep: 22504   Rew: -62.98   Avg Rew: -89.60   Polyak: 0.995000   Buffer: 100.00   Loss: 11.390296  0.024957  0.026187
Ep: 22505   Rew: -116.83   Avg Rew: -89.97   Polyak: 0.995000   Buffer: 100.00   Loss: 11.173010  0.080940  0.082413
Ep: 22506   Rew: -74.09   Avg Rew: -89.71   Polyak: 0.995000   Buffer: 100.00   Loss: 11.181491  0.023924  0.026653
Ep: 22507   Rew: -65.40   Avg Rew: -89.64   Polyak: 0.995000   Buffer: 100.00   Loss: 11.106834  0.908811  0.906633
Ep: 22508   Rew: -62.59   Avg Rew: -89.29   Polyak: 0.995000   Buffer: 100.00   Loss: 11.009366  0.025696  0.026279
Ep: 22509   Rew: -71.66   Avg Rew: -89.43   Polyak: 0.995000   Buffer: 100.00   Loss: 10.950016  0.029578  0.029825
Ep: 22510   Rew: -77.28   Avg Rew: -89.23   Polyak: 0.995000   Buffer: 100.00   Loss: 11.133527  0.036578  0.040871
Ep: 22511   Rew: -92.44   Avg Rew: -89.29   Polyak: 0.995000   Buffer: 100.00   Loss: 11.379889  0.041163  0.046020
Ep: 22512   Rew: -79.54   Avg Rew: -89.34   Polyak: 0.995000   Buffer: 

KeyboardInterrupt: 

In [None]:
def test(random_seed=0, n_episodes=3, max_timesteps=2000, render=True, save_gif=True):
    """Roll out the saved TD3 actor in ``env_name`` and print each episode's reward.

    The actor weights are loaded via ``policy.load_actor(directory, filename)``
    with ``directory = ./preTrained/td3_torch/<env_name>`` and
    ``filename = TD3_torch_<env_name>_<random_seed>``.

    Args:
        random_seed: Only used to build the checkpoint file name; no RNG is
            seeded by this function.
        n_episodes: Number of evaluation episodes to run.
        max_timesteps: Upper bound on the number of steps per episode.
        render: If True, call ``env.render()`` after every step.
        save_gif: If True (and ``render`` is True), additionally save every
            frame as ``./gif/td3_torch/<episode>/<step>.jpg``.

    Returns:
        None. Episode rewards (truncated to int) are printed to stdout.
    """
    filename = "TD3_torch_{}_{}".format(env_name, random_seed)
    directory = "./preTrained/td3_torch/{}".format(env_name)

    env = gym.make(env_name)
    try:
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        max_action = float(env.action_space.high[0])

        policy = TD3(state_dim, action_dim, max_action)
        policy.load_actor(directory, filename)

        # Frames are only captured while rendering, as in the original nesting.
        capture_frames = render and save_gif

        for ep in range(1, n_episodes + 1):
            ep_reward = 0
            state = env.reset()

            frame_dir = './gif/td3_torch/{}'.format(ep)
            if capture_frames:
                # makedirs (not mkdir) so missing parent folders are created
                # too; done once per episode instead of on every step.
                os.makedirs(frame_dir, exist_ok=True)

            for t in range(max_timesteps):
                # Deterministic action: no exploration noise during evaluation.
                action = policy.select_action(state)
                state, reward, done, _ = env.step(action)
                ep_reward += reward
                if render:
                    env.render()
                    if capture_frames:
                        frame = Image.fromarray(env.render(mode='rgb_array'))
                        frame.save('{}/{}.jpg'.format(frame_dir, t))
                if done:
                    break

            print('Episode: {}\tReward: {}'.format(ep, int(ep_reward)))
    finally:
        # Close exactly once, after all episodes (and also on error), rather
        # than after every episode while the env is still being reused.
        env.close()
                
# Run the evaluation defined by test() above in this cell.
test()
    
    