In [1]:
import os
import torch
import gym
import numpy as np
from TD3_torch.TD3 import TD3
from PIL import Image
from TD3_torch.utils import ReplayBuffer

# ---------------------------------------------------------------------------
# Experiment configuration (read as module-level globals by train()).
# ---------------------------------------------------------------------------
env_name = 'BipedalWalkerHardcore-v2'    # gym environment id
learning_rate_base = 1e-4                # starting learning rate handed to TD3
log_interval = 10                        # episodes between printed progress lines
random_seed = 1                          # RNG seed; a falsy value skips seeding
gamma = 0.99                             # discount factor for future rewards
batch_size = 256                         # transitions sampled from the replay buffer
exploration_noise = 0.3                  # std-dev of the Gaussian noise added to actions
polyak_int = [0.9999, 0.999999]          # [low, high] interval for the target update rate (1 - tau)
policy_noise = 0.2                       # target policy smoothing noise
noise_clip = 0.5                         # passed to policy.update together with policy_noise
policy_delay = 2                         # delayed policy update parameter
max_episodes = 100000                    # upper bound on the number of episodes
max_timesteps = 3000                     # upper bound on steps within one episode
max_buffer_length = 2000000              # replay buffer capacity (max_length)

# Where checkpoints go and the base name they are saved under.
directory = "./preTrained/td3_torch/" + env_name
filename = "_".join(("TD3_torch", env_name, str(random_seed)))

# Per-episode total rewards, appended to by train().
reward_history = []



def _annealed_learning_rate(avg_reward, reward_threshold):
    """Return the learning rate to use for the given running-average reward.

    The rate is interpolated linearly between ``learning_rate_base`` and
    10% of it: ``part`` measures how far the average still is from
    ``reward_threshold`` on a scale whose length is ``reward_threshold + 150``,
    and is capped at 1 so the rate never exceeds ``learning_rate_base``.
    As the average approaches the threshold, ``part`` approaches 0 and the
    rate approaches ``0.1 * learning_rate_base``.
    """
    part = (reward_threshold - avg_reward) / (reward_threshold + 150)
    if part > 1:
        part = 1
    return learning_rate_base - learning_rate_base * (1 - part) * 0.9


def _run_episode(env, policy, replay_buffer, action_dim, polyak):
    """Play one episode, store its transitions, update the policy once.

    Actions come from ``policy.select_action`` plus Gaussian exploration
    noise and are clipped to the environment's action bounds. The policy is
    updated a single time, when the episode ends or ``max_timesteps`` is hit.

    Returns:
        The sum of rewards collected during the episode.
    """
    ep_reward = 0
    state = env.reset()

    for t in range(max_timesteps):
        # select action and add exploration noise:
        action = policy.select_action(state)
        action = action + np.random.normal(0, exploration_noise, size=action_dim)
        action = action.clip(env.action_space.low, env.action_space.high)

        # take action in env and remember the transition:
        next_state, reward, done, _ = env.step(action)
        replay_buffer.add((state, action, reward, next_state, float(done)))
        state = next_state
        ep_reward += reward

        # episode finished (or step budget exhausted): update policy once.
        if done or t == (max_timesteps - 1):
            # NOTE(review): `t` is the index of the last step, so it is one
            # less than the number of steps taken; confirm that this is the
            # intended second argument of TD3.update.
            policy.update(replay_buffer, t, batch_size, gamma, polyak,
                          policy_noise, noise_clip, policy_delay)
            break

    return ep_reward


def train():
    """Train a TD3 agent on ``env_name``.

    Runs up to ``max_episodes`` episodes. After every episode the episode
    reward is appended to the module-level ``reward_history`` and written to
    ``log.txt`` as ``episode,reward``. Training stops early, saving a
    ``<filename>_solved`` checkpoint, once the mean of the last 100 episode
    rewards reaches ``env.spec.reward_threshold``. Otherwise the learning
    rate is re-scheduled from that mean, and from episode 501 onwards the
    current policy is saved after every episode.

    The log file and the environment are released on every exit path,
    including exceptions and exhausting ``max_episodes``.
    """
    env = gym.make(env_name)
    try:
        # Seed *before* the policy and replay buffer are constructed, so that
        # any randomness used while building them is covered by the seed too.
        if random_seed:
            env.seed(random_seed)
            torch.manual_seed(random_seed)
            np.random.seed(random_seed)

        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        max_action = float(env.action_space.high[0])
        reward_threshold = env.spec.reward_threshold
        # The target update rate is held at the lower end of polyak_int.
        polyak = polyak_int[0]

        policy = TD3(state_dim, action_dim, max_action, lr=learning_rate_base)
        replay_buffer = ReplayBuffer(max_length=max_buffer_length)

        print("action_space={}".format(env.action_space))
        print("obs_space={}".format(env.observation_space))
        print("threshold={} \n".format(reward_threshold))
        if random_seed:
            print("Random Seed: {}".format(random_seed))

        # Make sure the checkpoint directory exists before the first save.
        os.makedirs(directory, exist_ok=True)

        # `with` guarantees the log is closed however the loop is left.
        with open("log.txt", "w+") as log_f:
            for episode in range(1, max_episodes + 1):
                ep_reward = _run_episode(env, policy, replay_buffer, action_dim, polyak)

                reward_history.append(ep_reward)
                avg_reward = np.mean(reward_history[-100:])

                # logging updates:
                log_f.write('{},{}\n'.format(episode, ep_reward))
                log_f.flush()

                # running average reached the env's threshold: save and stop.
                if avg_reward >= reward_threshold:
                    print("########## Solved! ###########")
                    policy.save(directory, filename + '_solved')
                    break

                # Lower the learning rate as the average reward improves.
                # NOTE(review): set_optimizers is called every episode; if it
                # rebuilds the optimizers their internal state is reset each
                # time - confirm against the TD3 implementation.
                learning_rate = _annealed_learning_rate(avg_reward, reward_threshold)
                policy.set_optimizers(lr=learning_rate)

                if episode > 500:
                    policy.save(directory, filename)

                # print avg reward every log interval:
                if episode % log_interval == 0:
                    print("Ep: {}   Rew: {:3.2f}   Avg Rew: {:3.2f}   LR: {:8.8f}   Polyak: {:6.5f}   Bf: {:2.0f}   Loss: {:5.3f}  {:5.3f}  {:5.3f}".format(
                        episode, ep_reward, avg_reward, learning_rate, polyak, replay_buffer.get_fill(), policy.actor_loss, policy.loss_Q1, policy.loss_Q2))
    finally:
        env.close()

# Start training as soon as this cell is executed; the call blocks until
# train() returns (solved, or max_episodes episodes have been played).
train()


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
action_space=Box(4,)
obs_space=Box(24,)
threshold=300 

Random Seed: 1
Ep: 10   Rew: -100.77   Avg Rew: -97.37   LR: 0.00008947   Polyak: 0.99990   Bf:  0   Loss: -0.151  0.056  0.052
Ep: 20   Rew: -100.98   Avg Rew: -100.14   LR: 0.00009003   Polyak: 0.99990   Bf:  0   Loss: 0.036  0.552  0.585
Ep: 30   Rew: -101.00   Avg Rew: -100.81   LR: 0.00009016   Polyak: 0.99990   Bf:  0   Loss: -0.114  30.415  28.746
Ep: 40   Rew: -100.70   Avg Rew: -101.19   LR: 0.00009024   Polyak: 0.99990   Bf:  0   Loss: -0.295  3.059  3.283
Ep: 50   Rew: -100.10   Avg Rew: -103.14   LR: 0.00009063   Polyak: 0.99990   Bf:  1   Loss: -0.355  0.251  0.203
Ep: 60   Rew: -115.28   Avg Rew: -103.21   LR: 0.00009064   Polyak: 0.99990   Bf:  1   Loss: -0.358  3.787  5.897
Ep: 70   Rew: -103.69   Avg R

Ep: 710   Rew: -104.16   Avg Rew: -108.61   LR: 0.00009172   Polyak: 0.99990   Bf:  4   Loss: 1.478  1.705  1.128
Ep: 720   Rew: -114.11   Avg Rew: -108.25   LR: 0.00009165   Polyak: 0.99990   Bf:  4   Loss: 3.338  1.888  2.007
Ep: 730   Rew: -115.58   Avg Rew: -108.06   LR: 0.00009161   Polyak: 0.99990   Bf:  4   Loss: 2.028  0.921  1.665
Ep: 740   Rew: -110.49   Avg Rew: -107.45   LR: 0.00009149   Polyak: 0.99990   Bf:  4   Loss: 4.080  1.202  1.624
Ep: 750   Rew: -114.17   Avg Rew: -107.85   LR: 0.00009157   Polyak: 0.99990   Bf:  4   Loss: 2.864  3.422  2.777
Ep: 760   Rew: -96.89   Avg Rew: -108.93   LR: 0.00009179   Polyak: 0.99990   Bf:  4   Loss: 0.998  1.716  1.876
Ep: 770   Rew: -111.15   Avg Rew: -108.14   LR: 0.00009163   Polyak: 0.99990   Bf:  4   Loss: 2.309  0.538  0.618
Ep: 780   Rew: -112.56   Avg Rew: -112.07   LR: 0.00009241   Polyak: 0.99990   Bf:  5   Loss: 1.453  1.454  1.489
Ep: 790   Rew: -103.70   Avg Rew: -111.84   LR: 0.00009237   Polyak: 0.99990   Bf:  5   L

Ep: 1430   Rew: -108.77   Avg Rew: -122.49   LR: 0.00009450   Polyak: 0.99990   Bf: 13   Loss: 7.220  1.588  1.035
Ep: 1440   Rew: -126.69   Avg Rew: -121.78   LR: 0.00009436   Polyak: 0.99990   Bf: 13   Loss: 5.948  1.291  0.870
Ep: 1450   Rew: -139.66   Avg Rew: -120.39   LR: 0.00009408   Polyak: 0.99990   Bf: 13   Loss: 5.617  1.860  2.138
Ep: 1460   Rew: -108.50   Avg Rew: -119.16   LR: 0.00009383   Polyak: 0.99990   Bf: 13   Loss: 5.384  0.519  0.747
Ep: 1470   Rew: -116.78   Avg Rew: -118.15   LR: 0.00009363   Polyak: 0.99990   Bf: 13   Loss: 4.270  0.533  0.645
Ep: 1480   Rew: -112.68   Avg Rew: -116.19   LR: 0.00009324   Polyak: 0.99990   Bf: 13   Loss: 4.537  1.216  0.981
Ep: 1490   Rew: -111.13   Avg Rew: -115.73   LR: 0.00009315   Polyak: 0.99990   Bf: 13   Loss: 5.569  0.761  0.731
Ep: 1500   Rew: -103.12   Avg Rew: -113.71   LR: 0.00009274   Polyak: 0.99990   Bf: 13   Loss: 5.077  0.995  0.651
Ep: 1510   Rew: -116.69   Avg Rew: -113.35   LR: 0.00009267   Polyak: 0.99990   

Ep: 2150   Rew: -105.16   Avg Rew: -108.26   LR: 0.00009165   Polyak: 0.99990   Bf: 17   Loss: 9.196  2.119  1.226
Ep: 2160   Rew: -103.52   Avg Rew: -107.89   LR: 0.00009158   Polyak: 0.99990   Bf: 17   Loss: 9.454  1.634  1.499
Ep: 2170   Rew: -102.74   Avg Rew: -107.34   LR: 0.00009147   Polyak: 0.99990   Bf: 17   Loss: 8.574  1.430  1.798
Ep: 2180   Rew: -106.93   Avg Rew: -106.36   LR: 0.00009127   Polyak: 0.99990   Bf: 17   Loss: 5.789  1.153  1.029
Ep: 2190   Rew: -99.27   Avg Rew: -105.04   LR: 0.00009101   Polyak: 0.99990   Bf: 17   Loss: 9.414  1.891  1.203
Ep: 2200   Rew: -97.01   Avg Rew: -105.06   LR: 0.00009101   Polyak: 0.99990   Bf: 17   Loss: 5.579  1.281  0.700
Ep: 2210   Rew: -110.49   Avg Rew: -105.90   LR: 0.00009118   Polyak: 0.99990   Bf: 17   Loss: 7.219  1.075  1.059
Ep: 2220   Rew: -96.34   Avg Rew: -105.83   LR: 0.00009117   Polyak: 0.99990   Bf: 17   Loss: 8.190  2.511  2.479
Ep: 2230   Rew: -131.53   Avg Rew: -106.11   LR: 0.00009122   Polyak: 0.99990   Bf:

Ep: 2870   Rew: -115.73   Avg Rew: -102.07   LR: 0.00009041   Polyak: 0.99990   Bf: 20   Loss: 10.822  1.749  1.115
Ep: 2880   Rew: -116.74   Avg Rew: -102.64   LR: 0.00009053   Polyak: 0.99990   Bf: 20   Loss: 10.808  1.343  1.793
Ep: 2890   Rew: -102.07   Avg Rew: -102.93   LR: 0.00009059   Polyak: 0.99990   Bf: 20   Loss: 11.712  1.678  1.345
Ep: 2900   Rew: -97.08   Avg Rew: -102.75   LR: 0.00009055   Polyak: 0.99990   Bf: 20   Loss: 8.206  1.201  1.423
Ep: 2910   Rew: -99.40   Avg Rew: -103.35   LR: 0.00009067   Polyak: 0.99990   Bf: 21   Loss: 9.343  2.011  2.121
Ep: 2920   Rew: -97.12   Avg Rew: -103.69   LR: 0.00009074   Polyak: 0.99990   Bf: 21   Loss: 10.843  1.290  1.494
Ep: 2930   Rew: -117.88   Avg Rew: -103.49   LR: 0.00009070   Polyak: 0.99990   Bf: 21   Loss: 12.355  6.661  5.475
Ep: 2940   Rew: -120.01   Avg Rew: -104.01   LR: 0.00009080   Polyak: 0.99990   Bf: 21   Loss: 9.122  4.189  4.237
Ep: 2950   Rew: -95.36   Avg Rew: -103.77   LR: 0.00009075   Polyak: 0.99990  

Ep: 3590   Rew: -101.48   Avg Rew: -99.30   LR: 0.00008986   Polyak: 0.99990   Bf: 25   Loss: 14.469  1.182  1.608
Ep: 3600   Rew: -97.89   Avg Rew: -98.85   LR: 0.00008977   Polyak: 0.99990   Bf: 25   Loss: 13.404  1.122  0.807
Ep: 3610   Rew: -89.23   Avg Rew: -99.16   LR: 0.00008983   Polyak: 0.99990   Bf: 25   Loss: 12.221  2.130  1.691
Ep: 3620   Rew: -99.59   Avg Rew: -98.87   LR: 0.00008977   Polyak: 0.99990   Bf: 25   Loss: 13.748  1.398  2.226
Ep: 3630   Rew: -93.11   Avg Rew: -99.01   LR: 0.00008980   Polyak: 0.99990   Bf: 25   Loss: 13.117  1.607  2.070
Ep: 3640   Rew: -101.47   Avg Rew: -99.11   LR: 0.00008982   Polyak: 0.99990   Bf: 25   Loss: 10.804  2.983  2.721
Ep: 3650   Rew: -95.08   Avg Rew: -98.55   LR: 0.00008971   Polyak: 0.99990   Bf: 26   Loss: 12.016  2.861  2.193
Ep: 3660   Rew: -116.38   Avg Rew: -98.29   LR: 0.00008966   Polyak: 0.99990   Bf: 26   Loss: 16.312  2.332  1.903
Ep: 3670   Rew: -101.52   Avg Rew: -98.50   LR: 0.00008970   Polyak: 0.99990   Bf: 26

Ep: 4310   Rew: -99.34   Avg Rew: -87.32   LR: 0.00008746   Polyak: 0.99990   Bf: 34   Loss: 15.339  2.750  2.289
Ep: 4320   Rew: -65.36   Avg Rew: -86.22   LR: 0.00008724   Polyak: 0.99990   Bf: 34   Loss: 14.763  3.093  3.046
Ep: 4330   Rew: -88.36   Avg Rew: -86.52   LR: 0.00008730   Polyak: 0.99990   Bf: 34   Loss: 12.697  2.476  2.903
Ep: 4340   Rew: -85.63   Avg Rew: -86.04   LR: 0.00008721   Polyak: 0.99990   Bf: 34   Loss: 15.218  1.545  1.373
Ep: 4350   Rew: -78.28   Avg Rew: -86.54   LR: 0.00008731   Polyak: 0.99990   Bf: 34   Loss: 17.738  3.549  3.744
Ep: 4360   Rew: -98.46   Avg Rew: -86.42   LR: 0.00008728   Polyak: 0.99990   Bf: 34   Loss: 17.298  3.255  2.347
Ep: 4370   Rew: -92.67   Avg Rew: -85.91   LR: 0.00008718   Polyak: 0.99990   Bf: 34   Loss: 14.359  2.344  1.742
Ep: 4380   Rew: -73.65   Avg Rew: -85.18   LR: 0.00008704   Polyak: 0.99990   Bf: 35   Loss: 16.310  3.605  3.567
Ep: 4390   Rew: -78.35   Avg Rew: -85.26   LR: 0.00008705   Polyak: 0.99990   Bf: 35   L

Ep: 5030   Rew: -80.50   Avg Rew: -78.59   LR: 0.00008572   Polyak: 0.99990   Bf: 44   Loss: 17.195  2.246  3.185
Ep: 5040   Rew: -89.86   Avg Rew: -80.86   LR: 0.00008617   Polyak: 0.99990   Bf: 44   Loss: 15.990  1.604  2.334
Ep: 5050   Rew: -66.78   Avg Rew: -80.45   LR: 0.00008609   Polyak: 0.99990   Bf: 45   Loss: 15.981  4.249  5.385
Ep: 5060   Rew: -72.55   Avg Rew: -82.41   LR: 0.00008648   Polyak: 0.99990   Bf: 45   Loss: 15.379  2.464  2.768
Ep: 5070   Rew: -59.59   Avg Rew: -81.97   LR: 0.00008639   Polyak: 0.99990   Bf: 45   Loss: 11.907  5.888  7.855
Ep: 5080   Rew: -94.61   Avg Rew: -81.69   LR: 0.00008634   Polyak: 0.99990   Bf: 45   Loss: 18.103  1.909  1.601
Ep: 5090   Rew: -71.79   Avg Rew: -83.22   LR: 0.00008664   Polyak: 0.99990   Bf: 45   Loss: 18.007  4.517  4.363
Ep: 5100   Rew: -69.07   Avg Rew: -83.90   LR: 0.00008678   Polyak: 0.99990   Bf: 46   Loss: 14.865  2.974  3.589
Ep: 5110   Rew: -73.28   Avg Rew: -83.95   LR: 0.00008679   Polyak: 0.99990   Bf: 46   L

Ep: 5750   Rew: -88.98   Avg Rew: -74.53   LR: 0.00008491   Polyak: 0.99990   Bf: 57   Loss: 17.759  1.435  1.702
Ep: 5760   Rew: -107.36   Avg Rew: -77.94   LR: 0.00008559   Polyak: 0.99990   Bf: 58   Loss: 18.309  3.220  3.662
Ep: 5770   Rew: -69.72   Avg Rew: -79.10   LR: 0.00008582   Polyak: 0.99990   Bf: 58   Loss: 15.518  2.377  2.514
Ep: 5780   Rew: -82.87   Avg Rew: -80.49   LR: 0.00008610   Polyak: 0.99990   Bf: 58   Loss: 18.127  2.926  2.811
Ep: 5790   Rew: -45.23   Avg Rew: -81.92   LR: 0.00008638   Polyak: 0.99990   Bf: 58   Loss: 14.120  4.816  3.308
Ep: 5800   Rew: -85.77   Avg Rew: -82.25   LR: 0.00008645   Polyak: 0.99990   Bf: 59   Loss: 16.026  2.941  2.096
Ep: 5810   Rew: -93.31   Avg Rew: -83.17   LR: 0.00008663   Polyak: 0.99990   Bf: 59   Loss: 18.586  2.794  2.286
Ep: 5820   Rew: -49.44   Avg Rew: -83.86   LR: 0.00008677   Polyak: 0.99990   Bf: 59   Loss: 17.644  7.108  6.282
Ep: 5830   Rew: -24.55   Avg Rew: -82.98   LR: 0.00008660   Polyak: 0.99990   Bf: 59   

Ep: 6470   Rew: -81.75   Avg Rew: -78.35   LR: 0.00008567   Polyak: 0.99990   Bf: 85   Loss: 14.412  7.315  8.911
Ep: 6480   Rew: -141.83   Avg Rew: -77.46   LR: 0.00008549   Polyak: 0.99990   Bf: 85   Loss: 16.256  3.333  2.003
Ep: 6490   Rew: -102.72   Avg Rew: -76.69   LR: 0.00008534   Polyak: 0.99990   Bf: 86   Loss: 15.361  3.590  3.933
Ep: 6500   Rew: -69.87   Avg Rew: -77.65   LR: 0.00008553   Polyak: 0.99990   Bf: 86   Loss: 17.373  4.386  3.597
Ep: 6510   Rew: -83.29   Avg Rew: -74.94   LR: 0.00008499   Polyak: 0.99990   Bf: 87   Loss: 13.088  5.111  4.314
Ep: 6520   Rew: -45.45   Avg Rew: -76.77   LR: 0.00008535   Polyak: 0.99990   Bf: 88   Loss: 13.889  3.409  4.675
Ep: 6530   Rew: -47.27   Avg Rew: -74.00   LR: 0.00008480   Polyak: 0.99990   Bf: 88   Loss: 14.629  4.529  4.233
Ep: 6540   Rew: -77.12   Avg Rew: -75.92   LR: 0.00008518   Polyak: 0.99990   Bf: 88   Loss: 16.193  7.075  5.568
Ep: 6550   Rew: -109.32   Avg Rew: -76.38   LR: 0.00008528   Polyak: 0.99990   Bf: 89 

Ep: 7190   Rew: -76.68   Avg Rew: -66.87   LR: 0.00008337   Polyak: 0.99990   Bf: 100   Loss: 10.454  2.458  1.410
Ep: 7200   Rew: -44.65   Avg Rew: -66.78   LR: 0.00008336   Polyak: 0.99990   Bf: 100   Loss: 12.182  1.705  2.374
Ep: 7210   Rew: -79.49   Avg Rew: -63.21   LR: 0.00008264   Polyak: 0.99990   Bf: 100   Loss: 9.772  2.059  2.001
Ep: 7220   Rew: 162.90   Avg Rew: -61.67   LR: 0.00008233   Polyak: 0.99990   Bf: 100   Loss: 9.968  6.090  4.988
Ep: 7230   Rew: -45.41   Avg Rew: -64.03   LR: 0.00008281   Polyak: 0.99990   Bf: 100   Loss: 11.603  1.536  1.974
Ep: 7240   Rew: -63.55   Avg Rew: -69.09   LR: 0.00008382   Polyak: 0.99990   Bf: 100   Loss: 11.013  1.611  1.791
Ep: 7250   Rew: -46.62   Avg Rew: -67.05   LR: 0.00008341   Polyak: 0.99990   Bf: 100   Loss: 11.912  1.397  0.893
Ep: 7260   Rew: -108.99   Avg Rew: -66.20   LR: 0.00008324   Polyak: 0.99990   Bf: 100   Loss: 10.165  3.488  3.356
Ep: 7270   Rew: -103.69   Avg Rew: -63.64   LR: 0.00008273   Polyak: 0.99990   Bf

Ep: 7910   Rew: -156.99   Avg Rew: -9.10   LR: 0.00007182   Polyak: 0.99990   Bf: 100   Loss: 8.627  1.079  2.131
Ep: 7920   Rew: -87.79   Avg Rew: -8.78   LR: 0.00007176   Polyak: 0.99990   Bf: 100   Loss: 7.297  0.758  1.066
Ep: 7930   Rew: -62.25   Avg Rew: -5.47   LR: 0.00007109   Polyak: 0.99990   Bf: 100   Loss: 7.480  1.146  0.559
Ep: 7940   Rew: -100.20   Avg Rew: -5.44   LR: 0.00007109   Polyak: 0.99990   Bf: 100   Loss: 8.473  2.260  1.443
Ep: 7950   Rew: -102.92   Avg Rew: -8.17   LR: 0.00007163   Polyak: 0.99990   Bf: 100   Loss: 8.266  0.687  0.665
Ep: 7960   Rew: -52.29   Avg Rew: -5.66   LR: 0.00007113   Polyak: 0.99990   Bf: 100   Loss: 6.941  1.286  1.529
Ep: 7970   Rew: -51.76   Avg Rew: -6.50   LR: 0.00007130   Polyak: 0.99990   Bf: 100   Loss: 7.860  1.829  2.334
Ep: 7980   Rew: 107.03   Avg Rew: -5.59   LR: 0.00007112   Polyak: 0.99990   Bf: 100   Loss: 6.606  0.875  1.066
Ep: 7990   Rew: 212.35   Avg Rew: 0.84   LR: 0.00006983   Polyak: 0.99990   Bf: 100   Loss: 8

Ep: 8640   Rew: -84.86   Avg Rew: -4.53   LR: 0.00007091   Polyak: 0.99990   Bf: 100   Loss: 6.981  0.749  0.797
Ep: 8650   Rew: 8.83   Avg Rew: 0.33   LR: 0.00006993   Polyak: 0.99990   Bf: 100   Loss: 7.791  1.713  1.374
Ep: 8660   Rew: 120.81   Avg Rew: 4.59   LR: 0.00006908   Polyak: 0.99990   Bf: 100   Loss: 7.084  2.964  3.186
Ep: 8670   Rew: 93.58   Avg Rew: 8.06   LR: 0.00006839   Polyak: 0.99990   Bf: 100   Loss: 7.448  1.096  0.956
Ep: 8680   Rew: 78.13   Avg Rew: 3.31   LR: 0.00006934   Polyak: 0.99990   Bf: 100   Loss: 7.315  0.463  0.628
Ep: 8690   Rew: 76.78   Avg Rew: 3.07   LR: 0.00006939   Polyak: 0.99990   Bf: 100   Loss: 7.715  6.133  4.353
Ep: 8700   Rew: 132.49   Avg Rew: 13.11   LR: 0.00006738   Polyak: 0.99990   Bf: 100   Loss: 5.998  2.558  1.344
Ep: 8710   Rew: -90.37   Avg Rew: 18.53   LR: 0.00006629   Polyak: 0.99990   Bf: 100   Loss: 8.330  2.107  1.541
Ep: 8720   Rew: -47.30   Avg Rew: 22.04   LR: 0.00006559   Polyak: 0.99990   Bf: 100   Loss: 6.962  6.798 

Ep: 9370   Rew: -74.24   Avg Rew: 18.67   LR: 0.00006627   Polyak: 0.99990   Bf: 100   Loss: 5.515  1.488  0.421
Ep: 9380   Rew: -94.30   Avg Rew: 12.39   LR: 0.00006752   Polyak: 0.99990   Bf: 100   Loss: 4.770  0.652  1.112
Ep: 9390   Rew: 64.32   Avg Rew: 28.49   LR: 0.00006430   Polyak: 0.99990   Bf: 100   Loss: 4.799  0.556  0.311
Ep: 9400   Rew: -94.39   Avg Rew: 29.22   LR: 0.00006416   Polyak: 0.99990   Bf: 100   Loss: 5.021  0.416  0.285
Ep: 9410   Rew: 66.65   Avg Rew: 28.95   LR: 0.00006421   Polyak: 0.99990   Bf: 100   Loss: 5.967  1.316  1.728
Ep: 9420   Rew: -97.20   Avg Rew: 29.23   LR: 0.00006415   Polyak: 0.99990   Bf: 100   Loss: 5.063  0.686  0.773
Ep: 9430   Rew: -97.00   Avg Rew: 23.47   LR: 0.00006531   Polyak: 0.99990   Bf: 100   Loss: 4.990  0.751  1.401
Ep: 9440   Rew: -5.03   Avg Rew: 18.60   LR: 0.00006628   Polyak: 0.99990   Bf: 100   Loss: 5.629  0.333  0.816
Ep: 9450   Rew: -29.42   Avg Rew: 11.68   LR: 0.00006766   Polyak: 0.99990   Bf: 100   Loss: 4.981 

Ep: 10100   Rew: -83.22   Avg Rew: 27.17   LR: 0.00006457   Polyak: 0.99990   Bf: 100   Loss: 4.533  1.294  0.313
Ep: 10110   Rew: -87.74   Avg Rew: 31.67   LR: 0.00006367   Polyak: 0.99990   Bf: 100   Loss: 3.951  1.373  1.180
Ep: 10120   Rew: -81.16   Avg Rew: 36.93   LR: 0.00006261   Polyak: 0.99990   Bf: 100   Loss: 3.504  0.638  1.122
Ep: 10130   Rew: 36.60   Avg Rew: 38.79   LR: 0.00006224   Polyak: 0.99990   Bf: 100   Loss: 3.698  0.664  0.829
Ep: 10140   Rew: -73.14   Avg Rew: 32.82   LR: 0.00006344   Polyak: 0.99990   Bf: 100   Loss: 4.003  0.720  0.982
Ep: 10150   Rew: -46.87   Avg Rew: 30.27   LR: 0.00006395   Polyak: 0.99990   Bf: 100   Loss: 3.871  2.309  1.220
Ep: 10160   Rew: -48.69   Avg Rew: 28.53   LR: 0.00006429   Polyak: 0.99990   Bf: 100   Loss: 3.222  3.794  4.500
Ep: 10170   Rew: -124.57   Avg Rew: 22.62   LR: 0.00006548   Polyak: 0.99990   Bf: 100   Loss: 4.004  0.389  0.836
Ep: 10180   Rew: -55.95   Avg Rew: 20.83   LR: 0.00006583   Polyak: 0.99990   Bf: 100   

Ep: 10830   Rew: -131.46   Avg Rew: 29.66   LR: 0.00006407   Polyak: 0.99990   Bf: 100   Loss: 3.516  1.144  0.818
Ep: 10840   Rew: -80.89   Avg Rew: 24.75   LR: 0.00006505   Polyak: 0.99990   Bf: 100   Loss: 3.199  0.337  0.448
Ep: 10850   Rew: 22.96   Avg Rew: 28.61   LR: 0.00006428   Polyak: 0.99990   Bf: 100   Loss: 2.783  1.784  1.011
Ep: 10860   Rew: 85.39   Avg Rew: 33.12   LR: 0.00006338   Polyak: 0.99990   Bf: 100   Loss: 4.177  0.676  0.575
Ep: 10870   Rew: 2.43   Avg Rew: 34.53   LR: 0.00006309   Polyak: 0.99990   Bf: 100   Loss: 2.870  0.557  0.756
Ep: 10880   Rew: 55.97   Avg Rew: 25.27   LR: 0.00006495   Polyak: 0.99990   Bf: 100   Loss: 4.040  0.490  0.320
Ep: 10890   Rew: 167.09   Avg Rew: 27.74   LR: 0.00006445   Polyak: 0.99990   Bf: 100   Loss: 2.833  0.534  0.562
Ep: 10900   Rew: -35.63   Avg Rew: 29.89   LR: 0.00006402   Polyak: 0.99990   Bf: 100   Loss: 2.471  0.625  0.468
Ep: 10910   Rew: -41.27   Avg Rew: 28.86   LR: 0.00006423   Polyak: 0.99990   Bf: 100   Loss

Ep: 11560   Rew: 218.72   Avg Rew: 17.63   LR: 0.00006647   Polyak: 0.99990   Bf: 100   Loss: 2.892  0.572  0.985
Ep: 11570   Rew: 86.58   Avg Rew: 16.51   LR: 0.00006670   Polyak: 0.99990   Bf: 100   Loss: 3.041  0.540  0.597
Ep: 11580   Rew: -26.20   Avg Rew: 6.72   LR: 0.00006866   Polyak: 0.99990   Bf: 100   Loss: 2.892  0.644  0.520
Ep: 11590   Rew: -25.07   Avg Rew: 12.07   LR: 0.00006759   Polyak: 0.99990   Bf: 100   Loss: 3.375  0.698  0.345
Ep: 11600   Rew: -26.52   Avg Rew: 14.36   LR: 0.00006713   Polyak: 0.99990   Bf: 100   Loss: 4.037  0.457  0.294
Ep: 11610   Rew: 27.20   Avg Rew: 15.45   LR: 0.00006691   Polyak: 0.99990   Bf: 100   Loss: 1.960  1.374  0.899
Ep: 11620   Rew: 48.16   Avg Rew: 15.69   LR: 0.00006686   Polyak: 0.99990   Bf: 100   Loss: 2.487  1.121  0.782
Ep: 11630   Rew: -72.48   Avg Rew: 19.14   LR: 0.00006617   Polyak: 0.99990   Bf: 100   Loss: 3.414  0.699  0.726
Ep: 11640   Rew: -94.73   Avg Rew: 19.80   LR: 0.00006604   Polyak: 0.99990   Bf: 100   Loss

Ep: 12290   Rew: -79.79   Avg Rew: 20.11   LR: 0.00006598   Polyak: 0.99990   Bf: 100   Loss: 2.453  2.315  0.623
Ep: 12300   Rew: -92.76   Avg Rew: 8.68   LR: 0.00006826   Polyak: 0.99990   Bf: 100   Loss: 2.778  2.230  2.550
Ep: 12310   Rew: 132.57   Avg Rew: 13.63   LR: 0.00006727   Polyak: 0.99990   Bf: 100   Loss: 2.842  5.199  2.837
Ep: 12320   Rew: -171.38   Avg Rew: 20.87   LR: 0.00006583   Polyak: 0.99990   Bf: 100   Loss: 2.943  2.892  1.327
Ep: 12330   Rew: -5.87   Avg Rew: 22.87   LR: 0.00006543   Polyak: 0.99990   Bf: 100   Loss: 2.217  0.395  0.413
Ep: 12340   Rew: 134.12   Avg Rew: 20.82   LR: 0.00006584   Polyak: 0.99990   Bf: 100   Loss: 3.390  0.659  0.490
Ep: 12350   Rew: -48.44   Avg Rew: 28.79   LR: 0.00006424   Polyak: 0.99990   Bf: 100   Loss: 1.407  0.563  2.109
Ep: 12360   Rew: 98.36   Avg Rew: 27.60   LR: 0.00006448   Polyak: 0.99990   Bf: 100   Loss: 2.154  0.563  1.258
Ep: 12370   Rew: -33.57   Avg Rew: 16.67   LR: 0.00006667   Polyak: 0.99990   Bf: 100   Lo

Ep: 13020   Rew: -60.21   Avg Rew: 16.99   LR: 0.00006660   Polyak: 0.99990   Bf: 100   Loss: 1.965  0.694  0.344
Ep: 13030   Rew: -56.73   Avg Rew: 15.40   LR: 0.00006692   Polyak: 0.99990   Bf: 100   Loss: 2.455  0.470  0.257
Ep: 13040   Rew: 233.78   Avg Rew: 17.23   LR: 0.00006655   Polyak: 0.99990   Bf: 100   Loss: 1.986  0.437  0.737
Ep: 13050   Rew: 125.19   Avg Rew: 27.46   LR: 0.00006451   Polyak: 0.99990   Bf: 100   Loss: 3.159  1.162  0.986
Ep: 13060   Rew: 69.54   Avg Rew: 33.72   LR: 0.00006326   Polyak: 0.99990   Bf: 100   Loss: 3.071  0.436  0.282
Ep: 13070   Rew: 23.72   Avg Rew: 31.88   LR: 0.00006362   Polyak: 0.99990   Bf: 100   Loss: 2.256  0.312  0.256
Ep: 13080   Rew: -82.03   Avg Rew: 35.37   LR: 0.00006293   Polyak: 0.99990   Bf: 100   Loss: 2.744  3.925  4.110
Ep: 13090   Rew: 2.89   Avg Rew: 34.76   LR: 0.00006305   Polyak: 0.99990   Bf: 100   Loss: 3.390  0.907  0.888
Ep: 13100   Rew: 4.74   Avg Rew: 39.25   LR: 0.00006215   Polyak: 0.99990   Bf: 100   Loss: 

Ep: 13750   Rew: -38.32   Avg Rew: 28.13   LR: 0.00006437   Polyak: 0.99990   Bf: 100   Loss: 3.912  1.279  1.107
Ep: 13760   Rew: -91.06   Avg Rew: 30.71   LR: 0.00006386   Polyak: 0.99990   Bf: 100   Loss: 2.618  2.744  0.951
Ep: 13770   Rew: -24.48   Avg Rew: 26.78   LR: 0.00006464   Polyak: 0.99990   Bf: 100   Loss: 3.116  0.831  2.006
Ep: 13780   Rew: 277.57   Avg Rew: 30.40   LR: 0.00006392   Polyak: 0.99990   Bf: 100   Loss: 2.417  0.439  0.709
Ep: 13790   Rew: -83.05   Avg Rew: 31.38   LR: 0.00006372   Polyak: 0.99990   Bf: 100   Loss: 2.234  1.069  0.756
Ep: 13800   Rew: -86.85   Avg Rew: 26.97   LR: 0.00006461   Polyak: 0.99990   Bf: 100   Loss: 3.182  1.119  0.732
Ep: 13810   Rew: 185.51   Avg Rew: 22.93   LR: 0.00006541   Polyak: 0.99990   Bf: 100   Loss: 3.555  0.950  0.907
Ep: 13820   Rew: -60.82   Avg Rew: 19.20   LR: 0.00006616   Polyak: 0.99990   Bf: 100   Loss: 2.875  0.836  0.590
Ep: 13830   Rew: 78.64   Avg Rew: 15.36   LR: 0.00006693   Polyak: 0.99990   Bf: 100   L

Ep: 14480   Rew: -84.76   Avg Rew: 4.14   LR: 0.00006917   Polyak: 0.99990   Bf: 100   Loss: 4.223  0.987  0.759
Ep: 14490   Rew: -76.70   Avg Rew: 0.10   LR: 0.00006998   Polyak: 0.99990   Bf: 100   Loss: 3.622  0.501  0.355
Ep: 14500   Rew: 276.85   Avg Rew: 2.96   LR: 0.00006941   Polyak: 0.99990   Bf: 100   Loss: 3.252  0.765  0.673
Ep: 14510   Rew: -16.33   Avg Rew: 4.80   LR: 0.00006904   Polyak: 0.99990   Bf: 100   Loss: 4.149  5.464  5.395
Ep: 14520   Rew: -53.95   Avg Rew: 10.00   LR: 0.00006800   Polyak: 0.99990   Bf: 100   Loss: 3.584  3.199  3.885
Ep: 14530   Rew: 178.12   Avg Rew: 11.03   LR: 0.00006779   Polyak: 0.99990   Bf: 100   Loss: 4.564  2.513  1.742
Ep: 14540   Rew: -29.40   Avg Rew: 14.10   LR: 0.00006718   Polyak: 0.99990   Bf: 100   Loss: 3.649  0.621  0.769
Ep: 14550   Rew: -73.80   Avg Rew: 12.70   LR: 0.00006746   Polyak: 0.99990   Bf: 100   Loss: 2.398  2.825  3.327
Ep: 14560   Rew: -78.42   Avg Rew: 15.56   LR: 0.00006689   Polyak: 0.99990   Bf: 100   Loss

Ep: 15210   Rew: 123.50   Avg Rew: 10.82   LR: 0.00006784   Polyak: 0.99990   Bf: 100   Loss: 3.778  1.050  0.658
Ep: 15220   Rew: -56.65   Avg Rew: 14.64   LR: 0.00006707   Polyak: 0.99990   Bf: 100   Loss: 2.621  0.840  1.320
Ep: 15230   Rew: 179.26   Avg Rew: 17.62   LR: 0.00006648   Polyak: 0.99990   Bf: 100   Loss: 3.568  1.273  1.518
Ep: 15240   Rew: 56.92   Avg Rew: 24.16   LR: 0.00006517   Polyak: 0.99990   Bf: 100   Loss: 4.317  1.204  0.964
Ep: 15250   Rew: 273.85   Avg Rew: 31.86   LR: 0.00006363   Polyak: 0.99990   Bf: 100   Loss: 3.134  0.829  0.554
Ep: 15260   Rew: -93.45   Avg Rew: 30.81   LR: 0.00006384   Polyak: 0.99990   Bf: 100   Loss: 4.516  0.406  0.486
Ep: 15270   Rew: -50.27   Avg Rew: 24.66   LR: 0.00006507   Polyak: 0.99990   Bf: 100   Loss: 3.412  0.440  0.708
Ep: 15280   Rew: -10.08   Avg Rew: 26.14   LR: 0.00006477   Polyak: 0.99990   Bf: 100   Loss: 4.919  1.604  1.796
Ep: 15290   Rew: 112.38   Avg Rew: 27.38   LR: 0.00006452   Polyak: 0.99990   Bf: 100   L

Ep: 15940   Rew: 18.91   Avg Rew: 10.27   LR: 0.00006795   Polyak: 0.99990   Bf: 100   Loss: 3.874  0.843  1.011
Ep: 15950   Rew: -6.32   Avg Rew: 8.40   LR: 0.00006832   Polyak: 0.99990   Bf: 100   Loss: 3.691  0.836  1.166
Ep: 15960   Rew: 82.20   Avg Rew: 13.17   LR: 0.00006737   Polyak: 0.99990   Bf: 100   Loss: 4.669  0.317  0.396
Ep: 15970   Rew: 169.46   Avg Rew: 14.78   LR: 0.00006704   Polyak: 0.99990   Bf: 100   Loss: 3.141  0.213  0.197
Ep: 15980   Rew: -62.29   Avg Rew: 15.90   LR: 0.00006682   Polyak: 0.99990   Bf: 100   Loss: 3.726  0.631  0.438
Ep: 15990   Rew: 63.94   Avg Rew: 12.17   LR: 0.00006757   Polyak: 0.99990   Bf: 100   Loss: 4.450  0.759  1.016
Ep: 16000   Rew: 32.35   Avg Rew: 0.52   LR: 0.00006990   Polyak: 0.99990   Bf: 100   Loss: 4.153  1.211  1.067
Ep: 16010   Rew: 101.81   Avg Rew: 1.02   LR: 0.00006980   Polyak: 0.99990   Bf: 100   Loss: 4.144  1.087  0.552
Ep: 16020   Rew: -82.35   Avg Rew: 4.10   LR: 0.00006918   Polyak: 0.99990   Bf: 100   Loss: 3.7

Ep: 16670   Rew: 166.63   Avg Rew: 26.59   LR: 0.00006468   Polyak: 0.99990   Bf: 100   Loss: 4.270  0.558  0.394
Ep: 16680   Rew: -75.39   Avg Rew: 27.76   LR: 0.00006445   Polyak: 0.99990   Bf: 100   Loss: 4.165  0.354  1.071
Ep: 16690   Rew: 98.85   Avg Rew: 33.37   LR: 0.00006333   Polyak: 0.99990   Bf: 100   Loss: 4.239  0.672  0.803
Ep: 16700   Rew: -75.50   Avg Rew: 27.58   LR: 0.00006448   Polyak: 0.99990   Bf: 100   Loss: 4.773  0.841  1.054
Ep: 16710   Rew: -93.00   Avg Rew: 25.04   LR: 0.00006499   Polyak: 0.99990   Bf: 100   Loss: 5.516  0.906  0.436
Ep: 16720   Rew: -0.77   Avg Rew: 20.62   LR: 0.00006588   Polyak: 0.99990   Bf: 100   Loss: 4.834  1.321  0.668
Ep: 16730   Rew: -31.91   Avg Rew: 11.77   LR: 0.00006765   Polyak: 0.99990   Bf: 100   Loss: 5.362  7.483  8.292
Ep: 16740   Rew: -77.05   Avg Rew: 7.55   LR: 0.00006849   Polyak: 0.99990   Bf: 100   Loss: 4.628  4.156  2.382
Ep: 16750   Rew: 11.19   Avg Rew: 12.08   LR: 0.00006758   Polyak: 0.99990   Bf: 100   Loss

Ep: 17400   Rew: -44.17   Avg Rew: 23.61   LR: 0.00006528   Polyak: 0.99990   Bf: 100   Loss: 6.071  0.852  1.536
Ep: 17410   Rew: -77.94   Avg Rew: 16.70   LR: 0.00006666   Polyak: 0.99990   Bf: 100   Loss: 5.697  0.857  0.810
Ep: 17420   Rew: -71.49   Avg Rew: 9.38   LR: 0.00006812   Polyak: 0.99990   Bf: 100   Loss: 4.047  0.405  0.389
Ep: 17430   Rew: 278.27   Avg Rew: 6.66   LR: 0.00006867   Polyak: 0.99990   Bf: 100   Loss: 4.055  0.390  0.202
Ep: 17440   Rew: -84.23   Avg Rew: 5.92   LR: 0.00006882   Polyak: 0.99990   Bf: 100   Loss: 3.500  1.078  0.821
Ep: 17450   Rew: 20.36   Avg Rew: 12.32   LR: 0.00006754   Polyak: 0.99990   Bf: 100   Loss: 3.449  3.249  4.282
Ep: 17460   Rew: -79.51   Avg Rew: 10.19   LR: 0.00006796   Polyak: 0.99990   Bf: 100   Loss: 4.623  0.467  0.684
Ep: 17470   Rew: -78.48   Avg Rew: 14.89   LR: 0.00006702   Polyak: 0.99990   Bf: 100   Loss: 4.766  1.047  3.123
Ep: 17480   Rew: 189.83   Avg Rew: 8.00   LR: 0.00006840   Polyak: 0.99990   Bf: 100   Loss:

Ep: 18130   Rew: -71.00   Avg Rew: 7.90   LR: 0.00006842   Polyak: 0.99990   Bf: 100   Loss: 4.833  1.358  1.465
Ep: 18140   Rew: 25.82   Avg Rew: 7.99   LR: 0.00006840   Polyak: 0.99990   Bf: 100   Loss: 4.289  1.468  0.497
Ep: 18150   Rew: -77.69   Avg Rew: -2.37   LR: 0.00007047   Polyak: 0.99990   Bf: 100   Loss: 5.542  0.698  1.160
Ep: 18160   Rew: -77.09   Avg Rew: -3.71   LR: 0.00007074   Polyak: 0.99990   Bf: 100   Loss: 4.515  0.742  0.348
Ep: 18170   Rew: 177.76   Avg Rew: -0.52   LR: 0.00007010   Polyak: 0.99990   Bf: 100   Loss: 4.542  0.580  0.525
Ep: 18180   Rew: -19.96   Avg Rew: -6.60   LR: 0.00007132   Polyak: 0.99990   Bf: 100   Loss: 4.680  0.926  1.270
Ep: 18190   Rew: -22.87   Avg Rew: -4.73   LR: 0.00007095   Polyak: 0.99990   Bf: 100   Loss: 4.317  2.446  0.454
Ep: 18200   Rew: -48.37   Avg Rew: -5.36   LR: 0.00007107   Polyak: 0.99990   Bf: 100   Loss: 3.709  0.651  0.635
Ep: 18210   Rew: -78.02   Avg Rew: 0.82   LR: 0.00006984   Polyak: 0.99990   Bf: 100   Loss

Ep: 18860   Rew: 59.73   Avg Rew: 9.49   LR: 0.00006810   Polyak: 0.99990   Bf: 100   Loss: 4.953  0.901  0.920
Ep: 18870   Rew: 278.32   Avg Rew: 9.23   LR: 0.00006815   Polyak: 0.99990   Bf: 100   Loss: 6.081  0.792  1.061
Ep: 18880   Rew: -72.83   Avg Rew: 9.23   LR: 0.00006815   Polyak: 0.99990   Bf: 100   Loss: 5.759  2.034  3.898
Ep: 18890   Rew: 42.10   Avg Rew: 9.36   LR: 0.00006813   Polyak: 0.99990   Bf: 100   Loss: 6.288  2.229  0.926
Ep: 18900   Rew: 278.23   Avg Rew: 5.81   LR: 0.00006884   Polyak: 0.99990   Bf: 100   Loss: 4.388  0.785  1.143
Ep: 18910   Rew: -17.95   Avg Rew: 5.63   LR: 0.00006887   Polyak: 0.99990   Bf: 100   Loss: 5.803  1.021  0.969
Ep: 18920   Rew: 12.33   Avg Rew: 3.94   LR: 0.00006921   Polyak: 0.99990   Bf: 100   Loss: 4.739  2.041  2.088
Ep: 18930   Rew: -74.59   Avg Rew: 5.78   LR: 0.00006884   Polyak: 0.99990   Bf: 100   Loss: 5.851  4.471  6.455
Ep: 18940   Rew: 40.56   Avg Rew: 6.90   LR: 0.00006862   Polyak: 0.99990   Bf: 100   Loss: 5.687  

Ep: 19590   Rew: -81.11   Avg Rew: 8.00   LR: 0.00006840   Polyak: 0.99990   Bf: 100   Loss: 6.125  3.585  1.747
Ep: 19600   Rew: -69.13   Avg Rew: -3.49   LR: 0.00007070   Polyak: 0.99990   Bf: 100   Loss: 5.718  2.054  1.448
Ep: 19610   Rew: 47.07   Avg Rew: 3.06   LR: 0.00006939   Polyak: 0.99990   Bf: 100   Loss: 5.752  0.297  0.269
Ep: 19620   Rew: 132.58   Avg Rew: 5.74   LR: 0.00006885   Polyak: 0.99990   Bf: 100   Loss: 6.299  1.294  1.240
Ep: 19630   Rew: 102.52   Avg Rew: 6.80   LR: 0.00006864   Polyak: 0.99990   Bf: 100   Loss: 5.715  1.506  1.079
Ep: 19640   Rew: 21.74   Avg Rew: 15.68   LR: 0.00006686   Polyak: 0.99990   Bf: 100   Loss: 6.718  0.608  1.037
Ep: 19650   Rew: -78.00   Avg Rew: 10.49   LR: 0.00006790   Polyak: 0.99990   Bf: 100   Loss: 6.399  0.728  1.557
Ep: 19660   Rew: 210.75   Avg Rew: 14.50   LR: 0.00006710   Polyak: 0.99990   Bf: 100   Loss: 5.836  1.443  0.668
Ep: 19670   Rew: 37.46   Avg Rew: 16.39   LR: 0.00006672   Polyak: 0.99990   Bf: 100   Loss: 6

Ep: 20320   Rew: 33.11   Avg Rew: 26.00   LR: 0.00006480   Polyak: 0.99990   Bf: 100   Loss: 6.351  0.717  0.805
Ep: 20330   Rew: -48.82   Avg Rew: 24.89   LR: 0.00006502   Polyak: 0.99990   Bf: 100   Loss: 6.155  0.525  0.524
Ep: 20340   Rew: 162.89   Avg Rew: 19.92   LR: 0.00006602   Polyak: 0.99990   Bf: 100   Loss: 5.710  0.677  0.731
Ep: 20350   Rew: 133.24   Avg Rew: 16.06   LR: 0.00006679   Polyak: 0.99990   Bf: 100   Loss: 7.064  1.507  2.435
Ep: 20360   Rew: 102.51   Avg Rew: 14.63   LR: 0.00006707   Polyak: 0.99990   Bf: 100   Loss: 6.029  5.044  4.791
Ep: 20370   Rew: -173.63   Avg Rew: 7.46   LR: 0.00006851   Polyak: 0.99990   Bf: 100   Loss: 6.615  0.473  0.519
Ep: 20380   Rew: -74.40   Avg Rew: 2.99   LR: 0.00006940   Polyak: 0.99990   Bf: 100   Loss: 6.202  0.805  0.709
Ep: 20390   Rew: 37.79   Avg Rew: 1.79   LR: 0.00006964   Polyak: 0.99990   Bf: 100   Loss: 6.228  0.876  0.845
Ep: 20400   Rew: 11.25   Avg Rew: 1.74   LR: 0.00006965   Polyak: 0.99990   Bf: 100   Loss: 

Ep: 21050   Rew: 285.26   Avg Rew: 12.49   LR: 0.00006750   Polyak: 0.99990   Bf: 100   Loss: 6.075  0.763  1.835
Ep: 21060   Rew: 77.42   Avg Rew: 18.94   LR: 0.00006621   Polyak: 0.99990   Bf: 100   Loss: 6.317  0.419  0.546
Ep: 21070   Rew: 93.79   Avg Rew: 20.20   LR: 0.00006596   Polyak: 0.99990   Bf: 100   Loss: 5.998  0.401  0.285
Ep: 21080   Rew: -30.38   Avg Rew: 18.60   LR: 0.00006628   Polyak: 0.99990   Bf: 100   Loss: 6.705  2.543  2.987
Ep: 21090   Rew: -74.57   Avg Rew: 11.11   LR: 0.00006778   Polyak: 0.99990   Bf: 100   Loss: 6.667  0.542  0.321
Ep: 21100   Rew: 87.35   Avg Rew: 17.95   LR: 0.00006641   Polyak: 0.99990   Bf: 100   Loss: 6.698  0.562  0.741
Ep: 21110   Rew: 193.46   Avg Rew: 15.69   LR: 0.00006686   Polyak: 0.99990   Bf: 100   Loss: 6.452  0.728  0.373
Ep: 21120   Rew: 74.02   Avg Rew: 16.87   LR: 0.00006663   Polyak: 0.99990   Bf: 100   Loss: 7.409  1.019  1.411
Ep: 21130   Rew: -79.79   Avg Rew: 19.21   LR: 0.00006616   Polyak: 0.99990   Bf: 100   Loss

Ep: 21780   Rew: -85.00   Avg Rew: -0.31   LR: 0.00007006   Polyak: 0.99990   Bf: 100   Loss: 6.693  1.779  2.771
Ep: 21790   Rew: 45.79   Avg Rew: -6.50   LR: 0.00007130   Polyak: 0.99990   Bf: 100   Loss: 6.222  0.582  0.375
Ep: 21800   Rew: 277.68   Avg Rew: 2.42   LR: 0.00006952   Polyak: 0.99990   Bf: 100   Loss: 7.099  1.949  1.147
Ep: 21810   Rew: 29.37   Avg Rew: 10.05   LR: 0.00006799   Polyak: 0.99990   Bf: 100   Loss: 6.027  0.843  0.751
Ep: 21820   Rew: -91.39   Avg Rew: 10.70   LR: 0.00006786   Polyak: 0.99990   Bf: 100   Loss: 6.901  3.465  0.940
Ep: 21830   Rew: -77.58   Avg Rew: 13.13   LR: 0.00006737   Polyak: 0.99990   Bf: 100   Loss: 7.103  0.413  0.635
Ep: 21840   Rew: 86.03   Avg Rew: 14.79   LR: 0.00006704   Polyak: 0.99990   Bf: 100   Loss: 7.373  0.681  0.666
Ep: 21850   Rew: -58.03   Avg Rew: 15.80   LR: 0.00006684   Polyak: 0.99990   Bf: 100   Loss: 6.346  1.354  0.858
Ep: 21860   Rew: 88.29   Avg Rew: 25.21   LR: 0.00006496   Polyak: 0.99990   Bf: 100   Loss:

Ep: 22510   Rew: -2.21   Avg Rew: -16.32   LR: 0.00007326   Polyak: 0.99990   Bf: 100   Loss: 6.558  0.632  0.495
Ep: 22520   Rew: -36.17   Avg Rew: -14.60   LR: 0.00007292   Polyak: 0.99990   Bf: 100   Loss: 5.856  0.597  0.914
Ep: 22530   Rew: -91.54   Avg Rew: -15.40   LR: 0.00007308   Polyak: 0.99990   Bf: 100   Loss: 7.146  1.485  1.455
Ep: 22540   Rew: -83.34   Avg Rew: -17.28   LR: 0.00007346   Polyak: 0.99990   Bf: 100   Loss: 6.729  1.850  0.926
Ep: 22550   Rew: -117.41   Avg Rew: -14.66   LR: 0.00007293   Polyak: 0.99990   Bf: 100   Loss: 7.038  1.459  1.012
Ep: 22560   Rew: -23.88   Avg Rew: -18.90   LR: 0.00007378   Polyak: 0.99990   Bf: 100   Loss: 6.214  0.332  0.443
Ep: 22570   Rew: -80.21   Avg Rew: -13.63   LR: 0.00007273   Polyak: 0.99990   Bf: 100   Loss: 6.310  0.500  0.562
Ep: 22580   Rew: -104.26   Avg Rew: -18.37   LR: 0.00007367   Polyak: 0.99990   Bf: 100   Loss: 6.016  0.662  0.775
Ep: 22590   Rew: 285.23   Avg Rew: -20.07   LR: 0.00007401   Polyak: 0.99990   

Ep: 23230   Rew: -31.00   Avg Rew: -2.73   LR: 0.00007055   Polyak: 0.99990   Bf: 100   Loss: 6.514  0.957  1.205
Ep: 23240   Rew: -21.58   Avg Rew: -4.65   LR: 0.00007093   Polyak: 0.99990   Bf: 100   Loss: 6.483  0.980  0.834
Ep: 23250   Rew: -46.43   Avg Rew: 1.57   LR: 0.00006969   Polyak: 0.99990   Bf: 100   Loss: 6.394  1.562  0.724
Ep: 23260   Rew: -29.61   Avg Rew: 8.95   LR: 0.00006821   Polyak: 0.99990   Bf: 100   Loss: 6.867  0.799  0.932
Ep: 23270   Rew: 9.88   Avg Rew: 6.84   LR: 0.00006863   Polyak: 0.99990   Bf: 100   Loss: 6.225  1.957  2.446
Ep: 23280   Rew: -44.09   Avg Rew: 4.07   LR: 0.00006919   Polyak: 0.99990   Bf: 100   Loss: 6.132  0.621  0.739
Ep: 23290   Rew: -90.58   Avg Rew: 2.58   LR: 0.00006948   Polyak: 0.99990   Bf: 100   Loss: 6.548  0.670  0.367
Ep: 23300   Rew: -106.64   Avg Rew: 3.72   LR: 0.00006926   Polyak: 0.99990   Bf: 100   Loss: 6.602  1.502  0.715
Ep: 23310   Rew: -15.19   Avg Rew: 5.92   LR: 0.00006882   Polyak: 0.99990   Bf: 100   Loss: 5.

Ep: 23960   Rew: -94.97   Avg Rew: -7.69   LR: 0.00007154   Polyak: 0.99990   Bf: 100   Loss: 5.887  0.470  1.290
Ep: 23970   Rew: -54.11   Avg Rew: -13.28   LR: 0.00007266   Polyak: 0.99990   Bf: 100   Loss: 5.778  0.277  0.367
Ep: 23980   Rew: -49.21   Avg Rew: -22.97   LR: 0.00007459   Polyak: 0.99990   Bf: 100   Loss: 6.548  0.628  0.699
Ep: 23990   Rew: -76.53   Avg Rew: -24.96   LR: 0.00007499   Polyak: 0.99990   Bf: 100   Loss: 5.784  0.337  0.333
Ep: 24000   Rew: -82.22   Avg Rew: -18.16   LR: 0.00007363   Polyak: 0.99990   Bf: 100   Loss: 6.878  0.441  2.125
Ep: 24010   Rew: -73.95   Avg Rew: -19.17   LR: 0.00007383   Polyak: 0.99990   Bf: 100   Loss: 6.018  1.431  1.878
Ep: 24020   Rew: 181.97   Avg Rew: -15.35   LR: 0.00007307   Polyak: 0.99990   Bf: 100   Loss: 6.962  0.848  1.235
Ep: 24030   Rew: -33.45   Avg Rew: -23.70   LR: 0.00007474   Polyak: 0.99990   Bf: 100   Loss: 6.774  1.563  1.007
Ep: 24040   Rew: 112.68   Avg Rew: -21.09   LR: 0.00007422   Polyak: 0.99990   Bf

Ep: 24680   Rew: -6.38   Avg Rew: -14.35   LR: 0.00007287   Polyak: 0.99990   Bf: 100   Loss: 6.409  0.426  0.540
Ep: 24690   Rew: 2.46   Avg Rew: -13.23   LR: 0.00007265   Polyak: 0.99990   Bf: 100   Loss: 7.295  1.027  1.040
Ep: 24700   Rew: -104.17   Avg Rew: -18.68   LR: 0.00007374   Polyak: 0.99990   Bf: 100   Loss: 6.273  0.730  0.559
Ep: 24710   Rew: -71.73   Avg Rew: -17.93   LR: 0.00007359   Polyak: 0.99990   Bf: 100   Loss: 6.959  0.806  1.569
Ep: 24720   Rew: -83.32   Avg Rew: -26.62   LR: 0.00007532   Polyak: 0.99990   Bf: 100   Loss: 6.331  0.387  0.686
Ep: 24730   Rew: 173.94   Avg Rew: -17.49   LR: 0.00007350   Polyak: 0.99990   Bf: 100   Loss: 6.787  0.400  0.479
Ep: 24740   Rew: -37.73   Avg Rew: -21.10   LR: 0.00007422   Polyak: 0.99990   Bf: 100   Loss: 6.432  0.976  1.591
Ep: 24750   Rew: -94.42   Avg Rew: -16.81   LR: 0.00007336   Polyak: 0.99990   Bf: 100   Loss: 6.855  0.603  0.718
Ep: 24760   Rew: -84.25   Avg Rew: -16.90   LR: 0.00007338   Polyak: 0.99990   Bf:

Ep: 25400   Rew: -69.59   Avg Rew: -16.84   LR: 0.00007337   Polyak: 0.99990   Bf: 100   Loss: 6.606  0.542  0.663
Ep: 25410   Rew: -36.10   Avg Rew: -18.11   LR: 0.00007362   Polyak: 0.99990   Bf: 100   Loss: 6.345  0.503  0.330
Ep: 25420   Rew: -87.38   Avg Rew: -17.64   LR: 0.00007353   Polyak: 0.99990   Bf: 100   Loss: 6.524  3.985  0.774
Ep: 25430   Rew: -81.04   Avg Rew: -20.83   LR: 0.00007417   Polyak: 0.99990   Bf: 100   Loss: 6.510  5.388  5.355
Ep: 25440   Rew: -87.79   Avg Rew: -21.34   LR: 0.00007427   Polyak: 0.99990   Bf: 100   Loss: 6.780  0.775  0.778
Ep: 25450   Rew: -85.07   Avg Rew: -21.86   LR: 0.00007437   Polyak: 0.99990   Bf: 100   Loss: 6.432  1.292  1.489
Ep: 25460   Rew: -76.79   Avg Rew: -22.45   LR: 0.00007449   Polyak: 0.99990   Bf: 100   Loss: 7.487  0.509  0.508
Ep: 25470   Rew: -13.24   Avg Rew: -20.66   LR: 0.00007413   Polyak: 0.99990   Bf: 100   Loss: 6.304  1.616  0.480
Ep: 25480   Rew: -0.20   Avg Rew: -19.28   LR: 0.00007386   Polyak: 0.99990   Bf

KeyboardInterrupt: 

In [None]:
def test(random_seed=0, n_episodes=3, max_timesteps=2000, render=True, save_gif=True):
    """Roll out a saved TD3 actor in `env_name` and print each episode's reward.

    Args:
        random_seed: Only used to build the checkpoint filename
            ("TD3_torch_<env_name>_<random_seed>"); it does not seed the
            environment or torch here.
            NOTE(review): the training configuration at the top of the
            notebook uses random_seed = 1 for the same filename pattern, so
            the default of 0 may point at a different checkpoint -- confirm.
        n_episodes: Number of evaluation episodes to run.
        max_timesteps: Upper bound on the number of steps per episode.
        render: If True, render the environment after every step.
        save_gif: If True (and `render` is True), save every rendered frame as
            a JPEG under ./gif/td3_torch/<episode>/<timestep>.jpg.

    Returns:
        None. Episode rewards are printed, truncated to int.
    """
    filename = "TD3_torch_{}_{}".format(env_name, random_seed)
    directory = "./preTrained/td3_torch/{}".format(env_name)

    env = gym.make(env_name)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])

    policy = TD3(state_dim, action_dim, max_action)
    policy.load_actor(directory, filename)

    # Frames are only captured while rendering, matching the original nesting
    # of the save_gif check inside the render check.
    capture_frames = render and save_gif

    try:
        for ep in range(1, n_episodes+1):
            ep_reward = 0
            state = env.reset()

            if capture_frames:
                # makedirs also creates the missing parents (./gif/td3_torch),
                # which a plain os.mkdir would fail on in a fresh checkout.
                os.makedirs('./gif/td3_torch/{}'.format(ep), exist_ok=True)

            for t in range(max_timesteps):
                action = policy.select_action(state)
                state, reward, done, _ = env.step(action)
                ep_reward += reward
                if render:
                    env.render()
                    if capture_frames:
                        img = env.render(mode = 'rgb_array')
                        img = Image.fromarray(img)
                        img.save('./gif/td3_torch/{}/{}.jpg'.format(ep,t))
                if done:
                    break

            print('Episode: {}\tReward: {}'.format(ep, int(ep_reward)))
    finally:
        # Close once, after all episodes (or on error), instead of after
        # every episode while the environment is still in use.
        env.close()
                
# Run the evaluation: loads the saved actor and rolls out the test episodes,
# rendering and saving frames according to the settings inside test().
test()
    
    