In [1]:
import os
import torch
import gym
import numpy as np
from TD3_torch.TD3 import TD3
from PIL import Image
from TD3_torch.utils import ReplayBuffer

# ---- Environment / run identity -------------------------------------------
env_name = 'BipedalWalkerHardcore-v2'
random_seed = 123               # seeds env, torch and numpy in train(); also part of the checkpoint name

# ---- Optimisation ----------------------------------------------------------
learning_rate_base = 1e-4       # starting learning rate; train() rescales it after every episode
gamma = 0.99                    # discount factor for future rewards
batch_size = 256                # transitions sampled from the replay buffer per update
max_buffer_length = 2000000     # max_length handed to ReplayBuffer

# ---- TD3-specific knobs ----------------------------------------------------
exploration_noise = 0.3         # std of the Gaussian noise added to each selected action
policy_noise = 0.2              # target policy smoothing noise
noise_clip = 0.5
policy_delay = 2                # delayed policy updates parameter
polyak_int = [0.9999, 0.999999] # [start, end] polyak (1-tau) values; train() currently reads only the first

# ---- Training schedule / logging -------------------------------------------
max_episodes = 100000           # upper bound on training episodes
max_timesteps = 3000            # upper bound on steps within one episode
log_interval = 10               # print running stats every this many episodes

# ---- Outputs ---------------------------------------------------------------
directory = "./preTrained/td3_torch/{}".format(env_name) # where trained models are saved
filename = "TD3_torch_{}_{}".format(env_name, random_seed)
reward_history = []             # per-episode returns, appended to by train()



def _run_episode(env, policy, replay_buffer, polyak):
    """Roll out one training episode, store its transitions and update the policy once.

    Returns the undiscounted sum of rewards collected during the episode.
    """
    ep_reward = 0
    state = env.reset()

    for t in range(max_timesteps):
        # select action and add exploration noise:
        action = policy.select_action(state)
        action = action + np.random.normal(0, exploration_noise, size=env.action_space.shape[0])
        action = action.clip(env.action_space.low, env.action_space.high)

        # take action in env:
        next_state, reward, done, _ = env.step(action)
        replay_buffer.add((state, action, reward, next_state, float(done)))
        state = next_state

        ep_reward += reward

        # Update only once, at the end of the episode; the final step index `t`
        # is passed through to policy.update as its second argument.
        if done or t == (max_timesteps - 1):
            policy.update(replay_buffer, t, batch_size, gamma, polyak, policy_noise, noise_clip, policy_delay)
            break

    return ep_reward


def _scheduled_learning_rate(avg_reward, reward_threshold):
    """Return the learning rate for the next episode given the running average reward.

    The rate shrinks linearly from `learning_rate_base` towards 10% of it as
    `avg_reward` approaches `reward_threshold`.
    """
    # NOTE(review): the 150 offset looks like an assumed worst-case average
    # reward of about -150 -- confirm before reusing with another environment.
    part = (reward_threshold - avg_reward) / (reward_threshold + 150)
    if part > 1:
        part = 1
    return learning_rate_base - learning_rate_base * (1 - part) * 0.9


def train():
    """Train a TD3 agent on `env_name` until the environment's reward threshold is reached.

    Reads its hyperparameters from the module-level configuration and appends
    every episode return to the module-level `reward_history`.

    Side effects:
        * writes one "episode,reward" line per episode to ``log.txt``;
        * saves a checkpoint under `directory` after every episode past 500,
          and a ``<filename>_solved`` checkpoint when the 100-episode average
          reward reaches ``env.spec.reward_threshold``;
        * prints progress every `log_interval` episodes.

    The log file and the environment are released even when training is
    interrupted (e.g. by KeyboardInterrupt).
    """
    env = gym.make(env_name)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])
    reward_threshold = env.spec.reward_threshold
    polyak = polyak_int[0]

    print("action_space={}".format(env.action_space))
    print("obs_space={}".format(env.observation_space))
    print("threshold={} \n".format(reward_threshold))

    # Seed *before* the agent is constructed so that any random initialisation
    # done inside the TD3 constructor is covered by the seed as well.
    if random_seed:
        print("Random Seed: {}".format(random_seed))
        env.seed(random_seed)
        torch.manual_seed(random_seed)
        np.random.seed(random_seed)

    policy = TD3(state_dim, action_dim, max_action, lr=learning_rate_base)
    replay_buffer = ReplayBuffer(max_length=max_buffer_length)

    # Make sure the checkpoint directory exists before the first policy.save().
    os.makedirs(directory, exist_ok=True)

    try:
        # logging variables:
        with open("log.txt", "w+") as log_f:
            # training procedure:
            for episode in range(1, max_episodes + 1):
                ep_reward = _run_episode(env, policy, replay_buffer, polyak)

                reward_history.append(ep_reward)
                avg_reward = np.mean(reward_history[-100:])

                # logging updates:
                log_f.write('{},{}\n'.format(episode, ep_reward))
                log_f.flush()

                # if the running average reaches the env's reward threshold
                # then save and stop training:
                if avg_reward >= reward_threshold:
                    print("########## Solved! ###########")
                    name = filename + '_solved'
                    policy.save(directory, name)
                    break

                # Polyak annealing towards polyak_int[1] is currently disabled;
                # `polyak` stays at polyak_int[0] for the whole run.

                # Calculate LR
                learning_rate = _scheduled_learning_rate(avg_reward, reward_threshold)
                policy.set_optimizers(lr=learning_rate)

                if episode > 500:
                    policy.save(directory, filename)

                # print avg reward every log interval:
                if episode % log_interval == 0:
                    print("Ep: {}   Rew: {:3.2f}   Avg Rew: {:3.2f}   LR: {:8.8f}   Polyak: {:6.6f}   Bf: {:2.0f}   Loss: {:5.3f}  {:5.3f}  {:5.3f}".format(
                        episode, ep_reward, avg_reward, learning_rate, polyak, replay_buffer.get_fill(), policy.actor_loss, policy.loss_Q1, policy.loss_Q2))
    finally:
        # Release the environment even on KeyboardInterrupt / errors.
        env.close()

# Start training when this cell is executed; progress is printed every
# `log_interval` episodes and per-episode rewards are written to log.txt.
train()


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
action_space=Box(4,)
obs_space=Box(24,)
threshold=300 

Random Seed: 123
Ep: 10   Rew: -111.31   Avg Rew: -117.01   LR: 0.00009340   Polyak: 0.999900   Bf:  0   Loss: -0.517  0.324  0.758
Ep: 20   Rew: -119.55   Avg Rew: -124.24   LR: 0.00009485   Polyak: 0.999900   Bf:  0   Loss: 0.045  43.249  32.319
Ep: 30   Rew: -146.34   Avg Rew: -136.97   LR: 0.00009739   Polyak: 0.999900   Bf:  1   Loss: -0.268  0.180  0.330
Ep: 40   Rew: -116.49   Avg Rew: -140.78   LR: 0.00009816   Polyak: 0.999900   Bf:  2   Loss: -0.205  0.222  0.324
Ep: 50   Rew: -108.39   Avg Rew: -134.69   LR: 0.00009694   Polyak: 0.999900   Bf:  2   Loss: -0.242  1.593  1.350
Ep: 60   Rew: -103.20   Avg Rew: -131.68   LR: 0.00009634   Polyak: 0.999900   Bf:  2   Loss: 0.034  0.393  7.603
Ep: 70   Rew: -106.55

Ep: 700   Rew: -123.50   Avg Rew: -119.88   LR: 0.00009398   Polyak: 0.999900   Bf:  6   Loss: 3.594  4.921  3.453
Ep: 710   Rew: -104.24   Avg Rew: -118.73   LR: 0.00009375   Polyak: 0.999900   Bf:  7   Loss: 1.973  5.707  5.737
Ep: 720   Rew: -114.41   Avg Rew: -117.75   LR: 0.00009355   Polyak: 0.999900   Bf:  7   Loss: 2.971  4.149  5.933
Ep: 730   Rew: -117.29   Avg Rew: -118.37   LR: 0.00009367   Polyak: 0.999900   Bf:  7   Loss: 1.894  2.468  2.653
Ep: 740   Rew: -130.22   Avg Rew: -117.45   LR: 0.00009349   Polyak: 0.999900   Bf:  7   Loss: 3.030  4.855  6.818
Ep: 750   Rew: -119.57   Avg Rew: -116.21   LR: 0.00009324   Polyak: 0.999900   Bf:  7   Loss: 3.066  32.471  33.706
Ep: 760   Rew: -115.76   Avg Rew: -115.84   LR: 0.00009317   Polyak: 0.999900   Bf:  7   Loss: 2.250  4.862  3.696
Ep: 770   Rew: -121.29   Avg Rew: -115.91   LR: 0.00009318   Polyak: 0.999900   Bf:  7   Loss: 5.442  5.277  6.591
Ep: 780   Rew: -103.26   Avg Rew: -115.79   LR: 0.00009316   Polyak: 0.999900 

Ep: 1410   Rew: -100.85   Avg Rew: -105.95   LR: 0.00009119   Polyak: 0.999900   Bf: 16   Loss: 4.965  1.684  1.254
Ep: 1420   Rew: -105.01   Avg Rew: -106.49   LR: 0.00009130   Polyak: 0.999900   Bf: 16   Loss: 6.073  1.003  0.634
Ep: 1430   Rew: -106.46   Avg Rew: -106.69   LR: 0.00009134   Polyak: 0.999900   Bf: 16   Loss: 7.757  1.246  0.919
Ep: 1440   Rew: -97.69   Avg Rew: -106.02   LR: 0.00009120   Polyak: 0.999900   Bf: 16   Loss: 7.802  0.434  0.680
Ep: 1450   Rew: -107.21   Avg Rew: -105.71   LR: 0.00009114   Polyak: 0.999900   Bf: 16   Loss: 5.179  0.896  0.760
Ep: 1460   Rew: -106.17   Avg Rew: -105.64   LR: 0.00009113   Polyak: 0.999900   Bf: 16   Loss: 5.477  1.234  0.516
Ep: 1470   Rew: -103.01   Avg Rew: -105.64   LR: 0.00009113   Polyak: 0.999900   Bf: 16   Loss: 5.888  0.492  0.616
Ep: 1480   Rew: -99.84   Avg Rew: -105.76   LR: 0.00009115   Polyak: 0.999900   Bf: 17   Loss: 6.818  1.512  1.446
Ep: 1490   Rew: -106.07   Avg Rew: -104.87   LR: 0.00009097   Polyak: 0.99

Ep: 2120   Rew: -101.12   Avg Rew: -102.28   LR: 0.00009046   Polyak: 0.999900   Bf: 19   Loss: 9.503  1.946  1.943
Ep: 2130   Rew: -105.96   Avg Rew: -102.45   LR: 0.00009049   Polyak: 0.999900   Bf: 19   Loss: 10.034  2.541  1.924
Ep: 2140   Rew: -96.70   Avg Rew: -102.10   LR: 0.00009042   Polyak: 0.999900   Bf: 19   Loss: 7.894  0.957  1.049
Ep: 2150   Rew: -105.11   Avg Rew: -102.11   LR: 0.00009042   Polyak: 0.999900   Bf: 19   Loss: 10.900  0.870  0.925
Ep: 2160   Rew: -138.14   Avg Rew: -102.26   LR: 0.00009045   Polyak: 0.999900   Bf: 20   Loss: 10.375  0.891  1.119
Ep: 2170   Rew: -100.13   Avg Rew: -102.13   LR: 0.00009043   Polyak: 0.999900   Bf: 20   Loss: 9.166  1.534  1.507
Ep: 2180   Rew: -104.47   Avg Rew: -102.65   LR: 0.00009053   Polyak: 0.999900   Bf: 20   Loss: 7.083  1.038  1.015
Ep: 2190   Rew: -119.95   Avg Rew: -102.62   LR: 0.00009052   Polyak: 0.999900   Bf: 20   Loss: 9.637  1.335  0.815
Ep: 2200   Rew: -109.82   Avg Rew: -103.12   LR: 0.00009062   Polyak: 

Ep: 2830   Rew: -94.67   Avg Rew: -97.38   LR: 0.00008948   Polyak: 0.999900   Bf: 23   Loss: 13.028  1.118  0.977
Ep: 2840   Rew: -95.43   Avg Rew: -97.31   LR: 0.00008946   Polyak: 0.999900   Bf: 23   Loss: 14.848  2.607  2.654
Ep: 2850   Rew: -95.40   Avg Rew: -97.41   LR: 0.00008948   Polyak: 0.999900   Bf: 23   Loss: 10.438  2.016  2.038
Ep: 2860   Rew: -94.88   Avg Rew: -96.91   LR: 0.00008938   Polyak: 0.999900   Bf: 23   Loss: 13.045  0.844  0.805
Ep: 2870   Rew: -100.16   Avg Rew: -96.90   LR: 0.00008938   Polyak: 0.999900   Bf: 23   Loss: 14.694  1.391  1.596
Ep: 2880   Rew: -121.65   Avg Rew: -96.91   LR: 0.00008938   Polyak: 0.999900   Bf: 23   Loss: 13.028  1.141  1.091
Ep: 2890   Rew: -103.38   Avg Rew: -96.52   LR: 0.00008930   Polyak: 0.999900   Bf: 23   Loss: 9.730  1.371  1.607
Ep: 2900   Rew: -98.10   Avg Rew: -96.24   LR: 0.00008925   Polyak: 0.999900   Bf: 23   Loss: 13.322  1.869  1.963
Ep: 2910   Rew: -96.51   Avg Rew: -96.51   LR: 0.00008930   Polyak: 0.999900  

Ep: 3550   Rew: -91.54   Avg Rew: -93.60   LR: 0.00008872   Polyak: 0.999900   Bf: 27   Loss: 10.136  2.324  2.547
Ep: 3560   Rew: -103.08   Avg Rew: -93.56   LR: 0.00008871   Polyak: 0.999900   Bf: 27   Loss: 14.675  2.159  1.637
Ep: 3570   Rew: -98.66   Avg Rew: -93.41   LR: 0.00008868   Polyak: 0.999900   Bf: 27   Loss: 13.739  1.774  1.370
Ep: 3580   Rew: -88.73   Avg Rew: -93.52   LR: 0.00008870   Polyak: 0.999900   Bf: 27   Loss: 13.992  0.759  0.656
Ep: 3590   Rew: -97.27   Avg Rew: -93.58   LR: 0.00008872   Polyak: 0.999900   Bf: 27   Loss: 16.740  1.982  1.886
Ep: 3600   Rew: -95.05   Avg Rew: -93.63   LR: 0.00008873   Polyak: 0.999900   Bf: 27   Loss: 14.340  1.679  1.660
Ep: 3610   Rew: -90.58   Avg Rew: -93.46   LR: 0.00008869   Polyak: 0.999900   Bf: 27   Loss: 13.088  1.568  1.872
Ep: 3620   Rew: -94.72   Avg Rew: -93.58   LR: 0.00008872   Polyak: 0.999900   Bf: 27   Loss: 13.295  1.608  1.534
Ep: 3630   Rew: -88.13   Avg Rew: -93.88   LR: 0.00008878   Polyak: 0.999900   

Ep: 4270   Rew: -96.89   Avg Rew: -89.56   LR: 0.00008791   Polyak: 0.999900   Bf: 30   Loss: 16.613  0.980  1.246
Ep: 4280   Rew: -87.72   Avg Rew: -89.69   LR: 0.00008794   Polyak: 0.999900   Bf: 30   Loss: 15.709  1.596  1.483
Ep: 4290   Rew: -85.20   Avg Rew: -89.62   LR: 0.00008792   Polyak: 0.999900   Bf: 30   Loss: 18.741  1.782  1.840
Ep: 4300   Rew: -82.03   Avg Rew: -89.65   LR: 0.00008793   Polyak: 0.999900   Bf: 30   Loss: 17.427  1.338  1.391
Ep: 4310   Rew: -90.56   Avg Rew: -89.84   LR: 0.00008797   Polyak: 0.999900   Bf: 30   Loss: 17.136  2.681  2.085
Ep: 4320   Rew: -82.94   Avg Rew: -89.94   LR: 0.00008799   Polyak: 0.999900   Bf: 30   Loss: 16.448  1.371  1.303
Ep: 4330   Rew: -92.70   Avg Rew: -89.71   LR: 0.00008794   Polyak: 0.999900   Bf: 31   Loss: 14.180  1.406  1.138
Ep: 4340   Rew: -87.02   Avg Rew: -89.43   LR: 0.00008789   Polyak: 0.999900   Bf: 31   Loss: 18.200  2.405  2.430
Ep: 4350   Rew: -86.81   Avg Rew: -89.77   LR: 0.00008795   Polyak: 0.999900   B

Ep: 4990   Rew: -56.01   Avg Rew: -77.77   LR: 0.00008555   Polyak: 0.999900   Bf: 35   Loss: 14.019  1.752  2.288
Ep: 5000   Rew: -75.57   Avg Rew: -77.32   LR: 0.00008546   Polyak: 0.999900   Bf: 35   Loss: 19.585  2.507  2.250
Ep: 5010   Rew: -80.18   Avg Rew: -77.58   LR: 0.00008552   Polyak: 0.999900   Bf: 35   Loss: 18.070  1.828  2.242
Ep: 5020   Rew: -62.11   Avg Rew: -77.77   LR: 0.00008555   Polyak: 0.999900   Bf: 35   Loss: 18.249  1.668  1.339
Ep: 5030   Rew: -70.34   Avg Rew: -77.73   LR: 0.00008555   Polyak: 0.999900   Bf: 35   Loss: 17.051  1.777  1.747
Ep: 5040   Rew: -80.99   Avg Rew: -76.88   LR: 0.00008538   Polyak: 0.999900   Bf: 35   Loss: 18.307  1.779  1.833
Ep: 5050   Rew: -32.08   Avg Rew: -77.12   LR: 0.00008542   Polyak: 0.999900   Bf: 35   Loss: 20.086  4.632  4.974
Ep: 5060   Rew: -45.86   Avg Rew: -76.63   LR: 0.00008533   Polyak: 0.999900   Bf: 35   Loss: 17.155  3.595  3.509
Ep: 5070   Rew: -92.69   Avg Rew: -75.97   LR: 0.00008519   Polyak: 0.999900   B

Ep: 5710   Rew: -88.90   Avg Rew: -98.07   LR: 0.00008961   Polyak: 0.999900   Bf: 44   Loss: 19.169  2.393  1.986
Ep: 5720   Rew: -90.48   Avg Rew: -97.40   LR: 0.00008948   Polyak: 0.999900   Bf: 44   Loss: 19.181  5.324  4.788
Ep: 5730   Rew: -54.51   Avg Rew: -96.45   LR: 0.00008929   Polyak: 0.999900   Bf: 44   Loss: 16.144  9.657  10.558
Ep: 5740   Rew: -99.09   Avg Rew: -91.23   LR: 0.00008825   Polyak: 0.999900   Bf: 44   Loss: 15.639  2.023  1.815
Ep: 5750   Rew: -24.33   Avg Rew: -83.20   LR: 0.00008664   Polyak: 0.999900   Bf: 45   Loss: 16.466  3.176  1.868
Ep: 5760   Rew: -95.61   Avg Rew: -80.10   LR: 0.00008602   Polyak: 0.999900   Bf: 45   Loss: 19.703  1.194  1.615
Ep: 5770   Rew: -88.49   Avg Rew: -77.04   LR: 0.00008541   Polyak: 0.999900   Bf: 45   Loss: 18.380  3.347  3.143
Ep: 5780   Rew: -93.85   Avg Rew: -75.23   LR: 0.00008505   Polyak: 0.999900   Bf: 45   Loss: 17.034  4.480  4.171
Ep: 5790   Rew: -92.30   Avg Rew: -75.70   LR: 0.00008514   Polyak: 0.999900   

Ep: 6430   Rew: -90.23   Avg Rew: -74.28   LR: 0.00008486   Polyak: 0.999900   Bf: 55   Loss: 17.188  2.718  2.422
Ep: 6440   Rew: -68.30   Avg Rew: -75.58   LR: 0.00008512   Polyak: 0.999900   Bf: 55   Loss: 19.428  23.631  11.086
Ep: 6450   Rew: -54.58   Avg Rew: -74.64   LR: 0.00008493   Polyak: 0.999900   Bf: 55   Loss: 19.880  5.830  5.318
Ep: 6460   Rew: -52.45   Avg Rew: -75.44   LR: 0.00008509   Polyak: 0.999900   Bf: 55   Loss: 19.414  3.622  2.562
Ep: 6470   Rew: -66.02   Avg Rew: -76.27   LR: 0.00008525   Polyak: 0.999900   Bf: 55   Loss: 18.669  2.870  2.901
Ep: 6480   Rew: -90.78   Avg Rew: -77.76   LR: 0.00008555   Polyak: 0.999900   Bf: 55   Loss: 18.621  6.018  6.051
Ep: 6490   Rew: -38.84   Avg Rew: -77.08   LR: 0.00008542   Polyak: 0.999900   Bf: 56   Loss: 20.716  2.356  2.303
Ep: 6500   Rew: 65.73   Avg Rew: -76.68   LR: 0.00008534   Polyak: 0.999900   Bf: 56   Loss: 15.848  2.540  4.385
Ep: 6510   Rew: -72.68   Avg Rew: -76.61   LR: 0.00008532   Polyak: 0.999900   

Ep: 7150   Rew: -121.31   Avg Rew: -61.57   LR: 0.00008231   Polyak: 0.999900   Bf: 71   Loss: 15.112  7.812  6.111
Ep: 7160   Rew: -76.06   Avg Rew: -63.78   LR: 0.00008276   Polyak: 0.999900   Bf: 71   Loss: 16.780  2.890  2.634
Ep: 7170   Rew: -96.12   Avg Rew: -65.37   LR: 0.00008307   Polyak: 0.999900   Bf: 71   Loss: 17.591  10.501  7.429
Ep: 7180   Rew: -83.10   Avg Rew: -68.55   LR: 0.00008371   Polyak: 0.999900   Bf: 72   Loss: 15.356  8.995  8.524
Ep: 7190   Rew: -7.64   Avg Rew: -69.74   LR: 0.00008395   Polyak: 0.999900   Bf: 72   Loss: 20.185  1.436  2.489
Ep: 7200   Rew: -79.76   Avg Rew: -71.53   LR: 0.00008431   Polyak: 0.999900   Bf: 72   Loss: 16.676  3.057  4.929
Ep: 7210   Rew: -129.82   Avg Rew: -73.18   LR: 0.00008464   Polyak: 0.999900   Bf: 73   Loss: 19.181  2.290  2.612
Ep: 7220   Rew: -92.53   Avg Rew: -73.84   LR: 0.00008477   Polyak: 0.999900   Bf: 73   Loss: 15.788  1.907  1.682
Ep: 7230   Rew: -44.51   Avg Rew: -76.01   LR: 0.00008520   Polyak: 0.999900  

Ep: 7870   Rew: -115.90   Avg Rew: -86.34   LR: 0.00008727   Polyak: 0.999900   Bf: 100   Loss: 16.205  5.102  3.870
Ep: 7880   Rew: -43.13   Avg Rew: -85.45   LR: 0.00008709   Polyak: 0.999900   Bf: 100   Loss: 16.813  1.954  2.163
Ep: 7890   Rew: -95.19   Avg Rew: -87.26   LR: 0.00008745   Polyak: 0.999900   Bf: 100   Loss: 14.663  8.432  7.503
Ep: 7900   Rew: -40.01   Avg Rew: -87.26   LR: 0.00008745   Polyak: 0.999900   Bf: 100   Loss: 14.909  2.166  2.232
Ep: 7910   Rew: -57.64   Avg Rew: -84.87   LR: 0.00008697   Polyak: 0.999900   Bf: 100   Loss: 15.594  4.275  4.396
Ep: 7920   Rew: -52.68   Avg Rew: -83.08   LR: 0.00008662   Polyak: 0.999900   Bf: 100   Loss: 15.187  3.322  2.867
Ep: 7930   Rew: -79.95   Avg Rew: -82.65   LR: 0.00008653   Polyak: 0.999900   Bf: 100   Loss: 16.007  5.681  4.418
Ep: 7940   Rew: -105.47   Avg Rew: -82.74   LR: 0.00008655   Polyak: 0.999900   Bf: 100   Loss: 16.419  2.485  2.977
Ep: 7950   Rew: -130.74   Avg Rew: -85.50   LR: 0.00008710   Polyak: 0

Ep: 8580   Rew: -98.34   Avg Rew: -48.63   LR: 0.00007973   Polyak: 0.999900   Bf: 100   Loss: 9.570  3.046  2.931
Ep: 8590   Rew: -69.99   Avg Rew: -45.01   LR: 0.00007900   Polyak: 0.999900   Bf: 100   Loss: 10.731  2.618  1.641
Ep: 8600   Rew: -88.22   Avg Rew: -47.88   LR: 0.00007958   Polyak: 0.999900   Bf: 100   Loss: 10.944  3.344  2.235
Ep: 8610   Rew: -45.00   Avg Rew: -47.62   LR: 0.00007952   Polyak: 0.999900   Bf: 100   Loss: 10.132  3.518  2.854
Ep: 8620   Rew: -70.26   Avg Rew: -49.57   LR: 0.00007991   Polyak: 0.999900   Bf: 100   Loss: 9.631  1.479  1.717
Ep: 8630   Rew: -72.93   Avg Rew: -54.90   LR: 0.00008098   Polyak: 0.999900   Bf: 100   Loss: 8.591  2.746  3.108
Ep: 8640   Rew: -52.27   Avg Rew: -52.37   LR: 0.00008047   Polyak: 0.999900   Bf: 100   Loss: 11.116  2.094  2.613
Ep: 8650   Rew: -88.45   Avg Rew: -52.60   LR: 0.00008052   Polyak: 0.999900   Bf: 100   Loss: 10.448  5.003  3.242
Ep: 8660   Rew: -176.61   Avg Rew: -50.08   LR: 0.00008002   Polyak: 0.9999

Ep: 9300   Rew: -68.00   Avg Rew: -7.23   LR: 0.00007145   Polyak: 0.999900   Bf: 100   Loss: 7.560  1.874  0.998
Ep: 9310   Rew: 42.13   Avg Rew: -4.04   LR: 0.00007081   Polyak: 0.999900   Bf: 100   Loss: 7.195  2.006  1.082
Ep: 9320   Rew: 123.94   Avg Rew: -3.84   LR: 0.00007077   Polyak: 0.999900   Bf: 100   Loss: 7.761  1.430  0.859
Ep: 9330   Rew: -66.77   Avg Rew: -1.80   LR: 0.00007036   Polyak: 0.999900   Bf: 100   Loss: 7.469  1.511  0.932
Ep: 9340   Rew: -2.32   Avg Rew: -0.37   LR: 0.00007007   Polyak: 0.999900   Bf: 100   Loss: 6.882  1.543  2.387
Ep: 9350   Rew: 40.49   Avg Rew: -15.01   LR: 0.00007300   Polyak: 0.999900   Bf: 100   Loss: 7.080  1.771  2.171
Ep: 9360   Rew: 255.19   Avg Rew: -8.51   LR: 0.00007170   Polyak: 0.999900   Bf: 100   Loss: 7.115  3.340  5.405
Ep: 9370   Rew: -48.76   Avg Rew: -8.97   LR: 0.00007179   Polyak: 0.999900   Bf: 100   Loss: 8.999  0.442  0.442
Ep: 9380   Rew: 28.79   Avg Rew: -7.61   LR: 0.00007152   Polyak: 0.999900   Bf: 100   Los

Ep: 10030   Rew: 29.46   Avg Rew: 15.97   LR: 0.00006681   Polyak: 0.999900   Bf: 100   Loss: 4.071  0.598  0.315
Ep: 10040   Rew: 26.46   Avg Rew: 21.33   LR: 0.00006573   Polyak: 0.999900   Bf: 100   Loss: 5.236  0.515  0.479
Ep: 10050   Rew: -4.36   Avg Rew: 17.02   LR: 0.00006660   Polyak: 0.999900   Bf: 100   Loss: 4.825  0.619  0.554
Ep: 10060   Rew: 99.50   Avg Rew: 18.67   LR: 0.00006627   Polyak: 0.999900   Bf: 100   Loss: 5.028  0.833  0.867
Ep: 10070   Rew: 81.07   Avg Rew: 19.13   LR: 0.00006617   Polyak: 0.999900   Bf: 100   Loss: 4.770  1.721  2.871
Ep: 10080   Rew: -22.06   Avg Rew: 11.34   LR: 0.00006773   Polyak: 0.999900   Bf: 100   Loss: 4.999  0.856  0.806
Ep: 10090   Rew: -17.67   Avg Rew: 6.26   LR: 0.00006875   Polyak: 0.999900   Bf: 100   Loss: 5.921  0.416  0.583
Ep: 10100   Rew: -83.24   Avg Rew: 7.88   LR: 0.00006842   Polyak: 0.999900   Bf: 100   Loss: 5.435  0.447  0.718
Ep: 10110   Rew: 1.27   Avg Rew: 11.42   LR: 0.00006772   Polyak: 0.999900   Bf: 100   

Ep: 10750   Rew: -76.94   Avg Rew: 20.56   LR: 0.00006589   Polyak: 0.999900   Bf: 100   Loss: 3.432  1.554  0.781
Ep: 10760   Rew: 100.56   Avg Rew: 17.37   LR: 0.00006653   Polyak: 0.999900   Bf: 100   Loss: 3.820  0.619  0.513
Ep: 10770   Rew: 219.86   Avg Rew: 17.24   LR: 0.00006655   Polyak: 0.999900   Bf: 100   Loss: 4.494  0.954  0.585
Ep: 10780   Rew: -43.98   Avg Rew: 14.07   LR: 0.00006719   Polyak: 0.999900   Bf: 100   Loss: 4.032  1.089  0.664
Ep: 10790   Rew: 114.22   Avg Rew: 16.46   LR: 0.00006671   Polyak: 0.999900   Bf: 100   Loss: 3.745  0.838  0.857
Ep: 10800   Rew: 62.98   Avg Rew: 14.28   LR: 0.00006714   Polyak: 0.999900   Bf: 100   Loss: 3.785  0.628  1.188
Ep: 10810   Rew: 8.21   Avg Rew: 11.86   LR: 0.00006763   Polyak: 0.999900   Bf: 100   Loss: 4.604  2.065  1.323
Ep: 10820   Rew: 16.76   Avg Rew: 6.98   LR: 0.00006860   Polyak: 0.999900   Bf: 100   Loss: 3.723  1.606  2.077
Ep: 10830   Rew: -75.44   Avg Rew: 5.93   LR: 0.00006881   Polyak: 0.999900   Bf: 100

Ep: 11470   Rew: 172.83   Avg Rew: 37.75   LR: 0.00006245   Polyak: 0.999900   Bf: 100   Loss: 3.790  1.738  1.867
Ep: 11480   Rew: -75.65   Avg Rew: 35.76   LR: 0.00006285   Polyak: 0.999900   Bf: 100   Loss: 4.066  0.586  0.631
Ep: 11490   Rew: -9.85   Avg Rew: 32.92   LR: 0.00006342   Polyak: 0.999900   Bf: 100   Loss: 3.545  0.224  0.390
Ep: 11500   Rew: 281.62   Avg Rew: 30.48   LR: 0.00006390   Polyak: 0.999900   Bf: 100   Loss: 3.336  1.199  1.350
Ep: 11510   Rew: 53.03   Avg Rew: 24.22   LR: 0.00006516   Polyak: 0.999900   Bf: 100   Loss: 3.856  2.066  1.979
Ep: 11520   Rew: 159.65   Avg Rew: 19.18   LR: 0.00006616   Polyak: 0.999900   Bf: 100   Loss: 3.027  1.617  0.958
Ep: 11530   Rew: 16.39   Avg Rew: 17.97   LR: 0.00006641   Polyak: 0.999900   Bf: 100   Loss: 3.750  0.607  1.076
Ep: 11540   Rew: -15.77   Avg Rew: 19.05   LR: 0.00006619   Polyak: 0.999900   Bf: 100   Loss: 3.477  1.125  2.523
Ep: 11550   Rew: -65.98   Avg Rew: 16.52   LR: 0.00006670   Polyak: 0.999900   Bf: 

Ep: 12190   Rew: 92.89   Avg Rew: 19.13   LR: 0.00006617   Polyak: 0.999900   Bf: 100   Loss: 3.560  0.543  1.005
Ep: 12200   Rew: -17.34   Avg Rew: 18.01   LR: 0.00006640   Polyak: 0.999900   Bf: 100   Loss: 3.138  1.341  1.444
Ep: 12210   Rew: -66.68   Avg Rew: 18.05   LR: 0.00006639   Polyak: 0.999900   Bf: 100   Loss: 3.110  0.772  2.388
Ep: 12220   Rew: -46.35   Avg Rew: 12.16   LR: 0.00006757   Polyak: 0.999900   Bf: 100   Loss: 2.769  2.620  2.406
Ep: 12230   Rew: 71.80   Avg Rew: 15.03   LR: 0.00006699   Polyak: 0.999900   Bf: 100   Loss: 3.835  2.296  1.221
Ep: 12240   Rew: -95.84   Avg Rew: 15.15   LR: 0.00006697   Polyak: 0.999900   Bf: 100   Loss: 1.756  0.752  0.543
Ep: 12250   Rew: 71.25   Avg Rew: 20.09   LR: 0.00006598   Polyak: 0.999900   Bf: 100   Loss: 3.572  2.483  3.984
Ep: 12260   Rew: -31.66   Avg Rew: 17.18   LR: 0.00006656   Polyak: 0.999900   Bf: 100   Loss: 4.288  4.198  3.358
Ep: 12270   Rew: 48.97   Avg Rew: 22.36   LR: 0.00006553   Polyak: 0.999900   Bf: 1

KeyboardInterrupt: 

In [None]:
def test(random_seed=0, n_episodes=3, max_timesteps=2000, render=True, save_gif=True):
    """Run a saved TD3 actor in `env_name` and print the return of each episode.

    Args:
        random_seed: only used to build the checkpoint file name
            ``TD3_torch_<env_name>_<random_seed>``; nothing is seeded here.
            NOTE(review): the default (0) differs from the seed used by the
            training configuration in this notebook (123) -- pass the training
            seed to evaluate a freshly trained model.
        n_episodes: number of evaluation episodes.
        max_timesteps: maximum number of steps per episode.
        render: when true, render the environment on every step.
        save_gif: when true (and `render` is true), also save every frame as
            ``./gif/td3_torch/<episode>/<step>.jpg``.
    """
    filename = "TD3_torch_{}_{}".format(env_name, random_seed)
    directory = "./preTrained/td3_torch/{}".format(env_name)

    env = gym.make(env_name)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])

    policy = TD3(state_dim, action_dim, max_action)

    policy.load_actor(directory, filename)

    # Frames are only captured while rendering, as in the per-step check below.
    capture_frames = render and save_gif

    try:
        for ep in range(1, n_episodes+1):
            ep_reward = 0
            state = env.reset()

            if capture_frames:
                # Create the per-episode frame directory once, including any
                # missing parents (os.mkdir would fail if ./gif/td3_torch is absent).
                dirname = './gif/td3_torch/{}'.format(ep)
                os.makedirs(dirname, exist_ok=True)

            for t in range(max_timesteps):
                action = policy.select_action(state)
                state, reward, done, _ = env.step(action)
                ep_reward += reward
                if render:
                    env.render()
                    if capture_frames:
                        img = env.render(mode = 'rgb_array')
                        img = Image.fromarray(img)
                        img.save('{}/{}.jpg'.format(dirname, t))
                if done:
                    break

            print('Episode: {}\tReward: {}'.format(ep, int(ep_reward)))
    finally:
        # Close the environment once, after all episodes (or on error),
        # instead of closing it inside the episode loop and reusing it.
        env.close()
                
# Evaluate the saved actor when this cell is executed.
test()
    
    