In [1]:
import os
import torch
import gym
import numpy as np
from TD3_torch.TD3 import TD3
from PIL import Image
from TD3_torch.utils import ReplayBuffer

# ---------------------------------------------------------------------------
# Experiment configuration (read as module-level globals by train() / test()).
# ---------------------------------------------------------------------------
env_name = 'BipedalWalkerHardcore-v2'
# Starting learning rate; train() lowers it (down to 10% of this value) as the
# running average reward approaches the environment's reward threshold.
learning_rate_base = 0.0001
log_interval = 10           # print a progress line every `log_interval` episodes
# Seed applied to the env, torch and numpy inside train() (skipped when falsy).
random_seed = 333
gamma = 0.99                # discount for future rewards
batch_size = 1024        # num of transitions sampled from replay buffer
# Starting std-dev of the Gaussian action noise; annealed in train() with the
# same schedule as the learning rate.
exploration_noise_base = 0.3 
# Target-network update parameter (1-tau). Only the first entry is used by
# train(); the second is referenced solely by a disabled schedule there.
polyak_int = [0.9999, 0.999999]              # target policy update parameter (1-tau)
policy_noise = 0.2          # target policy smoothing noise
# Passed to policy.update together with policy_noise
# (presumably the clipping bound for that noise -- confirm in TD3.update).
noise_clip = 0.5
policy_delay = 2            # delayed policy updates parameter
max_episodes = 100000         # max num of episodes
max_timesteps = 3000        # max timesteps in one episode
# Capacity handed to ReplayBuffer(max_length=...).
max_buffer_length = 2000000
directory = "./preTrained/td3_torch/{}".format(env_name) # save trained models
# Checkpoint base name; includes the seed, so changing random_seed changes
# which checkpoint train() loads and saves.
filename = "TD3_torch_{}_{}".format(env_name, random_seed)
# Per-episode returns appended by train(). Module-level, so it keeps growing
# across repeated train() calls in the same kernel.
reward_history = []



def train():
    """Train a TD3 agent on ``env_name`` and checkpoint it under ``directory``.

    All hyper-parameters come from the module-level configuration. In every
    episode the agent acts with Gaussian exploration noise, each transition is
    stored in the replay buffer, and ``policy.update`` is called once when the
    episode ends (it receives the final step index ``t``; presumably the
    number of gradient iterations -- confirm in ``TD3.update``).

    After each episode the learning rate and the exploration noise are
    annealed from their base value towards 10% of it as the 100-episode
    average reward approaches ``env.spec.reward_threshold``.

    Side effects:
        * overwrites ``log.txt`` with ``episode,reward`` lines;
        * appends every episode return to the module-level ``reward_history``;
        * saves a checkpoint after every episode once ``episode > 500``;
        * saves ``<filename>_solved`` and stops when the average reward
          reaches the threshold.

    Returns:
        None.
    """
    env = gym.make(env_name)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])
    reward_threshold = env.spec.reward_threshold
    polyak = polyak_int[0]
    exploration_noise = exploration_noise_base

    actor_config = [
        {'dim': (state_dim, 256), 'dropout': False, 'activation': 'relu'},
        {'dim': (256, 256), 'dropout': True, 'activation': 'relu'},
        {'dim': (256, 128), 'dropout': True, 'activation': 'relu'},
        {'dim': (128, 64), 'dropout': False, 'activation': 'relu'},
        # No activation module on the output layer: this mirrors the critic's
        # output layer and the logged actor architecture, which ends in a bare
        # Linear. NOTE(review): confirm that TD3 squashes/scales the action
        # itself (e.g. with max_action).
        {'dim': (64, action_dim), 'dropout': False, 'activation': False},
    ]

    critic_config = [
        {'dim': (state_dim + action_dim, 256), 'dropout': False, 'activation': 'relu'},
        {'dim': (256, 256), 'dropout': False, 'activation': 'relu'},
        {'dim': (256, 128), 'dropout': False, 'activation': 'relu'},
        {'dim': (128, 64), 'dropout': False, 'activation': 'relu'},
        {'dim': (64, 1), 'dropout': False, 'activation': False},
    ]

    policy = TD3(actor_config, critic_config, max_action, lr=learning_rate_base)
    replay_buffer = ReplayBuffer(max_length=max_buffer_length)

    print("action_space={}".format(env.action_space))
    print("obs_space={}".format(env.observation_space))
    print("threshold={} \n".format(reward_threshold))

    # NOTE(review): seeding happens after the networks are constructed, so
    # their random initialisation is not covered by the seed (the weights are
    # replaced by policy.load below anyway). A seed of 0 is skipped because
    # the check is a truthiness test.
    if random_seed:
        print("Random Seed: {}".format(random_seed))
        env.seed(random_seed)
        torch.manual_seed(random_seed)
        np.random.seed(random_seed)

    # Resume from the previous checkpoint.
    # NOTE(review): called unconditionally -- this assumes a checkpoint already
    # exists under `directory`; confirm how TD3.load behaves when it does not.
    policy.load(directory, filename)

    # Make sure the checkpoint directory exists before the first policy.save.
    os.makedirs(directory, exist_ok=True)

    # The context manager closes the log on every exit path: solved,
    # max_episodes exhausted, exception, or KeyboardInterrupt.
    with open("log.txt", "w+") as log_f:
        for episode in range(1, max_episodes + 1):
            ep_reward = 0
            state = env.reset()

            for t in range(max_timesteps):
                # Select an action and add exploration noise.
                action = policy.select_action(state)
                action = action + np.random.normal(0, exploration_noise, size=env.action_space.shape[0])
                action = action.clip(env.action_space.low, env.action_space.high)

                # Step the environment and store the transition.
                next_state, reward, done, _ = env.step(action)
                replay_buffer.add((state, action, reward, next_state, float(done)))
                state = next_state

                ep_reward += reward

                # Update the policy once the episode ends or the step budget is spent.
                if done or t == (max_timesteps - 1):
                    policy.update(replay_buffer, t, batch_size, gamma, polyak,
                                  policy_noise, noise_clip, policy_delay)
                    break

            reward_history.append(ep_reward)
            avg_reward = np.mean(reward_history[-100:])

            # Flush after every episode so the log survives an interrupt.
            log_f.write('{},{}\n'.format(episode, ep_reward))
            log_f.flush()

            # Solved: the running average reached the env's reward threshold.
            if avg_reward >= reward_threshold:
                print("########## Solved! ###########")
                name = filename + '_solved'
                policy.save(directory, name)
                break

            # Remaining distance to the threshold, clamped to at most 1:
            # 1 when avg_reward <= -150, 0 when avg_reward == threshold.
            part = (reward_threshold - avg_reward) / (reward_threshold + 150)
            if part > 1:
                part = 1

            # polyak stays fixed at polyak_int[0]; the reward-based schedule
            # below is currently disabled:
            # polyak = polyak_int[0] + (1 - part) * (polyak_int[1] - polyak_int[0])

            # Anneal the learning rate from base (part == 1) to 10% of base (part == 0).
            learning_rate = learning_rate_base - learning_rate_base * (1 - part) * 0.9
            policy.set_optimizers(lr=learning_rate)

            # Anneal the exploration noise with the same schedule.
            exploration_noise = exploration_noise_base - exploration_noise_base * (1 - part) * 0.9

            # Checkpoint every episode once past the first 500.
            if episode > 500:
                policy.save(directory, filename)

            # Print a progress line every log interval.
            if episode % log_interval == 0:
                print("Ep:{}   Rew:{:3.2f}  Avg Rew:{:3.2f}  LR:{:8.8f}   Polyak:{:5.5f}  Bf:{:2.0f}  EN:{:0.4f}  Loss: {:5.3f} {:5.3f} {:5.3f}".format(
                    episode, ep_reward, avg_reward, learning_rate, polyak, replay_buffer.get_fill(), exploration_noise, policy.actor_loss, policy.loss_Q1, policy.loss_Q2))

# Start training; runs until the environment is solved, max_episodes is
# reached, or the kernel is interrupted.
train()


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
ACTOR=Sequential(
  (0): Linear(in_features=24, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): Dropout(p=0.2)
  (4): ReLU()
  (5): Linear(in_features=256, out_features=128, bias=True)
  (6): Dropout(p=0.2)
  (7): ReLU()
  (8): Linear(in_features=128, out_features=64, bias=True)
  (9): ReLU()
  (10): Linear(in_features=64, out_features=4, bias=True)
)
ACTOR=Sequential(
  (0): Linear(in_features=24, out_features=256, bias=True)
  (1): ReLU()
  (2): Linear(in_features=256, out_features=256, bias=True)
  (3): Dropout(p=0.2)
  (4): ReLU()
  (5): Linear(in_features=256, out_features=128, bias=True)
  (6): Dropout(p=0.2)
  (7): ReLU()
  (8): Linear(in_features=128, out_features=64, bias=True)
  (9): ReLU()
  (10): Line

Ep:490   Rew:-109.78  Avg Rew:-111.50  LR:0.00009230   Polyak:0.99990  Bf: 4  EN:0.2769  Loss: 0.632 21.875 13.009
Ep:500   Rew:-113.68  Avg Rew:-111.51  LR:0.00009230   Polyak:0.99990  Bf: 4  EN:0.2769  Loss: -0.020 4.254 4.161
Ep:510   Rew:-109.18  Avg Rew:-111.43  LR:0.00009229   Polyak:0.99990  Bf: 4  EN:0.2769  Loss: 0.602 6.577 9.599
Ep:520   Rew:-111.38  Avg Rew:-111.15  LR:0.00009223   Polyak:0.99990  Bf: 4  EN:0.2767  Loss: 0.135 7.114 5.196
Ep:530   Rew:-112.15  Avg Rew:-111.20  LR:0.00009224   Polyak:0.99990  Bf: 4  EN:0.2767  Loss: 0.378 3.695 3.987
Ep:540   Rew:-115.18  Avg Rew:-111.33  LR:0.00009227   Polyak:0.99990  Bf: 4  EN:0.2768  Loss: 0.112 6.328 3.949
Ep:550   Rew:-112.11  Avg Rew:-111.48  LR:0.00009230   Polyak:0.99990  Bf: 4  EN:0.2769  Loss: 0.328 9.820 2.057
Ep:560   Rew:-109.58  Avg Rew:-111.40  LR:0.00009228   Polyak:0.99990  Bf: 4  EN:0.2768  Loss: 0.637 4.085 4.227
Ep:570   Rew:-108.99  Avg Rew:-111.46  LR:0.00009229   Polyak:0.99990  Bf: 4  EN:0.2769  Loss

Ep:1220   Rew:-120.67  Avg Rew:-108.95  LR:0.00009179   Polyak:0.99990  Bf: 8  EN:0.2754  Loss: 4.827 1.990 1.993
Ep:1230   Rew:-108.91  Avg Rew:-109.96  LR:0.00009199   Polyak:0.99990  Bf: 8  EN:0.2760  Loss: 4.673 2.880 3.343
Ep:1240   Rew:-117.60  Avg Rew:-110.87  LR:0.00009217   Polyak:0.99990  Bf: 8  EN:0.2765  Loss: 4.792 1.323 1.321
Ep:1250   Rew:-121.92  Avg Rew:-112.20  LR:0.00009244   Polyak:0.99990  Bf: 8  EN:0.2773  Loss: 4.846 2.910 2.283
Ep:1260   Rew:-120.78  Avg Rew:-113.02  LR:0.00009260   Polyak:0.99990  Bf: 8  EN:0.2778  Loss: 5.954 2.976 4.004
Ep:1270   Rew:-106.79  Avg Rew:-113.74  LR:0.00009275   Polyak:0.99990  Bf: 8  EN:0.2782  Loss: 4.912 3.180 2.404
Ep:1280   Rew:-113.19  Avg Rew:-113.87  LR:0.00009277   Polyak:0.99990  Bf: 8  EN:0.2783  Loss: 5.547 2.113 1.955
Ep:1290   Rew:-141.33  Avg Rew:-115.10  LR:0.00009302   Polyak:0.99990  Bf: 8  EN:0.2791  Loss: 4.435 3.003 2.149
Ep:1300   Rew:-116.16  Avg Rew:-115.60  LR:0.00009312   Polyak:0.99990  Bf: 8  EN:0.2794

Ep:1940   Rew:-127.52  Avg Rew:-117.56  LR:0.00009351   Polyak:0.99990  Bf:15  EN:0.2805  Loss: 8.119 1.627 1.561
Ep:1950   Rew:-130.56  Avg Rew:-118.49  LR:0.00009370   Polyak:0.99990  Bf:15  EN:0.2811  Loss: 7.797 2.135 2.370
Ep:1960   Rew:-120.75  Avg Rew:-118.68  LR:0.00009374   Polyak:0.99990  Bf:15  EN:0.2812  Loss: 7.559 2.269 1.969
Ep:1970   Rew:-115.44  Avg Rew:-119.14  LR:0.00009383   Polyak:0.99990  Bf:15  EN:0.2815  Loss: 8.908 2.381 2.087
Ep:1980   Rew:-101.90  Avg Rew:-119.06  LR:0.00009381   Polyak:0.99990  Bf:15  EN:0.2814  Loss: 9.112 2.172 1.524
Ep:1990   Rew:-105.91  Avg Rew:-118.24  LR:0.00009365   Polyak:0.99990  Bf:15  EN:0.2809  Loss: 7.800 2.253 2.236
Ep:2000   Rew:-127.32  Avg Rew:-118.50  LR:0.00009370   Polyak:0.99990  Bf:15  EN:0.2811  Loss: 8.007 1.883 1.260
Ep:2010   Rew:-104.99  Avg Rew:-117.80  LR:0.00009356   Polyak:0.99990  Bf:15  EN:0.2807  Loss: 8.402 2.922 3.160
Ep:2020   Rew:-117.41  Avg Rew:-117.02  LR:0.00009340   Polyak:0.99990  Bf:15  EN:0.2802

Ep:2660   Rew:-96.22  Avg Rew:-107.56  LR:0.00009151   Polyak:0.99990  Bf:25  EN:0.2745  Loss: 9.303 1.626 1.656
Ep:2670   Rew:-98.57  Avg Rew:-108.90  LR:0.00009178   Polyak:0.99990  Bf:25  EN:0.2753  Loss: 11.643 2.431 2.528
Ep:2680   Rew:-125.03  Avg Rew:-109.43  LR:0.00009189   Polyak:0.99990  Bf:25  EN:0.2757  Loss: 11.317 1.851 1.883
Ep:2690   Rew:-107.62  Avg Rew:-108.74  LR:0.00009175   Polyak:0.99990  Bf:26  EN:0.2752  Loss: 10.488 2.088 1.602
Ep:2700   Rew:-100.02  Avg Rew:-107.95  LR:0.00009159   Polyak:0.99990  Bf:26  EN:0.2748  Loss: 11.668 2.034 1.677
Ep:2710   Rew:-124.88  Avg Rew:-107.26  LR:0.00009145   Polyak:0.99990  Bf:26  EN:0.2744  Loss: 12.590 2.212 1.705
Ep:2720   Rew:-93.88  Avg Rew:-107.69  LR:0.00009154   Polyak:0.99990  Bf:26  EN:0.2746  Loss: 11.800 1.535 1.415
Ep:2730   Rew:-96.39  Avg Rew:-107.21  LR:0.00009144   Polyak:0.99990  Bf:27  EN:0.2743  Loss: 11.075 3.775 3.058
Ep:2740   Rew:-81.22  Avg Rew:-106.56  LR:0.00009131   Polyak:0.99990  Bf:27  EN:0.27

Ep:3380   Rew:-79.21  Avg Rew:-96.92  LR:0.00008938   Polyak:0.99990  Bf:49  EN:0.2682  Loss: 11.801 1.377 1.752
Ep:3390   Rew:-103.89  Avg Rew:-98.28  LR:0.00008966   Polyak:0.99990  Bf:49  EN:0.2690  Loss: 10.124 2.417 2.156
Ep:3400   Rew:-101.27  Avg Rew:-99.07  LR:0.00008981   Polyak:0.99990  Bf:49  EN:0.2694  Loss: 11.455 2.041 2.307
Ep:3410   Rew:-72.01  Avg Rew:-97.87  LR:0.00008957   Polyak:0.99990  Bf:50  EN:0.2687  Loss: 11.329 1.271 1.014
Ep:3420   Rew:-107.08  Avg Rew:-97.34  LR:0.00008947   Polyak:0.99990  Bf:50  EN:0.2684  Loss: 10.464 2.160 2.306
Ep:3430   Rew:-132.22  Avg Rew:-96.03  LR:0.00008921   Polyak:0.99990  Bf:50  EN:0.2676  Loss: 10.995 1.670 1.133
Ep:3440   Rew:-105.36  Avg Rew:-96.84  LR:0.00008937   Polyak:0.99990  Bf:51  EN:0.2681  Loss: 12.327 4.102 4.276
Ep:3450   Rew:-104.50  Avg Rew:-96.86  LR:0.00008937   Polyak:0.99990  Bf:51  EN:0.2681  Loss: 12.511 2.107 1.695
Ep:3460   Rew:-126.90  Avg Rew:-96.56  LR:0.00008931   Polyak:0.99990  Bf:51  EN:0.2679  L

Ep:4100   Rew:-102.95  Avg Rew:-108.76  LR:0.00009175   Polyak:0.99990  Bf:78  EN:0.2753  Loss: 10.895 1.990 2.166
Ep:4110   Rew:-113.16  Avg Rew:-108.53  LR:0.00009171   Polyak:0.99990  Bf:79  EN:0.2751  Loss: 10.790 1.641 2.603
Ep:4120   Rew:-80.53  Avg Rew:-109.07  LR:0.00009181   Polyak:0.99990  Bf:79  EN:0.2754  Loss: 11.350 2.042 3.229
Ep:4130   Rew:-94.08  Avg Rew:-108.91  LR:0.00009178   Polyak:0.99990  Bf:80  EN:0.2753  Loss: 10.727 1.897 1.757
Ep:4140   Rew:-60.34  Avg Rew:-108.75  LR:0.00009175   Polyak:0.99990  Bf:81  EN:0.2753  Loss: 12.236 1.990 1.984
Ep:4150   Rew:-117.02  Avg Rew:-108.53  LR:0.00009171   Polyak:0.99990  Bf:82  EN:0.2751  Loss: 11.078 2.752 2.639
Ep:4160   Rew:-129.26  Avg Rew:-106.81  LR:0.00009136   Polyak:0.99990  Bf:82  EN:0.2741  Loss: 11.761 2.534 1.934
Ep:4170   Rew:-124.47  Avg Rew:-108.22  LR:0.00009164   Polyak:0.99990  Bf:83  EN:0.2749  Loss: 11.077 3.384 2.234
Ep:4180   Rew:-40.83  Avg Rew:-110.26  LR:0.00009205   Polyak:0.99990  Bf:83  EN:0.

Ep:4820   Rew:-64.31  Avg Rew:-96.94  LR:0.00008939   Polyak:0.99990  Bf:100  EN:0.2682  Loss: 8.934 1.622 1.009
Ep:4830   Rew:-69.08  Avg Rew:-95.59  LR:0.00008912   Polyak:0.99990  Bf:100  EN:0.2674  Loss: 8.996 3.374 3.741
Ep:4840   Rew:-154.18  Avg Rew:-96.17  LR:0.00008923   Polyak:0.99990  Bf:100  EN:0.2677  Loss: 9.181 1.189 1.100
Ep:4850   Rew:-90.14  Avg Rew:-96.55  LR:0.00008931   Polyak:0.99990  Bf:100  EN:0.2679  Loss: 9.465 2.022 2.152
Ep:4860   Rew:-94.44  Avg Rew:-96.47  LR:0.00008929   Polyak:0.99990  Bf:100  EN:0.2679  Loss: 9.484 1.397 1.168
Ep:4870   Rew:-84.18  Avg Rew:-95.14  LR:0.00008903   Polyak:0.99990  Bf:100  EN:0.2671  Loss: 9.243 1.512 1.681
Ep:4880   Rew:-74.92  Avg Rew:-93.82  LR:0.00008876   Polyak:0.99990  Bf:100  EN:0.2663  Loss: 9.518 0.689 0.931
Ep:4890   Rew:-81.89  Avg Rew:-91.87  LR:0.00008837   Polyak:0.99990  Bf:100  EN:0.2651  Loss: 8.299 0.977 0.967
Ep:4900   Rew:-98.18  Avg Rew:-91.50  LR:0.00008830   Polyak:0.99990  Bf:100  EN:0.2649  Loss: 

Ep:5550   Rew:-13.22  Avg Rew:-62.25  LR:0.00008245   Polyak:0.99990  Bf:100  EN:0.2474  Loss: 6.722 0.259 0.369
Ep:5560   Rew:0.66  Avg Rew:-60.23  LR:0.00008205   Polyak:0.99990  Bf:100  EN:0.2461  Loss: 6.338 0.149 0.170
Ep:5570   Rew:12.54  Avg Rew:-60.52  LR:0.00008210   Polyak:0.99990  Bf:100  EN:0.2463  Loss: 6.589 0.416 0.542
Ep:5580   Rew:-79.65  Avg Rew:-60.41  LR:0.00008208   Polyak:0.99990  Bf:100  EN:0.2462  Loss: 6.491 0.088 0.105
Ep:5590   Rew:-21.21  Avg Rew:-60.68  LR:0.00008214   Polyak:0.99990  Bf:100  EN:0.2464  Loss: 6.044 0.148 0.115
Ep:5600   Rew:-40.00  Avg Rew:-60.83  LR:0.00008217   Polyak:0.99990  Bf:100  EN:0.2465  Loss: 6.584 0.482 0.494
Ep:5610   Rew:-93.51  Avg Rew:-62.41  LR:0.00008248   Polyak:0.99990  Bf:100  EN:0.2474  Loss: 6.582 0.248 0.227
Ep:5620   Rew:-55.82  Avg Rew:-58.87  LR:0.00008177   Polyak:0.99990  Bf:100  EN:0.2453  Loss: 6.648 0.207 0.173
Ep:5630   Rew:-91.20  Avg Rew:-58.83  LR:0.00008177   Polyak:0.99990  Bf:100  EN:0.2453  Loss: 6.20

Ep:6280   Rew:-65.42  Avg Rew:-56.94  LR:0.00008139   Polyak:0.99990  Bf:100  EN:0.2442  Loss: 6.039 0.156 0.205
Ep:6290   Rew:-41.89  Avg Rew:-56.65  LR:0.00008133   Polyak:0.99990  Bf:100  EN:0.2440  Loss: 6.689 0.213 0.276
Ep:6300   Rew:-66.51  Avg Rew:-57.25  LR:0.00008145   Polyak:0.99990  Bf:100  EN:0.2444  Loss: 6.789 0.451 0.546
Ep:6310   Rew:-131.91  Avg Rew:-58.08  LR:0.00008162   Polyak:0.99990  Bf:100  EN:0.2448  Loss: 6.442 0.224 0.312
Ep:6320   Rew:-60.39  Avg Rew:-59.04  LR:0.00008181   Polyak:0.99990  Bf:100  EN:0.2454  Loss: 6.667 0.380 0.643
Ep:6330   Rew:-100.91  Avg Rew:-58.72  LR:0.00008174   Polyak:0.99990  Bf:100  EN:0.2452  Loss: 6.503 0.481 0.461
Ep:6340   Rew:-67.37  Avg Rew:-59.68  LR:0.00008194   Polyak:0.99990  Bf:100  EN:0.2458  Loss: 5.982 0.477 0.452
Ep:6350   Rew:-61.62  Avg Rew:-59.54  LR:0.00008191   Polyak:0.99990  Bf:100  EN:0.2457  Loss: 6.618 0.240 0.310
Ep:6360   Rew:-71.23  Avg Rew:-61.25  LR:0.00008225   Polyak:0.99990  Bf:100  EN:0.2468  Loss:

Ep:7010   Rew:-88.16  Avg Rew:-47.33  LR:0.00007947   Polyak:0.99990  Bf:100  EN:0.2384  Loss: 5.438 0.223 0.254
Ep:7020   Rew:-66.40  Avg Rew:-46.06  LR:0.00007921   Polyak:0.99990  Bf:100  EN:0.2376  Loss: 5.989 0.187 0.218
Ep:7030   Rew:-124.29  Avg Rew:-46.03  LR:0.00007921   Polyak:0.99990  Bf:100  EN:0.2376  Loss: 5.804 0.440 0.368
Ep:7040   Rew:-75.60  Avg Rew:-45.38  LR:0.00007908   Polyak:0.99990  Bf:100  EN:0.2372  Loss: 5.718 0.529 0.323
Ep:7050   Rew:-72.82  Avg Rew:-45.82  LR:0.00007916   Polyak:0.99990  Bf:100  EN:0.2375  Loss: 5.634 0.101 0.147
Ep:7060   Rew:-50.44  Avg Rew:-50.69  LR:0.00008014   Polyak:0.99990  Bf:100  EN:0.2404  Loss: 5.582 0.398 0.428
Ep:7070   Rew:17.38  Avg Rew:-51.72  LR:0.00008034   Polyak:0.99990  Bf:100  EN:0.2410  Loss: 5.773 0.375 0.350
Ep:7080   Rew:-30.96  Avg Rew:-50.68  LR:0.00008014   Polyak:0.99990  Bf:100  EN:0.2404  Loss: 5.747 0.282 0.247
Ep:7090   Rew:-13.45  Avg Rew:-52.23  LR:0.00008045   Polyak:0.99990  Bf:100  EN:0.2413  Loss: 5

Ep:7740   Rew:-61.80  Avg Rew:-31.10  LR:0.00007622   Polyak:0.99990  Bf:100  EN:0.2287  Loss: 5.333 0.216 0.157
Ep:7750   Rew:-61.50  Avg Rew:-34.26  LR:0.00007685   Polyak:0.99990  Bf:100  EN:0.2306  Loss: 5.347 0.200 0.284
Ep:7760   Rew:-6.72  Avg Rew:-35.96  LR:0.00007719   Polyak:0.99990  Bf:100  EN:0.2316  Loss: 5.471 0.300 0.654
Ep:7770   Rew:-47.81  Avg Rew:-34.28  LR:0.00007686   Polyak:0.99990  Bf:100  EN:0.2306  Loss: 5.508 0.261 0.179
Ep:7780   Rew:-59.75  Avg Rew:-34.83  LR:0.00007697   Polyak:0.99990  Bf:100  EN:0.2309  Loss: 5.534 0.261 0.353
Ep:7790   Rew:30.05  Avg Rew:-32.12  LR:0.00007642   Polyak:0.99990  Bf:100  EN:0.2293  Loss: 5.533 0.283 0.375
Ep:7800   Rew:28.12  Avg Rew:-32.40  LR:0.00007648   Polyak:0.99990  Bf:100  EN:0.2294  Loss: 5.246 0.219 0.206
Ep:7810   Rew:-25.96  Avg Rew:-33.79  LR:0.00007676   Polyak:0.99990  Bf:100  EN:0.2303  Loss: 5.282 0.125 0.129
Ep:7820   Rew:10.71  Avg Rew:-29.08  LR:0.00007582   Polyak:0.99990  Bf:100  EN:0.2274  Loss: 5.410

Ep:8470   Rew:-103.33  Avg Rew:-11.40  LR:0.00007228   Polyak:0.99990  Bf:100  EN:0.2168  Loss: 4.966 0.170 0.285
Ep:8480   Rew:-16.21  Avg Rew:-10.55  LR:0.00007211   Polyak:0.99990  Bf:100  EN:0.2163  Loss: 4.909 0.267 0.242
Ep:8490   Rew:35.47  Avg Rew:-11.64  LR:0.00007233   Polyak:0.99990  Bf:100  EN:0.2170  Loss: 5.022 0.290 0.334
Ep:8500   Rew:-65.56  Avg Rew:-11.10  LR:0.00007222   Polyak:0.99990  Bf:100  EN:0.2167  Loss: 4.938 0.142 0.212
Ep:8510   Rew:88.47  Avg Rew:-12.13  LR:0.00007243   Polyak:0.99990  Bf:100  EN:0.2173  Loss: 5.099 0.184 0.165
Ep:8520   Rew:51.15  Avg Rew:-10.89  LR:0.00007218   Polyak:0.99990  Bf:100  EN:0.2165  Loss: 4.703 0.112 0.133
Ep:8530   Rew:-62.29  Avg Rew:-10.73  LR:0.00007215   Polyak:0.99990  Bf:100  EN:0.2164  Loss: 5.026 0.215 0.403
Ep:8540   Rew:-51.26  Avg Rew:-11.07  LR:0.00007221   Polyak:0.99990  Bf:100  EN:0.2166  Loss: 4.926 0.368 0.393
Ep:8550   Rew:-18.68  Avg Rew:-10.48  LR:0.00007210   Polyak:0.99990  Bf:100  EN:0.2163  Loss: 4.7

Ep:9210   Rew:-68.96  Avg Rew:12.05  LR:0.00006759   Polyak:0.99990  Bf:100  EN:0.2028  Loss: 3.991 0.163 0.226
Ep:9220   Rew:-55.00  Avg Rew:7.37  LR:0.00006853   Polyak:0.99990  Bf:100  EN:0.2056  Loss: 4.050 0.182 0.153
Ep:9230   Rew:-39.40  Avg Rew:4.25  LR:0.00006915   Polyak:0.99990  Bf:100  EN:0.2075  Loss: 4.173 0.370 0.318
Ep:9240   Rew:8.28  Avg Rew:2.10  LR:0.00006958   Polyak:0.99990  Bf:100  EN:0.2087  Loss: 3.905 0.162 0.120
Ep:9250   Rew:-18.64  Avg Rew:-0.53  LR:0.00007011   Polyak:0.99990  Bf:100  EN:0.2103  Loss: 4.506 0.476 0.460
Ep:9260   Rew:-0.63  Avg Rew:-3.90  LR:0.00007078   Polyak:0.99990  Bf:100  EN:0.2123  Loss: 4.180 0.191 0.202
Ep:9270   Rew:-55.78  Avg Rew:-0.55  LR:0.00007011   Polyak:0.99990  Bf:100  EN:0.2103  Loss: 4.201 0.079 0.107
Ep:9280   Rew:-53.73  Avg Rew:2.87  LR:0.00006943   Polyak:0.99990  Bf:100  EN:0.2083  Loss: 3.910 0.132 0.112
Ep:9290   Rew:12.84  Avg Rew:3.82  LR:0.00006924   Polyak:0.99990  Bf:100  EN:0.2077  Loss: 4.141 0.480 0.421
E

Ep:9950   Rew:-67.77  Avg Rew:45.99  LR:0.00006080   Polyak:0.99990  Bf:100  EN:0.1824  Loss: 4.263 0.505 0.528
Ep:9960   Rew:173.07  Avg Rew:42.64  LR:0.00006147   Polyak:0.99990  Bf:100  EN:0.1844  Loss: 3.974 0.314 0.225
Ep:9970   Rew:-1.98  Avg Rew:32.00  LR:0.00006360   Polyak:0.99990  Bf:100  EN:0.1908  Loss: 3.752 0.223 0.226
Ep:9980   Rew:108.31  Avg Rew:25.91  LR:0.00006482   Polyak:0.99990  Bf:100  EN:0.1945  Loss: 4.218 0.353 0.450
Ep:9990   Rew:32.89  Avg Rew:22.92  LR:0.00006542   Polyak:0.99990  Bf:100  EN:0.1962  Loss: 4.296 0.157 0.234
Ep:10000   Rew:33.00  Avg Rew:26.08  LR:0.00006478   Polyak:0.99990  Bf:100  EN:0.1944  Loss: 4.201 0.496 0.515
Ep:10010   Rew:3.72  Avg Rew:22.34  LR:0.00006553   Polyak:0.99990  Bf:100  EN:0.1966  Loss: 4.133 0.283 0.458
Ep:10020   Rew:-22.12  Avg Rew:24.01  LR:0.00006520   Polyak:0.99990  Bf:100  EN:0.1956  Loss: 4.105 0.346 0.432
Ep:10030   Rew:271.02  Avg Rew:26.41  LR:0.00006472   Polyak:0.99990  Bf:100  EN:0.1942  Loss: 4.136 0.708

Ep:10680   Rew:-47.07  Avg Rew:31.64  LR:0.00006367   Polyak:0.99990  Bf:100  EN:0.1910  Loss: 4.942 1.234 1.436
Ep:10690   Rew:270.58  Avg Rew:29.88  LR:0.00006402   Polyak:0.99990  Bf:100  EN:0.1921  Loss: 4.652 1.392 0.847
Ep:10700   Rew:-20.49  Avg Rew:30.59  LR:0.00006388   Polyak:0.99990  Bf:100  EN:0.1916  Loss: 4.622 0.585 0.733
Ep:10710   Rew:99.14  Avg Rew:34.87  LR:0.00006303   Polyak:0.99990  Bf:100  EN:0.1891  Loss: 4.539 0.528 0.444
Ep:10720   Rew:-26.97  Avg Rew:44.39  LR:0.00006112   Polyak:0.99990  Bf:100  EN:0.1834  Loss: 4.579 0.471 0.542
Ep:10730   Rew:60.24  Avg Rew:39.76  LR:0.00006205   Polyak:0.99990  Bf:100  EN:0.1861  Loss: 4.371 0.320 0.455
Ep:10740   Rew:-20.98  Avg Rew:33.40  LR:0.00006332   Polyak:0.99990  Bf:100  EN:0.1900  Loss: 4.383 1.113 1.215
Ep:10750   Rew:-29.33  Avg Rew:28.81  LR:0.00006424   Polyak:0.99990  Bf:100  EN:0.1927  Loss: 4.696 1.459 0.779
Ep:10760   Rew:-75.37  Avg Rew:33.44  LR:0.00006331   Polyak:0.99990  Bf:100  EN:0.1899  Loss: 4.6

Ep:11410   Rew:9.01  Avg Rew:13.75  LR:0.00006725   Polyak:0.99990  Bf:100  EN:0.2018  Loss: 5.412 0.505 0.495
Ep:11420   Rew:101.58  Avg Rew:12.89  LR:0.00006742   Polyak:0.99990  Bf:100  EN:0.2023  Loss: 5.335 0.963 0.560
Ep:11430   Rew:-6.17  Avg Rew:14.34  LR:0.00006713   Polyak:0.99990  Bf:100  EN:0.2014  Loss: 5.314 1.292 1.283
Ep:11440   Rew:132.53  Avg Rew:13.83  LR:0.00006723   Polyak:0.99990  Bf:100  EN:0.2017  Loss: 5.724 0.797 0.683
Ep:11450   Rew:11.54  Avg Rew:7.71  LR:0.00006846   Polyak:0.99990  Bf:100  EN:0.2054  Loss: 5.430 0.913 1.208
Ep:11460   Rew:-7.07  Avg Rew:7.49  LR:0.00006850   Polyak:0.99990  Bf:100  EN:0.2055  Loss: 5.402 0.849 0.851
Ep:11470   Rew:-46.27  Avg Rew:7.21  LR:0.00006856   Polyak:0.99990  Bf:100  EN:0.2057  Loss: 5.426 0.447 0.536
Ep:11480   Rew:150.54  Avg Rew:10.28  LR:0.00006794   Polyak:0.99990  Bf:100  EN:0.2038  Loss: 5.759 0.546 0.893
Ep:11490   Rew:22.03  Avg Rew:12.38  LR:0.00006752   Polyak:0.99990  Bf:100  EN:0.2026  Loss: 5.523 0.85

Ep:12140   Rew:6.26  Avg Rew:31.19  LR:0.00006376   Polyak:0.99990  Bf:100  EN:0.1913  Loss: 6.625 0.906 0.935
Ep:12150   Rew:-29.57  Avg Rew:33.14  LR:0.00006337   Polyak:0.99990  Bf:100  EN:0.1901  Loss: 6.309 0.502 0.542
Ep:12160   Rew:-35.09  Avg Rew:32.98  LR:0.00006340   Polyak:0.99990  Bf:100  EN:0.1902  Loss: 7.094 1.091 1.582
Ep:12170   Rew:-35.41  Avg Rew:32.59  LR:0.00006348   Polyak:0.99990  Bf:100  EN:0.1904  Loss: 7.172 2.083 1.889
Ep:12180   Rew:-7.72  Avg Rew:35.98  LR:0.00006280   Polyak:0.99990  Bf:100  EN:0.1884  Loss: 6.705 0.850 0.799
Ep:12190   Rew:47.10  Avg Rew:33.18  LR:0.00006336   Polyak:0.99990  Bf:100  EN:0.1901  Loss: 6.516 0.580 0.550
Ep:12200   Rew:-52.62  Avg Rew:26.29  LR:0.00006474   Polyak:0.99990  Bf:100  EN:0.1942  Loss: 6.943 2.309 2.016
Ep:12210   Rew:-66.51  Avg Rew:28.47  LR:0.00006431   Polyak:0.99990  Bf:100  EN:0.1929  Loss: 6.817 2.106 2.360
Ep:12220   Rew:162.59  Avg Rew:28.69  LR:0.00006426   Polyak:0.99990  Bf:100  EN:0.1928  Loss: 6.504

Ep:12870   Rew:-109.00  Avg Rew:15.38  LR:0.00006692   Polyak:0.99990  Bf:100  EN:0.2008  Loss: 7.364 0.988 0.847
Ep:12880   Rew:45.44  Avg Rew:14.56  LR:0.00006709   Polyak:0.99990  Bf:100  EN:0.2013  Loss: 7.238 0.885 1.082
Ep:12890   Rew:-16.10  Avg Rew:19.35  LR:0.00006613   Polyak:0.99990  Bf:100  EN:0.1984  Loss: 7.205 1.380 0.977
Ep:12900   Rew:176.97  Avg Rew:21.82  LR:0.00006564   Polyak:0.99990  Bf:100  EN:0.1969  Loss: 7.312 0.588 0.810
Ep:12910   Rew:181.18  Avg Rew:14.11  LR:0.00006718   Polyak:0.99990  Bf:100  EN:0.2015  Loss: 7.375 1.494 1.849
Ep:12920   Rew:-6.27  Avg Rew:11.76  LR:0.00006765   Polyak:0.99990  Bf:100  EN:0.2029  Loss: 7.445 0.991 0.851
Ep:12930   Rew:-73.39  Avg Rew:10.01  LR:0.00006800   Polyak:0.99990  Bf:100  EN:0.2040  Loss: 7.532 1.766 1.757
Ep:12940   Rew:3.28  Avg Rew:11.42  LR:0.00006772   Polyak:0.99990  Bf:100  EN:0.2031  Loss: 7.516 0.513 0.479
Ep:12950   Rew:-52.75  Avg Rew:13.18  LR:0.00006736   Polyak:0.99990  Bf:100  EN:0.2021  Loss: 8.01

Ep:13600   Rew:-19.07  Avg Rew:8.08  LR:0.00006838   Polyak:0.99990  Bf:100  EN:0.2052  Loss: 8.114 0.534 0.545
Ep:13610   Rew:50.95  Avg Rew:5.89  LR:0.00006882   Polyak:0.99990  Bf:100  EN:0.2065  Loss: 8.010 0.498 0.332
Ep:13620   Rew:-45.34  Avg Rew:4.36  LR:0.00006913   Polyak:0.99990  Bf:100  EN:0.2074  Loss: 8.149 1.493 1.387
Ep:13630   Rew:-66.81  Avg Rew:3.50  LR:0.00006930   Polyak:0.99990  Bf:100  EN:0.2079  Loss: 7.659 1.291 1.013
Ep:13640   Rew:-18.28  Avg Rew:5.60  LR:0.00006888   Polyak:0.99990  Bf:100  EN:0.2066  Loss: 8.140 0.890 0.718
Ep:13650   Rew:-24.81  Avg Rew:12.52  LR:0.00006750   Polyak:0.99990  Bf:100  EN:0.2025  Loss: 8.073 0.918 1.348
Ep:13660   Rew:-53.75  Avg Rew:9.19  LR:0.00006816   Polyak:0.99990  Bf:100  EN:0.2045  Loss: 7.992 0.435 0.430
Ep:13670   Rew:143.68  Avg Rew:12.63  LR:0.00006747   Polyak:0.99990  Bf:100  EN:0.2024  Loss: 7.962 0.785 0.703
Ep:13680   Rew:24.67  Avg Rew:12.83  LR:0.00006743   Polyak:0.99990  Bf:100  EN:0.2023  Loss: 7.809 0.3

KeyboardInterrupt: 

In [None]:
def test():
    """Roll out the saved TD3 actor for a few episodes, optionally rendering
    and dumping every frame as a JPEG.

    The networks are built with the same layer configuration and constructor
    call that ``train`` uses, then only the actor weights are loaded via
    ``policy.load_actor``. No exploration noise is added to the actions.

    Side effects:
        * opens a render window when ``render`` is true;
        * writes ``./gif/td3_torch/<episode>/<step>.jpg`` when ``save_gif`` is
          also true;
        * prints the (integer-truncated) return of every episode.

    Returns:
        None.
    """
    random_seed = 0
    n_episodes = 3
    max_timesteps = 2000
    render = True
    save_gif = True

    # NOTE(review): this local seed (0) differs from the module-level
    # random_seed used by train(), so a different checkpoint file name is
    # loaded here than the one train() saves -- confirm this is intended.
    filename = "TD3_torch_{}_{}".format(env_name, random_seed)
    directory = "./preTrained/td3_torch/{}".format(env_name)

    env = gym.make(env_name)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])

    # Same architecture as in train(); TD3 is constructed there from layer
    # configs, not from raw state/action dimensions.
    actor_config = [
        {'dim': (state_dim, 256), 'dropout': False, 'activation': 'relu'},
        {'dim': (256, 256), 'dropout': True, 'activation': 'relu'},
        {'dim': (256, 128), 'dropout': True, 'activation': 'relu'},
        {'dim': (128, 64), 'dropout': False, 'activation': 'relu'},
        # Output layer without an activation module, like the critic's.
        # NOTE(review): confirm against the configuration used for training.
        {'dim': (64, action_dim), 'dropout': False, 'activation': False},
    ]
    critic_config = [
        {'dim': (state_dim + action_dim, 256), 'dropout': False, 'activation': 'relu'},
        {'dim': (256, 256), 'dropout': False, 'activation': 'relu'},
        {'dim': (256, 128), 'dropout': False, 'activation': 'relu'},
        {'dim': (128, 64), 'dropout': False, 'activation': 'relu'},
        {'dim': (64, 1), 'dropout': False, 'activation': False},
    ]

    policy = TD3(actor_config, critic_config, max_action, lr=learning_rate_base)
    policy.load_actor(directory, filename)

    try:
        for ep in range(1, n_episodes + 1):
            ep_reward = 0
            state = env.reset()

            # One frame directory per episode; makedirs also creates the
            # missing parents, which a plain os.mkdir would fail on.
            frame_dir = './gif/td3_torch/{}'.format(ep)
            if render and save_gif:
                os.makedirs(frame_dir, exist_ok=True)

            for t in range(max_timesteps):
                action = policy.select_action(state)
                state, reward, done, _ = env.step(action)
                ep_reward += reward
                if render:
                    env.render()
                    if save_gif:
                        img = env.render(mode='rgb_array')
                        img = Image.fromarray(img)
                        img.save('{}/{}.jpg'.format(frame_dir, t))
                if done:
                    break

            print('Episode: {}\tReward: {}'.format(ep, int(ep_reward)))
    finally:
        # Close the environment once, after all episodes (also on error),
        # not after each episode while it is still being used.
        env.close()
                
# Replay the saved actor with rendering enabled (see test() for the settings).
test()
    
    