In [1]:
ENV_NAME = 'Pendulum-v0'
alias = 'TD_INVASE'
RED_ACTION_DIM = 100
import gym
print('\n now evaluating: \n       ', ENV_NAME)


import matplotlib.pyplot as plt
import numpy as np
import torch
import argparse
import os
import torch.nn.functional as F
import utils
import TD3_INVASE_TD

def eval_policy(policy, eval_episodes=10):
    eval_env = gym.make(ENV_NAME)
    avg_reward = 0.
    for _ in range(eval_episodes):
        state, done = eval_env.reset(), False
        while not done:
            action = policy.select_action(np.array(state))
            state, reward, done, _ = eval_env.step(action[:-RED_ACTION_DIM])
            avg_reward += reward

    avg_reward /= eval_episodes

    print("---------------------------------------")
    print(f"Evaluation over {eval_episodes} episodes: {avg_reward:.3f}")
    print("---------------------------------------")
    return avg_reward

env = gym.make(ENV_NAME)
torch.manual_seed(0)
np.random.seed(0)

#spec = env.action_space
state_dim = env.observation_space.shape[0]
action_dim = env.action_space.shape[0] + RED_ACTION_DIM
max_action = env.action_space.high[0]

args_policy_noise = 0.2
args_noise_clip = 0.5
args_policy_freq = 2
args_max_timesteps = 50000
args_expl_noise = 0.1
args_batch_size = 256
args_eval_freq = 1000
args_start_timesteps = 10000

kwargs = {
    "state_dim": state_dim,
    "action_dim": action_dim,
    "max_action": max_action,
    "discount": 0.99,
    "tau": 0.005
}

for repeat in range(5):
    kwargs["policy_noise"] = args_policy_noise * max_action
    kwargs["noise_clip"] = args_noise_clip * max_action
    kwargs["policy_freq"] = args_policy_freq
    policy = TD3_INVASE_TD.TD3(**kwargs)
    replay_buffer = utils.ReplayBuffer(state_dim, action_dim)

    # Evaluate untrained policy
    evaluations = [eval_policy(policy)]
    
    state, done = env.reset(), False
    episode_reward = 0
    episode_timesteps = 0
    episode_num = 0
    counter = 0
    msk_list = []        
    temp_curve = [eval_policy(policy)]
    temp_val = []
    for t in range(int(args_max_timesteps)):
        episode_timesteps += 1
        counter += 1
        # Select action randomly or according to policy
        if t < args_start_timesteps:
            action = np.random.uniform(-max_action, max_action, action_dim)
        else:
            if np.random.uniform(0,1) < 0.0:
                action = np.random.uniform(-max_action, max_action, action_dim)
            else:
                action = (
                    policy.select_action(np.array(state))
                    + np.random.normal(0, max_action * args_expl_noise, size=action_dim)
                ).clip(-max_action, max_action)

        # Perform action
        next_state, reward, done, _ = env.step(action[:-RED_ACTION_DIM])
        

        done_bool = float(done) if episode_timesteps < env._max_episode_steps else 0

        replay_buffer.add(state, action, next_state, reward, done_bool)

        state = next_state
        episode_reward += reward

        if t >= args_start_timesteps:
            '''TD3'''
            Lmd = t/args_max_timesteps * 0.1
            Thr = 0.5*(1 - t/args_max_timesteps)
            policy.train(replay_buffer, args_batch_size, Lmd, Thr)
                    
                    
        # Train agent after collecting sufficient data
        if done:
            print(f"Total T: {t+1} Episode Num: {episode_num+1} Episode T: {episode_timesteps} Reward: {episode_reward:.3f}")
            msk_list = []
            state, done = env.reset(), False
            episode_reward = 0
            episode_timesteps = 0
            episode_num += 1 

        # Evaluate episode
        if (t + 1) % args_eval_freq == 0:
            evaluations.append(eval_policy(policy))
            print('recent Evaluation:',evaluations[-1])
            np.save('results/evaluations_alias{}_ENV{}_Repeat{}'.format(alias,ENV_NAME,repeat),evaluations)


 now evaluating: 
        Pendulum-v0




---------------------------------------
Evaluation over 10 episodes: -1471.619
---------------------------------------
---------------------------------------
Evaluation over 10 episodes: -1529.639
---------------------------------------
Total T: 200 Episode Num: 1 Episode T: 200 Reward: -1047.430
Total T: 400 Episode Num: 2 Episode T: 200 Reward: -1071.841
Total T: 600 Episode Num: 3 Episode T: 200 Reward: -1455.965
Total T: 800 Episode Num: 4 Episode T: 200 Reward: -1621.168
Total T: 1000 Episode Num: 5 Episode T: 200 Reward: -1411.154
---------------------------------------
Evaluation over 10 episodes: -1542.205
---------------------------------------
recent Evaluation: -1542.2054595542127
Total T: 1200 Episode Num: 6 Episode T: 200 Reward: -1241.584
Total T: 1400 Episode Num: 7 Episode T: 200 Reward: -1184.923
Total T: 1600 Episode Num: 8 Episode T: 200 Reward: -1524.890
Total T: 1800 Episode Num: 9 Episode T: 200 Reward: -1358.610
Total T: 2000 Episode Num: 10 Episode T: 200 Rewar

Total T: 17200 Episode Num: 86 Episode T: 200 Reward: -114.541
Total T: 17400 Episode Num: 87 Episode T: 200 Reward: -1.197
Total T: 17600 Episode Num: 88 Episode T: 200 Reward: -126.687
Total T: 17800 Episode Num: 89 Episode T: 200 Reward: -234.492
Total T: 18000 Episode Num: 90 Episode T: 200 Reward: -118.401
---------------------------------------
Evaluation over 10 episodes: -169.098
---------------------------------------
recent Evaluation: -169.09836030738435
Total T: 18200 Episode Num: 91 Episode T: 200 Reward: -235.234
Total T: 18400 Episode Num: 92 Episode T: 200 Reward: -117.581
Total T: 18600 Episode Num: 93 Episode T: 200 Reward: -118.596
Total T: 18800 Episode Num: 94 Episode T: 200 Reward: -234.430
Total T: 19000 Episode Num: 95 Episode T: 200 Reward: -245.772
---------------------------------------
Evaluation over 10 episodes: -132.745
---------------------------------------
recent Evaluation: -132.7447623447111
Total T: 19200 Episode Num: 96 Episode T: 200 Reward: -126.

Total T: 34600 Episode Num: 173 Episode T: 200 Reward: -4.586
Total T: 34800 Episode Num: 174 Episode T: 200 Reward: -1.747
Total T: 35000 Episode Num: 175 Episode T: 200 Reward: -124.056
---------------------------------------
Evaluation over 10 episodes: -177.873
---------------------------------------
recent Evaluation: -177.8732751146709
Total T: 35200 Episode Num: 176 Episode T: 200 Reward: -244.515
Total T: 35400 Episode Num: 177 Episode T: 200 Reward: -237.882
Total T: 35600 Episode Num: 178 Episode T: 200 Reward: -128.037
Total T: 35800 Episode Num: 179 Episode T: 200 Reward: -127.487
Total T: 36000 Episode Num: 180 Episode T: 200 Reward: -114.414
---------------------------------------
Evaluation over 10 episodes: -177.401
---------------------------------------
recent Evaluation: -177.40117106939658
Total T: 36200 Episode Num: 181 Episode T: 200 Reward: -119.299
Total T: 36400 Episode Num: 182 Episode T: 200 Reward: -4.066
Total T: 36600 Episode Num: 183 Episode T: 200 Reward

---------------------------------------
Evaluation over 10 episodes: -1399.607
---------------------------------------
recent Evaluation: -1399.6071667305418
Total T: 2200 Episode Num: 11 Episode T: 200 Reward: -1311.027
Total T: 2400 Episode Num: 12 Episode T: 200 Reward: -1717.570
Total T: 2600 Episode Num: 13 Episode T: 200 Reward: -905.908
Total T: 2800 Episode Num: 14 Episode T: 200 Reward: -1792.689
Total T: 3000 Episode Num: 15 Episode T: 200 Reward: -1527.324
---------------------------------------
Evaluation over 10 episodes: -1325.274
---------------------------------------
recent Evaluation: -1325.2744969028506
Total T: 3200 Episode Num: 16 Episode T: 200 Reward: -828.210
Total T: 3400 Episode Num: 17 Episode T: 200 Reward: -1745.458
Total T: 3600 Episode Num: 18 Episode T: 200 Reward: -1078.093
Total T: 3800 Episode Num: 19 Episode T: 200 Reward: -1168.696
Total T: 4000 Episode Num: 20 Episode T: 200 Reward: -1179.279
---------------------------------------
Evaluation over 

Total T: 19400 Episode Num: 97 Episode T: 200 Reward: -347.186
Total T: 19600 Episode Num: 98 Episode T: 200 Reward: -226.721
Total T: 19800 Episode Num: 99 Episode T: 200 Reward: -2.125
Total T: 20000 Episode Num: 100 Episode T: 200 Reward: -323.776
---------------------------------------
Evaluation over 10 episodes: -192.480
---------------------------------------
recent Evaluation: -192.48013759709247
Total T: 20200 Episode Num: 101 Episode T: 200 Reward: -118.640
Total T: 20400 Episode Num: 102 Episode T: 200 Reward: -352.747
Total T: 20600 Episode Num: 103 Episode T: 200 Reward: -121.664
Total T: 20800 Episode Num: 104 Episode T: 200 Reward: -119.500
Total T: 21000 Episode Num: 105 Episode T: 200 Reward: -121.625
---------------------------------------
Evaluation over 10 episodes: -157.239
---------------------------------------
recent Evaluation: -157.23888214507494
Total T: 21200 Episode Num: 106 Episode T: 200 Reward: -232.836
Total T: 21400 Episode Num: 107 Episode T: 200 Rewa

Total T: 36800 Episode Num: 184 Episode T: 200 Reward: -0.588
Total T: 37000 Episode Num: 185 Episode T: 200 Reward: -0.820
---------------------------------------
Evaluation over 10 episodes: -129.481
---------------------------------------
recent Evaluation: -129.48064713192838
Total T: 37200 Episode Num: 186 Episode T: 200 Reward: -119.484
Total T: 37400 Episode Num: 187 Episode T: 200 Reward: -116.042
Total T: 37600 Episode Num: 188 Episode T: 200 Reward: -258.326
Total T: 37800 Episode Num: 189 Episode T: 200 Reward: -223.886
Total T: 38000 Episode Num: 190 Episode T: 200 Reward: -126.907
---------------------------------------
Evaluation over 10 episodes: -130.823
---------------------------------------
recent Evaluation: -130.82334491840544
Total T: 38200 Episode Num: 191 Episode T: 200 Reward: -126.415
Total T: 38400 Episode Num: 192 Episode T: 200 Reward: -122.406
Total T: 38600 Episode Num: 193 Episode T: 200 Reward: -0.241
Total T: 38800 Episode Num: 194 Episode T: 200 Rewar

---------------------------------------
Evaluation over 10 episodes: -1341.337
---------------------------------------
recent Evaluation: -1341.3366617829781
Total T: 4200 Episode Num: 21 Episode T: 200 Reward: -1599.763
Total T: 4400 Episode Num: 22 Episode T: 200 Reward: -1072.790
Total T: 4600 Episode Num: 23 Episode T: 200 Reward: -1037.636
Total T: 4800 Episode Num: 24 Episode T: 200 Reward: -1674.532
Total T: 5000 Episode Num: 25 Episode T: 200 Reward: -1169.047
---------------------------------------
Evaluation over 10 episodes: -1312.681
---------------------------------------
recent Evaluation: -1312.6814203721146
Total T: 5200 Episode Num: 26 Episode T: 200 Reward: -1717.717
Total T: 5400 Episode Num: 27 Episode T: 200 Reward: -859.763
Total T: 5600 Episode Num: 28 Episode T: 200 Reward: -1508.089
Total T: 5800 Episode Num: 29 Episode T: 200 Reward: -1733.151
Total T: 6000 Episode Num: 30 Episode T: 200 Reward: -1199.927
---------------------------------------
Evaluation over

Total T: 21400 Episode Num: 107 Episode T: 200 Reward: -346.177
Total T: 21600 Episode Num: 108 Episode T: 200 Reward: -242.335
Total T: 21800 Episode Num: 109 Episode T: 200 Reward: -123.087
Total T: 22000 Episode Num: 110 Episode T: 200 Reward: -118.030
---------------------------------------
Evaluation over 10 episodes: -158.011
---------------------------------------
recent Evaluation: -158.01100257617108
Total T: 22200 Episode Num: 111 Episode T: 200 Reward: -347.226
Total T: 22400 Episode Num: 112 Episode T: 200 Reward: -120.569
Total T: 22600 Episode Num: 113 Episode T: 200 Reward: -253.352
Total T: 22800 Episode Num: 114 Episode T: 200 Reward: -119.827
Total T: 23000 Episode Num: 115 Episode T: 200 Reward: -234.402
---------------------------------------
Evaluation over 10 episodes: -132.666
---------------------------------------
recent Evaluation: -132.66647023529157
Total T: 23200 Episode Num: 116 Episode T: 200 Reward: -116.438
Total T: 23400 Episode Num: 117 Episode T: 200

Total T: 38800 Episode Num: 194 Episode T: 200 Reward: -121.937
Total T: 39000 Episode Num: 195 Episode T: 200 Reward: -125.808
---------------------------------------
Evaluation over 10 episodes: -98.443
---------------------------------------
recent Evaluation: -98.44256144550691
Total T: 39200 Episode Num: 196 Episode T: 200 Reward: -243.456
Total T: 39400 Episode Num: 197 Episode T: 200 Reward: -238.101
Total T: 39600 Episode Num: 198 Episode T: 200 Reward: -128.785
Total T: 39800 Episode Num: 199 Episode T: 200 Reward: -3.659
Total T: 40000 Episode Num: 200 Episode T: 200 Reward: -238.168
---------------------------------------
Evaluation over 10 episodes: -142.022
---------------------------------------
recent Evaluation: -142.0223299820787
Total T: 40200 Episode Num: 201 Episode T: 200 Reward: -120.212
Total T: 40400 Episode Num: 202 Episode T: 200 Reward: -123.289
Total T: 40600 Episode Num: 203 Episode T: 200 Reward: -125.511
Total T: 40800 Episode Num: 204 Episode T: 200 Rewa

---------------------------------------
Evaluation over 10 episodes: -1461.809
---------------------------------------
recent Evaluation: -1461.8093826670442
Total T: 6200 Episode Num: 31 Episode T: 200 Reward: -904.658
Total T: 6400 Episode Num: 32 Episode T: 200 Reward: -1362.862
Total T: 6600 Episode Num: 33 Episode T: 200 Reward: -1597.990
Total T: 6800 Episode Num: 34 Episode T: 200 Reward: -1457.763
Total T: 7000 Episode Num: 35 Episode T: 200 Reward: -1320.340
---------------------------------------
Evaluation over 10 episodes: -1485.737
---------------------------------------
recent Evaluation: -1485.7374323582649
Total T: 7200 Episode Num: 36 Episode T: 200 Reward: -1460.234
Total T: 7400 Episode Num: 37 Episode T: 200 Reward: -1185.369
Total T: 7600 Episode Num: 38 Episode T: 200 Reward: -1681.582
Total T: 7800 Episode Num: 39 Episode T: 200 Reward: -1555.939
Total T: 8000 Episode Num: 40 Episode T: 200 Reward: -966.920
---------------------------------------
Evaluation over 

Total T: 23400 Episode Num: 117 Episode T: 200 Reward: -114.717
Total T: 23600 Episode Num: 118 Episode T: 200 Reward: -320.456
Total T: 23800 Episode Num: 119 Episode T: 200 Reward: -114.261
Total T: 24000 Episode Num: 120 Episode T: 200 Reward: -223.547
---------------------------------------
Evaluation over 10 episodes: -167.131
---------------------------------------
recent Evaluation: -167.1310640016885
Total T: 24200 Episode Num: 121 Episode T: 200 Reward: -1.760
Total T: 24400 Episode Num: 122 Episode T: 200 Reward: -0.475
Total T: 24600 Episode Num: 123 Episode T: 200 Reward: -221.333
Total T: 24800 Episode Num: 124 Episode T: 200 Reward: -1.314
Total T: 25000 Episode Num: 125 Episode T: 200 Reward: -116.791
---------------------------------------
Evaluation over 10 episodes: -151.544
---------------------------------------
recent Evaluation: -151.5441597004378
Total T: 25200 Episode Num: 126 Episode T: 200 Reward: -121.074
Total T: 25400 Episode Num: 127 Episode T: 200 Reward:

Total T: 40800 Episode Num: 204 Episode T: 200 Reward: -129.696
Total T: 41000 Episode Num: 205 Episode T: 200 Reward: -129.614
---------------------------------------
Evaluation over 10 episodes: -137.467
---------------------------------------
recent Evaluation: -137.46707316180476
Total T: 41200 Episode Num: 206 Episode T: 200 Reward: -120.850
Total T: 41400 Episode Num: 207 Episode T: 200 Reward: -2.062
Total T: 41600 Episode Num: 208 Episode T: 200 Reward: -231.706
Total T: 41800 Episode Num: 209 Episode T: 200 Reward: -120.876
Total T: 42000 Episode Num: 210 Episode T: 200 Reward: -120.415
---------------------------------------
Evaluation over 10 episodes: -144.091
---------------------------------------
recent Evaluation: -144.09071024424662
Total T: 42200 Episode Num: 211 Episode T: 200 Reward: -228.061
Total T: 42400 Episode Num: 212 Episode T: 200 Reward: -230.229
Total T: 42600 Episode Num: 213 Episode T: 200 Reward: -236.829
Total T: 42800 Episode Num: 214 Episode T: 200 R

---------------------------------------
Evaluation over 10 episodes: -1478.660
---------------------------------------
recent Evaluation: -1478.660271709229
Total T: 8200 Episode Num: 41 Episode T: 200 Reward: -947.258
Total T: 8400 Episode Num: 42 Episode T: 200 Reward: -1432.560
Total T: 8600 Episode Num: 43 Episode T: 200 Reward: -956.514
Total T: 8800 Episode Num: 44 Episode T: 200 Reward: -1243.082
Total T: 9000 Episode Num: 45 Episode T: 200 Reward: -1068.262
---------------------------------------
Evaluation over 10 episodes: -1408.087
---------------------------------------
recent Evaluation: -1408.0865976647497
Total T: 9200 Episode Num: 46 Episode T: 200 Reward: -1165.393
Total T: 9400 Episode Num: 47 Episode T: 200 Reward: -1719.810
Total T: 9600 Episode Num: 48 Episode T: 200 Reward: -966.338
Total T: 9800 Episode Num: 49 Episode T: 200 Reward: -874.127
Total T: 10000 Episode Num: 50 Episode T: 200 Reward: -1063.241
---------------------------------------
Evaluation over 10

Total T: 25200 Episode Num: 126 Episode T: 200 Reward: -120.057
Total T: 25400 Episode Num: 127 Episode T: 200 Reward: -312.965
Total T: 25600 Episode Num: 128 Episode T: 200 Reward: -117.735
Total T: 25800 Episode Num: 129 Episode T: 200 Reward: -0.998
Total T: 26000 Episode Num: 130 Episode T: 200 Reward: -236.598
---------------------------------------
Evaluation over 10 episodes: -112.149
---------------------------------------
recent Evaluation: -112.14895061580759
Total T: 26200 Episode Num: 131 Episode T: 200 Reward: -120.263
Total T: 26400 Episode Num: 132 Episode T: 200 Reward: -123.067
Total T: 26600 Episode Num: 133 Episode T: 200 Reward: -119.367
Total T: 26800 Episode Num: 134 Episode T: 200 Reward: -116.038
Total T: 27000 Episode Num: 135 Episode T: 200 Reward: -236.854
---------------------------------------
Evaluation over 10 episodes: -177.949
---------------------------------------
recent Evaluation: -177.94944604317305
Total T: 27200 Episode Num: 136 Episode T: 200 R

Total T: 42600 Episode Num: 213 Episode T: 200 Reward: -1.917
Total T: 42800 Episode Num: 214 Episode T: 200 Reward: -121.464
Total T: 43000 Episode Num: 215 Episode T: 200 Reward: -234.478
---------------------------------------
Evaluation over 10 episodes: -131.549
---------------------------------------
recent Evaluation: -131.54935715610517
Total T: 43200 Episode Num: 216 Episode T: 200 Reward: -123.979
Total T: 43400 Episode Num: 217 Episode T: 200 Reward: -125.169
Total T: 43600 Episode Num: 218 Episode T: 200 Reward: -239.068
Total T: 43800 Episode Num: 219 Episode T: 200 Reward: -123.648
Total T: 44000 Episode Num: 220 Episode T: 200 Reward: -123.332
---------------------------------------
Evaluation over 10 episodes: -152.657
---------------------------------------
recent Evaluation: -152.65739057603767
Total T: 44200 Episode Num: 221 Episode T: 200 Reward: -241.197
Total T: 44400 Episode Num: 222 Episode T: 200 Reward: -2.817
Total T: 44600 Episode Num: 223 Episode T: 200 Rew