In [1]:
import os
import torch
import gym
import numpy as np
from TD3_torch.TD3 import TD3
from PIL import Image
from utils import ReplayBuffer

# ---- Run identity ---------------------------------------------------------
env_name = 'BipedalWalker-v2'
random_seed = 0             # train() only seeds the RNGs when this is truthy

# ---- Training length / logging --------------------------------------------
episodes = 100000           # not referenced by train() or test() below
max_episodes = 2000         # upper bound on the number of training episodes
max_timesteps = 2000        # upper bound on the steps taken in one episode
log_interval = 1            # print the running average every N episodes

# ---- TD3 hyperparameters --------------------------------------------------
gamma = 0.99                # discount applied to future rewards
batch_size = 100            # transitions sampled from the replay buffer
exploration_noise = 0.1     # std-dev of the Gaussian noise added to actions
polyak = 0.995              # target policy update parameter (1-tau)
policy_noise = 0.2          # target policy smoothing noise
noise_clip = 0.5
policy_delay = 2            # delayed policy updates parameter

# ---- Outputs --------------------------------------------------------------
# Where trained models are saved, and the base name used for them.
directory = "./preTrained/td3_torch/" + env_name
filename = "_".join(["TD3_torch", env_name, str(random_seed)])

# One entry per finished training episode (appended to by train()).
reward_history = []


def train():
    """Train a TD3 agent on ``env_name`` and log the reward of every episode.

    All settings are read from the module-level configuration (``env_name``,
    ``random_seed``, ``max_episodes``, ``max_timesteps``, the TD3
    hyperparameters, ``directory`` and ``filename``).

    Side effects:
        * Appends each episode's total reward to the module-level
          ``reward_history`` list.
        * Overwrites ``log.txt`` in the working directory with one
          ``episode,reward`` line per episode.
        * Saves the policy under ``directory`` on every episode after the
          500th, and once more with a ``_solved`` suffix when training stops
          early.

    Training stops early when the mean of the last (up to) 100 episode
    rewards reaches ``env.spec.reward_threshold``; otherwise it runs for
    ``max_episodes`` episodes.
    """
    env = gym.make(env_name)
    state_dim = env.observation_space.shape[0]
    action_dim = env.action_space.shape[0]
    max_action = float(env.action_space.high[0])

    print("action_space={}".format(env.action_space))
    print("obs_space={}".format(env.observation_space))
    print("threshold={} \n".format(env.spec.reward_threshold))

    # Seed *before* the policy and replay buffer are constructed so that any
    # random draws made while building them (presumably the initial network
    # weights in TD3 -- confirm against TD3.__init__) are covered by the seed.
    # NOTE: a seed of 0 is falsy, so the default configuration runs unseeded.
    if random_seed:
        print("Random Seed: {}".format(random_seed))
        env.seed(random_seed)
        torch.manual_seed(random_seed)
        np.random.seed(random_seed)

    policy = TD3(state_dim, action_dim, max_action)
    replay_buffer = ReplayBuffer()

    # Make sure the checkpoint directory exists before the first save.
    # (policy.save presumably does not create it -- confirm against TD3.save.)
    os.makedirs(directory, exist_ok=True)

    # The context manager closes the log on every exit path: early "solved"
    # stop, running out of episodes, or an exception inside the loop.
    with open("log.txt", "w+") as log_f:
        # training procedure:
        for episode in range(1, max_episodes+1):
            ep_reward = 0
            state = env.reset()
            for t in range(max_timesteps):
                # select action and add exploration noise:
                action = policy.select_action(state)
                action = action + np.random.normal(0, exploration_noise, size=action_dim)
                action = action.clip(env.action_space.low, env.action_space.high)

                # take action in env:
                next_state, reward, done, _ = env.step(action)
                replay_buffer.add((state, action, reward, next_state, float(done)))
                state = next_state

                ep_reward += reward

                # if episode is done then update policy; `t` (index of the
                # last step) is passed as the second argument -- presumably
                # the number of update iterations, confirm against TD3.update.
                if done or t == (max_timesteps-1):
                    policy.update(replay_buffer, t, batch_size, gamma, polyak,
                                  policy_noise, noise_clip, policy_delay)
                    break

            reward_history.append(ep_reward)
            # running mean over the most recent (up to) 100 episodes
            avg_reward = np.mean(reward_history[-100:])

            # logging updates:
            log_f.write('{},{}\n'.format(episode, ep_reward))
            log_f.flush()

            # if the running average reaches the env's reward threshold then
            # save and stop training:
            if avg_reward >= env.spec.reward_threshold:
                print("########## Solved! ###########")
                name = filename + '_solved'
                policy.save(directory, name)
                break

            # after episode 500 the checkpoint is overwritten every episode
            if episode > 500:
                policy.save(directory, filename)

            # print avg reward every log interval:
            if episode % log_interval == 0:
                print("Episode: {}\tReward: {}\tAverage Reward: {}".format(episode, ep_reward, avg_reward))

# Run training with the module-level configuration above. This prints one line
# per episode (log_interval = 1) and overwrites log.txt in the working directory.
train()


[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
action_space=Box(4,)
obs_space=Box(24,)
threshold=300 

Episode: 1	Reward: -93.63873243365626	Average Reward: -93.63873243365626
Episode: 2	Reward: -101.6105417470292	Average Reward: -97.62463709034273
Episode: 3	Reward: -106.80646863548398	Average Reward: -100.6852476053898
Episode: 4	Reward: -107.40267051709799	Average Reward: -102.36460333331685
Episode: 5	Reward: -106.88762287822627	Average Reward: -103.26920724229873
Episode: 6	Reward: -106.58761257803758	Average Reward: -103.82227479825521
Episode: 7	Reward: -103.12557891334392	Average Reward: -103.72274681469646
Episode: 8	Reward: -99.7688068602872	Average Reward: -103.2285043203953
Episode: 9	Reward: -170.4759205311584	Average Reward: -110.70043945492453
Episode: 10	Reward: -123.82958267650997	Average Reward: -112.0

Episode: 106	Reward: -185.87112201281764	Average Reward: -106.12890617733181
Episode: 107	Reward: -138.06637088533085	Average Reward: -106.4783140970517
Episode: 108	Reward: -167.2153443460191	Average Reward: -107.15277947190901
Episode: 109	Reward: -151.99581480505634	Average Reward: -106.96797841464799
Episode: 110	Reward: -159.59991363445414	Average Reward: -107.32568172422744
Episode: 111	Reward: -157.61825231597868	Average Reward: -107.8146440040143
Episode: 112	Reward: -147.63061393131116	Average Reward: -108.15412503637485
Episode: 113	Reward: -148.89414423236664	Average Reward: -108.61354022576319
Episode: 114	Reward: -139.101517806889	Average Reward: -108.92519013873867
Episode: 115	Reward: -138.78295174793467	Average Reward: -109.24507153705483
Episode: 116	Reward: -136.82761680641266	Average Reward: -109.54112235854448
Episode: 117	Reward: -139.03034667689184	Average Reward: -109.86969714555555
Episode: 118	Reward: -136.3109517926956	Average Reward: -110.17669042124461
Episo

Episode: 214	Reward: -152.5459846766318	Average Reward: -113.78436301636879
Episode: 215	Reward: -64.67994784462005	Average Reward: -113.04333297733565
Episode: 216	Reward: -54.12498780625127	Average Reward: -112.21630668733405
Episode: 217	Reward: -90.53277723237846	Average Reward: -111.7313309928889
Episode: 218	Reward: -66.93039936462537	Average Reward: -111.03752546860822
Episode: 219	Reward: -36.290211545038744	Average Reward: -109.93884217231943
Episode: 220	Reward: -92.33734212715402	Average Reward: -109.13747692196245
Episode: 221	Reward: -61.57141761764682	Average Reward: -108.85203657833083
Episode: 222	Reward: -38.73915463770494	Average Reward: -107.78952406074353
Episode: 223	Reward: 10.386130805119409	Average Reward: -106.50821258859229
Episode: 224	Reward: -22.78476354599177	Average Reward: -105.61716870214434
Episode: 225	Reward: -47.97614625649053	Average Reward: -103.77132571910865
Episode: 226	Reward: 18.60045826867229	Average Reward: -102.4593011293311
Episode: 227	R

Episode: 324	Reward: -96.8584353625375	Average Reward: -34.68509729328006
Episode: 325	Reward: -103.3043968360649	Average Reward: -35.23837979907581
Episode: 326	Reward: -62.36469500917961	Average Reward: -36.048031331854325
Episode: 327	Reward: -78.45467215173974	Average Reward: -36.497057881536676
Episode: 328	Reward: -2.5969028821373996	Average Reward: -36.55703742078893
Episode: 329	Reward: 165.70705825472746	Average Reward: -34.42004600961759
Episode: 330	Reward: 147.82968830106435	Average Reward: -32.54101919579443
Episode: 331	Reward: 134.64870620106012	Average Reward: -31.47688364524546
Episode: 332	Reward: 162.58907938720793	Average Reward: -28.592287275041475
Episode: 333	Reward: 137.52286383458198	Average Reward: -26.13009148191893
Episode: 334	Reward: 130.27848098367147	Average Reward: -23.66702543832237
Episode: 335	Reward: 141.19379076448618	Average Reward: -21.55183632316753
Episode: 336	Reward: 207.6751382988886	Average Reward: -18.213016675623365
Episode: 337	Reward: 1

Episode: 435	Reward: -13.62997334324514	Average Reward: 139.41909908336456
Episode: 436	Reward: 278.7717572871669	Average Reward: 140.13006527324734
Episode: 437	Reward: 282.95540032912606	Average Reward: 141.1475027564245
Episode: 438	Reward: 24.632150684949835	Average Reward: 139.66080553304258
Episode: 439	Reward: 281.0521070346637	Average Reward: 140.3042216615954
Episode: 440	Reward: 278.4880436929646	Average Reward: 143.99280849935548
Episode: 441	Reward: 284.09280554340336	Average Reward: 144.53944953407267
Episode: 442	Reward: 282.0837301042021	Average Reward: 145.46031195216753
Episode: 443	Reward: 283.1163067631824	Average Reward: 146.19341707440944
Episode: 444	Reward: 288.6833154164033	Average Reward: 149.55784110873392
Episode: 445	Reward: 272.4394521472579	Average Reward: 150.70573488263884
Episode: 446	Reward: 283.80925292715096	Average Reward: 151.15197448926207
Episode: 447	Reward: 273.405630747912	Average Reward: 152.70277869208778
Episode: 448	Reward: 275.54149683368

Episode: 546	Reward: 297.2976359979952	Average Reward: 240.85933258660683
Episode: 547	Reward: 283.7760328926035	Average Reward: 240.96303660805378
Episode: 548	Reward: 290.6509501845912	Average Reward: 241.11413114156286
Episode: 549	Reward: 292.8164807521642	Average Reward: 241.26656967093103
Episode: 550	Reward: -45.491073353307115	Average Reward: 238.01964616626879
Episode: 551	Reward: 294.51347856332933	Average Reward: 238.17209386191539
Episode: 552	Reward: 282.4218340481648	Average Reward: 238.12178402832598
Episode: 553	Reward: 291.62697387010036	Average Reward: 238.2582187489964
Episode: 554	Reward: 299.51932271646916	Average Reward: 238.44823434792568
Episode: 555	Reward: 296.6069752615607	Average Reward: 238.5655645259499
Episode: 556	Reward: 85.06047331198067	Average Reward: 236.61305813261572
Episode: 557	Reward: 295.44862026833056	Average Reward: 236.79272525964043
Episode: 558	Reward: 281.55712992618476	Average Reward: 236.83657647148357
Episode: 559	Reward: 286.49207052

Episode: 657	Reward: 296.5678873103162	Average Reward: 266.0355965328767
Episode: 658	Reward: 292.5775410236159	Average Reward: 266.14580064385103
Episode: 659	Reward: 295.24641861923544	Average Reward: 266.23334412474907
Episode: 660	Reward: 64.28913341464204	Average Reward: 264.00956425728367
Episode: 661	Reward: 293.0233683318404	Average Reward: 264.0121759713956
Episode: 662	Reward: 297.579679396236	Average Reward: 264.06411708433865
Episode: 663	Reward: 294.8239664188847	Average Reward: 264.060486643864
Episode: 664	Reward: 295.31857787175466	Average Reward: 264.0676080463952
Episode: 665	Reward: 293.3046561419185	Average Reward: 264.0898637380467
Episode: 666	Reward: 295.3530347498143	Average Reward: 266.7118975217121
Episode: 667	Reward: 300.00812895546545	Average Reward: 266.8060518144033
Episode: 668	Reward: 298.5955499683895	Average Reward: 266.86137928114044
Episode: 669	Reward: 292.5788430563168	Average Reward: 266.88075697639584
Episode: 670	Reward: 299.98653871216356	Aver

Episode: 769	Reward: 300.9349769749852	Average Reward: 277.3865361781226
Episode: 770	Reward: 300.99924423838644	Average Reward: 277.3966632333848
Episode: 771	Reward: 293.5565352268242	Average Reward: 277.33535057229744
Episode: 772	Reward: 296.7378985225881	Average Reward: 277.2976825673274
Episode: 773	Reward: 295.3302195924744	Average Reward: 277.31060399029116
Episode: 774	Reward: 296.9247418068424	Average Reward: 277.3033188505557
Episode: 775	Reward: 288.28288408989124	Average Reward: 277.148197549197
Episode: 776	Reward: 299.5424229595263	Average Reward: 277.1794301482647
Episode: 777	Reward: 305.1902473349111	Average Reward: 280.61370744052664
Episode: 778	Reward: 295.80081189984395	Average Reward: 280.556627507933
Episode: 779	Reward: 298.629762599351	Average Reward: 280.5721529287253
Episode: 780	Reward: 294.9367750826008	Average Reward: 280.5854276849934
Episode: 781	Reward: 295.52797125206075	Average Reward: 280.5661657806055
Episode: 782	Reward: 298.1878123859195	Average 

Episode: 881	Reward: 300.7607833758008	Average Reward: 276.51096259808145
Episode: 882	Reward: 286.6220725423732	Average Reward: 276.395305199646
Episode: 883	Reward: 276.44546000720237	Average Reward: 276.2206279488704
Episode: 884	Reward: -94.70219888796143	Average Reward: 272.29740725887376
Episode: 885	Reward: -130.0121014591868	Average Reward: 267.9791952337774
Episode: 886	Reward: -127.68179822034897	Average Reward: 263.69374041729634
Episode: 887	Reward: -122.89592111323283	Average Reward: 259.469943625032
Episode: 888	Reward: -139.66769855735686	Average Reward: 255.10339082480147
Episode: 889	Reward: -149.58947952219276	Average Reward: 250.64688661071006
Episode: 890	Reward: -129.46846097375735	Average Reward: 246.39675904440028
Episode: 891	Reward: -54.720944159526105	Average Reward: 242.83938720088344
Episode: 892	Reward: -105.96238752453755	Average Reward: 238.76924669476838
Episode: 893	Reward: -69.31754283828978	Average Reward: 235.02488868783144
Episode: 894	Reward: -0.42

Episode: 992	Reward: 290.70345300360805	Average Reward: 220.1016844672684
Episode: 993	Reward: 301.0606226963058	Average Reward: 223.80546612261438
Episode: 994	Reward: 293.15270089498426	Average Reward: 226.74123432474062
Episode: 995	Reward: 294.9430329484073	Average Reward: 229.99191437861967
Episode: 996	Reward: 305.46951941369457	Average Reward: 233.5388423374873
Episode: 997	Reward: 296.56902213864475	Average Reward: 237.0293434105585
Episode: 998	Reward: 302.65577098366225	Average Reward: 240.38141842889615
Episode: 999	Reward: 297.7990376354097	Average Reward: 244.3033273388295
Episode: 1000	Reward: 296.47518926107205	Average Reward: 248.32402576145208
Episode: 1001	Reward: 300.3245601101849	Average Reward: 251.6359623752455
Episode: 1002	Reward: 296.40655194793356	Average Reward: 255.0317476685701
Episode: 1003	Reward: 298.38483719048804	Average Reward: 258.2275735925207
Episode: 1004	Reward: 296.9249475934607	Average Reward: 258.53895469928545
Episode: 1005	Reward: 295.802585

Episode: 1102	Reward: 310.07624807027	Average Reward: 267.7587689887329
Episode: 1103	Reward: 304.37801780634004	Average Reward: 267.81870079489147
Episode: 1104	Reward: 304.50333378758654	Average Reward: 267.8944846568327
Episode: 1105	Reward: 302.44849107167653	Average Reward: 267.96094370818537
Episode: 1106	Reward: 296.56743749186865	Average Reward: 267.9193413922548
Episode: 1107	Reward: 300.8189421630908	Average Reward: 267.9608075013443
Episode: 1108	Reward: 300.5802232457668	Average Reward: 267.9969734439937
Episode: 1109	Reward: 302.8797729218611	Average Reward: 268.0108900027531
Episode: 1110	Reward: 297.399976035023	Average Reward: 270.8953493535994
Episode: 1111	Reward: 301.2069877093046	Average Reward: 270.9154212082361
Episode: 1112	Reward: 301.30688918658	Average Reward: 270.9646207524883
Episode: 1113	Reward: 302.4101724941763	Average Reward: 273.77224268220743
Episode: 1114	Reward: 305.20442970312126	Average Reward: 273.81118059411824
Episode: 1115	Reward: 301.69996405

Episode: 1212	Reward: 298.89860389954043	Average Reward: 266.5469894134074
Episode: 1213	Reward: 297.1950756450835	Average Reward: 266.49483844491647
Episode: 1214	Reward: 299.95500494135587	Average Reward: 266.4423441972988
Episode: 1215	Reward: 305.95668268773045	Average Reward: 266.48491138363784
Episode: 1216	Reward: 297.1991772527966	Average Reward: 266.4382987047371
Episode: 1217	Reward: 303.140103383218	Average Reward: 266.4747572044067
Episode: 1218	Reward: -32.931568983258074	Average Reward: 263.11319326552905
Episode: 1219	Reward: 310.2036423440334	Average Reward: 263.2008772890864
Episode: 1220	Reward: 300.70239840618797	Average Reward: 263.2422060379022
Episode: 1221	Reward: 304.03220312419535	Average Reward: 263.3027680329645
Episode: 1222	Reward: 301.81206253546424	Average Reward: 263.3091044364763
Episode: 1223	Reward: -50.010390518117624	Average Reward: 259.75610178631285
Episode: 1224	Reward: 300.6505410049407	Average Reward: 259.788788261744
Episode: 1225	Reward: 303.

Episode: 1322	Reward: 299.8987950974609	Average Reward: 271.35143803026847
Episode: 1323	Reward: 59.743993536446965	Average Reward: 272.4489818708141
Episode: 1324	Reward: 304.1920754518466	Average Reward: 272.4843972152832
Episode: 1325	Reward: 303.0848516125008	Average Reward: 272.4791676491867
Episode: 1326	Reward: 305.6239816752839	Average Reward: 272.5416400643817
Episode: 1327	Reward: 110.46361960067921	Average Reward: 273.68248123767626
Episode: 1328	Reward: 302.97860264788807	Average Reward: 273.705385298874
Episode: 1329	Reward: 301.37007502259974	Average Reward: 273.7285266678398
Episode: 1330	Reward: 302.90781503398733	Average Reward: 276.43650429210084
Episode: 1331	Reward: 298.496731764416	Average Reward: 276.4336229836717
Episode: 1332	Reward: 300.06961842217333	Average Reward: 277.94344154485105
Episode: 1333	Reward: 299.4102467246598	Average Reward: 277.968778254301
Episode: 1334	Reward: 75.04911756974565	Average Reward: 275.73269256911976
Episode: 1335	Reward: 303.0260

Episode: 1432	Reward: 305.5231457975729	Average Reward: 286.4908597472911
Episode: 1433	Reward: 307.2061181754386	Average Reward: 286.56881846179886
Episode: 1434	Reward: 299.7753128175139	Average Reward: 288.8160804142766
Episode: 1435	Reward: 297.29026277956575	Average Reward: 288.75872277789483
Episode: 1436	Reward: 303.0917880041494	Average Reward: 288.7221266071515
Episode: 1437	Reward: 301.1496944172176	Average Reward: 288.6681183303669
Episode: 1438	Reward: 296.445885107294	Average Reward: 288.63744653774086
Episode: 1439	Reward: 297.2577077748355	Average Reward: 288.6273160900742
Episode: 1440	Reward: 305.5186356892653	Average Reward: 288.70982122752764
Episode: 1441	Reward: 302.31181941901656	Average Reward: 290.6394657308018
Episode: 1442	Reward: 302.65229595995476	Average Reward: 290.59763002881994
Episode: 1443	Reward: 292.1028254249911	Average Reward: 290.4743452973226
Episode: 1444	Reward: 298.38229046923806	Average Reward: 290.3935880353486
Episode: 1445	Reward: 302.5181

Episode: 1542	Reward: 305.68991406244896	Average Reward: 283.40286041727495
Episode: 1543	Reward: 307.64616573594515	Average Reward: 283.5582938203845
Episode: 1544	Reward: 298.25101841293645	Average Reward: 283.5569810998215
Episode: 1545	Reward: 304.38930545320176	Average Reward: 283.57569266388697
Episode: 1546	Reward: 298.08838409657335	Average Reward: 283.49946710866766
Episode: 1547	Reward: 302.2014401987946	Average Reward: 283.5231655838798
Episode: 1548	Reward: 304.2021772874257	Average Reward: 283.4787691283339
Episode: 1549	Reward: 30.876869806564116	Average Reward: 280.7153536099797
Episode: 1550	Reward: 303.7404667445818	Average Reward: 280.69715029656544
Episode: 1551	Reward: 199.55978146931392	Average Reward: 279.58506556728213
Episode: 1552	Reward: 307.37780080410766	Average Reward: 279.55033435426634
Episode: 1553	Reward: 309.53581817072705	Average Reward: 279.5774622222875
Episode: 1554	Reward: 308.9813277922075	Average Reward: 279.58208248706364
Episode: 1555	Reward: 

Episode: 1652	Reward: 309.3798785478059	Average Reward: 293.51760384520014
Episode: 1653	Reward: 302.6812730223597	Average Reward: 293.4490583937164
Episode: 1654	Reward: 305.29109413864046	Average Reward: 293.4121560571808
Episode: 1655	Reward: 304.4728324750874	Average Reward: 293.36276819304896
Episode: 1656	Reward: 310.09121778733817	Average Reward: 293.405058684732
Episode: 1657	Reward: -14.424399140186793	Average Reward: 290.21361261477557
Episode: 1658	Reward: 11.937389527652002	Average Reward: 287.3200808304643
Episode: 1659	Reward: 309.13252139388914	Average Reward: 287.3355947719152
Episode: 1660	Reward: 307.4184754388098	Average Reward: 287.33663250605036
Episode: 1661	Reward: 294.43632407769684	Average Reward: 287.1873037520033
Episode: 1662	Reward: 302.6458381856951	Average Reward: 287.1364123213232
Episode: 1663	Reward: 303.4984138078595	Average Reward: 287.1106806022848
Episode: 1664	Reward: 306.81649228182084	Average Reward: 287.1296209312321
Episode: 1665	Reward: 305.4

Episode: 1762	Reward: 305.5046773167616	Average Reward: 287.0093120313168
Episode: 1763	Reward: 303.8009249610314	Average Reward: 287.01233714284854
Episode: 1764	Reward: 302.3963753494134	Average Reward: 286.9681359735245
Episode: 1765	Reward: 307.87920299922297	Average Reward: 286.99285983280043
Episode: 1766	Reward: 309.5962818498806	Average Reward: 287.01245250777686
Episode: 1767	Reward: 313.2997865530578	Average Reward: 287.097925312478
Episode: 1768	Reward: 305.41410050498376	Average Reward: 287.135907450298
Episode: 1769	Reward: 312.3316075817366	Average Reward: 287.17965963056
Episode: 1770	Reward: 307.3745937869499	Average Reward: 287.21430816297936
Episode: 1771	Reward: 306.17313779652284	Average Reward: 287.2213997295618
Episode: 1772	Reward: 310.9941121123001	Average Reward: 287.24341082373235
Episode: 1773	Reward: 305.4632223445924	Average Reward: 287.22285072844795
Episode: 1774	Reward: 310.28569307943667	Average Reward: 287.2105181225355
Episode: 1775	Reward: 307.022749

In [7]:
def test():
    """Run a saved TD3 actor for a few episodes, optionally rendering frames.

    Loads the actor weights named ``TD3_<env>_<seed>_solved`` from
    ``./preTrained/<env>/``, plays ``n_episodes`` episodes without exploration
    noise and prints each episode's total reward (truncated to ``int``).

    When ``render`` and ``save_gif`` are both true, every step's RGB frame is
    written to ``./gif/td3_torch/<episode>/<step>.jpg``.

    NOTE(review): the directory/filename used here differ from the
    module-level ``directory``/``filename`` that ``train()`` saves to
    (``./preTrained/td3_torch/<env>`` and ``TD3_torch_<env>_<seed>``), so this
    does not load a model produced by ``train()`` in this notebook -- confirm
    which location is intended.
    """
    env_name = "BipedalWalker-v2"
    random_seed = 0
    n_episodes = 3
    max_timesteps = 2000
    render = True
    save_gif = True

    filename = "TD3_{}_{}".format(env_name, random_seed)
    filename += '_solved'
    directory = "./preTrained/{}/".format(env_name)

    env = gym.make(env_name)
    try:
        state_dim = env.observation_space.shape[0]
        action_dim = env.action_space.shape[0]
        max_action = float(env.action_space.high[0])

        policy = TD3(state_dim, action_dim, max_action)

        policy.load_actor(directory, filename)

        for ep in range(1, n_episodes+1):
            ep_reward = 0
            state = env.reset()

            # Create the per-episode frame directory once, including any
            # missing parents, instead of re-checking it on every timestep.
            frame_dir = './gif/td3_torch/{}'.format(ep)
            if render and save_gif:
                os.makedirs(frame_dir, exist_ok=True)

            for t in range(max_timesteps):
                action = policy.select_action(state)
                state, reward, done, _ = env.step(action)
                ep_reward += reward
                if render:
                    env.render()
                    if save_gif:
                        # second render call fetches the frame as an array
                        img = env.render(mode = 'rgb_array')
                        img = Image.fromarray(img)
                        img.save('{}/{}.jpg'.format(frame_dir, t))
                if done:
                    break

            print('Episode: {}\tReward: {}'.format(ep, int(ep_reward)))
    finally:
        # Close the environment once, after all episodes (or on error),
        # rather than after the first episode while it is still in use.
        env.close()
                
# Evaluate the saved actor: test() plays 3 episodes and prints each episode's reward.
test()
    
    

[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
[33mWARN: gym.spaces.Box autodetected dtype as <class 'numpy.float32'>. Please provide explicit dtype.[0m
Episode: 1	Reward: 306
Episode: 2	Reward: 306
Episode: 3	Reward: 306
