In [1]:

from ddpg import *
import time
import numpy as np


# ---------------------------------------------------------------------------
# Environment selection: keep exactly one env_name assignment active.
# ---------------------------------------------------------------------------
# env_name = 'InvertedPendulum-v1'
# env_name = 'Pendulum-v0'
# env_name = 'HalfCheetah-v1'
# env_name = "Hopper-v1"
# env_name = "Walker2d-v1"
env_name = 'Ant-v1'

# ---------------------------------------------------------------------------
# Learning hyper-parameters
# ---------------------------------------------------------------------------
GAMMA = 0.99       # forwarded to DDPG as `gamma`
BATCH_SIZE = 128   # forwarded to DDPG as `batch_size`; training starts once the
                   # replay buffer holds more than this many transitions
dropout_rate = 0.05  # scales how many replay-buffer entries are discarded after each episode

# ---------------------------------------------------------------------------
# Run length
# ---------------------------------------------------------------------------
NUM_EPISODES = 2000  # upper bound on the number of training episodes
MAX_STEPS = 1000     # presumably the per-episode step budget

# ---------------------------------------------------------------------------
# Reporting / rendering
# ---------------------------------------------------------------------------
VISUALIZE = False                        # master switch for env.render()
logging_interval = 100                   # base interval, in episodes
animate_interval = 5 * logging_interval  # an episode is rendered when itr is a multiple of this

# One (avg_val, itr, step_counter) tuple is appended per completed episode.
Records = []



# Create the selected environment and wrap it so that action values are
# remapped for the environment (see NormalizeAction in the ddpg module).
env = NormalizeAction(gym.make(env_name))

# Series collected for plotting: per-episode return and the cumulative
# environment-step count at the end of each episode.
step_counter = 0
step_list_ddpg = []
running_rewards_ddpg = []

# Exponentially smoothed episode return, updated after every episode.
avg_val = 0

# Early-stopping threshold compared against avg_val in the training loop.
# The original note tagged 900 as the Pendulum setting; adjust it to suit
# whichever env_name is active.
term_condition = 900

# Agent dimensions are read from the (wrapped) environment's spaces.
ddpg = DDPG(
    obs_dim=env.observation_space.shape[0],
    act_dim=env.action_space.shape[0],
    actor_lr=1e-4,
    critic_lr=1e-3,
    gamma=GAMMA,
    batch_size=BATCH_SIZE,
)

# Main DDPG training loop.
#
# Each iteration runs one episode (at most MAX_STEPS environment steps),
# stores every transition in the replay buffer, trains the agent after each
# step once the buffer holds more than BATCH_SIZE transitions, then thins the
# buffer ("dropout") and updates the smoothed return used for early stopping.
for itr in range(NUM_EPISODES):
    state = env.reset()  # get initial state
    animate_this_episode = (itr % animate_interval == 0) and VISUALIZE
    total_reward = 0

    # Restart the exploration-noise process once per episode rather than
    # before every action, so the noise state can evolve across the steps of
    # an episode (presumably an Ornstein-Uhlenbeck-style process; confirm in
    # the ddpg module).
    ddpg.noise.reset()

    # Bound the episode by MAX_STEPS so the rollout terminates even if the
    # environment never reports `done`.
    for step_in_episode in range(MAX_STEPS):
        if animate_this_episode:
            env.render()
            time.sleep(0.05)

        # Noisy action from the agent. NOTE(review): any eval/train mode
        # switching around the actor forward pass is expected to live inside
        # get_action_with_noise; verify in the ddpg module.
        action = ddpg.get_action_with_noise(state)
        next_state, reward, done, info = env.step(action)
        total_reward += reward
        ddpg.replayBuffer.remember(state, action, reward, next_state, done)
        state = next_state
        step_counter += 1

        # Only train once the buffer holds more than one batch of transitions.
        if ddpg.replayBuffer.size > BATCH_SIZE:
            ddpg.train()

        if done:
            break

    running_rewards_ddpg.append(total_reward)  # return of this episode
    step_list_ddpg.append(step_counter)        # cumulative steps so far

    # Replay-buffer dropout: discard a random subset of stored transitions.
    # The fraction removed is dropout_rate / (1 + itr), so it shrinks as
    # training progresses; fewer than two removals are skipped entirely.
    number_to_drop = int(ddpg.replayBuffer.size * dropout_rate / (1 + itr))
    if number_to_drop >= 2:
        for _drop in range(number_to_drop):
            victim = np.random.choice(len(ddpg.replayBuffer.buffer), 1)[0]
            del ddpg.replayBuffer.buffer[victim]
            ddpg.replayBuffer.size -= 1

    # Exponential moving average of the episode return (weight 0.05 on the
    # newest episode).
    avg_val = avg_val * 0.95 + 0.05 * total_reward
    print("Average value: {} for episode: {} and reward: {}".format(avg_val, itr, total_reward))
    Records.append((avg_val, itr, step_counter))

    # Early stopping: checked after this episode has been recorded, so no
    # extra episode is run and then thrown away once the threshold is crossed.
    if avg_val > term_condition:
        break

[2018-06-12 21:24:52,216] Making new env: Ant-v1


Average value: 1.7256766875599698 for episode: 0 and reward: 34.5135337511994
Average value: -39.617448532002044 for episode: 1 and reward: -825.1368277036803
Average value: -59.05405546653975 for episode: 2 and reward: -428.3495872227562
Average value: -13.141170354046146 for episode: 3 and reward: 859.2036467833323
Average value: 27.429816668865094 for episode: 4 and reward: 798.2785701041785
Average value: 63.65095965018445 for episode: 5 and reward: 751.8526762952522
Average value: 104.8338515998756 for episode: 6 and reward: 887.3087986440077
Average value: 136.80640695310677 for episode: 7 and reward: 744.2849586644988
Average value: 170.4578217913316 for episode: 8 and reward: 809.8347037176036
Average value: 207.65317374963166 for episode: 9 and reward: 914.3648609573331
Average value: 241.44026392575992 for episode: 10 and reward: 883.3949772721967
Average value: 269.1194443130068 for episode: 11 and reward: 795.0238716706971
Average value: 296.3569650589203 for episode: 12 an

Average value: 662.192603697882 for episode: 104 and reward: 927.834271567876
Average value: 673.3639089684888 for episode: 105 and reward: 885.61870911002
Average value: 684.0406333076322 for episode: 106 and reward: 886.8983957513585
Average value: 692.8449948357181 for episode: 107 and reward: 860.1278638693532
Average value: 703.9527166517292 for episode: 108 and reward: 914.9994311559406
Average value: 713.7936749965183 for episode: 109 and reward: 900.7718835475115
Average value: 722.7549263583187 for episode: 110 and reward: 893.0187022325241
Average value: 718.6422446003869 for episode: 111 and reward: 640.5012911996816
Average value: 726.0461634711472 for episode: 112 and reward: 866.7206220155932
Average value: 734.5642276096332 for episode: 113 and reward: 896.407446240869
Average value: 741.339105642265 for episode: 114 and reward: 870.0617882622697
Average value: 749.3997766770493 for episode: 115 and reward: 902.5525263379527
Average value: 757.6604673961773 for episode: 

Average value: 589.9105387684302 for episode: 207 and reward: 827.1031131459217
Average value: 582.0752677588707 for episode: 208 and reward: 433.2051185772421
Average value: 571.3962173324909 for episode: 209 and reward: 368.4942592312736
Average value: 571.8593089083695 for episode: 210 and reward: 580.6580488500647
Average value: 581.5007700999992 for episode: 211 and reward: 764.6885327409623
Average value: 592.1194121610325 for episode: 212 and reward: 793.8736113206674
Average value: 589.6915962664839 for episode: 213 and reward: 543.5630942700602
Average value: 586.5646183405839 for episode: 214 and reward: 527.1520377484854
Average value: 601.6027723050905 for episode: 215 and reward: 887.3276976307171
Average value: 615.0045129381363 for episode: 216 and reward: 869.6375849660063
Average value: 625.4856884163687 for episode: 217 and reward: 824.6280225027872
Average value: 637.2535907813567 for episode: 218 and reward: 860.84373571613
Average value: 645.021046717144 for episod

Average value: 820.7010612601761 for episode: 310 and reward: 845.47570550369
Average value: 818.5521701925308 for episode: 311 and reward: 777.7232399072701
Average value: 821.6682105553655 for episode: 312 and reward: 880.8729774492258
Average value: 803.908138691806 for episode: 313 and reward: 466.46677328417826
Average value: 808.4500363224655 for episode: 314 and reward: 894.746091304996
Average value: 803.6711138467991 for episode: 315 and reward: 712.8715868091384
Average value: 797.6501591184593 for episode: 316 and reward: 683.2520192800022
Average value: 801.1236096846553 for episode: 317 and reward: 867.119170442379
Average value: 803.6876638586574 for episode: 318 and reward: 852.4046931646966
Average value: 808.9734725157291 for episode: 319 and reward: 909.4038370000922
Average value: 815.3204034263481 for episode: 320 and reward: 935.9120907281083
Average value: 818.854607700187 for episode: 321 and reward: 886.0044889031268
Average value: 818.6935672317795 for episode:

Average value: 745.2470389707713 for episode: 413 and reward: 831.1056969868961
Average value: 727.3038462468222 for episode: 414 and reward: 386.38318449179
Average value: 731.4801303641098 for episode: 415 and reward: 810.8295285925744
Average value: 733.3527206994527 for episode: 416 and reward: 768.9319370709667
Average value: 712.9894866615663 for episode: 417 and reward: 326.0880399417263
Average value: 699.8748555453708 for episode: 418 and reward: 450.696864337659
Average value: 684.6452702495425 for episode: 419 and reward: 395.2831496288053
Average value: 690.5365472462373 for episode: 420 and reward: 802.470810183441
Average value: 693.5318621395955 for episode: 421 and reward: 750.4428451134036
Average value: 693.4653553985589 for episode: 422 and reward: 692.2017273188638
Average value: 695.3942246637749 for episode: 423 and reward: 732.0427407028807
Average value: 685.2250747880562 for episode: 424 and reward: 492.0112271494
Average value: 660.5731569928171 for episode: 4

Average value: 404.06213618368497 for episode: 515 and reward: 47.477207645692005
Average value: 370.46989243075103 for episode: 516 and reward: -267.7827388749943
Average value: 364.4191888607009 for episode: 517 and reward: 249.4558210297487
Average value: 373.55946426373106 for episode: 518 and reward: 547.2246969213037
Average value: 370.3054247104284 for episode: 519 and reward: 308.47867319767823
Average value: 369.8364637344487 for episode: 520 and reward: 360.9262051908352
Average value: 374.63046727545833 for episode: 521 and reward: 465.7165345546415
Average value: 361.81842821488226 for episode: 522 and reward: 118.38968606393715
Average value: 347.0232661151458 for episode: 523 and reward: 65.91518622015283


KeyboardInterrupt: 

In [4]:
import pickle

# Persist the run: first the training curves (Records plus the per-episode
# returns), then the whole DDPG agent object. Each entry is
# (output path, object to pickle); files are written in this order.
_pickle_targets = (
    ("ant_drop.pkl", (Records, running_rewards_ddpg)),
    ("ant_drop_model.pkl", ddpg),
)

for _target_path, _payload in _pickle_targets:
    with open(_target_path, "wb") as out_file:
        pickle.dump(_payload, out_file)
