In [1]:
import os
import pickle
import random

import gym
import numpy as np
import pandas as pd
from matplotlib import pyplot as plt

from network import DQN

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


In [2]:
def plot_mean(df, chart_name, x_label, y_label, y_limit):
    """Plot every column of ``df`` as a line and save the chart under ``fig/``.

    The frame is plotted as given (one line per column against its index);
    no averaging or smoothing is applied here.

    Args:
        df: pandas DataFrame to plot.
        chart_name: file name (relative to ``fig/``) handed to ``savefig``.
        x_label: label for the x axis.
        y_label: label for the y axis.
        y_limit: y-axis limits, forwarded unchanged to ``Axes.set_ylim``
            (e.g. a ``(bottom, top)`` tuple).

    Returns:
        None. The figure is left open so the notebook renders it inline.
    """
    # rcParams are global: the larger font applies to every later figure too.
    plt.rcParams.update({'font.size': 20})

    # Create exactly one figure and draw on it explicitly, so no stray
    # empty figure is left open (and displayed) alongside the real chart.
    fig, ax = plt.subplots(figsize=(20, 12))
    df.plot(ax=ax, linewidth=3.0)
    ax.set_xlabel(x_label)
    ax.set_ylabel(y_label)
    ax.set_ylim(y_limit)

    # savefig does not create missing directories.
    os.makedirs('fig', exist_ok=True)
    fig.savefig('fig/' + chart_name)

In [4]:
env = gym.make('LunarLander-v2')

# set seeds -- every RNG source the notebook imports gets the same seed
SEED = 6
env.seed(SEED)
np.random.seed(SEED)
# NOTE(review): presumably the DQN implementation draws from the stdlib
# `random` module (it is imported above but otherwise unused) -- confirm.
random.seed(SEED)

# setting up params
lr_values = [0.0001, 0.001, 0.01, 0.05, 0.1]
epsilon = 1.0
epsilon_decay = 0.995
gamma = 0.99
training_episodes = 1000

# One reward list per learning rate, in the same order as lr_values.
rewards_lrs = []

save_dir = "models/"
# Create the output directory up front so that saving cannot fail on a
# missing directory after a long training run.
os.makedirs(save_dir, exist_ok=True)

for lr_value in lr_values:
    # A fresh agent per learning rate; the environment object is shared.
    model = DQN(env, lr_value, gamma, epsilon, epsilon_decay)
    print("Training model for LR: {}".format(lr_value))
    # NOTE(review): meaning of the second positional argument is defined in
    # network.DQN.train and is not visible here.
    model.train(training_episodes, False)
    rewards_lrs.append(model.rewards_list)

    model.save(save_dir + "lr_model_" + str(lr_value) + ".h5")
    

Training model for LR: 0.0001


  out=out, **kwargs)
  ret = ret.dtype.type(ret / rcount)


Instructions for updating:
Use tf.cast instead.
0  : Episode: Reward:  -355.72579394082175   Average Reward:  -355.72579394082175  epsilon:  0.995
1  : Episode: Reward:  -366.99029995306086   Average Reward:  -361.3580469469413  epsilon:  0.990025
2  : Episode: Reward:  -201.70455569853124   Average Reward:  -308.1402165308046  epsilon:  0.985074875
3  : Episode: Reward:  -149.38001279940613   Average Reward:  -268.450165597955  epsilon:  0.9801495006250001
4  : Episode: Reward:  -124.66482581305539   Average Reward:  -239.69309764097505  epsilon:  0.9752487531218751
5  : Episode: Reward:  -301.75296929651   Average Reward:  -250.03640958356422  epsilon:  0.9703725093562657
6  : Episode: Reward:  -502.9006392837649   Average Reward:  -286.1598709693072  epsilon:  0.9655206468094844
7  : Episode: Reward:  -301.4628235799903   Average Reward:  -288.0727400456426  epsilon:  0.960693043575437
8  : Episode: Reward:  -131.84395256010805   Average Reward:  -270.7139858805832  epsilon:  0.9558

KeyboardInterrupt: 

In [None]:
# To refresh the cached rewards after a complete training run, uncomment
# the dump below. It writes the same file that the load underneath reads.
# with open("models/" + "rewards_lr.p", "wb") as rewards_file:
#     pickle.dump(rewards_lrs, rewards_file)

# Load the cached per-learning-rate rewards instead of retraining.
# NOTE: unpickling can execute arbitrary code; only load files produced by
# this notebook.
with open("models/" + "rewards_lr.p", "rb") as rewards_file:
    rewards_lrs = pickle.load(rewards_file)

In [None]:
# Sweep settings, repeated here with the same values as the training cell.
lr_values = [0.0001, 0.001, 0.01, 0.05, 0.1]
training_episodes = 1000

# Rows are episodes numbered from 1; each learning rate becomes one column.
episode_index = pd.Series(range(1, training_episodes + 1))
lr_rewards_pd = pd.DataFrame(index=episode_index)

# rewards_lrs[k] is plotted under lr_values[k]; each entry must contain
# exactly training_episodes values to fit the index.
for position, lr in enumerate(lr_values):
    lr_rewards_pd["lr=" + str(lr)] = rewards_lrs[position]

plot_mean(lr_rewards_pd, "learning_rates", "episodes", "reward", (-6000, 500))