In [1]:
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import MaxBoltzmannQPolicy
from rl.memory import SequentialMemory

from keras.models import load_model
from keras.models import model_from_json

  from ._conv import register_converters as _register_converters
Using TensorFlow backend.


In [2]:
ENV_NAME = 'LunarLander-v2'
SEED = 123

# Create the LunarLander environment from the configured name (instead of
# repeating the literal), seed both NumPy and the environment with the same
# value, and read the size of the discrete action space.
env = gym.make(ENV_NAME)
np.random.seed(SEED)
env.seed(SEED)
nb_actions = env.action_space.n

In [3]:
# Fully connected Q-network: flattened observation -> 128 -> 128 -> 64 -> one
# linear output per action.
# NOTE(review): the agent below is constructed with `loaded_model`, not this
# `model`, so this cell only displays an architecture — confirm it is meant to
# mirror the saved network.
model = Sequential()
# The leading (1,) matches the memory window_length of 1 used for the agent.
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dense(128))
model.add(Activation('relu'))
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
# summary() prints the table itself and returns None; wrapping it in print()
# would append a stray "None" line to the output.
model.summary()

Instructions for updating:
Colocations handled automatically by placer.
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 8)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 128)               1152      
_________________________________________________________________
activation_1 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               16512     
_________________________________________________________________
activation_2 (Activation)    (None, 128)               0         
_________________________________________________________________
dense_3 (Dense)              (None, 64)                8256      
_________________________________________________________________
acti

In [4]:
# Replay buffer holding up to 50000 entries, with a window of one observation.
memory = SequentialMemory(window_length=1, limit=50000)
# Action-selection policy, constructed with its default parameters.
policy = MaxBoltzmannQPolicy()

In [5]:
# Restore the previously saved network: architecture from the JSON file,
# weights from the matching .h5 file.
# NOTE(review): the files carry an "mnist_" prefix although they are used as
# the LunarLander agent's network — confirm these are the intended artifacts.
# The context manager guarantees the file is closed even if read() raises.
with open("mnist_model253.json", "r") as json_file:
    loaded_model_json = json_file.read()

loaded_model = model_from_json(loaded_model_json)

loaded_model.load_weights("mnist_model253.h5")

In [6]:
# Wrap the restored network in a DQN agent that uses the replay memory and
# policy created earlier.
dqn = DQNAgent(
    model=loaded_model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    nb_steps_warmup=40,
    target_model_update=1e-2,
)

# Compile with Adam and track mean absolute error as a metric.
dqn.compile(
    Adam(lr=1e-3),
    metrics=['mae'],
)

In [9]:
# Evaluate the agent for 100 episodes without rendering; the returned object's
# .history['episode_reward'] is read by the averaging cell that follows.
a = dqn.test(
    env,
    visualize=False,
    nb_episodes=100,
)

Testing for 100 episodes ...
Episode 1: reward: 301.262, steps: 186
Episode 2: reward: 218.765, steps: 446
Episode 3: reward: 265.895, steps: 158
Episode 4: reward: 280.825, steps: 170
Episode 5: reward: 244.692, steps: 168
Episode 6: reward: 290.187, steps: 174
Episode 7: reward: 259.910, steps: 316
Episode 8: reward: 260.602, steps: 392
Episode 9: reward: 280.459, steps: 151
Episode 10: reward: 298.265, steps: 203
Episode 11: reward: 233.880, steps: 643
Episode 12: reward: 313.315, steps: 213
Episode 13: reward: 311.694, steps: 212
Episode 14: reward: 268.749, steps: 341
Episode 15: reward: 290.423, steps: 204
Episode 16: reward: 158.530, steps: 1000
Episode 17: reward: 279.889, steps: 175
Episode 18: reward: 270.064, steps: 164
Episode 19: reward: 317.468, steps: 168
Episode 20: reward: 305.638, steps: 194
Episode 21: reward: 300.321, steps: 192
Episode 22: reward: 185.189, steps: 1000
Episode 23: reward: 313.705, steps: 172
Episode 24: reward: 165.366, steps: 1000
Episode 25: rewar

In [11]:
# Mean reward per test episode. Dividing by the actual number of recorded
# episodes (rather than a hard-coded 100) keeps the average correct if the
# nb_episodes argument of the test run is changed.
episode_rewards = a.history['episode_reward']
s = sum(episode_rewards)  # total reward across all recorded episodes
s / len(episode_rewards)
    

253.4654028731039