In [1]:
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy
from rl.memory import SequentialMemory

Using TensorFlow backend.


In [2]:
ENV_NAME = 'CartPole-v0'
# One seed shared by every RNG seeded in this cell, so the two calls below cannot drift apart.
SEED = 123

# Create the CartPole environment, then seed NumPy's global RNG and the environment's own RNG.
env = gym.make(ENV_NAME)
np.random.seed(SEED)
env.seed(SEED)

# Number of actions available in the CartPole problem (used to size the network output below).
nb_actions = env.action_space.n

In [3]:
# Q-network: flatten the observation window, apply one 16-unit ReLU hidden layer,
# then produce one linear output per available action.
model = Sequential()
# The leading 1 in the input shape presumably corresponds to the window_length=1
# used by the replay memory configured for the agent -- confirm if that value changes.
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
# summary() prints the layer table itself and returns None, so it is called directly;
# wrapping it in print() emitted an extra "None" line after the table.
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 16)                80        
_________________________________________________________________
activation_1 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 2)                 34        
_________________________________________________________________
activation_2 (Activation)    (None, 2)                 0         
Total params: 114
Trainable params: 114
Non-trainable params: 0
_________________________________________________________________
None


In [4]:
# Epsilon-greedy exploration policy and a sequential replay memory (limit=50000, window_length=1).
policy = EpsGreedyQPolicy()
memory = SequentialMemory(limit=50000, window_length=1)

# Wire the Q-network, policy and memory into a DQN agent, then compile it with Adam and an MAE metric.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    nb_steps_warmup=10,
    target_model_update=1e-2,
    policy=policy,
)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Train for 5000 steps. Rendering is enabled purely for demonstration; it slows training down quite a lot.
dqn.fit(env, nb_steps=5000, visualize=True, verbose=2)

Training for 5000 steps ...




   79/5000: episode: 1, duration: 3.154s, episode steps: 79, steps per second: 25, episode reward: 79.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.519 [0.000, 1.000], mean observation: 0.060 [-0.402, 0.722], loss: 0.427768, mean_absolute_error: 0.495637, mean_q: 0.053599
  113/5000: episode: 2, duration: 0.945s, episode steps: 34, steps per second: 36, episode reward: 34.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.529 [0.000, 1.000], mean observation: 0.151 [-0.159, 0.753], loss: 0.354449, mean_absolute_error: 0.448013, mean_q: 0.191813
  163/5000: episode: 3, duration: 1.324s, episode steps: 50, steps per second: 38, episode reward: 50.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.520 [0.000, 1.000], mean observation: 0.082 [-0.295, 0.778], loss: 0.318317, mean_absolute_error: 0.468537, mean_q: 0.318022
  197/5000: episode: 4, duration: 0.699s, episode steps: 34, steps per second: 49, episode reward: 34.000, mean reward: 1.000 [1.000, 1.000], mean action:

<keras.callbacks.History at 0x1531060d358>

In [5]:
# Evaluate the trained agent for 50 episodes with rendering enabled.
dqn.test(
    env,
    nb_episodes=50,
    visualize=True,
)

Testing for 50 episodes ...
Episode 1: reward: 71.000, steps: 71
Episode 2: reward: 36.000, steps: 36
Episode 3: reward: 52.000, steps: 52
Episode 4: reward: 103.000, steps: 103
Episode 5: reward: 48.000, steps: 48
Episode 6: reward: 66.000, steps: 66
Episode 7: reward: 44.000, steps: 44
Episode 8: reward: 37.000, steps: 37
Episode 9: reward: 41.000, steps: 41
Episode 10: reward: 39.000, steps: 39
Episode 11: reward: 71.000, steps: 71
Episode 12: reward: 52.000, steps: 52
Episode 13: reward: 58.000, steps: 58
Episode 14: reward: 79.000, steps: 79
Episode 15: reward: 45.000, steps: 45
Episode 16: reward: 140.000, steps: 140
Episode 17: reward: 52.000, steps: 52
Episode 18: reward: 57.000, steps: 57
Episode 19: reward: 55.000, steps: 55
Episode 20: reward: 104.000, steps: 104
Episode 21: reward: 79.000, steps: 79
Episode 22: reward: 59.000, steps: 59
Episode 23: reward: 41.000, steps: 41
Episode 24: reward: 47.000, steps: 47
Episode 25: reward: 175.000, steps: 175
Episode 26: reward: 45.

<keras.callbacks.History at 0x15310352518>

In [1]:
# NOTE(review): this cell carries execution count In [1] and recorded a NameError for `dqn`,
# which suggests it was run on a freshly restarted kernel. `dqn` and `env` are only defined
# by the earlier cells, so run the notebook from the top before executing this cell.
dqn.test(env, nb_episodes=5, visualize=True)

NameError: name 'dqn' is not defined