In [1]:
import random
from inspect import signature

import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import EpsGreedyQPolicy, GreedyQPolicy, BoltzmannQPolicy
from rl.memory import SequentialMemory

Using TensorFlow backend.


In [2]:
ENV_NAME = 'CartPole-v0'
SEED = 123  # single source of truth for every random number generator seeded below

# Create the environment the agent is trained and evaluated on.
env = gym.make(ENV_NAME)

# Seed every RNG in play so that Restart & Run All gives comparable results.
# The stdlib generator is seeded in addition to NumPy and the environment because
# keras-rl's replay memory appears to draw its batches with `random.sample`
# (see rl/memory.py -- TODO confirm against the installed keras-rl version).
random.seed(SEED)
np.random.seed(SEED)
env.seed(SEED)

# Number of available actions, read from the environment's action space.
nb_actions = env.action_space.n

[2017-06-05 14:10:58,529] Making new env: CartPole-v0


In [3]:
# Q-network: flatten the (1, observation_dim) input, run it through a stack of
# progressively narrower ReLU layers, and finish with one linear output per action.
observation_dim = env.observation_space.shape[0]
hidden_layer_sizes = (128, 64, 32, 16, 8)

model = Sequential()
model.add(Flatten(input_shape=(1, observation_dim)))
for layer_width in hidden_layer_sizes:
    model.add(Dense(layer_width, activation='relu'))
model.add(Dense(nb_actions, activation='linear'))

In [4]:
# Replay memory and exploration policy handed to the agent below.
memory = SequentialMemory(limit=50000, window_length=1)
policy = EpsGreedyQPolicy()

# Assemble the DQN agent around the Q-network and compile it with Adam,
# tracking mean absolute error as an additional metric.
dqn = DQNAgent(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    nb_steps_warmup=100,
    target_model_update=1e-2,
)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Train the agent. Rendering is switched off here (visualize=False) because
# drawing every step slows training down quite a lot.
dqn.fit(env, nb_steps=5000, visualize=False, verbose=1)

Training for 5000 steps ...
Interval 1 (0 steps performed)


<keras.callbacks.History at 0x7f623b7e0c88>

In [5]:
# Evaluate the trained agent for ten episodes with rendering enabled.
dqn.test(
    env,
    nb_episodes=10,
    visualize=True,
)

Testing for 10 episodes ...
Episode 1: reward: 158.000, steps: 158
Episode 2: reward: 133.000, steps: 133
Episode 3: reward: 135.000, steps: 135
Episode 4: reward: 156.000, steps: 156
Episode 5: reward: 128.000, steps: 128
Episode 6: reward: 125.000, steps: 125
Episode 7: reward: 122.000, steps: 122
Episode 8: reward: 122.000, steps: 122
Episode 9: reward: 119.000, steps: 119
Episode 10: reward: 158.000, steps: 158


<keras.callbacks.History at 0x7f62442f7588>