# Solving CartPole OpenAI environment using DQNAgent

#### First, install the keras-rl package. It is not pre-installed in the container because it requires a slightly older version of Keras.

In [1]:
# Install keras-rl via the shell; it supplies the `rl` package imported by the next cell.
!pip install keras-rl



Standard imports

In [2]:
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

Using TensorFlow backend.


#### Open environment
Create the environment and extract the number of actions.
There are two discrete actions: move left and move right.

In [3]:
# Environment id and RNG seed are named constants so each is set in exactly one place.
ENV_NAME = 'CartPole-v0'
SEED = 123

env = gym.make(ENV_NAME)

# Seed NumPy's global RNG and the environment's own RNG for repeatable runs.
np.random.seed(SEED)
env.seed(SEED)

# Number of discrete actions; the network's final Dense layer has this many units.
nb_actions = env.action_space.n

[2017-10-29 22:43:48,300] Making new env: CartPole-v0


#### Build a simple NN

In [4]:
# Small fully connected Q-network: flattened observation -> 3 x (Dense(16) + ReLU)
# -> one linear output per action.
model = Sequential()

# The leading 1 in input_shape is the observation-window axis
# (the replay memory below is created with window_length=1).
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))

# Three identical hidden layers; kept as separate Dense/Activation layers so the
# layer list shown by summary() is unchanged.
for _hidden in range(3):
    model.add(Dense(16))
    model.add(Activation('relu'))

# Linear output layer with one unit per action.
model.add(Dense(nb_actions))
model.add(Activation('linear'))

# summary() prints the table itself and returns None, so it must not be wrapped
# in print() (that would emit an extra "None" line).
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_1 (Flatten)          (None, 4)                 0         
_________________________________________________________________
dense_1 (Dense)              (None, 16)                80        
_________________________________________________________________
activation_1 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_2 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_2 (Activation)    (None, 16)                0         
_________________________________________________________________
dense_3 (Dense)              (None, 16)                272       
_________________________________________________________________
activation_3 (Activation)    (None, 16)                0         
__________

#### Configure and compile our agent. 
You can use every built-in Keras optimizer and even the metrics!


In [5]:
# Replay memory holding up to 50000 entries; window_length=1 matches the
# leading 1 in the network's input shape.
memory = SequentialMemory(window_length=1, limit=50000)

# Action-selection policy used by the agent.
policy = BoltzmannQPolicy()

# Collect the agent settings in one place, then build the agent from them.
agent_settings = dict(
    model=model,
    nb_actions=nb_actions,
    memory=memory,
    policy=policy,
    nb_steps_warmup=10,
    target_model_update=1e-2,
)
dqn = DQNAgent(**agent_settings)

# Compile with Adam (learning rate 1e-3) and track mean absolute error as a metric.
optimizer = Adam(lr=1e-3)
dqn.compile(optimizer, metrics=['mae'])

#### Actual learning 
Visualization is OFF until we figure out how to export the display correctly.

In [None]:
# Train for 50000 steps. Rendering is disabled (visualize=False) because the
# notebook cannot export the environment display yet.
dqn.fit(env, nb_steps=50000, visualize=False, verbose=2)

# After training is done, we save the final weights.
# The file name is built from the environment's registered id, read from the
# env object itself so this cell does not depend on a separately defined name.
weights_path = 'dqn_{}_weights.h5f'.format(env.unwrapped.spec.id)
dqn.save_weights(weights_path, overwrite=True)

Training for 50000 steps ...




    23/50000: episode: 1, duration: 2.473s, episode steps: 23, steps per second: 9, episode reward: 23.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.609 [0.000, 1.000], mean observation: -0.082 [-2.106, 1.171], loss: 0.479087, mean_absolute_error: 0.504080, mean_q: 0.036019
    33/50000: episode: 2, duration: 0.166s, episode steps: 10, steps per second: 60, episode reward: 10.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.300 [0.000, 1.000], mean observation: 0.157 [-1.129, 1.948], loss: 0.423291, mean_absolute_error: 0.512142, mean_q: 0.117085
    69/50000: episode: 3, duration: 0.599s, episode steps: 36, steps per second: 60, episode reward: 36.000, mean reward: 1.000 [1.000, 1.000], mean action: 0.528 [0.000, 1.000], mean observation: 0.134 [-0.535, 0.891], loss: 0.263303, mean_absolute_error: 0.553609, mean_q: 0.434907
    86/50000: episode: 4, duration: 0.281s, episode steps: 17, steps per second: 60, episode reward: 17.000, mean reward: 1.000 [1.000, 1.000], mean

#### Evaluate our algorithm for 5 episodes.

In [None]:
# Run the trained agent for 5 evaluation episodes.
# NOTE(review): visualize=True renders the environment and needs a working display;
# the training section says visualization is meant to be off until display export
# works - confirm whether this should be False as well.
dqn.test(env, nb_episodes=5, visualize=True)