## Instructions
Read the README for instructions on creating the conda environment and
registering an ipykernel kernel so that environment can be used in a Jupyter notebook.

In [6]:
# Raise TensorFlow's log threshold to ERROR so that warning- and info-level
# messages do not clutter the notebook output.
# NOTE(review): `tf.logging` is the TensorFlow 1.x logging API -- confirm the
# conda environment from the README pins a TF 1.x release.
import tensorflow as tf
tf.logging.set_verbosity(tf.logging.ERROR)

In [7]:
#imports
import gym
import numpy as np

from keras.models import Sequential
from keras.layers import Dense, Flatten
from keras.optimizers import Adam

from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy

In [8]:
# Create the Atari Breakout environment and fix the random seeds so that a
# fresh "Restart Kernel -> Run All" produces comparable training runs.
SEED = 123

env = gym.make('Breakout-v0')
# Seed the environment's own RNG (classic gym API, as used by keras-rl).
env.seed(SEED)
# Seed NumPy's global RNG, which keras-rl's epsilon-greedy policy and replay
# sampling draw from. Kept as the last statement (returns None) so the cell
# shows no stray output.
np.random.seed(SEED)

In [9]:
# Q-network: flatten the raw observation and pass it through two small
# fully-connected ReLU layers; the linear head emits one Q-value per action.
# The leading 1 in `input_shape` is the number of stacked observations and is
# presumably meant to match `window_length=1` of the replay memory -- keep the
# two in sync.
# NOTE(review): observations are fed in unscaled (the training log reports
# values in the 0-200 range); consider normalising inputs.
model = Sequential([
    Flatten(input_shape=(1,) + env.observation_space.shape),
    Dense(64, activation='relu'),
    Dense(32, activation='relu'),
    Dense(env.action_space.n, activation='linear'),
])

In [10]:
# Replay buffer holding at most 2000 transitions, one observation per state.
memory = SequentialMemory(limit=2000, window_length=1)

# Epsilon-greedy exploration with eps=0.1.
policy = EpsGreedyQPolicy(eps=0.1)

# Assemble the DQN agent. The first 1000 steps only fill the replay buffer
# (the training log shows no loss until step 1000 has passed).
# NOTE(review): in keras-rl a `target_model_update` below 1 is interpreted as
# a soft-update rate rather than a step interval -- confirm against the docs.
dqn = DQNAgent(
    model=model,
    nb_actions=env.action_space.n,
    memory=memory,
    policy=policy,
    nb_steps_warmup=1000,
    target_model_update=1e-2,
)

# Adam with learning rate 1e-3; mean absolute error is tracked as a metric.
dqn.compile(optimizer=Adam(lr=1e-3), metrics=['mae'])

In [11]:
# Train the agent on `env` for 10,000 environment steps without rendering.
# With verbose=2 one summary line is printed per finished episode (see the
# output below).
dqn.fit(env, nb_steps=10000, visualize=False, verbose=2)

Training for 10000 steps ...
  531/10000: episode: 1, duration: 3.042s, episode steps: 531, steps per second: 175, episode reward: 4.000, mean reward: 0.008 [0.000, 1.000], mean action: 1.942 [0.000, 3.000], mean observation: 40.286 [0.000, 200.000], loss: --, mean_absolute_error: --, mean_q: --
  967/10000: episode: 2, duration: 2.683s, episode steps: 436, steps per second: 163, episode reward: 2.000, mean reward: 0.005 [0.000, 1.000], mean action: 1.775 [0.000, 3.000], mean observation: 40.479 [0.000, 200.000], loss: --, mean_absolute_error: --, mean_q: --
 1263/10000: episode: 3, duration: 15.879s, episode steps: 296, steps per second: 19, episode reward: 2.000, mean reward: 0.007 [0.000, 1.000], mean action: 1.270 [0.000, 3.000], mean observation: 40.493 [0.000, 200.000], loss: 54893.251261, mean_absolute_error: 83.427633, mean_q: 13.880037
 1552/10000: episode: 4, duration: 17.202s, episode steps: 289, steps per second: 17, episode reward: 2.000, mean reward: 0.007 [0.000, 1.000],

<keras.callbacks.History at 0x7f9bc5c6eb38>

In [12]:
# Save the agent's weights to `policy.h5` in the working directory;
# overwrite=True replaces an existing file without prompting.
dqn.save_weights('policy.h5', overwrite=True)

In [18]:
# Evaluate the agent for a single episode with rendering enabled.
dqn.test(env, nb_episodes=1, visualize=True)

Testing for 1 episodes ...
Episode 1: reward: 0.000, steps: 163


<keras.callbacks.History at 0x7f9bfe25a470>