<h1>Table of Contents<span class="tocSkip"></span></h1>
<div class="toc"><ul class="toc-item"><li><span><a href="#Imports-and-Data-loading" data-toc-modified-id="Imports-and-Data-loading-1"><span class="toc-item-num">1&nbsp;&nbsp;</span>Imports and Data loading</a></span></li></ul></div>

## Imports and Data loading

In [13]:
# Display plots inline
%matplotlib inline

# Autoreload all package before excecuting a call
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [14]:
import logging

# Configure the root logger: INFO threshold, "LEVEL:message" line format.
LOG_FORMAT = '%(levelname)s:%(message)s'
logging.basicConfig(level=logging.INFO, format=LOG_FORMAT)

In [16]:
import evsim
import numpy as np
import gym

from keras.models import Sequential
from keras.layers import Dense, Activation, Flatten
from keras.optimizers import Adam

from rl.agents.dqn import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory


ENV_NAME = 'evsim-v0'
SEED = 123  # single seed shared by NumPy and the environment


# Get the environment and extract the number of actions.
env = gym.make(ENV_NAME)
np.random.seed(SEED)
env.seed(SEED)
nb_actions = env.action_space.n

# Set the prediction accuracy of the simulation.
# NOTE(review): the meaning/units of the value 10 are defined by evsim - confirm.
env.prediction_accuracy(10)

# Next, we build a very simple model: three hidden Dense(16) + ReLU layers
# followed by a linear output layer with one unit per action.
# The leading 1 in input_shape presumably matches window_length=1 of the
# replay memory configured below - TODO confirm.
model = Sequential()
model.add(Flatten(input_shape=(1,) + env.observation_space.shape))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(16))
model.add(Activation('relu'))
model.add(Dense(nb_actions))
model.add(Activation('linear'))
# summary() prints the layer table itself and returns None, so it is called
# directly; wrapping it in print() would append a stray "None" line.
model.summary()

# Finally, we configure and compile our agent. You can use every built-in Keras optimizer and
# even the metrics!
memory = SequentialMemory(limit=50000, window_length=1)
policy = BoltzmannQPolicy()
dqn = DQNAgent(model=model, nb_actions=nb_actions, memory=memory, nb_steps_warmup=10,
               target_model_update=1e-2, policy=policy)
dqn.compile(Adam(lr=1e-3), metrics=['mae'])

# Okay, now it's time to learn something! Visualization is switched off
# (visualize=False) because rendering slows down training quite a lot.
# You can always safely abort the training prematurely using Ctrl + C.

dqn.fit(env, nb_steps=6700, visualize=False, verbose=1, log_interval=100)

# After training is done, we save the final weights.

dqn.save_weights('dqn_{}_weights.h5f'.format(ENV_NAME), overwrite=True)

# Finally, evaluate our algorithm for one episode (nb_episodes=1).

dqn.test(env, nb_episodes=1, visualize=False)




_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten_7 (Flatten)          (None, 1)                 0         
_________________________________________________________________
dense_25 (Dense)             (None, 16)                32        
_________________________________________________________________
activation_25 (Activation)   (None, 16)                0         
_________________________________________________________________
dense_26 (Dense)             (None, 16)                272       
_________________________________________________________________
activation_26 (Activation)   (None, 16)                0         
_________________________________________________________________
dense_27 (Dense)             (None, 16)                272       
_________________________________________________________________
activation_27 (Activation)   (None, 16)                0         
__________



Interval 2 (100 steps performed)
Interval 3 (200 steps performed)
Interval 4 (300 steps performed)
Interval 5 (400 steps performed)
Interval 6 (500 steps performed)
Interval 7 (600 steps performed)
Interval 8 (700 steps performed)
Interval 9 (800 steps performed)
Interval 10 (900 steps performed)
Interval 11 (1000 steps performed)
Interval 12 (1100 steps performed)
Interval 13 (1200 steps performed)
Interval 14 (1300 steps performed)
Interval 15 (1400 steps performed)
Interval 16 (1500 steps performed)
Interval 17 (1600 steps performed)
Interval 18 (1700 steps performed)
Interval 19 (1800 steps performed)
Interval 20 (1900 steps performed)
Interval 21 (2000 steps performed)
Interval 22 (2100 steps performed)
Interval 23 (2200 steps performed)
Interval 24 (2300 steps performed)
Interval 25 (2400 steps performed)
Interval 26 (2500 steps performed)
Interval 27 (2600 steps performed)
Interval 28 (2700 steps performed)
Interval 29 (2800 steps performed)
Interval 30 (2900 steps performed)
In

<keras.callbacks.History at 0x7f573818a6d8>