# Training a basic setting with a Deep Q Network (DQN) #

Import statements

In [1]:
import os

import numpy as np

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc
rc('text', usetex=True)
%matplotlib inline

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [3]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory  # For experience replay!

In [4]:
from gym_environment_ncml import GridworldMultiAgent
from learning import *

pygame 2.0.1 (SDL 2.0.14, Python 3.7.10)
Hello from the pygame community. https://www.pygame.org/contribute.html


Useful numbers

In [5]:
# Round-number constants for writing step counts readably (e.g. 5*MILLION).
MILLION = 1_000_000
HTHOUSAND = 100_000  # one hundred thousand
THOUSAND = 1_000

## 1. Create environment ##

In [6]:
# Instantiate the project's gridworld environment with its default constructor arguments.
env = GridworldMultiAgent()

In [7]:
# Sizes the network is built from: the first dimension of the observation
# space's shape and the action count reported by the action space (`n`).
states, actions = env.observation_space.shape[0], env.action_space.n

In [8]:
# Show both sizes as a sanity check before building the network.
states, actions

(8, 25)

## 2. Create a Deep Learning Model with Keras ##

In [9]:
# Build the Q-network. The argument meanings noted below are inferred from the
# printed model summary — confirm against `learning.build_model`.
model = build_model(
    states,            # input width
    actions,           # output width
    [30, 30],          # units per hidden layer
    ['relu', 'relu'],  # presumably the activation of each hidden layer
)

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [10]:
# Print the layer-by-layer architecture and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 8)                 0         
_________________________________________________________________
dense (Dense)                (None, 30)                270       
_________________________________________________________________
dense_1 (Dense)              (None, 30)                930       
_________________________________________________________________
dense_2 (Dense)              (None, 25)                775       
Total params: 1,975
Trainable params: 1,975
Non-trainable params: 0
_________________________________________________________________


## 3. Build Agent with Keras-RL ##

In [11]:
# Build the agent from the Q-network, passing it a Boltzmann Q-policy.
# NOTE(review): the meaning of the 0.01 argument is defined by
# `learning.build_agent` (not shown here) — confirm before tuning it.
dqn = build_agent(model, actions, 0.01, BoltzmannQPolicy())
# `learning_rate` is the current name of Adam's step-size argument; `lr` is a
# deprecated alias for it.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

In [None]:
# Train for five million steps without rendering; verbose=1 prints a progress
# summary for each interval.
dqn.fit(
    env,
    nb_steps=5 * MILLION,
    visualize=False,
    verbose=1,
)

Training for 5000000 steps ...
Interval 1 (0 steps performed)
200 episodes - episode_reward: -10.450 [-50.000, 50.000] - loss: 13.411 - mae: 24.520 - mean_q: 28.608

Interval 2 (10000 steps performed)
200 episodes - episode_reward: -4.450 [-50.000, 100.000] - loss: 26.270 - mae: 41.479 - mean_q: 47.174

Interval 3 (20000 steps performed)
200 episodes - episode_reward: 2.950 [-50.000, 100.000] - loss: 32.638 - mae: 47.201 - mean_q: 53.541

Interval 4 (30000 steps performed)
200 episodes - episode_reward: 10.300 [-50.000, 140.000] - loss: 38.353 - mae: 52.362 - mean_q: 59.178

Interval 5 (40000 steps performed)
200 episodes - episode_reward: 15.250 [-50.000, 90.000] - loss: 42.543 - mae: 55.202 - mean_q: 62.258

Interval 6 (50000 steps performed)
200 episodes - episode_reward: 25.900 [-50.000, 90.000] - loss: 48.638 - mae: 59.677 - mean_q: 67.287

Interval 7 (60000 steps performed)
 1603/10000 [===>..........................] - ETA: 1:05 - reward: 0.3038

In [None]:
# Run the agent for 10 test episodes (no rendering) and print the average
# episode reward.
scores = dqn.test(env, nb_episodes=10, visualize=False)
print(np.asarray(scores.history['episode_reward']).mean())

Save the agent's weights to disk

In [None]:
# Persist the trained weights. Create the target directory first so the save
# cannot fail on a missing `agents/` folder after a long training run.
weights_path = 'agents/weights.h5f'
os.makedirs(os.path.dirname(weights_path), exist_ok=True)
dqn.save_weights(weights_path, overwrite=True)

## 4. Reloading Agent from Disk ##