# Training a basic setting with a Deep Q Network (DQN) #

Import statements

In [1]:
import os

import numpy as np

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc
# Route all figure text through LaTeX; this needs a working TeX installation on the host.
rc('text', usetex=True)
# Render figures inline in the notebook output.
%matplotlib inline

In [2]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam, SGD

In [3]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory  # For experience replay!

In [4]:
# Project-local modules, pulled in with wildcard imports.
# NOTE(review): GridworldMultiAgentv25, build_model and build_agent are used below but not
# defined in this notebook, so they presumably come from these two modules — confirm and
# consider importing them by name so their origin is explicit.
from gym_environment_ncml import *
from learning import *

pygame 2.0.1 (SDL 2.0.14, Python 3.7.10)
Hello from the pygame community. https://www.pygame.org/contribute.html


Useful numbers

In [5]:
# Readable multipliers for large counts (e.g. the number of training steps).
THOUSAND = 1_000
HTHOUSAND = 100 * THOUSAND    # one hundred thousand
MILLION = 1_000 * THOUSAND

## 1. Create environment ##

In [6]:
# Instantiate the environment; this same `env` object is later handed to dqn.fit and dqn.test.
env = GridworldMultiAgentv25()



In [7]:
# Length of the first axis of the observation space (10 in the recorded run).
states = env.observation_space.shape[0]
# The action space's `n` attribute — the action count for a gym Discrete space
# (25 in the recorded run).
actions = env.action_space.n

In [8]:
# Echo both sizes as a quick sanity check before building the network.
states, actions

(10, 25)

## 2. Create a Deep Learning Model with Keras ##

In [9]:
# Build the Q-network through the project's build_model helper. The recorded summary
# shows the resulting stack: Flatten -> Dense(32) -> Dense(16) -> Dense(25).
# NOTE(review): the two lists look like per-hidden-layer unit counts and activations —
# confirm against build_model.
model = build_model(states, actions, [32, 16], ['relu', 'relu'])

Instructions for updating:
If using Keras pass *_constraint arguments to layers.


In [10]:
# Print the layer stack with output shapes and parameter counts.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 10)                0         
_________________________________________________________________
dense (Dense)                (None, 32)                352       
_________________________________________________________________
dense_1 (Dense)              (None, 16)                528       
_________________________________________________________________
dense_2 (Dense)              (None, 25)                425       
Total params: 1,305
Trainable params: 1,305
Non-trainable params: 0
_________________________________________________________________


## 3. Build Agent with Keras-RL ##

In [11]:
# Wrap the network in a DQN agent that explores with an epsilon-greedy policy.
# NOTE(review): 0.01 and 50000 are presumably the target-model update rate and the
# replay-memory limit (the saved-weights filename uses "tmu0.01" and "ml50K") — confirm
# against build_agent.
dqn = build_agent(model, actions, 0.01, EpsGreedyQPolicy(), 50000)
# `learning_rate` is the current keyword of tf.keras optimizers; `lr` is a deprecated alias.
# A commented-out alternative in an earlier revision used Adam at 1e-2 with the 'mse' metric.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

In [None]:
# Train for 5 million environment steps without rendering; verbose=1 prints progress per
# interval. The returned history object feeds the inspection and plotting cells below.
history = dqn.fit(env, nb_steps=5*MILLION, visualize=False, verbose=1)

Training for 5000000 steps ...
Interval 1 (0 steps performed)

In [None]:
# Echoing the whole history dict floods the output after a multi-million-step run
# (it presumably holds one entry per finished episode), so show a compact summary:
# the number of recorded entries and the five most recent values of every metric.
{metric: {'entries': len(values), 'last_5': values[-5:]}
 for metric, values in history.history.items()}

In [None]:
fig, ax = plt.subplots(figsize=(10,7))

# Episode reward against the step count recorded alongside it in the training history.
ax.plot(history.history['nb_steps'], history.history['episode_reward'])

# Label the figure so it can be read on its own. The strings deliberately avoid TeX
# special characters (such as underscores) because text is rendered with usetex=True.
ax.set_xlabel('Training steps')
ax.set_ylabel('Episode reward')
ax.set_title('DQN training progress')

fig.tight_layout()
plt.show()

In [None]:
# Evaluate the trained agent for 10 episodes without rendering.
scores = dqn.test(env, nb_episodes=10, visualize=False)
# Report the mean episode reward over those evaluation episodes.
print(np.mean(scores.history['episode_reward']))

Save the agent's weights to disk

In [None]:
# Create the target folder first so the save cannot fail on a missing directory after
# the long training run.
os.makedirs('agents', exist_ok=True)
# NOTE(review): the file name appears to encode the run settings (lr0.001, tmu0.01, ml50K
# and ns5M match the cells above); "3030" does not obviously match the [32, 16] layer
# sizes passed to build_model — confirm the naming.
dqn.save_weights('agents/dqn_5b5_3030_adam_lr0.001_tmu0.01_ml50K_ns5M.h5f', overwrite=True)

## 4. Reloading Agent from Disk ##