# Training v1.0 with a Deep Q Network (DQN) #

Import statements

In [None]:
import json
import os

import numpy as np

import matplotlib
import matplotlib.pyplot as plt
from matplotlib import rc
rc('text', usetex=True)
%matplotlib inline

In [None]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam, SGD

In [None]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy, EpsGreedyQPolicy
from rl.memory import SequentialMemory  # For experience replay!

In [None]:
from gym_environment_ncml import *
from learning import *

Useful numbers

In [None]:
# Round step/episode counts, built up from powers of ten so the magnitudes
# are easy to read at a glance.
THOUSAND = 10 ** 3
HTHOUSAND = 100 * THOUSAND  # one hundred thousand
MILLION = THOUSAND * THOUSAND

## 1. Create environment ##

In [None]:
# Environment instance the agent is trained on (passed to dqn.fit below).
# Constructed with default arguments, i.e. no explicit seed.
env = GridworldMultiAgentv1()

In [None]:
# Sizes handed to build_model/build_agent below.
# states: length of the first axis of the observation space.
states = env.observation_space.shape[0]
# actions: the action space's `n` attribute (presumably the number of discrete
# actions -- confirm against the environment definition).
actions = env.action_space.n

In [None]:
# Echo both sizes as the cell output for a quick sanity check.
states, actions

## 2. Create a Deep Learning Model with Keras ##

In [None]:
# Build the Q-network via the project helper build_model.
# NOTE(review): [32, 16] and ['relu', 'relu'] are presumably the hidden-layer
# widths and their activations -- confirm against learning.build_model.
model = build_model(states, actions, [32, 16], ['relu', 'relu'])

In [None]:
# Print the network's layer summary to check its structure before training.
model.summary()

## 3. Build Agent with Keras-RL ##

In [None]:
# Wrap the network in a DQN agent via the project helper build_agent, using
# an epsilon-greedy policy with its default epsilon.
# NOTE(review): 0.01 and 50000 presumably correspond to the "tmu0.01" and
# "ml50K" parts of the run name (target-model update and replay-memory
# limit) -- confirm against learning.build_agent.
dqn = build_agent(model, actions, 0.01, EpsGreedyQPolicy(), 50000)
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# in tf.keras and rejected by newer Keras releases.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])
# Alternative configuration kept for reference:
# dqn.compile(Adam(learning_rate=1e-2), metrics=['mse'])

In [None]:
# Run identifier: the path helpers below (get_training_path, get_agent_path,
# get_test_path) derive all output locations from it. Keep it in sync with
# the hyper-parameters used in the cells above.
name = 'dqn1_5b5_3216_adam_lr0.001_tmu0.01_ml50K_ns5M_eps0.1'

In [None]:
# Train for 5 million steps without rendering. The returned object exposes a
# `.history` dict that the next cell post-processes and saves.
history = dqn.fit(env, nb_steps=5*MILLION, visualize=False, verbose=1)

In [None]:
# Normalise the training log in place: every entry of each series is cast to
# a built-in Python number (presumably so json.dump can serialise values that
# may arrive as NumPy scalars).
data = history.history
for series, cast in (
    ('episode_reward', float),
    ('nb_episode_steps', int),
    ('nb_steps', int),
):
    data[series] = [cast(value) for value in data[series]]

In [None]:
# Make sure the run's output directory exists before writing into it.
# makedirs(..., exist_ok=True) also creates a missing parent 'agents' folder
# and does not fail when the cell is re-run for an existing run name, which
# plain os.mkdir does (FileExistsError / FileNotFoundError).
os.makedirs(os.path.join('agents', name), exist_ok=True)
# get_training_path(name) is expected to point inside that directory.
with open(get_training_path(name), 'w') as f:
    json.dump(data, f)

Save the agent's weights to disk

In [None]:
# Write the trained weights to the path derived from the run name;
# overwrite=True is passed so an existing weights file is replaced.
dqn.save_weights(get_agent_path(name), overwrite=True)

## 4. Reload the agent from disk and test it ##

In [None]:
# Fresh environment for evaluation, constructed with an explicit seed (2).
# This rebinds `env`, replacing the training environment.
env = GridworldMultiAgentv1(seed=2)

In [None]:
# Rebuild the network and agent with the same settings used for training so
# the saved weights fit the architecture.
states = env.observation_space.shape[0]
actions = env.action_space.n
model = build_model(states, actions, [32, 16], ['relu', 'relu'])
# summary() prints the table itself and returns None, so wrapping it in
# print() only appended a stray "None" to the output.
model.summary()
# eps=0 disables random exploration, so the agent acts greedily during testing.
dqn = build_agent(model, actions, 0.01, EpsGreedyQPolicy(eps=0), 50000)
# `learning_rate` is the supported keyword; the old `lr` alias is deprecated
# in tf.keras and rejected by newer Keras releases.
dqn.compile(Adam(learning_rate=1e-3), metrics=['mae'])

# Load weights
dqn.load_weights(get_agent_path(name))

In [None]:
# Number of evaluation episodes (10,000); also passed to get_test_path below.
nb_episodes = 10*THOUSAND

In [None]:
# Evaluate the reloaded agent for nb_episodes episodes, with rendering off
# and verbose=0. The result's `.history` dict is read in the next cell.
scores = dqn.test(env, nb_episodes=nb_episodes, visualize=False, verbose=0)

In [None]:
# Per-episode rewards from the evaluation run, as a NumPy array.
rewards = np.array(scores.history['episode_reward'])

In [None]:
# Save the evaluation rewards as plain text at the path built from the run
# name and the episode count.
np.savetxt(get_test_path(name, nb_episodes), rewards)