In [1]:
from game import Game
from environment import Environment

In [2]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Flatten
from tensorflow.keras.optimizers import Adam

In [3]:
from rl.agents import DQNAgent
from rl.policy import BoltzmannQPolicy
from rl.memory import SequentialMemory

In [4]:
# Number of discrete actions: used both as the width of the network's final
# Dense layer and as the agent's nb_actions.
output_size: int = 3

In [5]:
# Q-network: flatten the (1, 6) input, pass it through five ReLU hidden layers
# (50 -> 100 -> 100 -> 50 -> 20 units) and finish with a linear layer that has
# one output per action.
# NOTE(review): the leading 1 in input_shape presumably corresponds to the
# replay memory's window_length = 1 -- confirm against the Environment.
model = Sequential(
    [Flatten(input_shape = (1, 6))]
    + [Dense(units, activation = "relu") for units in (50, 100, 100, 50, 20)]
    + [Dense(output_size, activation = "linear")]
)

In [6]:
# Print the layer-by-layer output shapes and parameter counts as a sanity check
# of the architecture defined above.
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
flatten (Flatten)            (None, 6)                 0         
_________________________________________________________________
dense (Dense)                (None, 50)                350       
_________________________________________________________________
dense_1 (Dense)              (None, 100)               5100      
_________________________________________________________________
dense_2 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_3 (Dense)              (None, 50)                5050      
_________________________________________________________________
dense_4 (Dense)              (None, 20)                1020      
_________________________________________________________________
dense_5 (Dense)              (None, 3)                 63

In [7]:
# Assemble the DQN agent around the Q-network defined above.
dqn = DQNAgent(
    # Network and action space: one Q-value output per action.
    model = model,
    nb_actions = output_size,
    # Action selection via a Boltzmann policy with its default settings.
    policy = BoltzmannQPolicy(),
    # Replay buffer holding up to 50000 entries, one observation per state.
    memory = SequentialMemory(window_length = 1, limit = 50000),
    # NOTE(review): keras-rl is expected to treat a target_model_update below 1
    # as a soft-update rate, and nb_steps_warmup as the number of steps taken
    # before training updates begin -- confirm against the library docs.
    target_model_update = 1e-2,
    nb_steps_warmup = 10,
)

In [8]:
# Compile the agent with an Adam optimizer and report mean absolute error
# alongside the loss.
# The optimizer is configured through `learning_rate`: the `lr` keyword is a
# deprecated alias that newer Keras releases no longer accept.
dqn.compile(Adam(learning_rate = 1e-3), metrics = ["mae"])

In [9]:
# Train on a freshly constructed Environment (non-default settings) for up to
# 500000 steps, without rendering and with progress logging at verbosity 1.
# The call is the cell's last expression, so the returned Keras History object
# is shown as the cell output.
dqn.fit(
    Environment(default_settings = False),
    nb_steps = 500_000,
    verbose = 1,
    visualize = False,
)

Training for 500000 steps ...
Interval 1 (0 steps performed)
Instructions for updating:
This property should not be used in TensorFlow 2.0, as updates are applied automatically.
    1/10000 [..............................] - ETA: 16:45 - reward: 0.0000e+00



299 episodes - episode_reward: 1908.241 [210.000, 5609.000] - loss: 11305.621 - mae: 171.037 - mean_q: 974.782 - time: 337.993

Interval 2 (10000 steps performed)
296 episodes - episode_reward: 1949.824 [216.000, 6083.000] - loss: 9445.619 - mae: 181.241 - mean_q: 1158.452 - time: 338.371

Interval 3 (20000 steps performed)
301 episodes - episode_reward: 1871.103 [198.000, 5548.000] - loss: 7842.271 - mae: 177.496 - mean_q: 1136.845 - time: 337.126

Interval 4 (30000 steps performed)
299 episodes - episode_reward: 1908.074 [192.000, 5925.000] - loss: 6542.283 - mae: 179.252 - mean_q: 1135.776 - time: 337.673

Interval 5 (40000 steps performed)
 1700/10000 [====>.........................] - ETA: 52s - reward: 57.9388done, took 264.133 seconds


<tensorflow.python.keras.callbacks.History at 0x19d230da808>

In [None]:
# Evaluate the trained agent. Agent.test() takes the environment as its first,
# required argument, so a bare dqn.test() fails with a TypeError before any
# episode runs. Evaluate on an Environment built with the same settings used
# for training, for a single episode and without rendering.
dqn.test(Environment(default_settings = False), nb_episodes = 1, visualize = False)

In [None]:
# Run a single evaluation episode, without rendering, on an Environment created
# with print_build = 1. The recorded output shows one 0/1 value printed per
# step -- presumably the environment echoing each build decision; confirm in
# environment.py.
dqn.test(
    Environment(print_build = 1),
    visualize = False,
    nb_episodes = 1,
)

Testing for 1 episodes ...
1
0
0
0
0
0
0
0
0
1
1
0
0
0
0
0
1
1
0
0
0
1
1
0
0
0
0
0
1
1
0
0
0
0
1
1
0
0
0
0
1
1
0
0
0
0
1
1
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
Episode 1: reward: 2673.000, steps: 85


<tensorflow.python.keras.callbacks.History at 0x228587f5f08>

In [None]:
# Scratch arithmetic: 2673 matches the episode reward reported by the test run
# above; the origin of 808 is not shown in this notebook (TODO: document it or
# replace the literals with named values).
2673+808

3481