In [65]:
import json
from tqdm import tqdm
import pandas as pd
import time
from utils import play_game2
import numpy as np
from tensorflow.keras.optimizers import RMSprop
from tensorflow.keras.layers import Input, Conv2D, Flatten, Dense
from tensorflow.keras import Model
from agents.agent import Agent
from agents.agent import mean_huber_loss
from game_environment import SnakeNumpy

In [52]:
# Experiment tag: handed to the agent and the environments, and used as the
# file name of the training-log CSV.
version = "v17.1"

In [53]:
class DeepQLearningAgent(Agent):
    """Deep Q-learning agent whose Keras network is described by a JSON file.

    The network layout is read from ``model_config/<version>.json``. When
    ``use_target_net`` is true, a second, independently built network is
    kept as the target network and synchronised through ``update_target``.

    Parameters
    ----------
    board_size : int
        Side length of the square board; sets the network input height/width.
    frames : int
        Number of stacked frames; sets the network input channel count.
    buffer_size : int
        Forwarded to ``Agent.__init__``.
    n_actions : int
        Number of actions; sets the width of the output layer.
    version : str
        Model version; selects the JSON config file to load.
    use_target_net : bool, optional
        Whether to keep a separate target network (default True).
    gamma : float, optional
        Forwarded to ``Agent.__init__`` (default 0.99).
    """

    def __init__(self, board_size, frames, buffer_size, n_actions, version, use_target_net=True, gamma=0.99):
        super().__init__(board_size, frames, buffer_size, gamma, n_actions, use_target_net, version)
        # Resolve the config path from the requested version instead of
        # hard-coding one file, so other versions can reuse this class.
        self._model_config_path = 'model_config/{:s}.json'.format(version)
        self._model = self.model()
        if use_target_net:
            # The target network must be a distinct model object; aliasing
            # the online network would make it track every gradient step and
            # turn update_target() into a no-op.
            self._target_net = self.model()
            self.update_target()
        else:
            # No separate target requested: keep the attribute pointing at
            # the online network so code reading `_target_net` still works.
            self._target_net = self._model

    def model(self):
        """Build and compile a fresh Q-network from the JSON layer config.

        Config entries under ``"model"`` are applied in file order; an entry
        whose key contains ``Conv2D``, ``Flatten`` or ``Dense`` adds the
        corresponding Keras layer, with the entry's value passed as keyword
        arguments (``Flatten`` takes none). A final linear ``Dense`` layer
        with ``n_actions`` units produces the action values.

        Returns
        -------
        tensorflow.keras.Model
            Model compiled with RMSprop (learning rate 0.0005) and
            ``mean_huber_loss``.
        """
        # Fall back to the historical path if model() is reached before
        # __init__ has recorded the version-specific one.
        config_path = getattr(self, '_model_config_path', 'model_config/v17.1.json')
        with open(config_path, 'r') as f:
            m = json.load(f)

        input_board = Input((self._board_size, self._board_size, self._n_frames,), name='input')
        x = input_board
        for layer in m['model']:
            l = m['model'][layer]
            if 'Conv2D' in layer:
                # add convolutional layer
                x = Conv2D(**l)(x)
            if 'Flatten' in layer:
                x = Flatten()(x)
            if 'Dense' in layer:
                x = Dense(**l)(x)
        out = Dense(self._n_actions, activation='linear', name='action_values')(x)
        model = Model(inputs=input_board, outputs=out)  # Keras model
        model.compile(optimizer=RMSprop(0.0005), loss=mean_huber_loss)
        return model

    def update_target(self):
        """Copy the online network's weights into the target network.

        Does nothing when the target network is the online network itself
        (i.e. no separate target network is in use).
        """
        if self._target_net is not self._model:
            self._target_net.set_weights(self._model.get_weights())


In [55]:
# Environment / agent configuration shared by the cells below.
board_size, frames, n_actions = 10, 2, 4
max_time_limit = 998      # passed to SnakeNumpy as max_time_limit
buffer_size = 80000       # passed to the agent as buffer_size
supervised = obstacles = False

frame_mode = True

In [64]:
# Build the agent from the configuration defined above.
agent = DeepQLearningAgent(
    board_size=board_size,
    frames=frames,
    buffer_size=buffer_size,
    n_actions=n_actions,
    version=version,
)

In [63]:
# Training hyper-parameters.
# Exploration: `epsilon` is the rate handed to play_game2; `epsilon_end` and
# `decay` are presumably its floor and multiplicative decay - confirm where applied.
epsilon = 1
epsilon_end = 0.01
decay = 0.97

# Options forwarded to play_game2 while collecting training data.
reward_type = 'current'
sample_actions = False

# Parallel games for training and for evaluation.
n_games_training = 128
games_eval = 8

# Number of training iterations and how often (in iterations) to evaluate/log.
episodes = 200000
log_frequency = 500

In [60]:
# Warm-up: play `games` parallel games with recording enabled so the agent's
# buffer holds data before training starts, and time how long that takes.
games = 512
env = SnakeNumpy(
    board_size=board_size, frames=frames, max_time_limit=max_time_limit,
    games=games, frame_mode=True, obstacles=obstacles, version=version,
)
ct = time.time()
_ = play_game2(
    env, agent, n_actions,
    n_games=games, record=True, epsilon=epsilon, verbose=True,
    reset_seed=False, frame_mode=True, total_frames=games * 64,
)
print('Playing {:d} frames took {:.2f}s'.format(games * 64, time.time() - ct))

Playing 32768 frames took 0.31s


In [62]:
# Two environments that differ only in the number of parallel games:
# `env` is used for training games, `env2` for evaluation games.
env, env2 = (
    SnakeNumpy(board_size=board_size, frames=frames,
               max_time_limit=max_time_limit, games=n_parallel,
               frame_mode=True, obstacles=obstacles, version=version)
    for n_parallel in (n_games_training, games_eval)
)

In [None]:
# Per-evaluation training log: one (initially empty) list per column.
model_logs = {
    column: []
    for column in ('iteration', 'reward_mean', 'length_mean', 'games', 'loss')
}



In [None]:
# Main training loop.
# Exploration is annealed on a local copy so that re-running this cell (or the
# buffer warm-up cell) still starts from the configured `epsilon`.
train_epsilon = epsilon
for index in tqdm(range(episodes)):
    # make small changes to the buffer and slowly train
    _, _, _ = play_game2(env, agent, n_actions, epsilon=train_epsilon,
                         n_games=n_games_training, record=True,
                         sample_actions=sample_actions, reward_type=reward_type,
                         frame_mode=True, total_frames=n_games_training,
                         stateful=True)
    loss = agent.train_agent(batch_size=64,
                             num_games=n_games_training, reward_clip=True)

    # check performance every once in a while
    if (index + 1) % log_frequency == 0:
        # epsilon=-1 presumably disables random exploration during
        # evaluation - confirm against play_game2.
        current_rewards, current_lengths, current_games = \
            play_game2(env2, agent, n_actions, n_games=games_eval, epsilon=-1,
                       record=False, sample_actions=False, frame_mode=True,
                       total_frames=-1, total_games=games_eval)

        model_logs['iteration'].append(index + 1)
        model_logs['reward_mean'].append(round(int(current_rewards) / current_games, 2))
        model_logs['length_mean'].append(round(int(current_lengths) / current_games, 2))
        model_logs['games'].append(current_games)
        model_logs['loss'].append(loss)
        # Rewrite the whole CSV each time so partial progress survives an
        # interrupted run.
        pd.DataFrame(model_logs)[['iteration', 'reward_mean', 'length_mean', 'games', 'loss']] \
            .to_csv('model_logs/{:s}.csv'.format(version), index=False)

        # Periodically refresh the target network; without this it keeps
        # the weights it had at construction time.
        agent.update_target()
        # Anneal exploration, but keep a little of it alive; previously
        # `decay` and `epsilon_end` were defined and never applied.
        train_epsilon = max(train_epsilon * decay, epsilon_end)