# HSKA AI-Lab RL: Assignment

## Mount Google Drive as folder

In [None]:
# Colab-only helper module used to attach Google Drive to this runtime.
from google.colab import drive

# Mount Google Drive at /content/drive. `force_remount=True` is passed so the
# mount is requested again even when the cell is re-run in the same session.
drive.mount('/content/drive', force_remount=True)
%cd /content/drive/MyDrive/ai-lab rl

Es soll ein DQN Agent trainiert werden, der ein [Atari 2600](https://www.gymlibrary.dev/environments/atari/complete_list/) Spiel spielen kann.
Der Ansatz ist frei – ihr könnt euch an Aufgabe 4 orientieren oder die Methode auf eure Art implementieren.

### "Quiz"

- Wann ist der Agent gut genug? Was ist ein gutes Erfolgskriterium?
- Was für eine Architektur soll das Q-Network haben?

### It's dangerous to go alone! Take this.

In [None]:
%pip install --upgrade pip
%pip install gym[atari]==0.12.5
%pip install pyglet==1.3.2

import gym

import random
from collections import deque
from typing import Tuple
import time
from datetime import datetime
from contextlib import suppress
from abc import abstractmethod
import os, json

import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Lambda, multiply, Input
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.utils import to_categorical
from tensorflow.compat.v1.losses import huber_loss
from tensorflow.compat.v1.keras.backend import set_session
from loggers import TensorBoardLogger, tf_summary_image

%pip install matplotlib
import matplotlib.pyplot as plt
%matplotlib inline

from plot_utils import plot_statistics
from abstract_agent import AbstractAgent
from atari_helpers import LazyFrames, wrap_deepmind, make_atari

!apt-get install -y xvfb python-opengl
!python -m pip install pyvirtualdisplay
from pyvirtualdisplay import Display

# Create and start a pyvirtualdisplay `Display` (visible=0, 1400x900).
# Presumably needed so that rendering works on a headless machine - confirm.
display = Display(visible=0, size=(1400, 900))
display.start()

# Treat the session as an IPython/Jupyter one when matplotlib reports an
# "inline" backend.
is_ipython = 'inline' in plt.get_backend()
if is_ipython:
    # NOTE(review): this import rebinds the name `display`, so the `Display`
    # object started above is no longer reachable under that name afterwards
    # (e.g. it cannot be stopped via `display.stop()` any more).
    from IPython import display
    from IPython.display import SVG

# Switch matplotlib to interactive mode.
plt.ion()

In [None]:
# Build the environment with the course helpers imported from `atari_helpers`.
# 'CHOOSE_ATARI_GAME' is a placeholder: replace it with the id of the game you
# want to train on (presumably a "...NoFrameskip-..." id like the one in the
# commented-out example below - confirm against `make_atari`).
env = make_atari('CHOOSE_ATARI_GAME')
# Wrap the environment with the `wrap_deepmind` helper, frame stacking enabled.
env = wrap_deepmind(env, frame_stack=True)

# Alternative: create the environment directly with plain OpenAI Gym.
# env = gym.make('EnduroNoFrameskip-v0')

### Training

Hier findet ihr ein paar Inspirationen, wo ihr das doch recht aufwendige Training laufen lassen könnt:

- [Google Colab](https://colab.research.google.com/)
- [Kaggle](https://www.kaggle.com/docs/tpu)
- [Gradient Paperspace](https://www.paperspace.com/gradient)
- KI-Rechner der Hochschule

Unabhängig von eurer Trainingsumgebung: Macht euch Gedanken über Checkpointing.
Als Startpunkt geben wir euch einen erweiterten `DQNAgenten` aus Aufgabe 4 mit, der euch ein rudimentäres Checkpointing ermöglicht:

In [None]:
class DQNAgent(AbstractAgent):
    """Base class for a DQN agent with rudimentary checkpointing.

    The class stores the hyperparameters, owns the replay memory and the two
    networks (`model` and `target_model`, both created by `_build_model`) and
    offers `save` / `load` to persist and restore that state. The learning
    logic itself (`train`, `act`, `_build_model`) must be supplied by a
    subclass.
    """

    # NOTE: besides declaring slots, this list is used by `save` as the set of
    # constructor arguments that is written to disk and handed back to
    # `__init__` by `load`. Further attributes (memory, step, models, ...) are
    # assigned in `__init__`, so the base class presumably provides a
    # `__dict__` - verify against `AbstractAgent`.
    __slots__ = [
        "action_size",
        "state_size",
        "gamma",
        "epsilon",
        "epsilon_decay",
        "epsilon_min",
        "alpha",
        "batch_size",
        "memory_size",
        "start_replay_step",
        "target_model_update_interval",
        "train_freq",
    ]

    # Names of the TensorFlow session settings that are checkpointed.
    _TF_CONFIG_ATTRS = (
        "tf_config_intra_threads",
        "tf_config_inter_threads",
        "tf_config_soft_placement",
        "tf_config_allow_growth",
    )

    def __init__(self,
                 action_size: int,
                 state_size: int,
                 gamma: float,
                 epsilon: float,
                 epsilon_decay: float,
                 epsilon_min: float,
                 alpha: float,
                 batch_size: int,
                 memory_size: int,
                 start_replay_step: int,
                 target_model_update_interval: int,
                 train_freq: int,
                 ):
        """Store the hyperparameters, configure the TF session, build the networks.

        Args:
            action_size: Number of actions; also the width of the action masks.
            state_size: Stored unchanged; not used by this base class.
            gamma: Stored for the subclass (conventionally the discount factor).
            epsilon: Stored for the subclass (conventionally the exploration rate).
            epsilon_decay: Stored for the subclass.
            epsilon_min: Stored for the subclass.
            alpha: Stored for the subclass (conventionally the learning rate).
            batch_size: Number of rows of `action_mask_batch`; must not exceed
                `start_replay_step`.
            memory_size: Maximum number of entries kept in the replay memory.
            start_replay_step: Stored for the subclass; must be at least
                `batch_size`.
            target_model_update_interval: Stored for the subclass.
            train_freq: Stored for the subclass.

        Raises:
            AssertionError: If `start_replay_step` is smaller than `batch_size`.
        """
        self.action_size = action_size
        self.state_size = state_size

        self.gamma = gamma
        self.epsilon = epsilon
        self.epsilon_decay = epsilon_decay
        self.epsilon_min = epsilon_min
        self.alpha = alpha

        self.memory_size = memory_size
        # Bounded deque: once `memory_size` entries are stored, the oldest
        # entry is dropped for every new one.
        self.memory = deque(maxlen=self.memory_size)
        self.batch_size = batch_size

        self.step = 0
        self.start_replay_step = start_replay_step

        self.target_model_update_interval = target_model_update_interval

        self.train_freq = train_freq

        assert self.start_replay_step >= self.batch_size, "The number of steps to start replay must be at least as large as the batch size"

        # All-ones masks for a single state and for a full batch.
        self.action_mask = np.ones((1, self.action_size))
        self.action_mask_batch = np.ones((self.batch_size, self.action_size))

        self.tf_config_intra_threads = 8
        self.tf_config_inter_threads = 4
        self.tf_config_soft_placement = True
        self.tf_config_allow_growth = True

        config = tf.compat.v1.ConfigProto(
            intra_op_parallelism_threads=self.tf_config_intra_threads,
            inter_op_parallelism_threads=self.tf_config_inter_threads,
            allow_soft_placement=self.tf_config_soft_placement,
        )

        config.gpu_options.allow_growth = self.tf_config_allow_growth
        session = tf.compat.v1.Session(config=config)
        set_session(session)  # set this TensorFlow session as the default session for Keras

        self.model = self._build_model()
        self.target_model = self._build_model()

    @staticmethod
    def _json_default(obj):
        """Convert NumPy values that `json` cannot encode into plain Python data.

        Used as the `default` hook of `json.dump` so that replay-memory entries
        holding NumPy scalars, arrays or array-likes exposing `__array__` can
        be written to the checkpoint.

        Raises:
            TypeError: If `obj` is none of the supported kinds.
        """
        if isinstance(obj, np.generic):
            return obj.item()
        if isinstance(obj, np.ndarray) or hasattr(obj, "__array__"):
            return np.asarray(obj).tolist()
        raise TypeError(f"Object of type {type(obj).__name__} is not JSON serializable")

    def save(self, target_path: str) -> None:
        """Write the current state of the agent to the directory `target_path`.

        Creates `agent.json` (constructor arguments, action masks, replay
        memory, step counter and TF session settings) plus `model.h5` and
        `target_model.h5` with the network weights. Together with `load` this
        serves as a very rudimentary checkpointing.

        Args:
            target_path: Output directory; created if it does not exist.
        """
        os.makedirs(target_path, exist_ok=True)

        agent_init = {slot: getattr(self, slot) for slot in self.__slots__}
        agent_init["memory_size"] = self.memory.maxlen

        agent_dict = {
            "agent_init": agent_init,
            "agent_params": {
                "action_mask": self.action_mask.tolist(),
                "action_mask_batch": self.action_mask_batch.tolist(),
                # NOTE: the whole replay memory is written as JSON text; for
                # large memories a binary format would be far more compact.
                "memory": list(self.memory),
                "step": self.step,
            },
            "tf_config": {name: getattr(self, name) for name in self._TF_CONFIG_ATTRS},
        }

        with open(os.path.join(target_path, "agent.json"), "w", encoding="utf-8") as f:
            # `default` makes NumPy data inside the replay memory serializable.
            json.dump(agent_dict, f, default=self._json_default)

        self.model.save_weights(os.path.join(target_path, "model.h5"))
        self.target_model.save_weights(os.path.join(target_path, "target_model.h5"))

    @classmethod
    def load(cls, path: str) -> "DQNAgent":
        """Restore an agent from a directory previously written by `save`.

        Args:
            path: Directory containing `agent.json`, `model.h5` and
                `target_model.h5`.

        Returns:
            A new instance of `cls` with the stored hyperparameters, masks,
            step counter, replay memory and network weights.
        """
        with open(os.path.join(path, "agent.json"), "r", encoding="utf-8") as f:
            agent_dict = json.load(f)

        agent = cls(**agent_dict["agent_init"])

        params = agent_dict["agent_params"]
        agent.action_mask = np.array(params["action_mask"])
        agent.action_mask_batch = np.array(params["action_mask_batch"])
        # Checkpoints written before "step" was stored fall back to 0.
        agent.step = params.get("step", 0)
        # JSON has no tuples or arrays: restored entries are nested lists.
        agent.memory.extend(params.get("memory", []))

        tf_config = agent_dict["tf_config"]
        for name in cls._TF_CONFIG_ATTRS:
            # Keep the instance in sync so a later `save` writes the same values.
            setattr(agent, name, tf_config[name])

        config = tf.compat.v1.ConfigProto(
            intra_op_parallelism_threads=tf_config["tf_config_intra_threads"],
            inter_op_parallelism_threads=tf_config["tf_config_inter_threads"],
            allow_soft_placement=tf_config["tf_config_soft_placement"],
        )

        config.gpu_options.allow_growth = tf_config["tf_config_allow_growth"]
        session = tf.compat.v1.Session(config=config)
        set_session(session)

        # Resolve the weight files relative to `path`, not the current directory.
        agent.model.load_weights(os.path.join(path, "model.h5"))
        agent.target_model.load_weights(os.path.join(path, "target_model.h5"))

        return agent

    @abstractmethod
    def train(self, experience):
        """Process `experience`; must be implemented by the subclass."""
        raise NotImplementedError

    @abstractmethod
    def act(self, state):
        """Choose an action for `state`; must be implemented by the subclass."""
        raise NotImplementedError

    @abstractmethod
    def _build_model(self):
        """Create and return a network; must be implemented by the subclass.

        Called twice by `__init__` (for `model` and `target_model`). The result
        must offer `save_weights` and `load_weights`, which `save` and `load`
        call on it.
        """
        raise NotImplementedError

**Das soll kein fertiges Checkpointing darstellen. Bitte verwendet es nur als Ausgangspunkt/Inspiration und passt es nach euren Bedürfnissen an.**

Hier findet ihr noch ein paar allgemeine Infos von [Tensorflow dazu](https://www.tensorflow.org/guide/checkpoint).