#### Setting up game configurations

In [None]:
from vizdoom import *
import random # for random actions
import time
import numpy as np

# Open AI Gym dependencies
import gymnasium as gym
from gymnasium import Env
from gymnasium.spaces import Discrete, Box # for random actions and nxm for random observation space (frames)
import cv2

### Converting it to a Gym Environment

In [None]:
# Creating Vizdoom OpenAI Gym Environment
class VizDoomGym(Env):
    """Gymnasium environment wrapping the ViZDoom scenario loaded from ``basic.cfg``.

    Observations are single-channel ``uint8`` frames of shape ``(100, 160, 1)``
    produced by :meth:`grayscale`. Actions are three discrete choices
    (left, right, shoot), each sent to the game as a one-hot button vector.
    """

    # Number of game tics each chosen action is repeated for.
    FRAME_SKIP = 4
    # Step budget after which an episode is reported as truncated.
    MAX_STEPS = 2100

    def __init__(self, render_mode = False): # By default, rendering is disabled
        """Create and start the game.

        Args:
            render_mode: ``False`` (default) hides the game window, which makes
                training faster; any other value shows it.
        """
        # Inheriting from the Env class
        super().__init__()
        # Setup game
        self.game = DoomGame()
        self.game.load_config("github_vizdoom_repo/ViZDoom/scenarios/basic.cfg") # Loading game configuration file

        # Rendering mode: if disabled, the game is not displayed but training is faster
        if render_mode == False:
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)

        # Start the game
        self.game.init()

        # Frame size after preprocessing; must match what grayscale() returns
        self.observation_space = Box(low=0, high=255, shape=(100, 160, 1), dtype = np.uint8)
        # Action space
        self.action_space = Discrete(3) # left, right, shoot
        # One-hot button vectors, built once instead of on every step
        self._actions = np.identity(3, dtype=np.uint8)
        self.current_step = 0

    # Defining how to make a step in the env
    def step(self, action):
        """Apply ``action`` for ``FRAME_SKIP`` tics and report the outcome.

        Args:
            action: Index into the three one-hot button vectors.

        Returns:
            ``(obs, reward, terminated, truncated, info)`` in Gymnasium order.
        """
        reward = self.game.make_action(self._actions[action], self.FRAME_SKIP)
        # Count this step before the truncation check so that the episode is
        # truncated on step MAX_STEPS itself rather than one step later.
        self.current_step += 1

        # Read the state once; it is reused for the frame and the game variables
        game_state = self.game.get_state()
        if game_state is not None:
            obs = self.grayscale(game_state.screen_buffer)
            ammo = game_state.game_variables[0]
            terminated = self.game.is_episode_finished()
            truncated = self.current_step >= self.MAX_STEPS  # Max steps
            info = {"ammo": ammo}
        else: # No state available: return an all-zeros observation
            obs = np.zeros(self.observation_space.shape, dtype=np.uint8)
            terminated = True
            truncated = False
            info = {} # Empty dict: info must be a dict, so 0 is not allowed

        return obs, reward, terminated, truncated, info # Order follows the Gymnasium API

    def render(self):
        """Do nothing; display is controlled by the window visibility set in ``__init__``."""
        pass

    # What happens when starting a new episode
    def reset(self, seed = None, options = None):
        """Start a new episode.

        Args:
            seed: Optional seed, forwarded to ``Env.reset`` and also used to
                seed ``random`` and ``np.random``.
            options: Accepted for API compatibility; not used.

        Returns:
            ``(obs, info)`` where ``info`` holds the first game variable under
            the key ``"ammo"``.
        """
        super().reset(seed=seed)
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)

        self.game.new_episode()
        # Read the state once and reuse it for both the frame and the variables
        game_state = self.game.get_state()
        obs = self.grayscale(game_state.screen_buffer)
        info = {"ammo": game_state.game_variables[0]}
        self.current_step = 0

        return obs, info

    # Grayscale and resize the frames in order to reduce the observation space
    ## POSSIBLE IMPROVEMENT : CUT OFF BOTTOM PART OF THE IMAGE WHERE THERE IS NO USEFUL INFORMATION
    def grayscale(self, observation):
        """Convert a raw screen buffer into a ``(100, 160, 1)`` grayscale frame.

        Args:
            observation: Screen buffer whose first axis is moved to the end
                before colour conversion (assumes a channel-first colour
                image -- TODO confirm against the scenario's screen format).

        Returns:
            The resized grayscale frame with a trailing channel axis.
        """
        # moveaxis moves the first axis (0) to the last position (-1)
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        # cv2.resize takes (width, height), hence (160, 100) for a 100x160 frame
        resize = cv2.resize(gray, (160, 100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160, 1))
        return state

    def close(self):
        """Shut down the underlying game instance."""
        self.game.close()

### Setting up Callbacks

In [None]:
import os
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common import env_checker

In [None]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that writes the model to ``save_path`` every ``check_freq`` calls."""

    def __init__(self, check_freq, save_path, verbose=1):
        """Store the save interval and the output directory."""
        super().__init__(verbose)
        self.save_path = save_path
        self.check_freq = check_freq

    def _init_callback(self):
        """Create the output directory, unless no directory was given."""
        if self.save_path is None:
            return
        os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save a checkpoint when the call counter is a multiple of ``check_freq``."""
        checkpoint_due = self.n_calls % self.check_freq == 0
        if checkpoint_due:
            file_name = 'best_model_{}.zip'.format(self.n_calls)
            self.model.save(os.path.join(self.save_path, file_name))

        # Always True, whether or not a checkpoint was written
        return True

In [None]:
# Directory passed to the callback as the location for saved model checkpoints
CHECKPOINT_DIR = './train/train_basic'
# Directory passed to PPO as its TensorBoard log location
LOG_DIR = './logs/log_basic'

In [None]:
# Save a checkpoint into CHECKPOINT_DIR every 10000 callback calls
callback = TrainAndLoggingCallback(check_freq=10000, save_path = CHECKPOINT_DIR)

### Training the agent with the PPO Algorithm

In [None]:
# Import PPO for training
from stable_baselines3 import PPO

In [None]:
env = VizDoomGym() # training environment: render_mode defaults to False, so the game window stays hidden

In [None]:
# Instantiate the PPO model on the (non-rendered) training environment
model = PPO('CnnPolicy', # policy type -> CnnPolicy since we are working on image frames
            env,
            tensorboard_log=LOG_DIR, # training logs are written under LOG_DIR
            verbose=1,
            learning_rate=0.00025,
            n_steps=2048)

In [None]:
# Train for 100000 timesteps; the callback saves a checkpoint every check_freq calls
model.learn(total_timesteps = 100000, callback=callback)

### Testing the trained agent in a real-time game

In [None]:
# Import evaluation policy to test the agent
from stable_baselines3.common.evaluation import evaluate_policy

In [None]:
# Reload the checkpoint saved at call 100000 from disk.
# NOTE: the "best_model_" prefix is only the callback's file-naming scheme; the
# callback saves unconditionally at fixed intervals and does not compare models.
model = PPO.load("./train/train_basic/best_model_100000.zip")

In [None]:
env = VizDoomGym(render_mode= True) # rendered env: the game window is visible so the agent can be watched

In [None]:
# Evaluate the loaded model over 100 episodes; only the first returned value is kept
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=100)

In [None]:
# Bare expression: shown as the cell output when run in a notebook
mean_reward

In [None]:
# Play 5 episodes with the trained model and print the total reward of each
for episode in range(5):
    obs, info = env.reset()
    done = False
    total_reward = 0

    while not done:
        # predict() returns a tuple; its first element is the action to take
        action, _ = model.predict(obs)
        obs, reward, terminated, truncated, info = env.step(action)
        # The episode is over when either flag is set; without this update
        # the loop would keep stepping after the episode has ended.
        done = terminated or truncated
        time.sleep(0.05)  # slow playback down so it can be followed on screen
        total_reward += reward

    print(f"Total Reward for Episode {episode} is {total_reward}")
    time.sleep(2)  # short pause between episodes