In [1]:
from vizdoom import *
import random 
import time
import numpy as np

# Open AI Gym dependencies
import gymnasium as gym
from gymnasium import Env
from gymnasium.spaces import Discrete, Box # for random actions and nxm for random observation space (frames)
import cv2

# Stable Baselines3 dependencies
import os
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3 import PPO # training algorithm

Gym has been unmaintained since 2022 and does not support NumPy 2.0 amongst other critical functionality.
Please upgrade to Gymnasium, the maintained drop-in replacement of Gym, or contact the authors of your software and request that they upgrade.
Users of this version of Gym should be able to simply replace 'import gym' with 'import gymnasium as gym' in the vast majority of cases.
See the migration guide at https://gymnasium.farama.org/introduction/migration_guide/ for additional information.


In [2]:
# Logging folders
# CHECKPOINT_DIR is handed to TrainAndLoggingCallback as `save_path` (model checkpoints);
# LOG_DIR is handed to PPO as `tensorboard_log`.
CHECKPOINT_DIR = './TC_train/train_basic'
LOG_DIR = './TC_logs/log_basic'

# Default Actions and Reward
### Defining Gymnasium Wrapper for TakeCover Gameplay

In [3]:
# Creating Vizdoom OpenAI Gym Environment
class VizDoomGym(Env):
    """Gymnasium environment wrapping the ViZDoom "Take Cover" scenario.

    Observations are grayscale frames of shape (100, 160, 1), dtype uint8.
    The action space is ``Discrete(2)`` (left, right); each discrete action is
    translated into a one-hot button vector before being sent to the game.

    Args:
        render_mode: when falsy (default) the game window is hidden, which makes
            training faster; when truthy the window is shown.
    """

    # Number of game tics every chosen action is repeated for (frame skip)
    FRAME_SKIP = 4
    # Maximum number of `step` calls per episode before it is truncated
    MAX_STEPS = 2100
    # One-hot button vectors, one row per discrete action: [left, right]
    ACTIONS = np.identity(2, dtype=np.uint8)

    def __init__(self, render_mode = False): # By default, rendering is disabled
        # Inheriting from the Env class
        super().__init__()
        # Setup game
        self.game = DoomGame()
        self.game.load_config("github_vizdoom_repo/ViZDoom/scenarios/take_cover.cfg") # Take Cover configuration file

        # Rendering mode: if disabled, the game is not displayed but training is faster
        self.game.set_window_visible(bool(render_mode))

        # Start the game
        self.game.init()

        # In order to get the game frame size, run a dummy demo and get the screen buffer shape with game.get_state().screen_buffer.shape
        self.observation_space = Box(low=0, high=255, shape=(100, 160, 1), dtype = np.uint8)
        # Action space
        self.action_space = Discrete(2) # left, right
        self.current_step = 0

    # Defining how to make a step in the env
    def step(self, action):
        """Apply `action` for FRAME_SKIP tics and return the Gymnasium 5-tuple.

        Returns:
            (obs, reward, terminated, truncated, info). When the game has no
            state left (episode over) the observation is all zeros,
            `terminated` is True and `info` is an empty dict.
        """
        reward = self.game.make_action(self.ACTIONS[action], self.FRAME_SKIP)
        # Count this step *before* checking the cap, so the episode is truncated
        # on the MAX_STEPS-th step rather than one step later.
        self.current_step += 1

        # Take a single snapshot of the state instead of querying the game repeatedly
        game_state = self.game.get_state()
        if game_state is not None:
            obs = self.grayscale(game_state.screen_buffer)
            # assumes game variable 0 is the player's health -- TODO confirm against take_cover.cfg
            info = {"health": game_state.game_variables[0]}
            terminated = self.game.is_episode_finished()
            truncated = self.current_step >= self.MAX_STEPS
        else: # No state available: default zeros observation
            obs = np.zeros(self.observation_space.shape, dtype=np.uint8)
            terminated = True
            truncated = False
            info = {} # The Gymnasium API expects a dict here, so return an empty one

        return obs, reward, terminated, truncated, info # Order follows the Gymnasium API

    def render(self):
        """No-op: the ViZDoom window, when enabled, is drawn by the game itself."""
        pass

    # What happens when starting a new episode
    def reset(self, seed = None, options = None):
        """Start a new episode and return `(obs, info)`.

        When `seed` is given it is forwarded to `Env.reset` and also used to
        seed Python's `random` module and NumPy's global RNG.
        """
        super().reset(seed=seed)
        if seed is not None:
            random.seed(seed)
            np.random.seed(seed)

        self.game.new_episode()
        game_state = self.game.get_state()
        obs = self.grayscale(game_state.screen_buffer)
        info = {"health": game_state.game_variables[0]}
        self.current_step = 0

        return obs, info

    # Grayscale and resize the frames in order to reduce the observation space
    ## POSSIBLE IMPROVEMENT : CUT OFF BOTTOM PART OF THE IMAGE WHERE THERE IS NO USEFUL INFORMATION
    def grayscale(self, observation):
        """Convert a raw screen buffer into a (100, 160, 1) grayscale frame.

        The first axis of `observation` is moved to the last position before
        the colour conversion (i.e. the buffer is expected channel-first).
        """
        channels_last = np.moveaxis(observation, 0, -1) # moveaxis moves the first axis (0) to the last position (-1)
        gray = cv2.cvtColor(channels_last, cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160, 100), interpolation=cv2.INTER_CUBIC) # cv2 takes (width, height)
        state = np.reshape(resize, (100, 160, 1))
        return state

    def close(self):
        """Shut down the underlying ViZDoom game instance."""
        self.game.close()

In [4]:
# Basic log and train callback
class TrainAndLoggingCallback(BaseCallback):
    """Callback that saves the model every `check_freq` calls.

    Args:
        check_freq: a checkpoint is written whenever `n_calls` is a multiple
            of this value.
        save_path: directory for the checkpoints; created on callback init.
            When None, no directory is created and no checkpoint is written.
        verbose: verbosity level forwarded to `BaseCallback`.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory if a save path was given."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save a checkpoint when due; always return True so training continues."""
        # Same None guard as in _init_callback: without it os.path.join(None, ...)
        # would raise a TypeError at the first checkpoint.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}.zip'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [5]:
# Checkpoint the model into CHECKPOINT_DIR every 10000 callback calls
callback = TrainAndLoggingCallback(
    save_path=CHECKPOINT_DIR,
    check_freq=10000,
)

In [6]:
# Training environment: game window hidden (render_mode defaults to False)
env = VizDoomGym(render_mode=False)

# PPO agent; CnnPolicy because the observations are image frames
agent = PPO(
    'CnnPolicy',
    env,
    learning_rate=0.00025,
    n_steps=2048,
    tensorboard_log=LOG_DIR,
    verbose=1,
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [7]:
# Train for 100000 environment steps, checkpointing through `callback`
agent.learn(callback=callback, total_timesteps=100_000)

Logging to ./TC_logs/log_basic/PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 85       |
|    ep_rew_mean     | 339      |
| time/              |          |
|    fps             | 230      |
|    iterations      | 1        |
|    time_elapsed    | 8        |
|    total_timesteps | 2048     |
---------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 86.2         |
|    ep_rew_mean          | 344          |
| time/                   |              |
|    fps                  | 204          |
|    iterations           | 2            |
|    time_elapsed         | 19           |
|    total_timesteps      | 4096         |
| train/                  |              |
|    approx_kl            | 0.0069572744 |
|    clip_fraction        | 0.327        |
|    clip_range           | 0.2          |
|    entropy_loss         | -0.669       |
|    explained_varian

<stable_baselines3.ppo.ppo.PPO at 0x7f71347d6a10>

# Testing and Evaluation

In [None]:
# Visual testing of the trained agent
agent = PPO.load('./TC_train/train_basic/best_model_100000.zip')
env = VizDoomGym(render_mode=True)

try:
    for episode in range(5):
        obs, info = env.reset()
        done = False
        total_reward = 0

        while not done:
            action, _ = agent.predict(obs)
            obs, reward, terminated, truncated, info = env.step(action)
            # The episode ends on either flag. Without this update the loop never
            # exits and keeps stepping a game whose episode is already over.
            done = terminated or truncated
            total_reward += reward
            time.sleep(0.02) # Slow down the rendering

        print("Episode: {}, Total Reward: {}".format(episode + 1, total_reward))
finally:
    # Always release the ViZDoom instance, even if an episode raised
    env.close()



*** Fatal Error ***
Address not mapped to object (signal 11)
Address: 0x7f35549a5f28

Generating vizdoom-crash.log and killing process 13389, please wait... sh: 1: gdb: not found
