<a href="https://colab.research.google.com/github/ccasanoval/RLtests/blob/master/DoomV4.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

DoomV4

RL  = Stable-Baselines3 : PPO

ENV = Gymnasium + VizDoom

URL = https://github.com/AKapich/Reinforcement_Learning_Doom

URL = https://stable-baselines3.readthedocs.io/en/master/

In [6]:
!pip install vizdoom
!pip install stable_baselines3

Collecting stable_baselines3
  Downloading stable_baselines3-2.3.2-py3-none-any.whl.metadata (5.1 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.1.105 (from torch>=1.13->stable_baselines3)
  Using cached nvidia_cuda_nvrtc_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.1.105 (from torch>=1.13->stable_baselines3)
  Using cached nvidia_cuda_runtime_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.1.105 (from torch>=1.13->stable_baselines3)
  Using cached nvidia_cuda_cupti_cu12-12.1.105-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==8.9.2.26 (from torch>=1.13->stable_baselines3)
  Using cached nvidia_cudnn_cu12-8.9.2.26-py3-none-manylinux1_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.1.3.1 (from torch>=1.13->stable_baselines3)
  Using cached nvidia_cublas_cu12-12.1.3.1-py3-none-manylinux1_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuff

In [7]:
############### GYM ENV == VIZ DOOM ###########################################
# config_file_path = os.path.join(vzd.scenarios_path, "defend_the_center.cfg")
# game = vzd.DoomGame()
# game.load_config(config_file_path)
# game.set_window_visible(False)
# game.set_mode(vzd.Mode.PLAYER)
# game.set_screen_format(vzd.ScreenFormat.GRAY8)
# game.set_screen_resolution(vzd.ScreenResolution.RES_640X480)#cnfg=screen_resolution = RES_320X240
# #game.add_game_args("doom_skill=1")
# game.init()
from vizdoom import *
import numpy as np
from gymnasium import Env
from gymnasium.spaces import Discrete, Box
import cv2

class VizDoomGym(Env):
    """Gymnasium environment wrapping a ViZDoom scenario with one-hot discrete actions.

    Observations are the game screen converted to a single-channel
    ``(100, 160, 1)`` ``uint8`` image (see :meth:`grayscale`). Discrete action
    ``i`` is sent to the game as row ``i`` of an identity matrix, i.e. exactly
    one button is pressed per step.
    """

    def __init__(self, scenario, render=True, number_of_actions=3, frame_skip=4):
        """Create and initialise the underlying ``DoomGame``.

        Args:
            scenario: Config name without extension; ``f"{scenario}.cfg"`` is loaded.
            render: Passed to ``set_window_visible``; whether the game window is shown.
            number_of_actions: Size of the discrete action space (and of the
                one-hot vectors sent to the game).
            frame_skip: Second argument given to ``make_action`` on every step
                (the number of tics each action is applied for). Defaults to 4,
                the value that was previously hard-coded.
        """
        self.game = DoomGame()
        self.game.load_config(f"{scenario}.cfg")

        self.game.set_window_visible(render)
        self.game.init()

        self.observation_space = Box(
            low=0, high=255, shape=(100, 160, 1), dtype=np.uint8
        )
        self.number_of_actions = number_of_actions
        self.frame_skip = frame_skip
        self.action_space = Discrete(number_of_actions)

    def _blank_observation(self):
        """Return an all-zero observation matching the declared space (shape and dtype)."""
        return np.zeros(
            self.observation_space.shape, dtype=self.observation_space.dtype
        )

    def step(self, action):
        """Apply one discrete action and return the Gymnasium 5-tuple.

        Args:
            action: Index in ``[0, number_of_actions)`` selecting the one-hot
                button vector.

        Returns:
            ``(observation, reward, terminated, truncated, info)`` where
            ``info`` is ``{"info": <first game variable>}``.
        """
        one_hot_actions = np.identity(self.number_of_actions)
        reward = self.game.make_action(one_hot_actions[action], self.frame_skip)

        # Query the state once; it is falsy (no state) once the episode is over,
        # in which case a blank frame of the correct dtype is returned instead.
        game_state = self.game.get_state()
        if game_state:
            observation = self.grayscale(game_state.screen_buffer)
            ammo = game_state.game_variables[0]  # ammo (depends on the scenario config)
        else:
            observation = self._blank_observation()
            ammo = 0

        info = {"info": ammo}

        # The player's death is a terminal state of the task, not a time-limit
        # truncation, so it is reported through `terminated`. The episode still
        # ends in exactly the same situations as before (finished or dead).
        terminated = bool(
            self.game.is_episode_finished() or self.game.is_player_dead()
        )
        # Time-limit truncation is not detected here; wrap the env in a
        # TimeLimit wrapper if a truncation signal is needed.
        truncated = False

        return observation, reward, terminated, truncated, info

    def reset(self, seed=None, options=None):
        """Start a new episode and return ``(observation, info)``.

        Args:
            seed: Optional seed. When given it seeds both the Gymnasium RNG
                (via ``Env.reset``) and the ViZDoom game. ``None`` (default)
                leaves the seeding untouched, which matches the previous
                behaviour where the argument was ignored.
            options: Accepted for Gymnasium API compatibility; unused.

        Returns:
            The first grayscale observation and ``{"ammo": <first game variable>}``.
        """
        super().reset(seed=seed)
        if seed is not None:
            self.game.set_seed(seed)

        self.game.new_episode()

        # Check the state before dereferencing it.
        game_state = self.game.get_state()
        if game_state:
            observation = self.grayscale(game_state.screen_buffer)
            ammo = game_state.game_variables[0]  # ammo (depends on the scenario config)
        else:
            observation = self._blank_observation()
            ammo = 0

        return (observation, {"ammo": ammo})

    def grayscale(self, observation):
        """Convert a raw screen buffer to a ``(100, 160, 1)`` grayscale image.

        The first axis of ``observation`` is moved to the end before the colour
        conversion, so a channels-first buffer is assumed.
        NOTE(review): the conversion uses ``COLOR_BGR2GRAY``; confirm the channel
        order of the scenario's screen format actually is BGR.
        """
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        # cv2.resize takes (width, height), hence (160, 100) for a 100x160 image.
        resize = cv2.resize(gray, (160, 100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100, 160, 1))
        return state

    def close(self):
        """Shut down the underlying ViZDoom game instance."""
        self.game.close()


In [8]:
##################### SB3 : CALLBACK ##########################################
from stable_baselines3 import PPO

from stable_baselines3.common.callbacks import BaseCallback
class TrainAndLoggingCallback(BaseCallback):
    """Callback that periodically saves the model being trained.

    Whenever ``n_calls`` is a multiple of ``check_freq`` the current model is
    written with ``self.model.save`` under the name ``best_model_<n_calls>``.
    """

    def __init__(self, check_freq, verbose=1):
        """Remember the save interval and forward ``verbose`` to ``BaseCallback``."""
        super().__init__(verbose)
        self.check_freq = check_freq

    def _on_step(self):
        """Save a checkpoint on every ``check_freq``-th call; returns True on every call."""
        is_checkpoint_step = self.n_calls % self.check_freq == 0
        if not is_checkpoint_step:
            return True

        checkpoint_name = 'best_model_{}'.format(self.n_calls)
        self.model.save(checkpoint_name)
        return True

# Callback instance used for training: saves a checkpoint every 10,000 callback calls.
callback = TrainAndLoggingCallback(check_freq=10000)


In [None]:
###################### TRAIN == SB3:PPO #######################################
# train
env = VizDoomGym(render=False, scenario='defend_the_center')
try:
    model = PPO("CnnPolicy", env, verbose=1, tensorboard_log=".", learning_rate=0.0001, n_steps=4096)
    model.learn(total_timesteps=150000, callback=callback)
finally:
    # Always shut the game instance down, including when training is
    # interrupted or fails, so the ViZDoom instance is not left running.
    env.close()


  and should_run_async(code)


Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
Logging to ./PPO_1
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 85.9     |
|    ep_rew_mean     | 0.426    |
| time/              |          |
|    fps             | 117      |
|    iterations      | 1        |
|    time_elapsed    | 34       |
|    total_timesteps | 4096     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 86.3        |
|    ep_rew_mean          | 0.553       |
| time/                   |             |
|    fps                  | 34          |
|    iterations           | 2           |
|    time_elapsed         | 238         |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.008170057 |
|    clip_fraction        | 0.0565      |
| 

In [None]:
###################### TEST == SB3:PPO ########################################
# test
from stable_baselines3.common.evaluation import evaluate_policy

env = VizDoomGym(render=True, scenario='defend_the_center')
try:
    # The training callback only writes checkpoints named 'best_model_<n_calls>'
    # (every 10,000 calls); nothing in this notebook writes a plain 'best_model'.
    # 'best_model_150000' is the last checkpoint of the 150,000-step run.
    # Change the suffix to evaluate a different checkpoint.
    model = PPO.load('best_model_150000', env=env)
    mean_reward, std_reward = evaluate_policy(model, env, n_eval_episodes=10)
finally:
    # Release the ViZDoom instance whether or not evaluation succeeded.
    env.close()

print(f"mean_reward:{mean_reward:.2f}")
print(f"std_reward:{std_reward:.2f}")
