# 1. Getting VizDoom Up and Running

DEADLY CORRIDOR
The purpose of this scenario is to teach the agent to navigate towards his fundamental goal (the vest) and make sure he survives at the same time.

The map is a corridor with shooting monsters on both sides (6 monsters in total). A green vest is placed at the opposite end of the corridor. The reward is proportional (negative or positive) to the change in the distance between the player and the vest. If the player ignores monsters on the sides and runs straight for the vest, he will be killed somewhere along the way. To ensure this behavior difficulty level (doom_skill) = 5 (config) is needed.

REWARDS:

+dX for getting closer to the vest.

-dX for getting further from the vest.

-100 for death

CONFIGURATION:

7 available buttons: move forward/backwward/left/right, turn left/right, shoot (attack)

1 available game variable: player’s health

timeout = 2100

difficulty level (doom_skill) = 5

In [6]:
# Import vizdoom for game env
from vizdoom import * 
# Import random for action sampling
import random
# Import time for sleeping
import time 
# Import numpy for identity matrix
import numpy as np

In [7]:
# Setup game
game = DoomGame()
game.load_config('github/VizDoom/scenarios/deadly_corridor_s2.cfg')
game.init()

In [8]:
# This is the set of actions we can take in the environment
actions = np.identity(7, dtype=np.uint8)

In [9]:
state = game.get_state()

In [10]:
state.game_variables

array([100.,   0.,   0.,  -1.])

In [11]:
# Loop through episodes 
episodes = 10 
for episode in range(episodes): 
    # Create a new episode or game 
    game.new_episode()
    # Check the game isn't done 
    while not game.is_episode_finished(): 
        # Get the game state 
        state = game.get_state()
        # Get the game image 
        img = state.screen_buffer
        # Get the game variables - ammo
        info = state.game_variables
        # Take an action
        reward = game.make_action(random.choice(actions),4)
        # Print rewward 
        # print('reward:', reward) 
        time.sleep(0.02)
    print('Result:', game.get_total_reward())
    time.sleep(2)

Result: -104.97740173339844


KeyboardInterrupt: 

In [None]:
#S1 - 336
#S2 - -29
#S3 - -20
#S4 - -95
#S5 - -99

In [None]:
game.close()

In [None]:
state.game_variables

array([100.,   0.,   0.,  -1.])

# 2. Converting it to a Gym Environment

In [12]:
# Import environment base class from OpenAI Gym
from gym import Env
# Import gym spaces 
from gym.spaces import Discrete, Box
# Import opencv 
import cv2

In [13]:
# Create Vizdoom OpenAI Gym Environment
class VizDoomGym(Env): 
    # Function that is called when we start the env
    def __init__(self, render=False, config='github/VizDoom/scenarios/deadly_corridor_s1.cfg'): 
        # Inherit from Env
        super().__init__()
        # Setup the game 
        self.game = DoomGame()
        self.game.load_config(config)
        
        # Render frame logic
        if render == False: 
            self.game.set_window_visible(False)
        else:
            self.game.set_window_visible(True)
        
        # Start the game 
        self.game.init()
        
        # Create the action space and observation space
        self.observation_space = Box(low=0, high=255, shape=(100,160,1), dtype=np.uint8) 
        self.action_space = Discrete(7)
        
        # Game variables: HEALTH DAMAGE_TAKEN HITCOUNT SELECTED_WEAPON_AMMO
        self.damage_taken = 0
        self.hitcount = 0
        self.ammo = 60 ## CHANGED
        
        
    # This is how we take a step in the environment
    def step(self, action):
        # Specify action and take step 
        actions = np.identity(7)
        movement_reward = self.game.make_action(actions[action], 4) 
        
        reward = 0 
        # Get all the other stuff we need to retun 
        if self.game.get_state(): 
            state = self.game.get_state().screen_buffer
            state = self.grayscale(state)
            
            # Reward shaping
            game_variables = self.game.get_state().game_variables
            health, damage_taken, hitcount, ammo = game_variables
            
            # Calculate reward deltas
            damage_taken_delta = -damage_taken + self.damage_taken
            self.damage_taken = damage_taken
            hitcount_delta = hitcount - self.hitcount
            self.hitcount = hitcount
            ammo_delta = ammo - self.ammo
            self.ammo = ammo
            
            reward = movement_reward + damage_taken_delta*10 + hitcount_delta*200  + ammo_delta*5 
            info = ammo
        else: 
            state = np.zeros(self.observation_space.shape)
            info = 0 
        
        info = {"info":info}
        done = self.game.is_episode_finished()
        
        return state, reward, done, info 
    
    # Define how to render the game or environment 
    def render(): 
        pass
    
    # What happens when we start a new game 
    def reset(self): 
        self.game.new_episode()
        state = self.game.get_state().screen_buffer
        return self.grayscale(state)
    
    # Grayscale the game frame and resize it 
    def grayscale(self, observation):
        gray = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
        resize = cv2.resize(gray, (160,100), interpolation=cv2.INTER_CUBIC)
        state = np.reshape(resize, (100,160,1))
        return state
    
    # Call to close down the game
    def close(self): 
        self.game.close()

In [None]:
env=VizDoomGym()

In [None]:
env.close()

In [None]:
env = VizDoomGym(render=True)

In [None]:
res = env.step(5)


In [None]:
res[1]

-40.0

In [None]:
state = env.reset()

# 3. View Game State

In [None]:
env.reset()

array([[[32],
        [33],
        [25],
        ...,
        [27],
        [23],
        [24]],

       [[27],
        [33],
        [23],
        ...,
        [24],
        [24],
        [24]],

       [[20],
        [35],
        [23],
        ...,
        [24],
        [24],
        [24]],

       ...,

       [[75],
        [63],
        [62],
        ...,
        [44],
        [71],
        [60]],

       [[15],
        [48],
        [47],
        ...,
        [49],
        [69],
        [47]],

       [[22],
        [14],
        [26],
        ...,
        [57],
        [37],
        [39]]], dtype=uint8)

In [None]:
# Import Environment checker
from stable_baselines3.common import env_checker

In [None]:
env_checker.check_env(env)

AssertionError: Your environment must inherit from the gymnasium.Env class cf. https://gymnasium.farama.org/api/env/

# 3. View State

In [14]:
from matplotlib import pyplot as plt

In [16]:
plt.imshow(cv2.cvtColor(state, cv2.COLOR_BGR2RGB))

error: OpenCV(4.9.0) :-1: error: (-5:Bad argument) in function 'cvtColor'
> Overload resolution failed:
>  - src is not a numpy array, neither a scalar
>  - Expected Ptr<cv::UMat> for argument 'src'


In [None]:
env.close()

# 4. Setup Callback

In [17]:
!pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio===0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

Looking in links: https://download.pytorch.org/whl/cu113/torch_stable.html
Collecting torch==1.10.1+cu113
  Downloading https://download.pytorch.org/whl/cu113/torch-1.10.1%2Bcu113-cp38-cp38-win_amd64.whl (2442.4 MB)
     ---------------------------------------- 0.0/2.4 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.4 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.4 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.4 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.4 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.4 GB ? eta -:--:--
     ---------------------------------------- 0.0/2.4 GB 81.9 kB/s eta 8:16:54
     ---------------------------------------- 0.0/2.4 GB 81.9 kB/s eta 8:16:54
     ---------------------------------------- 0.0/2.4 GB 81.9 kB/s eta 8:16:54
     ---------------------------------------- 0.0/2.4 GB 81.9 kB/s eta 8:16:54
     -------------------------------------

ERROR: Exception:
Traceback (most recent call last):
  File "C:\Users\APTS\anaconda3\envs\RLenv\lib\site-packages\pip\_vendor\urllib3\response.py", line 438, in _error_catcher
    yield
  File "C:\Users\APTS\anaconda3\envs\RLenv\lib\site-packages\pip\_vendor\urllib3\response.py", line 561, in read
    data = self._fp_read(amt) if not fp_closed else b""
  File "C:\Users\APTS\anaconda3\envs\RLenv\lib\site-packages\pip\_vendor\urllib3\response.py", line 527, in _fp_read
    return self._fp.read(amt) if amt is not None else self._fp.read()
  File "C:\Users\APTS\anaconda3\envs\RLenv\lib\site-packages\pip\_vendor\cachecontrol\filewrapper.py", line 102, in read
    self.__buf.write(data)
  File "C:\Users\APTS\anaconda3\envs\RLenv\lib\tempfile.py", line 489, in func_wrapper
    return func(*args, **kwargs)
OSError: [Errno 28] No space left on device

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "C:\Users\APTS\anaconda3\envs\RLen


     ----------------------- ---------------- 1.5/2.4 GB 10.9 MB/s eta 0:01:31
     ----------------------- ---------------- 1.5/2.4 GB 10.9 MB/s eta 0:01:31
     ----------------------- ---------------- 1.5/2.4 GB 11.1 MB/s eta 0:01:29
     ----------------------- ---------------- 1.5/2.4 GB 10.7 MB/s eta 0:01:32
     ----------------------- ---------------- 1.5/2.4 GB 10.7 MB/s eta 0:01:32
     ----------------------- ---------------- 1.5/2.4 GB 10.6 MB/s eta 0:01:33
     ----------------------- ---------------- 1.5/2.4 GB 10.7 MB/s eta 0:01:32
     ----------------------- ---------------- 1.5/2.4 GB 10.4 MB/s eta 0:01:35
     ----------------------- ---------------- 1.5/2.4 GB 10.1 MB/s eta 0:01:38
     ----------------------- ---------------- 1.5/2.4 GB 9.6 MB/s eta 0:01:42
     ----------------------- ---------------- 1.5/2.4 GB 9.1 MB/s eta 0:01:48
     ----------------------- ---------------- 1.5/2.4 GB 9.0 MB/s eta 0:01:50
     ----------------------- ---------------- 1.5/2.4 

In [18]:
!pip install stable-baselines3[extra]



In [19]:
# Import os for file nav
import os 
# Import callback class from sb3
from stable_baselines3.common.callbacks import BaseCallback

In [20]:
class TrainAndLoggingCallback(BaseCallback):

    def __init__(self, check_freq, save_path, verbose=1):
        super(TrainAndLoggingCallback, self).__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        if self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [21]:
CHECKPOINT_DIR = './train/train_corridor'
LOG_DIR = './logs/log_corridor'

In [22]:
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

# 5. Train Our Model Using Curriculum 

In [23]:
# import ppo for training
from stable_baselines3 import PPO

In [24]:
# Non rendered environment
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s1.cfg')

# Changes made
- Effects of hyperparameter tuning
- Increased initial timestep length

## Possibly look into
- Changing reward function and game variables due to bug

In [25]:
#model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.0001, n_steps=4096)
model = PPO('CnnPolicy', env, tensorboard_log=LOG_DIR, verbose=1, learning_rate=0.00001, n_steps=8192, clip_range=.1, gamma=.95, gae_lambda=.9)

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.




In [None]:
#model.learn(total_timesteps=40000, callback=callback)
model.learn(total_timesteps=400000, callback=callback)

Logging to ./logs/log_corridor\PPO_30
---------------------------------
| rollout/           |          |
|    ep_len_mean     | 93.5     |
|    ep_rew_mean     | 954      |
| time/              |          |
|    fps             | 60       |
|    iterations      | 1        |
|    time_elapsed    | 67       |
|    total_timesteps | 4096     |
---------------------------------
-----------------------------------------
| rollout/                |             |
|    ep_len_mean          | 96.8        |
|    ep_rew_mean          | 971         |
| time/                   |             |
|    fps                  | 56          |
|    iterations           | 2           |
|    time_elapsed         | 143         |
|    total_timesteps      | 8192        |
| train/                  |             |
|    approx_kl            | 0.054793693 |
|    clip_fraction        | 0.3         |
|    clip_range           | 0.2         |
|    entropy_loss         | -0.647      |
|    explained_variance   | 0.898 

<stable_baselines3.ppo.ppo.PPO at 0x2344cc78f98>

In [None]:
model.load(r'D:\Nam 2\ThayHoangAI\FPSGame\DeadlyCorridor560k.zip')

Exception: an integer is required (got type bytes)
Exception: an integer is required (got type bytes)
	Missing key(s) in state_dict: "pi_features_extractor.cnn.0.weight", "pi_features_extractor.cnn.0.bias", "pi_features_extractor.cnn.2.weight", "pi_features_extractor.cnn.2.bias", "pi_features_extractor.cnn.4.weight", "pi_features_extractor.cnn.4.bias", "pi_features_extractor.linear.0.weight", "pi_features_extractor.linear.0.bias", "vf_features_extractor.cnn.0.weight", "vf_features_extractor.cnn.0.bias", "vf_features_extractor.cnn.2.weight", "vf_features_extractor.cnn.2.bias", "vf_features_extractor.cnn.4.weight", "vf_features_extractor.cnn.4.bias", "vf_features_extractor.linear.0.weight", "vf_features_extractor.linear.0.bias".  


<stable_baselines3.ppo.ppo.PPO at 0x1baf414a610>

In [None]:
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s1.cfg')
model.set_env(env)
model.learn(total_timesteps=40000, callback=callback)



Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [None]:
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s2.cfg')
model.set_env(env)
model.learn(total_timesteps=40000, callback=callback)

In [None]:
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s3.cfg')
model.set_env(env)
model.learn(total_timesteps=40000, callback=callback)

In [None]:
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s4.cfg')
model.set_env(env)
model.learn(total_timesteps=40000, callback=callback)

In [None]:
env = VizDoomGym(config='github/VizDoom/scenarios/deadly_corridor_s5.cfg')
model.set_env(env)
model.learn(total_timesteps=40000, callback=callback)

# 6. Test the Model

In [26]:
# Import eval policy to test agent
from stable_baselines3.common.evaluation import evaluate_policy

In [27]:
# Reload model from disc
model = PPO.load(r'D:\Nam 2\ThayHoangAI\FPSGame\DeadlyCorridor560k.zip')

Exception: an integer is required (got type bytes)
Exception: an integer is required (got type bytes)
	Missing key(s) in state_dict: "pi_features_extractor.cnn.0.weight", "pi_features_extractor.cnn.0.bias", "pi_features_extractor.cnn.2.weight", "pi_features_extractor.cnn.2.bias", "pi_features_extractor.cnn.4.weight", "pi_features_extractor.cnn.4.bias", "pi_features_extractor.linear.0.weight", "pi_features_extractor.linear.0.bias", "vf_features_extractor.cnn.0.weight", "vf_features_extractor.cnn.0.bias", "vf_features_extractor.cnn.2.weight", "vf_features_extractor.cnn.2.bias", "vf_features_extractor.cnn.4.weight", "vf_features_extractor.cnn.4.bias", "vf_features_extractor.linear.0.weight", "vf_features_extractor.linear.0.bias".  


In [48]:
# Create rendered environment
env = VizDoomGym(render=True, config='github/VizDoom/scenarios/deadly_corridor_s1.cfg')

In [50]:
# Evaluate mean reward for 10 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=1)
mean_reward



1249.6622924804688

In [51]:
# Create rendered environment
env = VizDoomGym(render=True, config='github/VizDoom/scenarios/deadly_corridor_s2.cfg')
# Evaluate mean reward for 10 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=1)
env.close()
mean_reward

1860.4078674316406

In [53]:
# Create rendered environment
env = VizDoomGym(render=True, config='github/VizDoom/scenarios/deadly_corridor_s3.cfg')
# Evaluate mean reward for 10 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=1)
env.close()
mean_reward

2747.848342895508

In [58]:
# Create rendered environment
env = VizDoomGym(render=True, config='github/VizDoom/scenarios/deadly_corridor_s4.cfg')
# Evaluate mean reward for 10 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=1)
env.close()
mean_reward

1393.9759216308594

In [57]:
# Create rendered environment
env = VizDoomGym(render=True, config='github/VizDoom/scenarios/deadly_corridor_s5.cfg')
# Evaluate mean reward for 10 games
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=1)
env.close()
mean_reward

1220.9663543701172

In [30]:
for episode in range(20): 
    obs = env.reset()
    done = False
    total_reward = 0
    while not done: 
        action, _ = model.predict(obs)
        obs, reward, done, info = env.step(action)
        time.sleep(0.02)
        total_reward += reward
    print('Total Reward for episode {} is {}'.format(episode, total_reward))
    time.sleep(2)

Total Reward for episode 0 is 890.7953948974609
Total Reward for episode 1 is 2357.024459838867
Total Reward for episode 2 is 1128.299072265625
Total Reward for episode 3 is 1454.3997192382812
Total Reward for episode 4 is -44.435943603515625
Total Reward for episode 5 is 1637.2250671386719
Total Reward for episode 6 is 2037.8232879638672
Total Reward for episode 7 is 1139.414535522461
Total Reward for episode 8 is 775.8251800537109
Total Reward for episode 9 is 2627.0672760009766
Total Reward for episode 10 is 784.4347686767578


KeyboardInterrupt: 