In [26]:
from vizdoom import DoomGame  
import random
import time
import numpy as np
import gym
from gym import Env
from gym.spaces import Discrete, Box
import cv2
from matplotlib import pyplot as plt
import os
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3 import PPO
from stable_baselines3.common.env_util import make_vec_env
from stable_baselines3.common.vec_env import DummyVecEnv, SubprocVecEnv

In [27]:
# ---------------------------------------------------------------------------
# Constants
# ---------------------------------------------------------------------------
# NOTE(review): ACTION_NUM and EPISODES_NUM are not referenced by any of the
# visible cells - confirm they are still needed.
ACTION_NUM = 3
EPISODES_NUM = 10

# Prefixes that the scenario name is appended to when building output paths.
AGENT_MODEL_PATH_PREFIX = './agents/agent_for_'
TENSORBOARD_LOG_PATH_PREFIX = './logs/logs_for_'

# Index into `configurations` selecting the scenario used by the cells below.
CURRENT_CONFIGURATION_INDEX = 3

# One entry per scenario:
#   name                   - label appended to the path prefixes above
#   scenarioConfigFilePath - path of the scenario's .cfg file
#   actionNumber           - size of the discrete action space
configurations = [
  {
    'name': 'basic',
    'scenarioConfigFilePath': 'VizDoom/scenarios/basic.cfg',
    'actionNumber': 3,
  },
  {
    'name': 'defend_the_center',
    'scenarioConfigFilePath': 'VizDoom/scenarios/defend_the_center.cfg',
    'actionNumber': 3,
  },
  {
    'name': 'deadly_corridor',
    'scenarioConfigFilePath': 'VizDoom/scenarios/deadly_corridor.cfg',
    'actionNumber': 7,
  },
  {
    'name': 'deathmatch',
    'scenarioConfigFilePath': 'VizDoom/scenarios/deathmatch.cfg',
    'actionNumber': 20,
  },
]


In [29]:
class VizDoomGym(Env):
  """Gym environment wrapping a ViZDoom game with a shaped reward.

  Observations are single-channel uint8 frames of shape (100, 160, 1).
  Actions are integer indices; each index selects one row of a one-hot
  table with ``actionNumber`` entries that is handed to the game.
  """

  def __init__(self, envConfig, render=False):
    """Load the scenario, start the game and declare the spaces.

    Args:
      envConfig: dict with 'scenarioConfigFilePath' (scenario .cfg path) and
        'actionNumber' (size of the discrete action space).
      render: whether the game window is made visible.
    """
    super().__init__()
    self.game = DoomGame()
    self.game.load_config(envConfig["scenarioConfigFilePath"])
    self.game.set_window_visible(render)
    self.game.init()

    self.action_number = envConfig["actionNumber"]
    self.action_space = Discrete(self.action_number)
    self.observation_space = Box(0, 255, [100, 160, 1], np.uint8)

    # One-hot action table, built once instead of on every step() call.
    self._actions = np.identity(self.action_number, dtype=np.uint8)
    # Baseline exists from construction so step() cannot hit an
    # AttributeError when called before the first reset().
    self.rewardsObject = self._initial_rewards()

  @staticmethod
  def _initial_rewards():
    """Return the baseline that per-step reward deltas are measured against."""
    return {
      'damage_taken': 0,
      'hitcount': 0,
      # NOTE(review): 52 is hard-coded; presumably the scenario's starting
      # ammo count - verify against the scenario .cfg.
      'ammo': 52,
      'armor': 0,
      'killcount': 0
    }

  def close(self):
    """Shut down the underlying game instance."""
    self.game.close()

  def step(self, action):
    """Apply ``action`` and return ``(observation, reward, done, info)``.

    The reward is the game's own action reward plus weighted changes of the
    tracked game variables since the previous step: damage taken (x20,
    counted negatively), hit count (x20), kill count (x100), armor (x30) and
    ammo (x5). ``info`` holds those per-step deltas.

    When the game reports no state, a zero-filled frame is returned together
    with the unshaped action reward and an all-zero ``info``.
    """
    # The second argument (5) is the tic count passed to make_action.
    actionReward = self.game.make_action(self._actions[action], 5)

    # Actions with an index above 7 receive a flat -100 in place of the
    # reward returned by the game.
    if action > 7:
      actionReward = -100

    done = self.game.is_episode_finished()
    state = self.game.get_state()

    if not state:
      # Same dtype as observation_space (the default float64 would not match)
      # and the same info keys as the regular path below.
      blank = np.zeros(self.observation_space.shape, dtype=np.uint8)
      info = {"damage_taken": 0, "hitcount": 0, "killcount": 0, "armor": 0, "ammo": 0}
      return blank, actionReward, done, info

    # Assumes the scenario exposes exactly seven game variables in this
    # order - TODO confirm against the scenario .cfg.
    health, damage_taken, hitcount, killcount, armor, _, ammo = state.game_variables

    deltasObject = {
      # previous - current: taking damage produces a negative delta.
      'damage_taken': -damage_taken + self.rewardsObject["damage_taken"],
      'hitcount': hitcount - self.rewardsObject["hitcount"],
      'killcount': killcount - self.rewardsObject["killcount"],
      'armor': armor - self.rewardsObject["armor"],
      'ammo': ammo - self.rewardsObject["ammo"]
    }

    # Remember the current values as the baseline for the next step.
    self.rewardsObject["damage_taken"] = damage_taken
    self.rewardsObject["hitcount"] = hitcount
    self.rewardsObject["killcount"] = killcount
    self.rewardsObject["armor"] = armor
    self.rewardsObject["ammo"] = ammo

    reward = (
      actionReward
      + deltasObject["damage_taken"] * 20
      + deltasObject["hitcount"] * 20
      + deltasObject["killcount"] * 100
      + deltasObject["armor"] * 30
      + deltasObject["ammo"] * 5
    )

    img = self.grayscale(state.screen_buffer)
    return img, reward, done, deltasObject

  def reset(self):
    """Start a new episode, reset the reward baseline, return the first frame."""
    self.game.new_episode()
    state = self.game.get_state()
    self.rewardsObject = self._initial_rewards()
    return self.grayscale(state.screen_buffer)

  def render(self, mode="human"):
    """No-op; window visibility is chosen in ``__init__`` via ``render``."""
    pass

  def grayscale(self, observation):
    """Convert a screen buffer to a (100, 160, 1) grayscale frame.

    Axis 0 of ``observation`` is moved to the last position before the colour
    conversion (assumes a channel-first buffer - TODO confirm), then the
    image is resized to 160 wide by 100 high.
    """
    grayscaled = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
    # interpolation must be passed by keyword: the third positional parameter
    # of cv2.resize is `dst`, so a positional flag is not used as the
    # interpolation mode.
    resized = cv2.resize(grayscaled, (160, 100), interpolation=cv2.INTER_CUBIC)
    return np.reshape(resized, (100, 160, 1))
  

In [30]:
class AgentCallback(BaseCallback):
  """Training callback that saves the model every ``check_freq`` calls.

  Args:
    check_freq: a checkpoint is written whenever ``n_calls`` is a multiple
      of this value.
    save_path: directory the checkpoints are written to; ``None`` disables
      directory creation and saving.
    verbose: verbosity level forwarded to ``BaseCallback``.
  """

  def __init__(self, check_freq, save_path, verbose=1):
    super().__init__(verbose)
    self.check_freq = check_freq
    self.save_path = save_path

  def _init_callback(self):
    """Create the checkpoint directory if a save path was given.

    Single leading underscore on purpose: a double-underscore name is
    mangled to ``_AgentCallback__init_callback`` and therefore never
    overrides the base-class hook.
    """
    if self.save_path is not None:
      os.makedirs(self.save_path, exist_ok=True)

  def _on_step(self):
    """Save ``model_<n_calls>`` on every ``check_freq``-th call.

    Returns:
      Always ``True``.
    """
    # Mirror the None guard of _init_callback; os.path.join(None, ...) would
    # raise a TypeError otherwise.
    if self.save_path is not None and self.n_calls % self.check_freq == 0:
      model_path = os.path.join(self.save_path, f"model_{self.n_calls}")
      self.model.save(model_path)
    return True
  
  
# Checkpoint callback for the selected scenario: saves every 50000 calls into
# the agent directory named after that scenario.
agentCallback = AgentCallback(
  check_freq=50000,
  save_path=f"{AGENT_MODEL_PATH_PREFIX}{configurations[CURRENT_CONFIGURATION_INDEX]['name']}",
)

In [17]:

# Build the environment for the selected scenario (game window hidden) and
# train a new PPO agent on it, logging to the scenario's TensorBoard directory.
env = VizDoomGym(
  envConfig=configurations[CURRENT_CONFIGURATION_INDEX],
  render=False,
)
model = PPO(
  'CnnPolicy',
  env,
  device='cuda',
  tensorboard_log=f"{TENSORBOARD_LOG_PATH_PREFIX}{configurations[CURRENT_CONFIGURATION_INDEX]['name']}",
  verbose=1,
  learning_rate=1e-5,
  n_steps=8192,
  clip_range=0.1,
  gamma=0.95,
  gae_lambda=0.9,
)

print("Model created")
model.learn(total_timesteps=3_000_000, callback=agentCallback)
print("Model trained")

Using cpu device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
Model created


KeyboardInterrupt: 

In [31]:
# Fresh environment for the currently selected scenario, game window hidden.
env = VizDoomGym(envConfig=configurations[CURRENT_CONFIGURATION_INDEX], render=False)
# Disabled experiment: vectorised training over 4 subprocess environments.
# NOTE(review): env_id is given an environment instance here - confirm that
# make_vec_env accepts this before re-enabling.
## train_env = make_vec_env(env_id=env, n_envs=4, vec_env_cls=SubprocVecEnv)

In [32]:
from stable_baselines3.common.evaluation import evaluate_policy
# Restore a saved PPO checkpoint from the selected scenario's agent directory.
model = PPO.load(
  f"{AGENT_MODEL_PATH_PREFIX}{configurations[CURRENT_CONFIGURATION_INDEX]['name']}/higher_lr/model_1550000"
)

In [33]:
# Attach the environment to the loaded model and continue training with the
# same checkpoint callback; reset_num_timesteps=False is passed so the run
# continues from the loaded model's timestep count.
model.set_env(env)
model.learn(
  total_timesteps=3_000_000,
  callback=agentCallback,
  reset_num_timesteps=False,
)

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
Logging to ./logs/logs_for_deathmatch\PPO_5
----------------------------------
| rollout/           |           |
|    ep_len_mean     | 132       |
|    ep_rew_mean     | -1.27e+03 |
| time/              |           |
|    fps             | 38        |
|    iterations      | 1         |
|    time_elapsed    | 214       |
|    total_timesteps | 5358192   |
----------------------------------
------------------------------------------
| rollout/                |              |
|    ep_len_mean          | 123          |
|    ep_rew_mean          | -1.64e+03    |
| time/                   |              |
|    fps                  | 27           |
|    iterations           | 2            |
|    time_elapsed         | 590          |
|    total_timesteps      | 5366384      |
| train/                  |              |
|    approx_kl            | 0.0069331154 |
|    clip_fract

KeyboardInterrupt: 