In [1]:
from vizdoom import DoomGame  
import random
import time
import numpy as np
import gym
from gym import Env
from gym.spaces import Discrete, Box
import cv2
from matplotlib import pyplot as plt
import os
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3 import PPO

In [2]:
# ----------------------------------------------------------------------
# Notebook-wide constants
# ----------------------------------------------------------------------
ACTION_NUM = 3  # NOTE(review): not referenced by the cells visible here - confirm it is still needed
EPISODES_NUM = 10
AGENT_MODEL_PATH_PREFIX = './agents/agent_for_'
TENSORBOARD_LOG_PATH_PREFIX = './logs/logs_for_'
# Position in `configurations` of the scenario the notebook runs against.
CURRENT_CONFIGURATION_INDEX = 2

# One row per supported scenario:
# (name, path to the scenario .cfg file, size of the discrete action space).
_SCENARIO_TABLE = (
    ('basic', 'VizDoom/scenarios/basic.cfg', 3),
    ('defend_the_center', 'VizDoom/scenarios/defend_the_center.cfg', 3),
    ('deadly_corridor', 'VizDoom/scenarios/deadly_corridor_label_buffer.cfg', 7),
    ('deathmatch', 'VizDoom/scenarios/deathmatch.cfg', 3),
)

# Expanded into the dict form that VizDoomGym reads
# ("scenarioConfigFilePath" and "actionNumber" keys).
configurations = [
    {
        'name': scenario_name,
        'scenarioConfigFilePath': config_path,
        'actionNumber': action_count,
    }
    for scenario_name, config_path, action_count in _SCENARIO_TABLE
]


In [3]:
class VizDoomGym(Env):
  """Gym environment wrapping a ViZDoom ``DoomGame`` with a shaped reward.

  Observations are grayscale frames of shape (100, 160, 1) and dtype uint8.
  Actions are indices into a one-hot table with ``actionNumber`` rows.

  The reward returned by ``step`` is the raw reward from ``make_action`` plus
  weighted per-step changes of three game variables:
  ``10 * (previous damage_taken - current)``, ``200 * (hitcount - previous)``
  and ``5 * (ammo - previous)``.

  NOTE(review): ``step`` unpacks exactly four game variables in the order
  (health, damage_taken, hitcount, ammo); the scenario .cfg must expose them
  in that order - confirm for every entry used with this class.
  """

  # Second argument passed to DoomGame.make_action for every chosen action.
  FRAME_SKIP = 5
  # Ammo baseline at the start of an episode; the first ammo delta is measured against it.
  INITIAL_AMMO = 52
  # Weights applied to the per-step deltas when shaping the reward.
  DAMAGE_TAKEN_WEIGHT = 10
  HITCOUNT_WEIGHT = 200
  AMMO_WEIGHT = 5

  def __init__(self, envConfig, render=False):
    """Load the scenario and start the game.

    Args:
      envConfig: dict with "scenarioConfigFilePath" (path handed to
        ``DoomGame.load_config``) and "actionNumber" (size of the discrete
        action space).
      render: whether the game window is visible.
    """
    super().__init__()
    self.game = DoomGame()
    self.game.load_config(envConfig["scenarioConfigFilePath"])
    self.game.set_window_visible(render)
    self.game.init()

    self.action_number = envConfig["actionNumber"]
    self.action_space = Discrete(self.action_number)
    self.observation_space = Box(0, 255, [100, 160, 1], np.uint8)

    # One-hot action table, built once instead of on every step.
    self._actions = np.identity(self.action_number, dtype=np.uint8)
    # Baselines exist from construction so step() before reset() does not
    # fail with AttributeError.
    self.rewardsObject = self._initial_rewards()

  def _initial_rewards(self):
    """Return the per-episode baselines the reward deltas are measured against."""
    return {
      'damage_taken': 0,
      'hitcount': 0,
      'ammo': self.INITIAL_AMMO
    }

  def close(self):
    """Shut down the underlying game instance."""
    self.game.close()

  def step(self, action):
    """Apply ``action`` and return ``(observation, reward, done, info)``.

    Args:
      action: index of the row of the one-hot action table to execute.

    Returns:
      A 4-tuple. ``info`` holds the "damage_taken", "hitcount" and "ammo"
      deltas for this step (all zero when no game state is available).
    """
    actionReward = self.game.make_action(self._actions[action], self.FRAME_SKIP)

    done = self.game.is_episode_finished()
    state = self.game.get_state()

    if state is None:
      # No frame is available; return a blank observation whose dtype matches
      # the declared observation space (np.zeros defaults to float64).
      blank = np.zeros(self.observation_space.shape, dtype=self.observation_space.dtype)
      return blank, actionReward, done, {"damage_taken": 0, "hitcount": 0, "ammo": 0}

    # The health value is read but does not take part in the reward.
    _health, damage_taken, hitcount, ammo = state.game_variables

    deltasObject = {
      # Negated so that taking damage lowers the reward.
      'damage_taken': -damage_taken + self.rewardsObject["damage_taken"],
      'hitcount': hitcount - self.rewardsObject["hitcount"],
      'ammo': ammo - self.rewardsObject["ammo"]
    }

    # Remember the current values as baselines for the next step.
    self.rewardsObject["damage_taken"] = damage_taken
    self.rewardsObject["hitcount"] = hitcount
    self.rewardsObject["ammo"] = ammo

    reward = (
      actionReward
      + deltasObject["damage_taken"] * self.DAMAGE_TAKEN_WEIGHT
      + deltasObject["hitcount"] * self.HITCOUNT_WEIGHT
      + deltasObject["ammo"] * self.AMMO_WEIGHT
    )

    img = self.grayscale(state.screen_buffer)
    return img, reward, done, deltasObject

  def reset(self):
    """Start a new episode, reset the reward baselines and return the first frame."""
    self.game.new_episode()
    state = self.game.get_state()
    self.rewardsObject = self._initial_rewards()
    return self.grayscale(state.screen_buffer)

  def render(self, mode="human"):
    """No-op; window visibility is fixed by the ``render`` constructor flag."""
    pass

  def grayscale(self, observation):
    """Convert a screen buffer to a (100, 160, 1) grayscale frame.

    The first axis of ``observation`` is moved to the end before the colour
    conversion, so a channel-first buffer is assumed - TODO confirm against
    the scenario's screen format.
    """
    grayscaled = cv2.cvtColor(np.moveaxis(observation, 0, -1), cv2.COLOR_BGR2GRAY)
    # NOTE(review): the third positional parameter of cv2.resize is `dst`, not
    # `interpolation`, so INTER_CUBIC is probably not applied here. The call is
    # left unchanged so frames stay identical to what the saved agents were
    # trained on - confirm before switching to interpolation=cv2.INTER_CUBIC.
    resized = cv2.resize(grayscaled, (160, 100), cv2.INTER_CUBIC)
    return np.reshape(resized, (100, 160, 1))
  

In [4]:
# Build the environment for the scenario picked by CURRENT_CONFIGURATION_INDEX,
# with the game window visible.
active_configuration = configurations[CURRENT_CONFIGURATION_INDEX]
env = VizDoomGym(envConfig=active_configuration, render=True)

In [5]:
from stable_baselines3.common.evaluation import evaluate_policy

# Saved PPO agent for the currently selected scenario: the path is the agent
# prefix, then the scenario name, then the "model_1000000" file name.
model_checkpoint_path = f"{AGENT_MODEL_PATH_PREFIX}{configurations[CURRENT_CONFIGURATION_INDEX]['name']}/model_1000000"
model = PPO.load(model_checkpoint_path)

In [5]:
from IPython.display import clear_output

# Pauses, in seconds, that slow playback down enough to watch in the game window.
STEP_DELAY_SECONDS = 0.05555
EPISODE_PAUSE_SECONDS = 1

# Let the loaded agent play EPISODES_NUM episodes and report each episode's
# accumulated shaped reward.
for episode in range(EPISODES_NUM):
    obs = env.reset()
    done = False
    total_reward = 0
    while not done:
        action, _ = model.predict(obs)
        obs, reward, done, _info = env.step(action)
        total_reward += reward
        time.sleep(STEP_DELAY_SECONDS)
    print(f"Episode {episode}: total reward {total_reward}")
    time.sleep(EPISODE_PAUSE_SECONDS)

NameError: name 'model' is not defined

In [None]:
# Evaluate the loaded agent over the notebook-wide episode count rather than a
# second hard-coded 10; the call's return value is displayed as the cell output.
evaluate_policy(model, env, n_eval_episodes=EPISODES_NUM)

In [None]:
# Shut down the game behind `env` (VizDoomGym.close calls game.close()).
# The environment must be re-created before it can be used again.
env.close()

In [None]:
from stable_baselines3.common.evaluation import evaluate_policy
from IPython.display import clear_output

# This cell replays the defend_the_center agent. The shared `env` cannot be
# reused: it targets the scenario selected by CURRENT_CONFIGURATION_INDEX and
# has already been closed by the preceding cell. Build an environment that
# matches the agent instead.
# NOTE(review): assumes the defend_the_center .cfg exposes the four game
# variables VizDoomGym.step unpacks - confirm.
dtc_scenario_name = 'defend_the_center'
dtc_configuration = next(
    config for config in configurations if config['name'] == dtc_scenario_name
)
model = PPO.load(f"{AGENT_MODEL_PATH_PREFIX}{dtc_scenario_name}/model_best")
dtc_env = VizDoomGym(dtc_configuration, render=True)

try:
    for episode in range(EPISODES_NUM):
        obs = dtc_env.reset()
        done = False
        total_reward = 0
        while not done:
            action, _ = model.predict(obs)
            obs, reward, done, _info = dtc_env.step(action)
            total_reward += reward
            # wait=True delays clearing until the next frame is ready, which
            # avoids flicker between frames.
            clear_output(wait=True)
            # Drop the trailing channel axis and show the frame as grayscale.
            plt.imshow(obs[:, :, 0], cmap='gray')
            plt.show()
            time.sleep(0.05)
        print(f"Episode {episode}: total reward {total_reward}")
        time.sleep(1)
finally:
    # Always release the game instance, even if playback fails part-way.
    dtc_env.close()