In [1]:
import warnings

# Silence every warning category for the rest of the kernel session so that
# library deprecation notices do not flood the cell outputs.
warnings.simplefilter("ignore")

In [4]:
import cv2
import gymnasium as gym
import numpy as np
import vizdoom as vzd
import skimage.transform
from utils import _ObservationWrapper

import vizdoom.gymnasium_wrapper  # noqa
import vizdoom
from vizdoom.gymnasium_wrapper.gymnasium_env_defns import VizdoomScenarioEnv
from ray.tune.registry import register_env
from utils import wrap_env, reward_wrap_env

import ray
from ray.rllib.algorithms.dreamerv3.dreamerv3 import DreamerV3Config

import tqdm

ModuleNotFoundError: No module named 'ray'

In [None]:
# Default size the screen image is resized to by ObservationWrapper; its two
# entries become the first two dimensions of the observation array (the
# channel dimension is appended after them).
IMAGE_SHAPE = (64, 64)
# Default value ObservationWrapper assigns to the wrapped environment's
# `frame_skip` attribute.
FRAME_SKIP = 4

class ObservationWrapper(gym.ObservationWrapper):
    """
    Reduce a ViZDoom dictionary observation to a small image array.

    ViZDoom environments return dictionaries as observations, containing
    the main image as well as other info.
    The image is also too large for normal training.

    This wrapper replaces the dictionary observation space with a simple
    Box space (i.e., only the screen image), and also resizes the image to a
    smaller size.

    NOTE: Ideally, you should set the image size to smaller in the scenario files
          for faster running of ViZDoom. This can really impact performance,
          and this code is pretty slow because of this!

    Args:
        env: Environment whose observation space is a dict with a "screen"
            entry holding a channel-last image.
        shape: Two-element sequence giving the first two dimensions of the
            resized observation array.
        frame_skip: Value stored in the base environment's ``frame_skip``
            attribute.
    """

    def __init__(self, env, shape=IMAGE_SHAPE, frame_skip=FRAME_SKIP):
        super().__init__(env)
        # Normalise to a tuple so that lists (e.g. from a JSON/YAML config)
        # behave the same as tuples, both here and as cv2's ``dsize``.
        self.image_shape = tuple(shape)
        # cv2.resize takes its target size in the opposite order to the
        # array's leading two dimensions, hence the reversed copy.
        self.image_shape_reverse = self.image_shape[::-1]
        # Set the attribute on the innermost environment so it is not left
        # on an intermediate wrapper when ``env`` is already wrapped.
        # NOTE(review): assumes the base ViZDoom env reads ``frame_skip``
        # when stepping - confirm against the installed vizdoom version.
        self.env.unwrapped.frame_skip = frame_skip

        # Create new observation space with the new shape
        num_channels = env.observation_space["screen"].shape[-1]
        new_shape = (self.image_shape[0], self.image_shape[1], num_channels)
        self.observation_space = gym.spaces.Box(0, 255, shape=new_shape, dtype=np.float32)

    def observation(self, observation):
        """Return the resized "screen" image as a float32 array."""
        frame = cv2.resize(observation["screen"], self.image_shape_reverse)
        # cv2.resize returns a 2-D array for single-channel input; restore the
        # trailing channel axis so the result matches ``observation_space``.
        if frame.ndim == 2:
            frame = frame[..., np.newaxis]
        return frame.astype(np.float32)

def wrap_env(env, config):
    """Apply the observation and reward wrappers used for training.

    Args:
        env: Environment to wrap; passed to ``ObservationWrapper``.
        config: Mapping providing ``'image_shape'`` and ``'frame_skip'``,
            which are forwarded to ``ObservationWrapper``.

    Returns:
        The environment wrapped first by ``ObservationWrapper`` and then by
        the reward scaling of ``reward_wrap_env``.
    """
    env = ObservationWrapper(env, shape=config['image_shape'], frame_skip=config['frame_skip'])
    # Reuse the shared helper so the reward scale is defined in one place.
    return reward_wrap_env(env)

def reward_wrap_env(env, scale=0.01):
    """Wrap ``env`` so that every reward is multiplied by ``scale``.

    Args:
        env: Environment to wrap.
        scale: Multiplicative factor applied to each reward. Defaults to
            0.01, the value this notebook uses for training.

    Returns:
        ``env`` wrapped in ``gym.wrappers.TransformReward``.
    """
    return gym.wrappers.TransformReward(env, lambda r: r * scale)

In [None]:

def main(train_config):
    """Train DreamerV3 on the ViZDoom ``basic.cfg`` scenario with RLlib.

    Args:
        train_config: Mapping with the keys ``'image_shape'`` and
            ``'frame_skip'`` (forwarded to ``wrap_env``) and
            ``'ray_iterations'`` (number of ``algo.train()`` calls).

    Ray is shut down when this function exits, including when building the
    algorithm or training raises.
    """
    iteration_num = train_config['ray_iterations']

    # Querying cluster resources requires a running Ray instance; start one
    # only if the caller has not already done so.
    if not ray.is_initialized():
        ray.init()

    try:
        # Kept under its own name: ``env_creator`` is invoked lazily (when the
        # algorithm is built), so it must not close over a variable that is
        # later rebound to the algorithm config.
        scenario_kwargs = {"scenario_file": "basic.cfg"}

        def env_creator(env_config):
            return wrap_env(VizdoomScenarioEnv(**scenario_kwargs), train_config)

        register_env('vizdoom_env', env_creator)

        # Create DreamerV3

        # A CPU-only cluster reports no 'GPU' entry at all.
        num_gpus = int(ray.cluster_resources().get('GPU', 0))

        # One GPU is left out of the learner pool, as before; never go
        # negative when the cluster has zero or one GPU.
        num_learner_workers = max(num_gpus - 1, 0)
        num_gpus_per_learner_worker = 1 if num_gpus > 0 else 0
        num_cpus_per_learner_workers = 1

        algo_config = (
            DreamerV3Config()
            .environment(
                env='vizdoom_env',
            )
            .resources(
                num_learner_workers=num_learner_workers,
                num_gpus_per_learner_worker=num_gpus_per_learner_worker,
                num_cpus_per_learner_worker=num_cpus_per_learner_workers,
            )
            .rollouts(num_envs_per_worker=1, remote_worker_envs=False)
            .training(
                model_size="S",
                training_ratio=512,
                # With no remote learners the local learner still needs a
                # non-zero batch size.
                batch_size_B=16 * max(num_learner_workers, 1),
            )
        )

        # run training
        algo = algo_config.build()
        try:
            print('------ algo=', algo)
            # ``tqdm`` is imported as a module in this file, so the progress
            # bar class has to be referenced as ``tqdm.tqdm``.
            for iteration in tqdm.tqdm(range(iteration_num)):
                result = algo.train()
                print('result.keys', result.keys())
        finally:
            # Release the algorithm's workers before tearing Ray down.
            algo.stop()
    finally:
        # shutdown ray
        ray.shutdown()