In [1]:
from pathlib import Path

import gym
import torch
import torch.nn as nn

from easyrl.agents.ppo_agent import PPOAgent
from easyrl.configs import cfg
from easyrl.configs import set_config
from easyrl.configs.command_line import cfg_from_cmd
from easyrl.engine.ppo_engine import PPOEngine
from easyrl.models.categorical_policy import CategoricalPolicy
from easyrl.models.diag_gaussian_policy import DiagGaussianPolicy
from easyrl.models.mlp import MLP
from easyrl.models.value_net import ValueNet
from easyrl.runner.nstep_runner import EpisodicRunner
from easyrl.utils.common import set_random_seed
from easyrl.utils.gym_util import make_vec_env

import new_env as bomberman_env
from models import ActorModel

def set_configs(exp_name='ppo_base'):
    """Select the 'ppo' config and apply this experiment's overrides to ``cfg.alg``.

    Args:
        exp_name: Name of the directory, created under ``<cwd>/data``, that
            becomes ``cfg.alg.save_dir``.

    Side effects:
        Calls ``set_config('ppo')``, mutates the global ``cfg.alg`` and prints
        a three-line banner showing the selected device.
    """
    set_config('ppo')

    # Grab the alg section only after set_config() has run, so the overrides
    # land on whatever object it installed.
    alg = cfg.alg
    alg.num_envs = 1
    alg.episode_steps = 150
    alg.max_steps = 600000
    if torch.cuda.is_available():
        alg.device = 'cuda'
    else:
        alg.device = 'cpu'
    alg.env_name = 'Bomberman-v1'

    # Results live in <current working directory>/data/<exp_name>.
    data_root = Path.cwd().absolute().joinpath('data').as_posix()
    alg.save_dir = f'{data_root}/{exp_name}'
    alg.diff_cfg = dict(save_dir=alg.save_dir)

    banner = '===================================='
    print(banner)
    print(f'      Device:{alg.device}')
    print(banner)

def main():
    """Build the PPO agent for the configured environment, then train or evaluate it.

    Steps:
        1. Apply the experiment configuration via ``set_configs()`` and seed
           the RNGs with ``cfg.alg.seed``.
        2. Create the vectorized environment and reset it.
        3. Wrap two separate ``ActorModel`` bodies in a ``CategoricalPolicy``
           (actor) and a ``ValueNet`` (critic), and wire them into
           ``PPOAgent`` / ``EpisodicRunner`` / ``PPOEngine``.
        4. Call ``engine.train()`` unless ``cfg.alg.test`` is truthy, in which
           case run ``engine.eval(...)`` and pretty-print its statistics.

    The environment is closed on every exit path, including exceptions and
    ``KeyboardInterrupt`` raised while training.
    """
    set_configs()

    set_random_seed(cfg.alg.seed)
    env = make_vec_env(cfg.alg.env_name,
                       cfg.alg.num_envs,
                       seed=cfg.alg.seed)
    # try/finally so an error or a kernel interrupt during the (long) training
    # run does not leave the environment open.
    try:
        env.reset()

        act_size = env.action_space.n
        # Input width of the policy and value heads. Both heads must agree on
        # it; presumably it equals ActorModel's output width (confirm in models.py).
        feature_dim = 64

        # Actor and critic get independent bodies (no shared parameters).
        actor_body = ActorModel(act_size)
        critic_body = ActorModel(act_size)

        actor = CategoricalPolicy(actor_body,
                                  in_features=feature_dim,
                                  action_dim=act_size)
        critic = ValueNet(critic_body, in_features=feature_dim)

        agent = PPOAgent(actor=actor, critic=critic, env=env)
        runner = EpisodicRunner(agent=agent, env=env)
        engine = PPOEngine(agent=agent,
                           runner=runner)
        if not cfg.alg.test:
            engine.train()
        else:
            # Only the summary statistics are reported; the raw trajectory
            # info returned alongside them is not used here.
            stat_info, _ = engine.eval(render=cfg.alg.render,
                                       save_eval_traj=cfg.alg.save_test_traj,
                                       eval_num=cfg.alg.test_num,
                                       sleep_time=0.04)
            import pprint
            pprint.pprint(stat_info)
    finally:
        env.close()

In [2]:
# Run the experiment defined in the previous cell. main() trains unless
# cfg.alg.test is set, and training is configured for 600000 steps; in the
# recorded output below a checkpoint is logged every 15000 exploration steps,
# roughly one minute apart, so expect this cell to run for a long time.
main()

[32m[INFO][0m[2023-05-13 01:47:52]: [32mAlogrithm type:<class 'easyrl.configs.ppo_config.PPOConfig'>[0m
[32m[INFO][0m[2023-05-13 01:47:52]: [32mCreating 1 environments.[0m
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
  logger.warn(
[31m[ERROR][0m[2023-05-13 01:47:52]: [31mNot a valid git repo: /Library/Frameworks/Python.framework/Versions/3.10/lib/python3.10/site-packages[0m
  bomb_distances = np.where(bomb_distances > 1, np.sqrt(bomb_distances-1), bomb_distances-1)
  logger.deprecation(
  if not isinstance(done, (bool, np.bool8)):
  logger.warn(
  logger.warn(f"{pre} is not within the observation space.")
[32m[INFO][0m[2023-05-13 01:47:52]: [32mExploration steps: 0[0m
[32m[INFO][0m[2023-05-13 01:47:52]: [32mSaving checkpoint: /Users/anugrahchemparathy/Documents/6.8200FinalProject-BombermanRL/env_tools/data/ppo_base/seed_0/model/ckpt_000000000000.pt.[0m
[32m[INFO][0m[2023-05-13 01:47:52]: [32mSaving checkpoint: /Users/anugrahchemparathy/Documents/6

      Device:cpu


[32m[INFO][0m[2023-05-13 01:48:52]: [32mExploration steps: 15000[0m
[32m[INFO][0m[2023-05-13 01:48:52]: [32mSaving checkpoint: /Users/anugrahchemparathy/Documents/6.8200FinalProject-BombermanRL/env_tools/data/ppo_base/seed_0/model/ckpt_000000015000.pt.[0m
[32m[INFO][0m[2023-05-13 01:49:53]: [32mExploration steps: 30000[0m
[32m[INFO][0m[2023-05-13 01:49:53]: [32mSaving checkpoint: /Users/anugrahchemparathy/Documents/6.8200FinalProject-BombermanRL/env_tools/data/ppo_base/seed_0/model/ckpt_000000030000.pt.[0m
[32m[INFO][0m[2023-05-13 01:50:51]: [32mExploration steps: 45000[0m
[32m[INFO][0m[2023-05-13 01:50:51]: [32mSaving checkpoint: /Users/anugrahchemparathy/Documents/6.8200FinalProject-BombermanRL/env_tools/data/ppo_base/seed_0/model/ckpt_000000045000.pt.[0m
[32m[INFO][0m[2023-05-13 01:51:50]: [32mExploration steps: 60000[0m
[32m[INFO][0m[2023-05-13 01:51:50]: [32mSaving checkpoint: /Users/anugrahchemparathy/Documents/6.8200FinalProject-BombermanRL/env_too