In [3]:
# Run retro's ROM importer on ./ROMs/ (shell escape); the recorded output shows one game imported.
!python3 -m retro.import ./ROMs/

Importing MortalKombatII-Genesis
Imported 1 games


In [4]:
# Set PYGLET_HEADLESS=1 in the kernel's environment.
# NOTE(review): presumably needed so rendering works without a display — confirm.
%set_env PYGLET_HEADLESS=1

env: PYGLET_HEADLESS=1


In [1]:
"""
Train an agent using Proximal Policy Optimization from Stable Baselines 3
"""

import gymnasium as gym
import numpy as np
from gymnasium.wrappers.time_limit import TimeLimit
from stable_baselines3 import PPO
from stable_baselines3.common.atari_wrappers import ClipRewardEnv, WarpFrame
from stable_baselines3.common.vec_env import (
    SubprocVecEnv,
    VecFrameStack,
    VecTransposeImage,
    DummyVecEnv
)
from stable_baselines3.common.callbacks import CheckpointCallback


import retro


class StochasticFrameSkip(gym.Wrapper):
    """Repeat each agent action for ``n`` emulator steps with sticky actions.

    On the first sub-step of a call the previously held action is kept with
    probability ``stickprob``; from the second sub-step on the new action is
    always used. Rewards of all sub-steps are summed.
    """

    def __init__(self, env, n, stickprob):
        """Store the skip length ``n`` and stick probability ``stickprob``."""
        super().__init__(env)
        self.n = n
        self.stickprob = stickprob
        # Action currently being held; None until the first step after reset.
        self.curac = None
        self.rng = np.random.RandomState()
        # Only pass ``want_render`` down when the wrapped env advertises it.
        self.supports_want_render = hasattr(env, "supports_want_render")

    def reset(self, **kwargs):
        """Forget the held action and reset the wrapped environment."""
        self.curac = None
        return self.env.reset(**kwargs)

    def step(self, ac):
        """Apply ``ac`` for up to ``n`` sub-steps and return the last transition.

        Returns the last observation and info, the summed reward, and the
        terminated/truncated flags of the last sub-step that ran. The loop
        stops early as soon as either flag is set.
        """
        terminated = truncated = False
        totrew = 0
        final_substep = self.n - 1
        for substep in range(self.n):
            # Switch to the new action when: nothing is held yet, or this is
            # the second sub-step, or this is the first sub-step and the
            # sticky draw did not hold the old action. The `or` chain keeps
            # the random draw restricted to that last case.
            if (
                self.curac is None
                or substep == 1
                or (substep == 0 and self.rng.rand() > self.stickprob)
            ):
                self.curac = ac

            # Skip rendering on every sub-step except the last, if supported.
            step_kwargs = {}
            if self.supports_want_render and substep < final_substep:
                step_kwargs["want_render"] = False
            ob, rew, terminated, truncated, info = self.env.step(self.curac, **step_kwargs)

            totrew += rew
            if terminated or truncated:
                break
        return ob, totrew, terminated, truncated, info

class SelfPlay(gym.Wrapper):
    """Drive the second player with a frozen PPO checkpoint.

    Every ``step`` concatenates the caller's action with the opponent's most
    recent prediction and forwards the combined array to the wrapped env.
    The opponent is shown a horizontally mirrored copy of the observation.

    NOTE(review): this wrapper does not change ``action_space``, so callers
    still see the wrapped env's space — confirm that ``ac`` carries only
    player 1's buttons before it is concatenated with player 2's.
    NOTE(review): the opponent receives the observation exactly as this
    wrapper sees it; if the checkpoint was trained on resized/stacked frames,
    the same preprocessing must be applied before ``predict`` — confirm.
    """

    def __init__(self, env):
        """Wrap ``env`` and load the opponent policy from its checkpoint."""
        gym.Wrapper.__init__(self, env)
        self.player2 = PPO.load('./ckpts/ppo/rl_model_31200000_steps.zip')
        # Opponent action to apply on the next step; set by reset()/step().
        self.player2_curac = None

    def _predict_player2(self, obs):
        """Return the opponent's deterministic action for a mirrored ``obs``."""
        # Assumes obs is an unbatched (H, W, C) image, so axis 1 is the width
        # — TODO confirm. The previous axis=3 is out of range for a 3-D array.
        mirrored = np.flip(obs, axis=1).copy()
        action, _ = self.player2.predict(mirrored, deterministic=True)
        return action

    def reset(self, **kwargs):
        """Reset the wrapped env and pre-compute the opponent's first action.

        Returns:
            The ``(obs, info)`` pair produced by the wrapped environment.
        """
        obs, info = self.env.reset(**kwargs)
        self.player2_curac = self._predict_player2(obs)
        return obs, info

    def step(self, ac):
        """Step with ``ac`` for player 1 and the stored action for player 2.

        Returns:
            The wrapped env's ``(obs, reward, terminated, truncated, info)``.
        """
        # The caller's action is used directly. No want_render keyword is
        # passed because StochasticFrameSkip.step() accepts only the action.
        obs, rew, terminated, truncated, info = self.env.step(
            np.concatenate((ac, self.player2_curac))
        )
        # Choose the opponent's next action from the new observation.
        self.player2_curac = self._predict_player2(obs)
        return obs, rew, terminated, truncated, info


def make_retro(*, game, state=None, max_episode_steps=4500, **kwargs):
    """Create a retro environment with frame skip, self-play and a time limit.

    Args:
        game: Game name handed to ``retro.make``.
        state: Initial state; ``retro.State.DEFAULT`` is used when ``None``.
        max_episode_steps: Step cap applied through ``TimeLimit``; pass
            ``None`` to leave episodes uncapped.
        **kwargs: Forwarded unchanged to ``retro.make``.

    Returns:
        The wrapped environment.
    """
    initial_state = retro.State.DEFAULT if state is None else state
    wrapped = SelfPlay(
        StochasticFrameSkip(
            retro.make(game, initial_state, **kwargs),
            n=4,
            stickprob=0.25,
        )
    )
    if max_episode_steps is None:
        return wrapped
    return TimeLimit(wrapped, max_episode_steps=max_episode_steps)


def wrap_deepmind_retro(env):
    """
    Apply DeepMind-style Atari preprocessing (as in openai/baselines'
    wrap_deepmind) to a retro environment: WarpFrame first, then ClipRewardEnv.
    """
    return ClipRewardEnv(WarpFrame(env))

In [2]:

# Game name handed to make_retro by make_env.
game = "MortalKombatII-Genesis"
# Initial emulator state; this is the same value make_retro falls back to when state is None.
state = retro.State.DEFAULT
# Reaches retro.make through make_retro's **kwargs.
scenario=None

def make_env():
    """Build one two-player environment for the configured game, fully wrapped."""
    return wrap_deepmind_retro(
        make_retro(
            game=game,
            state=state,
            scenario=scenario,
            render_mode='rgb_array',
            players=2,
        )
    )

# Eight copies of make_env in subprocesses, with the last 4 frames stacked and
# images transposed (the recorded observation space is (4, 84, 84)).
# Each copy runs make_env, whose SelfPlay wrapper loads the opponent checkpoint.
venv = VecTransposeImage(VecFrameStack(SubprocVecEnv([make_env] * 8), n_stack=4))
model = PPO(
    policy="CnnPolicy",
    env=venv,
    # Callable schedule: the rate is the schedule argument f times 2.5e-4.
    # NOTE(review): SB3 is expected to pass the remaining-progress fraction
    # here, giving a linear decay — confirm against the installed version.
    learning_rate=lambda f: f * 2.5e-4,
    n_steps=128,
    batch_size=32,
    n_epochs=4,
    gamma=0.99,
    gae_lambda=0.95,
    clip_range=0.1,
    ent_coef=0.05,
    verbose=1,
    tensorboard_log="./logs/ppo/",
)

# Checkpoints are written under ./ckpts/ppo/, the directory SelfPlay and the
# evaluation cell load from.
# NOTE(review): save_freq is given as a float (5e4); the callback's documented
# type is int — confirm this is intended.
checkpoint_callback = CheckpointCallback(save_freq=5e4, save_path='./ckpts/ppo/')
# Training is currently disabled; uncomment to train with the callback above.
# model.learn(
#     total_timesteps=100_000_000,
#     log_interval=10,
#     callback=checkpoint_callback
# )


Using cuda device


In [3]:
venv.observation_space

Box(0, 255, (4, 84, 84), uint8)

In [4]:
venv.action_space

MultiBinary(24)

In [43]:
# Close an environment left open by an earlier run of this cell, if there is one.
# On a fresh kernel `env` is not defined yet, so the unguarded call raised NameError.
if "env" in globals():
    env.close()

def make_nenv():
    """Build one environment via make_env and wrap it in RecordVideo writing to 'video_folder'."""
    return gym.wrappers.RecordVideo(make_env(), 'video_folder')

# Single in-process environment with video recording, stacked and transposed
# the same way as the training vec-env.
env = VecTransposeImage(VecFrameStack(DummyVecEnv([make_nenv]), 4))
try:
    # Both players use the same checkpoint.
    model1 = PPO.load('./ckpts/ppo/rl_model_31200000_steps.zip')
    model2 = PPO.load('./ckpts/ppo/rl_model_31200000_steps.zip')
    # Reset the environment
    obs = env.reset()
    done = False

    # Run the policy until the episode is done
    while not done:
        action1, _states = model1.predict(obs.copy(), deterministic=True)
        # Player 2 sees the observation flipped along its last axis.
        action2, _states = model2.predict(np.flip(obs, axis=3).copy(), deterministic=True)
        # NOTE(review): with one env the predicted actions are presumably
        # batched as (1, n_buttons); concatenating on axis 0 stacks them as two
        # rows instead of joining them into one action vector. Confirm the
        # layout env.step expects, and whether the SelfPlay wrapper inside
        # make_env already supplies player 2's action.
        obs, rewards, done, info = env.step(np.concatenate((action1, action2), axis=0))
finally:
    # Always close the environment, even if the rollout fails or is
    # interrupted, so it is not left open.
    env.close()

Moviepy - Building video /home/dmanu006/MKII-RL/video_folder/rl-video-episode-0.mp4.
Moviepy - Writing video /home/dmanu006/MKII-RL/video_folder/rl-video-episode-0.mp4



                                                  

Moviepy - Done !
Moviepy - video ready /home/dmanu006/MKII-RL/video_folder/rl-video-episode-0.mp4


  logger.warn(


Moviepy - Building video /home/dmanu006/MKII-RL/video_folder/rl-video-episode-0.mp4.
Moviepy - Writing video /home/dmanu006/MKII-RL/video_folder/rl-video-episode-0.mp4



                                                                  

Moviepy - Done !
Moviepy - video ready /home/dmanu006/MKII-RL/video_folder/rl-video-episode-0.mp4
Moviepy - Building video /home/dmanu006/MKII-RL/video_folder/rl-video-episode-1.mp4.
Moviepy - Writing video /home/dmanu006/MKII-RL/video_folder/rl-video-episode-1.mp4



                                                  

Moviepy - Done !
Moviepy - video ready /home/dmanu006/MKII-RL/video_folder/rl-video-episode-1.mp4


