In [1]:
#Import the super mario game in the notebook
import gym_super_mario_bros

#Import the Joypad wrapper in the notebook
from nes_py.wrappers import JoypadSpace

#Import the simple controls so that the model just needs to control some movements of our agent (here Mario)
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

In [2]:
#Changes the game from colour image (RGB) to grayscale so that our processing becomes faster as we need to deal with less data 
from gym.wrappers import GrayScaleObservation, ResizeObservation   

#VecFrameStack allows us to work with our stacked enviroments by letting us know the information of previous frames. DummyVecEnv transforms our model so that we can pass it to our AI model. 
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv, VecNormalize
from stable_baselines3.common.monitor import Monitor

In [3]:
# Import the Super Mario RAM utils
from Super_Mario_RAM_utils import MarioRAMGrid

In [4]:
from gym import ObservationWrapper
from gym.spaces import Box
import numpy as np


class RAMAndSkipWrapper(ObservationWrapper):
    def __init__(self, env, n_stack=4, n_skip=2):
        super().__init__(env)
        self.n_stack = n_stack
        self.n_skip = n_skip
        self.width = 16
        self.height = 13
        self.observation_space = Box(
            low=-2, high=2, shape=(self.height, self.width, self.n_stack), dtype=np.int8
        )
        
        self.frame_stack = np.zeros((self.height, self.width, (self.n_stack-1)*self.n_skip+1), dtype=np.int8)
        
    def observation(self, obs):
        grid = MarioRAMGrid(self.env)
        frame = grid.rendered_screen # The RAM map for the current frame
        
        self.frame_stack[:,:,1:] = self.frame_stack[:,:,:-1] # Shift frame_stack by 1 to the right
        self.frame_stack[:,:,0] = frame # Add the current frame to stack on the left
        obs = self.frame_stack[:,:,::self.n_skip]
        return obs
    
    def reset(self):
        obs = self.env.reset()
        self.frame_stack = np.zeros((self.height, self.width, (self.n_stack-1)*self.n_skip+1), dtype=np.int8)
        grid = MarioRAMGrid(self.env)
        frame = grid.rendered_screen # 2d array

        for i in range(self.frame_stack.shape[-1]):
            self.frame_stack[:,:,i] = frame

        obs = self.frame_stack[:,:,::self.n_skip]
        return obs

In [5]:
import stable_baselines3.common.torch_layers

def make_env(seed, env_name, n_stack, n_skip):
    def init():
        env = gym_super_mario_bros.make(env_name)
        env = JoypadSpace(env, SIMPLE_MOVEMENT)
        env = RAMAndSkipWrapper(env, n_stack=n_stack, n_skip=n_skip)

        return env
    
    return init

In [19]:
env = DummyVecEnv([make_env(0, "SuperMarioBros-1-1-v0", 4, 4)])

In [7]:
obs = env.reset()

observations = [obs, obs, obs]

In [8]:
batch_obs = np.array(observations[0:2])
batch_obs.shape

(2, 1, 13, 16, 4)

In [9]:
import torch

batch_obs = np.array(observations[0:2])
batch_obs = torch.tensor(batch_obs, dtype=torch.float32).to('cuda')
batch_obs = batch_obs.view(-1, 4, 13, 16)  # Reshape to [batch_size * n_envs, channels, height, width]

In [10]:
batch_obs.size()

torch.Size([2, 4, 13, 16])

In [11]:
# Import os for file path management
import os

# Import PPO algorithm to train our model
from stable_baselines3 import PPO

In [12]:
#Location of trained and logged files 
CHECKPOINT_DIR = './train/WithRAMWrapper/model_1/world_1_stage_2_try_4'
LOG_DIR = './logs'
HPO_LOG_DIR = './opt_logs'
HPO_CHECKPOINT_DIR = './opt_train'

In [13]:
model = PPO.load(os.path.join(CHECKPOINT_DIR, 'best_model_10000200'), env)
# model = PPO.load(os.path.join(HPO_CHECKPOINT_DIR, 'trial_1_best_model'), env, tensorboard_log=LOG_DIR)

In [14]:
model.importance

0.6

In [22]:
# from stable_baselines3.common.evaluation import evaluate_policy
# mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=1, render=True, deterministic=True)
from stable_baselines3.common.evaluation import evaluate_policy
mean_reward, _ = evaluate_policy(model, env, n_eval_episodes=6, render=True, deterministic=True)

KeyboardInterrupt: 

In [15]:
mean_reward

NameError: name 'mean_reward' is not defined

In [24]:
import time

states = env.reset()

#Loop through the game
while True:
    # we are getting two values of which we need only one, so we put a underscore to just assign it the extra value
    actions, _ = model.predict(states, deterministic=True)
    states, rewards, dones, infos = env.step(actions)
    env.render()
    time.sleep(0.01)

KeyboardInterrupt: 

In [None]:
%pip install imageio

Collecting imageio
  Downloading imageio-2.34.2-py3-none-any.whl.metadata (4.9 kB)
Downloading imageio-2.34.2-py3-none-any.whl (313 kB)
   ---------------------------------------- 0.0/313.5 kB ? eta -:--:--
   - -------------------------------------- 10.2/313.5 kB ? eta -:--:--
   --- ----------------------------------- 30.7/313.5 kB 640.0 kB/s eta 0:00:01
   ----------- --------------------------- 92.2/313.5 kB 744.7 kB/s eta 0:00:01
   -------------------------- ------------- 204.8/313.5 kB 1.2 MB/s eta 0:00:01
   ---------------------------------------  307.2/313.5 kB 1.6 MB/s eta 0:00:01
   ---------------------------------------- 313.5/313.5 kB 1.5 MB/s eta 0:00:00
Installing collected packages: imageio
Successfully installed imageio-2.34.2
Note: you may need to restart the kernel to use updated packages.


In [16]:
import imageio
import numpy as np

images = []
states = env.reset()
img = env.render(mode="rgb_array")

for i in range(1):
    done = False
    while done == False:
        img = np.copy(env.render(mode="rgb_array"))
        images.append(img)
        actions, _ = model.predict(states, deterministic=True)
        states, rewards, dones, infos = env.step(actions)
        done = dones[0]
        env.render()

gif = [img for i, img in enumerate(images) if i % 2 == 0]
imageio.mimsave("super_mario_bros_level_2_RAM.gif", gif, fps=30)