Training For Simple Movement

In [2]:
import gym
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from nes_py.wrappers import JoypadSpace

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from stable_baselines3.common.monitor import Monitor

from gym.wrappers import GrayScaleObservation, ResizeObservation

import os

def create_env():
    """Build one monitored Super Mario Bros environment for training.

    The raw 'SuperMarioBros-v0' environment is wrapped, in order, with:
    the SIMPLE_MOVEMENT joypad action set, a resize of the observation
    (shape=84), a grayscale conversion that keeps the channel axis, and a
    Monitor that logs stats like episode rewards.

    Returns:
        The fully wrapped environment (outermost wrapper is Monitor).
    """
    raw_env = gym_super_mario_bros.make('SuperMarioBros-v0')
    joypad_env = JoypadSpace(raw_env, SIMPLE_MOVEMENT)
    resized_env = ResizeObservation(joypad_env, shape=84)
    gray_env = GrayScaleObservation(resized_env, keep_dim=True)
    return Monitor(gray_env)

# Vectorise the single environment and stack 4 frames per observation.
env = DummyVecEnv([create_env])
env = VecFrameStack(env, n_stack=4)

# Directory that will hold the saved model.
save_path = os.path.join("train", "ppo_mario_simple")
os.makedirs(save_path, exist_ok=True)

try:
    # Create the PPO model
    model = PPO("CnnPolicy", env, verbose=1, tensorboard_log="./ppo_mario_logs/simple_move")

    # Train, then persist the weights before the environment is released.
    model.learn(total_timesteps=100000)
    model.save(os.path.join(save_path, "mario_model"))
finally:
    # Always release the emulator, even if training fails or is interrupted,
    # so it does not stay alive in the kernel while later cells build a new
    # environment. NOTE: `model` keeps a reference to this closed env; attach
    # a fresh one with `model.set_env(...)` before training it further.
    env.close()

print("Training complete and model saved.")


Using cuda device
Wrapping the env in a VecTransposeImage.
Logging to ./ppo_mario_logs/simple_move\PPO_2
-----------------------------
| time/              |      |
|    fps             | 207  |
|    iterations      | 1    |
|    time_elapsed    | 9    |
|    total_timesteps | 2048 |
-----------------------------
-----------------------------------------
| time/                   |             |
|    fps                  | 158         |
|    iterations           | 2           |
|    time_elapsed         | 25          |
|    total_timesteps      | 4096        |
| train/                  |             |
|    approx_kl            | 0.060520686 |
|    clip_fraction        | 0.33        |
|    clip_range           | 0.2         |
|    entropy_loss         | -1.92       |
|    explained_variance   | -0.000724   |
|    learning_rate        | 0.0003      |
|    loss                 | 7.75        |
|    n_updates            | 10          |
|    policy_gradient_loss | 0.017       |
|    value_lo

In [None]:
import gym
import gym_super_mario_bros
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from nes_py.wrappers import JoypadSpace
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from gym.wrappers import GrayScaleObservation, ResizeObservation
import time
def create_env():
    """Build the playback environment for the SIMPLE_MOVEMENT agent.

    Uses the same environment id and the same observation wrappers as the
    training cell (joypad action set, resize with shape=84, grayscale with
    the channel axis kept), but without a Monitor wrapper.

    Returns:
        The wrapped environment (outermost wrapper is GrayScaleObservation).
    """
    return GrayScaleObservation(
        ResizeObservation(
            JoypadSpace(
                gym_super_mario_bros.make('SuperMarioBros-v0'),
                SIMPLE_MOVEMENT,
            ),
            shape=84,
        ),
        keep_dim=True,
    )
# Rebuild the same vectorised, 4-frame-stacked pipeline the model was trained on.
env = DummyVecEnv([create_env])
env = VecFrameStack(env, n_stack=4)

try:
    model = PPO.load("train/ppo_mario_simple/mario_model")
    obs = env.reset()
    done = False
    while not done:
        action, _ = model.predict(obs)
        # A VecEnv returns one entry per sub-environment, so the done flag is
        # an array rather than a bool.
        obs, reward, dones, info = env.step(action)
        env.render()
        time.sleep(0.01)  # brief pause so the rendered game is watchable
        # Only one environment is wrapped; read its flag explicitly instead of
        # relying on the truthiness of a one-element array.
        done = bool(dones[0])
finally:
    # Close the emulator/window even if rendering fails or the run is
    # interrupted from the notebook.
    env.close()


Training For Complex Movement

In [1]:
import gym
import gym_super_mario_bros
from gym_super_mario_bros.actions import COMPLEX_MOVEMENT
from nes_py.wrappers import JoypadSpace

from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from stable_baselines3.common.monitor import Monitor

from gym.wrappers import GrayScaleObservation, ResizeObservation

import os

def create_env():
    """Create and wrap the Mario environment used for COMPLEX_MOVEMENT training.

    Starting from 'SuperMarioBros-1-1-v0', the environment is wrapped with the
    COMPLEX_MOVEMENT joypad action set, a resize of the observation
    (shape=84), a grayscale conversion that keeps the channel axis, and a
    Monitor that logs episode reward, length, etc.

    Returns:
        The fully wrapped environment (outermost wrapper is Monitor).
    """
    stage_env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')
    stage_env = JoypadSpace(stage_env, COMPLEX_MOVEMENT)
    preprocessed = GrayScaleObservation(
        ResizeObservation(stage_env, shape=84),
        keep_dim=True,
    )
    return Monitor(preprocessed)

# Wrap with vector env and stack frames (4 frames per observation)
env = DummyVecEnv([create_env])
env = VecFrameStack(env, n_stack=4)

# Create directory to save model
save_path = os.path.join("train", "ppo_mario_complex")
os.makedirs(save_path, exist_ok=True)

try:
    # Create the PPO model
    model = PPO("CnnPolicy", env, verbose=1, tensorboard_log="./ppo_mario_logs/complex_move")

    # Train the agent
    model.learn(total_timesteps=100000)

    # Save the model while the run is still intact
    model.save(os.path.join(save_path, "mario_model"))
finally:
    # Always release the emulator, even if training fails or is interrupted,
    # so it does not stay alive in the kernel while later cells build a new
    # environment. NOTE: `model` keeps a reference to this closed env; attach
    # a fresh one with `model.set_env(...)` before training it further.
    env.close()

print("Training complete and model saved at:", save_path)

Using cuda device
Wrapping the env in a VecTransposeImage.
Logging to ./ppo_mario_logs/complex_move\PPO_1
-----------------------------
| time/              |      |
|    fps             | 208  |
|    iterations      | 1    |
|    time_elapsed    | 9    |
|    total_timesteps | 2048 |
-----------------------------
----------------------------------------
| time/                   |            |
|    fps                  | 200        |
|    iterations           | 2          |
|    time_elapsed         | 20         |
|    total_timesteps      | 4096       |
| train/                  |            |
|    approx_kl            | 0.02756429 |
|    clip_fraction        | 0.341      |
|    clip_range           | 0.2        |
|    entropy_loss         | -2.46      |
|    explained_variance   | -0.000483  |
|    learning_rate        | 0.0003     |
|    loss                 | 23.2       |
|    n_updates            | 10         |
|    policy_gradient_loss | 0.003      |
|    value_loss           | 

In [None]:
import gym
import gym_super_mario_bros
from gym_super_mario_bros.actions import COMPLEX_MOVEMENT
from nes_py.wrappers import JoypadSpace
from stable_baselines3 import PPO
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack
from gym.wrappers import GrayScaleObservation, ResizeObservation
import time
def create_env():
    """Build the playback environment for the COMPLEX_MOVEMENT agent.

    The environment id is 'SuperMarioBros-1-1-v0', the same id the complex
    model was trained on, so the agent is evaluated on the stage it learned
    rather than on the full-game 'SuperMarioBros-v0' environment. The
    observation wrappers mirror training (joypad action set, resize with
    shape=84, grayscale with the channel axis kept); no Monitor is added.

    Returns:
        The wrapped environment (outermost wrapper is GrayScaleObservation).
    """
    env = gym_super_mario_bros.make('SuperMarioBros-1-1-v0')  # Level 1-1, as in training
    env = JoypadSpace(env, COMPLEX_MOVEMENT)
    env = ResizeObservation(env, shape=84)
    env = GrayScaleObservation(env, keep_dim=True)
    return env
# Rebuild the same vectorised, 4-frame-stacked pipeline the model was trained on.
env = DummyVecEnv([create_env])
env = VecFrameStack(env, n_stack=4)

try:
    model = PPO.load("train/ppo_mario_complex/mario_model")
    obs = env.reset()
    done = False
    while not done:
        action, _ = model.predict(obs)
        # A VecEnv returns one entry per sub-environment, so the done flag is
        # an array rather than a bool.
        obs, reward, dones, info = env.step(action)
        env.render()
        time.sleep(0.01)  # brief pause so the rendered game is watchable
        # Only one environment is wrapped; read its flag explicitly instead of
        # relying on the truthiness of a one-element array.
        done = bool(dones[0])
finally:
    # Close the emulator/window even if rendering fails or the run is
    # interrupted from the notebook.
    env.close()