In [None]:
# Install dependencies
!pip install gym-super-mario-bros stable-baselines3

# Imports
import gym
import gym_super_mario_bros
import numpy as np
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT, COMPLEX_MOVEMENT
from nes_py.wrappers import JoypadSpace
from stable_baselines3 import A2C
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv

# Run configuration, gathered in one place so a different stage, budget or
# checkpoint name is a one-line change.
ENV_ID = "SuperMarioBros-1-1-v0"  # world 1, stage 1
N_STACK = 4                       # consecutive frames stacked per observation
TOTAL_TIMESTEPS = 100000
MODEL_PATH = "mario_a2c_model"


def make_mario_env():
    """Build one Super Mario Bros environment limited to COMPLEX_MOVEMENT.

    Returns:
        The environment registered under ``ENV_ID``, wrapped in
        ``JoypadSpace`` so that actions are indices into ``COMPLEX_MOVEMENT``.
    """
    # Importing gym_super_mario_bros registers its environments; make()
    # looks the requested stage up by ID.
    base_env = gym_super_mario_bros.make(ENV_ID)
    # JoypadSpace restricts the raw NES controller space to the listed button
    # combinations, giving the policy a small discrete action set.
    return JoypadSpace(base_env, COMPLEX_MOVEMENT)


# NOTE(review): gym-super-mario-bros targets the classic `gym` API; confirm
# the installed stable-baselines3 release is compatible with it.

# Wrap the environment to handle vectorized input and stack frames.
# A named factory is passed instead of a lambda closing over `env`, so the
# result no longer depends on DummyVecEnv invoking the factory before `env`
# is rebound.
env = VecFrameStack(DummyVecEnv([make_mario_env]), n_stack=N_STACK)

# Define and train the A2C model
model = A2C('CnnPolicy', env, verbose=1)
model.learn(total_timesteps=TOTAL_TIMESTEPS)

# Save the trained model
model.save(MODEL_PATH)

# Load the trained model
model = A2C.load(MODEL_PATH)

# Evaluate the trained model for a single episode
obs = env.reset()
try:
    done = False
    while not done:
        action, _states = model.predict(obs)
        # A VecEnv returns one entry per sub-environment; there is exactly
        # one here, so index 0 is the episode-finished flag.
        obs, reward, dones, info = env.step(action)
        done = bool(dones[0])
        env.render()
finally:
    # Close the environment even if prediction or rendering fails, so the
    # emulator and its window are always released.
    env.close()
