<a href="https://colab.research.google.com/github/gmansil/OpenAI-Gym-Mario-Bot/blob/main/RLModelTraining.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# install the necessary dependencies
!pip install stable-baselines3[extra]
!pip install gym[all]
!pip install gym_super_mario_bros==7.3.0 nes_py

In [None]:
# import the necessary libraries
import gym
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
from stable_baselines3.common.evaluation import evaluate_policy
# import os for file path management
import os
os.environ['KMP_DUPLICATE_LIB_OK']='True'
# import PPO for algos
from stable_baselines3 import PPO
# import base callback for saving models
from stable_baselines3.common.callbacks import BaseCallback

In [None]:
# import game
import gym_super_mario_bros

# import Joypad wrapper
from nes_py.wrappers import JoypadSpace

#import SIMPLIFIED controls
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

# import the GrayScaling wrapper (the frame stacker, VecFrameStack, is imported from stable_baselines3)
from gym.wrappers import GrayScaleObservation

In [None]:
# Build the training environment in five stages, rebinding `env` at each one.
# Stage 1: the raw Super Mario Bros environment.
env = gym_super_mario_bros.make("SuperMarioBros-v0")
# Stage 2: restrict the controller to the SIMPLE_MOVEMENT action set.
env = JoypadSpace(env, actions=SIMPLE_MOVEMENT)
# Stage 3: grayscale observations, keeping the channel dimension.
env = GrayScaleObservation(env, keep_dim=True)
# Stage 4: wrap the single environment in a DummyVecEnv (takes a list of
# zero-argument factories).
env = DummyVecEnv(env_fns=[lambda: env])
# Stage 5: stack 4 consecutive frames along the last (channel) axis.
env = VecFrameStack(env, n_stack=4, channels_order="last")

In [None]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that checkpoints the model every ``check_freq`` steps.

    Args:
        check_freq: Number of ``_on_step`` calls between two checkpoints.
        save_path: Directory that receives the checkpoint files. ``None``
            disables checkpointing entirely.
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory if a save path was configured."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save the model when the call counter hits a multiple of ``check_freq``.

        Returns:
            Always ``True``; this callback never asks for training to stop.
        """
        # Mirror the guard in _init_callback: without a save path there is no
        # directory to write to, and os.path.join(None, ...) raises TypeError.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, f"best_model_{self.n_calls}")
            self.model.save(model_path)

        return True

In [None]:
# Output directories, relative to the notebook's working directory.
# CHECKPOINT_DIR is passed to the callback as save_path (model checkpoints).
CHECKPOINT_DIR = "./train/"
# LOG_DIR is passed to PPO as tensorboard_log.
LOG_DIR = "./logs/"

In [None]:
# setup model saving callback
# check_freq must be smaller than the total_timesteps given to model.learn
# (100000 in this notebook); with the previous value of 1000000 the callback
# never reached a save point, so no intermediate checkpoint was written.
callback = TrainAndLoggingCallback(check_freq=10000, save_path=CHECKPOINT_DIR)

In [None]:
# Instantiate the PPO agent with a CNN policy on the wrapped environment;
# TensorBoard logs are written under LOG_DIR.
model = PPO(
    policy="CnnPolicy",
    env=env,
    learning_rate=1e-6,
    n_steps=512,
    tensorboard_log=LOG_DIR,
    verbose=1,
)

In [None]:
# Run training for 100,000 timesteps, invoking the checkpoint callback as it goes.
model.learn(callback=callback, total_timesteps=100_000)

In [None]:
# Persist the final trained model under the name "recentModel".
model.save("recentModel")