In [1]:
pip install tensorflow-gpu

Note: you may need to restart the kernel to use updated packages.


**1. Setup Mario**

In [2]:
!pip install gym_super_mario_bros==7.3.0 nes_py



In [3]:
# Import the game
import gym_super_mario_bros
# Import the Joypad wrapper
from nes_py.wrappers import JoypadSpace
# Import the SIMPLIFIED controls
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT

In [4]:
# Create the 'SuperMarioBros-v0' environment and wrap it in JoypadSpace
# with the SIMPLE_MOVEMENT control set, in a single expression.
env = JoypadSpace(
    gym_super_mario_bros.make('SuperMarioBros-v0'),
    SIMPLE_MOVEMENT,
)

**2. Preprocess Environment**

In [5]:
# Install pytorch
!pip install torch==1.10.1+cu113 torchvision==0.11.2+cu113 torchaudio===0.10.1+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html

Looking in links: https://download.pytorch.org/whl/cu113/torch_stable.html


In [6]:
# Install stable baselines for RL stuff
!pip install stable-baselines3[extra]



In [7]:
# Import the GrayScaling Wrapper
from gym.wrappers import GrayScaleObservation
# Import the Vectorization and Frame Stacking Wrappers
from stable_baselines3.common.vec_env import VecFrameStack, DummyVecEnv
# Import Matplotlib to show the impact of frame stacking
from matplotlib import pyplot as plt

**3. Train the RL Model**

In [8]:
# Import os for file path management
import os 
# Import the PPO and DQN algorithms
from stable_baselines3 import PPO, DQN
# Import Base Callback for saving models
from stable_baselines3.common.callbacks import BaseCallback

In [9]:
class TrainAndLoggingCallback(BaseCallback):
    """Callback that saves the model being trained at a fixed call interval.

    Args:
        check_freq: A checkpoint is written whenever ``self.n_calls`` is a
            multiple of this value. ``n_calls`` is not set in this class;
            presumably it is maintained by ``BaseCallback`` -- confirm.
        save_path: Directory that receives the checkpoint files. ``None``
            disables both directory creation and checkpoint saving.
        verbose: Forwarded unchanged to ``BaseCallback.__init__``.
    """

    def __init__(self, check_freq, save_path, verbose=1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory (if a save path was given)."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok=True)

    def _on_step(self):
        """Save ``self.model`` on every ``check_freq``-th call.

        Returns:
            Always ``True``.
        """
        # _init_callback treats save_path=None as valid, so mirror that here:
        # os.path.join(None, ...) would otherwise raise TypeError.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            # File name embeds the call count at which the checkpoint was taken.
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [10]:
# Directory handed to the checkpoint callback as its save_path.
CHECKPOINT_DIR = './train/'
# Directory handed to the model as its tensorboard_log location.
LOG_DIR = './logs/'

In [11]:
# Checkpoint callback: writes a model file into CHECKPOINT_DIR every 10000 calls.
callback = TrainAndLoggingCallback(
    save_path=CHECKPOINT_DIR,
    check_freq=10000,
)

In [12]:
# Instantiate the DQN agent with a CNN policy on the Mario environment.
# NOTE(review): buffer_size=50 looks very small for a replay buffer --
# confirm it is intentional (e.g. chosen to limit memory use).
# NOTE(review): the GrayScaleObservation / VecFrameStack wrappers imported
# earlier are not applied to `env` in the visible cells -- confirm.
model = DQN(
    'CnnPolicy',
    env,
    learning_rate=0.001,
    buffer_size=50,
    seed=60,
    tensorboard_log=LOG_DIR,
    verbose=1,
)

Using cuda device
Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.


In [13]:
# Train the agent for 500000 timesteps, passing the checkpoint callback.
# Kept as the cell's final expression so its return value is displayed.
model.learn(
    callback=callback,
    total_timesteps=500000,
)

Logging to ./logs/DQN_2


  return (self.ram[0x86] - self.ram[0x071c]) % 256


----------------------------------
| rollout/            |          |
|    ep_len_mean      | 1.3e+04  |
|    ep_rew_mean      | 931      |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 4        |
|    fps              | 523      |
|    time_elapsed     | 99       |
|    total_timesteps  | 52104    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.538    |
|    n_updates        | 525      |
----------------------------------
----------------------------------
| rollout/            |          |
|    ep_len_mean      | 7.11e+03 |
|    ep_rew_mean      | 1.22e+03 |
|    exploration_rate | 0.05     |
| time/               |          |
|    episodes         | 8        |
|    fps              | 408      |
|    time_elapsed     | 139      |
|    total_timesteps  | 56868    |
| train/              |          |
|    learning_rate    | 0.001    |
|    loss             | 0.0867   |
|    n_updates      

<stable_baselines3.dqn.dqn.DQN at 0x1930d6852b0>

In [14]:
# Save the trained model under the name 'thisisatestmodel' (a relative path,
# so it is resolved against the current working directory).
model.save('thisisatestmodel')