In [None]:
# TEMP: Run the following commands in an external terminal for the uninstall process to work properly
#py -3.8 -m pip uninstall stable-baselines3
#py -3.8 -m pip uninstall gymnasium
#py -3.8 -m pip uninstall nes-py
#py -3.8 -m pip uninstall gym-super-mario-bros

### List of libraries, websites, and other resources referenced for this project:

Markdown Guide - Basic Syntax (for documentation): https://www.markdownguide.org/basic-syntax/ <br>

OpenAI's Gym library: https://github.com/openai/gym <br>

nes-py, an NES emulator and OpenAI Gym interface: https://github.com/Kautenja/nes-py <br>
gym-super-mario-bros, an OpenAI Gym environment for Super Mario Bros. using nes-py: https://github.com/Kautenja/gym-super-mario-bros <br>

PyTorch, a machine learning library for Python: https://pytorch.org/ <br>
Stable Baselines3, an RL algorithms library for PyTorch: https://github.com/DLR-RM/stable-baselines3 <br>

In [None]:
# Install PyTorch (stable version) locally on Windows w/ GPU acceleration via pip.
# The --index-url below points pip at the "cu121" wheel index (presumably the CUDA 12.1
# builds -- adjust the suffix if the local CUDA/driver setup differs; TODO confirm).
%pip install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu121

In [None]:
# Step 1: force-downgrade wheel & setuptools to the pinned versions below so that
# gym v21 (pulled in by Stable Baselines3 1.8.0) installs properly.
%pip install wheel==0.38.4 setuptools==66.0.0

# Step 2: install Stable Baselines3 version 1.8.0 (last version to include & use gym v21).
# --no-cache-dir makes pip skip its local cache for this install.
%pip install stable-baselines3==1.8.0 --no-cache-dir

# Step 3: upgrade wheel & setuptools back to their latest versions now that gym is installed.
%pip install --upgrade wheel setuptools

In [None]:
# Install gym-super-mario-bros (includes nes-py, requires gym v21).
# Note: nes-py requires MSVC build tools to install.
# NOTE(review): no version is pinned here, unlike the stable-baselines3 install above;
# consider pinning a known-good release so the notebook stays reproducible.
%pip install gym-super-mario-bros

### Code

In [1]:
# Import libraries
import os

import gym_super_mario_bros
from gym.wrappers import GrayScaleObservation
from gym_super_mario_bros.actions import SIMPLE_MOVEMENT
from matplotlib import pyplot as plt
from nes_py.wrappers import JoypadSpace
from stable_baselines3 import PPO
from stable_baselines3.common.callbacks import BaseCallback
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack

# Build the SMB1 gym environment (standard ROM mode) and wrap it right away in the
# joypad wrapper, which restricts the agent to SIMPLE_MOVEMENT (only 7 actions).
env = JoypadSpace(
    gym_super_mario_bros.make("SuperMarioBros-v0"),
    SIMPLE_MOVEMENT,
)

In [None]:
# Default demo from the gym-super-mario-bros repository: plays 5000 steps with random
# inputs (no machine learning AI involved) and renders every frame.
done = True  # start as "done" so the very first iteration resets the environment
try:
    for step in range(5000):
        if done:
            state = env.reset()
        state, reward, done, info = env.step(env.action_space.sample())
        env.render()
finally:
    # Close the environment even when the loop is interrupted or a step raises,
    # so the emulator/render window is not left dangling.
    env.close()

### Preprocessing the Environment

In [None]:
def _make_single_env():
    """Build one Mario environment: raw game -> 7-action joypad -> grayscale frames."""
    # the actual game environment
    game = gym_super_mario_bros.make('SuperMarioBros-v0')
    # movement simplified to only 7 options
    restricted = JoypadSpace(game, SIMPLE_MOVEMENT)
    # color removed to help with processing speed (channel dimension is kept)
    return GrayScaleObservation(restricted, keep_dim = True)


# The single environment is wrapped in a dummy vector environment so that frames can be
# stacked; 10 frames are then stacked on top of each other (channels last), so the AI
# can make decisions based on its previous locations.
env = VecFrameStack(DummyVecEnv([_make_single_env]), 10, channels_order = 'last')

In [3]:
# Resets the environment back to the start and keeps the returned observation in `state`
# (later cells step from, and plot, this variable).
state = env.reset()

### Frame Stack Visualization Code

In [None]:
# Steps forward once in the environment, doing action 5 [Jump].
# The action is passed as a one-element list because `env` is a vectorized
# environment (it was wrapped in DummyVecEnv during preprocessing).
# This is here purely to visualize what the AI sees when given stacked frames.
state, reward, done, info = env.step([5])

In [None]:
# Draw every slice along the last axis of `state` (the stacked frames) side by side.
# The number of subplot columns follows state.shape[3] instead of a hard-coded 10,
# so the plot keeps working if the frame-stack size is changed.
n_frames = state.shape[3]
plt.figure(figsize=(20, 16))
for idx in range(n_frames):
    plt.subplot(1, n_frames, idx + 1)
    plt.imshow(state[0][:, :, idx])  # first (only) env in the batch, frame `idx`
# plt.show must actually be called; a bare `plt.show` only echoes the function object.
plt.show()

### Code For Saving The Model

In [None]:
class TrainLogCallBack(BaseCallback):
    """Training callback that periodically saves the model to disk.

    Every ``check_freq`` calls of ``_on_step`` the current model is written to
    ``save_path`` under the name ``best_model_<n_calls>``.

    Args:
        check_freq: Number of callback calls between two consecutive saves.
        save_path: Directory the checkpoints are written to. ``None`` disables
            checkpointing (no directory is created and nothing is saved).
        verbose: Verbosity level forwarded to ``BaseCallback``.
    """

    def __init__(self, check_freq, save_path, verbose = 1):
        super().__init__(verbose)
        self.check_freq = check_freq
        self.save_path = save_path

    def _init_callback(self):
        """Create the checkpoint directory if a save path was configured."""
        if self.save_path is not None:
            os.makedirs(self.save_path, exist_ok = True)

    def _on_step(self):
        """Save a checkpoint on every ``check_freq``-th call.

        Returns:
            Always ``True``, so this callback never requests that training stop.
        """
        # Guard against save_path=None here too: _init_callback already accepts it,
        # and os.path.join(None, ...) would otherwise raise a TypeError mid-training.
        if self.save_path is not None and self.n_calls % self.check_freq == 0:
            model_path = os.path.join(self.save_path, 'best_model_{}'.format(self.n_calls))
            self.model.save(model_path)

        return True

In [None]:
# Output locations used by the rest of the notebook:
#   CHECKPOINT_DIR - passed to the checkpoint callback as its save_path
#   LOG_DIR        - passed to PPO as its tensorboard_log directory
# NOTE(review): these are hard-coded absolute paths on drive E:; change them to match
# the local machine (the model-loading cell further down hard-codes the same Train folder).
CHECKPOINT_DIR = 'E:/4444_AI_files/Train/'
LOG_DIR = 'E:/4444_AI_files/Log/'

In [None]:
# Creates an actual instance of the TrainLogCallBack class defined above.
# With check_freq=10000 the model is saved into CHECKPOINT_DIR every 10,000 callback steps.
callback = TrainLogCallBack(check_freq=10000, save_path=CHECKPOINT_DIR)

### Training The Model

In [None]:
# Build the actual PPO model (CNN policy) on top of the preprocessed environment;
# TensorBoard logs are written to LOG_DIR.
# (Why can't getting the data be this easy :/ )
model = PPO(
    'CnnPolicy',
    env,
    learning_rate=1e-6,
    n_steps=512,
    tensorboard_log=LOG_DIR,
    verbose=1,
)

# the output at the bottom should say it is using the cuda device

In [None]:
# The actual training run.
# It goes on for 1 billion timesteps [maybe a bit overkill],
# i.e. the AI will see 1 billion frames before it ends,
# and it checkpoints through the callback instance created above.
model.learn(
    total_timesteps=1_000_000_000,
    callback=callback,
)

In [None]:
# Saves the trained model under the name 'thisisastatesmodel'; the path is relative,
# so the file lands in the current working directory rather than in CHECKPOINT_DIR.
model.save('thisisastatesmodel')

### Testing the Created Models

In [None]:
# Loads the final model: the checkpoint the callback writes at call 1,000,000,000
# (file name pattern 'best_model_<n_calls>').
# NOTE(review): the path is hard-coded and duplicates CHECKPOINT_DIR; update it if the
# checkpoint directory or the number of training timesteps changes.
model = PPO.load('E:/4444_AI_files/Train/best_model_1000000000')

In [None]:
# Let the loaded model play and render it. The loop runs until the cell is interrupted
# (Kernel -> Interrupt); the interrupt is treated as the normal way to stop watching.
state = env.reset()
try:
    while True:
        action, _state = model.predict(state)
        state, reward, done, info = env.step(action)
        env.render()
except KeyboardInterrupt:
    # Deliberate stop requested by the user; nothing else to do.
    pass
finally:
    # Close the environment so the emulator/render window does not stay open.
    # Re-create the environment (preprocessing cell) before running this cell again.
    env.close()