<a href="https://colab.research.google.com/github/hululuzhu/mario-ai-2023/blob/main/notebooks/cartpole_e2e_08202023.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# An end-to-end colab to train CartPole AI from scratch and visualize the process
- Contact: hululu.zhu@gmail.com
- Last update: 08/20/2023
- Note: we use old versions of gym and SB3 (Stable-Baselines3) for compatibility
- Also note that a low-memory CPU instance (free tier) is good enough

In [1]:
# @title Run me to import necessary packages and set path
print("-" * 80, "\nQuiet install. ~5mins, be patient plz.\n" + "-" * 80)
# Clone the repository and install its requirements.txt. Output is redirected
# to /dev/null to keep the notebook quiet, so pip errors will not be shown.
!git clone --quiet https://github.com/hululuzhu/mario-ai-2023.git > /dev/null
!pip install -r mario-ai-2023/requirements.txt > /dev/null 2>&1

# Silence all Python warnings for the rest of the session.
import warnings
warnings.filterwarnings('ignore')

# Make the cloned repo's lib/ directory importable.
# NOTE(review): assumes the clone landed under /content (Colab's usual working
# directory) — confirm when running elsewhere.
import os, sys
sys.path.append(os.path.abspath("/content/mario-ai-2023/lib"))

import gym
from PIL import Image, ImageDraw, ImageFont
# Aliased so it does not shadow PIL's Image imported on the line above.
from IPython.display import Image as displayImage
from stable_baselines3 import PPO

def append_step_text(base_img, step, reward):
  """Stamp the step counter and running reward onto a frame.

  The caption is drawn directly onto ``base_img`` (modifying it in place) in
  black at the top-left corner. The annotated image is then written to
  ``/tmp/001.png`` and a copy loaded back from that file is returned.
  """
  font_path = r'/usr/share/fonts/truetype/liberation/LiberationMono-Regular.ttf'
  scratch_path = '/tmp/001.png'

  canvas = ImageDraw.Draw(base_img)
  mono_font = ImageFont.truetype(font_path, 40)
  caption = f'{step}  Reward: {reward}'
  canvas.text((5, 5), caption, font=mono_font, align="left", fill=(0, 0, 0))

  base_img.save(scratch_path)
  # Reload from disk and copy so the caller receives a mutable image.
  reloaded = Image.open(scratch_path)
  return reloaded.copy()

def eval_env(model, env, gif_path, is_human_view=True, sample_steps=100):
    """Roll out one episode and save the rendered frames as an animated GIF.

    Each frame is captured with ``env.render('rgb_array')`` and annotated with
    the current step count and accumulated reward. The rollout stops after
    ``sample_steps`` frames or right after the terminal frame is captured,
    whichever comes first; on termination the environment is reset.

    Args:
        model: Object exposing ``predict(obs)`` that returns ``(action, state)``.
            If falsy, actions are sampled from ``env.action_space``.
        env: Environment using the 4-tuple ``step`` API
            (``obs, reward, done, info``).
        gif_path: Destination path for the GIF.
        is_human_view: Must be true; any other mode is not implemented.
        sample_steps: Maximum number of frames to capture.

    Raises:
        NotImplementedError: If ``is_human_view`` is false.
        RuntimeError: If fewer than two frames were captured, so there is
            nothing to animate.
    """
    frames = []
    obs = env.reset()
    done = False
    step, total_reward = 0, 0
    for _ in range(sample_steps):
        screen = env.render('rgb_array')
        if not is_human_view:
            raise NotImplementedError
        frames.append(
            append_step_text(Image.fromarray(screen), step, total_reward))

        if done:
            # The terminal frame is already captured; reset before leaving.
            try:
                # try multi-env within-session reset first
                env.env.reset()
            except Exception:
                # Best-effort fallback; unlike a bare ``except`` this lets
                # KeyboardInterrupt / SystemExit propagate.
                env.reset()
            break

        if model:
            action, _state = model.predict(obs)
        else:
            action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        step += 1
        total_reward += reward

    # A real exception type is required here: raising a plain string fails
    # with an unrelated TypeError instead of reporting the actual problem.
    if len(frames) < 2:
        raise RuntimeError("Bad environment, cannot move")

    frames[0].save(
        gif_path, save_all=True, append_images=frames[1:], loop=0, duration=1)


-------------------------------------------------------------------------------- 
Quiet install. ~5mins, be patient plz.
--------------------------------------------------------------------------------


In [None]:
# @title Initialize the cartpole game
env = gym.make('CartPole-v1')
# Check out slides to see what the 4 outputs are, or check out
# https://www.gymlibrary.dev/environments/classic_control/cart_pole/
# (per that page: cart position, cart velocity, pole angle, pole angular velocity).
# The initial observation is converted to a plain list so the notebook
# displays it as the cell's result.
env.reset().tolist()

In [None]:
# @title Let's play the game by taking random actions!
# With model=None, eval_env samples random actions from the action space.
eval_env(model=None, env=env, gif_path='/tmp/eval01.gif')
print('-' * 80, '\n As you may expect, it does not last long\n', '-' * 80)
# Show the GIF that eval_env just wrote.
displayImage(open('/tmp/eval01.gif','rb').read())

In [7]:
# @title Let's initialize a game AI, using one of the most advanced AI algorithms, called PPO
# If you would like to learn more about PPO, start with https://en.wikipedia.org/wiki/Proximal_Policy_Optimization
# tensorboard_log="cartpole" is the directory that the TensorBoard cell's --logdir points into.
my_ppo = PPO('MlpPolicy', env, tensorboard_log="cartpole")

In [None]:
# @title We will use a tool called TensorBoard to track training progress
%reload_ext tensorboard
# Note the pattern of logdir: 'cartpole' matches tensorboard_log in the my_ppo
# definition, and 'ppo_mlp_1m_N' matches tb_log_name in the learn call.
# NOTE(review): the trailing run index presumably increases each time learn is
# re-run (_2, _3, ...); adjust the path below if you train more than once.
%tensorboard --logdir cartpole/ppo_mlp_1m_1 --reload_multifile True

In [None]:
# @title Let's train the AI now, asking it to try to get "higher scores"!
# Train for 1,000,000 timesteps; tb_log_name is the run-name prefix that the
# TensorBoard cell's --logdir refers to.
my_ppo.learn(total_timesteps=1000000, tb_log_name='ppo_mlp_1m')

In [10]:
# @title After some training (or once TensorBoard reports reasonably high scores), let's evaluate now!
# Same rollout as the random-action cell, but actions now come from the trained model.
eval_env(my_ppo, env, '/tmp/eval02.gif')
print('-' * 80, '\n As you may expect, it now performs better!\n', '-' * 80)
# Show the GIF that eval_env just wrote.
displayImage(open('/tmp/eval02.gif','rb').read())

In [None]:
# @title Optionally, if you want to save the model and load
# my_ppo.save('/tmp/your_path')
# my_ppo = PPO.load('/tmp/your_path')