# Street Fighter Tutorial
This notebook accompanies the YouTube tutorial on <a href='https://www.youtube.com/c/NicholasRenotte'>Nicholas Renotte</a>

# Setup StreetFighter

In [None]:
# !pip install gym[all] gym-retro
# !pip install pygame
# !pip install opencv-python
# !pip install matplotlib
# !pip install torch==1.10.2+cu113 torchvision==0.11.3+cu113 torchaudio===0.10.2+cu113 -f https://download.pytorch.org/whl/cu113/torch_stable.html
# !pip install ray

In [1]:
# Import retro to play Street Fighter using a ROM
import retro
# Import time to slow down game
import time

In [None]:
# See the different retro games
retro.data.list_games()

In [None]:
# python -m retro.import . # Run this from the roms folder, or where you have your game roms 

In [None]:
game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
print(game.action_space)
print(game.action_space.shape)
print(game.observation_space.shape)

# Setup Environment
## What we are going to do! FUNNN
- Observation Preprocess - grayscale (DONE), frame delta, resize the frame so we have less pixels  (DONE) 
- Filter the action - parameter DONE
- Reward function - set this to the score

In [2]:
# Import environment base class for a wrapper 
from gym import Env 
# Import the space shapes for the environment
from gym.spaces import MultiBinary, Box, Tuple, Discrete
# Import numpy to calculate frame delta 
import numpy as np
# Import opencv for grayscaling
import cv2
# Import matplotlib for plotting the image
from matplotlib import pyplot as plt

In [3]:
# Create custom environment 
class StreetFighter(Env): 
    def __init__(self,config=None):
        super().__init__()
        # Specify action space and observation space 
        self.observation_space = Box(low=0, high=255, shape=(84, 84, 1), dtype=np.uint8)
        #self.action_space = MultiBinary(12)
        self.action_space = Tuple([Discrete(2) for _ in range(12)])
        # Startup and instance of the game 
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)
    
    def reset(self):
        # Return the first frame 
        obs = self.game.reset()
        obs = self.preprocess(obs) 
        self.previous_frame = obs 
        
        # Create a attribute to hold the score delta 
        self.score = 0 
        return obs
    
    def preprocess(self, observation): 
        # Grayscaling 
        gray = cv2.cvtColor(observation, cv2.COLOR_BGR2GRAY)
        # Resize 
        resize = cv2.resize(gray, (84,84), interpolation=cv2.INTER_CUBIC)
        # Add the channels value
        channels = np.reshape(resize, (84,84,1))
        return channels 
    
    def step(self, action): 
        #convert actions to multibinary
        action_mb = np.array(action)
        action_mb = action_mb.astype(np.uint8)
        
        # Take a step 
        obs, reward, done, info = self.game.step(action_mb)
        obs = self.preprocess(obs) 
        
        # Frame delta 
        frame_delta = obs - self.previous_frame
        self.previous_frame = obs 
        
        # Reshape the reward function
        reward = info['score'] - self.score 
        self.score = info['score'] 
        
        return frame_delta, reward, done, info
    
    def render(self, *args, **kwargs):
        self.game.render()
        
    def close(self):
        self.game.close()

In [None]:
game_env = StreetFighter(None)
print(game_env.action_space)
print(game_env.action_space.shape)
print(game_env.observation_space.shape)

# Train Model

In [4]:
import ray
from ray import tune
from ray.rllib import agents
ray.init()
from ray.tune.logger import DEFAULT_LOGGERS
from multiprocessing import freeze_support
import os

from ray.rllib.agents.ppo import PPOTrainer

In [5]:
# Configure the algorithm.
config = {
    # Environment (RLlib understands openAI gym registered strings).
    "env": StreetFighter,
    "num_gpus": 1,
    # Use 2 environment workers (aka "rollout workers") that parallelly
    # collect samples from their own environment clone(s).
    "num_workers": 2,
    # Change this to "framework: torch", if you are using PyTorch.
    # Also, use "framework: tf2" for tf2.x eager execution.
    "framework": "torch",
    # Tweak the default model provided automatically by RLlib,
    # given the environment's observation- and action spaces.
    "model": {
        "fcnet_hiddens": [64, 64],
        "fcnet_activation": "relu",
    },
    # Set up a separate evaluation worker set for the
    # `trainer.evaluate()` call after training (see below).
    "evaluation_num_workers": 1,
    # Only for evaluation runs, render the env.
    "evaluation_config": {
        "render_env": True,
    }
}

In [None]:
# rllib_trainer = PPOTrainer(config=config)
# rllib_trainer.train()


time_steps_total = 100000

    # Run the experiment.
results = tune.run(
    agents.ppo.PPOTrainer,
    config=config,
    metric="episode_reward_mean",
    mode="max",
    stop={"timesteps_total": time_steps_total},
    checkpoint_at_end=True,
    checkpoint_freq=10,
    keep_checkpoints_num=100,
    local_dir="tune_runs"
)  


# Load checkpoints

In [6]:
# Where the trained agents and the logs will end up.
local_dir = "tune_runs"

import glob
import json

from ray.rllib import agents

# Find all the occurences of params.json in the directory tune_runs.
params_paths = glob.glob(os.path.join(local_dir, "**", "params.json"), recursive=True)
params_paths = sorted(params_paths)

# Find all the checkpoints per params.json.
pairs = []
for params_path in params_paths:
    search_path = os.path.join(os.path.dirname(params_path), "**")
    checkpoint_paths = glob.glob(search_path, recursive=True)
    checkpoint_paths = [checkpoint_path for checkpoint_path in checkpoint_paths if
                        not os.path.isdir(checkpoint_path)]
    checkpoint_paths = [checkpoint_path for checkpoint_path in checkpoint_paths if
                        os.path.basename(checkpoint_path).startswith(
                            "checkpoint-") and not checkpoint_path.endswith(".tune_metadata")]
    checkpoint_paths = sorted(checkpoint_paths)
    pairs += [(params_path, checkpoint_path) for checkpoint_path in checkpoint_paths]

# Get the user input.
if len(pairs) == 0:
    print("No checkpoints found.")
    #return
elif len(pairs) == 1:
    user_index = 0
else:
    print("Select a checkpoint:")
    for index, (_, subfolder) in enumerate(pairs):
        print(f"{index: >2}: {subfolder}")
    user_index = int(input("Enter the index of the checkpoint: "))
    #user_index = 2

# Get config path and checkpoint path.
config_path, checkpoint_path = pairs[user_index]
assert os.path.exists(config_path)
assert os.path.exists(checkpoint_path)
print("Config path:", config_path)
print("Checkpoint path:", checkpoint_path)

# Read the config.
with open(config_path, "r") as file:
    enjoy_config = json.load(file)
    enjoy_config = {key: value for key, value in enjoy_config.items() if key not in ["num_gpus", "num_workers"]}

print(enjoy_config, config_path)

# Load the agent.
print("Loading agent...")
agent = agents.ppo.PPOTrainer(config=config)
agent.restore(checkpoint_path)
print("Agent loaded.")

2022-04-28 15:45:08,545	INFO ppo.py:250 -- In multi-agent mode, policies will be optimized sequentially by the multi-GPU optimizer. Consider setting simple_optimizer=True if this doesn't work for you.
2022-04-28 15:45:08,546	INFO trainer.py:781 -- Current log_level is WARN. For more information, set 'log_level': 'INFO' / 'DEBUG' or use the -v and -vv flags.


Select a checkpoint:
 0: tune_runs\PPOTrainer_2022-04-20_14-47-28\PPOTrainer_StreetFighter_09610_00000_0_2022-04-20_14-47-28\checkpoint_000010\checkpoint-10
 1: tune_runs\PPOTrainer_2022-04-20_14-47-28\PPOTrainer_StreetFighter_09610_00000_0_2022-04-20_14-47-28\checkpoint_000020\checkpoint-20
 2: tune_runs\PPOTrainer_2022-04-20_14-47-28\PPOTrainer_StreetFighter_09610_00000_0_2022-04-20_14-47-28\checkpoint_000025\checkpoint-25
Config path: tune_runs\PPOTrainer_2022-04-20_14-47-28\PPOTrainer_StreetFighter_09610_00000_0_2022-04-20_14-47-28\params.json
Checkpoint path: tune_runs\PPOTrainer_2022-04-20_14-47-28\PPOTrainer_StreetFighter_09610_00000_0_2022-04-20_14-47-28\checkpoint_000025\checkpoint-25
{'env': "<class '__main__.StreetFighter'>", 'evaluation_config': {'render_env': True}, 'evaluation_num_workers': 1, 'framework': 'torch', 'model': {'fcnet_activation': 'relu', 'fcnet_hiddens': [64, 64]}} tune_runs\PPOTrainer_2022-04-20_14-47-28\PPOTrainer_StreetFighter_09610_00000_0_2022-04-20_14

2022-04-28 15:45:14,571	INFO trainable.py:496 -- Restored on 127.0.0.1 from checkpoint: tune_runs\PPOTrainer_2022-04-20_14-47-28\PPOTrainer_StreetFighter_09610_00000_0_2022-04-20_14-47-28\checkpoint_000025\checkpoint-25
2022-04-28 15:45:14,573	INFO trainable.py:503 -- Current state after restoring: {'_iteration': 25, '_timesteps_total': 100000, '_time_total': 6635.964767932892, '_episodes_total': 12}


Agent loaded.


In [7]:
#env.close()
env = StreetFighter()
# Reset game to starting state
obs = env.reset()
# Set flag to flase
done = False
for game in range(1): 
    while not done: 
        if done: 
            obs = env.reset()
        env.render()
        action = agent.compute_single_action(obs)
        obs, reward, done, info = env.step(action)
        #time.sleep(0.01)
        print(reward)



0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
0
