In [None]:
# ---- Which trained model to evaluate ----
model_name = "PPO-delta"
checkpoints = 50000

# File name of the saved model to load, built from the checkpoint number above.
# (Model "ppo_ryu_2500000_steps_updated" is capable of beating the final stage
# (Bison) of the game.)
model_checkpoints = f"best_model_{checkpoints}"

# Directory that holds the saved models for `model_name`.
MODEL_DIR = f"trained_models_{model_name}/"

# ---- How to run the evaluation ----
# Render the game screen while playing.
RENDERING = True

# Reset the round whenever a fight is over.
RESET_ROUND = True

# When True, the trained model is not loaded and a fixed test action is played instead.
RANDOM_ACTION = False

# Number of fights to play. Keep this >= 3 when RESET_ROUND is False so the
# whole final-stage game can be seen.
NUM_EPISODES = 100


In [None]:
from pyvirtualdisplay import Display

# Rendering needs an X display; when it is requested, start an invisible
# 1400x900 virtual one. Display.start() returns the display object itself,
# so `virtual_display` stays available for later use.
if RENDERING:
    virtual_display = Display(visible=0, size=(1400, 900)).start()

%matplotlib inline
import matplotlib.pyplot as plt

from IPython import display

In [None]:
import os
import time 

import retro
from stable_baselines3 import PPO

# Import the sb3 monitor for logging 
from stable_baselines3.common.monitor import Monitor
# Import the vec wrappers to vectorize and frame stack
from stable_baselines3.common.vec_env import DummyVecEnv, VecFrameStack

from street_fighter_custom_wrapper import StreetFighterCustomWrapper

def _build_fighter_env():
    """Create the Ryu-vs-Bison game environment wrapped in the custom Street Fighter wrapper."""
    base_env = retro.make(
        game='StreetFighterIISpecialChampionEdition-Genesis',
        state="Champion.Level12.RyuVsBison",
        use_restricted_actions=retro.Actions.FILTERED,
    )
    # Optional logging hook (disabled): Monitor(wrapped_env, LOG_DIR)
    return StreetFighterCustomWrapper(base_env)


# Vectorize the single game instance, then stack the last 4 frames along the
# channel axis (channels-last layout).
env = VecFrameStack(DummyVecEnv([_build_fighter_env]), 4, channels_order='last')

In [None]:

# Play NUM_EPISODES fights with the trained agent (or a fixed test action) and
# report the winning rate and the average episode reward.
#
# NOTE: `env` is a vectorized environment (DummyVecEnv + VecFrameStack) that
# holds a single game instance. Its step() therefore expects a *batch* of
# actions and returns batched rewards/dones plus a *list* of info dicts, so
# everything below wraps manual actions in a one-element batch and reads
# element 0 of what comes back.

if not RANDOM_ACTION:
    model = PPO.load(os.path.join(MODEL_DIR, model_checkpoints))

# Initial observation; also needed when RESET_ROUND is False and no per-episode
# reset happens.
obs = env.reset()

num_episodes = NUM_EPISODES
episode_reward_sum = 0
num_victory = 0

print("\nFighting Begins!\n")

for episode in range(num_episodes):
    done = False

    if RESET_ROUND:
        obs = env.reset()

    total_reward = 0

    if RENDERING:
        img = plt.imshow(env.render(mode='rgb_array'))

    while not done:
        if RANDOM_ACTION:
            # sample action from action space
            # obs, rewards, dones, infos = env.step([env.action_space.sample()])

            # test specified action
            # button_combos [[0, 16, 32], [0, 64, 128], [0, 1, 2, 3, 256, 257, 512, 513, 1024, 1026, 1536, 2048, 2304, 2560]]
            # buttons ['B', 'A', 'MODE', 'START', 'UP', 'DOWN', 'LEFT', 'RIGHT', 'C', 'Y', 'X', 'Z']
            test_action = [0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0]
            # Wrap in a list: the vectorized env takes one action per sub-environment.
            obs, rewards, dones, infos = env.step([test_action])
        else:
            # `obs` already carries the batch dimension, so `action` is batched too.
            action, _states = model.predict(obs)
            # print(action)
            obs, rewards, dones, infos = env.step(action)

        # Unpack the single sub-environment's results into plain Python values.
        reward = float(rewards[0])
        done = bool(dones[0])  # done is reported by env.step() when the game is over
        info = infos[0]

        total_reward += reward
        # print("Reward: {:.3f}, playerHP: {}, enemyHP:{}".format(reward, info['agent_hp'], info['enemy_hp']))

        if RENDERING:
            img.set_data(env.render(mode='rgb_array'))
            display.display(plt.gcf())
            display.clear_output(wait=True)

    # `info` is the info dict of the step that ended the fight.
    if info['enemy_hp'] < 0:
        print(episode, "Victory!")
        num_victory += 1
    else:
        print(episode, "Lose...")

    # print("Total reward: {}\n".format(total_reward))
    episode_reward_sum += total_reward

    if not RESET_ROUND:
        while info['enemy_hp'] < 0 or info['agent_hp'] < 0:
            # Inter scene transition. Do nothing (all 12 buttons released).
            obs, rewards, dones, infos = env.step([[0] * 12])
            info = infos[0]
            if RENDERING:
                env.render(mode='rgb_array')

env.close()

print("\nFighting Ends!\n")
print(model_checkpoints)
print("Winning rate: {}".format(1.0 * num_victory / num_episodes))
if RANDOM_ACTION:
    print("Average reward for random action: {}".format(episode_reward_sum/num_episodes))
else:
    print("Average reward for {}: {}".format(model_checkpoints, episode_reward_sum/num_episodes))