In [None]:
import os
# Use os.chdir() to switch the working directory to the sibling `main` folder.
# NOTE(review): presumably needed so that local modules and the relative model
# directory used by later cells resolve from `main` — confirm against the repo layout.
os.chdir('../main')

In [None]:
# --- Checkpoint selection -----------------------------------------------------
model_name = "PPO"
checkpoints = 2500000

# Specify the model file to load, built as "<model_name>_<checkpoints>_steps".
# Model "ppo_ryu_2500000_steps_updated" is capable of beating the final stage
# (Bison) of the game.
model_checkpoints = "{}_{}_steps".format(model_name, checkpoints)

# Model notes:
# ppo_ryu_2000000_steps_updated: Just beginning to overfit; generalizable but not quite capable.
# ppo_ryu_2500000_steps_updated: Approaching the final overfitted state; cannot dominate the
#     first round but partially generalizable. High chance of beating the final stage.
# ppo_ryu_3000000_steps_updated: Near the final overfitted state; almost dominates the first
#     round but barely generalizable.
# ppo_ryu_7000000_steps_updated: Overfitted; dominates the first round but not generalizable.

# Directory holding the saved checkpoints for the selected model.
MODEL_DIR = "trained_models_{}/".format(model_name)
# MODEL_DIR = r"trained_models/"

# --- Evaluation switches --------------------------------------------------------
RENDERING = False       # Whether to render the game screen.
# RESET_ROUND = False   # Whether to reset the round when the fight is over.
RANDOM_ACTION = False   # Evaluate a fixed test action instead of the trained model.
NUM_EXPERIMENTS = 100   # Number of matches to play.


In [None]:
from pyvirtualdisplay import Display

# Start a pyvirtualdisplay Display only when rendering is requested.
# visible=0 asks for a non-visible display of 1400x900
# (presumably so frames can be rendered on a machine without a screen — confirm).
if RENDERING:
    virtual_display = Display(visible=0, size=(1400, 900))
    virtual_display.start()

%matplotlib inline
import matplotlib.pyplot as plt

from IPython import display

In [30]:
# Copyright 2023 LIN Yi. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import os
import time 

import retro
from stable_baselines3 import PPO

from street_fighter_custom_wrapper import StreetFighterCustomWrapper

def make_env(game, state):
    """Return a zero-argument factory that builds the wrapped retro environment.

    Args:
        game: Name of the retro game, forwarded to ``retro.make``.
        state: Name of the saved game state to start from, forwarded to ``retro.make``.

    Returns:
        A callable taking no arguments. Each call creates a ``retro`` environment
        with filtered actions and image observations and returns it wrapped in
        ``StreetFighterCustomWrapper`` (``reset_round=False``, rendering controlled
        by the module-level ``RENDERING`` flag).
    """
    def _init():
        # The emulator is only created when the factory is actually invoked.
        base_env = retro.make(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE,
        )
        return StreetFighterCustomWrapper(base_env, reset_round=False, rendering=RENDERING)

    return _init

def _show_frame(img):
    """Copy the current emulator frame into `img` and redraw the notebook figure."""
    img.set_data(env.render(mode='rgb_array'))
    display.display(plt.gcf())
    display.clear_output(wait=True)


game = "StreetFighterIISpecialChampionEdition-Genesis"
env = make_env(game, state="Champion.Level12.RyuVsBison")()
# model = PPO("CnnPolicy", env)

num_experiments = NUM_EXPERIMENTS
experiment_reward_sum = 0   # sum of per-experiment total rewards
num_victory = 0             # number of experiments (matches) won

# Everything that touches the emulator runs inside try/finally so that the
# environment is closed even if the evaluation is interrupted (KeyboardInterrupt)
# or raises; the cell can then be re-run without a stale environment.
try:
    if not RANDOM_ACTION:
        model = PPO.load(os.path.join(MODEL_DIR, model_checkpoints), env=env)

    obs = env.reset()

    print("\nFighting Begins!\n")

    # NOTE: descriptive loop names are used instead of `_` / `__`, which IPython
    # reserves for the most recent cell outputs.
    for experiment_idx in range(1, num_experiments + 1):
        win_current_exp = False
        total_reward = 0
        num_victory_exp = 0     # rounds won in the current experiment

        obs = env.reset()

        print("\nStart ", experiment_idx, "th experiment")

        # A match is at most three rounds; two round wins decide it.
        for round_idx in range(1, 4):
            done = False

            if RENDERING:
                img = plt.imshow(env.render(mode='rgb_array'))

            while not done:
                if RANDOM_ACTION:
                    # sample action from action space
                    # obs, reward, done, info = env.step(env.action_space.sample())

                    # test specified action
                    # button_combos [[0, 16, 32], [0, 64, 128], [0, 1, 2, 3, 256, 257, 512, 513, 1024, 1026, 1536, 2048, 2304, 2560]]
                    # buttons ['B', 'A', 'MODE', 'START', 'UP', 'DOWN', 'LEFT', 'RIGHT', 'C', 'Y', 'X', 'Z']
                    test_action = [0,0,0,0,1,0,0,0,0,0,0,0]
                    obs, reward, done, info = env.step(test_action)
                else:
                    action, _states = model.predict(obs)
                    # print(action)
                    obs, reward, done, info = env.step(action)

                total_reward += reward
                # print("Reward: {:.3f}, playerHP: {}, enemyHP:{}".format(reward, info['agent_hp'], info['enemy_hp']))

                # env.step() is expected to report `done` at game over; in addition,
                # a negative HP on either side is treated as the end of the round.
                if info['enemy_hp'] < 0 or info['agent_hp'] < 0:
                    done = True

                if RENDERING:
                    _show_frame(img)

            if info['enemy_hp'] < 0:
                print(round_idx, "Victory!")
                num_victory_exp += 1
                if num_victory_exp == 2:
                    win_current_exp = True
                    break
            else:
                print(round_idx, "Lose...")
                # Stop after two straight losses, or once the third round is lost.
                if round_idx == 3 or (round_idx == 2 and num_victory_exp == 0):
                    break

            while info['enemy_hp'] < 0 or info['agent_hp'] < 0:
                # Inter scene transition. Do nothing (no button pressed) until
                # both HP values are non-negative again.
                obs, reward, done, info = env.step([0] * 12)
                if RENDERING:
                    _show_frame(img)

        if win_current_exp:
            num_victory += 1
            print(experiment_idx, "th experiment Victory!")
        else:
            print(experiment_idx, "th experiment Lose...")

        # print("Total reward: {}\n".format(total_reward))
        experiment_reward_sum += total_reward
finally:
    env.close()

print("\nFighting Ends!\n")
print(model_checkpoints)
print("Winning rate: {}".format(1.0 * num_victory / num_experiments))
if RANDOM_ACTION:
    print("Average reward for random action: {}".format(experiment_reward_sum/num_experiments))
else:
    print("Average reward for {}: {}".format(model_checkpoints, experiment_reward_sum/num_experiments))

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.

Fighting Begins!


Start  1 th experiment
1 Victory!
2 Lose...
3 Lose...
1 th experiment Lose...

Start  2 th experiment
1 Victory!
2 Victory!
2 th experiment Victory!

Start  3 th experiment
1 Lose...
2 Lose...
3 th experiment Lose...

Start  4 th experiment
1 Victory!
2 Lose...
3 Lose...
4 th experiment Lose...

Start  5 th experiment
1 Victory!
2 Lose...
3 Lose...
5 th experiment Lose...

Start  6 th experiment
1 Lose...
2 Lose...
6 th experiment Lose...

Start  7 th experiment
1 Victory!
2 Lose...
3 Lose...
7 th experiment Lose...

Start  8 th experiment
1 Lose...
2 Lose...
8 th experiment Lose...

Start  9 th experiment
1 Lose...
2 Lose...
9 th experiment Lose...

Start  10 th experiment
1 Victory!
2 Lose...
3 Lose...
10 th experiment Lose...

Start  11 th experiment
1 Lose...
2 Lose...
11 th experiment Lose...

Start  12 th experiment
1 Lose...
2 Lose...
12 th ex

In [None]:
# Manual cleanup: if the evaluation cell was interrupted (e.g. by a
# KeyboardInterrupt), run this cell to close the environment.
env.close()