In [1]:
import os
# Use os.chdir() to switch the working directory to ../main.
# NOTE(review): presumably needed so the local wrapper import and the relative
# model/CSV paths used in later cells resolve — confirm against the repo layout.
os.chdir('../main')

In [6]:
# ---- Evaluation settings (edit these, then re-run the cells below) ----

# Directory that holds the saved checkpoints; the result CSV is written here too.
MODEL_DIR = "ppo-timeReward"
# Filename prefix of every checkpoint: "<model_name>_<steps>_steps".
model_name = "PPO"
# Step counts of the checkpoints to evaluate: 500k, 1M, ..., 7.5M (inclusive).
checkpoints = range(500_000, 7_500_001, 500_000)

# Match format, one of 0 / 1 / 3:
#   0 = no reset (stages 1-12), 1 = single round, 3 = best of three.
game_type = 3
# Number of evaluation matches played per checkpoint.
NUM_EXPERIMENTS = 100
# Whether to render the game screen.
RENDERING = False
# When True, actions are sampled at random instead of coming from a model.
RANDOM_ACTION = False

# When True, per-checkpoint results are written to `save_file` as CSV.
SAVE = True
if SAVE:
    save_file = os.path.join(MODEL_DIR, f"{MODEL_DIR}_{game_type}.csv")


# Model notes:
# ppo_ryu_2000000_steps_updated: Just beginning to overfit; generalizable but not yet very capable.
# ppo_ryu_2500000_steps_updated: Approaching the final overfitted state; cannot dominate the first round but is partially generalizable. High chance of beating the final stage.
# ppo_ryu_3000000_steps_updated: Near the final overfitted state; almost dominates the first round but is barely generalizable.
# ppo_ryu_7000000_steps_updated: Overfitted; dominates the first round but is not generalizable.

In [None]:
from pyvirtualdisplay import Display

# A virtual (non-visible) 1400x900 display is created and started only when
# rendering is enabled; with RENDERING off this cell does nothing here.
if RENDERING:
    virtual_display = Display(size=(1400, 900), visible=0)
    virtual_display.start()

%matplotlib inline
import matplotlib.pyplot as plt

from IPython import display

In [7]:
# Copyright 2023 LIN Yi. All Rights Reserved.
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#     http://www.apache.org/licenses/LICENSE-2.0
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

import os
import time 
import csv

import retro
from stable_baselines3 import PPO

from street_fighter_custom_wrapper import StreetFighterCustomWrapper

def make_env(game, state):
    """Return a zero-argument factory that builds the wrapped retro environment.

    Nothing is created when ``make_env`` itself is called; the emulator is only
    instantiated when the returned ``_init`` callable is invoked.

    Args:
        game: Name of the retro game, forwarded to ``retro.make``.
        state: Name of the saved game state, forwarded to ``retro.make``.

    Returns:
        A callable that creates the retro environment (filtered action set,
        image observations) wrapped in ``StreetFighterCustomWrapper``.

    Note:
        ``game_type`` and ``RENDERING`` are read from the notebook's globals at
        the time the factory is invoked, not when ``make_env`` is called.
    """
    def _init():
        base_env = retro.make(
            game=game,
            state=state,
            use_restricted_actions=retro.Actions.FILTERED,
            obs_type=retro.Observations.IMAGE,
        )
        return StreetFighterCustomWrapper(
            base_env,
            reset_round=game_type,
            rendering=RENDERING,
        )

    return _init

# Evaluate every checkpoint in `checkpoints` on a single environment and report
# (and optionally save) the average reward and winning rate per checkpoint.
game = "StreetFighterIISpecialChampionEdition-Genesis"
env = make_env(game, state="Champion.Level12.RyuVsBison")()

csvfile = None
try:
    if SAVE:
        csvfile = open(save_file, 'w', newline='')
        writer = csv.writer(csvfile)
        writer.writerow(["model checkpoints", "average reward", "winning rate"])

    for checkpts in checkpoints:
        # Specify the model file to load, e.g. "PPO_2500000_steps".
        # (Earlier note: model "ppo_ryu_2500000_steps_updated" is capable of
        # beating the final stage (Bison) of the game.)
        model_checkpoints = model_name + "_" + str(checkpts) + "_steps"

        if not RANDOM_ACTION:
            model = PPO.load(os.path.join(MODEL_DIR, model_checkpoints), env=env)

        num_experiments = NUM_EXPERIMENTS
        experiment_reward_sum = 0
        num_victory = 0

        for experiment_idx in range(1, num_experiments + 1):
            done = False
            obs = env.reset()
            total_reward = 0

            if RENDERING:
                img = plt.imshow(env.render(mode='rgb_array'))

            while not done:
                if RANDOM_ACTION:
                    obs, reward, done, info = env.step(env.action_space.sample())
                else:
                    # NOTE(review): predict() is called with its default
                    # arguments; if the default is stochastic sampling, results
                    # vary between runs — confirm this is intended for evaluation.
                    action, _states = model.predict(obs)
                    obs, reward, done, info = env.step(action)

                total_reward += reward

                if RENDERING:
                    img.set_data(env.render(mode='rgb_array'))
                    display.display(plt.gcf())
                    display.clear_output(wait=True)

            # A final `done_status` of 1 is counted as a victory.
            if info['done_status'] == 1:
                num_victory += 1

            experiment_reward_sum += total_reward

        if RANDOM_ACTION:
            print("Random action")
        else:
            print(model_checkpoints)

        average_reward = experiment_reward_sum / num_experiments
        winning_rate = 1.0 * num_victory / num_experiments

        print("Average reward : {}".format(average_reward))
        print("Winning rate: {}\n".format(winning_rate))

        if SAVE:
            writer.writerow([model_checkpoints, average_reward, winning_rate])
            # Push the row to disk now so finished checkpoints are not lost if
            # the long-running evaluation is killed later.
            csvfile.flush()
finally:
    # Always release the CSV file and the environment, including when the loop
    # is interrupted (e.g. KeyboardInterrupt) or a checkpoint fails to load.
    if csvfile is not None:
        csvfile.close()
    env.close()

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
PPO_500000_steps
Average reward : -0.35199999999999987
Winning rate: 0.0

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
PPO_1000000_steps
Average reward : -0.33792
Winning rate: 0.01

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
PPO_1500000_steps
Average reward : -0.32384000000000024
Winning rate: 0.02

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
PPO_2000000_steps
Average reward : -0.15488000000000013
Winning rate: 0.14

Wrapping the env with a `Monitor` wrapper
Wrapping the env in a DummyVecEnv.
Wrapping the env in a VecTransposeImage.
PPO_2500000_steps
Average reward : -0.04223999999999993
Winning rate: 0.22

Wrapping the env with a `Monitor` wrapper
Wr

In [None]:
# If the evaluation cell above was interrupted (e.g. by KeyboardInterrupt),
# run this cell to close the env.
env.close()