# Dependencies

In [27]:
import os # for file paths
import retro # framework to interact with the game
from gym import Env # environment base class
from gym.spaces import MultiBinary, Box # space shapes for the game environment
import numpy as np # for calculating frame changes
import cv2 as cv # for image preprocessing
import time # to slow down each frame so we can clearly see the game as it's being played
import math # to calculate the reward on each step
#from matplotlib import pyplot as plt
import optuna # for optimizing the agent's hyperparameters easily

# For building the DL model
from tensorflow import keras
from keras.models import Sequential
from keras.layers import Dense, Flatten, InputLayer
from keras.optimizers import Adam
from keras.losses import MeanSquaredError

# For building the DQN agent
from rl.agents import DQNAgent
from rl.memory import SequentialMemory
from rl.policy import EpsGreedyQPolicy, LinearAnnealedPolicy

# Game Environment

In [28]:
# Output locations, in order: training logs, per-trial weights written during
# hyperparameter optimization, and weights of the final trained agent
LOG_DIR, OPT_DIR, TRAIN_DIR = "./logs/", "./opt/", "./train/"

In [51]:
# Position of every button inside the 12-slot action array passed to the game.
# Slots 2 and 3 are never pressed by this action set, so they carry no name here.
_BUTTON_ORDER = ("B", "A", None, None, "UP", "DOWN", "LEFT", "RIGHT", "C", "Y", "X", "Z")


def _combo(*held):
    """Return the 12-slot button array with only the named buttons pressed."""
    return [int(name in held) for name in _BUTTON_ORDER]


# Every action the agent can pick on a given step in Street Fighter II,
# keyed by the integer the agent outputs
possible_actions = dict(enumerate([
    _combo(),                 # 0: Idle
    _combo("LEFT"),           # 1: Left
    _combo("RIGHT"),          # 2: Right
    _combo("UP"),             # 3: Up
    _combo("DOWN"),           # 4: Down
    _combo("A"),              # 5: Light Kick
    _combo("B"),              # 6: Medium Kick
    _combo("C"),              # 7: Hard Kick
    _combo("X"),              # 8: Light Punch
    _combo("Y"),              # 9: Medium Punch
    _combo("Z"),              # 10: Hard Punch
    _combo("DOWN", "LEFT"),   # 11: Down Left
    _combo("DOWN", "RIGHT"),  # 12: Down Right
    _combo("UP", "LEFT"),     # 13: Up Left
    _combo("UP", "RIGHT"),    # 14: Up Right
]))

In [182]:
# Creating a wrapper to interact with the game environment

class StreetFighterWrapper(Env):
    """Gym environment around Street Fighter II with a custom reward and a discrete action set.

    The agent picks an integer key of ``possible_actions``; the wrapper translates it
    into the button array the game expects, pre-processes the returned frame into a
    100x100 single-channel image and replaces the game's reward with one based on the
    change in both fighters' health.
    """

    def __init__(self):
        super().__init__()

        # Imported locally because the file-level import only brings in MultiBinary and Box
        from gym.spaces import Discrete

        # Starting health in Street Fighter II is 176 HP
        self.START_HEALTH = 176
        # Multiplier applied when the agent out-damages the enemy (and to the win bonus)
        self.REWARD_COEFF = 17
        # Multiplier applied when the agent takes at least as much damage as it deals
        self.PENALTY_COEFF = 1.75
        # Multiplier applied to the penalty for losing a round
        self.LOSS_PENALTY_COEFF = 0.35

        # Observations are the pre-processed frames produced by preprocess():
        # 100x100 pixels, one greyscale channel, values 0-255
        self.observation_space = Box(low=0, high=255, shape=(100, 100, 1), dtype=np.uint8)

        # step() expects an integer key of possible_actions, so the action space is
        # discrete over those keys rather than a raw 12-button MultiBinary vector
        self.action_space = Discrete(len(possible_actions))

        # setting up the game environment
        self.game = retro.make(game='StreetFighterIISpecialChampionEdition-Genesis', use_restricted_actions=retro.Actions.FILTERED)

        # enemy and agent health values that get updated as the game goes along
        self.enemy_health = self.START_HEALTH
        self.agent_health = self.START_HEALTH

        # Score seen on the previous step (tracked for reference; the reward is health-based)
        self.score = 0

    def _shape_reward(self, info, enemy_damage_taken, agent_damage_taken):
        """Compute the shaped reward for one step.

        Args:
            info: the info dict returned by the game for this step; reads
                ``info['enemy_health']`` and ``info['health']``.
            enemy_damage_taken: absolute change in enemy health since the previous step.
            agent_damage_taken: absolute change in agent health since the previous step.

        Returns:
            0 outside of a fight, a large positive bonus when the enemy is knocked out,
            a negative penalty when the agent is knocked out, otherwise the weighted
            damage difference.
        """
        prev_enemy, prev_agent = self.enemy_health, self.agent_health
        enemy_now, agent_now = info['enemy_health'], info['health']

        # catching edge cases to make sure no reward is being earned outside of a fight
        # (i.e. in between rounds)
        both_just_hit_zero = (prev_enemy != 0 and enemy_now == 0
                              and prev_agent != 0 and agent_now == 0)
        nothing_changed = enemy_damage_taken == 0 and agent_damage_taken == 0
        both_were_zero = prev_agent == 0 and prev_enemy == 0
        if both_just_hit_zero or nothing_changed or both_were_zero:
            return 0

        # The enemy was knocked out: bonus grows with the health the agent has left
        if enemy_now < 0:
            # math.log raises ValueError for values <= 0, which would happen if the
            # agent's health is also depleted on the same step; clamping to 1 makes
            # that case worth a bonus of 0 instead of crashing the training run
            surviving_health = max(agent_now, 1)
            return self.START_HEALTH * math.log(surviving_health, self.START_HEALTH) * self.REWARD_COEFF

        # The agent was knocked out: penalty grows with the health the enemy has left
        if agent_now < 0:
            return -math.pow(self.START_HEALTH, (enemy_now / self.START_HEALTH)) * self.LOSS_PENALTY_COEFF

        # The fight goes on: reward dealing more damage than taken, penalise otherwise
        net_damage = enemy_damage_taken - agent_damage_taken
        if enemy_damage_taken > agent_damage_taken:
            return net_damage * self.REWARD_COEFF
        return net_damage * self.PENALTY_COEFF

    def step(self, action):
        """Advance the game by one step.

        Args:
            action: integer key of ``possible_actions`` chosen by the agent.

        Returns:
            Tuple ``(obs, reward, done, info)`` where ``obs`` is the pre-processed frame,
            ``reward`` is the shaped reward, and ``done``/``info`` come from the game.
        """
        # Getting the array of button combinations that corresponds to the number generated by the agent
        action_arr = possible_actions[action]

        # The game's own reward is discarded in favour of the health-based one below
        obs, _, done, info = self.game.step(action_arr)
        obs = self.preprocess(obs)

        # change in health for each player since the previous step; abs() means a
        # health refill between rounds also counts as a change, which the edge cases
        # in _shape_reward() are there to neutralise
        enemy_damage_taken = abs(info['enemy_health'] - self.enemy_health)
        agent_damage_taken = abs(info['health'] - self.agent_health)

        reward = self._shape_reward(info, enemy_damage_taken, agent_damage_taken)

        # remember the current values so the next step can compute its own deltas
        self.enemy_health = info['enemy_health']
        self.agent_health = info['health']
        self.score = info['score']

        # only print the reward earned if it's non-zero
        if reward != 0:
            print("Agent Health: {} Enemy Health: {} Agent Damage Taken: {} Enemy Damage Taken: {} Reward: {}".format(info['health'], info['enemy_health'], agent_damage_taken, enemy_damage_taken, reward))
        return obs, reward, done, info

    def render(self, mode='human'):
        """Render the current game frame, forwarding ``mode`` to the underlying game."""
        return self.game.render(mode=mode)

    def reset(self):
        """Restart the game and return the first pre-processed observation."""
        obs = self.preprocess(self.game.reset())

        # Reset the bookkeeping used by the reward calculation
        self.score = 0
        self.enemy_health = self.START_HEALTH
        self.agent_health = self.START_HEALTH

        return obs

    def preprocess(self, obs):
        """Convert a raw colour frame into a (100, 100, 1) greyscale array."""
        # NOTE(review): the frames are presumably RGB, in which case COLOR_RGB2GRAY
        # would be the matching conversion; left unchanged because any saved weights
        # were trained on this exact pre-processing - confirm before switching
        greyed = cv.cvtColor(obs, cv.COLOR_BGR2GRAY)

        # shrink the frame to the size declared in observation_space
        resized = cv.resize(greyed, (100, 100), interpolation=cv.INTER_CUBIC)

        # add the trailing channel axis
        return np.reshape(resized, (100, 100, 1))

    def close(self):
        """Close the underlying game environment."""
        self.game.close()

In [106]:
# Instantiating our environment. No frame-stacking wrapper is applied here; the past 4 frames
# are supplied to the network later through SequentialMemory's window_length
env = StreetFighterWrapper()

In [107]:
# Variables to store observation and action space data
# Dimensions of a single pre-processed frame as declared by the environment
height, width, channels = env.observation_space.shape
# Number of consecutive frames fed to the network as one input (used as window_length)
frames = 4
# Number of distinct actions the agent can choose from
actions = len(possible_actions)

In [16]:
# Game Loop: plays with uniformly random actions to sanity-check the environment

# Play one match
for game in range(1):
    # Start every match from a fresh episode with the loop flag cleared
    obs = env.reset()
    done = False

    while not done:
        # renders the game frame
        env.render()
        # env.step() expects the integer key of possible_actions (it looks up the
        # button array itself), so sample the key rather than the array
        action = np.random.randint(len(possible_actions))
        # Info contains: continueTimer, enemy_health, enemy_matches_won, health, matches_won, score
        obs, reward, done, info = env.step(action)
        # To make the game slower so it can be watched more closely
        time.sleep(0.01)


KeyboardInterrupt: 

In [131]:
# Release the game environment used for the random-play check above
env.close()

# Hyperparameter Optimization
This section uses the Optuna library to optimize the hyperparameters of a Deep Q-Network (DQN).
The hyperparameters being tuned are:
DL Model: number of Dense layers, number of units in each Dense layer,
DQN Agent: SequentialMemory - limit,
LinearAnnealedPolicy - value_test,
Other agent parameters: target_model_update, learning rate


In [145]:
def optimize_model(trial):
    """Build a fully-connected Q-network whose depth and layer widths are sampled from ``trial``.

    The network flattens the stacked frames, applies 3 or 4 ReLU Dense layers with
    sampled sizes, and ends with one linear output per action.
    """
    # Candidate sizes for every hidden Dense layer
    unit_choices = [64, 96, 128, 256, 512]

    # Testing either 3 or 4 Dense layers
    depth = trial.suggest_int("n_layers", 3, 4)

    # Input layer: collapses the (frames, height, width, channels) window into a vector
    stack = [Flatten(input_shape=(frames, height, width, channels))]

    # One hidden layer per sampled depth, each with its own sampled width
    for idx in range(depth):
        n_units = trial.suggest_categorical(f'n_units_l{idx}', unit_choices)
        stack.append(Dense(units=n_units, activation='relu'))

    # Output layer followed by the trailing Flatten that Keras RL needs for the agent to work
    stack.append(Dense(actions, activation='linear'))
    stack.append(Flatten())

    return Sequential(stack)

In [None]:
def optimize_dqn(trial):
    """Sample the DQN agent hyperparameters for one Optuna trial.

    Returns a dict with the replay memory ``limit``, the exploration ``policy``,
    ``target_model_update`` and the optimizer ``learning_rate``.
    """
    # Values are suggested in a fixed order: limit, value_test, target_model_update, learning_rate
    memory_limit = trial.suggest_int('limit', 1000, 3000)

    # Epsilon is annealed from 1.0 to 0.1 over 30000 steps; value_test is the sampled part
    exploration = LinearAnnealedPolicy(
        EpsGreedyQPolicy(),
        attr='eps',
        value_max=1.0,
        value_min=0.1,
        value_test=trial.suggest_float('value_test', 0.05, 0.2),
        nb_steps=30000,
    )

    target_update = trial.suggest_float('target_model_update', 0.01, 1.0, log=True)
    lr = trial.suggest_float('learning_rate', 1e-7, 1e-3, log=True)

    return {
        'limit': memory_limit,
        'policy': exploration,
        'target_model_update': target_update,
        'learning_rate': lr,
    }

In [161]:
def optimize_agent(trial):
    """Optuna objective: train and evaluate one DQN agent with sampled hyperparameters.

    Args:
        trial: the Optuna trial used to sample the model and agent hyperparameters.

    Returns:
        The mean episode reward over 10 test episodes, or -500 if anything in the
        trial raised (so a broken configuration is ranked last instead of aborting
        the whole study).
    """
    env = None
    try:
        # Getting the DQN and model parameters
        model = optimize_model(trial)
        dqn_params = optimize_dqn(trial)

        # Create environment
        env = StreetFighterWrapper()

        # Creating the DQN
        agent = DQNAgent(
            model=model,
            # window length has to align with input shape in the NN
            memory=SequentialMemory(limit=dqn_params['limit'], window_length=frames),
            policy=dqn_params['policy'],
            enable_dueling_network=True, dueling_type='avg',
            nb_actions=actions,
            nb_steps_warmup=5000,
            target_model_update=dqn_params['target_model_update'],
        )

        # Compile and fit the agent to the environment
        agent.compile(Adam(learning_rate=dqn_params['learning_rate']), metrics=['mae'])
        agent.fit(env, 30000, action_repetition=1, callbacks=None, verbose=2, visualize=False)

        # Evaluate the model
        scores = agent.test(env, nb_episodes=10, visualize=False)
        mean_reward = np.mean(scores.history['episode_reward'])
        print(f"Mean reward for trial {trial.number} is {mean_reward}")

        # Create the output directory on first use so a finished trial is not
        # thrown away just because the folder was missing
        os.makedirs(OPT_DIR, exist_ok=True)
        SAVE_PATH = os.path.join(OPT_DIR, f"trial_{trial.number}_best_agent_weights.h5f")
        agent.save_weights(SAVE_PATH)

        return mean_reward

    except Exception as e:
        # Deliberately broad: one failing trial must not stop the study, but the
        # cause is reported so repeated -500 results can be diagnosed
        print(f"Trial {trial.number} failed with {type(e).__name__}: {e}")
        return -500

    finally:
        # Always release the game, including on failure or interruption, so the
        # next trial can create its own environment
        if env is not None:
            env.close()

In [184]:
# Close the notebook-level environment before the study; every trial in optimize_agent
# creates its own StreetFighterWrapper
# NOTE(review): presumably needed because only one game instance can be open at a time - confirm
env.close()

In [None]:
# Creating the experiment to find the optimal DQN and model hyperparameters
# The objective returns a mean test reward, so higher is better
study = optuna.create_study(direction='maximize')
# Runs 150 trials one after another (n_jobs = 1); each trial trains and tests a fresh agent
study.optimize(optimize_agent, n_trials = 150, n_jobs = 1)

[I 2023-11-07 20:51:25,485] A new study created in memory with name: no-name-f40248b5-bca6-4cde-8671-3ed5322582f9


Training for 3000 steps ...
Agent Health: 145 Enemy Health: 140 Agent Damage Taken: 31 Enemy Damage Taken: 36 Reward: 85
Agent Health: 145 Enemy Health: 112 Agent Damage Taken: 0 Enemy Damage Taken: 28 Reward: 476
Agent Health: 145 Enemy Health: 73 Agent Damage Taken: 0 Enemy Damage Taken: 39 Reward: 663
Agent Health: 145 Enemy Health: 22 Agent Damage Taken: 0 Enemy Damage Taken: 51 Reward: 867
Agent Health: 123 Enemy Health: 22 Agent Damage Taken: 22 Enemy Damage Taken: 0 Reward: -38.5
Agent Health: 123 Enemy Health: -1 Agent Damage Taken: 0 Enemy Damage Taken: 23 Reward: 2784.663022860403
Agent Health: 176 Enemy Health: 147 Agent Damage Taken: 0 Enemy Damage Taken: 29 Reward: 493
Agent Health: 152 Enemy Health: 147 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 145 Enemy Health: 147 Agent Damage Taken: 7 Enemy Damage Taken: 0 Reward: -12.25
Agent Health: 139 Enemy Health: 147 Agent Damage Taken: 6 Enemy Damage Taken: 0 Reward: -10.5
Agent Health: 105 Enemy H

[I 2023-11-07 21:16:00,047] Trial 0 finished with value: -728.7 and parameters: {'n_layers': 3, 'n_units_l0': 512, 'n_units_l1': 512, 'n_units_l2': 256, 'limit': 1671, 'value_test': 0.08865902884644195, 'target_model_update': 0.827801555226716, 'learning_rate': 8.986576901545484e-05}. Best is trial 0 with value: -728.7.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 153 Enemy Health: 176 Agent Damage Taken: 23 Enemy Damage Taken: 0 Reward: -40.25
Agent Health: 129 Enemy Health: 176 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 105 Enemy Health: 176 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 82 Enemy Health: 176 Agent Damage Taken: 23 Enemy Damage Taken: 0 Reward: -40.25
Agent Health: 51 Enemy Health: 176 Agent Damage Taken: 31 Enemy Damage Taken: 0 Reward: -54.25
Agent Health: 51 Enemy Health: 145 Agent Damage Taken: 0 Enemy Damage Taken: 31 Reward: 527
Agent Health: 26 Enemy Health: 145 Agent Damage Taken: 25 Enemy Damage Taken: 0 Reward: -43.75
Agent Health: 26 Enemy Health: 114 Agent Damage Taken: 0 Enemy Damage Taken: 31 Reward: 527
Agent Health: 8 Enemy Health: 114 Agent Damage Taken: 18 Enemy Damage Taken: 0 Reward: -31.5
Agent Health: 8 Enemy Health: 69 Agent Damage Taken: 0 Enemy Damage Taken: 45 Reward: 765
Agent Health: -1 Enemy Health: 69 Agent Damage Taken: 9 Enemy 

[I 2023-11-07 21:29:39,664] Trial 1 finished with value: -643.7117434748349 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'n_units_l1': 256, 'n_units_l2': 64, 'limit': 2026, 'value_test': 0.052297331564470674, 'target_model_update': 0.15210607834887668, 'learning_rate': 0.0003653449551697012}. Best is trial 1 with value: -643.7117434748349.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 145 Enemy Health: 176 Agent Damage Taken: 31 Enemy Damage Taken: 0 Reward: -54.25
Agent Health: 121 Enemy Health: 176 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 88 Enemy Health: 176 Agent Damage Taken: 33 Enemy Damage Taken: 0 Reward: -57.75
Agent Health: 81 Enemy Health: 176 Agent Damage Taken: 7 Enemy Damage Taken: 0 Reward: -12.25
Agent Health: 75 Enemy Health: 137 Agent Damage Taken: 6 Enemy Damage Taken: 39 Reward: 561
Agent Health: 50 Enemy Health: 137 Agent Damage Taken: 25 Enemy Damage Taken: 0 Reward: -43.75
Agent Health: 50 Enemy Health: 130 Agent Damage Taken: 0 Enemy Damage Taken: 7 Reward: 119
Agent Health: 50 Enemy Health: 95 Agent Damage Taken: 0 Enemy Damage Taken: 35 Reward: 595
Agent Health: 14 Enemy Health: 95 Agent Damage Taken: 36 Enemy Damage Taken: 0 Reward: -63.0
Agent Health: 14 Enemy Health: 88 Agent Damage Taken: 0 Enemy Damage Taken: 7 Reward: 119
Agent Health: 14 Enemy Health: 50 Agent Damage Taken: 0 Enemy Damage

[I 2023-11-07 21:39:43,182] Trial 2 finished with value: -728.7 and parameters: {'n_layers': 4, 'n_units_l0': 64, 'n_units_l1': 256, 'n_units_l2': 96, 'n_units_l3': 512, 'limit': 1884, 'value_test': 0.061565768752032415, 'target_model_update': 0.014525361847704837, 'learning_rate': 0.005352738284004532}. Best is trial 1 with value: -643.7117434748349.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 154 Enemy Health: 176 Agent Damage Taken: 22 Enemy Damage Taken: 0 Reward: -38.5
Agent Health: 154 Enemy Health: 136 Agent Damage Taken: 0 Enemy Damage Taken: 40 Reward: 680
Agent Health: 154 Enemy Health: 119 Agent Damage Taken: 0 Enemy Damage Taken: 17 Reward: 289
Agent Health: 130 Enemy Health: 119 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 93 Enemy Health: 119 Agent Damage Taken: 37 Enemy Damage Taken: 0 Reward: -64.75
Agent Health: 93 Enemy Health: 91 Agent Damage Taken: 0 Enemy Damage Taken: 28 Reward: 476
Agent Health: 69 Enemy Health: 91 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 69 Enemy Health: 56 Agent Damage Taken: 0 Enemy Damage Taken: 35 Reward: 595
Agent Health: 25 Enemy Health: 56 Agent Damage Taken: 44 Enemy Damage Taken: 0 Reward: -77.0
Agent Health: -1 Enemy Health: 56 Agent Damage Taken: 26 Enemy Damage Taken: 0 Reward: -1.8136327425065017
Agent Health: 176 Enemy Health: 140 Agent Damage Taken

[I 2023-11-07 21:49:27,237] Trial 3 finished with value: -36.022239390538715 and parameters: {'n_layers': 3, 'n_units_l0': 64, 'n_units_l1': 256, 'n_units_l2': 64, 'limit': 1333, 'value_test': 0.059164126680313456, 'target_model_update': 0.025647298987486602, 'learning_rate': 6.671245601542839e-05}. Best is trial 3 with value: -36.022239390538715.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 176 Enemy Health: 125 Agent Damage Taken: 0 Enemy Damage Taken: 51 Reward: 867
Agent Health: 176 Enemy Health: 96 Agent Damage Taken: 0 Enemy Damage Taken: 29 Reward: 493
Agent Health: 143 Enemy Health: 96 Agent Damage Taken: 33 Enemy Damage Taken: 0 Reward: -57.75
Agent Health: 120 Enemy Health: 96 Agent Damage Taken: 23 Enemy Damage Taken: 0 Reward: -40.25
Agent Health: 120 Enemy Health: 77 Agent Damage Taken: 0 Enemy Damage Taken: 19 Reward: 323
Agent Health: 76 Enemy Health: 77 Agent Damage Taken: 44 Enemy Damage Taken: 0 Reward: -77.0
Agent Health: 76 Enemy Health: 39 Agent Damage Taken: 0 Enemy Damage Taken: 38 Reward: 646
Agent Health: 76 Enemy Health: -1 Agent Damage Taken: 0 Enemy Damage Taken: 40 Reward: 2506.0621339455656
Agent Health: 143 Enemy Health: 136 Agent Damage Taken: 33 Enemy Damage Taken: 40 Reward: 119
done, took 343.934 seconds
Testing for 8 episodes ...
Agent Health: 152 Enemy Health: 176 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0


[I 2023-11-07 22:03:08,001] Trial 4 finished with value: -611.45 and parameters: {'n_layers': 4, 'n_units_l0': 256, 'n_units_l1': 64, 'n_units_l2': 128, 'n_units_l3': 512, 'limit': 2406, 'value_test': 0.11301270666038997, 'target_model_update': 0.22611016260986377, 'learning_rate': 0.00011895658208686591}. Best is trial 3 with value: -36.022239390538715.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 176 Enemy Health: 136 Agent Damage Taken: 0 Enemy Damage Taken: 40 Reward: 680
Agent Health: 152 Enemy Health: 136 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 121 Enemy Health: 136 Agent Damage Taken: 31 Enemy Damage Taken: 0 Reward: -54.25
Agent Health: 121 Enemy Health: 97 Agent Damage Taken: 0 Enemy Damage Taken: 39 Reward: 663
Agent Health: 121 Enemy Health: 58 Agent Damage Taken: 0 Enemy Damage Taken: 39 Reward: 663
Agent Health: 97 Enemy Health: 58 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 97 Enemy Health: 22 Agent Damage Taken: 0 Enemy Damage Taken: 36 Reward: 612
Agent Health: 57 Enemy Health: 22 Agent Damage Taken: 40 Enemy Damage Taken: 0 Reward: -70.0
Agent Health: 33 Enemy Health: 3 Agent Damage Taken: 24 Enemy Damage Taken: 19 Reward: -8.75
Agent Health: 5 Enemy Health: 3 Agent Damage Taken: 28 Enemy Damage Taken: 0 Reward: -49.0
Agent Health: 5 Enemy Health: 1 Agent Damage Taken: 0 Enemy Damage Take

[I 2023-11-07 22:12:59,960] Trial 5 finished with value: -725.2 and parameters: {'n_layers': 3, 'n_units_l0': 96, 'n_units_l1': 256, 'n_units_l2': 256, 'limit': 1812, 'value_test': 0.16695371376693297, 'target_model_update': 0.03601311948676058, 'learning_rate': 0.0012410220949296792}. Best is trial 3 with value: -36.022239390538715.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 154 Enemy Health: 176 Agent Damage Taken: 22 Enemy Damage Taken: 0 Reward: -38.5
Agent Health: 154 Enemy Health: 169 Agent Damage Taken: 0 Enemy Damage Taken: 7 Reward: 119
Agent Health: 123 Enemy Health: 169 Agent Damage Taken: 31 Enemy Damage Taken: 0 Reward: -54.25
Agent Health: 123 Enemy Health: 162 Agent Damage Taken: 0 Enemy Damage Taken: 7 Reward: 119
Agent Health: 99 Enemy Health: 162 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 75 Enemy Health: 162 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 52 Enemy Health: 162 Agent Damage Taken: 23 Enemy Damage Taken: 0 Reward: -40.25
Agent Health: 28 Enemy Health: 162 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 10 Enemy Health: 162 Agent Damage Taken: 18 Enemy Damage Taken: 0 Reward: -31.5
Agent Health: 1 Enemy Health: 162 Agent Damage Taken: 9 Enemy Damage Taken: 0 Reward: -15.75
Agent Health: 1 Enemy Health: 155 Agent Damage Taken: 0 Enemy

[I 2023-11-07 22:26:16,933] Trial 6 finished with value: -557.8046654801187 and parameters: {'n_layers': 3, 'n_units_l0': 256, 'n_units_l1': 512, 'n_units_l2': 64, 'limit': 2723, 'value_test': 0.152464625673419, 'target_model_update': 0.012396704896460905, 'learning_rate': 1.9151547486054264e-05}. Best is trial 3 with value: -36.022239390538715.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 176 Enemy Health: 139 Agent Damage Taken: 0 Enemy Damage Taken: 37 Reward: 629
Agent Health: 141 Enemy Health: 139 Agent Damage Taken: 35 Enemy Damage Taken: 0 Reward: -61.25
Agent Health: 141 Enemy Health: 101 Agent Damage Taken: 0 Enemy Damage Taken: 38 Reward: 646
Agent Health: 118 Enemy Health: 101 Agent Damage Taken: 23 Enemy Damage Taken: 0 Reward: -40.25
Agent Health: 92 Enemy Health: 101 Agent Damage Taken: 26 Enemy Damage Taken: 0 Reward: -45.5
Agent Health: 60 Enemy Health: 101 Agent Damage Taken: 32 Enemy Damage Taken: 0 Reward: -56.0
Agent Health: 60 Enemy Health: 91 Agent Damage Taken: 0 Enemy Damage Taken: 10 Reward: 170
Agent Health: 34 Enemy Health: 91 Agent Damage Taken: 26 Enemy Damage Taken: 0 Reward: -45.5
Agent Health: 13 Enemy Health: 91 Agent Damage Taken: 21 Enemy Damage Taken: 0 Reward: -36.75
Agent Health: 13 Enemy Health: 62 Agent Damage Taken: 0 Enemy Damage Taken: 29 Reward: 493
Agent Health: -9 Enemy Health: 62 Agent Damage Taken: 22 Enemy Da

[I 2023-11-07 22:38:25,479] Trial 7 finished with value: 73.31664422971681 and parameters: {'n_layers': 3, 'n_units_l0': 128, 'n_units_l1': 128, 'n_units_l2': 64, 'limit': 2074, 'value_test': 0.11094626623319866, 'target_model_update': 0.7657223746083014, 'learning_rate': 0.00024104784779683305}. Best is trial 7 with value: 73.31664422971681.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 141 Enemy Health: 176 Agent Damage Taken: 35 Enemy Damage Taken: 0 Reward: -61.25
Agent Health: 119 Enemy Health: 176 Agent Damage Taken: 22 Enemy Damage Taken: 0 Reward: -38.5
Agent Health: 119 Enemy Health: 135 Agent Damage Taken: 0 Enemy Damage Taken: 41 Reward: 697
Agent Health: 84 Enemy Health: 93 Agent Damage Taken: 35 Enemy Damage Taken: 42 Reward: 119
Agent Health: 60 Enemy Health: 93 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 27 Enemy Health: 93 Agent Damage Taken: 33 Enemy Damage Taken: 0 Reward: -57.75
Agent Health: 3 Enemy Health: 93 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 3 Enemy Health: 65 Agent Damage Taken: 0 Enemy Damage Taken: 28 Reward: 476
Agent Health: 3 Enemy Health: 30 Agent Damage Taken: 0 Enemy Damage Taken: 35 Reward: 595
Agent Health: -1 Enemy Health: 30 Agent Damage Taken: 4 Enemy Damage Taken: 0 Reward: -0.8449400293352315
done, took 276.094 seconds
Testing for 8 episodes ...
Agent

[I 2023-11-07 22:48:18,558] Trial 8 finished with value: -726.95 and parameters: {'n_layers': 4, 'n_units_l0': 64, 'n_units_l1': 512, 'n_units_l2': 96, 'n_units_l3': 96, 'limit': 1385, 'value_test': 0.059272464213134965, 'target_model_update': 0.563213884044316, 'learning_rate': 0.0021363842613628006}. Best is trial 7 with value: 73.31664422971681.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 152 Enemy Health: 176 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 128 Enemy Health: 176 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 97 Enemy Health: 139 Agent Damage Taken: 31 Enemy Damage Taken: 37 Reward: 102
Agent Health: 97 Enemy Health: 96 Agent Damage Taken: 0 Enemy Damage Taken: 43 Reward: 731
Agent Health: 73 Enemy Health: 96 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 73 Enemy Health: 89 Agent Damage Taken: 0 Enemy Damage Taken: 7 Reward: 119
Agent Health: 49 Enemy Health: 89 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 49 Enemy Health: 61 Agent Damage Taken: 0 Enemy Damage Taken: 28 Reward: 476
Agent Health: 49 Enemy Health: 32 Agent Damage Taken: 0 Enemy Damage Taken: 29 Reward: 493
Agent Health: 5 Enemy Health: 32 Agent Damage Taken: 44 Enemy Damage Taken: 0 Reward: -77.0
Agent Health: 5 Enemy Health: -1 Agent Damage Taken: 0 Enemy Damage Taken: 3

[I 2023-11-07 22:59:12,065] Trial 9 finished with value: -725.2 and parameters: {'n_layers': 4, 'n_units_l0': 64, 'n_units_l1': 64, 'n_units_l2': 64, 'n_units_l3': 512, 'limit': 2437, 'value_test': 0.07261613348554737, 'target_model_update': 0.9603466037484685, 'learning_rate': 2.0908607515806372e-05}. Best is trial 7 with value: 73.31664422971681.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 176 Enemy Health: 140 Agent Damage Taken: 0 Enemy Damage Taken: 36 Reward: 612
Agent Health: 176 Enemy Health: 123 Agent Damage Taken: 0 Enemy Damage Taken: 17 Reward: 289
Agent Health: 176 Enemy Health: 104 Agent Damage Taken: 0 Enemy Damage Taken: 19 Reward: 323
Agent Health: 176 Enemy Health: 87 Agent Damage Taken: 0 Enemy Damage Taken: 17 Reward: 289
Agent Health: 176 Enemy Health: 45 Agent Damage Taken: 0 Enemy Damage Taken: 42 Reward: 714
Agent Health: 176 Enemy Health: 9 Agent Damage Taken: 0 Enemy Damage Taken: 36 Reward: 612
Agent Health: 139 Enemy Health: 9 Agent Damage Taken: 37 Enemy Damage Taken: 0 Reward: -64.75
Agent Health: 109 Enemy Health: 2 Agent Damage Taken: 30 Enemy Damage Taken: 7 Reward: -40.25
Agent Health: 109 Enemy Health: -1 Agent Damage Taken: 0 Enemy Damage Taken: 3 Reward: 2714.7386738068853
Agent Health: 176 Enemy Health: 168 Agent Damage Taken: 0 Enemy Damage Taken: 8 Reward: 136
Agent Health: 176 Enemy Health: 139 Agent Damage Taken: 0 En

[I 2023-11-07 23:11:34,341] Trial 10 finished with value: -737.45 and parameters: {'n_layers': 3, 'n_units_l0': 128, 'n_units_l1': 128, 'n_units_l2': 512, 'limit': 1018, 'value_test': 0.19020732610847674, 'target_model_update': 0.31785082455927677, 'learning_rate': 0.00037102460560070214}. Best is trial 7 with value: 73.31664422971681.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 176 Enemy Health: 147 Agent Damage Taken: 0 Enemy Damage Taken: 29 Reward: 493
Agent Health: 152 Enemy Health: 147 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 152 Enemy Health: 96 Agent Damage Taken: 0 Enemy Damage Taken: 51 Reward: 867
Agent Health: 130 Enemy Health: 96 Agent Damage Taken: 22 Enemy Damage Taken: 0 Reward: -38.5
Agent Health: 99 Enemy Health: 96 Agent Damage Taken: 31 Enemy Damage Taken: 0 Reward: -54.25
Agent Health: 67 Enemy Health: 96 Agent Damage Taken: 32 Enemy Damage Taken: 0 Reward: -56.0
Agent Health: 67 Enemy Health: 68 Agent Damage Taken: 0 Enemy Damage Taken: 28 Reward: 476
Agent Health: 36 Enemy Health: 68 Agent Damage Taken: 31 Enemy Damage Taken: 0 Reward: -54.25
Agent Health: 36 Enemy Health: 39 Agent Damage Taken: 0 Enemy Damage Taken: 29 Reward: 493
Agent Health: 36 Enemy Health: 4 Agent Damage Taken: 0 Enemy Damage Taken: 35 Reward: 595
Agent Health: 36 Enemy Health: -1 Agent Damage Taken: 0 Enemy Damage Take

[I 2023-11-07 23:25:53,012] Trial 11 finished with value: 354.7651205233177 and parameters: {'n_layers': 3, 'n_units_l0': 128, 'n_units_l1': 96, 'n_units_l2': 64, 'limit': 1336, 'value_test': 0.10197470294398872, 'target_model_update': 0.07381787068069032, 'learning_rate': 7.563566301964245e-05}. Best is trial 11 with value: 354.7651205233177.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 141 Enemy Health: 176 Agent Damage Taken: 35 Enemy Damage Taken: 0 Reward: -61.25
Agent Health: 116 Enemy Health: 176 Agent Damage Taken: 25 Enemy Damage Taken: 0 Reward: -43.75
Agent Health: 116 Enemy Health: 140 Agent Damage Taken: 0 Enemy Damage Taken: 36 Reward: 612
Agent Health: 92 Enemy Health: 140 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 68 Enemy Health: 140 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 44 Enemy Health: 140 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 21 Enemy Health: 140 Agent Damage Taken: 23 Enemy Damage Taken: 0 Reward: -40.25
Agent Health: 6 Enemy Health: 140 Agent Damage Taken: 15 Enemy Damage Taken: 0 Reward: -26.25
Agent Health: 6 Enemy Health: 122 Agent Damage Taken: 0 Enemy Damage Taken: 18 Reward: 306
Agent Health: -1 Enemy Health: 122 Agent Damage Taken: 7 Enemy Damage Taken: 0 Reward: -12.607140654722679
done, took 323.930 seconds
Testing for 8 episo

[I 2023-11-07 23:39:14,009] Trial 12 finished with value: 1565.0763227106204 and parameters: {'n_layers': 3, 'n_units_l0': 128, 'n_units_l1': 96, 'n_units_l2': 64, 'limit': 2218, 'value_test': 0.11084516889418834, 'target_model_update': 0.0867234639784551, 'learning_rate': 0.0001970081032923026}. Best is trial 12 with value: 1565.0763227106204.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 176 Enemy Health: 147 Agent Damage Taken: 0 Enemy Damage Taken: 29 Reward: 493
Agent Health: 145 Enemy Health: 147 Agent Damage Taken: 31 Enemy Damage Taken: 0 Reward: -54.25
Agent Health: 145 Enemy Health: 105 Agent Damage Taken: 0 Enemy Damage Taken: 42 Reward: 714
Agent Health: 145 Enemy Health: 99 Agent Damage Taken: 0 Enemy Damage Taken: 6 Reward: 102
Agent Health: 145 Enemy Health: 47 Agent Damage Taken: 0 Enemy Damage Taken: 52 Reward: 884
Agent Health: 145 Enemy Health: 16 Agent Damage Taken: 0 Enemy Damage Taken: 31 Reward: 527
Agent Health: 136 Enemy Health: 16 Agent Damage Taken: 9 Enemy Damage Taken: 0 Reward: -15.75
Agent Health: 100 Enemy Health: 16 Agent Damage Taken: 36 Enemy Damage Taken: 0 Reward: -63.0
Agent Health: 56 Enemy Health: 16 Agent Damage Taken: 44 Enemy Damage Taken: 0 Reward: -77.0
Agent Health: 56 Enemy Health: -1 Agent Damage Taken: 0 Enemy Damage Taken: 17 Reward: 2329.347169479189
Agent Health: 171 Enemy Health: 176 Agent Damage Taken: 5

[I 2023-11-07 23:50:07,210] Trial 13 finished with value: 18.42951372496543 and parameters: {'n_layers': 3, 'n_units_l0': 128, 'n_units_l1': 96, 'n_units_l2': 512, 'limit': 2355, 'value_test': 0.13020796696209408, 'target_model_update': 0.07570417575391479, 'learning_rate': 1.0048468576173887e-05}. Best is trial 12 with value: 1565.0763227106204.


Training for 3000 steps ...


  updates=self.state_updates,


Agent Health: 176 Enemy Health: 140 Agent Damage Taken: 0 Enemy Damage Taken: 36 Reward: 612
Agent Health: 152 Enemy Health: 140 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 152 Enemy Health: 111 Agent Damage Taken: 0 Enemy Damage Taken: 29 Reward: 493
Agent Health: 152 Enemy Health: 102 Agent Damage Taken: 0 Enemy Damage Taken: 9 Reward: 153
Agent Health: 152 Enemy Health: 66 Agent Damage Taken: 0 Enemy Damage Taken: 36 Reward: 612
Agent Health: 152 Enemy Health: 28 Agent Damage Taken: 0 Enemy Damage Taken: 38 Reward: 646
Agent Health: 152 Enemy Health: -1 Agent Damage Taken: 0 Enemy Damage Taken: 29 Reward: 2907.165080251089
Agent Health: 152 Enemy Health: 176 Agent Damage Taken: 24 Enemy Damage Taken: 0 Reward: -42.0
Agent Health: 120 Enemy Health: 176 Agent Damage Taken: 32 Enemy Damage Taken: 0 Reward: -56.0
Agent Health: 114 Enemy Health: 176 Agent Damage Taken: 6 Enemy Damage Taken: 0 Reward: -10.5
Agent Health: 91 Enemy Health: 176 Agent Damage Taken

# Building the Agent

In [None]:
# Storing the best agent hyperparameters and the number of the trial that had the best results
# params is later indexed by the names suggested during the study (n_layers, n_units_l<i>,
# limit, value_test, target_model_update, learning_rate)
params = study.best_params
# Used to locate the weights file that the best trial saved in OPT_DIR
best_trial_number = study.best_trial.number

In [None]:
# Fresh environment for the final training and testing of the tuned agent
env = StreetFighterWrapper()

In [None]:
def build_model(params):
    """Build the Q-network described by the tuned hyperparameters in ``params``.

    Reads ``params['n_layers']`` and one ``params['n_units_l<i>']`` entry per hidden layer.
    """
    # Input layer followed by the tuned hidden layers
    stack = [Flatten(input_shape=(frames, height, width, channels))]
    stack.extend(
        Dense(units=params[f"n_units_l{idx}"], activation='relu')
        for idx in range(params['n_layers'])
    )

    # One linear output per action, then the trailing Flatten used throughout this file
    stack.append(Dense(actions, activation='linear'))
    stack.append(Flatten())

    return Sequential(stack)

In [None]:
def build_dqn_agent(params):
    """Create an (uncompiled) DQN agent from the tuned hyperparameters, warm-started
    with the weights saved by the best optimization trial.

    Args:
        params: dict of tuned hyperparameters; reads ``limit``, ``value_test`` and
            ``target_model_update`` here, plus the model keys used by ``build_model``.

    Returns:
        The DQNAgent; the caller is expected to ``compile`` it before training.
    """
    # Creating the agent and setting hyperparameters to the tuned values
    agent = DQNAgent(
        model=build_model(params),
        # window length has to align with input shape in the NN
        memory=SequentialMemory(limit=params['limit'], window_length=frames),
        policy=LinearAnnealedPolicy(EpsGreedyQPolicy(), attr='eps', value_max=1.0, value_min=0.1, value_test=params['value_test'], nb_steps=30000),
        enable_dueling_network=True, dueling_type='avg',
        nb_actions=actions,
        nb_steps_warmup=1000,
        target_model_update=params['target_model_update'],
    )

    # Loading the weights from the best trial so it gets a headstart when training.
    # Same path construction as the one used when the trial saved them.
    weights_path = os.path.join(OPT_DIR, f"trial_{best_trial_number}_best_agent_weights.h5f")

    # The agent is not compiled yet, and DQNAgent.load_weights also synchronises the
    # target network, which is only created by compile(). Load into the online model
    # directly; compile() then builds the target network from these weights.
    agent.model.load_weights(weights_path)

    return agent

In [None]:
# Creating the DQN agent with optimized hyperparameters
agent = build_dqn_agent(params)
# Compiling the agent. DQNAgent.compile() only takes the optimizer and metrics (the
# agent defines its own loss internally), so no loss argument is passed here
agent.compile(Adam(learning_rate=params['learning_rate']), metrics=['mae'])
num_steps = 100000
# Training the agent
agent.fit(env, num_steps, action_repetition=1, callbacks=None, verbose=2, visualize=False)

In [None]:
# Make sure the output directory exists so the finished training run is not lost
os.makedirs(TRAIN_DIR, exist_ok=True)
agent.save_weights(os.path.join(TRAIN_DIR, f"agent_{num_steps}_steps.h5f"))

# Testing out the agent

In [None]:
# Reload the weights saved after training (the agent has already been compiled above)
agent.load_weights(f"{TRAIN_DIR}/agent_{num_steps}_steps.h5f")

In [None]:
# Watch the trained agent play

# fit() leaves the agent in training mode; switch it off so the policy uses its
# test-time exploration value instead of the training schedule
agent.training = False

# Play one match
for game in range(1):
    # Start every match from a fresh episode: clear the agent's recent-frame state,
    # reset the game and clear the loop flag
    agent.reset_states()
    obs = env.reset()
    done = False

    while not done:
        # renders the game frame
        env.render()
        action = agent.forward(obs)
        # Info contains: continueTimer, enemy_health, enemy_matches_won, health, matches_won, score
        obs, reward, done, info = env.step(action)
        # Outside training, backward() does not learn; it records the step so that the
        # next forward() sees the last `frames` observations instead of a single frame
        agent.backward(reward, terminal=done)
        time.sleep(0.01)