# AI Tournament

This notebook provides the exact environment setup that will be used for the tournament, and shows how submissions will be integrated into it, using the provided `SubmissionExample`.

In [None]:
# Basic imports
import sys, os
import time

import numpy as np

# DIAMBRA Environment related imports
sys.path.append(os.path.join(os.path.abspath(''), '../')) 
from makeDiambraEnv import *

# Time dependent seed: the wall clock minus its own value shifted by half a
# second and truncated, expressed in milliseconds (in practice an integer of
# roughly 500 to 1499, so that different runs get different seeds)
_clockOffset = time.time() - int(time.time() - 0.5)
timeDepSeed = int(_clockOffset * 1000)

## Environment settings

In [None]:
# Empty settings containers, filled in by the following cells.
# In order: DIAMBRA Env kwargs, DIAMBRA gym kwargs, Wrappers kwargs
diambraEnvKwargs, diambraGymKwargs, wrapperKwargs = {}, {}, {}

# Additional Observations (list of observation keys)
keyToAdd = list()

## Fixed settings, do not modify

In [None]:
# DIAMBRA Env kwargs
diambraEnvKwargs.update({
    # Game
    "gameId": "doapp",
    # Actions frequency (1 every "mame_diambra_step_ratio" frames)
    "mame_diambra_step_ratio": 6,
    # Game difficulty level
    "difficulty": 3,
    # Number of outfits of the selected character (min 2, max 4)
    "charOutfits": [2, 2],
    # 1P game, randomly initialized on P1 or P2 side
    "player": "Random",
})

# Wrappers kwargs
wrapperKwargs.update({
    # --- Observations ---
    # Frame size
    "hwc_obs_resize": [128, 128, 1],
    # Number of pixel frames to stack
    "frame_stack": 4,
    # Dilation parameter
    "dilation": 1,
    # Enable/Disable observations scaling
    "scale": True,
    # Select how to scale observation
    # 0 = Scale frames between 0.0 and 1.0
    # 1 = Scale frames between -1.0 and 1.0
    "scale_mod": 0,
    # --- Rewards ---
    # Enable/Disable rewards normalization
    "normalize_rewards": True,
    # Enable/Disable rewards clipping
    "clip_rewards": False,
})

# Additional Observations
keyToAdd.extend([
    # Last 12 actions, one-hot encoding
    "actionsBuf",
    # Own and Opponent normalized health value (full bar = 1.0)
    "ownHealth",
    "oppHealth",
    # Own and Opponent side (left = 0.0, right = 1.0)
    "ownPosition",
    "oppPosition",
    # Normalized stage (0.0 = first stage, 1.0 = final stage)
    "stage",
    # Selected character, one-hot encoding
    "character",
])

## Custom settings, modify at will

In [None]:
# Absolute Path to Repository (every path below is built from it)
base_path = "/home/yourUsername/DIAMBRAenvironment/"

# DIAMBRA Env kwargs
diambraEnvKwargs.update({
    # Absolute Path to Diambra Lib
    "diambraEnv_path": os.path.join(base_path, "diambraEnvLib/"),
    # Absolute path to roms
    "roms_path": os.path.join(base_path, "roms/"),
    # Absolute path to MAME executable
    "mame_path": os.path.join(base_path, "mame/"),
    # Enable/Disable environment rendering (Disabling it speeds up execution/training)
    "render": True,
    # Enable/Disable 60 FPS lock (Disabling it speeds up execution/training)
    "lock_fps": True,
})

# Enable/Disable Sound: derived from the two flags above, so it is on only
# when both the FPS lock and rendering are enabled
diambraEnvKwargs["sound"] = diambraEnvKwargs["lock_fps"] and diambraEnvKwargs["render"]

# Character selection (edit both lists, only first element of them)
# Ex 1 (OK): Selecting Kasumi                  = [["Kasumi", "Random"], ["Kasumi", "Random"]]
# Ex 2 (OK): Selecting Random Character        = [["Random", "Random"], ["Random", "Random"]]
# Ex 3 (KO!): Selecting Gen-Fu only on 1P Side = [["Gen-Fu", "Random"], ["Kasumi", "Random"]]
# Available Characters:
# Kasumi, Zack, Hayabusa, Bayman, Lei-Fang, Raidou, Gen-Fu, Tina, Bass, Jann-Lee, Ayane
diambraEnvKwargs["characters"] = [
    ["Random", "Random"],
    ["Random", "Random"],
]

In [None]:
# DIAMBRA gym kwargs
diambraGymKwargs.update({
    # Parameter to define behavior when losing 2 rounds
    #  0.0 = Episode ends
    #  0.0 < x <= 1.0 = Episode continues x% of the times
    #  -inf < x < 0.0 = Episode continues int(x) times
    # !N.B.: evaluation will be carried out with this parameter equal to 0.0!
    "continue_game": 0.0,
    # Action space definition (only the first element of the two lists
    # influences single player games)
    # Discrete VS MultiDiscrete
    "actionSpace": ["discrete", "multiDiscrete"],
    # Using Attack Buttons Combinations VS Not using them
    "attackButCombinations": [False, True],
})

In [None]:
# Initialize DIAMBRA Environment from the settings collected in the cells
# above, seeded with the time dependent seed
env = make_diambra_env(
    diambraGym,
    env_prefix="Test",
    seed=timeDepSeed,
    diambra_kwargs=diambraEnvKwargs,
    diambra_gym_kwargs=diambraGymKwargs,
    wrapper_kwargs=wrapperKwargs,
    key_to_add=keyToAdd,
)

## Take a look at observation and action spaces

In [None]:
# Print Observation space: the space itself, its dtype and its bounds
obsSpace = env.observation_space
print("Obs space =", obsSpace)
print("Obs space type =", obsSpace.dtype)
print("Obs space high bound =", obsSpace.high)
print("Obs space low bound =", obsSpace.low)

In [None]:
# Printing action space
actSpace = env.action_space
print("Action space = ", actSpace)
print("Action space type = ", actSpace.dtype)

# The size is read from `nvec` for a multiDiscrete space and from `n`
# otherwise; the choice follows the first entry of the "actionSpace" setting
isMultiDiscrete = diambraGymKwargs["actionSpace"][0] == "multiDiscrete"
print("Action space n = ", actSpace.nvec if isMultiDiscrete else actSpace.n)

## Import and initialize agent

In [None]:
# Importing model
from submissionExample.agent import agent

# Saved model path (inside the repository pointed to by base_path)
modelFile = os.path.join(base_path, "aiTournament/submissionExample/model.pth")

# Build the agent using the first entries of the environment's action
# count and of the configured action space
myAgent = agent(
    agentModel=modelFile,
    nActions=env.n_actions[0],
    name="Random Agent",
    actionSpace=diambraGymKwargs["actionSpace"][0],
)

## Execute agent policy

In [None]:
# Bookkeeping variables for average performance calculation
cumulativeEpRew = 0.0      # reward accumulated in the episode being played
cumulativeEpRewAll = []    # final cumulative reward of every completed episode
currNumEp = 0              # number of completed episodes

# Number of episodes
maxNumEp = 10

# The environment is closed in the `finally` clause so that it is always
# released, even if the agent or the environment raises an exception or the
# cell is interrupted while the episodes are running
try:
    # Resetting the environment
    observation = env.reset()

    # Executing the given number of episodes
    while currNumEp < maxNumEp:

        # Agent actions
        action = myAgent.act(observation)
        print("Action:", action)

        # Stepping the environment
        observation, reward, done, info = env.step(action)

        # Updating cumulative reward
        cumulativeEpRew += reward

        # Check if episode completed
        if np.any(done):
            currNumEp += 1
            print("Ep. # = ", currNumEp)
            print("Ep. Cumulative Rew # = ", cumulativeEpRew)
            cumulativeEpRewAll.append(cumulativeEpRew)
            cumulativeEpRew = 0.0

            # Resetting the environment, but only when another episode is
            # going to be played: a reset after the last one would start an
            # episode that is never used
            if currNumEp < maxNumEp:
                observation = env.reset()

    # Summary statistics over all the completed episodes
    print("Mean cumulative reward = ", np.mean(cumulativeEpRewAll))
    print("Std cumulative reward = ", np.std(cumulativeEpRewAll))
finally:
    env.close()