In [None]:
import sys, os
from os.path import expanduser 
from os import listdir
import time
import cv2

import pickle, bz2 

import tensorflow as tf
import numpy as np

from diambra_environment.diambraImitationLearning import diambraImitationLearning

In [None]:
# Show files in folder
# `__file__` is not defined inside a notebook kernel, so referencing it raises
# NameError. Fall back to the current working directory in that case
# (by default Jupyter starts the kernel in the notebook's own folder).
try:
    notebook_dir = os.path.dirname(os.path.abspath(__file__))
except NameError:
    notebook_dir = os.getcwd()

repo_base_path = os.path.join(notebook_dir, "../../") # Absolute path to your DIAMBRA environment
trajRecFolder = os.path.join(repo_base_path, "trajRecordings/")

# Keep regular files only; sort because os.listdir returns entries in
# arbitrary order and a stable order makes re-runs reproducible.
trajectoryCandidates = (os.path.join(trajRecFolder, f) for f in sorted(listdir(trajRecFolder)))
trajectoriesFiles = [path for path in trajectoryCandidates if os.path.isfile(path)]

# Last expression of the cell: display the files that were found.
trajectoriesFiles

In [None]:
# Keyword arguments handed to the imitation-learning environment factory.
# NOTE(review): "hwc_dim" is presumably (height, width, channels) and
# "n_actions" presumably [move actions, attack actions] — confirm against
# the diambraImitationLearning class.
diambraIL_kwargs = {
    "hwc_dim": [256, 256, 7],
    "action_space": "multiDiscrete",  # or "discrete"
    "n_actions": [9, 8],
    "trajFilesList": trajectoriesFiles,
    "totalCpus": 4,
}

In [None]:
# Build the imitation-learning environment from the environment class and the
# keyword arguments configured in `diambraIL_kwargs`.
# NOTE(review): `make_diambra_imitationLearning_env` is not imported in any cell
# visible here — confirm which module provides it and import it in the first
# cell, otherwise this line raises NameError on a fresh kernel.
env = make_diambra_imitationLearning_env(diambraImitationLearning, diambraIL_kwargs)

In [None]:
# Reset the environment and keep the observation it returns.
observation = env.reset()

In [None]:
# Invoke the environment's "trajSummary" method through `env_method`.
# NOTE(review): presumably reports a summary of the loaded trajectories for
# each worker — confirm against the environment class.
env.env_method("trajSummary")

In [None]:
# Read the settings needed to decode observations. `get_attr` returns a
# sequence of values and only its first entry is kept for each attribute.
nChars, charNames, n_actions, actBufLen, playersNum = (
    env.get_attr(attrName)[0]
    for attrName in ("nChars", "charNames", "n_actions", "actBufLen", "playersNum")
)

In [None]:
# For each of the two slots, store [end of the move-action buffer,
# end of the attack-action buffer] as offsets into the flattened actions vector.
# Both slots receive the same pair of limits.
limAct = []
for idx in range(2):
    moveBufEnd = actBufLen * n_actions[0]
    limAct.append([moveBufEnd, moveBufEnd + actBufLen * n_actions[1]])

In [None]:
# Visualize Obs content
def observationViz(observation, limAct):
    """Print the additional parameters packed in an observation and show its frames.

    The last channel of `observation` is read as a flat vector: the element at
    [0, 0] is the number of additional parameters, and the entries that follow
    hold, in order, the P1 action buffers (move, then attack), the scalar
    parameters and a one-hot vector of length `nChars` selecting the character.
    Every other channel is displayed in its own OpenCV window, after which the
    function blocks on `cv2.waitKey()` until a key is pressed.

    Relies on the module-level names `nChars`, `charNames`, `n_actions` and
    `actBufLen`.

    Args:
        observation: array indexed as [row, column, channel].
        limAct: per-player [move end, attack end] offsets into the actions
            vector; only `limAct[0]` (P1) is used.

    Raises:
        ValueError: if no entry of the character one-hot vector equals 1.0.
    """
    lastChannel = observation.shape[2] - 1

    # Number of additional parameters stored in the last channel
    paramsCount = int(observation[0, 0, lastChannel])

    # 1P: scalars are what remains once characters and action buffers are removed
    scalarsCount = paramsCount - nChars\
                   - actBufLen*(n_actions[0]+n_actions[1])

    # Flatten the last channel and drop the leading counter element
    flatParams = np.reshape(observation[:, :, lastChannel], (-1))[1:paramsCount + 1]

    # Position where the action buffers end and the scalar parameters begin
    actionsEnd = paramsCount - scalarsCount - nChars
    actionsVec = flatParams[0:actionsEnd]

    moveEnd = limAct[0][0]
    attackEnd = limAct[0][1]
    moveBuf = np.reshape(actionsVec[0:moveEnd], (actBufLen, -1))
    attackBuf = np.reshape(actionsVec[moveEnd:attackEnd], (actBufLen, -1))
    print("Move actions P1 =\n", moveBuf)
    print("Attack actions P1 =\n ", attackBuf)

    tailParams = flatParams[actionsEnd:]
    scalarLabels = ("ownHealth = ", "oppHealth = ", "ownPosition = ",
                    "oppPosition = ", "stage = ")
    for pos, label in enumerate(scalarLabels):
        print(label, tailParams[pos])

    # The one-hot block right after the scalars selects the character name
    charOneHot = list(tailParams[scalarsCount:scalarsCount + nChars])
    print("Playing Char  = ", charNames[charOneHot.index(1.0)])

    # Show every channel except the last one (which carries the parameters)
    frames = np.array(observation).astype(np.float32)
    for channel in range(lastChannel):
        cv2.imshow("image"+str(channel), frames[:, :, channel])

    cv2.waitKey()

In [None]:
# Step through the environment until `maxNumEp` episodes have finished or a
# worker reports "exhausted", visualizing each observation of worker `procIdx`
# and collecting the cumulative reward of every finished episode.
cumulativeEpRew = 0.0
cumulativeEpRewAll = []

maxNumEp = 10
currNumEp = 0

procIdx = 0  # Index of the worker whose observation/reward/done are inspected

try:
    while currNumEp < maxNumEp:

        # One placeholder action per worker
        # NOTE(review): presumably ignored by the environment, which replays
        # recorded trajectories — confirm.
        dummy_actions = [0] * diambraIL_kwargs["totalCpus"]
        observation, reward, done, info = env.step(dummy_actions)
        env.render(mode="human")

        # Keep only the data of the inspected worker
        observation = observation[procIdx]
        reward = reward[procIdx]
        done = done[procIdx]
        print("Reward = ", reward)
        if done:
            # On episode end, visualize the terminal observation carried in `info`
            observation = info[procIdx]["terminal_observation"]

        # Visualize observations content
        observationViz(observation, limAct) # Keep space bar pressed to continue env execution

        cumulativeEpRew += reward

        if done:
            currNumEp += 1
            print("Ep. # = ", currNumEp)
            print("Ep. Cumulative Rew # = ", cumulativeEpRew)

            cumulativeEpRewAll.append(cumulativeEpRew)
            cumulativeEpRew = 0.0

        # Stop as soon as any worker reports its "exhausted" attribute as true
        if np.any(env.get_attr("exhausted")):
            break
finally:
    # Close the OpenCV windows opened by observationViz, even on interruption
    cv2.destroyAllWindows()

print("All ep. rewards =", cumulativeEpRewAll)
if cumulativeEpRewAll:
    print("Mean cumulative reward =", np.mean(cumulativeEpRewAll))
    print("Std cumulative reward =", np.std(cumulativeEpRewAll))
else:
    # np.mean / np.std of an empty list yield nan and emit RuntimeWarnings
    print("No episode completed: mean/std of cumulative reward not available")