In [20]:
import os
import numpy as np
from aic_ml.MAHIL.helper.utils import load_trajectories

# Directory the kernel was started in; `working_dir` is the directory two
# levels above it (each os.path.split(...)[0] strips one trailing component).
cur_dir = os.getcwd()
working_dir = os.path.split(os.path.split(cur_dir)[0])[0]

In [82]:

data_dir = os.path.join(working_dir, "train_ma_dnn/data/")
# file_names:
#     LaborDivision2-v2_50.pkl LaborDivision3-v2_50.pkl PO_Flood-v2_100.pkl 
#     PO_Movers-v2_100.pkl Protoss5v5_100.pkl Terran5v5_100.pkl
file_name = "Terran5v5_100.pkl"
# NOTE(review): the meaning of the trailing (100, 0) arguments is defined by
# `load_trajectories` -- presumably a trajectory count and a start/seed; confirm.
traj = load_trajectories(os.path.join(data_dir, file_name), 100, 0)

# `traj` is indexed by agent; each entry is a mapping with a per-episode
# "rewards" sequence and, for some datasets, a per-episode "wons" sequence.
n_agent = len(traj)
n_epi = len(traj[0]["rewards"])

# list_epi_rews[i_a][i_e] = summed reward of agent i_a over episode i_e.
list_epi_rews = [
    [np.sum(traj[i_a]["rewards"][i_e]) for i_e in range(n_epi)]
    for i_a in range(n_agent)
]

# Win flags are only read from agent 0 and only when the dataset has them.
list_wons = []
if "wons" in traj[0]:
    list_wons = [traj[0]["wons"][i_e] for i_e in range(n_epi)]

# Per-episode return summed over all agents.
ret_sum = np.sum(list_epi_rews, axis=0)
print(np.mean(ret_sum), np.std(ret_sum))
# Normalise by the actual number of agents rather than a hard-coded 5, so the
# per-agent figure is also correct for datasets that do not have 5 agents.
print(np.mean(ret_sum) / n_agent, np.std(ret_sum) / n_agent)
print(np.mean(list_epi_rews, axis=1), np.std(list_epi_rews, axis=1))
# Skip the win statistics when there are no win flags; np.mean([]) would
# otherwise emit a RuntimeWarning and print "nan nan".
if list_wons:
    print(np.mean(list_wons), np.std(list_wons))

59.259203441722974 15.917649934897467
11.851840688344595 3.1835299869794933
[11.85184069 11.85184069 11.85184069 11.85184069 11.85184069] [3.18352999 3.18352999 3.18352999 3.18352999 3.18352999]
0.6 0.4898979485566356


In [63]:
import pandas as pd

# Read the reward results CSV that sits in the notebook's current directory.
# The columns consumed later are: env, alg, sv, learnt_agent, model_num and
# mean_return_sum.
hri_rew_path = os.path.join(cur_dir, "hri_reward_4.csv")
df_hri_rew = pd.read_csv(hri_rew_path)

def get_hri_rew_result(df, env_name, alg_name, sv):
    """Summarise `mean_return_sum` for one (env, alg, sv) setting.

    Args:
        df: DataFrame with columns "env", "alg", "sv", "learnt_agent",
            "model_num" and "mean_return_sum".
        env_name: value to match in the "env" column.
        alg_name: value to match in the "alg" column.
        sv: value to match in the "sv" column.

    Returns:
        A 5-tuple ``(returns, agent_mean, agent_std, total_mean, total_std)``:
        the selected "mean_return_sum" Series indexed by
        (learnt_agent, model_num), its mean and std grouped by learnt_agent,
        and its mean and std over all selected rows (pandas' default `std`).
    """
    mask = (df["env"] == env_name) & (df["alg"] == alg_name) & (df["sv"] == sv)

    # Index by (learnt_agent, model_num) so rows can be grouped per agent.
    returns = df.loc[mask].set_index(["learnt_agent", "model_num"])["mean_return_sum"]
    per_agent = returns.groupby(level="learnt_agent")

    return returns, per_agent.mean(), per_agent.std(), returns.mean(), returns.std()

In [69]:
# Settings to report: every environment crossed with every (algorithm, sv) pair.
env_name = [
    "LaborDivision2-v2",
    "LaborDivision3-v2",
    "PO_Movers-v2",
    "PO_Flood-v2",
]
alg_sv = [
    ("iiql", 0.0), ("mahil", 0.0), ("mahil", 0.2),
    ("magail", 0.0), ("maogail", 0.0), ("maogail", 0.2),
]

for env in env_name:
    print(f"Env: {env}")
    for alg_name, sv in alg_sv:
        # Only the pooled mean/std (last two return values) are printed; the
        # per-agent statistics are discarded.
        res, _, _, mean, std = get_hri_rew_result(
            df_hri_rew, env, alg_name, sv
        )
        print(f"    {alg_name}-{sv}: {mean}+-{std}")



Env: LaborDivision2-v2
    iiql-0.0: 17.95+-1.3892843881333676
    mahil-0.0: 18.455555555555556+-3.0840569721403335
    mahil-0.2: 22.055555555555554+-2.1047741147387002
    magail-0.0: 15.394444444444444+-3.066044622902548
    maogail-0.0: 13.77222222222222+-0.7397947663347559
    maogail-0.2: 16.294444444444444+-3.7884277985973527
Env: LaborDivision3-v2
    iiql-0.0: 28.183333333333326+-1.8576867335479363
    mahil-0.0: 26.877777777777776+-1.4050174639747617
    mahil-0.2: 28.344444444444445+-1.0005924171135754
    magail-0.0: 18.305555555555557+-3.103003324140925
    maogail-0.0: 16.92222222222222+-2.6065444699974085
    maogail-0.2: 20.127777777777776+-3.4233457751586087
Env: PO_Movers-v2
    iiql-0.0: -222.0+-13.890876302251211
    mahil-0.0: -213.88888888888889+-9.10888861782005
    mahil-0.2: -204.6777777777778+-17.943437880265947
    magail-0.0: -300.0+-0.0
    maogail-0.0: -300.0+-0.0
    maogail-0.2: -300.0+-0.0
Env: PO_Flood-v2
    iiql-0.0: 8.688888888888888+-1.24269718608

In [70]:
import pandas as pd

# Read the latent-inference results CSV from the notebook's current directory.
# The columns consumed later are: env, alg, sv, agent_idx, model_num and
# accuracy.
infer_path = os.path.join(cur_dir, "infer_latent_result.csv")
df_infer = pd.read_csv(infer_path)

def get_infer_result(df, env_name, alg_name, sv):
    """Summarise `accuracy` for one (env, alg, sv) setting.

    Args:
        df: DataFrame with columns "env", "alg", "sv", "agent_idx",
            "model_num" and "accuracy".
        env_name: value to match in the "env" column.
        alg_name: value to match in the "alg" column.
        sv: value to match in the "sv" column.

    Returns:
        A 5-tuple ``(accuracy, agent_mean, agent_std, total_mean, total_std)``:
        the selected "accuracy" Series indexed by (agent_idx, model_num), its
        mean and std grouped by agent_idx, and its mean and std over all
        selected rows (pandas' default `std`).
    """
    is_env = df["env"] == env_name
    is_alg = df["alg"] == alg_name
    is_sv = df["sv"] == sv
    selected = df[is_env & is_alg & is_sv]

    # Index by (agent_idx, model_num) so rows can be grouped per agent.
    indexed = selected.set_index(["agent_idx", "model_num"])
    accuracy = indexed["accuracy"]
    by_agent = accuracy.groupby(level="agent_idx")

    agent_mean = by_agent.mean()
    agent_std = by_agent.std()
    return accuracy, agent_mean, agent_std, accuracy.mean(), accuracy.std()

In [71]:
# Settings to report: every environment crossed with every (algorithm, sv) pair.
env_name = ["LaborDivision2-v2", "LaborDivision3-v2", "PO_Movers-v2", "PO_Flood-v2"]
alg_sv = [
    ("mahil", 0.2),
    ("maogail", 0.2)
]

for env in env_name:
    print(f"Env: {env}")
    for alg_name, sv in alg_sv:
        # Only the per-agent mean/std are printed; the pooled statistics are
        # discarded.
        res, mean, std, _, _ = get_infer_result(df_infer, env, alg_name, sv)
        # `mean` and `std` are indexed by agent_idx *labels*. Iterate over the
        # labels and look values up with .loc, instead of using
        # range(len(mean)) as keys, which is only correct when the labels
        # happen to be exactly 0..n-1.
        text_res = ", ".join(
            f"{agent}: {mean.loc[agent]}+-{std.loc[agent]}"
            for agent in mean.index
        )
        print(f"    {alg_name}-{sv}: {text_res}")

Env: LaborDivision2-v2
    mahil-0.2: 0: 0.7497333333333334+-0.04460597867252024, 1: 0.7496666666666667+-0.06774107567298684
    maogail-0.2: 0: 0.6113333333333334+-0.08729039656991675, 1: 0.6334000000000001+-0.1666072027254524
Env: LaborDivision3-v2
    mahil-0.2: 0: 0.781+-0.07476282498675395, 1: 0.7182+-0.08117166993477469
    maogail-0.2: 0: 0.4874+-0.03816857346037445, 1: 0.6824666666666667+-0.06117657503761823
Env: PO_Movers-v2
    mahil-0.2: 0: 0.7771405900598717+-0.019872716061282235, 1: 0.7787636153790666+-0.07207542849051782
    maogail-0.2: 0: 0.3478684267474572+-0.1505722419918239, 1: 0.4588833585803938+-0.06981071948629743
Env: PO_Flood-v2
    mahil-0.2: 0: 0.6103366201972119+-0.07738209959133764, 1: 0.5676073897767199+-0.03060435467616486
    maogail-0.2: 0: 0.31440553099852653+-0.060386257544394625, 1: 0.25422191998186555+-0.11598749957175995
