In [3]:
import os
import numpy as np
from aic_ml.MAHIL.helper.utils import load_trajectories

# Directory the notebook kernel was started in; result CSVs are read from here.
cur_dir = os.getcwd()
# Walk two levels up from cur_dir; data paths below are built from this base.
_parent_dir = os.path.dirname(cur_dir)
working_dir = os.path.dirname(_parent_dir)

In [87]:

# Directory holding the pickled trajectory files, relative to working_dir.
data_dir = os.path.join(working_dir, "train_ma_dnn/data/")
# file_names:
#     LaborDivision2-v2_50.pkl LaborDivision3-v2_50.pkl PO_Flood-v2_100.pkl
#     PO_Movers-v2_100.pkl Protoss5v5_100.pkl Terran5v5_100.pkl
file_name = "Protoss5v5_100.pkl"
# NOTE(review): the meaning of the trailing (100, 0) arguments is defined by
# load_trajectories, which is not visible here -- confirm before changing them.
traj = load_trajectories(os.path.join(data_dir, file_name), 100, 0)

# traj is indexed by agent; each entry exposes per-episode "rewards"
# (and, for some files, a per-episode "wons" entry).
n_agent = len(traj)
n_epi = len(traj[0]["rewards"])

# list_epi_rews[i_a][i_e] = summed reward of agent i_a over episode i_e.
list_epi_rews = [
    [np.sum(traj[i_a]["rewards"][i_e]) for i_e in range(n_epi)]
    for i_a in range(n_agent)
]

# "wons" is read from agent 0 only and is optional.
if "wons" in traj[0]:
    list_wons = [traj[0]["wons"][i_e] for i_e in range(n_epi)]
else:
    list_wons = []

# Team return per episode: sum of the agents' episode returns.
ret_sum = np.sum(list_epi_rews, axis=0)
print(np.mean(ret_sum), np.std(ret_sum))
# Average per agent: divide by the actual number of agents rather than a
# constant 5, so 2- and 3-agent files are reported correctly as well.
print(np.mean(ret_sum) / n_agent, np.std(ret_sum) / n_agent)
print(np.mean(list_epi_rews, axis=1), np.std(list_epi_rews, axis=1))
# Skip the "wons" statistics when the file has none; np.mean([]) would only
# emit a RuntimeWarning and print nan.
if list_wons:
    print(np.mean(list_wons), np.std(list_wons))

90.22864077669902 24.17514558652156
18.045728155339805 4.835029117304312
[18.04572816 18.04572816 18.04572816 18.04572816 18.04572816] [4.83502912 4.83502912 4.83502912 4.83502912 4.83502912]
0.55 0.49749371855331


In [83]:
import pandas as pd

# Reward-evaluation table located in cur_dir; get_hri_rew_result reads its
# "env", "alg", "sv", "learnt_agent", "model_num" and "mean_return_sum" columns.
hri_rew_path = os.path.join(cur_dir, "hri_reward_4.csv")
df_hri_rew = pd.read_csv(filepath_or_buffer=hri_rew_path)

def get_hri_rew_result(df, env_name, alg_name, sv):
    """Summarise "mean_return_sum" for one (env, alg, sv) setting.

    Args:
        df: DataFrame with the columns "env", "alg", "sv", "learnt_agent",
            "model_num" and "mean_return_sum".
        env_name: value matched against the "env" column.
        alg_name: value matched against the "alg" column.
        sv: value matched (by equality) against the "sv" column.

    Returns:
        A 5-tuple ``(returns, agent_mean, agent_std, total_mean, total_std)``:
        the selected "mean_return_sum" Series indexed by
        (learnt_agent, model_num), its mean and standard deviation grouped by
        learnt_agent, and its mean and standard deviation over all rows.
    """
    selected = (
        (df["env"] == env_name)
        & (df["alg"] == alg_name)
        & (df["sv"] == sv)
    )

    # Index by (learnt_agent, model_num) so rows can be grouped per agent.
    returns = df[selected].set_index(["learnt_agent", "model_num"])["mean_return_sum"]
    per_agent = returns.groupby(level="learnt_agent")

    return returns, per_agent.mean(), per_agent.std(), returns.mean(), returns.std()

In [84]:
# Environments to report on.
env_name = [
    "LaborDivision2-v2",
    "LaborDivision3-v2",
    "PO_Movers-v2",
    "PO_Flood-v2",
]
# (algorithm, sv) settings evaluated for every environment.
alg_sv = [
    ("bc", 0.0), ("iiql", 0.0),
    ("mahil", 0.0), ("mahil", 0.2),
    ("magail", 0.0),
    ("maogail", 0.0), ("maogail", 0.2),
]

for env in env_name:
    print(f"Env: {env}")
    for alg_name, sv in alg_sv:
        # The helper returns (series, per-agent mean, per-agent std,
        # overall mean, overall std); only the overall figures are printed.
        summary = get_hri_rew_result(df_hri_rew, env, alg_name, sv)
        res = summary[0]
        mean, std = summary[3], summary[4]
        print(f"    {alg_name}-{sv}: {mean}+-{std}")



Env: LaborDivision2-v2
    bc-0.0: 17.416666666666668+-0.35323898111303936
    iiql-0.0: 17.911111111111108+-1.7413489875083314
    mahil-0.0: 17.944444444444443+-4.056471738218197
    mahil-0.2: 21.82777777777778+-1.4970217346699
    magail-0.0: 14.861111111111112+-2.2893392221499362
    maogail-0.0: 13.577777777777778+-1.3770607453181931
    maogail-0.2: 16.033333333333335+-4.0364450806234045
Env: LaborDivision3-v2
    bc-0.0: 17.400000000000002+-0.4671426144361299
    iiql-0.0: 28.566666666666663+-2.3764586164197246
    mahil-0.0: 27.849999999999998+-0.9044949480849023
    mahil-0.2: 27.86111111111111+-1.1954388625413153
    magail-0.0: 18.133333333333333+-3.735654040608032
    maogail-0.0: 17.7+-3.6688478360984713
    maogail-0.2: 19.805555555555557+-2.4872489636961164
Env: PO_Movers-v2
    bc-0.0: -300.0+-0.0
    iiql-0.0: -213.0111111111111+-14.993415838911385
    mahil-0.0: -217.74444444444444+-14.268499363075088
    mahil-0.2: -202.53333333333333+-20.66430093986568
    magail-0

In [5]:
import pandas as pd
def get_infer_result(df, env_name, alg_name, sv):
    """Summarise the "accuracy" column for one (env, alg, sv) setting.

    Args:
        df: DataFrame with the columns "env", "alg", "sv", "agent_idx",
            "model_num" and "accuracy".
        env_name: value matched against the "env" column.
        alg_name: value matched against the "alg" column.
        sv: value matched (by equality) against the "sv" column.

    Returns:
        A 5-tuple ``(accuracy, agent_mean, agent_std, total_mean, total_std)``:
        the selected "accuracy" Series indexed by (agent_idx, model_num), its
        mean and standard deviation grouped by agent_idx, and its mean and
        standard deviation over all selected rows.
    """
    is_env = df["env"] == env_name
    is_alg = df["alg"] == alg_name
    is_sv = df["sv"] == sv
    rows = df[is_env & is_alg & is_sv]

    # Index by (agent_idx, model_num) so accuracies can be grouped per agent.
    accuracy = rows.set_index(["agent_idx", "model_num"])["accuracy"]
    by_agent = accuracy.groupby(level="agent_idx")

    agent_mean = by_agent.mean()
    agent_std = by_agent.std()
    return accuracy, agent_mean, agent_std, accuracy.mean(), accuracy.std()

In [89]:

# Latent-inference accuracy table, read from the kernel's working directory.
infer_path = os.path.join(cur_dir, "infer_latent_result.csv")
df_infer = pd.read_csv(infer_path)

env_name = ["LaborDivision2-v2", "LaborDivision3-v2", "PO_Movers-v2", "PO_Flood-v2"]
alg_sv = [
    ("mahil", 0.2),
    ("maogail", 0.2)
]

for env in env_name:
    print(f"Env: {env}")
    for alg_name, sv in alg_sv:
        # mean / std are Series indexed by agent_idx (per-agent statistics).
        res, mean, std, _, _ = get_infer_result(df_infer, env, alg_name, sv)
        # Iterate over the real agent_idx labels and look them up with .loc;
        # indexing with range(len(mean)) only works when the labels happen to
        # be exactly 0..n-1 and otherwise raises KeyError.
        text_res = ", ".join(
            f"{idx}: {mean.loc[idx]}+-{std.loc[idx]}" for idx in mean.index
        )
        print(f"    {alg_name}-{sv}: {text_res}")

Env: LaborDivision2-v2
    mahil-0.2: 0: 0.7497333333333334+-0.04460597867252024, 1: 0.7496666666666667+-0.06774107567298684
    maogail-0.2: 0: 0.6113333333333334+-0.08729039656991675, 1: 0.6334000000000001+-0.1666072027254524
Env: LaborDivision3-v2
    mahil-0.2: 0: 0.781+-0.07476282498675395, 1: 0.7182+-0.08117166993477469
    maogail-0.2: 0: 0.4874+-0.03816857346037445, 1: 0.6824666666666667+-0.06117657503761823
Env: PO_Movers-v2
    mahil-0.2: 0: 0.7771405900598717+-0.019872716061282235, 1: 0.7787636153790666+-0.07207542849051782
    maogail-0.2: 0: 0.3478684267474572+-0.1505722419918239, 1: 0.4588833585803938+-0.06981071948629743
Env: PO_Flood-v2
    mahil-0.2: 0: 0.6103366201972119+-0.07738209959133764, 1: 0.5676073897767199+-0.03060435467616486
    maogail-0.2: 0: 0.31440553099852653+-0.060386257544394625, 1: 0.25422191998186555+-0.11598749957175995


In [7]:
# Built from working_dir instead of an absolute home-directory path so the
# cell also runs on other machines / checkouts.
# NOTE(review): assumes working_dir is the repository root that contains
# train_ma_dnn/ (the same base used for the trajectory data) -- confirm.
btil_result_path = os.path.join(
    working_dir, "train_ma_dnn", "infer_latent_result_btil2.csv"
)

# NOTE: rebinds df_infer; it now holds the BTIL results.
df_infer = pd.read_csv(btil_result_path)


env_name = ["PO_Movers-v2", "PO_Flood-v2"]
alg_sv = [
    ("btil", 0.2),
]

for env in env_name:
    print(f"Env: {env}")
    for alg_name, sv in alg_sv:
        # mean / std are Series indexed by agent_idx (per-agent statistics).
        res, mean, std, _, _ = get_infer_result(df_infer, env, alg_name, sv)
        # Iterate over the real agent_idx labels and look them up with .loc;
        # indexing with range(len(mean)) only works when the labels happen to
        # be exactly 0..n-1 and otherwise raises KeyError.
        text_res = ", ".join(
            f"{idx}: {mean.loc[idx]}+-{std.loc[idx]}" for idx in mean.index
        )
        print(f"    {alg_name}-{sv}: {text_res}")


Env: PO_Movers-v2
    btil-0.2: 0: 0.9004183798600591+-0.010581844800401844, 1: 0.9094712544182356+-0.008154943316411459
Env: PO_Flood-v2
    btil-0.2: 0: 0.5285050436359515+-0.04362917999598854, 1: 0.6207639125014167+-0.008275382222043205
