In [4]:
import os 
import datetime
import re 

import matplotlib.pyplot as plt 
import numpy as np
import pandas as pd
import glob
from tensorflow.compat.v1.train import summary_iterator as tf_summary_iterator
%matplotlib inline

In [5]:
env_name_standardizer = {
    "3m": "3m",
    "3": "3m",
    "3sv4z": "3s_vs_4z",
    "5m_vs_6m": "5m_vs_6m",
    "5v6": "5m_vs_6m",
    "8m": "8m",
    "8": "8m",
    "8m_vs_9m": "8m_vs_9m",
    "8v9": "8m_vs_9m",
    "10m_vs_11m": "10m_vs_11m",
    "10v11": "10m_vs_11m"
}

def newest(path_list):
    '''Returns full path of newest file in given path'''
    return max(path_list, key=os.path.getctime)

def extract_data_from_logs(logfile_path:str):
    res_dict = {}
    for summary in tf_summary_iterator(logfile_path):
        for v in summary.summary.value:
            res_dict[v.tag] = v.simple_value
    return res_dict

def str_search_param(name:str, param_name:str, param_type=None):
    if param_type is float:
        param_match = re.search(f"{param_name}=\\d{1,3}(\\.\\d{1,3})?", name)
    elif param_type is int:
        param_match = re.search(f"{param_name}=\\d*", name)        
    elif param_type is str: 
        param_match = re.search(f"{param_name}=[a-z=\\<\\>]*", name)

    if param_match is not None: 
        param_match = param_type(param_match.group().replace(f"{param_name}=", ""))

    return param_match

## Demo Policy Eval

In [10]:
alg = 'ippo'
env = "sc2"
load_steps = [
    1000000, 2000000, 3000000, 4000000, 5000000, 6000000, 7000000, 8000000, 9000000, 10000000
]
policy_map_names = ["5v6", "3sv4z"]
eval_seed = 111111
checkpoint_names = {
    "5v6": [
        "ippo_sc2_saved-batchsize=5005_seed=112358_03-06-21-12-41"
    ],
    "3sv4z": [
        "ippo_sc2_baseline_seed=119527_01-23-20-41-57"
    ]
}

results_dir = "/scratch/cluster/clw4542/marl_results"

In [17]:
def get_demo_eval_stats(checkpoint_name, alg, env, load_step, eval_seed, logs_basepath):
    log_basename = f'{alg}_{env}_eval-ckpt={checkpoint_name.strip("/ ").replace("_", "-")}-step={load_step}_seed={eval_seed}'
    log_basename += "*"
#     print("LOGS BASENAME IS ", log_basename)

    log_folder = os.path.join(logs_basepath, log_basename)

    # there may be multiple eval folders corresponding to same log; take latest
    log_folders = glob.glob(log_folder, recursive=True)
    newest_log_folder = newest(log_folders)
    full_log_path = glob.glob(newest_log_folder + "/*", recursive=True)[0]
    results_dict = extract_data_from_logs(full_log_path)
    return results_dict

# test
get_demo_eval_stats(checkpoint_name="ippo_sc2_saved-batchsize=5005_seed=112358_03-06-21-12-41", 
                    alg="ippo",
                    env="sc2",
                    load_step="10000000",
                    eval_seed="111111",
                    logs_basepath=os.path.join(results_dir, "ippo_5v6_eval/tb_logs/")
                   )

{'test_ippo_return_mean': 15.871341705322266,
 'test_ippo_return_std': 4.949375629425049,
 'test_ippo_dead_allies_mean': 3.377952814102173,
 'test_ippo_dead_enemies_mean': 5.283464431762695,
 'test_ippo_battle_won_mean': 0.5826771855354309,
 'test_ippo_ep_length_mean': 26.42519760131836}

In [18]:
results = {}
for policy_map_name in policy_map_names: 
    logs_basepath = f"/scratch/cluster/clw4542/marl_results/{alg}_{policy_map_name}_eval/tb_logs"
    results[policy_map_name] = {}
    for ckpt_name in checkpoint_names[policy_map_name]:
        for load_step in load_steps:
            eval_stats = get_demo_eval_stats(checkpoint_name=ckpt_name, 
                                             alg=alg,
                                             env=env, 
                                             load_step=load_step, 
                                             eval_seed=eval_seed,
                                             logs_basepath=logs_basepath)
            results[policy_map_name][load_step] = eval_stats["test_ippo_battle_won_mean"]
#             print(policy_map_name, "LOAD STEP: ", load_step, "BATTLE WON: ", eval_stats['test_ippo_battle_won_mean'])
results

{'5v6': {1000000: 0.0,
  2000000: 0.007874015718698502,
  3000000: 0.0,
  4000000: 0.015748031437397003,
  5000000: 0.12598425149917603,
  6000000: 0.22047244012355804,
  7000000: 0.4409448802471161,
  8000000: 0.4094488322734833,
  9000000: 0.5196850299835205,
  10000000: 0.5826771855354309},
 '3sv4z': {1000000: 0.0,
  2000000: 0.0,
  3000000: 0.007874015718698502,
  4000000: 0.06299212574958801,
  5000000: 0.3385826647281647,
  6000000: 0.5118110179901123,
  7000000: 0.6535432934761047,
  8000000: 0.6771653294563293,
  9000000: 0.8110235929489136,
  10000000: 0.8976377844810486}}

In [19]:
pd.DataFrame(results)

Unnamed: 0,5v6,3sv4z
1000000,0.0,0.0
2000000,0.007874,0.0
3000000,0.0,0.007874
4000000,0.015748,0.062992
5000000,0.125984,0.338583
6000000,0.220472,0.511811
7000000,0.440945,0.653543
8000000,0.409449,0.677165
9000000,0.519685,0.811024
10000000,0.582677,0.897638


## Action Transfer Eval

In [53]:
alg = 'qmix'
env = "mod-act"
load_step = 0
policy_map_names = ["8m", "5v6"]
eval_seeds = [1111111]
eval_diff = "7"
eval_acts = {"epsilon-greedy": {}, 
             "sticky": {"sticky_prob": 0.1}, 
#              "permute": {"permute": str}
            }

param_nickname = {"permutation_type": "permute",
                  "sticky_prob":  "sticky_prob",
                  "epsilon-greedy": "greedy",
                  "sticky": "sticky",
                  "permute": "permute"}

checkpoint_names = {
    "5v6": [
        "qmix_sc2_None_diff=7_act=epsilon-greedy_seed=112358_07-11-23-03-27",
        "qmix_sc2_None_diff=7_act=epsilon-greedy_seed=1285842_07-11-23-03-27",

        # "qmix_mod-act_None_diff=7_env-act=sticky_seed=112358_07-18-17-33-24",
        # "qmix_mod-act_None_diff=7_env-act=sticky_seed=1285842_07-18-17-33-33",
#         "qmix_mod-act_None_diff=7_env-act=sticky_sticky-prob=0.05_seed=112358_07-19-22-11-23",
#         "qmix_mod-act_None_diff=7_env-act=sticky_sticky-prob=0.05_seed=1285842_07-19-22-11-23",
        "qmix_mod-act_None_diff=7_env-act=sticky_sticky-prob=0.1_seed=112358_07-19-22-10-15",
        "qmix_mod-act_None_diff=7_env-act=sticky_sticky-prob=0.1_seed=1285842_07-19-22-10-15",

#         "qmix_mod-act_None_diff=7_env-act=permute_permute=east=>stop_seed=112358_07-21-23-19-41",
#         "qmix_mod-act_None_diff=7_env-act=permute_permute=east=>stop_seed=1285842_07-21-23-19-41",
#         "qmix_mod-act_None_diff=7_env-act=permute_permute=east<=>west_seed=112358_07-21-23-18-43",
#         "qmix_mod-act_None_diff=7_env-act=permute_permute=east<=>west_seed=1285842_07-21-23-18-43"
    ],
    "8m": [
        "qmix_sc2_None_diff=7_act=epsilon-greedy_seed=112358_07-11-22-59-26",
        "qmix_sc2_None_diff=7_act=epsilon-greedy_seed=1285842_07-11-22-59-27",

        # "qmix_mod-act_None_diff=7_env-act=sticky_seed=112358_07-18-17-37-38",
        # "qmix_mod-act_None_diff=7_env-act=sticky_seed=1285842_07-18-17-37-38",
#         "qmix_mod-act_None_diff=7_env-act=sticky_sticky-prob=0.05_seed=112358_07-19-22-05-10",
#         "qmix_mod-act_None_diff=7_env-act=sticky_sticky-prob=0.05_seed=1285842_07-19-22-05-10",
        "qmix_mod-act_None_diff=7_env-act=sticky_sticky-prob=0.1_seed=112358_07-19-22-09-31",
        "qmix_mod-act_None_diff=7_env-act=sticky_sticky-prob=0.1_seed=1285842_07-19-22-09-31",

#         "qmix_mod-act_None_diff=7_env-act=permute_permute=east=>stop_seed=112358_07-21-23-20-20",
#         "qmix_mod-act_None_diff=7_env-act=permute_permute=east=>stop_seed=1285842_07-21-23-20-20",
#         "qmix_mod-act_None_diff=7_env-act=permute_permute=east<=>west_seed=112358_07-22-06-28-00",
#         "qmix_mod-act_None_diff=7_env-act=permute_permute=east<=>west_seed=1285842_07-22-06-46-59"
    ]
}  # models to check transfer for
results_dir = "/scratch/cluster/clw4542/marl_results"
conf_name = 'default_eval_act'
source_conf_path = f'src/config/{conf_name}.yaml'


In [None]:
def get_eval_stats(checkpoint_name, env,  eval_actname, eval_actdict, load_step, logs_basepath):
    eval_actname = param_nickname[eval_actname].replace("_", "-")
    log_basename = f'qmix_{env}_eval-ckpt={checkpoint_name.strip("/ ").replace("_", "-")}-step={load_step}_diff={eval_diff}_env-act={eval_actname}'
    param_dict = get_params(eval_actdict=eval_actdict, 
                            name=ckpt_name)
    for param_name, param_value in param_dict.items():
        if param_value is None: continue
        param_name = param_nickname[param_name].replace("_", "-")
        log_basename = f"{log_basename}_{param_name}={param_value}"
    log_basename += "*"
#     print("LOGS BASENAME IS ", log_basename)
    log_folder = os.path.join(logs_basepath, log_basename)

    # there may be multiple eval folders corresponding to same log; take latest
    log_folders = glob.glob(log_folder, recursive=True)
    newest_log_folder = newest(log_folders)
    full_log_path = glob.glob(newest_log_folder + "/*", recursive=True)[0]
    results_dict = extract_data_from_logs(full_log_path)
    return results_dict

def get_params(eval_actdict:dict, name:str):
    '''
    eval_actdict: dict containing parameters to search for
    name: string we are searching
    '''
    param_dict = {}
    for param_name, param_value in eval_actdict.items():
        if isinstance(param_value, type): 
            param_type = param_value
            param_value = str_search_param(name=name, 
                                           param_name=param_nickname[param_name].replace("_", "-"), 
                                           param_type=param_type)
        param_dict[param_name] = param_value
    return param_dict

In [54]:
load_step = 0

results = {}
for policy_map_name in policy_map_names: 
    logs_basepath = f"/scratch/cluster/clw4542/marl_results/qmix_{policy_map_name}_trans_act/tb_logs"
    results[policy_map_name] = {}
    for ckpt_name in checkpoint_names[policy_map_name]:
        
        train_act = ckpt_name.split("_")[4].replace("env-", "").replace("act=", "")
        for param_name, param_type in eval_acts[train_act].items():
            if not isinstance(param_type, type):
                param_type = type(param_type)
            param_value = str_search_param(ckpt_name, param_name, param_type)
        train_act = f"train_env-act={train_act}_{param_name}={param_value}"
        
        seed = re.search("seed=\\d*", ckpt_name).group()
        
        if train_act not in results[policy_map_name]:
            results[policy_map_name][train_act] = {}
        if seed not in results[policy_map_name][train_act]:
            results[policy_map_name][train_act][seed] = {}
        
        for eval_actname, eval_actdict in eval_acts.items():
            eval_stats = get_eval_stats(ckpt_name, env, eval_actname, eval_actdict, load_step, logs_basepath)
            if eval_stats != {}:
                results[policy_map_name][train_act][seed][f"eval_act={eval_actname}"] = eval_stats["test_battle_won_mean"]
                print(policy_map_name, ckpt_name, "ACT SEL", eval_actname, "BATTLE WON: ", eval_stats['test_battle_won_mean'])
        print("\n")

8m qmix_sc2_None_diff=7_act=epsilon-greedy_seed=112358_07-11-22-59-26 ACT SEL epsilon-greedy BATTLE WON:  0.9677419066429138
8m qmix_sc2_None_diff=7_act=epsilon-greedy_seed=112358_07-11-22-59-26 ACT SEL sticky BATTLE WON:  0.8387096524238586


8m qmix_sc2_None_diff=7_act=epsilon-greedy_seed=1285842_07-11-22-59-27 ACT SEL epsilon-greedy BATTLE WON:  1.0
8m qmix_sc2_None_diff=7_act=epsilon-greedy_seed=1285842_07-11-22-59-27 ACT SEL sticky BATTLE WON:  0.8387096524238586


8m qmix_mod-act_None_diff=7_env-act=sticky_sticky-prob=0.1_seed=112358_07-19-22-09-31 ACT SEL epsilon-greedy BATTLE WON:  1.0
8m qmix_mod-act_None_diff=7_env-act=sticky_sticky-prob=0.1_seed=112358_07-19-22-09-31 ACT SEL sticky BATTLE WON:  0.9032257795333862


8m qmix_mod-act_None_diff=7_env-act=sticky_sticky-prob=0.1_seed=1285842_07-19-22-09-31 ACT SEL epsilon-greedy BATTLE WON:  1.0
8m qmix_mod-act_None_diff=7_env-act=sticky_sticky-prob=0.1_seed=1285842_07-19-22-09-31 ACT SEL sticky BATTLE WON:  0.9677419066429138


5

### Sticky Actions

##### Prob = 0.25

In [26]:
for mapname in ["5v6", "8m"]:
    dict_of_df = {k: pd.DataFrame(v) for k,v in results[mapname].items()}
    df = pd.concat(dict_of_df, axis=1)
    print(mapname)
    display(df)

5v6


Unnamed: 0_level_0,train_env-act=sticky,train_env-act=sticky,train_act=epsilon-greedy,train_act=epsilon-greedy
Unnamed: 0_level_1,seed=112358,seed=1285842,seed=112358,seed=1285842
eval_act=sticky,0.0,0.0,0.0,0.0
eval_act=greedy,0.0,0.0,0.741935,0.677419


8m


Unnamed: 0_level_0,train_env-act=sticky,train_env-act=sticky,train_act=epsilon-greedy,train_act=epsilon-greedy
Unnamed: 0_level_1,seed=112358,seed=1285842,seed=112358,seed=1285842
eval_act=sticky,0.548387,0.741935,0.419355,0.354839
eval_act=greedy,0.967742,0.903226,0.967742,1.0


##### Prob = 0.1

In [55]:
for mapname in ["5v6", "8m"]:
    dict_of_df = {k: pd.DataFrame(v) for k,v in results[mapname].items()}
    df = pd.concat(dict_of_df, axis=1)
    print(mapname)
    display(df)

5v6


Unnamed: 0_level_0,train_env-act=epsilon-greedy_sticky_prob=None,train_env-act=epsilon-greedy_sticky_prob=None,train_env-act=sticky_sticky_prob=None,train_env-act=sticky_sticky_prob=None
Unnamed: 0_level_1,seed=112358,seed=1285842,seed=112358,seed=1285842
eval_act=epsilon-greedy,0.741935,0.677419,0.193548,0.322581
eval_act=sticky,0.193548,0.258065,0.064516,0.096774


8m


Unnamed: 0_level_0,train_env-act=epsilon-greedy_sticky_prob=None,train_env-act=epsilon-greedy_sticky_prob=None,train_env-act=sticky_sticky_prob=None,train_env-act=sticky_sticky_prob=None
Unnamed: 0_level_1,seed=112358,seed=1285842,seed=112358,seed=1285842
eval_act=epsilon-greedy,0.967742,1.0,1.0,1.0
eval_act=sticky,0.83871,0.83871,0.903226,0.967742


##### Prob = 0.05

In [50]:
for mapname in ["5v6", "8m"]:
    dict_of_df = {k: pd.DataFrame(v) for k,v in results[mapname].items()}
    df = pd.concat(dict_of_df, axis=1)
    print(mapname)
    display(df)

5v6


Unnamed: 0_level_0,train_env-act=epsilon-greedy_sticky_prob=None,train_env-act=epsilon-greedy_sticky_prob=None,train_env-act=sticky_sticky_prob=None,train_env-act=sticky_sticky_prob=None
Unnamed: 0_level_1,seed=112358,seed=1285842,seed=112358,seed=1285842
eval_act=epsilon-greedy,0.741935,0.677419,0.322581,0.741935
eval_act=sticky,0.548387,0.354839,0.193548,0.387097


8m


Unnamed: 0_level_0,train_env-act=epsilon-greedy_sticky_prob=None,train_env-act=epsilon-greedy_sticky_prob=None,train_env-act=sticky_sticky_prob=None,train_env-act=sticky_sticky_prob=None
Unnamed: 0_level_1,seed=112358,seed=1285842,seed=112358,seed=1285842
eval_act=epsilon-greedy,0.967742,1.0,0.967742,1.0
eval_act=sticky,0.935484,0.903226,0.935484,0.83871


### Action Permutation

In [58]:
for mapname in ["5v6", "8m"]:
    dict_of_df = {k: pd.DataFrame(v) for k,v in results[mapname].items()}
    df = pd.concat(dict_of_df, axis=1)
    print(mapname)
    display(df)

5v6


Unnamed: 0_level_0,train_env-act=permute_permute=east=>stop,train_env-act=permute_permute=east=>stop,train_env-act=permute_permute=east<=>west,train_env-act=permute_permute=east<=>west
Unnamed: 0_level_1,seed=112358,seed=1285842,seed=112358,seed=1285842
eval_act=greedy,0.0,0.0,0.0,0.0
eval_act=permute,0.0,0.0,0.0,0.0


8m


Unnamed: 0_level_0,train_env-act=permute_permute=east=>stop,train_env-act=permute_permute=east=>stop,train_env-act=permute_permute=east<=>west,train_env-act=permute_permute=east<=>west
Unnamed: 0_level_1,seed=112358,seed=1285842,seed=112358,seed=1285842
eval_act=permute,,0.032258,0.0,0.0
eval_act=greedy,,,0.0,0.0
