In [1]:
import os
import time

import pandas as pd
import numpy as np
import seaborn as sns
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
import re

from copo import pretty_print

%matplotlib inline

In [2]:
EVALUATE_RESULTS_PATH = "./formal_evaluate_results"

os.listdir(EVALUATE_RESULTS_PATH)

['.DS_Store',
 '1011_ccppo_concat_evaluate_results.csv',
 '1003_copo_formal_evaluate_results.csv',
 '1003_ippo_evaluate_results.csv',
 '1003_cl_evaluate_results.csv',
 '1012_ccppo_mf_evaluate_results.csv']

In [3]:
eval_df = []

algo_set = set()
env_set = set()

count = 0

for p in os.listdir(EVALUATE_RESULTS_PATH):
    match = re.search("(.*?)_evaluate_results.csv", p)
    
    if match:
        
        count += 1
        
        exp_name = match[1]
        
        full_p = os.path.abspath(os.path.join(EVALUATE_RESULTS_PATH, p))
        
        try:
            tmp = pd.read_csv(full_p)
        except Exception as e:
            print(full_p)
            raise e

        tmp["exp_name"] = exp_name
        tmp["model_index"] = p
        
        if "_cl" in exp_name:
            # Special trement for CL, removing the evaluate results of the wrong checkpoints
            # We only keep results for checkpoint-1250
            # This might be inaccurate if you have multiple checkpoints after 3rd stage.
            tmp = tmp[tmp['path'].str.contains('1250')]
        
        eval_df.append(tmp)
        
        print(f"[{count}] Processed the agent: ", exp_name)
    else:
        print(p, " not exists!")
                
eval_df = pd.concat(eval_df)

.DS_Store  not exists!
[1] Processed the agent:  1011_ccppo_concat
[2] Processed the agent:  1003_copo_formal
[3] Processed the agent:  1003_ippo
[4] Processed the agent:  1003_cl
[5] Processed the agent:  1012_ccppo_mf


In [4]:
for env_short_name in ["Inter", "Round", "Parking", "Bottle", "Tollgate"]:
    eval_df.loc[
        [env_short_name in item for item in eval_df.env],
        "env"
    ] = env_short_name
    
eval_df.loc[
    ["MetaDrive" in item for item in eval_df.env],
    "env"
] = "PGMap"

## Find the best checkpoint for each env and algo.

That is applying maximum operation across all seeds and checkpoints for a given env and algo.

In [5]:

for env, gb_env in eval_df.groupby("env"):  # Each env
    for exp_name, gb in gb_env.groupby('exp_name'):  # Each algorithm
        
        best_checkpoint_value = float("-inf")
        best_checkpoint_data = None
        best_checkpoint_path = None
        
        for seed, seed_gb in gb.groupby("seed"):  # Each seed
            
            for path, ckpt_gb in seed_gb.groupby("path"):  # Find the best checkpoint
                
                val = ckpt_gb["success_rate"].mean()
                assert np.isscalar(val)
                
                if val > best_checkpoint_value:
                    best_checkpoint_value = val
                    best_checkpoint_data = ckpt_gb    
                    best_checkpoint_path = path
            
        print(f"For env={env}, algo={exp_name}, seed={seed}, we find {len(seed_gb.path.unique())} checkpoints and the best success rate is {best_checkpoint_value:.3f}")
        print(f"Path:\n{best_checkpoint_path}\n\n")


For env=Bottle, algo=1003_cl, seed=12000, we find 1 checkpoints and the best success rate is 0.738
Path:
/home/zhenghao/CoPO/copo_code/copo/1003_cl/IPPOCL_CLMultiAgentBottleneckEnv_e2a67_00039_39_env=CLMultiAgentBottleneckEnv,start_seed=11000,seed=0_2022-10-04_03-46-40/checkpoint_1250/checkpoint-1250


For env=Bottle, algo=1003_copo_formal, seed=12000, we find 6 checkpoints and the best success rate is 0.781
Path:
/home/zhpeng/CoPO/copo_code/copo/1003_copo/CoPO_CCMultiAgentBottleneckEnv_647d3_00003_3_env=CCMultiAgentBottleneckEnv,start_seed=5000,seed=0,use_centralized_critic=False,use_2022-10-04_10-43-57/checkpoint_620/checkpoint-620


For env=Bottle, algo=1003_ippo, seed=12000, we find 4 checkpoints and the best success rate is 0.610
Path:
/home/zhenghao/CoPO/copo_code/copo/1003_ippo/IPPO_MultiAgentBottleneckEnv_ad347_00045_45_env=MultiAgentBottleneckEnv,start_seed=12000,seed=0_2022-10-04_01-25-13/checkpoint_530/checkpoint-530


For env=Bottle, algo=1011_ccppo_concat, seed=12000, we f

For env=Tollgate, algo=1012_ccppo_mf, seed=12000, we find 3 checkpoints and the best success rate is 0.490
Path:
/home/zhenghao/CoPO/copo_code/copo/1012_ccppo_mf/CCPPOTrainerForMAOurEnvironment_CCMultiAgentTollgateEnv_2700f_00028_28_counterfactual=True,env=CCMultiAgentTollgateEnv,start_seed=_2022-10-12_17-08-30/checkpoint_625/checkpoint-625


