In [2]:
import os
import json

import matplotlib.pyplot as plt
import numpy as np
from collections import defaultdict, OrderedDict
from tensorboard.backend.event_processing import event_accumulator
from scipy import stats
import pandas as pd

## Our setup results paths

In [31]:
RESULTS_PATH = "../results/panda_pick_and_place_obstacle_long_v2"
LOGS_TEMPLATE_PATH = os.path.join(RESULTS_PATH, "%s", "tb", "TQC_1")

LOG_PATH_LIST = [LOGS_TEMPLATE_PATH % folder for folder in os.listdir(RESULTS_PATH) if os.path.isdir(os.path.join(RESULTS_PATH, folder))]
LOG_PATH_LIST = sorted([os.path.join(log_path, os.listdir(log_path)[0]) for log_path in LOG_PATH_LIST])

In [32]:
results_dict = defaultdict(dict)
all_experiment_combinations = set()
tb_keys = ["eval/success_rate"]

for filepath in LOG_PATH_LIST:
    config = json.load(open(os.path.join("/".join(filepath.split("/")[:-3]), "config.json"), "r"))
    num_steps = config["train_kwargs"]["total_timesteps"]
    
    run_name = filepath.split("/")[-4]
    baseline_type = run_name.split("_")[4]
    environment = "_".join(run_name.split("_")[-4:-3])
    
    ea = event_accumulator.EventAccumulator(filepath, size_guidance={"scalars": 0})
    ea.Reload()
    
    for tb_key in tb_keys:
        if environment + "-" + tb_key not in results_dict[baseline_type]:
            results_dict[baseline_type][environment + "-" + tb_key] = []
        
        scalars = [scalar.value for scalar in ea.Scalars(tb_key)]
        scalars = np.interp(np.linspace(0, len(scalars), 100), np.arange(len(scalars)), scalars).tolist()
        
        results_dict[baseline_type][environment + "-" + tb_key].append(scalars)
    
for baseline_type in results_dict.keys():
    for environment in results_dict[baseline_type].keys():
        results_dict[baseline_type][environment] = np.stack(results_dict[baseline_type][environment], axis=0)

## BiPaRS results paths

In [5]:
RESULTS_PATH = "../ipynbs/BiPaRS_partial_results"
LOGS_TEMPLATE_PATH = os.path.join(RESULTS_PATH, "%s", "summary")

BIPARS_LOG_PATH_LIST = [LOGS_TEMPLATE_PATH % folder for folder in os.listdir(RESULTS_PATH) if os.path.isdir(os.path.join(RESULTS_PATH, folder))]
BIPARS_LOG_PATH_LIST = sorted([os.path.join(log_path, tb_log_name) for log_path in BIPARS_LOG_PATH_LIST for tb_log_name in os.listdir(log_path)])

In [6]:
for filepath in BIPARS_LOG_PATH_LIST:
    run_name = filepath.split("/")[-3]
    baseline_type = "BiPaRS_" + "_".join(run_name.split("_")[2:4])
    environment = run_name.split("_")[-1]
    
    # print(environment)
    
    if environment not in results_dict[baseline_type]:
        results_dict[baseline_type][environment] = []
    
    ea = event_accumulator.EventAccumulator(filepath, size_guidance={"scalars": 0})
    ea.Reload()
    
    scalars = [scalar.value for scalar in ea.Scalars("Test_Episode_Success")]
    scalars = np.interp(np.arange(100) * (len(scalars) / 100), np.arange(len(scalars)), scalars).tolist()
    
    results_dict[baseline_type][environment].append(scalars)
    
for baseline_type in results_dict.keys():
    for environment in results_dict[baseline_type].keys():
        results_dict[baseline_type][environment] = np.stack(results_dict[baseline_type][environment], axis=0)

## Analysis v1

In [5]:
def ttest(mean_0, mean_1, std_0, std_1, n_0, n_1):
    return np.abs(mean_0 - mean_1) / np.sqrt(std_0**2 / n_0 + std_1**2 / n_1)

def df(std_0, std_1, n_0, n_1):
    return (std_0**2 / n_0 + std_1**2 / n_1)**2 / ((std_0**2 / n_0)**2 / (n_0 - 1) + (std_1**2 / n_1)**2 / (n_1 - 1))

In [14]:

at_steps = [20, 40, 60, 80, 99]
ignore = {'BiPaRS_v1_fop', 'BiPaRS_v2_fsa'}
alpha = 0.05

keys_map = {
    "random": "Random",
    "manualtask": "Main task",
    "currot": "CurrOT",
    "settersolver": "Setter-Solver",
    "alpgmm": "ALP-GMM",
    "sacx": "SAC-Q",
    # "BiPaRS_v1_fop": "BiPaRS-v1",
    # "BiPaRS_v2_fsa": "BiPaRS-v2",
    # "BiPaRS_v3_fsart": "BiPaRS"
}
bonferroni = len(keys_map) - 1

column_names = ['step', 'first_algo', 'second_algo', "first_mean", "first_std", "significant", "p"]
results = pd.DataFrame(columns=column_names)

for first_algo in keys_map.keys():
    first_scalar_id = list(results_dict[first_algo].keys())[0]
    first_data = results_dict[first_algo][first_scalar_id]
    for second_algo in keys_map.keys():
        if first_algo in ignore or second_algo in ignore:
            continue

        second_scalar_id = list(results_dict[second_algo].keys())[0]
        second_data = results_dict[second_algo][second_scalar_id]
        
        for step in at_steps:
            first_data_selection = first_data[:, step]
            second_data_selection = second_data[:, step]
            
            first_mean = first_data_selection.mean()
            first_std = first_data_selection.std()
            
            second_mean = second_data_selection.mean()
            second_std = second_data_selection.std()
            
            first_n = len(first_data_selection)
            second_n = len(second_data_selection)
            
            # t_scores = ttest(first_mean, second_mean, first_std, second_std, first_n, second_n)
            # p_values = stats.t.sf(t_scores, df=df(first_std, second_std, first_n, second_n)) * 2
            u_scores = stats.mannwhitneyu(first_data_selection, second_data_selection)
            p_values = u_scores.pvalue
            
            p_values *= bonferroni
            significant = p_values < alpha
            
            results.loc[len(results)] = [step, keys_map[first_algo], keys_map[second_algo], first_mean, first_std, significant, p_values]
            
results[results.significant]

Unnamed: 0,step,first_algo,second_algo,first_mean,first_std,significant,p
10,20,Random,CurrOT,0.100000,0.062633,True,0.000863
11,40,Random,CurrOT,0.210000,0.101540,True,0.000787
12,60,Random,CurrOT,0.430909,0.289291,True,0.001473
13,80,Random,CurrOT,0.645758,0.251743,True,0.002748
15,20,Random,Setter-Solver,0.100000,0.062633,True,0.000431
...,...,...,...,...,...,...,...
160,20,SAC-Q,CurrOT,0.298788,0.261606,True,0.009137
161,40,SAC-Q,CurrOT,0.520606,0.190446,True,0.006326
162,60,SAC-Q,CurrOT,0.617273,0.297467,True,0.036997
165,20,SAC-Q,Setter-Solver,0.298788,0.261606,True,0.001887


In [24]:
algorithms_order = ["Main task", "Random", "SAC-Q", "Setter-Solver", "CurrOT", "ALP-GMM"] # , "BiPaRS"]
colors = {
    "Random": "tab:brown",
    "Main task": "tab:pink",
    "CurrOT": "tab:orange",
    "Setter-Solver": "tab:green",
    "ALP-GMM": "tab:blue",
    "SAC-Q": "tab:purple",
    "BiPaRS": "tab:red"
}

gray_rows = {"CurrOT", "Setter-Solver", "ALP-GMM"}
symbol = "\\ding{169}"

for algo in algorithms_order:
    if algo in gray_rows:
        print("\\rowcolor{gray!20} ", end="")
        
    print("\\textcolor{" + colors[algo] + "}{" + symbol + "}" + " \\textbf{" + algo + "}", end="")
    for step in at_steps:
        subset = results[(results.step == step) & (results.first_algo == algo)]
        print(" & \n", end="")
        
        print(np.round(subset.first_mean.iloc[0], 2), "\\par $\\pm$", np.round(subset.first_std.iloc[0], 2), end="")
        print("\\par ", end="")
        
        if len(subset) > 0:
            for index, row in subset.iterrows():
                if row.significant:
                    print("\\textcolor{" + colors[row.second_algo] + "}{" + symbol + "}", end="")
                # print(row.second_algo, row.significant)
        # print("}", end="")
    print(" \\\ \\hline\n")
print()

\textcolor{tab:pink}{\ding{169}} \textbf{Main task} & 
0.15 \par $\pm$ 0.06\par \textcolor{tab:orange}{\ding{169}}\textcolor{tab:green}{\ding{169}} & 
0.37 \par $\pm$ 0.23\par \textcolor{tab:orange}{\ding{169}}\textcolor{tab:green}{\ding{169}} & 
0.42 \par $\pm$ 0.25\par \textcolor{tab:orange}{\ding{169}}\textcolor{tab:green}{\ding{169}}\textcolor{tab:blue}{\ding{169}} & 
0.45 \par $\pm$ 0.2\par \textcolor{tab:orange}{\ding{169}}\textcolor{tab:green}{\ding{169}}\textcolor{tab:blue}{\ding{169}}\textcolor{tab:purple}{\ding{169}} & 
0.46 \par $\pm$ 0.24\par \textcolor{tab:orange}{\ding{169}}\textcolor{tab:green}{\ding{169}}\textcolor{tab:blue}{\ding{169}} \\ \hline

\textcolor{tab:brown}{\ding{169}} \textbf{Random} & 
0.1 \par $\pm$ 0.06\par \textcolor{tab:orange}{\ding{169}}\textcolor{tab:green}{\ding{169}} & 
0.21 \par $\pm$ 0.1\par \textcolor{tab:orange}{\ding{169}}\textcolor{tab:green}{\ding{169}}\textcolor{tab:purple}{\ding{169}} & 
0.43 \par $\pm$ 0.29\par \textcolor{tab:orange}{\di

## Analysis v2

In [54]:
threshold = 0.9
alpha = 0.05

ignore = {'BiPaRS_v1_fop', 'BiPaRS_v2_fsa'}
keys_map = {
    "random": "Random",
    "manualtask": "Main task",
    "currot": "CurrOT",
    "settersolver": "Setter-Solver",
    "alpgmm": "ALP-GMM",
    "sacx": "SAC-Q",
    # "BiPaRS_v1_fop": "BiPaRS-v1",
    # "BiPaRS_v2_fsa": "BiPaRS-v2",
    # "BiPaRS_v3_fsart": "BiPaRS"
}

bonferroni = len(keys_map) - 1

steps_data = OrderedDict()

for first_algo in keys_map.keys():
    first_scalar_id = list(results_dict[first_algo].keys())[0]
    first_data = results_dict[first_algo][first_scalar_id]
    
    first_above_threshold = np.where(first_data > threshold)
    # if len(np.unique(first_above_threshold[0])) != len(first_data): 
    #     continue
    
    first_occurences = [np.where(first_above_threshold[0] == idx)[0][0] for idx in range(len(first_data)) if idx in first_above_threshold[0]]
    first_algo_indices = first_above_threshold[1][first_occurences]
    
    for second_algo in keys_map.keys():
        if first_algo in ignore or second_algo in ignore or first_algo == second_algo:
            continue

        second_scalar_id = list(results_dict[second_algo].keys())[0]
        second_data = results_dict[second_algo][second_scalar_id]
        
        second_above_threshold = np.where(second_data > threshold)
        # if len(np.unique(second_above_threshold[0])) != len(second_data): 
        #     continue
        
        second_occurences = [np.where(second_above_threshold[0] == idx)[0][0] for idx in range(len(second_data)) if idx in second_above_threshold[0]]
        second_algo_indices = second_above_threshold[1][second_occurences]
        
        steps_data[first_algo + ":" + second_algo] = (first_algo_indices, second_algo_indices)
        

In [56]:
for comparison, (first_steps_data, second_steps_data) in steps_data.items():
    u_scores = stats.mannwhitneyu(first_steps_data, second_steps_data)
    p_values = u_scores.pvalue
    
    p_values *= bonferroni
    significant = p_values < alpha
    print(comparison, np.round(p_values, 2), significant)

random:manualtask 1.43 False
random:currot 0.01 True
random:settersolver 0.01 True
random:alpgmm 0.04 True
random:sacx 1.17 False
manualtask:random 1.43 False
manualtask:currot 0.75 False
manualtask:settersolver 0.76 False
manualtask:alpgmm 3.64 False
manualtask:sacx 2.5 False
currot:random 0.01 True
currot:manualtask 0.75 False
currot:settersolver 0.12 False
currot:alpgmm 0.0 True
currot:sacx 0.08 False
settersolver:random 0.01 True
settersolver:manualtask 0.76 False
settersolver:currot 0.12 False
settersolver:alpgmm 0.0 True
settersolver:sacx 0.05 True
alpgmm:random 0.04 True
alpgmm:manualtask 3.64 False
alpgmm:currot 0.0 True
alpgmm:settersolver 0.0 True
alpgmm:sacx 2.37 False
sacx:random 1.17 False
sacx:manualtask 2.5 False
sacx:currot 0.08 False
sacx:settersolver 0.05 True
sacx:alpgmm 2.37 False


## Analysis v3

In [33]:
threshold = 0.9

ignore = {'BiPaRS_v1_fop', 'BiPaRS_v2_fsa'}
keys_map = OrderedDict({
    "manualtask": "Main task",
    "random": "Random",
    "sacx": "SAC-Q",
    "currot": "CurrOT",
    "settersolver": "Setter-Solver",
    "alpgmm": "ALP-GMM",
    # "BiPaRS_v1_fop": "BiPaRS-v1",
    # "BiPaRS_v2_fsa": "BiPaRS-v2",
    # "BiPaRS_v3_fsart": "BiPaRS"
})

total_steps = 1.5e6
print(total_steps)

steps_data = OrderedDict()

for algo in keys_map.keys():
    scalar_id = list(results_dict[algo].keys())[0]
    data = results_dict[algo][scalar_id]
    
    above_threshold = np.where(np.median(data, axis=0) > threshold)
    # if len(np.unique(first_above_threshold[0])) != len(first_data): 
    #     continue
    
    # first_occurences = [np.where(above_threshold[0] == idx)[0][0] for idx in range(len(data)) if idx in above_threshold[0]]
    # indices = above_threshold[1][first_occurences]
    
    if len(above_threshold[0]) == 0:
        continue
    
    print(keys_map[algo], "$" + str((total_steps * above_threshold[0][0] / 100) / 10e5) + " \\times 10^5$")
    
    

1500000.0
SAC-Q $0.27 \times 10^5$
Setter-Solver $0.69 \times 10^5$
