In [60]:
import glob
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Notebook-wide display configuration.

# Use seaborn's "ticks" style for all figures.
sns.set(style="ticks")

# Compact NumPy reprs: one decimal place, no scientific notation, and
# summarize any array with more than 5 elements.
np.set_printoptions(threshold=5, suppress=True, precision=1)

# pandas: show 4 decimal places and never truncate rows when displaying frames.
pd.options.display.precision = 4
pd.options.display.max_rows = None

In [61]:
import glob
import pandas as pd
import numpy as np

def load_data(patterns, metric):
    """Gather evaluation JSON logs matched by glob patterns into one sorted table.

    Every matched file must be named

        eval_<runs>_<noise>_<method>_<ph>-<ah>[_<extra>...].json

    The underscore-separated fields are parsed from the file name (not from
    the directory part of the path) and attached as columns to the transposed
    frame read from the file. The extra fields depend on ``<method>``:

    * ``coherence``: ``nsample``, ``decay``
    * ``positive``: ``nsample``, ``nmode``
    * ``contrast`` / ``negative``: ``nsample``, ``nmode``, ``weak``
    * ``bid``: ``nsample``, ``nmode``, ``decay``, ``weak``
    * ``ema``: ``decay``

    Fields that a method does not provide keep their defaults
    (``nsample=1``, ``nmode=1``, ``decay=NaN``, ``weak=-1``).

    Args:
        patterns: Iterable of glob patterns selecting the eval files.
        metric: Name of the metric column (a key of the JSON logs) to keep;
            it is also the last sort key.

    Returns:
        A DataFrame with the columns ``noise, ah, method, nsample, decay,
        nmode, weak, <metric>``, sorted by noise, action horizon, method
        (known methods first, in ``method_order``; unknown ones afterwards,
        alphabetically) and then the remaining settings.

    Raises:
        ValueError: If no file matches any pattern, or if a matched file's
            name does not start with ``eval_``.
    """
    import os  # local import: the notebook's import cells do not provide `os`

    patterns = list(patterns)
    dfs = []
    for pattern in patterns:
        for fname in sorted(glob.glob(pattern)):
            # Parse only the file name. Splitting the full path on '/eval_'
            # picks the wrong piece when a parent directory starts with
            # 'eval_' and fails for paths without a '/' before the name.
            base = os.path.basename(fname)
            if not base.startswith('eval_'):
                raise ValueError(
                    f"Unexpected eval file name (must start with 'eval_'): {fname}"
                )
            setting = base[len('eval_'):].replace('.json', '').split('_')
            method = setting[2]

            df = pd.read_json(fname, orient='index').transpose()
            df['runs'] = int(setting[0])
            df['noise'] = float(setting[1])
            df['method'] = method
            horizon = setting[3].split('-')
            df['ph'] = int(horizon[0])
            df['ah'] = int(horizon[1])

            # Defaults for settings that the method does not encode.
            df['nsample'] = 1
            df['nmode'] = 1
            df['decay'] = np.nan
            df['weak'] = -1

            if method == 'coherence':
                df['nsample'] = int(setting[4])
                df['decay'] = float(setting[5])
            elif method == 'positive':
                # 'positive' file names carry no weak-checkpoint field.
                df['nsample'] = int(setting[4])
                df['nmode'] = int(setting[5])
            elif method in ('contrast', 'negative'):
                df['nsample'] = int(setting[4])
                df['nmode'] = int(setting[5])
                df['weak'] = int(setting[6])
            elif method == 'bid':
                df['nsample'] = int(setting[4])
                df['nmode'] = int(setting[5])
                df['decay'] = float(setting[6])
                df['weak'] = int(setting[7])
            elif method == 'ema':
                df['decay'] = float(setting[4])

            dfs.append(df)

    if not dfs:
        # pd.concat would fail with an opaque "No objects to concatenate".
        raise ValueError(f"No evaluation files matched patterns: {patterns}")

    dff = pd.concat(dfs, ignore_index=True).astype(
        {'decay': float, 'nsample': int, 'nmode': int, 'runs': int, 'weak': int}
    )

    # Display order of known methods; anything else sorts after them.
    method_order = {'random': 0, 'lowvar': 0.6, 'warmstart': 0.8, 'ema': 1, 'coherence': 2, 'bid': 3}
    dff['sort_key'] = dff['method'].apply(lambda x: method_order.get(x.lower(), len(method_order)))

    dff = dff.sort_values(
        ['noise', 'ah', 'sort_key', 'method', 'nsample', 'decay', 'nmode', 'weak', metric]
    ).drop('sort_key', axis=1)

    # Only the settings and the requested metric are returned; every other
    # column read from the logs is dropped.
    cols = ['noise', 'ah', 'method', 'nsample', 'decay', 'nmode', 'weak', metric]
    return dff[cols]

In [62]:
# Tasks to summarize, each paired with the metric column to read from its
# eval logs. Uncomment an entry to include that task.
tasks_and_metrics = [
    ('pusht', 'test/mean_score'),
    # ('kitchen', 'test/p_4'),
    # ('lift_mh', 'test/mean_score'),
    # ('can_mh', 'test/mean_score'),
    # ('square_mh', 'test/mean_score'),
    # ('transport_mh', 'test/mean_score'),
    # ('tool_hang_ph', 'test/mean_score')
]

foldername = 'outputs'

# Methods whose eval files are collected; one glob pattern is built per
# method, in this order.
eval_methods = ('random', 'warmstart', 'ema', 'contrast', 'bid')

# For every task: build the glob patterns, load the matching logs and show
# the resulting table.
for task, metric in tasks_and_metrics:
    patterns = [
        f'../{foldername}/{task}/*/*/*/eval_*_{method}_*.json'
        for method in eval_methods
    ]
    df = load_data(patterns, metric)
    display(df)



Unnamed: 0,noise,ah,method,nsample,decay,nmode,weak,test/mean_score
0,0.0,1,random,1,,1,-1,0.8457
1,0.0,8,random,1,,1,-1,0.884
4,0.0,8,warmstart,1,,1,-1,0.8752
6,0.0,8,ema,1,0.5,1,-1,0.8602
8,0.0,8,bid,15,0.5,3,50,0.9164
2,1.0,1,random,1,,1,-1,0.8048
5,1.0,1,warmstart,1,,1,-1,0.8361
7,1.0,1,ema,1,0.5,1,-1,0.8217
9,1.0,1,bid,15,0.5,3,50,0.8698
3,1.0,8,random,1,,1,-1,0.5822
