In [195]:
import json
import os
from glob import glob
import pandas as pd
import re
import scipy.stats as st
import numpy as np

In [196]:
def read_experiment_stats(regex):
    """Load per-run stats files and summarise them per experiment.

    Every ``./stats/*.json`` file (relative to the current working directory)
    whose base name matches ``regex`` under ``re.search`` is read. A file's
    base name with ``.json`` removed is the run name; the experiment name is
    the run name with its last ``_``-separated token dropped, so runs that
    differ only in that trailing token are aggregated together.

    Each matched file must provide the keys ``'Method Accuracy'``,
    ``'Method Flops'``, ``'Method Client Flops'`` and ``'Method Comm Cost'``,
    each holding a string that evaluates to a non-empty sequence of numbers.

    Parameters
    ----------
    regex : str
        Pattern searched for in each file's base name.

    Returns
    -------
    df : pandas.DataFrame
        One row per run holding the raw, unparsed strings. Columns:
        ``Experiment Name``, ``Method Accuracy``, ``Method Comm Cost``,
        ``Total Flops``, ``Client Flops``.
    table : pandas.DataFrame
        One row per run holding the parsed headline numbers. Columns:
        ``Experiment Name``, ``Method Accuracy`` (maximum accuracy * 100),
        ``Client TFLOPS`` and ``Total TFLOPS`` (last entry * 1e-12) and
        ``Comm Cost (GBs)`` (last entry * 1e-3).
    exp_dict : dict
        Maps experiment name to ``acc_avg``, ``acc_std`` (``np.std`` with its
        default, i.e. population standard deviation), ``tflop_avg``,
        ``cflop_avg``, ``comm_avg`` and ``count`` (number of runs aggregated).

    Raises
    ------
    KeyError
        If a matched file lacks one of the required keys.
    """
    pattern = re.compile(regex)
    # Sorted so that row order and dict insertion order do not depend on the
    # arbitrary order in which glob lists the directory.
    stat_paths = sorted(
        path for path in glob("./stats/*.json")
        if pattern.search(os.path.basename(path)) is not None
    )

    raw_rows = []         # unparsed strings, one tuple per run -> `df`
    table_rows = []       # parsed headline numbers, one tuple per run -> `table`
    per_experiment = {}   # experiment name -> lists of per-run values
    for path in stat_paths:
        run_name = os.path.basename(path).replace(".json", "")
        exp_name = "_".join(run_name.split("_")[:-1])

        # Context manager guarantees the file handle is closed after parsing.
        with open(path, "r") as handle:
            stats = json.load(handle)

        acc_raw = stats['Method Accuracy']
        total_raw = stats['Method Flops']
        client_raw = stats['Method Client Flops']
        comm_raw = stats['Method Comm Cost']

        # NOTE(security): eval() executes whatever expression the stats file
        # contains. Only point this at files you produced yourself; if the
        # stored strings are plain list literals, ast.literal_eval is the
        # safe replacement.
        # Each field is parsed exactly once and reused for both outputs.
        best_acc = max(eval(acc_raw)) * 100
        total_tflops = eval(total_raw)[-1] * 1e-12
        client_tflops = eval(client_raw)[-1] * 1e-12
        comm_cost = eval(comm_raw)[-1] * 1e-3

        raw_rows.append((run_name, acc_raw, comm_raw, total_raw, client_raw))
        table_rows.append((run_name, best_acc, client_tflops, total_tflops, comm_cost))

        bucket = per_experiment.setdefault(
            exp_name, {'acc': [], 'tflop': [], 'cflop': [], 'comm': []}
        )
        bucket['acc'].append(best_acc)
        bucket['tflop'].append(total_tflops)
        bucket['cflop'].append(client_tflops)
        bucket['comm'].append(comm_cost)

    # Key order is kept stable because callers turn this dict into a DataFrame.
    exp_dict = {
        name: {
            'acc_avg': np.mean(bucket['acc']),
            'acc_std': np.std(bucket['acc']),
            'tflop_avg': np.mean(bucket['tflop']),
            'cflop_avg': np.mean(bucket['cflop']),
            'comm_avg': np.mean(bucket['comm']),
            'count': len(bucket['acc']),
        }
        for name, bucket in per_experiment.items()
    }

    df = pd.DataFrame(
        raw_rows,
        columns=["Experiment Name", "Method Accuracy", "Method Comm Cost", "Total Flops", "Client Flops"]
    )

    # Main results table: one parsed row per run.
    table = pd.DataFrame(
        table_rows,
        columns=["Experiment Name", "Method Accuracy", "Client TFLOPS", "Total TFLOPS", "Comm Cost (GBs)"]
    )

    return df, table, exp_dict

def compute_acc_per_comm_per_flop(metric_dict):
    """Score every experiment as accuracy per unit of client compute and communication.

    Parameters
    ----------
    metric_dict : dict
        Maps experiment name to a mapping that provides ``'acc_avg'``,
        ``'cflop_avg'`` and ``'comm_avg'``.

    Returns
    -------
    dict
        Maps each experiment name to
        ``acc_avg / (cflop_avg * comm_avg)``.
    """
    return {
        name: stats['acc_avg'] / (stats['cflop_avg'] * stats['comm_avg'])
        for name, stats in metric_dict.items()
    }


In [197]:
# Aggregate every stats file whose base name contains "non".
df, table, metric_dict = read_experiment_stats(r'.*non.*')
# Cell output: acc_avg / (cflop_avg * comm_avg) for each experiment.
compute_acc_per_comm_per_flop(metric_dict)

{'non_iid_50_v1_SplitFed-5-5': 0.2662540401512772,
 'non_iid_50_v1_LocalParallel-0.6-3-5-True': 1.6625398636086,
 'non_iid_50_v1_Vanilla-5-5': 0.26599600428361636,
 'non_iid_50_v1_LocalParallel-0.6-3-5': 1.6995702699661233,
 'non_iid_50_v1_Only-Masked': 0.5425862020452664}

In [198]:
# Per-experiment summary: one row per experiment, ordered by experiment name.
# pandas is already imported in the notebook's first cell, so it is not
# re-imported here.
df = (
    pd.DataFrame(metric_dict)
    .T
    .reset_index()
    .sort_values("index")
)
df

Unnamed: 0,index,acc_avg,acc_std,tflop_avg,cflop_avg,comm_avg,count
3,non_iid_50_v1_LocalParallel-0.6-3-5,88.884065,0.266961,10.885159,5.384503,9.712681,5.0
1,non_iid_50_v1_LocalParallel-0.6-3-5-True,88.74183,0.204757,10.885159,5.384503,9.913127,5.0
4,non_iid_50_v1_Only-Masked,89.38376,0.280765,26.679528,3.760128,43.811412,5.0
0,non_iid_50_v1_SplitFed-5-5,84.635877,0.237803,26.679528,3.760128,84.538719,5.0
2,non_iid_50_v1_Vanilla-5-5,84.654812,0.317508,26.679528,3.760128,84.639659,5.0


In [199]:
# Aggregate every stats file whose base name contains "interrupt".
df, table, metric_dict = read_experiment_stats(r'.*interrupt.*')
# Cell output: acc_avg / (cflop_avg * comm_avg) for each experiment.
compute_acc_per_comm_per_flop(metric_dict)

{'interrupt_range_expt_0.45': 1.2473348750626614,
 'interrupt_range_expt_0.3': 0.9682545475639175,
 'interrupt_range_expt_0.75': 2.6805922717864092,
 'interrupt_range_expt_0.6': 1.7141803353946299,
 'interrupt_range_expt_0.9': 6.652288270243921}

In [200]:
# Per-experiment summary: one row per experiment, ordered by experiment name.
df = (
    pd.DataFrame(metric_dict)
    .T
    .reset_index()
    .sort_values("index")
)
df

Unnamed: 0,index,acc_avg,acc_std,tflop_avg,cflop_avg,comm_avg,count
1,interrupt_range_expt_0.3,89.79665,0.387912,15.010651,5.384503,17.22364,5.0
0,interrupt_range_expt_0.45,89.773213,0.34231,12.947905,5.384503,13.366511,5.0
3,interrupt_range_expt_0.6,89.080563,0.38153,10.885159,5.384503,9.651188,5.0
2,interrupt_range_expt_0.75,88.179444,0.591671,8.822413,5.384503,6.109293,5.0
4,interrupt_range_expt_0.9,87.112685,0.446838,6.759667,5.384503,2.432006,5.0


In [201]:
# Aggregate every stats file whose base name contains "cifar".
df, table, metric_dict = read_experiment_stats(r'.*cifar.*')
# Cell output: acc_avg / (cflop_avg * comm_avg) for each experiment.
compute_acc_per_comm_per_flop(metric_dict)

{'cifar10_NIID_v1_Only-Masked': 3.2849789276421983,
 'cifar10_NIID_v1_Vanilla-5-5': 1.6434001875308595,
 'cifar10_NIID_v1_LocalParallel-0.6-3-5': 10.612005867286955,
 'cifar10_NIID_v1_SplitFed-5-5': 1.3414110625569253,
 'cifar10_NIID_v1_CESL-3-5No-Head': 13.36726671850826}

In [202]:
# Per-experiment summary: one row per experiment, ordered by experiment name.
df = (
    pd.DataFrame(metric_dict)
    .T
    .reset_index()
    .sort_values("index")
)
df

Unnamed: 0,index,acc_avg,acc_std,tflop_avg,cflop_avg,comm_avg,count
4,cifar10_NIID_v1_CESL-3-5No-Head,87.436667,4.170617,4.092342,1.661338,3.937251,5.0
2,cifar10_NIID_v1_LocalParallel-0.6-3-5,91.626667,1.745744,4.81004,2.379035,3.629305,5.0
0,cifar10_NIID_v1_Only-Masked,93.016667,1.184342,11.790522,1.661338,17.043952,5.0
3,cifar10_NIID_v1_SplitFed-5-5,65.76,4.334674,11.790522,1.661338,29.508153,5.0
1,cifar10_NIID_v1_Vanilla-5-5,70.926667,2.979198,11.790522,1.661338,25.978156,5.0


In [203]:
# Aggregate every stats file whose base name contains "client".
df, table, metric_dict = read_experiment_stats(r'.*client.*')
# Cell output: acc_avg / (cflop_avg * comm_avg) for each experiment.
compute_acc_per_comm_per_flop(metric_dict)

{'client_size_expt_4': 29.280778718935885,
 'client_size_expt_3': 17.67673822614769,
 'client_size_expt_1': 1.7142409266743188,
 'client_size_expt_2': 1.1140754201435437}

In [204]:
# Per-experiment summary: one row per experiment, ordered by experiment name.
df = (
    pd.DataFrame(metric_dict)
    .T
    .reset_index()
    .sort_values("index")
)
df

Unnamed: 0,index,acc_avg,acc_std,tflop_avg,cflop_avg,comm_avg,count
2,client_size_expt_1,89.0501,0.112292,10.885159,5.384503,9.647547,5.0
3,client_size_expt_2,90.060411,0.569671,22.190611,20.450083,3.952977,5.0
1,client_size_expt_3,88.858866,0.223836,26.472143,26.235666,0.191605,5.0
0,client_size_expt_4,85.872667,0.303456,27.028329,27.02686,0.108512,5.0
