In [1]:
import seaborn as sns
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt 
import numpy as np
import os 
import json 
import ast 
import glob

In [2]:
def compute_mean_std(x, threshold=11, top_k=3):
    """Summarise a comma-separated string of accuracies as ``mean±std``.

    Only values ``>= threshold`` are taken into account. When more than
    ``top_k`` values survive that filter, only the ``top_k`` largest are kept.

    Args:
        x: Comma-separated numbers, e.g. ``"59.86,56.87,58.54"``. Empty
            tokens are ignored.
        threshold: Minimum value for an entry to be included. Defaults to 11
            (presumably to drop runs stuck at chance level on a 10-class
            task -- TODO confirm).
        top_k: Maximum number of (largest) values to aggregate. Defaults to 3.

    Returns:
        ``"nan"`` when no value passes the threshold; otherwise
        ``"{mean}±{std}"`` with both numbers rounded to two decimals. When
        fewer than ``top_k`` values were aggregated, the count is appended in
        parentheses, e.g. ``"74.35±1.33(2)"``. With a single value the sample
        standard deviation is undefined and is printed as ``nan``.

    Raises:
        ValueError: If a non-empty token cannot be converted to ``float``.
    """
    values = [float(token) for token in x.split(",") if token.strip()]
    stable_x = [value for value in values if value >= threshold]
    if len(stable_x) == 0:
        return "nan"
    if len(stable_x) > top_k:
        # Sort in descending order and keep only the top_k largest values.
        stable_x = sorted(stable_x, reverse=True)[:top_k]

    series = pd.Series(stable_x)
    mean = round(series.mean(), 2)
    std = round(series.std(), 2)  # pandas default: sample std (ddof=1)

    if len(stable_x) < top_k:
        return f"{mean}±{std}({len(stable_x)})"
    return f"{mean}±{std}"

In [3]:
def _matches_filter(value, wanted):
    """Return True when ``value`` satisfies the filter ``wanted``.

    ``wanted`` is either a single value (compared with ``==``) or a
    list/tuple/set of accepted values (tested with ``in``).
    """
    if isinstance(wanted, (list, tuple, set)):
        return value in wanted
    return value == wanted


def parse_whole_dir(entry_folder, args):
    """Collect every matching ``result_record.txt`` under a folder into a summary table.

    The folder is searched recursively. Each record is parsed with
    ``result_parser``; runs that fail to parse, are incomplete, or do not match
    the filters in ``args`` are skipped. The surviving runs are grouped by
    their configuration and the per-seed accuracies are summarised with
    ``compute_mean_std``.

    Args:
        entry_folder: Root directory to search.
        args: Filter dictionary.
            Required scalar keys: ``dataset``, ``model``, ``num_clients``,
            ``com_round``, ``weight_decay``, ``criterion``.
            Required container keys (membership test): ``seed``,
            ``noise_mode``, ``noise_ratio_sym``, ``noise_ratio_asym``.
            ``dir_alpha`` and ``lr`` accept either a scalar or a list.
            ``major_classes_num`` is read only for runs whose partition is
            ``'noniid-#label'``.
            Optional keys: ``sample_ratio`` (scalar), ``globalize`` and
            ``partition`` (containers).

    Returns:
        A DataFrame indexed by (com_round, model, num_clients, partition,
        noise_mode, globalize, noise_ratio, lr, dir_alpha) with the columns
        ``seed`` and ``last_acc`` (comma-joined strings, one entry per run) and
        ``last_acc_mean_std``.

    Side effects:
        Prints the number of retained runs whose ``last_acc`` is at least 11.
    """
    file_pattern = os.path.join(entry_folder, '**', 'result_record.txt')
    possible_paths = glob.glob(file_pattern, recursive=True)
    # NOTE(review): this is a substring test on the whole path, so any path
    # containing "old" anywhere (e.g. "folder") is dropped too -- confirm intent.
    possible_paths = [path for path in possible_paths if "old" not in path]

    columns = [
        'com_round', 'model', 'sample_ratio', 'batch_size', 'epochs',
        'centralized', 'partition', 'dir_alpha', 'noise_mode', 'globalize',
        'noise_ratio', 'seed', 'last_acc', 'num_clients', 'lr',
    ]
    group_keys = ['com_round', 'model', 'num_clients', 'partition', 'noise_mode',
                  'globalize', 'noise_ratio', 'lr', 'dir_alpha']

    # One dict per retained run; building whole records keeps every column the
    # same length no matter where a run is rejected.
    records = []
    for file_path in possible_paths:
        accs, losses, setting_dict = result_parser(file_path)
        if len(accs) == 0:
            # Unparseable or empty record.
            continue

        # "last_acc" is the mean accuracy over the final (up to) ten rounds.
        last_ten = accs[-10:]
        last_acc = sum(last_ten) / len(last_ten)

        if setting_dict['dataset'] != args['dataset']:
            continue
        # Skip runs that did not log exactly com_round accuracies.
        if setting_dict['com_round'] != len(accs):
            continue
        if setting_dict['model'] != args['model']:
            continue
        # Scalar or list filter; a scalar value is now actually enforced.
        if not _matches_filter(setting_dict['dir_alpha'], args['dir_alpha']):
            continue
        if setting_dict['num_clients'] != args['num_clients']:
            continue
        if 'sample_ratio' in args and setting_dict['sample_ratio'] != args['sample_ratio']:
            continue
        if 'globalize' in args and setting_dict['globalize'] not in args['globalize']:
            continue
        if 'partition' in args and setting_dict['partition'] not in args['partition']:
            continue
        if setting_dict['seed'] not in args['seed']:
            continue
        if setting_dict['com_round'] != args['com_round']:
            continue
        if not _matches_filter(setting_dict['lr'], args['lr']):
            continue
        if setting_dict['weight_decay'] != args['weight_decay']:
            continue
        if (setting_dict['partition'] == 'noniid-#label'
                and setting_dict['major_classes_num'] != args['major_classes_num']):
            continue
        if setting_dict['criterion'] != args['criterion']:
            continue
        if setting_dict['noise_mode'] not in args['noise_mode']:
            continue

        if setting_dict['min_noise_ratio'] != 0.0:
            # Ranged noise: filter on the lower bound, label as "min-max".
            if setting_dict['min_noise_ratio'] not in args['noise_ratio_asym']:
                continue
            noise_ratio = (str(setting_dict['min_noise_ratio']) + '-'
                           + str(setting_dict['max_noise_ratio']))
        else:
            if setting_dict['noise_ratio'] not in args['noise_ratio_sym']:
                continue
            noise_ratio = setting_dict['noise_ratio']

        records.append({
            'com_round': setting_dict['com_round'],
            'model': setting_dict['model'],
            'sample_ratio': setting_dict['sample_ratio'],
            'batch_size': setting_dict['batch_size'],
            'epochs': setting_dict['epochs'],
            'centralized': setting_dict['centralized'],
            'partition': setting_dict['partition'],
            'dir_alpha': setting_dict['dir_alpha'],
            'noise_mode': setting_dict['noise_mode'],
            'globalize': setting_dict['globalize'],
            'noise_ratio': noise_ratio,
            'seed': setting_dict['seed'],
            'last_acc': last_acc,
            'num_clients': setting_dict['num_clients'],
            'lr': setting_dict['lr'],
        })

    # Passing explicit columns keeps the frame well-formed when nothing matched.
    results_df = pd.DataFrame(records, columns=columns)
    results_df = results_df.groupby(group_keys)
    results_df = results_df.agg({
        'seed': lambda x: ','.join([str(i) for i in x]),
        'last_acc': lambda x: ','.join([str(round(i, 2)) for i in x]),
    })

    # Add the mean/std summary of the "last_acc" column to the DataFrame.
    results_df = results_df.assign(
        last_acc_mean_std=results_df["last_acc"].apply(compute_mean_std)
    )

    # Only runs with last_acc >= 11 are counted in the printed total.
    stable_count = sum(1 for record in records if record['last_acc'] >= 11)
    print(f"Number of total records: {stable_count}")
    return results_df



In [4]:
def result_parser(result_path):
    """Parse one result record file.

    The file is read as three lines:
      * line 0: a Python literal holding the hyperparameter settings (a dict),
      * line 1: the accuracy history; the first 5 characters and the last
        character are stripped and the rest is split on ``', '``,
      * line 2: the loss history; the first 6 characters and the last
        character are stripped and the rest is split on ``', '``.

    Args:
        result_path: Path of the record file.

    Returns:
        A tuple ``(accs, losses, setting_dict)`` of two lists of floats and a
        dict. If the file cannot be read or parsed, or its first line is not a
        dict, ``([], [], {})`` is returned so callers can skip the record by
        checking for an empty ``accs``.
    """
    try:
        with open(result_path, 'r') as f:
            lines = f.readlines()
        # hist accuracy
        accs = [float(item) for item in lines[1].strip()[5:-1].split(', ')]
        # hist losses
        losses = [float(item) for item in lines[2].strip()[6:-1].split(', ')]
        # hyperparameter setting
        setting_dict = ast.literal_eval(lines[0].strip())
    except (OSError, IndexError, ValueError, SyntaxError, TypeError,
            MemoryError, RecursionError):
        # Best-effort parsing: unreadable, truncated or malformed files are
        # skipped, while KeyboardInterrupt/SystemExit still propagate.
        return [], [], {}
    if not isinstance(setting_dict, dict):
        return [], [], {}
    return accs, losses, setting_dict

In [5]:
# CIFAR-10 / VGG16, cross-entropy criterion, 10 clients with sample_ratio 1.
print("cifar10 ce 10选10")
args = {
    'dataset': "cifar10",
    'model': "VGG16",
    'criterion': "ce",
    'major_classes_num': 3,
    'dir_alpha': [0.1, 2],
    'seed': [1, 2, 3],
    'lr': [0.01],
    'num_clients': 10,
    'sample_ratio': 1,
    'noise_mode': ['asym', 'sym', 'clean'],
    'com_round': 500,
    'weight_decay': 5e-4,
    'noise_ratio_sym': [0, 0.4],
    'noise_ratio_asym': [0.3],  # 0.3 denotes the 0.3-0.5 range
}
entry_folder = './Fed-Noisy-checkpoint'
parse_whole_dir(entry_folder, args)

cifar10 ce 10选10
Number of total records: 60


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,seed,last_acc,last_acc_mean_std
com_round,model,num_clients,partition,noise_mode,globalize,noise_ratio,lr,dir_alpha,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
500,VGG16,10,iid,asym,False,0.3-0.5,0.01,0.1,213,"59.86,56.87,58.54",58.42±1.5
500,VGG16,10,iid,asym,True,0.4,0.01,0.1,213,"58.27,58.1,57.17",57.85±0.59
500,VGG16,10,iid,clean,True,0.0,0.01,0.1,213,"90.65,90.98,90.85",90.83±0.17
500,VGG16,10,iid,sym,False,0.3-0.5,0.01,0.1,213,"66.43,65.01,65.08",65.51±0.8
500,VGG16,10,iid,sym,True,0.4,0.01,0.1,213,"64.71,65.47,65.07",65.08±0.38
500,VGG16,10,noniid-#label,asym,False,0.3-0.5,0.01,0.1,213,"29.55,23.77,26.21",26.51±2.9
500,VGG16,10,noniid-#label,asym,True,0.4,0.01,0.1,213,"40.77,47.99,43.42",44.06±3.65
500,VGG16,10,noniid-#label,clean,True,0.0,0.01,0.1,213,"71.42,78.01,69.85",73.09±4.33
500,VGG16,10,noniid-#label,sym,False,0.3-0.5,0.01,0.1,213,"19.3,16.68,15.78",17.25±1.83
500,VGG16,10,noniid-#label,sym,True,0.4,0.01,0.1,213,"31.84,35.19,32.09",33.04±1.87


In [6]:
# CIFAR-10 / VGG16, "sce" criterion, 10 clients with sample_ratio 1.
print("cifar10 sce 10选10")
args = {
    'dataset': "cifar10",
    'model': "VGG16",
    'criterion': "sce",
    'major_classes_num': 3,
    'dir_alpha': [0.1, 2, 3],
    'seed': [1, 2, 3, 4, 5, 6, 7],
    'lr': [0.005],  # training does not take off with a learning rate of 0.01
    'num_clients': 10,
    'sample_ratio': 1,
    'com_round': 500,
    'weight_decay': 5e-4,
    'noise_mode': ['asym', 'sym', 'clean'],
    'noise_ratio_sym': [0, 0.4],
    'noise_ratio_asym': [0.3],  # 0.3 denotes the 0.3-0.5 range
}
entry_folder = './Fed-Noisy-checkpoint'
parse_whole_dir(entry_folder, args)

cifar10 sce 10选10
Number of total records: 58


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,seed,last_acc,last_acc_mean_std
com_round,model,num_clients,partition,noise_mode,globalize,noise_ratio,lr,dir_alpha,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
500,VGG16,10,iid,asym,False,0.3-0.5,0.005,0.1,123,"73.88,77.7,74.14",75.24±2.13
500,VGG16,10,iid,asym,True,0.4,0.005,0.1,1423,"10.0,76.3,74.72,74.19",75.07±1.1
500,VGG16,10,iid,clean,True,0.0,0.005,0.1,123,"90.56,90.8,90.29",90.55±0.26
500,VGG16,10,iid,sym,False,0.3-0.5,0.005,0.1,123,"83.21,83.25,84.24",83.57±0.58
500,VGG16,10,iid,sym,True,0.4,0.005,0.1,1423,"84.08,83.9,83.58,10.0",83.85±0.25
500,VGG16,10,noniid-#label,asym,False,0.3-0.5,0.005,0.1,123,"57.04,55.4,54.13",55.52±1.46
500,VGG16,10,noniid-#label,asym,True,0.4,0.005,0.1,123,"68.75,64.07,63.64",65.49±2.83
500,VGG16,10,noniid-#label,clean,True,0.0,0.005,0.1,123,"82.61,80.57,83.15",82.11±1.36
500,VGG16,10,noniid-#label,sym,False,0.3-0.5,0.005,0.1,123,"63.27,65.87,59.39",62.84±3.26
500,VGG16,10,noniid-#label,sym,True,0.4,0.005,0.1,123,"77.84,75.23,73.35",75.47±2.25


In [7]:
# CIFAR-10 / VGG16, cross-entropy criterion, 100 clients with sample_ratio 0.1.
print("cifar10 ce 100选10")
args = {
    'dataset': "cifar10",
    'model': "VGG16",
    'noise_mode': ['asym', 'sym'],
    'criterion': "ce",
    'major_classes_num': 3,
    'dir_alpha': [0.1, 2, 3, 0.3, 0.2],
    'seed': [1, 2, 3, 4, 5],
    'lr': [0.01, 0.005, 0.002, 0.001],
    'num_clients': 100,
    'sample_ratio': 0.1,
    'com_round': 500,
    'weight_decay': 5e-4,
    'noise_ratio_sym': [0, 0.4],
    'noise_ratio_asym': [0.3],  # 0.3 denotes the 0.3-0.5 range
}
entry_folder = './Fed-Noisy-checkpoint'
parse_whole_dir(entry_folder, args)

cifar10 ce 100选10
Number of total records: 58


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,seed,last_acc,last_acc_mean_std
com_round,model,num_clients,partition,noise_mode,globalize,noise_ratio,lr,dir_alpha,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
500,VGG16,100,iid,asym,False,0.3-0.5,0.01,0.1,312,"64.81,76.0,70.06",70.29±5.6
500,VGG16,100,iid,asym,True,0.4,0.01,0.1,312,"72.13,73.93,66.81",70.96±3.7
500,VGG16,100,iid,sym,False,0.3-0.5,0.01,0.1,312,"70.11,68.46,59.1",65.89±5.94
500,VGG16,100,iid,sym,True,0.4,0.01,0.1,312,"70.43,70.53,58.71",66.56±6.8
500,VGG16,100,noniid-#label,asym,False,0.3-0.5,0.005,0.1,3142,"11.08,14.37,10.0,13.28",12.91±1.68
500,VGG16,100,noniid-#label,asym,False,0.3-0.5,0.01,0.1,312,"13.93,14.64,10.0",14.28±0.5(2)
500,VGG16,100,noniid-#label,asym,True,0.4,0.005,0.1,312,"28.97,26.86,29.24",28.36±1.3
500,VGG16,100,noniid-#label,asym,True,0.4,0.01,0.1,312,"35.42,39.0,41.36",38.59±2.99
500,VGG16,100,noniid-#label,sym,False,0.3-0.5,0.001,0.1,1,14.02,14.02±nan(1)
500,VGG16,100,noniid-#label,sym,False,0.3-0.5,0.002,0.1,1,14.22,14.22±nan(1)


In [8]:
# CIFAR-10 / VGG16, "sce" criterion, 100 clients with sample_ratio 0.1.
print("cifar10 sce 100选10")
args = {
    'dataset': "cifar10",
    'model': "VGG16",
    'criterion': "sce",
    'major_classes_num': 3,
    'dir_alpha': [0.1, 2, 3, 0.3, 0.2],
    'seed': [1, 2, 3, 4, 5],
    'lr': [0.01, 0.005, 0.002, 0.001],
    'num_clients': 100,
    'sample_ratio': 0.1,
    'noise_mode': ['asym', 'sym'],
    'com_round': 500,
    'weight_decay': 5e-4,
    'noise_ratio_sym': [0, 0.4],
    'noise_ratio_asym': [0.3],  # 0.3 denotes the 0.3-0.5 range
}
entry_folder = './Fed-Noisy-checkpoint'
parse_whole_dir(entry_folder, args)

cifar10 sce 100选10
Number of total records: 14


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,seed,last_acc,last_acc_mean_std
com_round,model,num_clients,partition,noise_mode,globalize,noise_ratio,lr,dir_alpha,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
500,VGG16,100,iid,asym,False,0.3-0.5,0.005,0.1,1,73.59,73.59±nan(1)
500,VGG16,100,iid,asym,False,0.3-0.5,0.01,0.1,23,"75.29,73.41",74.35±1.33(2)
500,VGG16,100,iid,asym,True,0.4,0.01,0.1,23,"72.65,73.67",73.16±0.72(2)
500,VGG16,100,iid,sym,False,0.3-0.5,0.01,0.1,3,79.66,79.66±nan(1)
500,VGG16,100,iid,sym,True,0.4,0.005,0.1,1,78.49,78.49±nan(1)
500,VGG16,100,iid,sym,True,0.4,0.01,0.1,3,79.83,79.83±nan(1)
500,VGG16,100,noniid-#label,asym,False,0.3-0.5,0.01,0.1,23,"19.51,19.79",19.65±0.2(2)
500,VGG16,100,noniid-#label,asym,True,0.4,0.01,0.1,23,"10.0,10.0",
500,VGG16,100,noniid-#label,sym,False,0.3-0.5,0.005,0.1,1,16.0,16.0±nan(1)
500,VGG16,100,noniid-#label,sym,False,0.3-0.5,0.01,0.1,23,"12.44,17.02",14.73±3.24(2)


In [11]:
# CIFAR-10 / VGG16, cross-entropy, 10 clients; noise ratios 0 to 0.8 in steps
# of 0.1, for both 'sym' and 'asym' noise modes with globalize=True.
print("cifar10 ce 10选10, sym0-0.8间隔0.1")
args = {
    'dataset': "cifar10",
    'model': "VGG16",
    'criterion': "ce",
    'major_classes_num': 3,
    'dir_alpha': [0.1, 2],
    'seed': [1, 2, 3, 4],
    'lr': [0.01],
    'num_clients': 10,
    'sample_ratio': 1,
    'com_round': 500,
    'weight_decay': 5e-4,
    'noise_mode': ['sym', 'asym'],
    'globalize': [True],
    'partition': ['iid', 'noniid-#label', 'noniid-labeldir'],
    'noise_ratio_sym': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
    'noise_ratio_asym': [0.3],  # 0.3 denotes the 0.3-0.5 range
}
entry_folder = './Fed-Noisy-checkpoint'
parse_whole_dir(entry_folder, args)


cifar10 ce 10选10, sym0-0.8间隔0.1
Number of total records: 127


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,seed,last_acc,last_acc_mean_std
com_round,model,num_clients,partition,noise_mode,globalize,noise_ratio,lr,dir_alpha,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
500,VGG16,10,iid,asym,True,0.1,0.01,0.1,213,"85.94,86.41,86.19",86.18±0.24
500,VGG16,10,iid,asym,True,0.2,0.01,0.1,213,"79.57,80.33,80.13",80.01±0.39
500,VGG16,10,iid,asym,True,0.3,0.01,0.1,213,"70.22,70.71,69.77",70.23±0.47
500,VGG16,10,iid,asym,True,0.4,0.01,0.1,213,"58.27,58.1,57.17",57.85±0.59
500,VGG16,10,iid,asym,True,0.5,0.01,0.1,213,"44.54,44.4,44.07",44.34±0.24
500,VGG16,10,iid,asym,True,0.6,0.01,0.1,213,"30.95,31.12,31.11",31.06±0.1
500,VGG16,10,iid,sym,True,0.1,0.01,0.1,213,"85.7,85.88,86.01",85.86±0.16
500,VGG16,10,iid,sym,True,0.2,0.01,0.1,2413,"80.74,80.4,80.95,80.83",80.84±0.11
500,VGG16,10,iid,sym,True,0.3,0.01,0.1,213,"74.16,73.86,73.75",73.92±0.21
500,VGG16,10,iid,sym,True,0.4,0.01,0.1,2413,"64.71,64.61,65.47,65.07",65.08±0.38


In [10]:
# CIFAR-10 / VGG16, cross-entropy, 10 clients; noise ratios 0 to 0.8 in steps
# of 0.1, 'sym' noise mode only with globalize=True.
print("cifar10 ce 10选10, sym0-0.8间隔0.1")
args = {
    'dataset': "cifar10",
    'model': "VGG16",
    'criterion': "ce",
    'major_classes_num': 3,
    'dir_alpha': [0.1, 2],
    'seed': [1, 2, 3],
    'lr': [0.01],
    'num_clients': 10,
    'sample_ratio': 1,
    'com_round': 500,
    'weight_decay': 5e-4,
    'noise_mode': ['sym'],
    'globalize': [True],
    'partition': ['iid', 'noniid-#label', 'noniid-labeldir'],
    'noise_ratio_sym': [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8],
    'noise_ratio_asym': [0.3],  # 0.3 denotes the 0.3-0.5 range
}
entry_folder = './Fed-Noisy-checkpoint'
parse_whole_dir(entry_folder, args)

cifar10 ce 10选10, sym0-0.8间隔0.1
Number of total records: 68


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,Unnamed: 7_level_0,Unnamed: 8_level_0,seed,last_acc,last_acc_mean_std
com_round,model,num_clients,partition,noise_mode,globalize,noise_ratio,lr,dir_alpha,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
500,VGG16,10,iid,sym,True,0.1,0.01,0.1,213,"85.7,85.88,86.01",85.86±0.16
500,VGG16,10,iid,sym,True,0.2,0.01,0.1,213,"80.74,80.95,80.83",80.84±0.11
500,VGG16,10,iid,sym,True,0.3,0.01,0.1,213,"74.16,73.86,73.75",73.92±0.21
500,VGG16,10,iid,sym,True,0.4,0.01,0.1,213,"64.71,65.47,65.07",65.08±0.38
500,VGG16,10,iid,sym,True,0.5,0.01,0.1,213,"52.34,53.35,54.93",53.54±1.31
500,VGG16,10,iid,sym,True,0.6,0.01,0.1,213,"39.71,38.98,40.76",39.82±0.89
500,VGG16,10,iid,sym,True,0.7,0.01,0.1,213,"25.43,27.76,25.73",26.31±1.27
500,VGG16,10,iid,sym,True,0.8,0.01,0.1,213,"28.79,22.44,27.65",26.29±3.39
500,VGG16,10,noniid-#label,sym,True,0.1,0.01,0.1,213,"57.76,61.33,59.11",59.4±1.8
500,VGG16,10,noniid-#label,sym,True,0.2,0.01,0.1,213,"48.24,50.87,48.59",49.23±1.43
