In [50]:
import seaborn as sns
import pandas as pd
import matplotlib
from matplotlib import pyplot as plt 
import numpy as np
import os 
import json 
import ast 
import glob
import re
import datetime

In [2]:
def compute_mean_std(x):
    """Summarise a comma-separated string of numbers as ``"<mean>±<std>"``.

    Args:
        x: String such as ``"98.5,98.4,98.6"``; every comma-separated token
            must be parseable by ``float``.

    Returns:
        A string holding the mean and the standard deviation, each rounded to
        two decimals and joined by ``±``. The standard deviation is whatever
        ``pandas.Series.std`` returns, so a single value yields ``nan``.
    """
    samples = pd.Series(list(map(float, x.split(","))))
    center, spread = (round(stat, 2) for stat in (samples.mean(), samples.std()))
    return f"{center}±{spread}"

In [67]:
def parse_whole_dir_mnist(entry_folder):
    """Aggregate the final accuracy of finished MNIST / SimpleCNN runs.

    Every ``result_record.txt`` found recursively below ``entry_folder`` is
    parsed with ``result_parser``. A run is kept only when its settings report
    ``dataset == "mnist"`` and ``model == "SimpleCNN"`` and the number of
    recorded accuracies equals ``com_round``; all other runs are skipped
    silently. Kept runs are grouped by
    ``(com_round, model, partition, noise_mode, globalize, noise_ratio)``.

    Args:
        entry_folder: Root directory that is searched recursively.

    Returns:
        A DataFrame indexed by the grouping keys with the columns
        ``seed`` (comma-joined seeds of the group),
        ``last_acc`` (comma-joined last-round accuracies, rounded to 2 decimals)
        and ``last_acc_mean_std`` (``compute_mean_std`` applied to ``last_acc``).

    Side effects:
        Prints the number of runs that passed the filters.
    """
    group_keys = ['com_round', 'model', 'partition', 'noise_mode', 'globalize', 'noise_ratio']
    # Settings copied verbatim from each run's setting dict into its record.
    setting_columns = [
        'com_round', 'model', 'sample_ratio', 'batch_size', 'epochs',
        'centralized', 'partition', 'dir_alpha', 'noise_mode', 'globalize',
    ]

    file_pattern = os.path.join(entry_folder, '**', 'result_record.txt')
    records = []
    for file_path in glob.glob(file_pattern, recursive=True):
        accs, _losses, setting_dict = result_parser(file_path)

        # Filter first, so nothing is derived from runs that get discarded.
        if setting_dict['dataset'] != "mnist":
            continue
        # Keep only runs with one accuracy per communication round; the
        # `not accs` guard protects the accs[-1] lookup below.
        if not accs or setting_dict['com_round'] != len(accs):
            continue
        if setting_dict['model'] != "SimpleCNN":
            continue

        record = {key: setting_dict[key] for key in setting_columns}
        # A non-zero minimum noise ratio is reported as a "min-max" range label;
        # otherwise the single noise_ratio value is used.
        if setting_dict['min_noise_ratio'] != 0.0:
            record['noise_ratio'] = (
                str(setting_dict['min_noise_ratio']) + '-' + str(setting_dict['max_noise_ratio'])
            )
        else:
            record['noise_ratio'] = setting_dict['noise_ratio']
        record['seed'] = setting_dict['seed']
        record['last_acc'] = accs[-1]
        records.append(record)

    # Passing `columns` keeps the expected columns even when no run matched.
    results_df = pd.DataFrame(records, columns=setting_columns + ['noise_ratio', 'seed', 'last_acc'])
    results_df = results_df.groupby(group_keys).agg({
        'seed': lambda x: ','.join(str(i) for i in x),
        'last_acc': lambda x: ','.join(str(round(i, 2)) for i in x),
    })

    # Add the mean and standard deviation of the "last_acc" column to the DataFrame.
    results_df = results_df.assign(
        last_acc_mean_std=results_df["last_acc"].apply(compute_mean_std)
    )

    print(f"Number of total records: {len(records)}")
    return results_df



In [68]:
def result_parser(result_path):
    """Parse one ``result_record.txt`` file.

    The first three lines of the file are used:
      1. a Python literal holding the hyperparameter settings,
      2. the accuracy history: a 5-character prefix, then ``", "``-separated
         numbers, then one closing character,
      3. the loss history: same layout with a 6-character prefix.

    Args:
        result_path: Path of the record file.

    Returns:
        ``(accs, losses, setting_dict)`` where ``accs`` and ``losses`` are lists
        of floats (empty when the recorded history is empty) and
        ``setting_dict`` is the evaluated settings literal.

    Raises:
        IndexError: If the file has fewer than three lines.
        ValueError: If a history entry is not a number or the settings line is
            not a valid literal (``ast.literal_eval`` may also raise
            ``SyntaxError``).
    """
    with open(result_path, 'r') as f:
        lines = f.readlines()

    def _parse_history(line, prefix_len):
        # Drop the fixed-width prefix and the closing character.
        body = line.strip()[prefix_len:-1]
        # An empty history would otherwise crash on float('').
        if not body.strip():
            return []
        return [float(item) for item in body.split(', ')]

    # hist accuracy
    accs = _parse_history(lines[1], 5)
    # hist losses
    losses = _parse_history(lines[2], 6)
    # hyperparameter setting
    setting_dict = ast.literal_eval(lines[0].strip())
    return accs, losses, setting_dict

In [69]:
# Aggregate the MNIST runs found below the current working directory; the
# returned table is shown as this cell's output.
entry_folder = "./"
parse_whole_dir_mnist(entry_folder)

Number of total records: 352


Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,seed,last_acc,last_acc_mean_std
com_round,model,partition,noise_mode,globalize,noise_ratio,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
250,SimpleCNN,iid,asym,False,0.1-0.3,12345,"98.51,98.46,98.45,98.66,98.53",98.52±0.08
250,SimpleCNN,iid,asym,False,0.3-0.5,412345,"97.09,97.58,96.57,97.41,97.61,97.28",97.26±0.39
250,SimpleCNN,iid,asym,False,0.5-0.7,12345,"50.99,51.0,51.27,51.14,51.06",51.09±0.12
250,SimpleCNN,iid,asym,True,0.2,12345,"98.59,98.45,98.56,98.63,98.45",98.54±0.08
250,SimpleCNN,iid,asym,True,0.4,12345,"97.43,96.7,97.53,97.66,97.63",97.39±0.4
250,SimpleCNN,iid,asym,True,0.6,12345,"51.19,52.79,51.06,52.35,51.95",51.87±0.74
250,SimpleCNN,iid,clean,True,0.0,2412345,"98.6,98.66,98.66,98.74,98.66,98.69,98.77",98.68±0.06
250,SimpleCNN,iid,sym,False,0.1-0.3,234512345,"98.61,98.64,98.63,98.54,98.61,98.62,98.57,98.6...",98.62±0.04
250,SimpleCNN,iid,sym,False,0.3-0.5,34512345,"98.27,98.27,98.04,98.17,98.28,98.33,98.27,98.1",98.22±0.1
250,SimpleCNN,iid,sym,False,0.5-0.7,24512345,"97.05,97.32,96.9,96.9,97.11,96.82,97.29,96.8",97.02±0.2


In [74]:
def _report_unfinished_run(result_path, recent_seconds=3600):
    """Print the last ``server.log`` line of an unfinished run if it is recent.

    The log is looked up next to ``result_path``. Nothing is printed when the
    log cannot be read, is empty, its last line carries no parseable
    ``YYYY-MM-DD HH:MM:SS`` timestamp, or that timestamp is at least
    ``recent_seconds`` old.
    """
    log_path = os.path.join(os.path.dirname(result_path), 'server.log').replace('\\', '/')
    try:
        with open(log_path, 'r') as f:
            lines = f.readlines()
    except OSError:
        # Progress reporting is best effort; a missing log must not abort the scan.
        return
    if not lines:
        return

    last_line = lines[-1]
    stamp = re.search(r'\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2}', last_line)
    if stamp is None:
        return
    try:
        log_datetime = datetime.datetime.strptime(stamp.group(), "%Y-%m-%d %H:%M:%S")
    except ValueError:
        # Digits matched the pattern but do not form a valid date/time.
        return

    age = datetime.datetime.now() - log_datetime
    if age.total_seconds() < recent_seconds:
        middle_path = log_path.split('/server.log')[0]
        # Message text: "not finished yet, result of the latest round so far:".
        print(middle_path, "暂未跑完，目前最后一轮的结果:")
        print(last_line)


def parse_whole_dir_cifar10(entry_folder):
    """Aggregate the late-round accuracy of finished CIFAR-10 runs.

    Every ``result_record.txt`` found recursively below ``entry_folder`` is
    parsed with ``result_parser``. Only runs whose settings report
    ``dataset == "cifar10"``, ``lr == 0.01`` and ``weight_decay == 5e-4`` are
    considered. Runs whose number of recorded accuracies differs from
    ``com_round`` are excluded from the table; for those, the last line of the
    neighbouring ``server.log`` is printed when it was written less than an
    hour ago. Remaining runs are grouped by
    ``(com_round, model, partition, noise_mode, globalize, noise_ratio)``.

    Args:
        entry_folder: Root directory that is searched recursively.

    Returns:
        A DataFrame indexed by the grouping keys with the columns
        ``seed`` (comma-joined seeds of the group),
        ``last_acc`` (comma-joined per-run means of the last 10 recorded
        accuracies, rounded to 2 decimals) and ``last_acc_mean_std``
        (``compute_mean_std`` applied to ``last_acc``).

    Side effects:
        Prints progress lines for recently active unfinished runs and the
        number of runs that entered the table.
    """
    target_lr = 0.01
    target_weight_decay = 5e-4
    last_rounds = 10  # number of trailing rounds averaged into last_acc

    group_keys = ['com_round', 'model', 'partition', 'noise_mode', 'globalize', 'noise_ratio']
    # Settings copied verbatim from each run's setting dict into its record.
    setting_columns = [
        'com_round', 'model', 'sample_ratio', 'batch_size', 'epochs',
        'centralized', 'partition', 'dir_alpha', 'noise_mode', 'globalize',
    ]

    file_pattern = os.path.join(entry_folder, '**', 'result_record.txt')
    records = []
    for file_path in glob.glob(file_pattern, recursive=True):
        file_path = file_path.replace('\\', '/')
        accs, _losses, setting_dict = result_parser(file_path)

        # Select the dataset.
        if setting_dict['dataset'] != "cifar10":
            continue

        # Select the hyperparameters.
        if setting_dict['lr'] != target_lr or setting_dict['weight_decay'] != target_weight_decay:
            continue

        # Report the progress of runs that have not finished, then skip them.
        if setting_dict['com_round'] != len(accs):
            _report_unfinished_run(file_path)
            continue
        if not accs:
            # com_round == 0 with an empty history: nothing to average.
            continue

        tail = accs[-last_rounds:]
        record = {key: setting_dict[key] for key in setting_columns}
        # A non-zero minimum noise ratio is reported as a "min-max" range label;
        # otherwise the single noise_ratio value is used.
        if setting_dict['min_noise_ratio'] != 0.0:
            record['noise_ratio'] = (
                str(setting_dict['min_noise_ratio']) + '-' + str(setting_dict['max_noise_ratio'])
            )
        else:
            record['noise_ratio'] = setting_dict['noise_ratio']
        record['seed'] = setting_dict['seed']
        record['last_acc'] = sum(tail) / len(tail)
        records.append(record)

    # Passing `columns` keeps the expected columns even when no run matched.
    results_df = pd.DataFrame(records, columns=setting_columns + ['noise_ratio', 'seed', 'last_acc'])
    results_df = results_df.groupby(group_keys).agg({
        'seed': lambda x: ','.join(str(i) for i in x),
        'last_acc': lambda x: ','.join(str(round(i, 2)) for i in x),
    })

    # Add the mean and standard deviation of the "last_acc" column to the DataFrame.
    results_df = results_df.assign(
        last_acc_mean_std=results_df["last_acc"].apply(compute_mean_std)
    )

    print(f"Number of total records: {len(records)}")
    return results_df

def result_parser(result_path):
    """Parse one ``result_record.txt`` file.

    NOTE(review): this function is also defined in an earlier cell of this
    notebook; this later definition shadows it once the cell runs. Keep the
    two in sync or drop one of them.

    The first three lines of the file are used:
      1. a Python literal holding the hyperparameter settings,
      2. the accuracy history: a 5-character prefix, then ``", "``-separated
         numbers, then one closing character,
      3. the loss history: same layout with a 6-character prefix.

    Args:
        result_path: Path of the record file.

    Returns:
        ``(accs, losses, setting_dict)`` where ``accs`` and ``losses`` are lists
        of floats (empty when the recorded history is empty) and
        ``setting_dict`` is the evaluated settings literal.

    Raises:
        IndexError: If the file has fewer than three lines.
        ValueError: If a history entry is not a number or the settings line is
            not a valid literal (``ast.literal_eval`` may also raise
            ``SyntaxError``).
    """
    with open(result_path, 'r') as f:
        lines = f.readlines()

    def _parse_history(line, prefix_len):
        # Drop the fixed-width prefix and the closing character.
        body = line.strip()[prefix_len:-1]
        # An empty history would otherwise crash on float('').
        if not body.strip():
            return []
        return [float(item) for item in body.split(', ')]

    # hist accuracy
    accs = _parse_history(lines[1], 5)
    # hist losses
    losses = _parse_history(lines[2], 6)
    # hyperparameter setting
    setting_dict = ast.literal_eval(lines[0].strip())
    return accs, losses, setting_dict

In [75]:
# Aggregate the CIFAR-10 runs found below the current working directory; the
# returned table is shown as this cell's output, after any progress lines the
# function prints for unfinished runs.
entry_folder = "./"
parse_whole_dir_cifar10(entry_folder)

./Fed-Noisy-checkpoint_V13/fedNLL_cifar10_10_iid__global_asym_0.20/FedAvg/fedavg-criterion=ce--arch=VGG16-lr=0.0100-momentum=0.90-weight_decay=0.00050-com_round=500-local_epochs=5-batch_size=128-seed=4 暂未跑完，目前最后一轮的结果:
2023-03-30 11:53:28,569 - ServerHandler - INFO - Round [491/500] test performance on server: 	 Loss: 0.55891 	 Acc: 84.970%

./Fed-Noisy-checkpoint_V13/fedNLL_cifar10_10_noniid-#label_2_global_asym_0.20/FedAvg/fedavg-criterion=ce--arch=VGG16-lr=0.0100-momentum=0.90-weight_decay=0.00050-com_round=500-local_epochs=5-batch_size=128-seed=4 暂未跑完，目前最后一轮的结果:
2023-03-30 11:55:37,275 - ServerHandler - INFO - Round [415/500] test performance on server: 	 Loss: 1.73853 	 Acc: 36.340%

./Fed-Noisy-checkpoint_V13/fedNLL_cifar10_10_noniid-labeldir_0.30_10_global_asym_0.40/FedAvg/fedavg-criterion=ce--arch=VGG16-lr=0.0100-momentum=0.90-weight_decay=0.00050-com_round=500-local_epochs=5-batch_size=128-seed=4 暂未跑完，目前最后一轮的结果:
2023-03-30 11:48:19,751 - ServerHandler - INFO - Round [47/500] te

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,seed,last_acc,last_acc_mean_std
com_round,model,partition,noise_mode,globalize,noise_ratio,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
500,VGG13,iid,clean,True,0.0,1,90.6,90.6±nan
500,VGG16,iid,clean,True,0.0,45123,"90.84,90.42,90.98,90.65,90.85",90.75±0.22
500,VGG16,iid,sym,True,0.2,45123,"80.4,80.38,80.95,80.74,80.83",80.66±0.26
500,VGG16,iid,sym,True,0.4,4512,"64.61,64.34,65.47,64.71",64.78±0.48
500,VGG16,iid,sym,True,0.6,4512,"40.07,39.5,38.98,39.71",39.56±0.46
500,VGG16,iid,sym,True,0.8,4512,"22.66,22.28,22.44,28.79",24.04±3.17
500,VGG16,noniid-#label,clean,True,0.0,45123,"38.71,37.6,30.56,29.46,35.77",34.42±4.18
500,VGG16,noniid-#label,sym,False,0.1-0.3,1,14.94,14.94±nan
500,VGG16,noniid-#label,sym,False,0.3-0.5,1,10.0,10.0±nan
500,VGG16,noniid-#label,sym,True,0.2,45123,"18.64,23.58,19.57,21.55,22.75",21.22±2.09
