In [4]:
import numpy as np
import random
import os

In [5]:
def parse_elapsed_time(str_time):
    """Convert a colon-separated ``H:M:S`` elapsed-time string into seconds.

    The first two fields are read as integers (hours and minutes) and the
    third as a float, so fractional seconds are kept. Fields after the third
    are ignored.

    Args:
        str_time: Elapsed time such as ``'0:10:07.25'``.

    Returns:
        The elapsed time in seconds, as a float.
    """
    fields = str_time.split(':')
    hours = int(fields[0])
    minutes = int(fields[1])
    whole_part = float(hours * 3600 + minutes * 60)
    return whole_part + float(fields[2])

def parse_config_log_realexperiment(filename):
    """Extract per-round elapsed times and test accuracies from an experiment log.

    Every line is stripped, split on single spaces and classified by the
    tokens it contains:

    * a line holding both ``fit_round`` and ``received`` contributes a round
      time, taken from the token two positions before ``fit_round`` and
      converted with ``parse_elapsed_time``;
    * otherwise, a line holding both ``fit`` and ``progress:`` contributes a
      test accuracy, taken from the fourth token after ``progress:`` with its
      last two characters removed.

    All other lines are ignored.

    Args:
        filename: Path of the log file to read.

    Returns:
        A dict with the keys ``'round_time'`` and ``'test_accuracy'``, each
        mapping to a list of floats in the order the lines appear in the file.
    """
    round_time = []
    test_accuracy = []

    # The context manager closes the handle even when a malformed line raises.
    with open(filename) as log_file:
        for line in log_file:
            tokens = line.strip().split(' ')
            if 'fit_round' in tokens and 'received' in tokens:
                round_time.append(parse_elapsed_time(tokens[tokens.index('fit_round')-2]))
            elif 'fit' in tokens and 'progress:' in tokens:
                # [:-2] strips two trailing characters glued to the number
                # (presumably punctuation such as "},") -- confirm against the log format.
                test_accuracy.append(float(tokens[tokens.index('progress:')+4][:-2]))

    return {'round_time': round_time, 'test_accuracy': test_accuracy}

In [10]:
import sys
def measure_time(log_dict, objective, what_to_check):
    """Return the round time at which a metric first reaches ``objective``.

    Args:
        log_dict: Mapping holding a ``'round_time'`` list and the metric list
            stored under ``what_to_check``; both are indexed by round.
        objective: Threshold the metric has to meet or exceed.
        what_to_check: Key of the metric list to scan.

    Returns:
        ``log_dict['round_time'][k]`` for the first index ``k`` whose metric
        value is ``>= objective``, or ``sys.maxsize`` when no value gets there.
    """
    hits = (
        idx
        for idx, value in enumerate(log_dict[what_to_check])
        if value >= objective
    )
    first_hit = next(hits, None)
    if first_hit is None:
        # Sentinel meaning "never reached".
        return sys.maxsize
    return log_dict['round_time'][first_hit]

In [11]:
def measure_final_acc(log_dict, what_to_check, global_final_time, default=0.0):
    """Return the last metric value recorded before a time budget ran out.

    ``log_dict['round_time']`` is scanned in order for the first round whose
    time is at or past ``global_final_time``; the metric of the round just
    before it is returned. When every round ends before the budget, the last
    recorded metric is returned.

    Args:
        log_dict: Mapping holding a ``'round_time'`` list and the metric list
            stored under ``what_to_check``; both are indexed by round.
        what_to_check: Key of the metric list to read.
        global_final_time: Time budget, in the same unit as ``round_time``.
        default: Value returned when the very first round already reaches the
            budget, i.e. no round finished before it.

    Returns:
        The metric value in effect at ``global_final_time``, or ``default``
        when nothing had been recorded by then.

    Raises:
        IndexError: If no round reaches the budget and the metric list is empty.
    """
    for i, elapsed in enumerate(log_dict['round_time']):
        if elapsed >= global_final_time:
            if i == 0:
                # Index -1 would wrap around to the LAST round and report the
                # end-of-run value for a run that had produced nothing yet.
                return default
            return log_dict[what_to_check][i-1]
    return log_dict[what_to_check][-1]

In [22]:
# Time budget of each experiment batch (entry k belongs to batch k+1). It is
# compared against the parsed round times, so it is in seconds.
global_final_times = [981, 1452, 1110]

# (table label, log-file stem) for every compared method. The order fixes the
# column order of `speedup` and `accuracy`.
methods = [
    ('FedAvg+1T', 'fedavg_final_experiment_ddl_1T_'),
    ('FedAvg+2T', 'fedavg_final_experiment_ddl_2T_'),
    ('FedAvg+SPC', 'fedavg_final_experiment_ddl_smartpc_'),
    ('FedAvg+WFA', 'fedavg_final_experiment_ddl_wfa_'),
    ('Prox+1T', 'fedprox_final_experiment_ddl_1T_'),
    ('Prox+2T', 'fedprox_final_experiment_ddl_2T_'),
    ('SampleSelection', 'fedprox_final_experiment_ss_'),
    ('FedBalancer', 'fedbalancer_final_experiment_'),
]
# The leading FedAvg entries of `methods` compete to define the target accuracy.
num_fedavg_baselines = 4
what_to_check = 'test_accuracy'

# One row per batch, one column per method.
speedup = np.zeros((len(global_final_times), len(methods)))
accuracy = np.zeros((len(global_final_times), len(methods)))

for batch_idx in range(1, len(global_final_times) + 1):
    log_path = '../android/log/experiments_batch'+str(batch_idx)+'/'
    global_final_time = global_final_times[batch_idx-1]

    logs = [
        parse_config_log_realexperiment(log_path+stem+str(batch_idx)+'.log')
        for label, stem in methods
    ]
    final_accs = [
        measure_final_acc(log, what_to_check, global_final_time)
        for log in logs
    ]

    # Target accuracy = best FedAvg baseline at the time budget. `index` picks
    # the first maximum, so ties go to the earliest baseline.
    baseline_accs = final_accs[:num_fedavg_baselines]
    best_baseline = baseline_accs.index(max(baseline_accs))
    objective = baseline_accs[best_baseline]
    # Reference time-to-accuracy: how long that best baseline took to get there.
    tta = measure_time(logs[best_baseline], objective, what_to_check)

    # A method that never reaches the objective yields sys.maxsize from
    # measure_time, so its speedup collapses to ~0.
    ttas = [measure_time(log, objective, what_to_check) for log in logs]
    speedup[batch_idx-1, :] = [tta / method_tta for method_tta in ttas]
    accuracy[batch_idx-1, :] = final_accs

print("Averaged Results")
print("{:15s} | {:11s} | {:10s}".format('METHOD', 'SPEEDUP', 'ACCURACY'))
for col, (label, stem) in enumerate(methods):
    # Sample standard deviation (ddof=1) across batches. The round(..., 3)
    # before formatting is kept so the printed numbers do not change.
    print("{:15s}".format(label)+" | %.2f ± %.2f | %.3f ± %.3f" % (
        round(np.mean(speedup[:,col]), 3),
        round(np.std(speedup[:,col], ddof=1), 3),
        round(np.mean(accuracy[:,col]), 3),
        round(np.std(accuracy[:,col], ddof=1), 3),
    ))


Averaged Results
METHOD          | SPEEDUP     | ACCURACY  
FedAvg+1T       | 0.99 ± 0.02 | 0.852 ± 0.020
FedAvg+2T       | 0.75 ± 0.30 | 0.833 ± 0.034
FedAvg+SPC      | 0.61 ± 0.35 | 0.850 ± 0.013
FedAvg+WFA      | 0.92 ± 0.07 | 0.846 ± 0.007
Prox+1T         | 1.03 ± 0.23 | 0.860 ± 0.007
Prox+2T         | 0.90 ± 0.20 | 0.860 ± 0.012
SampleSelection | 1.00 ± 0.13 | 0.846 ± 0.015
FedBalancer     | 1.34 ± 0.07 | 0.885 ± 0.017
