In [1]:
import matplotlib.pyplot as plt
import numpy as np
import re
import os
import pandas as pd
import ast
from IPython.display import display, HTML
from io import StringIO


def _parse_id_list(text):
    """Convert a bracketed id list captured from the log into a Python list.

    Both the comma-separated form (``[1, 2]``) and the whitespace-separated
    form (``[1 2]``) are accepted.
    """
    if ',' not in text:
        # Insert the missing commas, then drop the stray ones that end up next
        # to the brackets when the list is padded with spaces (``[ 1 2]``).
        text = ', '.join(text.split()).replace('[,', '[').replace(',]', ']')
    return ast.literal_eval(text)


def extract_file_data(file):
    """Summarise client participation and failures recorded in a run log.

    The log is scanned for lines of the form
    ``Round <n> failed clients: [...]`` and
    ``Sampled clients for round <n>: [...]``.

    Parameters
    ----------
    file : str
        Path of the log file to read.

    Returns
    -------
    tuple
        ``(avg_failed, unique_participants, total_participants, rounds_failed)``:

        * ``avg_failed`` - mean number of failed clients per reported round
          (``nan`` when the log reports no rounds);
        * ``unique_participants`` - number of distinct sampled client ids;
        * ``total_participants`` - number of sampled clients summed over all
          rounds (a client counts once per round it was sampled in);
        * ``rounds_failed`` - number of rounds with at least one failed client.
    """
    with open(file) as log:
        raw = log.read()

    failed_counts = [
        len(_parse_id_list(ids))
        for ids in re.findall(r'Round \d+ failed clients: (\[[^]]*])', raw)
    ]
    # np.mean([]) would warn; report "no data" explicitly as nan instead.
    avg_failed = np.mean(failed_counts) if failed_counts else float('nan')
    rounds_failed = sum(1 for count in failed_counts if count > 0)

    selected = [
        _parse_id_list(ids)
        for ids in re.findall(r'Sampled clients for round \d+: (\[[^]]*])', raw)
    ]
    unique_participants = len(set().union(*selected))
    # Plain sum of per-round sizes: also well defined when no round was logged.
    total_participants = sum(len(round_clients) for round_clients in selected)
    return avg_failed, unique_participants, total_participants, rounds_failed


def extract_config(raw, key):
    """Return the value assigned to ``key`` in a ``key = value`` listing.

    Parameters
    ----------
    raw : str
        Text to search.
    key : str
        Option name; it is matched literally, not as a regular expression.

    Returns
    -------
    str
        The first whitespace-free token following ``key``, whitespace, ``=``
        and whitespace; an empty string when no such assignment is present.
    """
    # Escape the key so characters such as '.' cannot act as regex operators,
    # and tolerate the key occurring without a matching "key = value" form.
    match = re.search(re.escape(key) + r'\s+=\s+(\S+)', raw)
    return match.group(1) if match else ''


def prepare_raw_csv():
    """Collect one summary row per run and store the table as ``raw.csv``.

    Every sub-directory of ``./results/cifar/`` and ``./results/femnist/`` is
    treated as one run. For each run the function reads

    * ``args.txt`` for the ``trace_distro`` and ``selector`` options, which are
      rewritten into display names,
    * the ``*.out`` log for failure/participation statistics, and
    * ``accuracies.npy`` / ``times.npy`` for the accuracy curve (scaled by 100)
      and the time series.

    The resulting frame is written to ``./results/latex/raw.csv`` (the
    directory is created if needed) and returned.

    Returns
    -------
    pandas.DataFrame
        One row per run with the columns ``dataset``, ``availability``,
        ``selector``, ``finish_time(s)``, ``failed_rounds``, ``accuracy_mean``,
        ``accuracy_std``, ``average_failed_clients``, ``unique_participants``,
        ``total_participants``, ``acc_path`` and ``times_path``.
    """
    data = []
    for base in ['./results/cifar/', './results/femnist/']:
        dataset = base.replace('./results/', '').replace('/', '').upper()
        if dataset == 'CIFAR':
            dataset = 'CIFAR-10'

        # Sorted so the row order of raw.csv does not depend on the filesystem.
        for run_name in sorted(os.listdir(base)):
            run_dir = base + run_name
            if not os.path.isdir(run_dir):
                # Stray files next to the run directories are not runs.
                continue

            with open(run_dir + '/args.txt') as args_file:
                args_text = args_file.read()
            availability = extract_config(args_text, 'trace_distro').split('_')[0].capitalize()
            selector = extract_config(args_text, 'selector').replace('random', 'Random')
            if selector == 'tiflx':
                selector = 'tifl-mda'
            selector = selector.replace('mdA-fail', 'mda-failure history')
            selector = selector.replace('mdA-history', 'mda-availability history')
            selector = selector.replace('tifl', 'TiFL').replace('mda', 'MDA').replace('fedcs', 'FedCS')

            # Start from "no log found" for every run so statistics of a
            # previous run can never leak into a run that lacks a .out file.
            log_stats = None
            for file_name in sorted(os.listdir(run_dir)):
                if file_name.endswith('.out'):
                    # With several logs the last one (in sorted order) wins.
                    log_stats = extract_file_data(run_dir + '/' + file_name)
            if log_stats is None:
                log_stats = (np.nan, np.nan, np.nan, np.nan)
            avg_failed, unique_participants, total_participants, rounds_failed = log_stats

            acc_path = run_dir + '/accuracies.npy'
            times_path = run_dir + '/times.npy'
            accuracies = np.load(acc_path) * 100
            times = np.load(times_path)
            # Accuracy statistics are taken over the last 200 recorded values.
            accuracy_tail = accuracies[-200:]
            data.append({
                'dataset': dataset,
                'availability': availability,
                'selector': selector,
                'finish_time(s)': times[-1],
                'failed_rounds': rounds_failed,
                'accuracy_mean': np.mean(accuracy_tail),
                'accuracy_std': accuracy_tail.std(),
                'average_failed_clients': avg_failed,
                'unique_participants': unique_participants,
                'total_participants': total_participants,
                'acc_path': acc_path,
                'times_path': times_path,
            })
    df = pd.DataFrame(data)
    os.makedirs('./results/latex/', exist_ok=True)
    df.to_csv('./results/latex/raw.csv', index=False)
    return df


# Output directory for the cached summary table and all generated artefacts.
out = './results/latex/'

# Reuse the cached summary when it exists; otherwise rebuild it from the runs.
df = pd.read_csv(out + 'raw.csv') if os.path.exists(out + 'raw.csv') else prepare_raw_csv()

pd.set_option('display.float_format', '{:.5f}'.format)

# Categorical columns give the rows a fixed presentation order instead of an
# alphabetical one; values outside the listed categories become missing.
df['availability'] = pd.Categorical(df['availability'], categories=["High", "Average", "Low"])
df['selector'] = pd.Categorical(
    df['selector'],
    categories=['Random', 'FedCS', 'TiFL', 'MDA-failure history', 'MDA-availability history', 'MDA',
                'TiFL-MDA'],
)
df = df.sort_values(by=['dataset', 'availability', 'selector'])
datasets = df['dataset'].unique()


In [2]:
def load_accuracy_data(path, window=100):
    """Load an accuracy curve and average it over consecutive windows.

    The stored values are multiplied by 100 and split into consecutive windows
    of ``window`` entries; a shorter trailing window is kept rather than
    dropped, so the array length does not have to be a multiple of ``window``.

    Parameters
    ----------
    path : str
        Path of the ``.npy`` file holding the accuracies.
        NOTE(review): assumes a one-dimensional array - confirm.
    window : int, optional
        Number of entries averaged per plotted point (must be positive).

    Returns
    -------
    tuple
        ``(rounds, means)`` where ``rounds`` lists the index at which each
        window ends (``window, 2 * window, ..., len(accuracies)``) and
        ``means`` is a NumPy array with the mean of each window. Both are
        empty for an empty input.
    """
    accuracies = np.load(path) * 100
    total = len(accuracies)
    if total == 0:
        return [], np.array([])
    starts = range(0, total, window)
    means = np.array([accuracies[start:start + window].mean() for start in starts])
    # The last window may be shorter, so it ends at `total`, not at a multiple.
    rounds = [min(start + window, total) for start in starts]
    return rounds, means


In [4]:
def save_tabular(df, exclude, name):
    """Write a transposed LaTeX table of ``df`` to ``out + name``.

    Parameters
    ----------
    df : pandas.DataFrame
        Rows to tabulate; must contain a ``finish_time(s)`` column unless it is
        listed in ``exclude``.
    exclude : list of str
        Columns dropped before rendering.
    name : str
        File name, appended to the module-level ``out`` directory.

    Notes
    -----
    After dropping ``exclude`` the frame is transposed, so every remaining
    column becomes a table row. The first of those rows is promoted to the
    column header. The LaTeX emitted by ``DataFrame.to_latex`` is then patched
    textually, which ties the result to the layout pandas produces.
    """
    df = df.drop(exclude, axis=1).round({'finish_time(s)': 0}).rename(
        columns=lambda c: c.replace('_', ' ').capitalize()).transpose()
    # Promote the first row to the header and keep the rest as the body.
    df.columns = df.iloc[0]
    df = df[1:].convert_dtypes()
    s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
    # First "<word> &" in the rendered table - presumably the label of the
    # header row (TODO confirm against the pandas version in use). It is moved
    # onto a line of its own, centred over all value columns.
    result = re.search(r'(\w+)\s+&', s).group(1)
    r = r' & \multicolumn{' + str(len(df.columns)) + '}{c}{' + result + '}\\\\\n'
    # Blank out ".00" so whole numbers are shown without decimals.
    s = s.replace(result, r, 1).replace('.00', '   ')
    # Replace the placeholder column spec with one 'l' per column plus index.
    s = s.replace('{tabular}{c}', '{tabular}{' + ''.join(['l'] * (len(df.columns) + 1)) + '}')
    with open(out + name, 'w') as f:
        f.write(s)


def display_single_selector(rq, selector):
    """Plot and tabulate one selector across availability levels, per dataset.

    For every dataset in the module-level ``datasets`` this saves a figure
    ``<rq>-image-<dataset>-<selector>`` with one accuracy curve per matching
    row of ``df`` and a table ``<rq>-table-<dataset>-<selector>.txt``, both
    under the module-level ``out`` directory.

    Parameters
    ----------
    rq : str
        Prefix used in the generated file names.
    selector : str
        Selector name, compared case-insensitively with ``df.selector``.
    """
    # Colour is looked up by availability level rather than by row position,
    # so a missing level cannot shift the colours of the remaining curves and
    # no row is dropped for lack of a colour.
    colors = {'High': 'tab:green', 'Average': 'orange', 'Low': 'red'}
    selector = selector.lower()
    for dataset in datasets:
        filtered_data = df[(df.dataset == dataset) & (df.selector.str.lower() == selector)]
        if filtered_data.empty:
            # Nothing to plot or tabulate for this dataset.
            continue

        fig, ax = plt.subplots(figsize=(12, 8))
        for availability, path in zip(filtered_data.availability, filtered_data.acc_path):
            ax.plot(*load_accuracy_data(path), label='{} availability'.format(availability),
                    color=colors.get(availability))

        ax.set_xlabel('Round number')
        ax.set_ylabel('Accuracy %')
        ax.legend(loc='lower right')
        ax.margins(x=0.02)
        fig.savefig(out + '{}-image-{}-{}'.format(rq, dataset, selector), bbox_inches='tight')
        # Figures are only saved, never shown; close them to release memory.
        plt.close(fig)
        save_tabular(filtered_data, ['acc_path', 'times_path', 'dataset', 'selector'],
                     '{}-table-{}-{}.txt'.format(rq, dataset, selector))


def display_multiple_selectors(rq, selectors):
    """Compare several selectors per dataset and availability level.

    Only availability levels for which every requested selector has at least
    one row in ``df`` are considered. For each dataset and such level this
    saves a table ``<rq>-table-<dataset>-<availability>.txt`` and a figure
    ``<rq>-image-<dataset>-<availability>`` under the module-level ``out``
    directory, with one accuracy curve per matching row.

    Parameters
    ----------
    rq : str
        Prefix used in the generated file names.
    selectors : list of str
        Selector names, compared case-insensitively with ``df.selector``.
    """
    # Fixed colour per selector so a selector looks the same in every figure.
    cmap = {
        'random': 'C0',
        'fedcs': 'C4',
        'tifl': 'C5',
        'mda': 'C6',
        'tifl-mda': 'C9',
        'mda-availability history': 'C7',
        'mda-failure history': 'C8'
    }
    selectors = list(map(str.lower, selectors))
    # Lower-cased once; it does not change inside the loops below.
    selector_names = df.selector.str.lower()
    availabilities = set(df.availability.unique())
    for selector in selectors:
        availabilities = availabilities.intersection(df[selector_names == selector].availability.unique())

    for dataset in datasets:
        for availability in availabilities:
            filtered_data = df[
                (df.dataset == dataset) & (df.availability == availability) & selector_names.isin(selectors)]
            if filtered_data.empty:
                # The availability set is computed over all datasets, so a
                # single dataset may have no rows for this level.
                continue

            save_tabular(filtered_data, ['acc_path', 'times_path', 'dataset', 'availability'],
                         '{}-table-{}-{}.txt'.format(rq, dataset, availability))
            fig, ax = plt.subplots(figsize=(12, 8))
            for selector, path in zip(filtered_data.selector, filtered_data.acc_path):
                ax.plot(*load_accuracy_data(path), label=selector, color=cmap[selector.lower()])
            ax.margins(x=0.02)
            ax.set_xlabel('Round number')
            ax.set_ylabel('Accuracy %')
            ax.legend(loc='lower right')
            fig.savefig(out + '{}-image-{}-{}'.format(rq, dataset, availability), bbox_inches='tight')
            # Figures are only saved, never shown; close them to release memory.
            plt.close(fig)


# Font size and line width applied to every figure produced below.
plt.rcParams['font.size'] = 26
plt.rcParams['lines.linewidth'] = 3

# One single-selector report, followed by one comparison per file-name prefix.
display_single_selector('RQ1', 'random')
for rq_label, compared_selectors in (
        ('RQ2', ['mda', 'MDA-failure history', 'MDA-availability history']),
        ('RQ2vsrandom', ['mda', 'random']),
        ('RQ3', ['random', 'fedcs', 'tifl']),
        ('RQ4', ['fedcs', 'tifl', 'tifl-mda']),
):
    display_multiple_selectors(rq_label, compared_selectors)


  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_format='{:,.2f}'.format)
  s = df.to_latex(column_format='c', float_forma