In [None]:
import os
import pandas as pd
import numpy as np
import  seaborn as sns
import matplotlib.pyplot as plt
import itertools
from matplotlib import cm
from matplotlib.gridspec import GridSpec
from mpl_toolkits import axes_grid1

%load_ext autoreload

%autoreload 2

from source.plotting_utils import plot_heatmap_colorbar, plot_heatmap_histogram, visualize_participant_performance, visualize_participant_performance_multiseed

# Configuration

In [None]:
# When True only the known benchmark methods are evaluated; when False every
# other run found in `logdir` is evaluated as well.
benchmark = True
# NOTE(review): the next two flags are not read anywhere in the visible part of
# this notebook -- presumably consumed elsewhere; confirm before removing.
visualize_individual_features = False
center_weight_plots = True
# Fraction of the final training rounds used for end-of-training statistics.
percentage_of_last_rounds = 0.05

# Root directory of the experiment. Paths below are built by plain string
# concatenation, so the trailing "/" is required.
logdir = "../outputs/vehicle/experiment_name/"
# Handed to visualize_participant_performance_multiseed as its first argument.
dataset_name = "Vehicle"

### Setup of Metrics, Methods and Directories

In [None]:
# All evaluation artefacts (plots and CSV tables) go to an "eval" folder that
# lives next to the run folders inside logdir.
outdir = logdir + "eval/"
os.makedirs(outdir, exist_ok=True)

# Candidate runs are the sub-directories of logdir, minus the output folder.
# Plain files (e.g. .DS_Store) are skipped because they would later be treated
# as runs and fail when their logs are read. The listing is sorted because
# os.listdir returns entries in arbitrary order.
all_runs = sorted(
    entry
    for entry in os.listdir(logdir)
    if entry != "eval" and os.path.isdir(os.path.join(logdir, entry))
)
metrics = ["balanced_accuracy","f1","precision","recall","pr_auc","roc_auc"]
methods = ["mean-eq", "perc-10", "perc-90"]

metrics_selection = ["balanced_accuracy", "f1", "roc_auc"]

# Preferred display order of the known benchmark methods.
all_run_order = ["iFedAvg", "APFL", "FedAvg", "Local", "Centralized"]
runs = [run for run in all_runs if run in all_run_order]

run_order = [run for run in all_run_order if run in runs]
if not benchmark:
    # Outside benchmark mode, every remaining run is evaluated too and is
    # appended after the benchmark methods in the plots.
    extra_runs = [run for run in all_runs if run not in runs]
    runs = runs + extra_runs
    run_order = run_order + extra_runs

print(runs)

### Determining whether multiple seeds have been executed

In [None]:
# Layout detection, based on the first run only:
#   single seed : <logdir>/<run>/logs/<participant files>
#   multi seed  : <logdir>/<run>/<seed>/logs/<participant files>
if not runs:
    raise RuntimeError("No runs found in " + logdir + "; nothing to evaluate.")

try:
    participants = os.listdir(logdir + runs[0] + "/logs")
    multiseed = False
except (FileNotFoundError, NotADirectoryError):
    # Only a missing "logs" folder signals the multi-seed layout; any other
    # error (permissions, interrupts, ...) is allowed to surface.
    multiseed = True
    path = logdir + runs[0]
    seeds = sorted(name for name in os.listdir(path) if os.path.isdir(os.path.join(path, name)))
    print(seeds)
    participants = os.listdir(logdir + runs[0] + "/" + seeds[0] + "/logs")


### Helper Functions

In [None]:
def perc(n):
    """
    Builds an aggregation callable that computes the n-th percentile.

    The returned function is renamed to ``perc_<n>`` so that it carries a
    distinct name for every percentile.
    """
    def perc_(x):
        # Delegates to numpy; `n` is captured from the enclosing call.
        return np.percentile(x, n)

    perc_.__name__ = 'perc_%s' % n
    return perc_

def get_joined_df(run):
    """
    Reads the ";"-separated log file of every participant of a single-seed
    run and stacks them row-wise into one DataFrame.
    """
    log_root = logdir + run + "/logs/"
    frames = [pd.read_csv(log_root + log_file, sep=";") for log_file in participants]
    return pd.concat(frames, axis=0)

def get_joined_df_multiseed(run, std=False):
    """
    Loads the logs of every participant for every seed of `run` and
    aggregates them across seeds.

    The ";"-separated log files must contain the columns "name" and "round",
    which are used as grouping keys.

    Args:
        run: name of the run folder inside `logdir`.
        std: when True, the standard deviation across seeds is returned too.

    Returns:
        The per-(name, round) means as a DataFrame, or the tuple
        (means, stds) when `std` is True.
    """
    frames = []
    for p in participants:
        for s in seeds:
            frame = pd.read_csv(logdir + run + "/" + s + "/logs/" + p, sep=";")
            frame["seed"] = s
            frames.append(frame)
    df = pd.concat(frames, axis=0)

    keys = ["name", "round"]
    # Aggregate numeric value columns only. The "seed" column holds directory
    # names (strings) and makes groupby().mean()/.std() raise a TypeError on
    # pandas >= 2.0, where non-numeric columns are no longer dropped silently.
    value_cols = [
        col
        for col in df.select_dtypes(include=["number", "bool"]).columns
        if col not in keys
    ]
    grouped = df.groupby(keys)[value_cols]
    means = grouped.mean().reset_index()

    if std:
        stds = grouped.std().reset_index()
        return means, stds

    return means

def get_aggregate_metric(run, metric="f1", method="perc", multiseed=False):
    """
    Aggregates one metric over all participants of `run`, per round.

    Args:
        run: name of the run folder inside `logdir`.
        metric: column of the participant logs to aggregate.
        method: one of "mean-eq" (unweighted mean), "mean-smpl" (mean weighted
            by the "n_samples" column), "median", "perc-10" or "perc-90".
            Note that the default value "perc" is not one of them and raises.
        multiseed: read the multi-seed directory layout instead of the
            single-seed one.

    Returns:
        A Series with one value per round. For "mean-eq" it carries a
        positional 0..n-1 index; for the other methods it is indexed by round.

    Raises:
        NotImplementedError: for an unknown `method`.
    """
    if multiseed:
        df = get_joined_df_multiseed(run)
    else:
        df = get_joined_df(run)

    # The metric column is selected before aggregating so that non-numeric
    # columns such as "name" never reach mean()/sum().
    by_round = df.groupby("round")

    if method == "mean-eq":
        return by_round[metric].mean().reset_index()[metric]

    if method == "mean-smpl":
        weighted = pd.DataFrame({"round": df["round"], metric: df[metric] * df["n_samples"]})
        # Numerator and denominator are both indexed by round, so the division
        # pairs matching rounds even when rounds do not start at 0.
        weighted_sum = weighted.groupby("round")[metric].sum()
        sample_count = by_round["n_samples"].sum()
        return weighted_sum / sample_count

    percentiles = {"median": 50, "perc-10": 10, "perc-90": 90}
    if method in percentiles:
        return by_round.agg({metric: perc(percentiles[method])})[metric]

    # `raise NotImplemented` would itself fail with a TypeError because
    # NotImplemented is not an exception class.
    raise NotImplementedError("Unknown aggregation method: %r" % (method,))

def get_metrics_df(metric="f1", method="mean-eq", multiseed=False):
    """
    Builds a table with one column per run holding the aggregated metric,
    plus a leading "round" column taken from the index.
    """
    per_run = pd.DataFrame()
    for run_name in runs:
        per_run[run_name] = get_aggregate_metric(run_name, metric, method, multiseed)

    # Name the index so that reset_index() exposes it as the "round" column.
    per_run.index = per_run.index.set_names(["round"])
    return per_run.reset_index()

def make_table(data, metric, method, last_fraction=None, columns=None):
    """
    Summarises the final rounds of `data` as mean and standard deviation.

    Args:
        data: DataFrame with one row per round and one column per run.
        metric: metric name, used only to label the output columns.
        method: aggregation method name, used only to label the output columns.
        last_fraction: fraction of the last rows to summarise; defaults to the
            notebook-level `percentage_of_last_rounds`.
        columns: run columns to summarise; defaults to the notebook-level `runs`.

    Returns:
        DataFrame indexed by run with the columns "<method>-<metric>-avg" and
        "<method>-<metric>-std".
    """
    if last_fraction is None:
        last_fraction = percentage_of_last_rounds
    if columns is None:
        columns = runs

    # Keep at least one row: with few rounds the count truncates to 0, and
    # iloc[-0:] would select the whole frame instead of its tail.
    n_last = max(1, int(last_fraction * len(data)))
    ending = data.iloc[-n_last:][columns]

    col_name = method + "-" + metric
    return pd.DataFrame({col_name + "-avg": ending.mean(), col_name + "-std": ending.std()})

def run_eval(metric="f1", method="mean-eq", multiseed=False):
    """
    Evaluates one (metric, method) combination, where the method is an
    aggregation such as the mean or the 10th percentile.

    Writes the line plot and the box plot for the combination and returns the
    summary table produced by make_table.
    """
    label = method + "-" + metric
    curves = get_metrics_df(metric, method, multiseed)

    # Line plot first, then box plot -- both labelled "<method>-<metric>".
    for draw in (plot_means, plot_boxplot):
        draw(curves, label)

    return make_table(curves, metric, method)

def save_tables(table):
    """
    Writes the complete results table and a condensed selection of its
    columns to `outdir` as ";"-separated CSV files.
    """
    table.to_csv(outdir + "full_results.csv", sep=";")

    # Columns kept in the condensed table, mapped to their short names.
    short_names = {
        "mean-eq-balanced_accuracy-avg": "mean-BA-avg",
        "mean-eq-balanced_accuracy-std": "mean-BA-std",
        "perc-10-balanced_accuracy-avg": "10thPer-BA-avg",
        "perc-10-balanced_accuracy-std": "10thPer-BA-std",
        "mean-eq-roc_auc-avg": "mean-ROCAUC-avg",
        "mean-eq-roc_auc-std": "mean-ROCAUC-std",
        "perc-10-roc_auc-avg": "10thPer-ROCAUC-avg",
        "perc-10-roc_auc-std": "10thPer-ROCAUC-std",
        "mean-eq-f1-avg": "mean-F1-avg",
    }
    subset = table[list(short_names.keys())].round(3)
    subset.columns = list(short_names.values())
    subset.to_csv(outdir + "subset_results.csv", sep=";")

def compile_participant_table():
    """
    Creates a single DataFrame with one row per (run, participant).

    For every run, the rounds after the first (1 - percentage_of_last_rounds)
    share of the maximum round are kept and the median per participant is
    taken. The result is rounded to 3 decimals, written to
    "participant_results.csv" in `outdir` and returned.
    """
    per_run = []
    for run in runs:
        if multiseed:
            run_df = get_joined_df_multiseed(run)
        else:
            run_df = get_joined_df(run)

        cutoff = (1 - percentage_of_last_rounds) * run_df["round"].max()
        tail = run_df[run_df["round"] > cutoff]
        summary = (
            tail.groupby(["name"])
            .median()
            .reset_index()
            .drop(["round", "n_samples"], axis=1)
        )
        summary["Method"] = run
        per_run.append(summary)

    # Concatenate once instead of growing a frame inside the loop; this is
    # linear and avoids concatenating onto an empty DataFrame, which recent
    # pandas versions warn about.
    df = pd.concat(per_run, axis=0) if per_run else pd.DataFrame()

    # save tables
    df = df.round(3)
    df.to_csv(outdir + "participant_results.csv", sep=";", index=False)
    return df

def _summarize_last_rounds(frame, run):
    """Per-participant median of `frame` over its final rounds, tagged with `run`."""
    cutoff = (1 - percentage_of_last_rounds) * frame["round"].max()
    tail = frame[frame["round"] > cutoff]
    summary = tail.groupby(["name"]).median().reset_index()
    summary = summary.drop(["round", "n_samples"], axis=1)
    summary["Method"] = run
    return summary


def get_multiseed_means_stds():
    """
    Collects, for every run, the across-seed means and standard deviations
    summarised per participant over the final rounds.

    Returns:
        (all_means, all_stds): two DataFrames with one row per
        (run, participant) and a "Method" column naming the run.
    """
    mean_frames = []
    std_frames = []

    for run in runs:
        means, stds = get_joined_df_multiseed(run, std=True)
        # Means and standard deviations go through the same summary step.
        mean_frames.append(_summarize_last_rounds(means, run))
        std_frames.append(_summarize_last_rounds(stds, run))

    # Single concatenation per table instead of growing frames in the loop.
    all_means = pd.concat(mean_frames, axis=0) if mean_frames else pd.DataFrame()
    all_stds = pd.concat(std_frames, axis=0) if std_frames else pd.DataFrame()

    return all_means, all_stds

### Individual Plotting

In [None]:
def plot_means(data, title):
    """
    Draws one line per run over the rounds and saves the figure as
    "line-<title>.jpg" (300 dpi) in `outdir`.

    Args:
        data: DataFrame with a "round" column and one column per run.
        title: plot title, also used in the file name.
    """
    sns.set_style("whitegrid")
    melted = data.melt(id_vars=["round"], var_name="Method")
    fig = plt.figure(figsize=(8, 5))
    try:
        sns.lineplot(data=melted, x="round",y="value", hue="Method", hue_order=run_order)
        plt.title(str(title))
        plt.savefig(outdir + "line-" + title + ".jpg", dpi=300)
    finally:
        # Close the figure (as plot_boxplot does): clearing alone keeps it
        # registered with pyplot, so figures pile up over repeated calls.
        plt.close(fig)

def plot_boxplot(data, title):
    """
    Draws a box plot per run over the final rounds of `data` and saves it as
    "box-<title>.jpg" (300 dpi) in `outdir`.

    Args:
        data: DataFrame with one row per round and one column per run.
        title: metric label shown in the plot title and used in the file name.
    """
    # Keep at least one row: with few rounds the count truncates to 0, and
    # iloc[-0:] would select the whole frame instead of its tail.
    n_last = max(1, int(percentage_of_last_rounds * len(data)))
    ending = data.iloc[-n_last:][runs]
    melted = ending.melt(var_name="Method")

    plt.figure(figsize=(8, 5))
    ax = sns.boxplot(y="value", x="Method", data=melted, order=run_order, showfliers=False, palette="pastel")
    # Tilt the labels once there are too many runs to fit side by side.
    if len(list(ax.get_xticklabels())) > 5:
        ax.set_xticklabels(ax.get_xticklabels(), rotation=30)
    plt.title(str("Metric: "+ title))
    plt.tight_layout()
    plt.savefig(outdir + "box-" +  title + ".jpg", dpi=300)
    # plt.show()
    plt.clf()
    plt.close()

# Running Post-Processing Pipeline

In [None]:
print("multiseed:", multiseed)

# Collect full results table: one chunk of "-avg"/"-std" columns per
# (metric, method) combination, joined side by side in a single concat
# rather than by growing a frame inside the loop.
chunks = [
    run_eval(metric, method, multiseed=multiseed)
    for metric, method in itertools.product(metrics_selection, methods)
]
table = pd.concat(chunks, axis=1)

save_tables(table)

participant_table = compile_participant_table()

In [None]:
# One violin plot per selected metric, built from the per-participant table.
for metric in metrics_selection:
    # Only the run label, the participant name and the current metric are needed.
    p_subset = participant_table[["Method", "name", metric]]
    visualize_participant_performance(
        df=p_subset,
        metric=metric,
        outdir=outdir,
        run_order=run_order,
    )


In [None]:
# Violin plots with error bars -- only possible when several seeds were run,
# because the error bars come from the across-seed standard deviations.
if multiseed:
    all_means, all_stds = get_multiseed_means_stds()

    for metric in metrics_selection:
        means_subset = all_means[["Method", metric]]
        stds_subset = all_stds[["Method", metric]]
        visualize_participant_performance_multiseed(
            dataset_name,
            df_means=means_subset,
            df_stds=stds_subset,
            metric=metric,
            outdir=outdir,
            run_order=run_order,
        )
