In [5]:
import csv
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, log_loss
import seaborn as sns
import os
import ast
import sys
import re
from statistics import mean, stdev
statistics_path = os.path.abspath("../")
sys.path.append(statistics_path)
import stats_utils

In [6]:
# Input-metrics and output-image directories.
# The defaults are the original machine-specific locations; set the
# THESIS_METRICS_DIR / THESIS_EXP_IMAGES_DIR environment variables to run the
# notebook on another machine without editing this cell.
# os.path.join(..., "") leaves the defaults unchanged and guarantees a trailing
# separator on overrides. NOTE(review): kept in case stats_utils builds paths by
# plain string concatenation — confirm against stats_utils.
path_to_exp_statistics = os.path.join(
    os.environ.get("THESIS_METRICS_DIR", "/Users/admin/Desktop/thesis/dataset/metrics/"),
    "",
)
path_to_exp_images = os.path.join(
    os.environ.get(
        "THESIS_EXP_IMAGES_DIR",
        "/Users/admin/Desktop/thesis_writing/experiment_images/performance_exp/5_client_scaling/",
    ),
    "",
)

### NON-IID CASE

### 1. Compare the 6 different scales of non-IID client scaling (1 to 5 clients, plus 5 clients that each know the whole dataset)

In [7]:
# Load the statistics frame and the label names of every non-IID experiment.
# Each experiment is parsed first and its label names are read right after,
# in the order the experiment folders are listed below.
labels_names_list = []
loaded_frames = []
for exp_name in (
    "exp_5_1_1_dataset_6",
    "exp_5_2_1_dataset_6",
    "exp_5_3_1_dataset_6",
    "exp_5_4_1_dataset_6",
    "exp_5_5_1_dataset_6",
    "exp_5_5_1_dataset_6_all_dataset",
):
    loaded_frames.append(
        stats_utils.parse_experiments_statistics_to_df(
            path_to_exp_statistics, exp_name, csv_filename="logs.csv"
        )
    )
    labels_names_list.append(
        stats_utils.load_label_names(path_to_exp_statistics, exp_name)
    )

# Keep the individual frame names available for later cells.
df1, df2, df3, df4, df5, df6 = loaded_frames

# df1 is loaded but left out of the list that gets plotted.
dfs = [df2, df3, df4, df5, df6]

In [8]:
def plot_acc_loss_from_dfs(dfs, get_accuracy_loss_values, path_to_exp_images, should_save=False, filename="accuracies_for_every_dataset", title = "", label_names=None):
    """Plot one accuracy-per-round curve for each experiment DataFrame.

    For every frame, the first element of the list stored in the first row of
    its ``devices_names`` column is used as the client whose metrics are drawn.
    Only accuracy is plotted; the loss series returned by
    ``get_accuracy_loss_values`` is not used.

    Args:
        dfs: Iterable of DataFrames. Each needs a ``devices_names`` column whose
            first row is the string representation of a list of client names.
        get_accuracy_loss_values: Callable ``(df, client_name) -> (acc, loss)``;
            ``acc`` must be a sized sequence with one value per round.
        path_to_exp_images: Directory the figure is written to when saving.
        should_save: If truthy, save the figure and close it; otherwise show it.
        filename: File name (joined onto ``path_to_exp_images``) used when saving.
        title: Figure title.
        label_names: Legend labels, one per frame. When ``None``, the labels
            ``"experiment 1"``, ``"experiment 2"``, ... are used.

    Raises:
        ValueError: If ``dfs`` is empty.
        IndexError: If ``label_names`` has fewer entries than ``dfs``.
    """
    dfs = list(dfs)
    if not dfs:
        raise ValueError("dfs must contain at least one DataFrame to plot")
    if label_names is None:
        label_names = ["experiment {}".format(i) for i in range(1, len(dfs) + 1)]

    # A list of pairs (not a dict) so that repeated labels do not drop curves.
    curves = []
    for i, df in enumerate(dfs):
        label_name = label_names[i]
        # .iloc[0] is the first row by position, whatever the index labels are.
        first_client_name = ast.literal_eval(df['devices_names'].iloc[0])[0]
        acc, _loss = get_accuracy_loss_values(df, first_client_name)
        curves.append((label_name, acc))

    # Plot Accuracy
    plt.figure(figsize=(10, 6))
    for name, values in curves:
        # Each curve gets its own x-axis so runs with different round counts still plot.
        plt.plot(range(1, len(values) + 1), values, label=name)
    plt.title(title)
    plt.xlabel("Federated Round")
    plt.ylabel("Accuracy")
    plt.grid(True)
    plt.legend()
    plt.tight_layout()
    if should_save:
        if path_to_exp_images:
            # savefig does not create missing directories itself.
            os.makedirs(path_to_exp_images, exist_ok=True)
        path_to_file = os.path.join(path_to_exp_images, filename)
        plt.savefig(path_to_file, dpi=300)
        plt.close()
    else:
        plt.show()

In [9]:
# Save the non-IID client-scaling accuracy figure.
# The legend entries are listed in the same order as the frames in `dfs`.
title = "Effect of Client Scaling on Non-IID dataset"
filename = "client_scaling_on_non_iid_dataset"
label_names = [
    "2 clients",
    "3 clients",
    "4 clients",
    "5 clients",
    "5 clients (with the whole dataset known)",
]
plot_acc_loss_from_dfs(
    dfs,
    stats_utils.get_accuracy_loss_values_for_dfs,
    path_to_exp_images,
    should_save=True,
    filename=filename,
    title=title,
    label_names=label_names,
)

### IID CASE

In [4]:
# Load the statistics frame and the label names of every IID experiment.
# Each experiment is parsed first and its label names are read right after,
# in the order the experiment folders are listed below.
# NOTE(review): the last folder is run "5_5_2" while the others end in "_1" —
# confirm this is the intended run and not a typo.
labels_names_list = []
loaded_frames = []
for exp_name in (
    "exp_5_1_1_dataset_6_iid_case",
    "exp_5_2_1_dataset_6_iid_case",
    "exp_5_3_1_dataset_6_iid_case",
    "exp_5_4_1_dataset_6_iid_case",
    "exp_5_5_2_dataset_6_iid_case",
):
    loaded_frames.append(
        stats_utils.parse_experiments_statistics_to_df(
            path_to_exp_statistics, exp_name, csv_filename="logs.csv"
        )
    )
    labels_names_list.append(
        stats_utils.load_label_names(path_to_exp_statistics, exp_name)
    )

# Keep the individual frame names available for later cells.
df1, df2, df3, df4, df5 = loaded_frames

# df1 is loaded but left out of the list that gets plotted.
dfs = [df2, df3, df4, df5]

In [9]:
# Save the IID client-scaling accuracy figure.
# The legend entries are listed in the same order as the frames in `dfs`.
filename = "client_scaling_on_iid_dataset"
# "2 clients" (plural) matches the wording of the non-IID figure's legend.
label_names = ["2 clients", "3 clients", "4 clients", "5 clients"]
title = "Effect of Client Scaling on IID dataset"
plot_acc_loss_from_dfs(
    dfs,
    stats_utils.get_accuracy_loss_values_for_dfs,
    path_to_exp_images,
    should_save=True,
    filename=filename,
    title=title,
    label_names=label_names,
)