# Client Availability Test

In [18]:
import csv
import json
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, classification_report, log_loss
import seaborn as sns
import os
import ast
import sys
import re
from statistics import mean, stdev
statistics_path = os.path.abspath("../")
sys.path.append(statistics_path)
import stats_utils
from matplotlib.ticker import MaxNLocator
import re

In [21]:
def extract_client_counts_from_logs(path_to_file):
    """Collect the client count reported on each matching line of a log file.

    Every line of ``path_to_file`` is searched for the text
    ``NUMBER OF CLIENTS:`` followed by optional whitespace and a run of
    digits. At most one count is taken per line (the first match on it).

    Args:
        path_to_file: Path of the text file to scan.

    Returns:
        list[int]: The parsed counts, in the order their lines appear in the
        file. Empty when no line matches.
    """
    count_re = re.compile(r"NUMBER OF CLIENTS:\s*(\d+)")

    with open(path_to_file, "r") as log_file:
        # Lazily search each line; lines without a match yield None and are
        # filtered out below. The list is built before the file is closed.
        hits = (count_re.search(row) for row in log_file)
        return [int(hit.group(1)) for hit in hits if hit]

In [5]:
# Root directory of the experiment statistics. The loading cell further down
# joins it with an experiment name to locate that run's "server_logs.txt" and
# passes it to stats_utils.parse_experiments_statistics_to_df.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
path_to_exp_statistics = "/Users/admin/Desktop/thesis/dataset/metrics/"

In [6]:
# Directory handed to plot_acc_loss_from_dfs; figures are written there as
# "<filename>.png" when the plot is called with should_save=True.
# NOTE(review): absolute, machine-specific path; adjust when running elsewhere.
path_to_exp_images = "/Users/admin/Desktop/thesis_writing/experiment_images/network_experiments/"

In [44]:
def plot_acc_loss_from_dfs(
    dfs,
    get_accuracy_loss_values,
    path_to_exp_images,
    counts,
    should_save=False,
    filename="accuracies_for_every_dataset",
    label_names=None,
    title=""
):
    """Plot accuracy curves above a stem plot of clients per round.

    The figure has two vertically stacked axes sharing the x-axis (round
    number, starting at 1): the upper one shows one accuracy line per entry
    of ``dfs``, the lower one shows one stem series per entry of
    ``label_names`` taken from ``counts``.

    NOTE: this file contains a second function with the same name further
    down (a line-plot variant). When the file is executed top to bottom, that
    later definition replaces this one.

    Args:
        dfs: Sequence of DataFrames. Each must have a ``"devices_names"``
            column whose entry at label ``0`` is a string holding a Python
            list literal; the first element of that list is passed to
            ``get_accuracy_loss_values`` as the client name.
        get_accuracy_loss_values: Callable ``(df, client_name)`` returning an
            ``(accuracy_values, loss_values)`` pair. Only the accuracy values
            are plotted; the loss values are ignored.
        path_to_exp_images: Directory the PNG is written to when
            ``should_save`` is true.
        counts: ``counts[i]`` is the sequence of client counts plotted for
            ``label_names[i]``; it is plotted against the round numbers, so
            it is expected to hold one value per round.
        should_save: If false, show the figure; otherwise save it as
            ``<filename>.png`` at 300 dpi and close it.
        filename: File name (without extension) used when saving.
        label_names: Legend labels, matched by position with ``dfs`` and
            ``counts``. Defaults to ``"Run 1"``, ``"Run 2"``, ...
        title: Title of the accuracy axes.

    Raises:
        ValueError: If ``dfs`` is empty.
    """
    if label_names is None:
        # The original default (None) crashed on the first label lookup.
        label_names = [f"Run {i + 1}" for i in range(len(dfs))]

    # 1) Extract the accuracy series of every DataFrame.
    accs = {}
    for i, df in enumerate(dfs):
        first_client = ast.literal_eval(df["devices_names"][0])[0]
        acc, _loss = get_accuracy_loss_values(df, first_client)
        accs[label_names[i]] = acc

    if not accs:
        raise ValueError("dfs must contain at least one DataFrame")

    # 2) X-axis rounds; the first accuracy series defines how many there are.
    n_rounds = len(next(iter(accs.values())))
    rounds = list(range(1, n_rounds + 1))

    # 3) Create subplots (accuracy axes three times as tall as the count axes).
    fig, (ax1, ax2) = plt.subplots(
        2, 1, figsize=(12, 8), sharex=True,
        gridspec_kw={"height_ratios": [3, 1]}
    )

    # 4) Accuracy subplot
    for label, values in accs.items():
        ax1.plot(rounds, values, label=label)

    ax1.set_ylabel("Accuracy")
    ax1.set_title(title, fontsize=12)
    ax1.grid(True)
    ax1.legend(loc="lower right")

    # 5) Client count subplot (stem plot)
    for i, label in enumerate(label_names):
        # `use_line_collection` is no longer passed: it was deprecated in
        # Matplotlib 3.6 and removed in 3.8, where passing it raises TypeError.
        markerline, stemlines, _baseline = ax2.stem(
            rounds, counts[i],
            linefmt=f"C{i}-", markerfmt=f"C{i}o", basefmt=" ",
            label=label
        )
        # plt.setp accepts a single artist or a list of artists, so this works
        # whether the stems come back as one LineCollection or as many lines.
        plt.setp(markerline, markersize=4)
        plt.setp(stemlines, alpha=0.5)

    ax2.set_ylabel("Clients per Round")
    ax2.set_xlabel("Federated Round")
    ax2.grid(True)
    ax2.legend(loc="upper right", fontsize=9)

    # 6) Reduce number of x-ticks to roughly ten, always at integer rounds.
    tick_spacing = max(1, n_rounds // 10)
    ax2.set_xticks(range(1, n_rounds + 1, tick_spacing))

    # 7) Layout and save/show; `rect` keeps the top 6% of the figure free.
    plt.tight_layout(rect=[0, 0, 1, 0.94])
    fig.subplots_adjust(hspace=0.3)

    if not should_save:
        plt.show()
    else:
        path_to_file = os.path.join(path_to_exp_images, filename + ".png")
        fig.savefig(path_to_file, dpi=300)
        plt.close(fig)


In [35]:
def plot_acc_loss_from_dfs(
    dfs,
    get_accuracy_loss_values,
    path_to_exp_images,
    counts,
    should_save=False,
    filename="accuracies_for_every_dataset",
    label_names=None,
    title=""
):
    """Plot accuracy curves above line plots of clients per round.

    The figure has two vertically stacked axes sharing the x-axis (round
    number, starting at 1): the upper one shows one accuracy line per entry
    of ``dfs``, the lower one shows one line per entry of ``label_names``
    taken from ``counts``. All x-axis ticks are removed.

    NOTE: this file contains an earlier function with the same name (a
    stem-plot variant). When the file is executed top to bottom, this
    definition replaces that one.

    Args:
        dfs: Sequence of DataFrames. Each must have a ``"devices_names"``
            column whose entry at label ``0`` is a string holding a Python
            list literal; the first element of that list is passed to
            ``get_accuracy_loss_values`` as the client name.
        get_accuracy_loss_values: Callable ``(df, client_name)`` returning an
            ``(accuracy_values, loss_values)`` pair. Only the accuracy values
            are plotted; the loss values are ignored.
        path_to_exp_images: Directory the PNG is written to when
            ``should_save`` is true.
        counts: ``counts[i]`` is the sequence of client counts plotted for
            ``label_names[i]``; it is plotted against the round numbers, so
            it is expected to hold one value per round.
        should_save: If false, show the figure; otherwise save it as
            ``<filename>.png`` at 300 dpi and close it.
        filename: File name (without extension) used when saving.
        label_names: Legend labels, matched by position with ``dfs`` and
            ``counts``. Defaults to ``"Run 1"``, ``"Run 2"``, ...
        title: Title of the accuracy axes.

    Raises:
        ValueError: If ``dfs`` is empty.
    """
    if label_names is None:
        # The original default (None) crashed on the first label lookup.
        label_names = [f"Run {i + 1}" for i in range(len(dfs))]

    # 1) Extract the accuracy series of every DataFrame.
    accs = {}
    for i, df in enumerate(dfs):
        first_client = ast.literal_eval(df["devices_names"][0])[0]
        acc, _loss = get_accuracy_loss_values(df, first_client)
        accs[label_names[i]] = acc

    if not accs:
        raise ValueError("dfs must contain at least one DataFrame")

    # 2) X-axis is 1…n_rounds; the first accuracy series defines n_rounds.
    n_rounds = len(next(iter(accs.values())))
    rounds = list(range(1, n_rounds + 1))

    # 3) Create two vertically stacked plots: Accuracy and Client Count
    fig, (ax1, ax2) = plt.subplots(
        2, 1, figsize=(12, 8), sharex=True,
        gridspec_kw={"height_ratios": [3, 1]}
    )

    # 4) Plot Accuracy
    for label, values in accs.items():
        ax1.plot(rounds, values, label=label)

    ax1.set_title(title)
    ax1.set_ylabel("Accuracy")
    ax1.grid(True)
    ax1.legend()

    # 5) Plot Client Participation
    ax2.set_xlabel("Federated Round")
    ax2.set_ylabel("Clients")
    ax2.grid(True)

    for i, label in enumerate(label_names):
        ax2.plot(rounds, counts[i], label=label)

    ax2.legend()

    # 6) Drop the round numbering from the x-axis (too dense to read). An
    #    empty tick list replaces whatever locator was set before, so no
    #    locator is configured here.
    ax2.set_xticks([])

    plt.tight_layout()

    # 7) Show or save
    if not should_save:
        plt.show()
    else:
        path_to_file = os.path.join(path_to_exp_images, filename + ".png")
        fig.savefig(path_to_file, dpi=300)
        plt.close(fig)

## 1. Compute the accuracy plot for every client-availability level in a single diagram

In [33]:
# Load the four runs that are compared in the plot. For each run the
# statistics in "logs.csv" are parsed into a DataFrame via stats_utils, and
# (for the first three) the client counts are extracted from the run's
# "server_logs.txt". `exp_name` and `path_server_logs` are scratch variables
# that are rebound for every run.
# NOTE(review): the 25/50/75/100 suffixes presumably denote the client
# availability level of each run; confirm against the experiment setup.
exp_name = "exp_9_1_1_zero_clients_accepted"
path_server_logs = os.path.join(path_to_exp_statistics, exp_name, "server_logs.txt")
df_25_percent = stats_utils.parse_experiments_statistics_to_df(path_to_exp_statistics, exp_name, csv_filename="logs.csv")
counts_25_percent = extract_client_counts_from_logs(path_server_logs)

exp_name = "exp_9_2_1"
path_server_logs = os.path.join(path_to_exp_statistics, exp_name, "server_logs.txt")
df_50_percent = stats_utils.parse_experiments_statistics_to_df(path_to_exp_statistics, exp_name, csv_filename="logs.csv")
counts_50_percent = extract_client_counts_from_logs(path_server_logs)

exp_name = "exp_9_3_1"
path_server_logs = os.path.join(path_to_exp_statistics, exp_name, "server_logs.txt")
df_75_percent = stats_utils.parse_experiments_statistics_to_df(path_to_exp_statistics, exp_name, csv_filename="logs.csv")
counts_75_percent = extract_client_counts_from_logs(path_server_logs)

# No server log is read for this run: the counts are a hard-coded list of
# fifty 5s, and `path_server_logs` still points at the exp_9_3_1 log afterwards.
# NOTE(review): presumably 50 rounds with all 5 clients present in each;
# confirm that the run really has 50 rounds, since the list is plotted against
# the round numbers.
exp_name = "exp_9_4_1"
#path_server_logs = os.path.join(path_to_exp_statistics, exp_name, "server_logs.txt")
df_100_percent = stats_utils.parse_experiments_statistics_to_df(path_to_exp_statistics, exp_name, csv_filename="logs.csv")
counts_100_percent = 50 * [5]

# Position i of `dfs` and `counts` belongs to the same run.
dfs = [df_25_percent, df_50_percent, df_75_percent, df_100_percent]
counts = [counts_25_percent, counts_50_percent, counts_75_percent, counts_100_percent]

In [46]:
# Legend labels, matched by position with the entries of `dfs` and `counts`.
label_names = ["25%", "50%", "75%", "100%"]
title = "Accuracy Under Varying Client Availability Levels"
filename = "client_availability_test"

# Render the comparison figure and save it as "<filename>.png" in
# `path_to_exp_images` instead of showing it.
plot_acc_loss_from_dfs(
    dfs,
    stats_utils.get_accuracy_loss_values_for_dfs,
    path_to_exp_images,
    counts,
    should_save=True,
    filename=filename,
    label_names=label_names,
    title=title,
)