In [None]:
from typing import Any

import matplotlib.pyplot as plt
import pandas as pd
import torch
from federatedlearning.models.cnn import CNNMnist
from hydra import compose, initialize
from hydra.core.global_hydra import GlobalHydra
from nptyping import DataFrame
from omegaconf import DictConfig, OmegaConf
from scipy.stats import linregress

In [None]:
# hydra global initialization
# config_path in initialize() must be relative
# Hydra keeps a process-wide singleton and initialize() raises when it is
# already set, so clear it first to keep this cell safe to re-run.
GlobalHydra.instance().clear()
initialize(version_base="1.1", config_path="../config", job_name="jupyterlab")
# compose() hands back the composed config node used by every cell below.
cfg: DictConfig = compose(config_name="default")

In [None]:
# One behaviour log per client, ordered by client id (list index == client id).
client_num: int = cfg.federatedlearning.num_clients
client_behavior_df: list[DataFrame] = list(
    map(
        pd.read_csv,
        (
            f"/workspace/outputs/csv/client_{client_i}_behavior.csv"
            for client_i in range(client_num)
        ),
    )
)

In [None]:
# client 0: print the summary pandas produces for the first client's
# behaviour log (columns, dtypes, non-null counts).
client_behavior_df[0].info()

## Time-Series Analysis

In [None]:
def log_total_distances(
    global_model: CNNMnist | Any, local_model: CNNMnist | Any
) -> float:
    """
    Calculate the total Euclidean distance between the parameters of two models.

    This function assumes that both models have the same architecture and they
    are instances of the class `CNNMnist` or any other class with an accessible
    `named_parameters()` method providing name-parameter pairs.

    Args:
        global_model (CNNMnist|Any): The global model.
        local_model (CNNMnist|Any): The local model updated by a client.

    Returns:
        float: The total Euclidean distance between the parameters of the two models.
    """
    # Initialize the distance to zero.
    distance: float = 0.0

    # Iterate over the named parameters of both models simultaneously.
    for (layer_global, param_global), (layer_local, param_local) in zip(
        global_model.named_parameters(), local_model.named_parameters()
    ):
        # Ensure the layers compared are corresponding layers by checking their names.
        assert layer_global == layer_local, "Layer names do not match"

        # Calculate Euclidean distance for the current layer's parameters and add to the total distance.
        # p=2 specifies that this is the L2 norm, which corresponds to Euclidean distance.
        distance += torch.norm(param_global - param_local, p=2).item()

    # Return the total Euclidean distance calculated.
    return distance

In [None]:
client_id: int = 15  # byzantine client < 12
num_rounds: int = cfg.federatedlearning.rounds
euclidean_distance_list: list[float] = []

# Start at round 1: the local weights of round r are compared with the global
# model saved for round r - 1, which does not exist for r == 0.
# The loop variable is deliberately not called `round`, which would shadow the
# builtin for every later cell of the notebook.
for round_idx in range(1, num_rounds - 1):
    local_model = CNNMnist(cfg)
    # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
    # map_location="cpu" lets checkpoints written on a GPU load on a CPU-only kernel.
    local_model.load_state_dict(
        torch.load(
            client_behavior_df[client_id]["local_weight_path"][round_idx],
            map_location="cpu",
        )
    )

    global_model = CNNMnist(cfg)
    global_model.load_state_dict(
        torch.load(
            f"/workspace/outputs/weights/server/global_model_round_{round_idx-1}.pth",
            map_location="cpu",
        )
    )

    euclidean_distance_list.append(
        log_total_distances(global_model, local_model)
    )
print(f"{client_id=}, {num_rounds=}, {euclidean_distance_list=}")

In [None]:
# Entry i of the list was computed for round i + 1 (round 0 is skipped when the
# distances are collected), so plot against the real round numbers instead of
# the list index.
rounds_axis = list(range(1, len(euclidean_distance_list) + 1))
fig, ax = plt.subplots()
ax.plot(rounds_axis, euclidean_distance_list)
ax.set_xlabel("round")
ax.set_ylabel("euclidean_distance")
ax.set_title(f"Euclidean distance between Client {client_id} and Global Model")
plt.show()

In [None]:
def monitore_time_series(
    client_id: int, num_rounds: int = cfg.federatedlearning.rounds
) -> list[float]:
    """
    Collect one client's per-round distance to the previous global model.

    For every round r in 1 .. num_rounds - 2 the client's local weights for
    round r (path read from the `local_weight_path` column of its behaviour
    log) are compared with the server's global model saved for round r - 1,
    using `log_total_distances`.

    Args:
        client_id (int): Index of the client in `client_behavior_df`.
        num_rounds (int): Number of federated rounds; defaults to the value
            in the hydra config at the time this function is defined.

    Returns:
        list[float]: Distances ordered by round; element i belongs to
        round i + 1. Empty when `num_rounds` is 2 or less.
    """
    weight_paths = client_behavior_df[client_id]["local_weight_path"]
    euclidean_distance_list: list[float] = []

    # Start at 1: round 0 has no preceding global model checkpoint to compare to.
    for round_idx in range(1, num_rounds - 1):
        local_model = CNNMnist(cfg)
        # NOTE(review): torch.load unpickles the file; only load trusted checkpoints.
        # map_location="cpu" lets checkpoints written on a GPU load on a CPU-only kernel.
        local_model.load_state_dict(
            torch.load(weight_paths[round_idx], map_location="cpu")
        )

        global_model = CNNMnist(cfg)
        global_model.load_state_dict(
            torch.load(
                f"/workspace/outputs/weights/server/global_model_round_{round_idx-1}.pth",
                map_location="cpu",
            )
        )

        euclidean_distance_list.append(
            log_total_distances(global_model, local_model)
        )

    return euclidean_distance_list

In [None]:
n_clients: int = cfg.federatedlearning.num_clients
ncols: int = 5
# Ceiling division so every client gets a panel (4 rows for 20 clients)
# instead of indexing past a fixed 4x5 grid.
nrows: int = max(1, -(-n_clients // ncols))
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(5 * ncols, 5 * nrows),
    sharex="all",
    sharey="all",
    squeeze=False,  # keep a 2-D array even when there is a single row
)
axes = axes.reshape(-1)
for client in range(n_clients):
    client_distances = monitore_time_series(client_id=client)
    # Element i of the series belongs to round i + 1 (round 0 is skipped),
    # so use the real round numbers on the x-axis.
    axes[client].plot(
        list(range(1, len(client_distances) + 1)), client_distances
    )
    axes[client].set_title(f"Client {client}")
    axes[client].set_xlabel("round")
    axes[client].set_ylabel("euclidean_distance")
# Hide panels that have no client assigned.
for spare_ax in axes[n_clients:]:
    spare_ax.set_visible(False)
# "<" (not "<=") to agree with the other cells of this notebook, which
# describe byzantine clients as those with index below the configured count.
fig.suptitle(
    f"Euclidean distance between each Client and Global Model\n byzantine client index < {cfg.federatedlearning.num_byzantines}"
)
plt.show()

In [None]:
client_id: int = 0  # byzantine client < 12
num_rounds: int = cfg.federatedlearning.rounds
# Reuse the helper defined above instead of repeating the weight-loading loop.
euclidean_distance_list: list[float] = monitore_time_series(
    client_id=client_id, num_rounds=num_rounds
)

time_series_threshold: float = 2.0
# Walk over adjacent pairs only. A list of n distances has n - 1 pairs, so
# bounding the loop by the list length avoids indexing one past the end when
# no slope reaches the threshold.
for step in range(len(euclidean_distance_list) - 1):
    # Two-point regression: the slope is the change in distance per round.
    slope = linregress(
        [step, step + 1],
        [euclidean_distance_list[step], euclidean_distance_list[step + 1]],
    ).slope
    print(f"{client_id=}, round={step}, {slope=}")
    if slope >= time_series_threshold:
        print(f"CLIENT {client_id} is BYZANTINE CLIENT!!!")
        break

In [None]:
time_series_threshold: float = 2.0
n_clients: int = cfg.federatedlearning.num_clients
ncols: int = 5
# Ceiling division so every client gets a panel (4 rows for 20 clients)
# instead of indexing past a fixed 4x5 grid.
nrows: int = max(1, -(-n_clients // ncols))
fig, axes = plt.subplots(
    nrows=nrows,
    ncols=ncols,
    figsize=(5 * ncols, 5 * nrows),
    sharex="all",
    sharey="all",
    squeeze=False,  # keep a 2-D array even when there is a single row
)
axes = axes.reshape(-1)
for client in range(n_clients):
    euclidean_distance_list = monitore_time_series(client_id=client)

    # Flag the client at the first jump between consecutive rounds whose
    # slope reaches the threshold; one alert per client is enough.
    for step in range(1, len(euclidean_distance_list)):
        slope = linregress(
            [step - 1, step],
            [
                euclidean_distance_list[step - 1],
                euclidean_distance_list[step],
            ],
        ).slope
        if slope >= time_series_threshold:
            print(f"CLIENT {client} is BYZANTINE CLIENT!!!")
            break

    # Draw each client's series exactly once (not once per inspected round).
    # Element i belongs to round i + 1 because round 0 is skipped.
    axes[client].plot(
        list(range(1, len(euclidean_distance_list) + 1)),
        euclidean_distance_list,
    )
    axes[client].set_title(f"Client {client}")
    axes[client].set_xlabel("round")
    axes[client].set_ylabel("euclidean_distance")
# Hide panels that have no client assigned.
for spare_ax in axes[n_clients:]:
    spare_ax.set_visible(False)
# The original "\n\byzantine" contained a "\b" (backspace) escape that ate
# the first letter of the word; the stray backslash is removed here.
fig.suptitle(
    f"Euclidean distance between each Client and Global Model\n byzantine client index < {cfg.federatedlearning.num_byzantines}"
)
plt.show()