# Plotting

I ran out of time to implement proper plotting in the main runner script, so the plots are produced in this notebook from the saved CSV results instead.


In [108]:
import os

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
# Global seaborn appearance applied to every figure drawn below.
sns.set_palette("colorblind")
sns.set_style("whitegrid")

# Input/output locations. Each path keeps its trailing slash because file
# names are appended to it with plain string concatenation.
federated_path = "../output/federated/"
centralised_path = "../output/centralised/"
plots_path = "../output/plots/"
# plt.savefig does not create missing directories, so make sure the plot
# folder exists before any figure is written (no-op when already present).
os.makedirs(plots_path, exist_ok=True)

# Metric names; not referenced by the visible cells
# (presumably the metric columns of the result CSVs -- TODO confirm).
metrics = ["accuracy", "precision", "recall", "f1"]
# Passed as figsize= to every figure created in this notebook.
figsize = (10, 6)


In [109]:
def add_value_labels(ax, fmt='%.3f', padding=3):
    """Annotate every bar drawn on ``ax`` with its value.

    Args:
        ax: Axes-like object exposing ``containers`` and ``bar_label``
            (e.g. the Axes returned by ``sns.barplot``).
        fmt: Format forwarded to ``ax.bar_label``; the default prints
            three decimal places.
        padding: Distance between the end of a bar and its label,
            forwarded to ``ax.bar_label``.
    """
    # A grouped bar plot produces one container per hue level, so every
    # container has to be labelled separately.
    for container in ax.containers:
        ax.bar_label(container, fmt=fmt, padding=padding)


## Centralised

In [110]:
# Load both centralised baseline result files and discard the "Unnamed: 0"
# column (presumably an index that was saved along with the data).
baseline_dnn_df = pd.read_csv(centralised_path + "baseline_dnn.csv")
baseline_dnn_df = baseline_dnn_df.drop(columns=["Unnamed: 0"])
print(baseline_dnn_df)

baseline_cnn_df = pd.read_csv(centralised_path + "baseline_cnn.csv")
baseline_cnn_df = baseline_cnn_df.drop(columns=["Unnamed: 0"])
print(baseline_cnn_df)

# Stack the DNN rows on top of the CNN rows.
baseline_df = pd.concat([baseline_dnn_df, baseline_cnn_df], axis=0)
print(baseline_df)

# Tag each row with its model name and promote that tag to the index.
# The two labels require the stacked frame to hold exactly two rows.
baseline_df.insert(0, "Model", ["DNN", "CNN"])
baseline_df = baseline_df.set_index("Model")
print(baseline_df)

# Long format: one (Model, Metric, Score) row per bar to draw.
baseline_df_melt = (
    baseline_df.melt(ignore_index=False)
    .reset_index()
    .set_axis(["Model", "Metric", "Score"], axis=1)
)
print(baseline_df_melt)


In [111]:
# Grouped bar chart: one cluster per model, one bar per metric.
fig, ax = plt.subplots(figsize=figsize)
sns.barplot(data=baseline_df_melt, x="Model", y="Score", hue="Metric", ax=ax)
add_value_labels(ax)
ax.set_title("Centralised - Baseline DNN vs CNN Metrics")
# The legend is left at seaborn's default position.
fig.tight_layout()
fig.savefig(plots_path + "baseline_metrics.png")


### DP


In [112]:
# Load the noise-multiplier metric tables for the centralised CNN and DNN,
# drop the "Unnamed: 0" column and index each table by noise_multiplier.
noise_cnn_df = pd.read_csv(centralised_path + "noise_cnn_metrics.csv")
noise_cnn_df = noise_cnn_df.drop(columns=["Unnamed: 0"])
noise_dnn_df = pd.read_csv(centralised_path + "noise_dnn_metrics.csv")
noise_dnn_df = noise_dnn_df.drop(columns=["Unnamed: 0"])

noise_cnn_df = noise_cnn_df.set_index("noise_multiplier")
noise_dnn_df = noise_dnn_df.set_index("noise_multiplier")

# Show both tables for a quick sanity check.
print(noise_cnn_df)
print(noise_dnn_df)


In [113]:
# Long format: one (noise_multiplier, Metric, Score) row per bar.
noise_dnn_df_melt = (
    noise_dnn_df.melt(ignore_index=False)
    .reset_index()
    .set_axis(["noise_multiplier", "Metric", "Score"], axis=1)
)
print(noise_dnn_df_melt)

# DNN metrics grouped by noise multiplier.
fig, ax = plt.subplots(figsize=figsize)
sns.barplot(
    data=noise_dnn_df_melt,
    x="noise_multiplier",
    y="Score",
    hue="Metric",
    ax=ax,
)
add_value_labels(ax)
ax.set_xlabel("Noise Multiplier")
ax.set_title("Centralised - DNN Metrics vs Noise Multiplier")
fig.tight_layout()
fig.savefig(plots_path + "noise_dnn.png")


In [114]:
# Long format: one (noise_multiplier, Metric, Score) row per bar.
noise_cnn_df_melt = (
    noise_cnn_df.melt(ignore_index=False)
    .reset_index()
    .set_axis(["noise_multiplier", "Metric", "Score"], axis=1)
)
print(noise_cnn_df_melt)

# CNN metrics grouped by noise multiplier.
fig, ax = plt.subplots(figsize=figsize)
sns.barplot(
    data=noise_cnn_df_melt,
    x="noise_multiplier",
    y="Score",
    hue="Metric",
    ax=ax,
)
add_value_labels(ax)
ax.set_xlabel("Noise Multiplier")
ax.set_title("Centralised - CNN Metrics vs Noise Multiplier")
fig.tight_layout()
fig.savefig(plots_path + "noise_cnn.png")


In [115]:
# Show the final row of each noise table; these rows feed the comparison.
print(noise_dnn_df.iloc[-1])
print(noise_cnn_df.iloc[-1])

# One row per model ("DNN", "CNN"), one column per metric.
noise_df = pd.DataFrame(
    {
        "DNN": noise_dnn_df.iloc[-1],
        "CNN": noise_cnn_df.iloc[-1],
    }
).transpose()
print(noise_df)

# Long format: one (Model, Metric, Score) row per bar.
noise_df_melt = (
    noise_df.melt(ignore_index=False)
    .reset_index()
    .set_axis(["Model", "Metric", "Score"], axis=1)
)
print(noise_df_melt)

# Grouped bars comparing the two models.
fig, ax = plt.subplots(figsize=figsize)
sns.barplot(data=noise_df_melt, x="Model", y="Score", hue="Metric", ax=ax)
add_value_labels(ax)
ax.set_title("Centralised - DP DNN vs CNN Metrics ")
fig.tight_layout()
fig.savefig(plots_path + "dp_metrics.png")


In [117]:
# Centralised comparison with and without DP. The last row of both
# baseline_df and noise_df is the one labelled "CNN".
centralised_dp_no_dp = pd.DataFrame(
    {
        "No DP": baseline_df.iloc[-1],
        "DP": noise_df.iloc[-1],
    }
).transpose()
print(centralised_dp_no_dp)

# Long format: one (DP, Metric, Score) row per bar.
centralised_dp_no_dp_melt = (
    centralised_dp_no_dp.melt(ignore_index=False)
    .reset_index()
    .set_axis(["DP", "Metric", "Score"], axis=1)
)
print(centralised_dp_no_dp_melt)


In [118]:
# Grouped bars: "No DP" next to "DP", one bar per metric.
fig, ax = plt.subplots(figsize=figsize)
sns.barplot(
    data=centralised_dp_no_dp_melt,
    x="DP",
    y="Score",
    hue="Metric",
    ax=ax,
)
add_value_labels(ax)
ax.set_title("Centralised -  No DP vs DP Metrics")
fig.tight_layout()
fig.savefig(plots_path + "centralised_dp_vs_no_dp.png")


## Federated

In [119]:
# Federated results without DP; the "Unnamed: 0" column is discarded.
no_dp_federated_df = pd.read_csv(federated_path + "5_clients.csv")
no_dp_federated_df = no_dp_federated_df.drop(columns=["Unnamed: 0"])
print(no_dp_federated_df)

# Number the rows 0..n-1 as communication rounds and index by that number
# (assumes the file holds one row per round, in order -- TODO confirm).
no_dp_federated_df.insert(0, "Round", value=range(len(no_dp_federated_df)))
no_dp_federated_df = no_dp_federated_df.set_index("Round")
print(no_dp_federated_df)

# Long format: one (Round, Metric, Score) row per bar.
no_dp_federated_df_melt = (
    no_dp_federated_df.melt(ignore_index=False)
    .reset_index()
    .set_axis(["Round", "Metric", "Score"], axis=1)
)
print(no_dp_federated_df_melt)


In [129]:
# Metrics per communication round for the federated run without DP.
fig, ax = plt.subplots(figsize=figsize)
sns.barplot(
    data=no_dp_federated_df_melt,
    x="Round",
    y="Score",
    hue="Metric",
    ax=ax,
)
add_value_labels(ax)
ax.set_xlabel("Communication Round")
ax.set_title("Federated -  No DP Improvement Over Communication Rounds")
fig.tight_layout()
fig.savefig(plots_path + "fl_no_dp.png")


In [121]:
# Federated results with DP; the "Unnamed: 0" column is discarded.
dp_federated_df = pd.read_csv(federated_path + "5_clients_dp.csv")
dp_federated_df = dp_federated_df.drop(columns=["Unnamed: 0"])
print(dp_federated_df)

# Number the rows 0..n-1 as communication rounds and index by that number
# (assumes the file holds one row per round, in order -- TODO confirm).
dp_federated_df.insert(0, "Round", value=range(len(dp_federated_df)))
dp_federated_df = dp_federated_df.set_index("Round")
print(dp_federated_df)

# Long format: one (Round, Metric, Score) row per bar.
dp_federated_df_melt = (
    dp_federated_df.melt(ignore_index=False)
    .reset_index()
    .set_axis(["Round", "Metric", "Score"], axis=1)
)
print(dp_federated_df_melt)


In [130]:
# Metrics per communication round for the federated run with DP.
fig, ax = plt.subplots(figsize=figsize)
sns.barplot(
    data=dp_federated_df_melt,
    x="Round",
    y="Score",
    hue="Metric",
    ax=ax,
)
add_value_labels(ax)
ax.set_xlabel("Communication Round")
ax.set_title("Federated - DP Improvement Over Communication Rounds")
fig.tight_layout()
fig.savefig(plots_path + "fl_dp.png")


In [123]:
# Federated comparison with and without DP, using the last row (final
# round index) of each run.
print(no_dp_federated_df.iloc[-1])

fl_dp_vs_no_dp_df = pd.DataFrame(
    {
        "No DP": no_dp_federated_df.iloc[-1],
        "DP": dp_federated_df.iloc[-1],
    }
).transpose()
print(fl_dp_vs_no_dp_df)

# Long format: one (DP, Metric, Score) row per bar.
fl_dp_vs_no_dp_df_melt = (
    fl_dp_vs_no_dp_df.melt(ignore_index=False)
    .reset_index()
    .set_axis(["DP", "Metric", "Score"], axis=1)
)
print(fl_dp_vs_no_dp_df_melt)


In [124]:
# Grouped bars: "No DP" next to "DP", one bar per metric.
fig, ax = plt.subplots(figsize=figsize)
sns.barplot(
    data=fl_dp_vs_no_dp_df_melt,
    x="DP",
    y="Score",
    hue="Metric",
    ax=ax,
)
add_value_labels(ax)
ax.set_title("Federated -  No DP vs DP Metrics")
fig.tight_layout()
fig.savefig(plots_path + "fl_no_dp_vs_dp.png")


## Configuration Comparison


In [127]:
# Gather the last row of each result table into one frame: centralised CNN
# (baseline and DP) and federated (baseline and DP). One row per
# configuration after the transpose.
model_performance_df = pd.DataFrame(
    {
        "Centralised Baseline CNN": baseline_cnn_df.iloc[-1],
        "Centralised DP CNN": noise_cnn_df.iloc[-1],
        "Federated Baseline": no_dp_federated_df.iloc[-1],
        "Federated DP": dp_federated_df.iloc[-1],
    }
).transpose()
print(model_performance_df)

# Long format: one (Model, Metric, Score) row per bar.
model_performance_df_melt = (
    model_performance_df.melt(ignore_index=False)
    .reset_index()
    .set_axis(["Model", "Metric", "Score"], axis=1)
)
print(model_performance_df_melt)


In [128]:
# Grouped bars: one cluster per system configuration, one bar per metric.
fig, ax = plt.subplots(figsize=figsize)
sns.barplot(
    data=model_performance_df_melt,
    x="Model",
    y="Score",
    hue="Metric",
    ax=ax,
)
add_value_labels(ax)
ax.set_xlabel("System Configuration")
ax.set_title("Metrics of Different System Configurations")
fig.tight_layout()
fig.savefig(plots_path + "model_performance.png")
