# Analysis of internal benchmark results

In [1]:
import pandas as pd
import plotly.express as px
import plotly.io as pio

import bayer_visual_identity as vi

In [2]:
# Activate the theme shipped with the `bayer_visual_identity` helper module
# (imported as `vi`). This runs before any figure is built further down.
# NOTE(review): presumably this registers/selects a Plotly template so that
# later `px` figures pick it up; confirm in `bayer_visual_identity`.
vi.set_bayer_theme()

## Fed-Camelyon16

In [3]:
# Load the raw benchmark results from the CSV file.
RESULTS_FILE = "benchmark_fed_camelyon16_results.csv"
df = pd.read_csv(RESULTS_FILE)

# Fail fast, with a readable message, if the CSV lacks one of the columns
# that the aggregation and plotting cells of this notebook read.
REQUIRED_COLUMNS = {"Method", "Test", "Metric"}
missing_columns = REQUIRED_COLUMNS - set(df.columns)
assert not missing_columns, (
    f"{RESULTS_FILE} is missing required columns: {sorted(missing_columns)}"
)

In [4]:
# Display order of the methods: sorting and grouping on "Method" follow the
# order of this list instead of alphabetical order.
METHOD_CATEGORIES = [
    "Pooled Training",
    "Local 0",
    "Local 1",
    "FedAvg100",
    "Scaffold100",
    "FedProx100",
    "Cyclic100",
    "FedAdagrad100",
    "FedYogi100",
    "FedAdam100",
]

# Cast "Method" to an (unordered) categorical dtype restricted to the list
# above. Any value that is not listed becomes NaN after the cast.
df["Method"] = df["Method"].astype(pd.CategoricalDtype(categories=METHOD_CATEGORIES))

In [5]:
# Aggregate results over all random seeds: for every (Method, Test) pair,
# compute the mean of "Metric" and its standard deviation ("MetricError").
#
# `observed` is passed explicitly because "Method" is categorical: relying on
# the implicit default emits a FutureWarning on recent pandas and the default
# itself changes in pandas 3.0. `observed=False` pins the behaviour this
# notebook has had so far (category/test combinations without data show up as
# NaN rows); switch to `observed=True` to drop them instead.
#
# Named aggregation yields the final column names directly, so no positional
# rename of the flattened columns is needed.
df_agg = (
    df.groupby(["Method", "Test"], observed=False)
    .agg(
        Metric=("Metric", "mean"),
        MetricError=("Metric", "std"),
    )
    .reset_index()
)

In [6]:
# Restrict the aggregated table to the rows whose "Test" value is
# "Pooled Test"; the other test sets are not used below.
is_pooled_test = df_agg["Test"].eq("Pooled Test")
df_agg_pool = df_agg.loc[is_pooled_test]

In [11]:
# Bar chart of the benchmark: one bar per method showing the aggregated
# "Metric", with "MetricError" drawn as the error bar.
fig = px.bar(
    data_frame=df_agg_pool,
    title="FLamby reproducibility - Benchmark results on Fed-Camelyon16",
    x="Method",
    y="Metric",
    error_y="MetricError",
)

# Export the figure next to the notebook: interactive HTML and static SVG.
fig.write_html("benchmark_fed_camelyon16_results.html")
fig.write_image("benchmark_fed_camelyon16_results.svg")

# Render the figure inline.
fig.show()

In [10]:
# Log the benchmark figure to MLflow as an HTML artifact.
# `fig` is created by the plotting cell above, so that cell must run first.
import os

import mlflow

# Honour MLFLOW_TRACKING_URI when it is set so the notebook can target another
# tracking server without being edited; otherwise use the local server.
mlflow.set_tracking_uri(os.environ.get("MLFLOW_TRACKING_URI", "http://localhost:8003"))
mlflow.set_experiment("flamby-benchmark-fed-camelyon16")

# NOTE(review): the saved output of this cell ends with
# "ModuleNotFoundError: No module named 'google.cloud'". Presumably the
# tracking server stores artifacts on Google Cloud Storage and the client
# environment lacks the `google-cloud-storage` package; confirm and install
# it, then re-run the notebook top to bottom.
with mlflow.start_run():
    mlflow.log_figure(fig, "benchmark_fed_camelyon16_results.html")

2023/02/15 17:15:23 INFO mlflow.tracking.fluent: Experiment with name 'flamby-benchmark-fed-camelyon16' does not exist. Creating a new experiment.


ModuleNotFoundError: No module named 'google.cloud'