In [201]:
# Enable IPython's autoreload extension; mode 2 reloads all imported modules
# before each cell runs, so edits to local modules are picked up without a
# kernel restart.
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [202]:
from utils import query, job_config, render, register_filter
import pandas as pd

In [203]:
# Experiment identifiers; each one is passed as the `$experiment` variable of
# the jobs query in the next cell, and the per-experiment job lists are kept
# in this same order.
relevant_experiments = [
    "neurips21-cifar-robustness",
    "neurips21-cifar-robustness-d2",
    "neurips21-cifar-robustness-push-sum",
    "neurips21-cifar-robustness-quasi-global",
    "neurips21-cifar-robustness-gossip",
]

In [204]:
# Query text sent once per experiment. It requests only FINISHED jobs and, for
# each job, its config, annotations and recorded timeseries.
_JOBS_QUERY = """query($experiment: String!) {
  jobs(experiment: $experiment, status: FINISHED) {
    job
    id
    status
    config {
      key
      value
    }
    annotations {
      key
      value
    }
    timeseries {
      measurement
      tags
      values
    }
  }
}"""

# One list of jobs per entry of `relevant_experiments`, in the same order.
data = [
    query(_JOBS_QUERY, {"experiment": experiment_name})["jobs"]
    for experiment_name in relevant_experiments
]

In [205]:
results = []

def optimizer_name(job):
    """Look up the job's config and return ``None`` (unfinished stub).

    NOTE(review): the looked-up config is never used and nothing in this cell
    calls this function -- confirm whether it can be deleted.
    """
    config = job_config(job)

# Flatten every (job, series, entry) triple into one flat record so the whole
# result set can be loaded into a single DataFrame.
for experiment in data:
    for job in experiment:
        # Check the status first so the config of unfinished jobs is never read.
        if job["status"] != "FINISHED":
            continue
        config = job_config(job)
        job_data = {
            "job": job["job"],
            "algorithm": config["algorithm"],
            "non_iid_alpha": config["non_iid_alpha"],
            "num_workers": config["distributed_world_size"],
            "drop_probability": config["simulated_dropped_message_probability"],
            "learning_rate": config["learning_rate"],
            "momentum": config["momentum"],
            "topology": config["topology"],
            "seed": config["seed"],
        }
        # Per-job values, computed once instead of once per timeseries entry.
        warm_start = config.get("optimizer_warm_start", "baseline")
        starred = {
            annotation["key"]: annotation["value"]
            for annotation in job["annotations"]
        }.get("star", False)

        for series in job["timeseries"]:
            # Only accuracy and cross-entropy curves are kept.
            if series["measurement"] not in {"accuracy", "cross_entropy"}:
                continue
            series_data = {"measurement": series["measurement"], **series["tags"]}
            for entry in series["values"]:
                results.append({
                    **job_data,
                    **series_data,
                    "step": entry["epoch"],
                    "warm_start": warm_start,
                    "starred": starred,
                    "value": entry["value"],
                    "mb": entry["mb"],
                    "task": config["task"]
                })

# Build the frame once, after all experiments have been collected (previously
# this ran inside the experiment loop, rebuilding the frame on every pass and
# failing if the first experiment contributed no rows).
df = pd.DataFrame(results)

# Missing non_iid_alpha values are replaced by the sentinel -1.
# NOTE(review): presumably so these rows survive the later groupby on
# non_iid_alpha -- confirm.
df["non_iid_alpha"] = df["non_iid_alpha"].fillna(-1)
df.head()

In [206]:
# Boolean mask: Cifar accuracy measurements recorded after step 195.
last_iterations = (
    df.task.eq("Cifar")
    & (df.step > 195)
    & df.measurement.eq("accuracy")
)
# Restrict to the test split; copy so later edits cannot touch `df`.
last_values = df[last_iterations & df.split.eq("test")].copy()

# Columns that identify one hyper-parameter configuration.
_config_keys = [
    "task",
    "non_iid_alpha",
    "topology",
    "drop_probability",
    "algorithm",
    "learning_rate",
    "momentum",
]

# Step 1: average the selected rows per (configuration, worker, seed).
# NOTE(review): relies on pandas dropping or handling non-numeric columns in
# the mean; newer pandas versions may raise here -- confirm the pinned version.
_per_worker_mean = last_values.groupby(_config_keys + ["worker", "seed"]).agg("mean")

# Step 2: take the minimum over workers for each (configuration, seed).
_worst_worker = _per_worker_mean.groupby(_config_keys + ["seed"]).agg("min").reset_index()

# Keep only rows with a strictly positive value.
achieved_accuracies = _worst_worker[_worst_worker.value > 0]
achieved_accuracies

Unnamed: 0,task,non_iid_alpha,topology,drop_probability,algorithm,learning_rate,momentum,seed,mb,num_workers,starred,step,value
0,Cifar,0.01,double-binary-trees,0.0,relaysum-model,0.1,0.9,1,1428805.0,16,False,198,0.8888
1,Cifar,0.01,double-binary-trees,0.0,relaysum-model,0.3,0.9,1,1428805.0,16,False,198,0.8923
2,Cifar,0.01,double-binary-trees,0.01,relaysum-model,0.1,0.9,1,1428805.0,16,False,198,0.8911
3,Cifar,0.01,double-binary-trees,0.01,relaysum-model,0.3,0.9,1,1428805.0,16,False,198,0.8925
4,Cifar,0.01,double-binary-trees,0.01,relaysum-model,0.6,0.9,1,1428805.0,16,False,198,0.892
5,Cifar,0.01,double-binary-trees,0.1,relaysum-model,0.1,0.9,1,1428805.0,16,False,198,0.87695
6,Cifar,0.01,double-binary-trees,0.1,relaysum-model,0.3,0.9,1,1428805.0,16,False,198,0.89275
7,Cifar,0.01,exponential,0.0,push-sum,0.1,0.9,1,1428804.0,16,False,198,0.88515
8,Cifar,0.01,exponential,0.01,push-sum,0.05,0.9,1,1428804.0,16,False,198,0.88235
9,Cifar,0.01,exponential,0.01,push-sum,0.1,0.9,1,1428804.0,16,False,198,0.8862


In [207]:
# Grouping keys: every configuration column, including learning_rate, so each
# learning rate keeps its own row. Within a group the row with the largest
# `value` is selected (the groups differ only in the remaining columns, e.g.
# seed).
_selection_keys = [
    "task",
    "non_iid_alpha",
    "topology",
    "algorithm",
    "drop_probability",
    "momentum",
    "learning_rate",
]
_best_row_labels = achieved_accuracies.groupby(_selection_keys)["value"].idxmax()
best_lrs_results = achieved_accuracies.loc[_best_row_labels]

In [208]:
# Index level order matters: the table template looks rows up positionally as
# (task, topology, algorithm, learning_rate, drop_probability).
_result_index = [
    "task",
    "topology",
    "algorithm",
    "learning_rate",
    "drop_probability",
    "momentum",
    "non_iid_alpha",
]
best_results = best_lrs_results.set_index(_result_index)
best_results

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,Unnamed: 4_level_0,Unnamed: 5_level_0,Unnamed: 6_level_0,seed,mb,num_workers,starred,step,value
task,topology,algorithm,learning_rate,drop_probability,momentum,non_iid_alpha,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1
Cifar,double-binary-trees,relaysum-model,0.1,0.0,0.9,0.01,1,1428805.0,16,False,198,0.8888
Cifar,double-binary-trees,relaysum-model,0.3,0.0,0.9,0.01,1,1428805.0,16,False,198,0.8923
Cifar,double-binary-trees,relaysum-model,0.1,0.01,0.9,0.01,1,1428805.0,16,False,198,0.8911
Cifar,double-binary-trees,relaysum-model,0.3,0.01,0.9,0.01,1,1428805.0,16,False,198,0.8925
Cifar,double-binary-trees,relaysum-model,0.6,0.01,0.9,0.01,1,1428805.0,16,False,198,0.892
Cifar,double-binary-trees,relaysum-model,0.1,0.1,0.9,0.01,1,1428805.0,16,False,198,0.87695
Cifar,double-binary-trees,relaysum-model,0.3,0.1,0.9,0.01,1,1428805.0,16,False,198,0.89275
Cifar,exponential,push-sum,0.1,0.0,0.9,0.01,1,1428804.0,16,False,198,0.88515
Cifar,exponential,push-sum,0.05,0.01,0.9,0.01,1,1428804.0,16,False,198,0.88235
Cifar,exponential,push-sum,0.1,0.01,0.9,0.01,1,1428804.0,16,False,198,0.8862


In [163]:
@register_filter
def percentage(value):
    """Format a fraction as a LaTeX percentage with one decimal.

    The value is multiplied by 100 and followed by a backslash-escaped percent
    sign, e.g. ``0.8923`` becomes ``89.2\\%``.
    """
    val = value * 100
    # "\\%" spells the backslash explicitly; the previous "\%" produced the
    # same text but is an invalid escape sequence that newer Python versions
    # warn about.
    return f"{val:.1f}\\%"
@register_filter
def three_digits(value):
    """Format ``value`` in fixed-point notation with three decimals."""
    return format(value, ".3f")
@register_filter
def two_digits(value):
    """Format ``value`` in fixed-point notation with two decimals."""
    return format(value, ".2f")

In [219]:
%%template table
%%-set base_accuracy = data.loc["Cifar", "double-binary-trees", "relaysum-model", 0.3, 0.0]["value"].mean()
%%-set grad_base_accuracy = data.loc["Cifar", "double-binary-trees", "relaysum-model", 0.3, 0.0]["value"].mean()
%%-set unreliable_accuracy = data.loc["Cifar", "double-binary-trees", "relaysum-model", 0.3, 0.1]["value"].mean()
%%-set p1_accuracy = data.loc["Cifar", "double-binary-trees", "relaysum-model", 0.3, 0.01]["value"].mean()
%%-set d2_base_accuracy = data.loc["Cifar", "ring", "d2", 0.025, 0.0]["value"].mean()
%%-set qg_base_accuracy = data.loc["Cifar", "ring", "quasi-global-momentum", 0.05, 0.0]["value"].mean()
%%-set push_sum_base_accuracy = data.loc["Cifar", "exponential", "push-sum", 0.1, 0.0]["value"].mean()
%%-set push_sum_1p_accuracy = data.loc["Cifar", "exponential", "push-sum", 0.1, 0.01]["value"].mean()
%%-set push_sum_10p_accuracy = data.loc["Cifar", "exponential", "push-sum", 0.05, 0.1]["value"].mean()
\tablefontsize
\begin{tabularx}{\textwidth}{l X l l l}
    \toprule
      Algorithm & Topology & Reliable network & 1\% dropped messages & 10\% dropped messages \\
    \cmidrule(lr){1-2} \cmidrule(lr){3-5}
    \RelaySumModel w/ momentum & trees & \Var{base_accuracy | percentage} & \Var{p1_accuracy | percentage} & \Var{unreliable_accuracy | percentage} \\
     \dpsgd~\citep{lian2017dpsgd} w/ quasi-global mom.~\citep{lin2021quasiglobal} & ring & \Var{qg_base_accuracy | percentage} & {\color{gray}diverges} & {\color{gray}diverges} \\
     \dsquare~\citep{tang2018d2} w/ momentum  & ring& \Var{d2_base_accuracy | percentage} & {\color{gray}diverges} & {\color{gray}diverges} \\
     SGP~\citep{assran2019sgp} w/ momentum & time-varying & \Var{push_sum_base_accuracy | percentage} & \Var{push_sum_1p_accuracy | percentage} & \Var{push_sum_10p_accuracy | percentage} \\
    \bottomrule
\end{tabularx}

In [220]:
!mkdir -p generated
with open("generated/cifar10-robustness.tex", "w") as fp:
    res = render("table", {"data": best_results})
    fp.write(res)
    print(res)

\tablefontsize
\begin{tabularx}{\textwidth}{l X l l l}
    \toprule
      Algorithm & Topology & Reliable network & 1\% dropped messages & 10\% dropped messages \\
    \cmidrule(lr){1-2} \cmidrule(lr){3-5}
    \RelaySumModel w/ momentum & trees & 89.2\% & 89.3\% & 89.3\% \\
     \dpsgd~\citep{lian2017dpsgd} w/ quasi-global mom.~\citep{lin2021quasiglobal} & ring & 69.3\% & {\color{gray}diverges} & {\color{gray}diverges} \\
     \dsquare~\citep{tang2018d2} w/ momentum  & ring& 87.4\% & {\color{gray}diverges} & {\color{gray}diverges} \\
     SGP~\citep{assran2019sgp} w/ momentum & time-varying & 88.5\% & 88.6\% & 88.1\% \\
    \bottomrule
\end{tabularx}
