### Load libraries

In [None]:
import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import wandb
import wandb.sdk

In [None]:
sys.path.append("../..")
from src.utils import runs_to_df

### Load data

In [None]:
# Query the W&B project for every run carrying one of the experiment tags.
api = wandb.Api()
# NOTE(review): `api.runs` appears to return a lazily paginated collection of
# public-API run objects rather than a plain list of SDK runs, so this
# annotation is likely approximate — confirm against the wandb docs.
runs: list[wandb.sdk.wandb_run.Run] = api.runs(
    "data-frugal-learning/adv-train",
    filters={"tags": {"$in": ["try1.2", "try1.2.1", "try1.3", "vit-v0"]}},
)

# Convert the fetched runs into a single DataFrame via the project helper.
df_all = runs_to_df(runs)
# Rows with no `model` value are labelled as WideResNet.
# NOTE(review): presumably these runs predate the `model` config field — confirm.
df_all.loc[df_all.model.isna(), "model"] = "ModelT.WideResNet"
# Fraction of fetched runs whose state is "finished" (displayed as cell output).
(df_all.state == "finished").mean()

In [None]:
# Show which model identifiers occur in the loaded runs.
df_all["model"].unique()

In [None]:
# Distinct training-set sizes present in the loaded runs, in ascending order.
n_train_values = sorted(df_all["n_train"].unique())
print(n_train_values)

### CIFAR-10 and CIFAR-5m combined

In [None]:
def plot_zone(xs: pd.Series, ys1: pd.Series, ys2: pd.Series, **kwargs):
    """Shade the band between two curves on the current matplotlib axes.

    The series are converted to plain arrays, so values are paired by
    position and any pandas index is ignored.

    Args:
        xs: x-coordinates of the band.
        ys1: y-values of the first curve.
        ys2: y-values of the second curve.
        **kwargs: Passed through unchanged to ``plt.fill_between``
            (e.g. ``label``, ``alpha``).
    """
    x_values = xs.to_numpy()
    first = ys1.to_numpy()
    second = ys2.to_numpy()

    # The curves may cross, so take the element-wise envelope rather than
    # assuming one of them is always the lower one.
    lower = np.minimum(first, second)
    upper = np.maximum(first, second)

    plt.fill_between(x_values, lower, upper, **kwargs)

# One figure per training regime: test error (1 - accuracy) against the number
# of training datapoints, on log-log axes.
for adv_train in [True, False]:
    plt.figure(dpi=200, figsize=(6, 4))
    train_type = "adversarial" if adv_train else "natural"
    plt.title(f"Data-scaling of {train_type} training, wrn-28-{{10,20}}")

    # WideResNet runs of this regime that have no `dataset` value.
    # NOTE(review): presumably a missing dataset means CIFAR-5m (going by the
    # `df_c5m` name) — confirm against the run configs.
    df = df_all[df_all.model == "ModelT.WideResNet"]
    df_c5m = df[(df.do_adv_training == adv_train) & df.dataset.isna()]

    # Shade the band between the width-10 and width-20 curves.
    # NOTE(review): the two widths are paired by position after sorting, so
    # this assumes both have runs at exactly the same n_train values — verify.
    cdf10 = df_c5m[df_c5m.width == 10].sort_values("n_train")
    cdf20 = df_c5m[df_c5m.width == 20].sort_values("n_train")
    plot_zone(cdf10.n_train, 1 - cdf10.test_acc_adv, 1 - cdf20.test_acc_adv, label="adv-err (wrn)", alpha=0.5)
    plot_zone(cdf10.n_train, 1 - cdf10.test_acc_nat, 1 - cdf20.test_acc_nat, label="nat-err (wrn)", alpha=0.5)

    # Vision-transformer runs of the same regime, drawn as dashed lines.
    df = df_all[df_all.model == "ModelT.VisionTransformer"]
    df = df[df.do_adv_training == adv_train].sort_values("n_train")
    plt.plot(df.n_train, 1 - df.test_acc_adv, label="adv-err (vit)", color="tab:green", linestyle="--")
    plt.plot(df.n_train, 1 - df.test_acc_nat, label="nat-err (vit)", color="tab:red", linestyle="--")

    plt.xscale("log")
    plt.yscale("log")

    plt.xlabel("# of train datapoints")
    plt.ylabel("Test error")
    # Anchor the legend's upper-left corner at the axes' upper-right corner.
    plt.legend(loc="upper left", bbox_to_anchor=(1, 1))


In [None]:
# One figure per training regime: test loss against the number of training
# datapoints, on log-log axes.
for adv_train in [True, False]:
    plt.figure(dpi=200, figsize=(6, 4))
    train_type = "adversarial" if adv_train else "natural"
    plt.title(f"Data-scaling of {train_type} training, wrn-28-{{10,20}}")

    # WideResNet runs of this regime that have no `dataset` value.
    # NOTE(review): presumably a missing dataset means CIFAR-5m (going by the
    # `df_c5m` name) — confirm against the run configs.
    df = df_all[df_all.model == "ModelT.WideResNet"]
    df_c5m = df[(df.do_adv_training == adv_train) & df.dataset.isna()]

    # Shade the band between the width-10 and width-20 curves.
    # NOTE(review): the two widths are paired by position after sorting, so
    # this assumes both have runs at exactly the same n_train values — verify.
    cdf10 = df_c5m[df_c5m.width == 10].sort_values("n_train")
    cdf20 = df_c5m[df_c5m.width == 20].sort_values("n_train")
    # The bands are labelled "(wrn)" so the legend contrasts model families,
    # matching the "(vit)" entries drawn next.
    plot_zone(cdf10.n_train, cdf10.test_loss_adv, cdf20.test_loss_adv, label="adv-loss (wrn)", alpha=0.5)
    plot_zone(cdf10.n_train, cdf10.test_loss_nat, cdf20.test_loss_nat, label="nat-loss (wrn)", alpha=0.5)

    # Vision-transformer runs of the same regime, drawn as dashed lines.
    df = df_all[df_all.model == "ModelT.VisionTransformer"]
    df = df[df.do_adv_training == adv_train].sort_values("n_train")
    plt.plot(df.n_train, df.test_loss_adv, label="adv-loss (vit)", color="tab:green", linestyle="--")
    plt.plot(df.n_train, df.test_loss_nat, label="nat-loss (vit)", color="tab:red", linestyle="--")

    plt.xscale("log")
    plt.yscale("log")

    plt.xlabel("# of train datapoints")
    plt.ylabel("Test loss")
    # Anchor the legend's upper-left corner at the axes' upper-right corner.
    plt.legend(loc="upper left", bbox_to_anchor=(1, 1))
