# Histograms (Figs. 3 and 9)

In [None]:
%load_ext autoreload
%autoreload 2
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
from aeroblade.paper import DATASET_ORDER, configure_mpl, get_nice_name, set_figsize

# Apply the plotting configuration and figure size provided by the
# aeroblade.paper helpers (both called with their defaults).
configure_mpl()
set_figsize()

# Directory that receives the histogram PDFs; missing parent directories are
# created as well, and an already existing directory is not an error.
output_dir = Path("output/01/default/figures")
output_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Load the per-image distances, keeping only the rows with repo_id == 'max'.
distances = pd.read_parquet("output/01/default/distances.parquet").query(
    "repo_id == 'max'"
)
# Replace the raw directory identifiers by their display names.
distances[["dir"]] = distances[["dir"]].map(get_nice_name)

# One figure per distance metric, with one (semi-transparent) histogram per
# dataset overlaid in the order given by DATASET_ORDER.
for distance_metric, df in distances.groupby("distance_metric", observed=True):
    # The loop variable is deliberately not called `dir`: at notebook top level
    # that would shadow the builtin `dir()` for every later cell.
    for dataset in DATASET_ORDER:
        # The stored distances are negated before plotting.
        plt.hist(
            -df.query("dir == @dataset").distance.values,
            label=dataset,
            alpha=0.7,
        )
    plt.legend()
    plt.xlabel(get_nice_name(distance_metric))
    plt.ylabel("Count")
    plt.savefig(output_dir / f"hist_{distance_metric}.pdf")
    # Close the figure so the next metric starts from an empty canvas.
    plt.close()

# Detection Performance

In [None]:
%load_ext autoreload
%autoreload 2
from pathlib import Path

import pandas as pd
from aeroblade.paper import DATASET_ORDER, get_nice_name

# Directory that receives the LaTeX tables of this section. `parents=True`
# (as used for the figures directory) lets the cell succeed even when the
# intermediate directories do not exist yet.
output_dir = Path("output/01/default/tables")
output_dir.mkdir(parents=True, exist_ok=True)

## AP (Tab. 1 and part of Tab. 3)

In [None]:
# Columns whose raw identifiers are replaced by display names.
name_cols = ["fake_dir", "repo_id", "distance_metric"]

# Load the detection results; only the AP column is needed for this table.
detection_results = pd.read_csv("output/01/default/detection_results.csv", index_col=0)
detection_results = detection_results.drop(columns=["transform", "tpr5fpr"])
detection_results[name_cols] = detection_results[name_cols].map(get_nice_name)

# Rows: (distance metric, repo_id); columns: one per fake dataset.
table = detection_results.pivot(
    index=["distance_metric", "repo_id"], columns="fake_dir"
)
# Drop the outer column level that pivot() adds for the value column.
table = table.droplevel(0, axis=1)
# Order the columns by DATASET_ORDER without its last entry.
table = table[DATASET_ORDER[:-1]]
# Fix the row order of the second index level, then relabel 'max' as 'min'.
table = table.reindex(index=["SD1", "SD2", "KD2.1", "max"], level=1)
table = table.rename(index={"max": "min"})
table = table.rename_axis(index=["Distance", "AE"], columns=None)

# Three decimals, column-wise maximum wrapped in \textbf, written as LaTeX.
styler = table.style.format(precision=3)
styler = styler.highlight_max(axis="index", props="textbf:--rwrap;")
styler.to_latex(output_dir / "AP.tex", clines="skip-last;data")

## TPR@5%FPR (part of Tab. 3)

In [None]:
# Columns whose raw identifiers are replaced by display names.
name_cols = ["fake_dir", "repo_id", "distance_metric"]

# Load the detection results; only the tpr5fpr column is needed for this table.
detection_results = pd.read_csv("output/01/default/detection_results.csv", index_col=0)
detection_results = detection_results.drop(columns=["transform", "ap"])
detection_results[name_cols] = detection_results[name_cols].map(get_nice_name)

# Rows: (distance metric, repo_id); columns: one per fake dataset.
table = detection_results.pivot(
    index=["distance_metric", "repo_id"], columns="fake_dir"
)
# Drop the outer column level that pivot() adds for the value column.
table = table.droplevel(0, axis=1)
# Order the columns by DATASET_ORDER without its last entry.
table = table[DATASET_ORDER[:-1]]
# Fix the row order of the second index level, then relabel 'max' as 'min'.
table = table.reindex(index=["SD1", "SD2", "KD2.1", "max"], level=1)
table = table.rename(index={"max": "min"})
table = table.rename_axis(index=["Distance", "AE"], columns=None)

# Three decimals, column-wise maximum wrapped in \textbf, written as LaTeX.
styler = table.style.format(precision=3)
styler = styler.highlight_max(axis="index", props="textbf:--rwrap;")
styler.to_latex(output_dir / "TPR.tex", clines="skip-last;data")

## Attribution (Tabs. 2 and 5)

In [None]:
# Columns whose raw identifiers are replaced by display names.
name_cols = ["dir", "repo_id", "distance_metric"]

# Load the attribution results; the 'transform' column is not used here.
attribution_results = pd.read_csv(
    "output/01/default/attribution_results.csv", index_col=0
)
attribution_results = attribution_results.drop(columns=["transform"])
attribution_results[name_cols] = attribution_results[name_cols].map(get_nice_name)

# Rows: (distance metric, repo_id); columns: one per dataset directory.
table = attribution_results.pivot(
    index=["distance_metric", "repo_id"], columns="dir"
)
# Drop the outer column level that pivot() adds for the value column.
table = table.droplevel(0, axis=1)
# Order the columns by DATASET_ORDER without its last entry and fix the row
# order of the second index level.
table = table[DATASET_ORDER[:-1]]
table = table.reindex(index=["SD1", "SD2", "KD2.1"], level=1)
table = table.rename_axis(index=["Distance", "AE"], columns=None)

# Three decimals, written as LaTeX (no highlighting in this table).
styler = table.style.format(precision=3)
styler.to_latex(output_dir / "attribution.tex", clines="skip-last;data")

# Distance Metric Ablation (Tab. 4)

In [None]:
%load_ext autoreload
%autoreload 2
from pathlib import Path

import pandas as pd
from aeroblade.paper import DATASET_ORDER, configure_mpl, get_nice_name, set_figsize

# Directory that receives the ablation table. `parents=True` (as used for the
# histogram figures directory) lets the cell succeed even when the
# intermediate directories do not exist yet.
output_dir = Path("output/01/distance_metric_ablation/tables")
output_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Load the ablation results, keep the rows with repo_id == 'max', and drop
# everything except the dataset, the distance metric and the AP value.
metric_ablation = pd.read_csv(
    "output/01/distance_metric_ablation/detection_results.csv", index_col=0
)
metric_ablation = metric_ablation.query("repo_id == 'max'")
metric_ablation = metric_ablation.drop(columns=["repo_id", "transform", "tpr5fpr"])

# Replace raw identifiers by display names.
name_cols = ["fake_dir", "distance_metric"]
metric_ablation[name_cols] = metric_ablation[name_cols].map(get_nice_name)

# Rows: distance metric; columns: one per fake dataset.
table = metric_ablation.pivot(index=["distance_metric"], columns="fake_dir")
# Drop the outer column level that pivot() adds for the value column.
table = table.droplevel(0, axis=1)
# Order the columns by DATASET_ORDER without its last entry.
table = table[DATASET_ORDER[:-1]]
table = table.rename_axis(index=["Distance"], columns=None)

# Three decimals, column-wise maximum wrapped in \textbf, written as LaTeX.
styler = table.style.format(precision=3)
styler = styler.highlight_max(axis="index", props="textbf:--rwrap;")
styler.to_latex(output_dir / "AP.tex", clines="skip-last;data")

# Robustness

In [None]:
%load_ext autoreload
%autoreload 2
from pathlib import Path

import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
from aeroblade.paper import DATASET_ORDER, configure_mpl, get_nice_name, set_figsize

# Apply the plotting configuration from aeroblade.paper and select the figure
# size used for the robustness plots.
configure_mpl()
set_figsize("single", factor=0.49, ratio=1.0)

# Directory that receives the robustness figures. `parents=True` (as used for
# the histogram figures directory) lets the cell succeed even when the
# intermediate directories do not exist yet.
output_dir = Path("output/01/robustness/figures")
output_dir.mkdir(parents=True, exist_ok=True)

In [None]:
# Load the robustness detection results, discard the tpr5fpr column and keep
# only the rows with repo_id == 'max'.
raw_results = pd.read_csv("output/01/robustness/detection_results.csv", index_col=0)
robustness_ap = raw_results.drop(columns="tpr5fpr").query("repo_id == 'max'")

# split transform config into name and parameter
# (the string is split at "_"; entries without a second part end up with a
# missing parameter)
name_and_parameter = robustness_ap["transform"].str.split("_").tolist()
transform_split = pd.DataFrame(
    name_and_parameter,
    columns=["transform", "parameter"],
    index=robustness_ap.index,
)
transform_split["parameter"] = transform_split["parameter"].astype(float)

# Swap the combined 'transform' column for the two split columns.
without_transform = robustness_ap.drop(columns="transform")
robustness_ap = pd.concat([without_transform, transform_split], axis=1)

# Replace raw identifiers by display names.
name_cols = ["fake_dir", "repo_id"]
robustness_ap[name_cols] = robustness_ap[name_cols].map(get_nice_name)

## AP per LPIPS Layer (Figs. 7 and 16-21)

In [None]:
# For every distance metric and every perturbation type, write two figures:
#   <metric>_<transform>.pdf           one AP curve per fake dataset
#   <metric>_<transform>_averaged.pdf  a single aggregated curve with a
#                                      min/max band
for distance_metric, dist_df in robustness_ap.groupby("distance_metric"):
    clean = dist_df.query("transform == 'clean'")
    # "/" in the metric name would act as a path separator in the file name.
    file_stem = distance_metric.replace("/", "_")

    for transform, label, clean_label in zip(
        ["jpeg", "crop", "blur", "noise"],
        # Raw strings: "\s" is not a valid escape sequence in a normal string
        # literal and triggers a warning on current Python versions.
        [r"$q$", r"$f$", r"$\sigma$", r"$\sigma$"],
        # Parameter value at which the unperturbed ('clean') results are drawn.
        [100, 1.0, 0.0, 0.0],
    ):
        transformed = dist_df.query("transform == @transform")
        # Add the clean results as an extra point on this transform's axis.
        clean_ = clean.copy()
        clean_.loc[:, "parameter"] = clean_label
        transformed = pd.concat([transformed, clean_])

        # Figure 1: one line per fake dataset; legend only in the JPEG plot.
        sns.lineplot(
            data=transformed,
            x="parameter",
            y="ap",
            hue="fake_dir",
            marker="o",
            markeredgecolor="auto",
            legend=transform == "jpeg",
            hue_order=DATASET_ORDER[:-1],
        )
        plt.xlabel(label)
        plt.ylabel("AP")
        plt.ylim(0.3, 1.02)
        if transform == "jpeg":
            plt.legend(ncols=2)
            # fix label with q=100
            # NOTE(review): relies on the q=100 tick being the fourth tick
            # label -- confirm if the tick locations ever change.
            ax = plt.gca()
            labels = [item.get_text() for item in ax.get_xticklabels()]
            labels[3] = "w/o"
            ax.set_xticklabels(labels)
        if transform in ["jpeg", "crop"]:
            # Flip the x-axis direction for these two transforms.
            plt.gca().invert_xaxis()
        plt.savefig(output_dir / f"{file_stem}_{transform}.pdf")
        plt.close()

        # Figure 2: no hue, so rows sharing a parameter value are aggregated
        # into one curve; the error band spans their minimum and maximum.
        sns.lineplot(
            data=transformed,
            x="parameter",
            y="ap",
            marker="o",
            markeredgecolor="auto",
            legend=transform == "jpeg",
            errorbar=lambda x: (x.min(), x.max()),
        )
        plt.xlabel(label)
        plt.ylabel("AP")
        plt.ylim(0.3, 1.02)
        if transform == "jpeg":
            # fix label with q=100
            # NOTE(review): same fourth-tick assumption as in the plot above.
            ax = plt.gca()
            labels = [item.get_text() for item in ax.get_xticklabels()]
            labels[3] = "w/o"
            ax.set_xticklabels(labels)
        if transform in ["jpeg", "crop"]:
            plt.gca().invert_xaxis()
        plt.savefig(output_dir / f"{file_stem}_{transform}_averaged.pdf")
        plt.close()

## Best Layer for Each Setting (Fig. 22)

In [None]:
# For every (fake_dir, transform, parameter) group pick the row with the
# highest AP, i.e. the best distance metric for that setting, and then drop
# the metric column. dropna=False keeps groups whose key contains a missing
# value (e.g. rows without a parameter).
best_layer_ap = robustness_ap.loc[
    robustness_ap.groupby(
        ["fake_dir", "transform", "parameter"], as_index=False, sort=False, dropna=False
    )
    .idxmax()["ap"]
    .values
].drop(columns="distance_metric")

clean = best_layer_ap.query("transform == 'clean'")
for transform, label, clean_label in zip(
    ["jpeg", "crop", "blur", "noise"],
    # Raw strings: "\s" is not a valid escape sequence in a normal string
    # literal and triggers a warning on current Python versions.
    [r"$q$", r"$f$", r"$\sigma$", r"$\sigma$"],
    # Parameter value at which the unperturbed ('clean') results are drawn.
    [100, 1.0, 0.0, 0.0],
):
    transformed = best_layer_ap.query("transform == @transform")
    # Add the clean results as an extra point on this transform's axis.
    clean_ = clean.copy()
    clean_.loc[:, "parameter"] = clean_label
    transformed = pd.concat([transformed, clean_])
    # No hue: rows sharing a parameter value are aggregated into one curve;
    # the error band spans their minimum and maximum.
    sns.lineplot(
        data=transformed,
        x="parameter",
        y="ap",
        marker="o",
        markeredgecolor="auto",
        legend=transform == "jpeg",
        errorbar=lambda x: (x.min(), x.max()),
    )

    plt.xlabel(label)
    plt.ylabel("AP")
    plt.ylim(0.3, 1.02)
    if transform == "jpeg":
        # fix label with q=100
        # NOTE(review): relies on the q=100 tick being the fourth tick label
        # -- confirm if the tick locations ever change.
        ax = plt.gca()
        labels = [item.get_text() for item in ax.get_xticklabels()]
        labels[3] = "w/o"
        ax.set_xticklabels(labels)
    if transform in ["jpeg", "crop"]:
        # Flip the x-axis direction for these two transforms.
        plt.gca().invert_xaxis()
    plt.savefig(output_dir / f"best_layer_{transform}.pdf")
    plt.close()