In [None]:
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
from IPython.display import display

# Probe-agreement results; per the file names these are the 10-epoch,
# 30-epoch and 30-epoch "noNorm" probe runs.
data_probe_10epochs = pd.read_csv("../results/probing/probe_agreement_10epochs.csv")
data_probe_30epochs = pd.read_csv("../results/probing/probe_agreement_30epochs.csv")
data_probe_30epochs_noNorm = pd.read_csv("../results/probing/probe_agreement_30epochs_noNorm.csv")

# Agreement results for the pretrained classifiers (eps0 / eps3 files).
# The first CSV column is read as the index and then discarded, so both frames
# end up with a fresh 0..n-1 RangeIndex.
data_with_pretrained_classifiers_eps0 = pd.read_csv(
    "../results/probing/agreement_with_pretrained_classifiers_eps0.csv", index_col=0
).reset_index(drop=True)
data_with_pretrained_classifiers_eps3 = pd.read_csv(
    "../results/probing/agreement_with_pretrained_classifiers_eps3.csv", index_col=0
).reset_index(drop=True)

# Pull the model directory name out of the "/"-separated probe paths.
# The relevant path component is at index 1 for the 10- and 30-epoch frames
# and at index 2 for the noNorm frame.
for _frame, _path_component in (
    (data_probe_10epochs, 1),
    (data_probe_30epochs, 1),
    (data_probe_30epochs_noNorm, 2),
):
    for _target_col, _source_col in (("model1", "probe_1"), ("model2", "probe_2")):
        _frame[_target_col] = _frame[_source_col].str.split("/").str[_path_component]

# Homogenize the pretrained-classifier frames: the "score" column is renamed
# to "agreement", and the "imagenet1k" dataset label is shortened to "in1k"
# (presumably to match the dataset token parsed from the probe directory names).
data_with_pretrained_classifiers_eps0 = data_with_pretrained_classifiers_eps0.rename(columns={"score": "agreement"})
data_with_pretrained_classifiers_eps3 = data_with_pretrained_classifiers_eps3.rename(columns={"score": "agreement"})

# `replace` rewrites only the listed label. `map` with a dict would turn every
# dataset label that is missing from the dict into NaN.
data_with_pretrained_classifiers_eps0["dataset"] = data_with_pretrained_classifiers_eps0["dataset"].replace({"imagenet1k": "in1k"})
data_with_pretrained_classifiers_eps3["dataset"] = data_with_pretrained_classifiers_eps3["dataset"].replace({"imagenet1k": "in1k"})

# Short model names as they appear in the probe directory names, mapped to
# the canonical model names used in the rest of the analysis.
probe_model_names_to_canonical_names = {
    "resnet50": "resnet50",
    "resnet18": "resnet18",
    "wide_rn50_2": "wide_resnet50_2",
    "wide_rn50_4": "wide_resnet50_4",
    "densenet161": "densenet161",
    "resnext": "resnext50_32x4d",
    "vgg16": "vgg16_bn"
}


def _split_model_and_dataset(frame, n_trailing_tokens):
    """Split the "_"-separated directory names in `model1` / `model2` in place.

    The token at position `-n_trailing_tokens` of `model1` becomes the
    `dataset` column. Everything before the last `n_trailing_tokens` tokens is
    re-joined with "_" and mapped through `probe_model_names_to_canonical_names`;
    names that are not in the dict become NaN.
    """
    # Tokenize both columns up front: `model1` is overwritten below, and the
    # dataset token has to come from its original value.
    model1_tokens = frame.loc[:, "model1"].str.split("_")
    model2_tokens = frame.loc[:, "model2"].str.split("_")

    frame.loc[:, "dataset"] = model1_tokens.str[-n_trailing_tokens]
    frame.loc[:, "model1"] = (
        model1_tokens.str[:-n_trailing_tokens].str.join("_").map(probe_model_names_to_canonical_names)
    )
    frame.loc[:, "model2"] = (
        model2_tokens.str[:-n_trailing_tokens].str.join("_").map(probe_model_names_to_canonical_names)
    )


# The three runs carry a different number of trailing tokens after the model name.
_split_model_and_dataset(data_probe_10epochs, 1)
_split_model_and_dataset(data_probe_30epochs, 2)
_split_model_and_dataset(data_probe_30epochs_noNorm, 3)

def _canonical_pair_id(row):
    """Return the string form of the sorted (model1, model2) tuple of `row`."""
    return str(tuple(sorted((row["model1"], row["model2"]))))


# Flag probe pairs in which both probes belong to the same model.
for _probe_frame in (data_probe_10epochs, data_probe_30epochs, data_probe_30epochs_noNorm):
    _probe_frame["same_model"] = _probe_frame["model1"] == _probe_frame["model2"]

# Add canonical pair ids: sorting makes the id independent of which model is
# listed first, so (A, B) and (B, A) share one id.
for _result_frame in (
    data_probe_10epochs,
    data_probe_30epochs,
    data_probe_30epochs_noNorm,
    data_with_pretrained_classifiers_eps0,
    data_with_pretrained_classifiers_eps3,
):
    _result_frame["pair_id"] = _result_frame.apply(_canonical_pair_id, axis=1)

# All probe frames get a constant eps of 3.0; the pretrained-classifier
# frames are not touched here.
for _probe_frame in (data_probe_10epochs, data_probe_30epochs, data_probe_30epochs_noNorm):
    _probe_frame["eps"] = 3.0

# Tag every frame with an experiment identifier so rows stay distinguishable
# after the frames are concatenated.
for _experiment_name, _experiment_frame in (
    ("probe_10epochs", data_probe_10epochs),
    ("probe_30epochs", data_probe_30epochs),
    ("probe_30epochs_noNorm", data_probe_30epochs_noNorm),
    ("pretrained_classifier_eps0", data_with_pretrained_classifiers_eps0),
    ("pretrained_classifier_eps3", data_with_pretrained_classifiers_eps3),
):
    _experiment_frame["experiment"] = _experiment_name

# Stack all five result frames into one long table and preview it.
# (To inspect an individual input, call display(<frame>.head()) on it.)
df = pd.concat(
    [
        data_probe_10epochs,
        data_probe_30epochs,
        data_probe_30epochs_noNorm,
        data_with_pretrained_classifiers_eps0,
        data_with_pretrained_classifiers_eps3,
    ]
)
df.head()

In [None]:
# 10-epoch probes, restricted to pairs whose two probes belong to the same
# model: one strip of agreement values per model.
same_model_pairs_10epochs = data_probe_10epochs.loc[data_probe_10epochs["same_model"]]
sns.stripplot(data=same_model_pairs_10epochs, x="agreement", y="model1")

In [None]:
# 10-epoch probes, restricted to pairs of different models: one strip of
# agreement values per model pair.
cross_model_pairs_10epochs = data_probe_10epochs.loc[~data_probe_10epochs["same_model"]]
sns.stripplot(data=cross_model_pairs_10epochs, x="agreement", y="pair_id")


In [None]:

def _different_model_rows(frame):
    """Return the rows of `frame` whose `same_model` flag is False."""
    return frame.loc[~frame.same_model]


def _mean_agreement_per_pair(frame):
    """Average `agreement` over rows that share (eps, pair_id, experiment).

    The grouping keys are returned as ordinary columns.
    """
    grouped = frame.groupby(["eps", "pair_id", "experiment"])
    return grouped[["agreement"]].mean().reset_index()


# Probe frames: only pairs of different models enter the averages.
plotdata_probe_10epochs = _mean_agreement_per_pair(_different_model_rows(data_probe_10epochs))
plotdata_probe_30epochs = _mean_agreement_per_pair(_different_model_rows(data_probe_30epochs))
plotdata_probe_30epochs_noNorm = _mean_agreement_per_pair(_different_model_rows(data_probe_30epochs_noNorm))
# Pretrained-classifier frames are averaged without a same-model filter.
plotdata_pretrained_classifier_eps0 = _mean_agreement_per_pair(data_with_pretrained_classifiers_eps0)
plotdata_pretrained_classifier_eps3 = _mean_agreement_per_pair(data_with_pretrained_classifiers_eps3)

_plotdata_parts = [
    plotdata_probe_10epochs,
    plotdata_probe_30epochs,
    plotdata_probe_30epochs_noNorm,
    plotdata_pretrained_classifier_eps0,
    plotdata_pretrained_classifier_eps3,
]
# Number of aggregated rows per experiment, in the order listed above.
print(*map(len, _plotdata_parts))

plotdata = pd.concat(_plotdata_parts)
sns.set_style("whitegrid")
# Box plot of the averaged agreement per eps value, split by experiment.
sns.catplot(data=plotdata, y="agreement", x="eps", kind="box", hue="experiment")
# One strip per experiment, one colour per model pair.
sns.catplot(data=plotdata, x="agreement", y="experiment", kind="strip", hue="pair_id", aspect=1.5)


In [None]:
# pair_id is the string form of a sorted tuple, e.g. "('resnet18', 'resnet50')".
# Removing the parentheses and splitting on "," still leaves the quote
# characters (and a leading space on the second entry) attached to the names,
# so strip those as well to recover the plain model names.
_pair_members = plotdata["pair_id"].str.strip("()").str.split(",")
plotdata.loc[:, "model1"] = _pair_members.str[0].str.strip(" '")
plotdata.loc[:, "model2"] = _pair_members.str[1].str.strip(" '")

plotdata.head()

In [None]:
# One strip per experiment; points are coloured by the first model of each pair.
sns.catplot(
    data=plotdata,
    x="agreement",
    y="experiment",
    hue="model1",
    kind="strip",
    aspect=1.5,
)


## Realign Paper Plot

In [None]:
# Rebuild the per-pair mean agreement from the raw frames so this cell does
# not depend on `plotdata` as modified by earlier cells.
_group_keys = ["eps", "pair_id", "experiment"]

# Probe frames: only pairs of different models enter the averages.
plotdata_probe_10epochs = (
    data_probe_10epochs.loc[~data_probe_10epochs.same_model]
    .groupby(_group_keys)[["agreement"]]
    .mean()
    .reset_index()
)
plotdata_probe_30epochs = (
    data_probe_30epochs.loc[~data_probe_30epochs.same_model]
    .groupby(_group_keys)[["agreement"]]
    .mean()
    .reset_index()
)
plotdata_probe_30epochs_noNorm = (
    data_probe_30epochs_noNorm.loc[~data_probe_30epochs_noNorm.same_model]
    .groupby(_group_keys)[["agreement"]]
    .mean()
    .reset_index()
)

# Pretrained-classifier frames are averaged without a same-model filter.
plotdata_pretrained_classifier_eps0 = (
    data_with_pretrained_classifiers_eps0.groupby(_group_keys)[["agreement"]].mean().reset_index()
)
plotdata_pretrained_classifier_eps3 = (
    data_with_pretrained_classifiers_eps3.groupby(_group_keys)[["agreement"]].mean().reset_index()
)

plotdata = pd.concat(
    [
        plotdata_probe_10epochs,
        plotdata_probe_30epochs,
        plotdata_probe_30epochs_noNorm,
        plotdata_pretrained_classifier_eps0,
        plotdata_pretrained_classifier_eps3,
    ]
)

# pair_id is the string form of a sorted tuple, e.g. "('resnet18', 'resnet50')".
# Removing the parentheses and splitting on "," still leaves the quote
# characters (and a leading space on the second entry) attached to the names,
# so strip those as well to recover the plain model names.
_pair_members = plotdata["pair_id"].str.strip("()").str.split(",")
plotdata.loc[:, "model1"] = _pair_members.str[0].str.strip(" '")
plotdata.loc[:, "model2"] = _pair_members.str[1].str.strip(" '")

# Keep only the three experiments shown in this figure. The explicit copy
# makes the filtered frame independent of the concatenated one, so the
# relabelling below is an ordinary column assignment rather than a write into
# a slice of another frame.
plotdata = plotdata.loc[plotdata.experiment.isin(["probe_30epochs_noNorm", "pretrained_classifier_eps0", "pretrained_classifier_eps3"])].copy()

# Replace the internal experiment identifiers with the labels used on the axis.
plotdata["experiment"] = plotdata["experiment"].map(
    {
        "probe_30epochs_noNorm": "Probes",
        "pretrained_classifier_eps0": r"Pretrained $\epsilon=0$",
        "pretrained_classifier_eps3": r"Pretrained $\epsilon=3$",
    }
)
# Top-to-bottom order of the categories in the plot.
order = [r"Pretrained $\epsilon=0$", r"Pretrained $\epsilon=3$", "Probes"]

sns.set_style("darkgrid")

# Single-panel figure; each panel is 3 units high and 3 * 1.61 units wide.
n_panels = 1
_panel_height = 3
fig, axes = plt.subplots(nrows=1, ncols=n_panels, figsize=(n_panels * _panel_height * 1.61, _panel_height))

# Both layers share the data, the axis mapping, the category order and the
# target Axes: a translucent box plot with the individual points on top.
_shared_plot_kwargs = dict(data=plotdata, y="experiment", x="agreement", order=order, ax=axes)
sns.boxplot(**_shared_plot_kwargs, boxprops={'alpha': 0.3}, color="darkturquoise")
sns.stripplot(**_shared_plot_kwargs, dodge=True, color="darkcyan", alpha=0.8)

axes.set(ylabel="Classifiers", xlabel="Agreement")

fig.savefig("../figs/probe_agreement.pdf", bbox_inches="tight")

In [None]:
# Sanity check: the distinct experiment labels left in the plotted data.
plotdata["experiment"].unique()