In [327]:
import sys

import mlflow
import pandas as pd
from mlflow.tracking import MlflowClient

sys.path.append("../src/")
import matplotlib.pyplot as plt
import numpy as np

from helpers.mlflow_utils import mlflow_tracking_uri

client = MlflowClient(tracking_uri=mlflow_tracking_uri)

experiment_id_mapping = {
    "cifar10-renset": "206",
    "cifar100-resnet": "210",
    "mufac-resnet": "208",
    "cifar10-vit": "211",
    "cifar100-vit": "212",
    "mufac-vit": "213",
}


def baselines(experiment_name):
    experiment_id = experiment_id_mapping[experiment_name]
    unlearning_methods = [
        "finetune",
        "neggrad",
        "relabel",
        "badT",
        "scrub",
        "ssd",
        "unsir",
    ]
    runs = client.search_runs(experiment_id)
    # Convert runs to a DataFrame
    metrics = ["mia", "acc_forget", "acc_retain", "t", "acc_test", "js", "js_proxy"]
    runs_df = pd.DataFrame(
        [
            {k: v for k, v in run.data.metrics.items() if k in metrics}
            for run in runs
            if run.data.tags.get("mlflow.runName") != "our"
        ]
    )
    runs_df["method"] = [
        run.data.tags.get("mlflow.runName")
        for run in runs
        if run.data.tags.get("mlflow.runName") != "our"
    ]
    runs_df["seed"] = [
        run.data.params.get("seed")
        for run in runs
        if run.data.tags.get("mlflow.runName") != "our"
    ]
    runs_df = runs_df.set_index(["method", "seed"])

    gap_metrics = ["mia", "acc_forget", "acc_retain", "acc_test"]
    unlearning_methods = [
        "finetune",
        "neggrad",
        "relabel",
        "badT",
        "scrub",
        "ssd",
        "unsir",
        "retrained",
    ]
    # Calculate the difference in 't' between the unlearning methods and 'retrain' of the same seed
    for method in unlearning_methods:
        for metric in gap_metrics:
            for seed in runs_df.index.get_level_values("seed").unique():
                runs_df.loc[method, f"{metric}_gap"] = abs(
                    runs_df.loc[(method, seed), metric]
                    - runs_df.loc[("retrained", seed), metric]
                )

    grouped_df = runs_df.groupby("method").aggregate(["mean", "std"])
    grouped_df["js"] = grouped_df["js"].apply(lambda x: x * 1e4)
    grouped_df["js_proxy"] = grouped_df["js_proxy"].apply(lambda x: x * 1e4)
    grouped_df = grouped_df.round(2)
    grouped_df["avg_gap"] = (
        grouped_df[
            [
                ("acc_retain_gap", "mean"),
                ("acc_forget_gap", "mean"),
                ("mia_gap", "mean"),
                ("acc_test_gap", "mean"),
            ]
        ]
        .mean(axis=1)
        .round(4)
    )

    grouped_df = grouped_df.sort_values(by=("avg_gap", ""), ascending=True)


    # Specify the order of the columns
    filtered_grouped_df = grouped_df[
        [
            ("avg_gap", ""),
            ("t", "mean"),
            ("js", "mean"),
            ("js_proxy", "mean"),
        ]
    ]
    display(filtered_grouped_df)


def ours(experiment_name):
    experiment_id = experiment_id_mapping[experiment_name]
    unlearning_methods = ["our"]
    runs = client.search_runs(experiment_id)
    # Convert runs to a DataFrame
    metrics = ["mia", "acc_forget", "acc_retain", "t", "acc_test", "js", "js_proxy"]
    runs_df = pd.DataFrame(
        [
            {k: v for k, v in run.data.metrics.items() if k in metrics}
            for run in runs
            if run.data.tags.get("mlflow.runName") == "our"
            or run.data.tags.get("mlflow.runName") == "retrained"
        ]
    )
    runs_df["method"] = [
        run.data.tags.get("mlflow.runName")
        for run in runs
        if run.data.tags.get("mlflow.runName") == "our"
        or run.data.tags.get("mlflow.runName") == "retrained"
    ]
    runs_df["seed"] = [
        run.data.params.get("seed")
        for run in runs
        if run.data.tags.get("mlflow.runName") == "our"
        or run.data.tags.get("mlflow.runName") == "retrained"
    ]
    runs_df["alpha"] = [
        run.data.params.get("alpha")
        for run in runs
        if run.data.tags.get("mlflow.runName") == "our"
        or run.data.tags.get("mlflow.runName") == "retrained"
    ]
    runs_df.loc[runs_df["method"] == "our", "method"] = runs_df["alpha"].astype(str)
    runs_df.drop(columns=["alpha"], inplace=True)
    runs_df = runs_df.set_index(["method", "seed"])

    gap_metrics = ["mia", "acc_forget", "acc_retain", "acc_test"]

    # Calculate the difference in 't' between the unlearning methods and 'retrain' of the same seed
    for metric in gap_metrics:
        for method in [
            "2.0",
            "4.0",
            "8.0",
            "16.0",
            "32.0",
            "64.0",
            "128.0",
            "256.0",
            "512.0",
            "1024.0",
        ]:
            for seed in runs_df.index.get_level_values("seed").unique():
                runs_df.loc[method, f"{metric}_gap"] = abs(
                    runs_df.loc[(method, seed), metric]
                    - runs_df.loc[("retrained", seed), metric]
                )

    grouped_df = runs_df.groupby("method").aggregate(["mean", "std"])
    grouped_df["js"] = grouped_df["js"].apply(lambda x: x * 1e4)
    grouped_df["js_proxy"] = grouped_df["js_proxy"].apply(lambda x: x * 1e4)
    grouped_df = grouped_df.round(2)
    grouped_df["avg_gap"] = (
        grouped_df[
            [
                ("acc_retain_gap", "mean"),
                ("acc_forget_gap", "mean"),
                ("mia_gap", "mean"),
                ("acc_test_gap", "mean"),
            ]
        ]
        .mean(axis=1)
        .round(4)
    )

    columns_to_keep = [
        col
        for col in grouped_df.columns
        if "gap" in col[0] or col[0] in ["js", "js_proxy", "t"]
    ]
    grouped_df = grouped_df.sort_values(by=("avg_gap", ""), ascending=True)

    # Specify the order of the columns
    filtered_grouped_df = grouped_df[
        [
            ("avg_gap", ""),
            ("t", "mean"),
            ("js", "mean"),
            ("js_proxy", "mean"),
        ]
    ]
    display(filtered_grouped_df)

# CIFAR10-ResNet18

In [328]:
baselines('cifar10-renset')
ours('cifar10-renset')

Unnamed: 0_level_0,avg_gap,t,js,js_proxy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
retrained,0.0,5.32,0.0,5.97
ssd,0.0475,0.54,0.82,145.86
relabel,0.0875,0.57,1.0,47.98
unsir,0.09,0.45,0.65,17.29
finetune,0.0925,0.43,1.03,81.45
scrub,0.095,0.58,0.41,62.39
neggrad,0.0975,0.49,1.06,80.64
badT,0.485,0.33,2.39,287.45
original,,6.5,,61.12


Unnamed: 0_level_0,avg_gap,t,js,js_proxy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
128.0,0.0325,0.3,0.39,18.9
16.0,0.05,0.29,0.32,13.21
8.0,0.0625,0.29,0.3,15.75
4.0,0.0725,0.29,0.33,22.99
64.0,0.1025,0.3,0.51,49.8
32.0,0.1075,0.3,0.49,41.75
256.0,0.1125,0.3,0.94,141.04
512.0,0.115,0.3,0.98,147.95
1024.0,,0.3,0.97,147.94
2.0,,0.29,0.35,27.4


# CIFAR100-ResNet18

In [329]:
baselines('cifar100-resnet')
ours('cifar100-resnet')

Unnamed: 0_level_0,avg_gap,t,js,js_proxy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
retrained,0.0,3.39,0.0,16.34
scrub,0.15,0.58,1.87,18.79
ssd,0.1525,0.54,3.04,42.17
unsir,0.3375,0.45,3.05,40.02
finetune,0.355,0.43,6.88,101.26
neggrad,0.355,0.49,6.87,101.75
relabel,0.4175,0.57,5.84,74.93
badT,0.435,0.34,4.3,63.04
original,,1.79,,19.05


Unnamed: 0_level_0,avg_gap,t,js,js_proxy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
4.0,0.1175,0.3,1.49,11.96
32.0,0.1525,0.3,2.23,29.84
64.0,0.1525,0.29,2.48,36.39
8.0,0.1525,0.29,1.96,22.83
128.0,0.155,0.29,2.6,39.61
16.0,0.155,0.29,2.16,27.78
256.0,0.155,0.29,2.61,40.0
512.0,0.155,0.3,2.61,39.95
1024.0,,0.29,2.61,39.93
2.0,,0.3,1.29,8.35


# MUFAC-ResNet18

In [330]:
baselines('mufac-resnet')
ours('mufac-resnet')

Unnamed: 0_level_0,avg_gap,t,js,js_proxy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
retrained,0.0,7.34,0.0,199.05
badT,0.1275,0.65,10.43,469.91
relabel,0.1275,1.06,9.51,444.89
finetune,0.155,0.76,19.52,1439.18
ssd,0.155,1.07,10.3,243.3
scrub,0.16,1.2,10.53,254.92
neggrad,0.1625,0.91,19.16,1546.01
unsir,0.255,1.68,16.32,988.15
original,,3.66,,249.26


Unnamed: 0_level_0,avg_gap,t,js,js_proxy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
4.0,0.0975,0.62,6.31,124.65
8.0,0.1375,0.62,8.43,286.93
32.0,0.15,0.64,11.23,490.19
64.0,0.15,0.64,11.29,494.56
128.0,0.1525,0.64,11.29,493.86
16.0,0.1525,0.62,10.54,444.0
256.0,0.1525,0.64,11.29,493.96
512.0,0.1525,0.64,11.29,493.96
1024.0,,0.64,11.47,536.52
2.0,,0.62,6.9,133.11


# CIFAR10-ViT

In [331]:
baselines('cifar10-vit')
ours('cifar10-vit')

Unnamed: 0_level_0,avg_gap,t,js,js_proxy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
retrained,0.0,111.0,0.0,1.66
scrub,0.0025,16.66,0.01,2.81
ssd,0.0025,13.65,0.02,2.69
finetune,0.0075,11.33,0.01,3.26
unsir,0.0075,10.68,0.01,2.41
neggrad,0.01,12.61,0.03,6.12
relabel,0.0125,12.78,0.02,2.11
badT,0.0375,8.79,0.12,9.21
original,,84.5,,2.88


  runs_df.loc[(method, seed), metric]
  - runs_df.loc[("retrained", seed), metric]
  runs_df.loc[method, f"{metric}_gap"] = abs(
  runs_df.loc[(method, seed), metric]
  - runs_df.loc[("retrained", seed), metric]
  runs_df.loc[method, f"{metric}_gap"] = abs(
  runs_df.loc[(method, seed), metric]
  - runs_df.loc[("retrained", seed), metric]
  runs_df.loc[method, f"{metric}_gap"] = abs(
  runs_df.loc[(method, seed), metric]
  - runs_df.loc[("retrained", seed), metric]
  runs_df.loc[method, f"{metric}_gap"] = abs(
  runs_df.loc[(method, seed), metric]
  - runs_df.loc[("retrained", seed), metric]
  runs_df.loc[method, f"{metric}_gap"] = abs(
  runs_df.loc[(method, seed), metric]
  - runs_df.loc[("retrained", seed), metric]
  runs_df.loc[method, f"{metric}_gap"] = abs(
  runs_df.loc[(method, seed), metric]
  - runs_df.loc[("retrained", seed), metric]
  runs_df.loc[method, f"{metric}_gap"] = abs(
  runs_df.loc[(method, seed), metric]
  - runs_df.loc[("retrained", seed), metric]
  runs_df.loc[

Unnamed: 0_level_0,avg_gap,t,js,js_proxy
Unnamed: 0_level_1,Unnamed: 1_level_1,mean,mean,mean
method,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
128.0,,7.1,0.57,158.72
16.0,,7.03,0.01,1.67
2.0,,7.34,0.01,2.08
256.0,,7.09,0.67,189.01
32.0,,7.07,0.01,1.76
4.0,,7.14,0.01,2.03
512.0,,7.09,0.68,193.67
64.0,,7.1,0.14,33.18
8.0,,7.05,0.01,1.92
retrained,,111.0,0.0,1.66


# CIFAR100-ViT

In [332]:
# baselines('cifar100-vit')
# ours('cifar100-vit')

# MUFAC-ViT

In [333]:
# baselines('mufac-vit')
# ours('mufac-vit')