In [None]:
from torchvision import transforms
import sys
import os
sys.path.append(os.path.abspath(".."))
from models import model_dict
from utils import NormalizeByChannelMeanStd
import numpy as np
from torchvision.datasets import CIFAR10, CIFAR100, ImageFolder
from dataset import TinyImageNetDataset
from torch.utils.data import DataLoader, Dataset, Subset
import torch
import pickle
from itertools import cycle
from utils.evaluation import Hook_handle, analysis, get_micro_eval, get_acc
import pandas as pd


def prepare_train_test_dataset(dataset, model_init_seed, batch_size, unlearn_seed=-1):
    """Build unshuffled evaluation loaders for one dataset.

    No augmentation is applied: the CIFAR splits only go through ``ToTensor``
    and TinyImagenet uses an empty ``Compose``. The per-channel normalization
    layer is returned separately instead of being part of the transform.

    Args:
        dataset: One of ``"cifar10"``, ``"cifar100"`` or ``"TinyImagenet"``.
        model_init_seed: Value passed to ``np.random.seed`` in every
            DataLoader worker process.
        batch_size: Batch size shared by all returned loaders.
        unlearn_seed: Identifier of the forget/retain index files to read from
            ``assets/unlearn_set_idxs``. ``-1`` (default) skips the split.

    Returns:
        ``(train_loader, test_loader, normalization, classes)`` when
        ``unlearn_seed == -1``; otherwise
        ``(train_loader, test_loader, forget_loader, retain_loader,
        normalization, classes)``.

    Raises:
        ValueError: If ``dataset`` is not a supported name.
    """
    supported = ("cifar10", "cifar100", "TinyImagenet")
    if dataset not in supported:
        # Fail early with a clear message; previously an unknown name fell
        # through every branch and crashed later on an unbound ``train_set``.
        raise ValueError(
            f"Unsupported dataset {dataset!r}; expected one of {supported}"
        )

    data_dir = "tiny-imagenet-200" if dataset == "TinyImagenet" else "data"

    def _load_cifar(dataset_cls, root):
        # Both CIFAR variants share the same ToTensor-only pipeline.
        test_transform = transforms.Compose(
            [
                transforms.ToTensor(),
            ]
        )
        train_split = dataset_cls(root, train=True, transform=test_transform, download=False)
        test_split = dataset_cls(root, train=False, transform=test_transform, download=False)
        return train_split, test_split

    if dataset == "cifar10":
        classes = 10
        normalization = NormalizeByChannelMeanStd(
            mean=[0.4914, 0.4822, 0.4465], std=[0.2470, 0.2435, 0.2616]
        )
        train_set, test_set = _load_cifar(CIFAR10, data_dir + '/cifar10')
    elif dataset == "cifar100":
        classes = 100
        normalization = NormalizeByChannelMeanStd(
            mean=[0.5071, 0.4866, 0.4409], std=[0.2673, 0.2564, 0.2762]
        )
        train_set, test_set = _load_cifar(CIFAR100, data_dir + '/cifar100')
    else:  # "TinyImagenet" (validated above)
        classes = 200
        normalization = NormalizeByChannelMeanStd(
            mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]
        )
        test_transform = transforms.Compose([])
        train_path = os.path.join(data_dir, "train/")
        test_path = os.path.join(data_dir, "test/")
        # NOTE(review): the train cache lives under "unlearn/assets/..." while
        # the test cache lives under "assets/..." — confirm both roots are
        # intended for the same working directory.
        train_set = TinyImageNetDataset(
            ImageFolder(train_path, transform=test_transform),
            cache_file='unlearn/assets/tinyimagenet_preprocess/train.pt',
        )
        test_set = TinyImageNetDataset(
            ImageFolder(test_path, transform=test_transform),
            cache_file='assets/tinyimagenet_preprocess/test.pt',
        )

    # Store labels as NumPy arrays on both splits, whatever type the dataset
    # class provided them in.
    train_set.targets = np.array(train_set.targets)
    test_set.targets = np.array(test_set.targets)

    def _init_fn(worker_id):
        # Every worker gets the same seed; worker_id is intentionally unused.
        np.random.seed(int(model_init_seed))

    def _make_loader(data):
        # Single place for the loader settings so all splits stay in sync.
        return DataLoader(
            data,
            batch_size=batch_size,
            shuffle=False,
            worker_init_fn=_init_fn,
            num_workers=4,
            pin_memory=True,
        )

    train_loader = _make_loader(train_set)
    test_loader = _make_loader(test_set)

    if unlearn_seed == -1:
        return train_loader, test_loader, normalization, classes

    # SECURITY: pickle.load can execute arbitrary code. Only read index files
    # generated by this project, never files from an untrusted source.
    idx_prefix = f"assets/unlearn_set_idxs/{dataset}"
    with open(f"{idx_prefix}_forget_set_idx_{unlearn_seed}.pkl", "rb") as f:
        fgt_set_idx = pickle.load(f)
    with open(f"{idx_prefix}_retain_set_idx_{unlearn_seed}.pkl", "rb") as f:
        rtn_set_idx = pickle.load(f)

    # Forget/retain loaders are index-based views over the training split.
    forget_loader = _make_loader(Subset(train_set, fgt_set_idx))
    retain_loader = _make_loader(Subset(train_set, rtn_set_idx))
    return train_loader, test_loader, forget_loader, retain_loader, normalization, classes

In [None]:
# Shared configuration; the later evaluation cells read batch_size and device.
batch_size = 256
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

# Evaluate every original (pre-unlearning) checkpoint on its full train and
# test split: three initialization seeds per (dataset, architecture) pair.
results = []
for dataset, arch in zip(["cifar10", "cifar100", "TinyImagenet", "TinyImagenet"], ["resnet18", "resnet50", "resnet18", "vgg16_bn"]):
    for model_init_seed in range(3):
        # The loaders are built inside the seed loop because
        # prepare_train_test_dataset takes model_init_seed. Calling it before
        # this loop raised NameError on a fresh kernel.
        train_loader, test_loader, normalization, classes = prepare_train_test_dataset(dataset, model_init_seed, batch_size)
        model = model_dict[arch](num_classes=classes)
        model.normalize = normalization
        orig_state = torch.load(f'assets/orig_model/{dataset}_{arch}_model_{model_init_seed}.pth', weights_only=True)
        model.load_state_dict(orig_state)
        model = model.to(device)
        # get_acc is defined elsewhere; set inference mode explicitly so
        # BatchNorm/Dropout layers do not run in training mode.
        model.eval()
        train_acc = get_acc(train_loader, model, device)
        test_acc = get_acc(test_loader, model, device)
        results.append({
            "model_init_seed": model_init_seed,
            "dataset": dataset,
            "architecture": arch,
            "train_accuracy": train_acc,
            "test_accuracy": test_acc
        })

# NOTE(review): this cell writes under "unlearn/assets/csv" while the retrain
# cell writes under "assets/csv" — confirm the intended working directory.
orig_csv_path = "unlearn/assets/csv/orig_model_acc.csv"
# Create the output directory first so a long evaluation run is not lost.
os.makedirs(os.path.dirname(orig_csv_path), exist_ok=True)
df_orig = pd.DataFrame(results)
df_orig.to_csv(orig_csv_path)



In [None]:
# Evaluate every retrained-from-scratch checkpoint on its forget, retain and
# test data: 3 unlearn seeds x 3 init seeds per (dataset, architecture) pair.
results = []
for dataset, arch in zip(["cifar10", "cifar100", "TinyImagenet", "TinyImagenet"], ["resnet18", "resnet50", "resnet18", "vgg16_bn"]):
    for unlearn_seed in range(3):
        for model_init_seed in range(3):
            # train_loader is returned but not evaluated in this cell.
            train_loader, test_loader, forget_loader, retain_loader, normalization, classes = prepare_train_test_dataset(dataset, model_init_seed, batch_size, unlearn_seed)
            model = model_dict[arch](num_classes=classes)
            model.normalize = normalization
            retrain_state = torch.load(f'assets/retrain_model/retrain_{dataset}_{arch}_model_{model_init_seed}_unlearn_{unlearn_seed}.pth', weights_only=True)
            model.load_state_dict(retrain_state)
            model = model.to(device)
            # get_acc is defined elsewhere; set inference mode explicitly so
            # BatchNorm/Dropout layers do not run in training mode.
            model.eval()

            forget_acc = get_acc(forget_loader, model, device)
            retain_acc = get_acc(retain_loader, model, device)
            test_acc = get_acc(test_loader, model, device)

            results.append({
                "model_init_seed": model_init_seed,
                "unlearn_seed": unlearn_seed,
                "dataset": dataset,
                "architecture": arch,
                "forget_accuracy": forget_acc,
                "retain_accuracy": retain_acc,
                "test_accuracy": test_acc
            })

retrain_csv_path = "assets/csv/retrain_model_acc.csv"
# Create the output directory first so a long evaluation run is not lost.
os.makedirs(os.path.dirname(retrain_csv_path), exist_ok=True)
df_retrain = pd.DataFrame(results)
df_retrain.to_csv(retrain_csv_path)


In [None]:
# Evaluate every approximate-unlearning checkpoint on its forget, retain and
# test data: 6 methods x 3 unlearn seeds x 3 init seeds per
# (dataset, architecture) pair.
results = []
for dataset, arch in zip(["cifar10", "cifar100", "TinyImagenet", "TinyImagenet"], ["resnet18", "resnet50", "resnet18", "vgg16_bn"]):
    for method in ["randomlabel", "finetune", "finetune_l1", "neggrad", "GAGD", "randomlabel_salun"]:
        for unlearn_seed in range(3):
            for model_init_seed in range(3):
                # train_loader is returned but not evaluated in this cell.
                train_loader, test_loader, forget_loader, retain_loader, normalization, classes = prepare_train_test_dataset(dataset, model_init_seed, batch_size, unlearn_seed)
                model = model_dict[arch](num_classes=classes)
                model.normalize = normalization
                # Despite its name, retrain_state holds the unlearned
                # checkpoint for `method` here; the variable name is kept in
                # case other cells read it.
                retrain_state = torch.load(f'assets/optimal_basic_unlearn_model/{method}_{dataset}_{arch}_model_{model_init_seed}_unlearn_{unlearn_seed}.pth', weights_only=True)
                model.load_state_dict(retrain_state)
                model = model.to(device)
                # get_acc is defined elsewhere; set inference mode explicitly
                # so BatchNorm/Dropout layers do not run in training mode.
                model.eval()

                forget_acc = get_acc(forget_loader, model, device)
                retain_acc = get_acc(retain_loader, model, device)
                test_acc = get_acc(test_loader, model, device)

                results.append({
                    "model_init_seed": model_init_seed,
                    "unlearn_seed": unlearn_seed,
                    "method": method,
                    "dataset": dataset,
                    "architecture": arch,
                    "forget_accuracy": forget_acc,
                    "retain_accuracy": retain_acc,
                    "test_accuracy": test_acc
                })

# NOTE(review): this cell writes under "unlearn/assets/csv" while the retrain
# cell writes under "assets/csv" — confirm the intended working directory.
unlearn_csv_path = "unlearn/assets/csv/unlearn_model_acc.csv"
# Create the output directory first so a long evaluation run is not lost.
os.makedirs(os.path.dirname(unlearn_csv_path), exist_ok=True)
df_unlearn = pd.DataFrame(results)
df_unlearn.to_csv(unlearn_csv_path)
