In [1]:
!nvidia-smi

Mon Jul 21 23:39:40 2025       
+---------------------------------------------------------------------------------------+
| NVIDIA-SMI 535.183.01             Driver Version: 535.183.01   CUDA Version: 12.2     |
|-----------------------------------------+----------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |         Memory-Usage | GPU-Util  Compute M. |
|                                         |                      |               MIG M. |
|   0  Tesla P100-PCIE-16GB           Off | 00000000:04:00.0 Off |                    0 |
| N/A   42C    P0              34W / 250W |  12422MiB / 16384MiB |      0%      Default |
|                                         |                      |                  N/A |
+-----------------------------------------+----------------------+----------------------+
|   1  Tesla P100-PCIE-16GB           Off | 00000000:06:00.0 Off |  

In [2]:
import torch

# Select the compute device: a specific CUDA GPU when available, otherwise the CPU.
DEVICE_NUM = 1       # preferred CUDA device index
ADDITIONAL_GPU = 1   # NOTE(review): not referenced by any visible cell - confirm it is still needed

if torch.cuda.is_available():
    # A hard-coded index crashes with "invalid device ordinal" on machines that
    # expose fewer GPUs, so validate it and fall back to the first device.
    if not 0 <= DEVICE_NUM < torch.cuda.device_count():
        print(f"WARNING: cuda:{DEVICE_NUM} is not available "
              f"({torch.cuda.device_count()} device(s) found); falling back to cuda:0")
        DEVICE_NUM = 0
    # Makes DEVICE_NUM the current device, so a bare "cuda" device resolves to it.
    torch.cuda.set_device(DEVICE_NUM)
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
    DEVICE_NUM = -1  # sentinel: no GPU in use

print(f"INFO: Using device - {device}:{DEVICE_NUM}")

INFO: Using device - cuda:1


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, models, transforms
from torch.utils.data import DataLoader, Subset, Dataset, ConcatDataset
import numpy as np
import pandas as pd
from scipy import stats
import time, copy, itertools, os, random

# ===================================================================
# 0. 재현성(Reproducibility)을 위한 시드 설정
# ===================================================================
def set_seed(seed):
    """Seed every random number generator used by the notebook.

    Seeds Python's ``random``, NumPy's global RNG and PyTorch (CPU, the current
    CUDA device and all CUDA devices), then switches cuDNN to deterministic
    mode with benchmarking disabled.

    Args:
        seed: Integer seed applied to all generators.
    """
    # Python / NumPy generators.
    random.seed(seed)
    np.random.seed(seed)

    # PyTorch generators: CPU, current CUDA device, then every CUDA device.
    for seeder in (torch.manual_seed, torch.cuda.manual_seed, torch.cuda.manual_seed_all):
        seeder(seed)

    # Trade cuDNN autotuning speed for repeatable kernels.
    torch.backends.cudnn.benchmark = False
    torch.backends.cudnn.deterministic = True

# Global base seed: applied to every RNG once when this cell runs, and reused
# by main() as the base for the per-loader torch.Generator seeds.
SEED = 42
set_seed(SEED)
print(f"Global random seed set to {SEED}")

# ===================================================================
# 1. 실험 환경 설정
# ===================================================================
# Experiment-wide settings shared by the helpers below.
# Keys marked NOTE(review) are not read by any code visible in this notebook.
CONFIG = {
    "run_training": True,  # NOTE(review): unused in the visible code - confirm
    "model_save_dir": "saved_models",  # directory holding original models, ES partitions and unlearned checkpoints
    "num_runs": 3,  # number of independent runs iterated by main()
    "epochs": 30,  # NOTE(review): presumably the original-training epoch count; unused here - confirm
    "unlearn_epochs": 10,  # epochs used by both unlearning methods
    "batch_size": 256,
    "lr": 0.1,  # NOTE(review): presumably the original-training learning rate; unused here - confirm
    "unlearn_lr": 0.01, # learning rate for the revised unlearning procedures
    "momentum": 0.9,  # SGD momentum (train_model and SalUn)
    "weight_decay": 5e-4,  # SGD weight decay (train_model only; the SalUn optimizer does not set it)
    "forget_set_size": 3000,  # NOTE(review): unused in the visible code - confirm
    # Bare "cuda" means the *current* CUDA device; the device-selection cell
    # above chooses it via torch.cuda.set_device.
    "device": "cuda" if torch.cuda.is_available() else "cpu",
    "salun_sparsity": 0.5,  # fraction of parameters (lowest saliency) whose gradients SalUn masks out
}
print(f"Using device: {CONFIG['device']}")

# ===================================================================
# 2. 모델 및 데이터 헬퍼 (기존과 동일)
# ===================================================================
def get_model():
    """Build an untrained 10-class ResNet-18 and move it to ``CONFIG["device"]``."""
    network = models.resnet18(weights=None, num_classes=10)
    return network.to(CONFIG["device"])

def train_model(model, train_loader, epochs, lr, is_unlearning=False):
    """Train ``model`` in place with SGD and cross-entropy loss.

    Args:
        model: Network to optimise; left in train mode on return.
        train_loader: Iterable yielding ``(inputs, targets)`` batches.
        epochs: Number of passes over ``train_loader``.
        lr: Initial SGD learning rate.
        is_unlearning: When True the learning rate stays constant; otherwise a
            cosine-annealing schedule with ``T_max=epochs`` is stepped once per
            epoch.

    Momentum, weight decay and the target device are read from ``CONFIG``.
    """
    target_device = CONFIG["device"]
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(
        model.parameters(),
        lr=lr,
        momentum=CONFIG["momentum"],
        weight_decay=CONFIG["weight_decay"],
    )
    if is_unlearning:
        scheduler = None
    else:
        scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)

    model.train()
    for epoch_idx in range(epochs):
        started = time.time()
        for inputs, targets in train_loader:
            inputs = inputs.to(target_device)
            targets = targets.to(target_device)
            optimizer.zero_grad()
            batch_loss = criterion(model(inputs), targets)
            batch_loss.backward()
            optimizer.step()
        if scheduler is not None:
            scheduler.step()
        print(f"    Epoch {epoch_idx+1}/{epochs} completed in {time.time()-started:.2f}s")

def evaluate_model(model, loader):
    """Return the top-1 accuracy of ``model`` over ``loader`` as a percentage.

    Args:
        model: Network to evaluate; switched to eval mode (and left there).
        loader: Iterable yielding ``(inputs, targets)`` batches.

    Returns:
        Accuracy in the range [0, 100], or ``float("nan")`` when ``loader``
        yields no samples (accuracy is undefined for an empty set; previously
        this raised ``ZeroDivisionError``).
    """
    model.eval()
    total = correct = 0
    with torch.no_grad():
        for x, y in loader:
            x, y = x.to(CONFIG["device"]), y.to(CONFIG["device"])
            pred = model(x).argmax(1)
            total += y.size(0)
            correct += (pred == y).sum().item()
    if total == 0:
        # Empty forget/retain partition: report "undefined" instead of crashing.
        return float("nan")
    return 100 * correct / total

# ===================================================================
# 3. 언러닝 알고리즘 (수정된 버전)
# ===================================================================
class RelabelDataset(Dataset):
    """Serve the samples of a dataset with a fixed, randomly chosen *wrong* label.

    The first time an index is requested, a replacement label different from
    the original one is drawn and cached in ``new_labels``; later requests for
    the same index reuse the cached label, so labels stay stable across epochs.

    Args:
        original_dataset: Indexable dataset returning ``(image, label)`` pairs.
        num_classes: Number of classes; replacement labels lie in
            ``[0, num_classes)``.

    Raises:
        ValueError: If ``num_classes`` is smaller than 2, because no label
            different from the original could then be chosen.
    """

    def __init__(self, original_dataset, num_classes=10):
        if num_classes < 2:
            raise ValueError("num_classes must be at least 2 to draw a different label")
        self.original_dataset = original_dataset
        self.num_classes = num_classes
        self.new_labels = {}  # index -> cached replacement label

    def __len__(self):
        return len(self.original_dataset)

    def __getitem__(self, index):
        # Read the wrapped dataset exactly once per access: each read may decode
        # and augment the image, so a second read only to obtain the label
        # doubles the loading cost.
        image, original_label = self.original_dataset[index]

        if index not in self.new_labels:
            # Adding an offset in [1, num_classes - 1] modulo num_classes picks
            # uniformly among the other classes with one bounded draw.
            offset = torch.randint(1, self.num_classes, (1,)).item()
            self.new_labels[index] = (int(original_label) + offset) % self.num_classes

        return image, self.new_labels[index]

# Random Label Unlearning 수정된 함수
def unlearn_random_label_fixed(orig, retain_set, forget_set, cfg):
    """Random-label unlearning: fine-tune on retain data plus mislabeled forget data.

    Args:
        orig: Trained model; it is deep-copied and never modified.
        retain_set: Dataset of samples to keep, with their true labels.
        forget_set: Dataset of samples to forget; wrapped in ``RelabelDataset``
            so each sample gets a random incorrect label.
        cfg: Mapping providing ``batch_size``, ``unlearn_epochs`` and
            ``unlearn_lr``.

    Returns:
        The fine-tuned copy of ``orig``.
    """
    unlearned = copy.deepcopy(orig)

    # Retain samples and relabeled forget samples are shuffled together.
    mixed_loader = DataLoader(
        ConcatDataset([retain_set, RelabelDataset(forget_set)]),
        batch_size=cfg["batch_size"],
        shuffle=True,
    )

    # Constant learning rate (no scheduler) for the unlearning phase.
    train_model(
        unlearned,
        mixed_loader,
        cfg["unlearn_epochs"],
        cfg["unlearn_lr"],
        is_unlearning=True,
    )
    return unlearned

# SalUn Unlearning 수정된 함수
def unlearn_salun_fixed(orig, retain_set, forget_set, cfg):
    """SalUn-style unlearning: saliency-masked fine-tuning with random labels.

    1. Accumulate the absolute gradient of the cross-entropy loss on the forget
       set for every parameter element (the "saliency").
    2. Build a binary mask that keeps the elements whose saliency is strictly
       above the k-th smallest value, with ``k = int(numel * salun_sparsity)``.
    3. Fine-tune a copy of the model on retain data plus randomly relabeled
       forget data, zeroing the gradient of every masked-out element.

    Args:
        orig: Trained model; it is deep-copied and never modified.
        retain_set: Dataset of samples to keep, with their true labels.
        forget_set: Dataset of samples to forget.
        cfg: Mapping providing ``batch_size``, ``device``, ``salun_sparsity``,
            ``unlearn_lr``, ``momentum`` and ``unlearn_epochs``.

    Returns:
        The fine-tuned copy of ``orig`` (left in train mode).
    """
    model = copy.deepcopy(orig)

    # 1. Saliency: sum of |grad| over all forget batches, one tensor per parameter.
    saliency = [torch.zeros_like(p) for p in model.parameters()]
    crit = nn.CrossEntropyLoss()
    forget_loader_for_saliency = DataLoader(forget_set, batch_size=cfg["batch_size"])

    model.eval()  # eval mode while measuring saliency
    for x, y in forget_loader_for_saliency:
        x, y = x.to(cfg["device"]), y.to(cfg["device"])
        model.zero_grad()
        loss = crit(model(x), y)
        loss.backward()
        for i, p in enumerate(model.parameters()):
            if p.grad is not None:
                saliency[i] += p.grad.abs()

    flat_saliency = torch.cat([s.flatten() for s in saliency])
    k = int(len(flat_saliency) * cfg["salun_sparsity"])
    if k < 1:
        # torch.kthvalue requires k >= 1; a sparsity that rounds down to zero
        # frozen elements means every parameter stays trainable.
        masks = [torch.ones_like(s) for s in saliency]
    else:
        # Clamp so a sparsity above 1.0 cannot request an out-of-range k.
        threshold, _ = torch.kthvalue(flat_saliency, min(k, len(flat_saliency)))
        masks = [(s > threshold).float() for s in saliency]

    # 2. Masked retraining on retain data + randomly relabeled forget data.
    relabeled_forget_set = RelabelDataset(forget_set)
    combined_dataset = ConcatDataset([retain_set, relabeled_forget_set])
    combined_loader = DataLoader(combined_dataset, batch_size=cfg["batch_size"], shuffle=True)

    # No weight decay here: decay would move masked-out parameters even though
    # their gradients are zeroed.
    optimizer = optim.SGD(model.parameters(), lr=cfg["unlearn_lr"], momentum=cfg["momentum"])

    model.train()  # back to train mode for fine-tuning
    for ep in range(cfg["unlearn_epochs"]):
        t0 = time.time()
        for x, y in combined_loader:
            x, y = x.to(cfg["device"]), y.to(cfg["device"])

            optimizer.zero_grad()
            loss = crit(model(x), y)
            loss.backward()

            # Apply the saliency mask to the gradients before the update.
            for i, p in enumerate(model.parameters()):
                if p.grad is not None:
                    p.grad *= masks[i]

            optimizer.step()
        print(f"    Epoch {ep+1}/{cfg['unlearn_epochs']} completed in {time.time()-t0:.2f}s")

    return model

# ===================================================================
# 4. MIA Score 계산 (수정된 최종 버전)
# ===================================================================
class black_box_benchmarks:
    """Threshold-based black-box membership-inference attacks.

    Each of ``s_tr`` (shadow members), ``s_te`` (shadow non-members), ``t_tr``
    (target members) and ``t_te`` (target non-members) is an
    ``(outputs, labels)`` pair. ``outputs`` is indexed as ``outputs[row, label]``
    and reduced along axis 1; ``labels`` is an integer array.
    NOTE(review): outputs are presumably softmax probabilities - confirm with the caller.

    For every split the constructor stores the raw arrays (``*_out``, ``*_lab``)
    and four derived per-sample features: correctness (``*_corr``), true-class
    confidence (``*_conf``), entropy (``*_entr``) and modified entropy
    (``*_m_entr``).
    """

    def __init__(self, s_tr, s_te, t_tr, t_te, num_classes):
        self.k = num_classes

        self.s_tr_out, self.s_tr_lab = s_tr
        self.s_te_out, self.s_te_lab = s_te
        self.t_tr_out, self.t_tr_lab = t_tr
        self.t_te_out, self.t_te_lab = t_te

        (self.s_tr_corr, self.s_tr_conf,
         self.s_tr_entr, self.s_tr_m_entr) = self._features(self.s_tr_out, self.s_tr_lab)
        (self.s_te_corr, self.s_te_conf,
         self.s_te_entr, self.s_te_m_entr) = self._features(self.s_te_out, self.s_te_lab)
        (self.t_tr_corr, self.t_tr_conf,
         self.t_tr_entr, self.t_tr_m_entr) = self._features(self.t_tr_out, self.t_tr_lab)
        (self.t_te_corr, self.t_te_conf,
         self.t_te_entr, self.t_te_m_entr) = self._features(self.t_te_out, self.t_te_lab)

    def _features(self, outputs, labels):
        """Return (correctness, true-class confidence, entropy, modified entropy) for one split."""
        correct = (outputs.argmax(1) == labels).astype(int)
        confidence = outputs[np.arange(len(labels)), labels]
        return correct, confidence, self._entr(outputs), self._m_entr(outputs, labels)

    def _log(self, p, eps=1e-30):
        """Negative natural log of ``p``, clipped below at ``eps`` to avoid log(0)."""
        clipped = np.maximum(p, eps)
        return -np.log(clipped)

    def _entr(self, p):
        """Row-wise entropy: sum over axis 1 of ``p * -log(p)``."""
        weighted = p * self._log(p)
        return weighted.sum(1)

    def _m_entr(self, p, l):
        """Row-wise modified entropy.

        The true-class column contributes ``(1 - p) * -log(p)``; every other
        column contributes ``p * -log(1 - p)``.
        """
        rows = np.arange(l.size)
        neg_log_p = self._log(p)
        complement = 1 - p

        weights = p.copy()
        weights[rows, l] = complement[rows, l]

        log_terms = self._log(complement).copy()
        log_terms[rows, l] = neg_log_p[rows, l]

        return (weights * log_terms).sum(1)

    def _thre(self, tr, te):
        """Pick the candidate value that best separates ``tr`` (>= thr) from ``te`` (< thr).

        Every observed value is tried in order; the first one reaching the best
        balanced accuracy wins. Returns 0 if no candidate scores above 0.
        """
        top_score = 0
        chosen = 0
        for candidate in np.concatenate((tr, te)):
            member_rate = (tr >= candidate).mean()
            non_member_rate = (te < candidate).mean()
            score = 0.5 * (member_rate + non_member_rate)
            if score > top_score:
                top_score = score
                chosen = candidate
        return chosen

    def _via_corr(self):
        """Attack that calls a sample a member iff the target model classifies it correctly."""
        member_rate = self.t_tr_corr.mean()
        non_member_rate = (1 - self.t_te_corr).mean()
        return 0.5 * (member_rate + non_member_rate)

    def _via_feat(self, tr, te, Ttr, Tte):
        """Balanced accuracy of a per-class threshold attack on one feature.

        Thresholds are fitted per class on the shadow features ``tr``/``te``
        and applied to the target features ``Ttr``/``Tte``. Classes missing
        from either shadow split are skipped. Returns 0.5 when a target split
        is empty.
        """
        if len(Ttr) == 0 or len(Tte) == 0:
            return 0.5  # avoid dividing by zero

        member_hits = 0
        non_member_hits = 0
        for cls in range(self.k):
            shadow_members = tr[self.s_tr_lab == cls]
            shadow_non_members = te[self.s_te_lab == cls]
            if len(shadow_members) == 0 or len(shadow_non_members) == 0:
                continue

            cut = self._thre(shadow_members, shadow_non_members)
            member_hits += (Ttr[self.t_tr_lab == cls] >= cut).sum()
            non_member_hits += (Tte[self.t_te_lab == cls] < cut).sum()

        return 0.5 * (member_hits / len(Ttr) + non_member_hits / len(Tte))

    def run(self):
        """Run all four attacks and return their scores keyed by attack name."""
        scores = {"correctness": self._via_corr()}
        scores["confidence"] = self._via_feat(
            self.s_tr_conf, self.s_te_conf, self.t_tr_conf, self.t_te_conf)
        # Entropy-based features are negated so that larger means "more member-like".
        scores["entropy"] = self._via_feat(
            -self.s_tr_entr, -self.s_te_entr, -self.t_tr_entr, -self.t_te_entr)
        scores["m_entropy"] = self._via_feat(
            -self.s_tr_m_entr, -self.s_te_m_entr, -self.t_tr_m_entr, -self.t_te_m_entr)
        return scores


def collect_performance(loader, model, device):
    """Run ``model`` over ``loader`` and gather softmax outputs with their labels.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches.
        model: Network to evaluate; switched to eval mode.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        ``(probabilities, labels)`` as NumPy arrays concatenated over all
        batches, or two empty arrays when ``loader`` yields nothing.
    """
    model.eval()
    prob_batches = []
    label_batches = []
    with torch.no_grad():
        for inputs, targets in loader:
            logits = model(inputs.to(device))
            prob_batches.append(F.softmax(logits, 1).cpu())
            label_batches.append(targets.cpu())

    if len(prob_batches) == 0:
        return np.array([]), np.array([])

    probabilities = torch.cat(prob_batches).numpy()
    labels = torch.cat(label_batches).numpy()
    return probabilities, labels


def calculate_mia_score(model, retain_loader_train, retain_loader_test,
                        forget_loader, test_loader):
    """Return the confidence-based membership-inference score for ``model``.

    Split roles passed to ``black_box_benchmarks``:
        * shadow members      - ``retain_loader_train``
        * shadow non-members  - ``test_loader``
        * target members      - ``retain_loader_test``
        * target non-members  - ``forget_loader``

    Returns 0.5 (random guessing) if any of the four loaders produced no data.
    """
    eval_device = CONFIG["device"]

    # The loaders are consumed in this fixed order.
    shadow_members = collect_performance(retain_loader_train, model, eval_device)
    shadow_non_members = collect_performance(test_loader, model, eval_device)
    target_members = collect_performance(retain_loader_test, model, eval_device)
    target_non_members = collect_performance(forget_loader, model, eval_device)

    # An empty split makes the attack undefined; fall back to chance level.
    gathered = (shadow_members, shadow_non_members, target_members, target_non_members)
    for outputs, _ in gathered:
        if outputs.size == 0:
            return 0.5

    attack = black_box_benchmarks(
        shadow_members, shadow_non_members, target_members, target_non_members, 10)
    scores = attack.run()
    return scores["confidence"]

# ===================================================================
# 5. 메인 실험 루프 (수정됨)
# ===================================================================
def main():
    """Evaluate the unlearning methods on CIFAR-10 and print summary tables.

    For each run, a previously saved original model and its ES partitions are
    loaded from ``CONFIG["model_save_dir"]``; a run is skipped when either file
    is missing. For each ES level in the partition file, every unlearning
    method is applied (or its cached checkpoint is loaded) and then scored by
    forget accuracy, retain accuracy, test accuracy and MIA score. Finally, one
    table per ES level is printed with the mean over runs and a 95% confidence
    half-width.
    """
    sd = CONFIG["model_save_dir"]
    os.makedirs(sd, exist_ok=True)
    # Training transform: random crop (second argument 4 is passed as the
    # padding), random horizontal flip, then normalisation.
    tf_train = transforms.Compose([
        transforms.RandomCrop(32,4), transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize((0.4914,0.4822,0.4465),(0.2023,0.1994,0.2010)),
    ])
    # Test transform: normalisation only, no augmentation.
    tf_test = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.4914,0.4822,0.4465),(0.2023,0.1994,0.2010)),
    ])
    
    # Adjust the dataset path to match your local environment.
    DATASET = "../../data"
    
    tr_ds = datasets.CIFAR10(root=DATASET, train=True, download=True, transform=tf_train)
    te_ds = datasets.CIFAR10(root=DATASET, train=False, download=True, transform=tf_test)

    # NOTE(review): no num_workers is passed to any DataLoader below, so with
    # the default single-process loading the worker_init_fn callbacks are
    # presumably never invoked - confirm.
    g = torch.Generator(); g.manual_seed(SEED+9999)
    te_loader = DataLoader(te_ds, batch_size=CONFIG["batch_size"], shuffle=False,
                           worker_init_fn=lambda i:set_seed(SEED+9999+i), generator=g)

    # Only the unlearning methods evaluated in this experiment.
    methods = ["SalUn", "Random-label"]
    # res[method][es_level][metric] -> list with one value per completed run
    # (F = forget acc, R = retain acc, T = test acc, M = MIA score).
    res = {m:{es:{"F":[],"R":[],"T":[],"M":[]} for es in ["Low ES","Medium ES","High ES"]} for m in methods}

    for run in range(CONFIG["num_runs"]):
        print(f"\n{'='*20} Starting Run {run+1}/{CONFIG['num_runs']} {'='*20}")
        orig = get_model()
        orig_pth = f"{sd}/run_{run}_original_model.pth"
        part_pth = f"{sd}/run_{run}_es_partitions.pth"

        # This notebook does not train the original model; it needs artifacts
        # produced by an earlier script.
        if not os.path.exists(orig_pth) or not os.path.exists(part_pth):
            print(f"Error: Model or partition file not found for run {run}")
            print(f"Searched for: {orig_pth} and {part_pth}")
            print("Please run the original full script first to generate these files.")
            continue

        print("\n[LOADING] original model and ES partitions")
        orig.load_state_dict(torch.load(orig_pth, map_location=CONFIG["device"]))
        # parts maps an ES level name to the forget-set indices into tr_ds.
        # NOTE(review): keys must match the ES names used to build `res`
        # ("Low ES", "Medium ES", "High ES"), otherwise the appends below raise
        # KeyError - confirm against the script that wrote this file.
        parts = torch.load(part_pth)

        for es, forget_idx in parts.items():
            print(f"\n--- Processing ES Level: {es} ---")
            all_idx = np.arange(len(tr_ds))
            # assume_unique=True requires forget_idx to contain no duplicates.
            retain_idx = np.setdiff1d(all_idx, forget_idx, assume_unique=True)
            # NOTE(review): both subsets wrap tr_ds, which uses tf_train, so the
            # forget/retain metrics below are computed on randomly augmented
            # images - confirm this is intended.
            r_set, f_set = Subset(tr_ds, retain_idx), Subset(tr_ds, forget_idx)

            # DataLoaders (each gets its own seeded generator; the seed depends
            # on the run index and the first character of the ES level name).
            g_r = torch.Generator(); g_r.manual_seed(SEED+run+ord(es[0]))
            retain_loader = DataLoader(r_set, batch_size=CONFIG["batch_size"], shuffle=True,
                                       worker_init_fn=lambda i:set_seed(SEED+run+ord(es[0])+i), generator=g_r)
            g_re = torch.Generator(); g_re.manual_seed(SEED+run+ord(es[0])+1000)
            retain_eval = DataLoader(r_set, batch_size=CONFIG["batch_size"], shuffle=False,
                                     worker_init_fn=lambda i:set_seed(SEED+run+ord(es[0])+1000+i), generator=g_re)
            g_f = torch.Generator(); g_f.manual_seed(SEED+run+ord(es[0])+2000)
            forget_loader = DataLoader(f_set, batch_size=CONFIG["batch_size"], shuffle=False,
                                       worker_init_fn=lambda i:set_seed(SEED+run+ord(es[0])+2000+i), generator=g_f)
            
            # Deferred calls: a method is only executed when no cached
            # checkpoint exists for it. The lambdas are invoked within this
            # iteration, so they see the current orig / r_set / f_set.
            unlearn = {
                "SalUn"       : lambda: unlearn_salun_fixed(orig, r_set, f_set, CONFIG),
                "Random-label": lambda: unlearn_random_label_fixed(orig, r_set, f_set, CONFIG),
            }

            print("\nApplying and evaluating unlearning methods...")
            for m_name, fn in unlearn.items():
                # The checkpoint path carries a "_fixed" suffix, matching the
                # names of the revised unlearning functions.
                upth = f"{sd}/run_{run}_{es.replace(' ','')}_{m_name}_fixed_unlearned.pth"
                
                if os.path.exists(upth):
                    print(f"    > [LOADING] {m_name}"); u_model = get_model(); u_model.load_state_dict(torch.load(upth,map_location=CONFIG["device"]))
                else:
                    print(f"    > [TRAINING] {m_name}"); u_model = fn(); torch.save(u_model.state_dict(), upth)

                u_f = evaluate_model(u_model, forget_loader)
                u_r = evaluate_model(u_model, retain_eval)
                u_t = evaluate_model(u_model, te_loader)
                # Both member splits of the MIA come from the retain set
                # (shuffled loader and evaluation loader).
                u_m = calculate_mia_score(u_model, retain_loader, retain_eval, forget_loader, te_loader)
                print(f"      - {m_name}  F:{u_f:.2f}% R:{u_r:.2f}% T:{u_t:.2f}%  MIA:{u_m:.3f}")

                res[m_name][es]["F"].append(u_f); res[m_name][es]["R"].append(u_r)
                res[m_name][es]["T"].append(u_t); res[m_name][es]["M"].append(u_m)

    # ===================================================================
    # 6. Summarize results
    # ===================================================================
    print(f"\n{'='*20} Final Results {'='*20}")
    def fmt(xs):
        """Format values as "mean ± 95% CI half-width" (mean only for a single value)."""
        xs=np.array(xs); mu=xs.mean()
        # The SEM is undefined for a single sample, so report the mean alone.
        if len(xs) < 2:
            return f"{mu:.3f}"
        sem = stats.sem(xs)
        # t-distribution critical value for a 95% confidence interval
        # (the `else 0` branch is unreachable after the early return above).
        t_val = stats.t.ppf(0.975, len(xs) - 1) if len(xs) > 1 else 0
        return f"{mu:.3f} ± {(sem * t_val):.3f}"

    for es in ["Low ES","Medium ES","High ES"]:
        print(f"\n--- Results for {es} ---")
        rows=[]
        for m in methods:
            if not res[m][es]["F"]: # skip methods without any recorded result
                continue
            row={"Method":m,
                 "Forget Acc":fmt(res[m][es]["F"]),
                 "Retain Acc":fmt(res[m][es]["R"]),
                 "Test Acc"  :fmt(res[m][es]["T"]),
                 "MIA"       :fmt(res[m][es]["M"])}
            rows.append(row)
        if rows:
            print(pd.DataFrame(rows).to_string(index=False))

# Run the experiment only when this code is executed directly, not when imported.
if __name__ == "__main__":
    main()

Global random seed set to 42
Using device: cuda
Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ../../data/cifar-10-python.tar.gz


100.0%


Extracting ../../data/cifar-10-python.tar.gz to ../../data
Files already downloaded and verified


[LOADING] original model and ES partitions

--- Processing ES Level: Low ES ---

Applying and evaluating unlearning methods...
    > [TRAINING] SalUn
    Epoch 1/10 completed in 32.53s
    Epoch 2/10 completed in 31.63s
    Epoch 3/10 completed in 35.16s
    Epoch 4/10 completed in 31.53s
    Epoch 5/10 completed in 32.34s
    Epoch 6/10 completed in 32.06s
    Epoch 7/10 completed in 31.16s
    Epoch 8/10 completed in 31.81s
    Epoch 9/10 completed in 31.82s
    Epoch 10/10 completed in 31.41s
      - SalUn  F:91.43% R:89.83% T:81.99%  MIA:0.766
    > [TRAINING] Random-label
    Epoch 1/10 completed in 33.26s
    Epoch 2/10 completed in 31.74s
    Epoch 3/10 completed in 30.23s
    Epoch 4/10 completed in 29.98s
    Epoch 5/10 completed in 30.52s
    Epoch 6/10 completed in 31.66s
    Epoch 7/10 completed in 31.51s
    Epoch 8/10 completed in 31.34s
    Epoch 9/10 completed in 31.38s
  