In [13]:
import torch
import os
import sys
import yaml
import json
import random
import glob
import shutil
from subprocess import check_call

from torch import nn
from torch.autograd import grad
import torch.nn.functional as F
import numpy as np
from PIL import Image
import matplotlib
import matplotlib.pyplot as plt
from torch.utils.data import DataLoader, TensorDataset, Dataset
import torch.optim as optim
from sklearn.metrics import confusion_matrix
from sklearn.svm import SVC
from xgboost import XGBClassifier
from sklearn.pipeline import make_pipeline
from sklearn import metrics
from torchvision.models.feature_extraction import get_graph_node_names

%load_ext autoreload
%autoreload 2
from utils.general_utils import *
from utils.unlearn_utils import *
import utils.MIA_utils as mia
import datasets
import backbone

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [3]:
# Prefer the GPU whenever torch reports one as available; otherwise run on CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# get_std_transforms is provided by one of the star-imported utils modules;
# it is called with 32 and returns the (train, test) transform pair.
transform_train, transform_test = get_std_transforms(32)

In [4]:
# Backbone architecture used by every checkpoint path in this notebook.
model_type = 'smallvgg'

# Ten independent [0, 100] ranges. A comprehension is used instead of
# `[[0, 100]]*10`, which would repeat ONE shared inner list ten times, so that
# editing a single entry later cannot silently change all of them.
# NOTE(review): presumably one range per CIFAR-10 class -- confirm against
# datasets.UnlearnCIFAR10.
forget_range = [[0, 100] for _ in range(10)]

# Dataset root. Defaults to the original location, but can be redirected with
# the CIFAR10_ROOT environment variable so the notebook runs on other machines.
data_root = os.environ.get('CIFAR10_ROOT', '/home/hoangtuan/data')

# "Forget" section of the training split, built with the test-time transform
# (the variable name suggests this means no augmentation).
forget_train_noaug = datasets.UnlearnCIFAR10(root=data_root, transform=transform_test,
                                             download=True, data_set='train', data_section='forget',
                                             forget_range=forget_range)
# shuffle=False keeps the sample order fixed, so saved outputs line up with
# forget_train_noaug.selected_targets in later cells.
forget_train_noaug_loader = DataLoader(forget_train_noaug, shuffle=False, num_workers=6, batch_size=100)

Files already downloaded and verified
Number of forget samples: 1000
Number of selected samples: 1000


In [5]:
def extract_feature(ckp_folder, dataloader, model_type):
    """Run a saved model over `dataloader` and store its classifier outputs.

    The checkpoint is read from `<ckp_folder>/ckp.pth` (falling back to
    `<ckp_folder>/ckp.pt`), and the concatenated outputs are written to
    `<ckp_folder>/outputs_<N>.pth`, where N is `len(labels)` as returned by
    `extract_features`.

    Args:
        ckp_folder: Directory containing the checkpoint; also the output directory.
        dataloader: Loader passed straight to `extract_features`.
        model_type: Either 'smallvgg' or 'allcnn'.

    Raises:
        ValueError: If `model_type` is not one of the supported architectures.
    """
    # Validate first so an unsupported architecture fails with a clear message
    # instead of an UnboundLocalError on `model` further down.
    if model_type not in ('smallvgg', 'allcnn'):
        raise ValueError(
            f"Unsupported model_type {model_type!r}; expected 'smallvgg' or 'allcnn'")

    ckp_file = f'{ckp_folder}/ckp.pth'
    if not os.path.isfile(ckp_file):
        # Was f'{folder}/ckp.pt': `folder` is not a parameter of this function,
        # so the fallback depended on whatever global happened to be defined.
        ckp_file = f'{ckp_folder}/ckp.pt'
    # Load onto the CPU first so checkpoints saved on a GPU can be opened anywhere.
    ckp = torch.load(ckp_file, map_location='cpu')

    if model_type == 'smallvgg':
        model = backbone.SmallVGG(num_classes=10, dropout=0.5)
        fea_dict = {'classifier.4': 'output'}
    else:  # 'allcnn'
        model = backbone.AllCNN(num_classes=10, dropout=False)
        fea_dict = {'classifier.0': 'output'}
    model.load_state_dict(ckp['model'])
    model.eval()
    # Use the GPU when present rather than unconditionally calling .cuda().
    # NOTE(review): assumes extract_features moves batches to the model's
    # device -- confirm in utils.
    model.to('cuda' if torch.cuda.is_available() else 'cpu')

    # print(get_graph_node_names(model))
    feas, labels = extract_features(model, dataloader, fea_dict)
    outputs = torch.cat(feas['output'])

    torch.save(outputs, f'{ckp_folder}/outputs_{len(labels)}.pth')
    # Progress line, overwritten in place by the next call.
    print(f'{ckp_folder}         ', end='\r')

# Stage 1:
Train 50 models on the full training dataset (positive) and 50 models on the retained dataset only (negative).

In [15]:
# For each run index 1..50, train two models: one from the full-data template
# config and one from the forget_0-100_10classes template config. Each run gets
# its own directory holding a copy of the template, which is then passed to train.py.
for i in range(1, 51):
    # Model trained from the full_data config.
    full_run_dir = f'exp/learned/smallvgg_cifar10/full_data_MIA/exp{i:02d}/'
    full_run_cfg = f'exp/learned/smallvgg_cifar10/full_data_MIA/exp{i:02d}/config.yaml'
    os.makedirs(full_run_dir, exist_ok=True)
    shutil.copy('exp/learned/smallvgg_cifar10/full_data/config.yaml', full_run_cfg)
    check_call(['python3', 'train.py', full_run_cfg], shell=False)

    # Model trained from the forget_0-100_10classes config.
    retain_run_dir = f'exp/learned/smallvgg_cifar10/forget_0-100_10classes_MIA/exp{i:02d}/'
    retain_run_cfg = f'exp/learned/smallvgg_cifar10/forget_0-100_10classes_MIA/exp{i:02d}/config.yaml'
    os.makedirs(retain_run_dir, exist_ok=True)
    shutil.copy('exp/learned/smallvgg_cifar10/forget_0-100_10classes/config.yaml', retain_run_cfg)
    check_call(['python3', 'train.py', retain_run_cfg], shell=False)


# Stage 3: Unlearning

In [None]:
# Run unlearn.py for run indices 46..50, each with its own copy of the
# exp0 template config.
for i in range(46, 51):
    unlearn_run_dir = f'exp/unlearned/smallvgg_cifar10/data_removal/MIA/exp{i:02d}'
    unlearn_run_cfg = f'exp/unlearned/smallvgg_cifar10/data_removal/MIA/exp{i:02d}/config.yaml'
    os.makedirs(unlearn_run_dir, exist_ok=True)
    shutil.copy('exp/unlearned/smallvgg_cifar10/exp0/config.yaml', unlearn_run_cfg)
    check_call(['python3', 'unlearn.py', unlearn_run_cfg], shell=False)

# Stage 2: Extract features and train the MIA classifier

In [None]:

# Save forget-set outputs for every Stage 1 checkpoint: first the models trained
# on the retained data only (negative), then the models trained on the full
# dataset (positive).
for ckp_folder in ['forget_0-100_10classes_MIA', 'full_data_MIA']:
    for i in range(1, 51):
        checkpoint_dir = f'exp/learned/{model_type}_cifar10/{ckp_folder}/exp{i:02d}/checkpoints'
        extract_feature(checkpoint_dir, forget_train_noaug_loader, model_type)

In [7]:
# Build the attack-classifier dataset from the saved Stage 1 outputs.
# Each row is [softmax(model output) (10 values), one-hot class label (10 values)];
# the target is 1 for models trained on the full data and 0 for models trained
# on the retained data only.

# Fixed seed so the training-set shuffle is reproducible across kernel restarts.
SHUFFLE_SEED = 0

# Class labels of the forget samples. These are the same for every model, so
# they are computed once outside the loop.
# (Was `all_forget_train_noaug`, a name that is never defined in this notebook.)
labels = torch.tensor(forget_train_noaug.selected_targets)
oh_labels = F.one_hot(labels, num_classes=10)
n_forget = len(labels)

fea_chunks, label_chunks = [], []
for i in range(1, 51):
    for folder in ['full_data_MIA', 'forget_0-100_10classes_MIA']:
        # (Was `{model}`, which is undefined here; the path is keyed by `model_type`.)
        outputs = torch.load(
            f'exp/learned/{model_type}_cifar10/{folder}/exp{i:02d}/checkpoints/outputs_{n_forget}.pth',
            map_location='cpu')  # keep on CPU so .numpy() below always works

        outputs = F.softmax(outputs, dim=1)
        feas = torch.cat([outputs, oh_labels], dim=1)
        fea_chunks.append(feas)
        if folder == 'full_data_MIA':
            label_chunks.append(torch.ones(n_forget))
        else:
            label_chunks.append(torch.zeros(n_forget))

# Concatenate once instead of re-concatenating inside the loop (quadratic copying).
all_feas = torch.cat(fea_chunks, dim=0)
all_labels = torch.cat(label_chunks, dim=0)

# Chronological split by run index: first 80% train, next 10% validation,
# last 10% test. Explicit boundaries avoid the `-datasize//10` precedence
# pitfall, which floors a negative number and yields overlapping or missing
# rows whenever datasize is not a multiple of 10.
datasize = len(all_labels)
n_holdout = datasize // 10
val_start = datasize - 2 * n_holdout
test_start = datasize - n_holdout

# validation set
val_feas = all_feas[val_start:test_start].numpy()
val_labels = all_labels[val_start:test_start].numpy()
# test set
test_feas = all_feas[test_start:].numpy()
test_labels = all_labels[test_start:].numpy()
# training set
train_feas = all_feas[:val_start].numpy()
train_labels = all_labels[:val_start].numpy()
assert len(train_labels) + len(val_labels) + len(test_labels) == datasize

# Shuffle only the training rows; validation and test keep their run order.
idx = np.random.default_rng(SHUFFLE_SEED).permutation(len(train_labels))
train_feas = train_feas[idx]
train_labels = train_labels[idx]

print(f'Train: {train_feas.shape} {train_labels.shape}')
print(f'Val:   {val_feas.shape} {val_labels.shape}')


Train: (80000, 20) (80000,)
Val:   (10000, 20) (10000,)


In [9]:
# Hyper-parameters for the attack classifier; these may need tuning for other
# setups.
best_param = dict(
    max_depth=10,
    subsample=0.8,
    reg_lambda=10,
    min_child_weight=0.1,
)
clf = XGBClassifier(learning_rate=0.2, **best_param)

# Fit on the shuffled training split; the trailing `;` hides the estimator repr.
clf.fit(train_feas, train_labels);

In [None]:
# Precision-recall behaviour of the classifier on the validation split.
val_pred_prob = clf.predict_proba(val_feas)
# Column 1 holds the predicted probability of the positive class.
precision, recall, thresholds = metrics.precision_recall_curve(
    val_labels,
    val_pred_prob[:, 1],
)
f1_scores = 2 * recall * precision / (recall + precision)
# The value printed as "AUC" is the area under the precision-recall curve.
auc = metrics.auc(recall, precision)
print(f'AUC: {auc:.04f}')
plt.plot(recall, precision)
plt.show()

In [16]:
# Precision-recall behaviour of the classifier on the data it was fitted on.
train_pred_prob = clf.predict_proba(train_feas)
# Column 1 holds the predicted probability of the positive class.
precision, recall, thresholds = metrics.precision_recall_curve(
    train_labels,
    train_pred_prob[:, 1],
)
f1_scores = 2 * recall * precision / (recall + precision)
# The value printed as "AUC" is the area under the precision-recall curve.
auc = metrics.auc(recall, precision)
print(f'AUC: {auc:.04f}')
plt.plot(recall, precision)
plt.show()

# Stage 4: Testing

In [None]:
# Extract features of unlearnt model
# Runs extract_feature on the unlearned checkpoint of each run 46..50, which
# writes an outputs_<N>.pth file into that run's `ckp` directory.
for i in range(46, 51):
    # NOTE(review): keep the variable name `folder` -- it is a notebook-level
    # global, and other code in this notebook may read it by that name.
    folder = f'exp/unlearned/{model_type}_cifar10/data_removal/MIA/exp{i:02d}/results/0-100_10classes/ckp'
    extract_feature(folder, forget_train_noaug_loader, model_type)

In [8]:
def get_test_features(pos_folder, neg_folder, idx_range=(46, 51)):
    """Assemble attack-classifier features from saved model outputs.

    For every run index in `range(idx_range[0], idx_range[1])`, the outputs file
    of the positive and of the negative model is loaded, soft-maxed, and
    concatenated with the one-hot class labels of the forget samples (taken from
    the notebook-level `forget_train_noaug`).

    Args:
        pos_folder: Path template of the positive outputs file; the substring
            'exp__' is replaced with the run directory name.
        neg_folder: Same, for the negative outputs file.
        idx_range: (start, stop) run indices, stop exclusive.

    Returns:
        (feas, labels) as numpy arrays. Rows are ordered run by run, positive
        block first (label 1), then negative block (label 0).

    Raises:
        ValueError: If `idx_range` selects no runs.
    """
    def _load_probs(template, run_idx):
        # Run directories are created zero-padded (exp01, exp02, ...), so try
        # that form first; fall back to the unpadded name for older layouts.
        path = template.replace('exp__', f'exp{run_idx:02d}')
        if not os.path.isfile(path):
            path = template.replace('exp__', f'exp{run_idx}')
        # map_location keeps tensors on the CPU so .numpy() below cannot fail.
        outputs = torch.load(path, map_location='cpu')
        return F.softmax(outputs, dim=1)

    lbls = torch.tensor(forget_train_noaug.selected_targets)
    oh_labels = F.one_hot(lbls, num_classes=10)

    feas, labels = [], []
    for i in range(idx_range[0], idx_range[1]):
        fea = torch.cat([_load_probs(pos_folder, i), oh_labels], dim=1)
        feas.append(fea)
        labels.append(torch.ones(len(fea)))

        fea = torch.cat([_load_probs(neg_folder, i), oh_labels], dim=1)
        feas.append(fea)
        labels.append(torch.zeros(len(fea)))

    if not feas:
        raise ValueError(f'idx_range={idx_range!r} selects no runs')

    feas = torch.cat(feas).numpy()
    labels = torch.cat(labels).numpy()

    return feas, labels


In [None]:


# Number of forget samples, taken from the dataset itself instead of from a
# `labels` variable left behind by another cell. It is part of the outputs
# file name.
n_forget = len(forget_train_noaug.selected_targets)

# Path templates; get_test_features substitutes the run directory for 'exp__'.
# Negative: models trained on the retained data only. The Stage 1 and Stage 2
# cells write these under `forget_0-100_10classes_MIA/`, not
# `forget_0-100_10classes/MIA/`.
neg_folder = f'exp/learned/{model_type}_cifar10/forget_0-100_10classes_MIA/exp__/checkpoints/outputs_{n_forget}.pth'

# Positive: the unlearned models.
pos_folder = f'exp/unlearned/{model_type}_cifar10/data_removal/MIA/exp__/results/0-100_10classes/ckp/outputs_{n_forget}.pth'
feas_s1, labels_s1 = get_test_features(pos_folder, neg_folder, idx_range=[47, 49])

In [None]:
# Each entry is [features, labels, legend name].
eval_sets = (
    [test_feas, test_labels, 'Test'],  # No unlearnt
    [feas_s1, labels_s1, 'S1'],        # target result
)
mia.plot_prec_recall(clf, eval_sets, save=False)