Import necessary packages

In [1]:
import copy
import os

import higher
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch import optim
from torch.utils.data import DataLoader, Subset

from utils.dataset import *
from utils.model import *
from utils.utils import *
from dotdict import DotDict as dd

  from .autonotebook import tqdm as notebook_tqdm


### Hyperparameters

In [2]:
# Experiment configuration: every tunable value used by the cells below lives
# on this single attribute-style dictionary.
args = dd()

# Run on the GPU when one is present, otherwise fall back to the CPU so the
# notebook can still be executed end to end on a machine without CUDA.
args.device = "cuda" if torch.cuda.is_available() else "cpu"

# Filesystem layout and dataset / architecture selection.
args.datadir = "./data"
args.outdir = "./outputs"
args.resdir = "./results"
args.dataset = "student"
args.model = "mlp2"               # one of the names handled by the model-selection branches
args.moddir = './checkpoints'     # where the victim checkpoint is saved / loaded

# Victim training.
args.epochs = 100
args.lr = 0.001                   # SGD learning rate (training and inner unlearning step)
args.batch_size = 256

# Unlearning-request crafting (see `iteration`).
args.craftrate = 0.1              # Adam learning rate on the per-sample weights
args.ncraftstep = 30              # number of crafting steps per restart
args.tau = 0.00002                # step size of the first-order unlearning update in `victim`
args.theta = 100                  # sharpness of the sigmoid that gates each sample weight
args.lam = 1                      # weight of the cross-entropy term added to the fairness loss
args.floss = "group"              # "group" or "individual" fairness loss
args.nadapt = 2                   # not read by any cell visible in this notebook
args.num_ensemble = 10            # not read by any cell visible in this notebook
args.restarts = 4                 # independent crafting restarts; the best one is kept

# Optional reproducibility switch: leave as None to keep the unseeded
# behaviour, or set an integer to seed NumPy and PyTorch before anything runs.
args.seed = None
if args.seed is not None:
    np.random.seed(args.seed)
    torch.manual_seed(args.seed)



In [3]:
def _first_order_unlearn(args, base_model, loader):
    """Return a copy of ``base_model`` after one first-order unlearning update.

    The update direction comes from ``get_grad_diff`` evaluated on ``loader``;
    each trainable parameter ``p`` is replaced by ``p - args.tau * delta``.
    ``base_model`` itself is left untouched.
    """
    model = copy.deepcopy(base_model)
    d_theta = get_grad_diff(args, model, loader)
    trainable = [p for p in model.parameters() if p.requires_grad]
    if len(d_theta) < len(trainable):
        raise ValueError(
            f'get_grad_diff returned {len(d_theta)} tensors for {len(trainable)} trainable parameters')

    model.eval()
    with torch.no_grad():
        # Pair parameters with their update in order instead of popping from
        # the front of the list, which also leaves `d_theta` unmodified.
        for p, delta in zip(trainable, d_theta):
            p.copy_(p - args.tau * delta)
    return model


def _report_aeod(args, model, test_loader, X_test, y_test, sa_index, p_Group):
    """Run ``test`` on ``model`` and return the 'fairness' entry of the AEOD report."""
    y_predicts, y_pred_probs = test(args, model, test_loader)
    outputs = calculate_performance_absolute_equalized_odds(
        X_test, y_test, y_predicts, y_pred_probs, sa_index, p_Group)
    return outputs['fairness']


def victim(args, poison_weights):
    """Apply an unlearning request to the saved victim model and report fairness.

    Loads the checkpoint ``<args.moddir>/<dataset>-<model>.pth``, prints the
    absolute equalized odds on the test split, then prints the same metric
    after a first-order unlearning update on (a) the training samples whose
    entry in ``poison_weights`` exceeds 0.5 and (b) a random training subset of
    the same size.

    Args:
        args: configuration object; reads ``dataset``, ``datadir``, ``model``,
            ``moddir``, ``device``, ``batch_size`` and ``tau``.
        poison_weights: one weight per training sample (indexed like the
            training set); entries above 0.5 are selected for unlearning.

    Raises:
        NotImplementedError: if ``args.model`` is not a supported name.
    """
    (X_train, X_test, y_train, y_test, sa_index, p_Group,
     _, _, _, input_dim, output_dim) = construct_dataset(args.dataset, args.datadir)

    trainset = PackData(X_train, y_train)
    testset = PackData(X_test, y_test)
    test_loader = DataLoader(testset, batch_size=args.batch_size, shuffle=False, num_workers=0)

    if args.model == 'logistic':
        test_model = LogisticRegression(input_dim=input_dim, output_dim=output_dim).to(args.device)
    elif args.model == 'bayesian':
        # NOTE(review): unlike the other branches this model is not moved to
        # args.device, and the training cell never produces a checkpoint for it.
        test_model = NaiveBayesClassifier()
    elif args.model == 'mlp':
        test_model = MLPClassifier(input_size=input_dim, hidden_sizes=[100], output_size=output_dim).to(args.device)
    elif args.model == 'mlp2':
        test_model = MLPClassifier(input_size=input_dim, hidden_sizes=[100, 100], output_size=output_dim).to(args.device)
    else:
        raise NotImplementedError('Not support!')

    # map_location lets a checkpoint written on a GPU be restored on whatever
    # device this run is using.
    checkpoint = torch.load(
        os.path.join(args.moddir, args.dataset + '-' + args.model + '.pth'),
        map_location=args.device)
    test_model.load_state_dict(checkpoint['net'])

    print('==> before unlearning')
    aeod_before = _report_aeod(args, test_model, test_loader, X_test, y_test, sa_index, p_Group)
    print(f'Absolute Equalized odds: {aeod_before:.4f}')

    print('==> after unlearning')
    # flatten() (rather than squeeze()) keeps a one-element selection as a
    # list; squeeze() would collapse it to a bare int and break Subset.
    unlearnids = torch.nonzero(poison_weights > 0.5).flatten().tolist()
    unlearn_data = Subset(trainset, unlearnids)
    print(f'Number of unlearning data: {len(unlearn_data)}')
    if len(unlearn_data) == 0:
        # Nothing to unlearn; a zero-sized random baseline loader would be invalid.
        print('No sample exceeds the 0.5 selection threshold; skipping unlearning.')
        return

    unlearn_loader = DataLoader(unlearn_data, batch_size=args.batch_size, shuffle=False, num_workers=0)
    unlearn_model = _first_order_unlearn(args, test_model, unlearn_loader)
    aeod_after = _report_aeod(args, unlearn_model, test_loader, X_test, y_test, sa_index, p_Group)
    print(f'Absolute Equalized odds (ours): {aeod_after:.4f}')

    # Baseline: same update computed from an equally sized random subset.
    rand_ids = np.random.choice(len(trainset), len(unlearn_data), replace=False)
    randset = Subset(trainset, rand_ids)
    rand_loader = DataLoader(randset, batch_size=len(randset), shuffle=False, num_workers=0)
    rand_model = _first_order_unlearn(args, test_model, rand_loader)
    aeod_rand = _report_aeod(args, rand_model, test_loader, X_test, y_test, sa_index, p_Group)
    print(f'Absolute Equalized odds (rand): {aeod_rand:.4f}')


In [4]:
def iteration(args, model, trainset, train_loader, sa_index, p_Group):
    """Optimise one set of per-sample unlearning weights.

    For ``args.ncraftstep`` steps, every training batch is pushed through a
    differentiable copy of ``model`` (via ``higher``): the copy takes one
    optimizer step on the *negated* weighted loss, is evaluated on a random 80%
    subset of ``trainset``, and the gradient of the resulting objective with
    respect to the batch's weights is stored. Adam then updates all weights and
    they are clamped to ``[0, 1]``.

    The objective is ``-fairness_loss + args.lam * cross_entropy`` where the
    fairness loss is ``group_fair_loss`` or ``indiv_fair_loss`` depending on
    ``args.floss``.

    Args:
        args: configuration; reads ``device``, ``craftrate``, ``ncraftstep``,
            ``lr``, ``theta``, ``lam`` and ``floss``.
        model: the trained victim model (not modified in place by the inner step).
        trainset: dataset whose items are ``(input, target, sample_id)``.
        train_loader: loader over ``trainset`` yielding ``(inputs, targets, ids)``.
        sa_index, p_Group: forwarded unchanged to the fairness loss.

    Returns:
        ``(loss_reduction, poison_weights)`` where ``loss_reduction`` is the
        mean objective of the first step minus that of the last step (0.0 when
        no step was run).

    Raises:
        NotImplementedError: if ``args.floss`` is not 'individual' or 'group'.
    """
    val_ids = np.random.choice(len(trainset), int(len(trainset) * 0.8), replace=False)
    valset = Subset(trainset, val_ids)
    # A single batch holding the whole validation subset.
    val_loader = DataLoader(valset, batch_size=len(valset), shuffle=False, num_workers=0)

    # Map each sample id to its position in the weight vector.
    poisonids = [trainset[idx][2] for idx in range(len(trainset))]
    poison_lookup = {sample_id: position for position, sample_id in enumerate(poisonids)}

    poison_weights = weight_init(args, poisonids, trainset).to(args.device)
    att_optimizer = torch.optim.Adam([poison_weights], lr=args.craftrate, weight_decay=0)
    # Gradients are written into this buffer by hand, slice by slice.
    poison_weights.grad = torch.zeros_like(poison_weights)

    loss_trace = []
    # Guard against ncraftstep < 5, where `ncraftstep // 5` would be zero.
    report_every = max(1, args.ncraftstep // 5)

    for step in range(args.ncraftstep):

        target_loss, n_batch = 0, 0

        optimizer_unlearned = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)
        loss_func = nn.CrossEntropyLoss(reduction='none')
        model.train()

        criterion = nn.CrossEntropyLoss()

        for inputs, targets, ids in train_loader:
            inputs, targets = inputs.to(args.device), targets.to(args.device)

            poison_slices, batch_positions = [], []
            for batch_id, sample_id in enumerate(ids.tolist()):
                lookup = poison_lookup.get(sample_id)
                if lookup is not None:
                    poison_slices.append(lookup)
                    batch_positions.append(batch_id)

            if not batch_positions:
                # No weighted sample in this batch: there is nothing to
                # differentiate with respect to, so skip it.
                continue

            weight_slice = poison_weights[poison_slices].detach().to(args.device)
            weight_slice.requires_grad_()
            # Smooth 0/1 gate: equals sigmoid(theta * (2w - 1)), switching at w = 0.5.
            h2 = 1 - torch.sigmoid(args.theta * (1 - 2 * weight_slice))

            model.zero_grad()
            with torch.backends.cudnn.flags(enabled=False):
                with higher.innerloop_ctx(model, optimizer_unlearned) as (net, opt):
                    result_z = net(inputs)
                    loss_z = loss_func(result_z, targets)
                    # Align per-sample losses with their weights; identical to
                    # using `loss_z` directly when every id in the batch is known.
                    loss_mul = torch.mul(loss_z[batch_positions], h2).mean()
                    # Stepping on the negated loss moves the copy *up* the
                    # weighted loss.
                    opt.step(-loss_mul)

                    net.eval()
                    # Objective evaluated on the updated copy:
                    #   -fairness_loss + args.lam * criterion(val_outputs, val_labels)
                    for val_inputs, val_labels, _ in val_loader:
                        val_inputs, val_labels = val_inputs.to(args.device), val_labels.to(args.device)
                        val_outputs = net(val_inputs)
                        if args.floss == 'individual':
                            fair_loss = -indiv_fair_loss(args, val_outputs, val_inputs, val_labels, sa_index, p_Group)
                        elif args.floss == 'group':
                            fair_loss = -group_fair_loss(args, val_outputs, val_inputs, val_labels, sa_index, p_Group)
                        else:
                            raise NotImplementedError('Not support!')

                    fair_loss = fair_loss + args.lam * criterion(val_outputs, val_labels)

                    grads = torch.autograd.grad(fair_loss, weight_slice)[0].detach()
                    poison_weights.grad[poison_slices] = grads

                    net.train()

            target_loss += fair_loss.item()
            n_batch += 1

        # NaN marks a step in which no batch contributed.
        mean_loss = target_loss / n_batch if n_batch else float('nan')

        if step % report_every == 0 or step == (args.ncraftstep - 1):
            print(f'step: {step}, target loss: {mean_loss}')

        loss_trace.append(mean_loss)

        att_optimizer.step()
        # Keep the gradient buffer allocated: newer PyTorch releases default to
        # set_to_none=True, which would make the slice assignment above fail
        # on the next step.
        att_optimizer.zero_grad(set_to_none=False)
        with torch.no_grad():
            poison_weights.clamp_(0, 1)

    loss_reduction = loss_trace[0] - loss_trace[-1] if loss_trace else 0.0
    return loss_reduction, poison_weights

In [5]:
def train(args, model, criterion, optimizer, train_loader, scheduler=None):
    """Fit ``model`` for ``args.epochs`` epochs and print per-epoch statistics.

    Args:
        args: configuration; reads ``epochs`` and ``device``.
        model: network to optimise in place.
        criterion: loss called as ``criterion(outputs, labels)``.
        optimizer: optimizer already bound to ``model``'s parameters.
        train_loader: iterable of ``(inputs, labels, ids)`` batches.
        scheduler: optional object whose ``step()`` is called once per epoch.
    """
    for epoch in range(args.epochs):
        model.train()
        loss_sum = 0.0
        batches_seen = 0
        samples_seen = 0
        hits = 0

        for features, labels, _ in train_loader:
            features = features.to(args.device)
            labels = labels.to(args.device)

            logits = model(features)
            batch_loss = criterion(logits, labels)
            # Predictions are taken from the pre-update forward pass.
            predicted = torch.max(logits.data, 1)[1]

            optimizer.zero_grad()
            batch_loss.backward()
            optimizer.step()

            samples_seen += labels.size(0)
            hits += (predicted == labels).sum().item()
            loss_sum += batch_loss.item()
            batches_seen += 1

        if scheduler is not None:
            scheduler.step()

        epoch_loss = loss_sum / batches_seen
        epoch_accuracy = 100 * hits / samples_seen
        print('Epoch %d training loss: %.3f training accuracy: %.2f%%' % (epoch, epoch_loss, epoch_accuracy))


In [6]:


def test(args, model, test_loader):
    model.eval()
    total, correct = 0, 0
    predicts, probs = [], []

    with torch.no_grad():
        for idx, (images, labels, _) in enumerate(test_loader):
            images, labels = images.to(args.device), labels.to(args.device)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

            predicts += predicted.detach().cpu().tolist()
            probs += F.softmax(outputs, dim=1).detach().cpu().tolist()

    print('Test accuracy: %.2f%%' % (100 * correct / total))

    return predicts, probs

### Training the victim model

In [7]:
# Build the train/test splits; the names defined here are reused by later cells.
X_train, X_test, y_train, y_test, sa_index, p_Group, protected_attribute, majority_group_name, minority_group_name, input_dim, output_dim = construct_dataset(args.dataset, args.datadir)

trainset = PackData(X_train, y_train)
testset = PackData(X_test, y_test)

train_loader = DataLoader(trainset, batch_size=args.batch_size, shuffle=True, num_workers=0)
test_loader = DataLoader(testset, batch_size=args.batch_size, shuffle=False, num_workers=0)

# Instantiate the victim architecture selected in the configuration cell.
if args.model == 'logistic':
    model = LogisticRegression(input_dim=input_dim, output_dim=output_dim).to(args.device)
elif args.model == 'mlp':
    model = MLPClassifier(input_size=input_dim, hidden_sizes=[100], output_size=output_dim).to(args.device)
elif args.model == 'mlp2':
    model = MLPClassifier(input_size=input_dim, hidden_sizes=[100, 100], output_size=output_dim).to(args.device)
else:
    raise NotImplementedError('Not support!')

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=args.lr, momentum=0.9, weight_decay=5e-4)

train(args, model, criterion, optimizer, train_loader)

# Persist the trained weights under the path that `victim` later loads from.
state = {
    'net': model.state_dict(),
    'epoch': args.epochs,
    'batch_size': args.batch_size,
}
# Create the checkpoint directory on first use; torch.save does not do it.
os.makedirs(args.moddir, exist_ok=True)
torch.save(state, os.path.join(args.moddir, args.dataset + '-' + args.model  + '.pth'))


Epoch 0 training loss: 0.933 training accuracy: 16.30%
Epoch 1 training loss: 0.694 training accuracy: 47.80%
Epoch 2 training loss: 0.490 training accuracy: 83.70%
Epoch 3 training loss: 0.430 training accuracy: 83.70%
Epoch 4 training loss: 0.437 training accuracy: 83.70%
Epoch 5 training loss: 0.459 training accuracy: 83.70%
Epoch 6 training loss: 0.473 training accuracy: 83.70%
Epoch 7 training loss: 0.480 training accuracy: 83.70%
Epoch 8 training loss: 0.480 training accuracy: 83.70%
Epoch 9 training loss: 0.461 training accuracy: 83.70%
Epoch 10 training loss: 0.444 training accuracy: 83.70%
Epoch 11 training loss: 0.422 training accuracy: 83.70%
Epoch 12 training loss: 0.408 training accuracy: 83.70%
Epoch 13 training loss: 0.407 training accuracy: 83.70%
Epoch 14 training loss: 0.394 training accuracy: 83.70%
Epoch 15 training loss: 0.395 training accuracy: 83.70%
Epoch 16 training loss: 0.398 training accuracy: 83.70%
Epoch 17 training loss: 0.397 training accuracy: 83.70%
Ep

In [8]:

# Evaluate the freshly trained victim model on the held-out split; `test`
# prints the accuracy and returns per-sample predictions and probabilities.
y_predicts, y_pred_probs = test(args, model, test_loader)

# Full absolute-equalized-odds report for the unmodified model.
print('Absolute Equalized odds')
outputs = calculate_performance_absolute_equalized_odds(
    X_test,
    y_test,
    y_predicts,
    y_pred_probs,
    sa_index,
    p_Group,
)
print(outputs)

Test accuracy: 91.28%
Absolute Equalized odds
{'balanced_accuracy': 0.6730769230769231, 'accuracy': 0.9128205128205128, 'fairness': 0.011904761904761918, 'TPR_protected': 1.0, 'TPR_non_protected': 1.0, 'TNR_protected': 0.3333333333333333, 'TNR_non_protected': 0.35714285714285715}


### Generating unlearning request

In [9]:

# One slot per restart; every slot is overwritten inside the loop below.
scores = torch.full((args.restarts,), 10_000.0)
weights = []

for trial in range(args.restarts):
    print(f'restart {trial}')

    # Each restart draws a fresh validation subset and weight initialisation.
    loss_diff, poison_weights = iteration(
        args, model, trainset, train_loader, sa_index, p_Group
    )
    scores[trial] = loss_diff
    weights.append(poison_weights.detach())

# Keep the restart whose objective dropped the most between first and last step.
optimal_score = scores.argmax()
stat_optimal_loss = scores[optimal_score].item()
print(f'weights with maximum loss reduced {stat_optimal_loss:6.4e} selected.')
poison_weights = weights[optimal_score]

# Saving is currently disabled: save_results(args, poison_weights)

restart 0
step: 0, target loss: -0.9637472033500671
step: 6, target loss: -0.9653230607509613
step: 12, target loss: -0.9653251767158508
step: 18, target loss: -0.9653382897377014
step: 24, target loss: -0.9653534591197968
step: 29, target loss: -0.9653474986553192
restart 1
step: 0, target loss: -0.979659229516983
step: 6, target loss: -0.9811071157455444
step: 12, target loss: -0.9811063408851624
step: 18, target loss: -0.9810713529586792
step: 24, target loss: -0.981037825345993
step: 29, target loss: -0.9810762405395508
restart 2
step: 0, target loss: -1.0707741975784302
step: 6, target loss: -1.0724393129348755
step: 12, target loss: -1.0724468231201172
step: 18, target loss: -1.0724339485168457
step: 24, target loss: -1.0724686980247498
step: 29, target loss: -1.072466492652893
restart 3
step: 0, target loss: -1.1157436966896057
step: 6, target loss: -1.1177802085876465
step: 12, target loss: -1.117764949798584
step: 18, target loss: -1.1177828907966614
step: 24, target loss: -1.

### Performing the unlearning process.

In [10]:
# Apply the selected unlearning request to the saved victim checkpoint and
# print fairness before/after, next to a same-sized random-subset baseline.
victim(args, poison_weights)


==> before unlearning
Test accuracy: 91.28%
Absolute Equalized odds: 0.0119
==> after unlearning
Number of unlearning data: 50
Test accuracy: 90.77%
Absolute Equalized odds (ours): 0.0238
Test accuracy: 91.28%
Absolute Equalized odds (rand): 0.0119
