In [1]:
import os
import subprocess

import pandas as pd
import numpy as np
import torch
import torchvision
import torch.nn as nn
import torch.optim as optim
from torchvision.models import resnet18
from torch.utils.data import DataLoader, Dataset

from sklearn.cluster import KMeans

# Run on the GPU when PyTorch can see one; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = 'cuda'
else:
    DEVICE = 'cpu'



In [2]:
# An accelerator must be attached to this notebook, as otherwise the submission will fail.
# We recommend the P100 GPU rather than the T4: it is faster, which increases the chances
# of finishing within the time cut-off threshold.

if DEVICE != 'cuda':
    raise RuntimeError(
        'Make sure you have added an accelerator to your notebook; the submission will fail otherwise!'
    )

In [3]:
# Helper functions for loading the hidden dataset.

def load_example(df_row):
    '''Read the image referenced by one metadata row and bundle it with the row's fields.

    Args:
        df_row: Mapping-like row providing 'image_path', 'image_id', 'age_group',
            'age' and 'person_id'.

    Returns:
        A dict with the decoded image under 'image' plus the four metadata
        values copied from the row under their original keys.
    '''
    # The image is decoded first, so an unreadable path fails before any metadata is read.
    return {
        'image': torchvision.io.read_image(df_row['image_path']),
        'image_id': df_row['image_id'],
        'age_group': df_row['age_group'],
        'age': df_row['age'],
        'person_id': df_row['person_id'],
    }


class HiddenDataset(Dataset):
    '''The hidden dataset.

    Reads ``{split}.csv`` from the competition input directory, derives each image
    path from its ``image_id`` (``"<a>-<b>"`` -> ``images/<a>/<b>.png``), sorts the
    rows by that path and eagerly loads every example into memory.

    Args:
        split: Name of the CSV file (without extension) to load.

    Raises:
        ValueError: If the split contains no examples.
    '''
    def __init__(self, split='train'):
        super().__init__()

        df = pd.read_csv(f'/kaggle/input/neurips-2023-machine-unlearning/{split}.csv')
        df['image_path'] = df['image_id'].apply(
            lambda x: os.path.join('/kaggle/input/neurips-2023-machine-unlearning/', 'images', x.split('-')[0], x.split('-')[1] + '.png'))
        df = df.sort_values(by='image_path')

        # Cached examples, in sorted-path order. Built with an explicit loop:
        # DataFrame.apply is meant for computing values, not for side effects.
        self.examples = [load_example(row) for _, row in df.iterrows()]
        if len(self.examples) == 0:
            raise ValueError('No examples.')

    def __len__(self):
        '''Number of cached examples.'''
        return len(self.examples)

    def __getitem__(self, idx):
        '''Return example ``idx`` with its image converted to float32.

        A shallow copy is returned so the cached example keeps the image exactly
        as loaded; writing the float32 tensor back into the cache would silently
        replace the stored tensors as items are accessed.
        '''
        example = self.examples[idx]
        # Re-using the existing 'image' key keeps the key order of the cached dict.
        return {**example, 'image': example['image'].to(torch.float32)}


def get_dataset(batch_size):
    '''Build shuffled data loaders for the retain, forget and validation splits.

    Args:
        batch_size: Batch size shared by all three loaders.

    Returns:
        The tuple ``(retain_loader, forget_loader, validation_loader)``.
    '''
    # All three datasets are loaded first, then each one is wrapped in a loader.
    splits = [HiddenDataset(split=name) for name in ('retain', 'forget', 'validation')]
    retain_loader, forget_loader, validation_loader = (
        DataLoader(split_ds, batch_size=batch_size, shuffle=True) for split_ds in splits
    )
    return retain_loader, forget_loader, validation_loader

In [4]:
def extract_features(model, loader):
    """Collect flattened activations of `model` with its last child module removed.

    The model is switched to eval mode and every batch of `loader` is passed,
    without gradient tracking, through all child modules except the final one.

    Args:
        model: Module whose last child is dropped to obtain the feature extractor.
        loader: Iterable of batches, each a mapping with 'image' and 'age_group' tensors.

    Returns:
        A pair ``(features, labels)`` of Python lists: one flattened NumPy feature
        vector and one NumPy label value per example, in iteration order.
    """
    model.eval()

    # Everything but the final child module acts as the feature extractor.
    backbone = torch.nn.Sequential(*list(model.children())[:-1])

    collected_features, collected_labels = [], []
    with torch.no_grad():
        for batch in loader:
            batch_images = batch['image'].to(DEVICE)
            batch_labels = batch['age_group']
            activations = backbone(batch_images)
            # Flatten to one row per example before moving the data to NumPy.
            flat = activations.view(activations.size(0), -1)
            collected_features.extend(flat.cpu().numpy())
            collected_labels.extend(batch_labels.cpu().numpy())

    return collected_features, collected_labels

In [5]:
def group_data(model, loader, n_clusters=100):
    """Cluster the loader's dataset with KMeans on features extracted by `model`.

    Features are extracted by iterating the dataset in index order (not through
    `loader` itself, which may shuffle), so that row ``i`` of every returned
    object refers to the same example, ``loader.dataset[i]``.

    Args:
        model: Network handed to `extract_features`.
        loader: DataLoader whose ``dataset`` is clustered; its batch size and
            collate function are reused for feature extraction.
        n_clusters: Number of KMeans clusters.

    Returns:
        A tuple ``(data_tensor, labels, clusters)``: the stacked images viewed as
        ``(-1, 3, 32, 32)``, the list of labels returned by `extract_features`,
        and a ``torch.long`` tensor with one cluster id per example.
    """
    dataset = loader.dataset

    # A shuffling loader would yield features/labels in a random order that does not
    # match the index order used for `data_tensor` below, so use a sequential one.
    # `batch_size` is None when the loader was built from a batch sampler; fall back
    # to a fixed size in that case.
    ordered_loader = DataLoader(
        dataset,
        batch_size=loader.batch_size or 64,
        shuffle=False,
        collate_fn=loader.collate_fn,
    )
    features, labels = extract_features(model, ordered_loader)

    # Use KMeans to cluster the extracted features
    kmeans = KMeans(n_clusters=n_clusters, n_init=10).fit(features)
    clusters = torch.tensor(kmeans.predict(features), dtype=torch.long)

    # Stack the images in the same (index) order; labels stay a plain Python list.
    data_list = [sample['image'] for sample in dataset]
    data_tensor = torch.stack(data_list).view(-1, 3, 32, 32)

    return data_tensor, labels, clusters

In [6]:
def unlearning1(net, retain_loader, forget_loader, validation_loader, n_clusters=100):
    """Unlearn the forget set in place: negative fine-tuning plus cluster replay.

    For every forget batch the network is first trained towards labels that are
    guaranteed to differ from the true ones, and then the retain-set clusters
    associated with the batch's labels are replayed ``epochs2`` times with their
    true labels.

    Args:
        net: Model to modify in place.
        retain_loader: Loader over the retain set; its dataset is clustered by `group_data`.
        forget_loader: Loader over the examples to forget.
        validation_loader: Unused; kept for a uniform signature.
        n_clusters: Number of KMeans clusters for the retain set.

    Returns:
        None. `net` is updated in place and left in eval mode.

    Raises:
        ValueError: If a forget label does not occur among the retain labels.
    """
    data, labels, clusters = group_data(net, retain_loader, n_clusters)

    # Each label is associated with the cluster of the FIRST retain example carrying
    # that label (the rule of ``clusters[labels.index(label)]``). Resolving this once
    # avoids a linear scan of the retain labels for every forget example.
    label_to_cluster = {}
    for label, cluster_id in zip(labels, clusters.tolist()):
        label_to_cluster.setdefault(int(label), cluster_id)

    data, clusters = data.to(DEVICE), clusters.to(DEVICE)
    # Retain labels as a tensor, built once instead of once per replayed cluster.
    retain_labels = torch.tensor(labels, device=DEVICE)

    epochs = 1
    epochs2 = 3
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    net.train()

    for _ in range(epochs):
        for sample in forget_loader:
            inputs = sample['image'].to(DEVICE)
            targets = sample['age_group'].to(DEVICE)

            # First optimization phase: push the forget batch towards wrong labels.
            optimizer.zero_grad()
            outputs = net(inputs)
            num_classes = outputs.size(1)
            # An offset in [1, num_classes - 1] always lands on a different class.
            offsets = torch.randint(1, num_classes, targets.size(), device=DEVICE)
            random_targets = (targets + offsets) % num_classes
            loss = criterion(outputs, random_targets)
            loss.backward()
            nn.utils.clip_grad_norm_(net.parameters(), max_norm=1)
            optimizer.step()

            # Map the batch's labels to cluster ids (sorted, de-duplicated).
            batch_labels = set(targets.tolist())
            missing = sorted(batch_labels - label_to_cluster.keys())
            if missing:
                raise ValueError(f'Forget labels {missing} do not occur in the retain set.')
            cluster_ids = sorted({label_to_cluster[label] for label in batch_labels})

            # Indices of the retain examples belonging to each selected cluster.
            masks = [(clusters == cluster_id).nonzero(as_tuple=True)[0] for cluster_id in cluster_ids]

            for _ in range(epochs2):
                for mask in masks:
                    # A one-sample batch can make BatchNorm layers raise in training
                    # mode, so singleton clusters are skipped.
                    if mask.numel() < 2:
                        continue
                    same_cluster_data = torch.index_select(data, 0, mask)
                    same_cluster_labels = retain_labels[mask]

                    # Second optimization phase: replay the cluster with true labels.
                    optimizer.zero_grad()
                    output = net(same_cluster_data)
                    loss = criterion(output, same_cluster_labels)
                    loss.backward()
                    nn.utils.clip_grad_norm_(net.parameters(), max_norm=1)
                    optimizer.step()

        # T_max is expressed in epochs, so the schedule advances once per epoch.
        # Stepping it after every optimizer step (with T_max=1) makes the learning
        # rate alternate between its base value and ~0.
        scheduler.step()

    # Leave the network in eval mode for whoever evaluates or saves it next.
    net.eval()

In [7]:
def unlearning(net, retain_loader, forget_loader, validation_loader, n_clusters=100):
    """Unlearn the forget set in place: negative, cluster-replay and positive fine-tuning.

    Each epoch consists of:
      1. Negative fine-tuning: every forget batch is trained towards labels that
         are guaranteed to differ from the true ones.
      2. Cluster replay: right after each forget batch, the retain-set clusters
         associated with the batch's labels are trained on with their true labels.
      3. Positive fine-tuning: one ordinary pass over `retain_loader`.

    Args:
        net: Model to modify in place.
        retain_loader: Loader over the retain set; its dataset is clustered by
            `group_data` and it is iterated for the positive fine-tuning pass.
        forget_loader: Loader over the examples to forget.
        validation_loader: Unused; kept for a uniform signature.
        n_clusters: Number of KMeans clusters for the retain set.

    Returns:
        None. `net` is updated in place and left in eval mode.

    Raises:
        ValueError: If a forget label does not occur among the retain labels.
    """
    data, labels, clusters = group_data(net, retain_loader, n_clusters)

    # Each label is associated with the cluster of the FIRST retain example carrying
    # that label (the rule of ``clusters[labels.index(label)]``). Resolving this once
    # avoids a linear scan of the retain labels for every forget example.
    label_to_cluster = {}
    for label, cluster_id in zip(labels, clusters.tolist()):
        label_to_cluster.setdefault(int(label), cluster_id)

    data, clusters = data.to(DEVICE), clusters.to(DEVICE)
    # Retain labels as a tensor, built once instead of once per replayed cluster.
    retain_labels = torch.tensor(labels, device=DEVICE)

    epochs = 1
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9, weight_decay=5e-4)
    scheduler = optim.lr_scheduler.CosineAnnealingLR(optimizer, T_max=epochs)
    net.train()

    for _ in range(epochs):
        # Negative fine-tuning
        for sample in forget_loader:
            inputs = sample['image'].to(DEVICE)
            targets = sample['age_group'].to(DEVICE)

            # First optimization phase: push the forget batch towards wrong labels.
            optimizer.zero_grad()
            outputs = net(inputs)
            num_classes = outputs.size(1)
            # An offset in [1, num_classes - 1] always lands on a different class.
            offsets = torch.randint(1, num_classes, targets.size(), device=DEVICE)
            random_targets = (targets + offsets) % num_classes
            loss = criterion(outputs, random_targets)
            loss.backward()
            nn.utils.clip_grad_norm_(net.parameters(), max_norm=1)
            optimizer.step()

            # Map the batch's labels to cluster ids (sorted, de-duplicated).
            batch_labels = set(targets.tolist())
            missing = sorted(batch_labels - label_to_cluster.keys())
            if missing:
                raise ValueError(f'Forget labels {missing} do not occur in the retain set.')
            cluster_ids = sorted({label_to_cluster[label] for label in batch_labels})

            for cluster_id in cluster_ids:
                # Indices of the retain examples belonging to this cluster.
                mask = (clusters == cluster_id).nonzero(as_tuple=True)[0]
                # A one-sample batch can make BatchNorm layers raise in training
                # mode, so singleton clusters are skipped.
                if mask.numel() < 2:
                    continue
                same_cluster_data = torch.index_select(data, 0, mask)
                same_cluster_labels = retain_labels[mask]

                # Second optimization phase: replay the cluster with true labels.
                optimizer.zero_grad()
                output = net(same_cluster_data)
                loss = criterion(output, same_cluster_labels)
                loss.backward()
                nn.utils.clip_grad_norm_(net.parameters(), max_norm=1)
                optimizer.step()

        # Positive fine-tuning
        for sample in retain_loader:
            inputs = sample["image"].to(DEVICE)
            targets = sample["age_group"].to(DEVICE)

            optimizer.zero_grad()
            outputs = net(inputs)
            loss = criterion(outputs, targets)
            loss.backward()
            optimizer.step()

        # The cosine schedule is defined over epochs (T_max=epochs): one step per epoch.
        scheduler.step()

    # Leave the network in eval mode for whoever evaluates or saves it next.
    net.eval()

In [8]:
# Build the submission: either an empty placeholder archive (mock run) or 512
# unlearned checkpoints, each produced from the original model with a different
# number of retain-set clusters.
if os.path.exists('/kaggle/input/neurips-2023-machine-unlearning/empty.txt'):
    # mock submission
    subprocess.run('touch submission.zip', shell=True, check=True)
else:

    # Note: it's really important to create the unlearned checkpoints outside of the working directory
    # as otherwise this notebook may fail due to running out of disk space.
    # The below code saves them in /kaggle/tmp to avoid that issue.

    os.makedirs('/kaggle/tmp', exist_ok=True)
    retain_loader, forget_loader, validation_loader = get_dataset(64)
    net = resnet18(weights=None, num_classes=10)
    net.to(DEVICE)

    n_checkpoints = 512
    initial_clusters = 10
    max_clusters = 500

    # Read the original weights once; load_state_dict copies them into `net`, so the
    # same state dict can be reused to reset the model before every run.
    original_state = torch.load('/kaggle/input/neurips-2023-machine-unlearning/original_model.pth')

    for i in range(n_checkpoints):
        net.load_state_dict(original_state)
        # Sweep linearly from initial_clusters (i = 0) to max_clusters (last run).
        # Multiply before dividing: the per-run increment (490 / 511) is below 1 and
        # floors to 0 if computed on its own, which would pin every run to 10 clusters.
        # Note that larger cluster counts make KMeans slower; keep an eye on the time limit.
        n_clusters = initial_clusters + (i * (max_clusters - initial_clusters)) // (n_checkpoints - 1)
        unlearning(net, retain_loader, forget_loader, validation_loader, n_clusters)
        state = net.state_dict()
        torch.save(state, f'/kaggle/tmp/unlearned_checkpoint_{i}.pth')

    # Ensure that submission.zip will contain exactly 512 checkpoints
    # (if this is not the case, an exception will be thrown).
    unlearned_ckpts = os.listdir('/kaggle/tmp')
    if len(unlearned_ckpts) != n_checkpoints:
        raise RuntimeError(f'Expected exactly {n_checkpoints} checkpoints. The submission will throw an exception otherwise.')

    # check=True: fail here if the archive could not be written, instead of
    # finishing the notebook without a usable submission.zip.
    subprocess.run('zip submission.zip /kaggle/tmp/*.pth', shell=True, check=True)