# CIFAR examples

This code runs CIFAR10 and CIFAR100. To switch between these two datasets, update the `dataset` field in `train_config.yaml` and the `data_path` field in `audit.yaml` accordingly.


In [1]:
import os
import sys
import yaml

# Resolve the directory three levels above the current working directory and make it
# importable, so packages that live there can be imported from this example folder.
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../.."))
# Guard the append so that re-running this cell does not stack duplicate sys.path entries.
if project_root not in sys.path:
    sys.path.append(project_root)


Next, we create the population dataset by concatenating the train and test data. To create the population, we make use of the UserDataset provided in the InputHandler.

In [None]:
from torchvision.datasets import CIFAR10, CIFAR100
from torch import cat, tensor
import pickle
from cifar_handler import CifarInputHandler

# Load the training configuration (dataset name, data directory, split fractions, ...)
with open('train_config.yaml', 'r') as config_file:
    train_config = yaml.safe_load(config_file)

root = train_config["data"]["data_dir"]
dataset_name = train_config["data"]["dataset"]

# Map the configured dataset name to the matching torchvision dataset class
dataset_classes = {"cifar10": CIFAR10, "cifar100": CIFAR100}
if dataset_name not in dataset_classes:
    raise ValueError("Unknown dataset type")
dataset_cls = dataset_classes[dataset_name]

# Load the CIFAR train and test datasets (download=True fetches them into `root` if needed)
trainset = dataset_cls(root=root, train=True, download=True)
testset = dataset_cls(root=root, train=False, download=True)

# Reorder the axes to (N, C, H, W) and scale the pixel values into [0, 1]
train_data = tensor(trainset.data).permute(0, 3, 1, 2).float() / 255
test_data = tensor(testset.data).permute(0, 3, 1, 2).float() / 255

# Ensure train and test data looks correct
assert train_data.shape[0] == 50000, "Train data should have 50000 samples"
assert test_data.shape[0] == 10000, "Test data should have 10000 samples"
assert train_data.shape[1] == 3, "Data should have 3 channels"
assert test_data.shape[1] == 3, "Data should have 3 channels"
assert train_data.max() <= 1 and train_data.min() >= 0, "Data should be normalized"
assert test_data.max() <= 1 and test_data.min() >= 0, "Data should be normalized"

# Concatenate train and test data into the population.
# cat() allocates a new tensor, so the inputs do not need to be cloned first.
data = cat([train_data, test_data], dim=0)
targets = cat([tensor(trainset.targets), tensor(testset.targets)], dim=0)
# Create UserDataset object
population_dataset = CifarInputHandler.UserDataset(data, targets)

assert len(population_dataset) == 60000, "Population dataset should have 60000 samples"

# Store the population dataset to be used by LeakPro.
# An existing file is left untouched, so delete it to force regeneration.
file_path =  "data/"+ dataset_name + ".pkl"
# Create the output folder up front; open(..., "wb") fails if it does not exist.
os.makedirs(os.path.dirname(file_path), exist_ok=True)
if not os.path.exists(file_path):
    with open(file_path, "wb") as population_file:
        pickle.dump(population_dataset, population_file)
    print(f"Save data to {file_path}")

With the population dataset stored, we next create the train and test sets that will go into training the target model.

In [None]:
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import numpy as np

# Fractions of the population used for training/testing the target model, and the batch size
train_fraction = train_config["data"]["f_train"]
test_fraction = train_config["data"]["f_test"]
batch_size = train_config["train"]["batch_size"]

dataset_size = len(population_dataset)
train_size = int(train_fraction * dataset_size)
test_size = int(test_fraction * dataset_size)

# Fail early with a readable message when the configured fractions do not fit in the population
if train_size + test_size > dataset_size:
    raise ValueError(
        f"f_train + f_test selects {train_size + test_size} samples, "
        f"but the population only has {dataset_size}"
    )

# Draw the samples used by the target model without replacement, then split them into
# disjoint train and test index sets.
# NOTE(review): neither call is seeded, so every run produces a different split.
selected_index = np.random.choice(np.arange(dataset_size), train_size + test_size, replace=False)
train_indices, test_indices = train_test_split(selected_index, test_size=test_size)

train_subset = CifarInputHandler.UserDataset(data[train_indices], targets[train_indices])
# The test subset is built with the parameters returned by the train subset
# (presumably its normalization statistics; confirm in cifar_handler).
test_subset = CifarInputHandler.UserDataset(data[test_indices], targets[test_indices], **train_subset.return_params())

train_loader = DataLoader(train_subset, batch_size = batch_size, shuffle = True)
test_loader = DataLoader(test_subset, batch_size = batch_size, shuffle = False)

# Evaluate the per-channel mean and standard deviation of the train data
train_mean = train_subset.data.mean(dim=(0, 2, 3))
train_std = train_subset.data.std(dim=(0, 2, 3))
print (f"Train mean: {train_mean}, Train std: {train_std}")

With the train and test dataloader in place, we train a ResNet18. 
After training, we call LeakPro to create metadata that will be used during auditing.

In [None]:
from torch import save, optim, nn
from cifar_handler import CifarInputHandler
from target_model_class import ResNet18
# Imported before training so that a missing or broken install fails before the expensive step
from leakpro import LeakPro

# Make sure both output locations exist: the metadata is written to "target", while the
# model weights are written to the configured log directory.
log_dir = train_config["run"]["log_dir"]
os.makedirs("target", exist_ok=True)
os.makedirs(log_dir, exist_ok=True)

# Number of output classes for the configured dataset
num_classes_by_dataset = {"cifar10": 10, "cifar100": 100}
if train_config["data"]["dataset"] not in num_classes_by_dataset:
    raise ValueError("Invalid dataset name")
num_classes = num_classes_by_dataset[train_config["data"]["dataset"]]

# Create instance of target model
model = ResNet18(num_classes = num_classes)

# Read out the relevant parameters for training
lr = train_config["train"]["learning_rate"]
momentum = train_config["train"]["momentum"]
epochs = train_config["train"]["epochs"]

# Create optimizer and loss function
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

# Train target model
train_result = CifarInputHandler().train(dataloader=train_loader,
                            model=model,
                            criterion=criterion,
                            optimizer=optimizer,
                            epochs=epochs)

# Continue with the model returned by training, so that evaluation and saving use the
# trained weights even if train() hands back a different object than it was given.
model = train_result.model
model.to("cpu")

# Evaluate on test set
test_result = CifarInputHandler().eval(test_loader, model, criterion, "cpu")

# Store the model weights
with open(os.path.join(log_dir, "target_model.pkl"), "wb") as f:
    save(model.state_dict(), f)

# Create metadata to be used by LeakPro
meta_data = LeakPro.make_mia_metadata(train_result=train_result,
                                      optimizer=optimizer,
                                      loss_fn=criterion,
                                      dataloader=train_loader,
                                      test_result=test_result,
                                      epochs=epochs,
                                      train_indices=train_indices,
                                      test_indices=test_indices,
                                      dataset_name=dataset_name)

with open("target/model_metadata.pkl", "wb") as f:
    pickle.dump(meta_data, f)
    

Plot the accuracy and loss of the target model on the training and test sets.

In [None]:
import matplotlib.pyplot as plt

# Training histories recorded by the handler, and the single test-set evaluation
train_acc = train_result.metrics.extra["accuracy_history"]
train_loss = train_result.metrics.extra["loss_history"]
test_acc = test_result.accuracy
test_loss = test_result.loss

fig, (ax_acc, ax_loss) = plt.subplots(1, 2, figsize=(5, 4))

# Plot training and test accuracy; the test value is drawn as a single marker at the
# last index of the training history.
ax_acc.plot(train_acc, label='Train Accuracy')
ax_acc.plot(len(train_acc) - 1, test_acc, 'ro', label='Test Accuracy')
ax_acc.set_xlabel('Epoch')
ax_acc.set_ylabel('Accuracy')
ax_acc.set_title('Accuracy over Epochs')
ax_acc.legend()

# Plot training and test loss
ax_loss.plot(train_loss, label='Train Loss')
ax_loss.plot(len(train_loss) - 1, test_loss, 'ro', label='Test Loss')
ax_loss.set_xlabel('Epoch')
ax_loss.set_ylabel('Loss')
ax_loss.set_title('Loss over Epochs')
ax_loss.legend()

fig.tight_layout()
plt.show()

This is where the privacy auditing takes place.

In [1]:
import os
import sys
import yaml

# Resolve the directory three levels above the current working directory and make it
# importable before the project imports below.
project_root = os.path.abspath(os.path.join(os.getcwd(), "../../.."))
# Guard the append so that re-running this cell does not stack duplicate sys.path entries.
if project_root not in sys.path:
    sys.path.append(project_root)

from leakpro import LeakPro
from cifar_handler import CifarInputHandler

# Path of the audit configuration file
config_path = "audit.yaml"

# Instantiate leakpro object with the CIFAR input handler and the audit configuration
leakpro = LeakPro(CifarInputHandler, config_path)

# Run the audit and keep the results for the reporting cell
mia_results = leakpro.run_audit(return_results=True)

  from .autonotebook import tqdm as notebook_tqdm
2025-03-24 10:36:53,919 INFO     Target model blueprint created from ResNet18 in ./target_model_class.py.
2025-03-24 10:36:53,946 INFO     Loaded target model metadata from ./target/model_metadata.pkl
2025-03-24 10:36:54,159 INFO     Loaded target model from ./target
2025-03-24 10:36:55,200 INFO     Loaded population dataset from ./data/cifar10.pkl
2025-03-24 10:36:55,202 INFO     Image extension initialized.
2025-03-24 10:36:56,662 INFO     MIA attack factory loaded.
2025-03-24 10:36:56,663 INFO     Creating shadow model handler singleton
2025-03-24 10:36:56,751 INFO     Creating distillation model handler singleton
2025-03-24 10:36:56,806 INFO     Added attack: lira
2025-03-24 10:36:56,807 INFO     Preparing attack: lira
2025-03-24 10:36:56,860 INFO     Number of existing models exceeds or equals the number of models to create
2025-03-24 10:36:56,861 INFO     Loading shadow model 9
2025-03-24 10:36:57,286 INFO     Loaded model from ./

<Figure size 640x480 with 0 Axes>

## Generate Report

In [None]:
# Set up the report handler that collects the audit results
from leakpro.reporting.report_handler import ReportHandler

report_handler = ReportHandler(report_dir="./leakpro_output/results")

# Save each MIA result under the name of the attack that produced it
for mia_result in mia_results:
    report_handler.save_results(
        attack_name=mia_result.attack_name,
        result_data=mia_result,
        config=mia_result.configs,
    )

# Create the report by compiling the LaTeX text
report_handler.create_report()

In [None]:
# Read from leakpro_output/attack_objects/shadow_model and load all metadata files
import pickle
import os

shadow_model_path = os.path.join(os.getcwd(), "leakpro_output/attack_objects/shadow_model")
shadow_train_acc = []
shadow_test_acc = []
# os.listdir returns entries in arbitrary order; sorting keeps the "shadow model {i}"
# numbering printed below stable between runs.
for file in sorted(os.listdir(shadow_model_path)):
    if "metadata" not in file.lower():  # Ensure only files containing "metadata" are processed
        continue

    # NOTE: unpickling can execute arbitrary code; only load files written by your own runs.
    with open(os.path.join(shadow_model_path, file), 'rb') as f:
        shadow_metadata = pickle.load(f, fix_imports=True)
    shadow_train_acc.append(shadow_metadata.train_result.accuracy)
    shadow_test_acc.append(shadow_metadata.test_result.accuracy)

# Plot the train and test accuracy of the shadow models as histograms
import matplotlib.pyplot as plt

# Print out the accuracy of the target model (train_acc and test_acc are set in the
# training-curve cell, so that cell must have been run in this kernel)
print(f"Train accuracy of the target model: {train_acc[-1]}")
print(f"Test accuracy of the target model: {test_acc}")
# Print out for shadow models
for i, acc in enumerate(shadow_train_acc):
    print(f"Train accuracy of shadow model {i}: {acc}")
for i, acc in enumerate(shadow_test_acc):
    print(f"Test accuracy of shadow model {i}: {acc}")

# Compare the target model against the shadow models: one histogram of shadow-model
# accuracies per panel, with the target model's accuracy drawn as a red vertical line.

# Left panel: train accuracy
plt.subplot(1, 2, 1)
plt.hist(shadow_train_acc, bins=20, alpha=0.5, label='Shadow')
# Mark the target model using the last entry of its training accuracy history
plt.axvline(x=train_acc[-1], color='r', label='Target')
plt.xlabel('Accuracy')
plt.ylabel('Frequency')
plt.title('Train')
plt.legend()

# Right panel: test accuracy
plt.subplot(1, 2, 2)
plt.hist(shadow_test_acc, bins=20, alpha=0.5, label='Shadow')    
# Mark the target model's test accuracy
plt.axvline(x=test_acc, color='r', label='Target')
plt.xlabel('Accuracy')
plt.title('Test')