# Private Aggregation of Teacher Ensembles (PATE)



![PATE chart](img/pate.jpeg)

## Import libraries

In [1]:
import torch

import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import Subset

## Load the [Data](http://pytorch.org/docs/stable/torchvision/datasets.html)

Downloading may take a few moments, and you should see your progress as the data is loading. You may also choose to change the `batch_size` if you want to load more data at a time.

In [2]:
# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 32

# convert data to torch.FloatTensor
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

# choose the training and test datasets
train_data = datasets.MNIST(root='data', train=True,
                                   download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False,
                                  download=True, transform=transform)


Function for returning dataloaders for a specified number of teachers.

In [3]:
# number of teachers to essemble
num_teachers = 100

def get_data_loaders(train_data, num_teachers = 10):
    teacher_loaders = []
    data_size = len(train_data) // num_teachers

    for i in range(num_teachers):
        indices = list(range(i*data_size, (i+1) *data_size))
        subset_data = Subset(train_data, indices)
        loader = torch.utils.data.DataLoader(subset_data, batch_size=batch_size, num_workers=num_workers)
        teacher_loaders.append(loader)

    return teacher_loaders

teacher_loaders = get_data_loaders(train_data, num_teachers)

Define a train student set of 9000 examples and 1000 test examples

In [4]:
student_train_data = Subset(test_data, list(range(9000)))
student_test_data = Subset(test_data, list(range(9000, 10000)))

student_train_loader = torch.utils.data.DataLoader(student_train_data, batch_size=batch_size, 
            num_workers=num_workers)
student_test_loader = torch.utils.data.DataLoader(student_test_data, batch_size=batch_size, 
            num_workers=num_workers)

## Defining models

I'm going to define a single model for all the teachers, the analysis does not depends on the model

In [5]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        return F.log_softmax(x)
    

In [79]:
device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

def train(model, trainloader, criterion, optimizer, epochs=10, print_every=120):
    model.to(device)
    steps = 0
    running_loss = 0
    for e in range(epochs):
        # Model in training mode, dropout is on
        model.train()
        for images, labels in trainloader:
            images, labels = images.to(device), labels.to(device)
            steps += 1
            
            optimizer.zero_grad()
            
            output = model.forward(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item()


In [80]:
def predict(model, dataloader):
    outputs = torch.zeros(0, dtype=torch.long).to(device)
    model.to(device)
    model.eval()
    for images, labels in dataloader:
        images, labels = images.to(device), labels.to(device)
        output = model.forward(images)
        ps = torch.argmax(torch.exp(output), dim=1)
        outputs = torch.cat((outputs, ps))
    
    return outputs    


## Training all the teacher models

Here we define and train the teachers

In [81]:
# Instantiate and train the models for each teacher
def train_models(num_teachers):
    models = []
    for t in range(num_teachers):
        print("Training teacher {}".format(t+1))
        model = Net()
        criterion = nn.NLLLoss()
        optimizer = optim.Adam(model.parameters(), lr=0.003)
        train(model, teacher_loaders[t], criterion, optimizer)
        models.append(model)
    return models

models = train_models(num_teachers) 

Training teacher 1




Training teacher 2
Training teacher 3
Training teacher 4
Training teacher 5
Training teacher 6
Training teacher 7
Training teacher 8
Training teacher 9
Training teacher 10
Training teacher 11
Training teacher 12
Training teacher 13
Training teacher 14
Training teacher 15
Training teacher 16
Training teacher 17
Training teacher 18
Training teacher 19
Training teacher 20
Training teacher 21
Training teacher 22
Training teacher 23
Training teacher 24
Training teacher 25
Training teacher 26
Training teacher 27
Training teacher 28
Training teacher 29
Training teacher 30
Training teacher 31
Training teacher 32
Training teacher 33
Training teacher 34
Training teacher 35
Training teacher 36
Training teacher 37
Training teacher 38
Training teacher 39
Training teacher 40
Training teacher 41
Training teacher 42
Training teacher 43
Training teacher 44
Training teacher 45
Training teacher 46
Training teacher 47
Training teacher 48
Training teacher 49
Training teacher 50
Training teacher 51
Training

## Aggregated teacher

This function predict the labels from all the dataset in each of the teachers, then return all the predictions and the maximum votation after adding laplacian noise

In [82]:
import numpy as np

In [83]:
# define epsilon
epsilon = 0.2

# Aggregated teacher

This function makes the predictions in all the teachers, count the votes and add noise, then returns the votation and the argmax results.

In [84]:
def aggregated_teacher(models, data_loader, epsilon):
    preds = torch.torch.zeros((len(models), 9000), dtype=torch.long)
    for i, model in enumerate(models):
        results = predict(model, data_loader)
        preds[i] = results
        
    labels = np.array([]).astype(int)
    for image_preds in np.transpose(preds):
        label_counts = np.bincount(image_preds, minlength=10)
        beta = 1 / epsilon

        for i in range(len(label_counts)):
            label_counts[i] += np.random.laplace(0, beta, 1)

        new_label = np.argmax(label_counts)
        labels = np.append(labels, new_label)
    
    return preds.numpy(), labels

In [85]:
teacher_models = models
preds, student_labels = aggregated_teacher(teacher_models, student_train_loader, epsilon)



# PATE Analysis

Perform PATE analysis and show the results

In [87]:
from syft.frameworks.torch.differential_privacy import pate

data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=student_labels, noise_eps=epsilon, delta=1e-5)
print("Data Independent Epsilon:", data_ind_eps)
print("Data Dependent Epsilon:", data_dep_eps)

Data Independent Epsilon: 1451.5129254649705
Data Dependent Epsilon: 15.661427783915407


# Training the student

Now we will train the student with the aggregated teacher labels

In [88]:
def student_loader(student_train_loader, labels):
    for i, (data, _) in enumerate(iter(student_train_loader)):
        yield data, torch.from_numpy(labels[i*len(data):(i+1)*len(data)])

In [None]:
student_model = Net()
criterion = nn.NLLLoss()
optimizer = optim.Adam(student_model.parameters(), lr=0.001)
epochs = 10
student_model.to(device)
steps = 0
running_loss = 0
for e in range(epochs):
    # Model in training mode, dropout is on
    student_model.train()
    train_loader = student_loader(student_train_loader, student_labels)
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        steps += 1

        optimizer.zero_grad()
        output = student_model.forward(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        if steps % 50 == 0:
            test_loss = 0
            accuracy = 0
            student_model.eval()
            with torch.no_grad():
                for images, labels in student_test_loader:
                    images, labels = images.to(device), labels.to(device)
                    log_ps = student_model(images)
                    test_loss += criterion(log_ps, labels).item()
                    
                    # Accuracy
                    ps = torch.exp(log_ps)
                    top_p, top_class = ps.topk(1, dim=1)
                    equals = top_class == labels.view(*top_class.shape)
                    accuracy += torch.mean(equals.type(torch.FloatTensor))
            student_model.train()
            print("Epoch: {}/{}.. ".format(e+1, epochs),
                  "Training Loss: {:.3f}.. ".format(running_loss/len(student_train_loader)),
                  "Test Loss: {:.3f}.. ".format(test_loss/len(student_test_loader)),
                  "Test Accuracy: {:.3f}".format(accuracy/len(student_test_loader)))
            running_loss = 0



Epoch: 1/10..  Training Loss: 0.392..  Test Loss: 1.955..  Test Accuracy: 0.487
Epoch: 1/10..  Training Loss: 0.287..  Test Loss: 1.063..  Test Accuracy: 0.719
Epoch: 1/10..  Training Loss: 0.196..  Test Loss: 0.700..  Test Accuracy: 0.784
Epoch: 1/10..  Training Loss: 0.127..  Test Loss: 0.570..  Test Accuracy: 0.836
Epoch: 1/10..  Training Loss: 0.110..  Test Loss: 0.376..  Test Accuracy: 0.890
Epoch: 2/10..  Training Loss: 0.121..  Test Loss: 0.380..  Test Accuracy: 0.902
Epoch: 2/10..  Training Loss: 0.097..  Test Loss: 0.316..  Test Accuracy: 0.904
Epoch: 2/10..  Training Loss: 0.087..  Test Loss: 0.317..  Test Accuracy: 0.899
Epoch: 2/10..  Training Loss: 0.081..  Test Loss: 0.292..  Test Accuracy: 0.908


In [73]:
t1_model = models[99]
t1_model.eval()
with torch.no_grad():
    test_loss = 0
    accuracy = 0
    for images, labels in student_test_loader:
        images, labels = images.to(device), labels.to(device)
        log_ps = t1_model(images)
        test_loss += criterion(log_ps, labels).item()

        # Accuracy
        ps = torch.exp(log_ps)
        top_p, top_class = ps.topk(1, dim=1)
        equals = top_class == labels.view(*top_class.shape)
        accuracy += torch.mean(equals.type(torch.FloatTensor))
    t1_model.train()
    print("Test Loss: {:.3f}.. ".format(test_loss),
          "Test Accuracy: {:.3f}".format(accuracy))

Test Loss: 15.074..  Test Accuracy: 27.719


