# Private Aggregation of Teacher Ensembles (PATE)



![PATE chart](img/pate.jpeg)

## Import libraries

In [1]:
import torch

import numpy as np
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import Subset

## Load the [Data](http://pytorch.org/docs/stable/torchvision/datasets.html)

Downloading may take a few moments, and you should see your progress as the data is loading. You may also choose to change the `batch_size` if you want to load more data at a time.

In [2]:
# number of subprocesses to use for data loading
num_workers = 0
# how many samples per batch to load
batch_size = 32

# convert data to torch.FloatTensor and normalize it.
# MNIST images have a single channel, so mean and std are 1-tuples; a
# 3-channel mean/std cannot be broadcast onto a 1-channel image tensor.
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])

# choose the training and test datasets
train_data = datasets.MNIST(root='data', train=True,
                                   download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False,
                                  download=True, transform=transform)


Function for returning dataloaders for a number of teachers and 1 student.

In [3]:
# number of teachers to ensemble
num_teachers = 10

def get_data_loaders(train_data, test_data, num_teachers = 10):
    """Build one data loader per teacher and a single loader for the student.

    The training set is cut into ``num_teachers`` consecutive, equally sized,
    non-overlapping slices (one per teacher). When ``len(train_data)`` is not
    a multiple of ``num_teachers`` the trailing remainder is not assigned to
    any teacher. The student loader serves ``test_data``.

    Batch size and worker count are read from the module-level
    ``batch_size`` and ``num_workers`` settings.

    Args:
        train_data: dataset split between the teachers.
        test_data: dataset served by the student loader.
        num_teachers: number of teacher loaders to create (at least 1).

    Returns:
        A tuple ``(teacher_loaders, student_loader)`` where
        ``teacher_loaders`` is a list of ``num_teachers`` DataLoaders.

    Raises:
        ValueError: if ``num_teachers`` is smaller than 1.
    """
    if num_teachers < 1:
        raise ValueError("num_teachers must be at least 1")

    data_size = len(train_data) // num_teachers

    teacher_loaders = []
    for i in range(num_teachers):
        indices = list(range(i * data_size, (i + 1) * data_size))
        subset_data = Subset(train_data, indices)
        loader = torch.utils.data.DataLoader(subset_data, batch_size=batch_size,
                                             num_workers=num_workers)
        teacher_loaders.append(loader)

    # Built once, outside the teacher loop: it does not depend on the teacher.
    student_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size,
                                                 num_workers=num_workers)

    return teacher_loaders, student_loader

# Build one loader per teacher (slices of train_data) and the student loader (test_data).
teacher_loaders, student_loader = get_data_loaders(train_data, test_data, num_teachers)

## Defining models

I'm going to define a single model architecture for all the teachers; the analysis does not depend on the model.

In [4]:
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

class Net(nn.Module):
    """Small convolutional classifier shared by all teachers.

    Two 5x5 convolutions (each followed by 2x2 max-pooling and ReLU) feed two
    fully connected layers that produce 10 class scores. The flatten size of
    320 (= 20 channels * 4 * 4) corresponds to single-channel 28x28 inputs.
    """

    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 10, kernel_size=5)
        self.conv2 = nn.Conv2d(10, 20, kernel_size=5)
        self.conv2_drop = nn.Dropout2d()
        self.fc1 = nn.Linear(320, 50)
        self.fc2 = nn.Linear(50, 10)

    def forward(self, x):
        """Return per-class log-probabilities, one row of 10 values per input image."""
        x = F.relu(F.max_pool2d(self.conv1(x), 2))
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))
        x = x.view(-1, 320)
        x = F.relu(self.fc1(x))
        # Functional dropout must be told the mode explicitly; it is a no-op in eval().
        x = F.dropout(x, training=self.training)
        x = self.fc2(x)
        # Normalise over the class dimension explicitly; omitting `dim` is
        # deprecated and emits a warning on every forward pass.
        return F.log_softmax(x, dim=1)
    

In [5]:
def train(model, trainloader, criterion, optimizer, epochs=5, print_every=120):
    """Train ``model`` in place and periodically print the running loss.

    Args:
        model: the network to optimise.
        trainloader: iterable yielding ``(images, labels)`` batches.
        criterion: loss function called as ``criterion(output, labels)``.
        optimizer: optimizer already bound to the model's parameters.
        epochs: number of passes over ``trainloader``.
        print_every: print the mean loss every this many optimisation steps.
            Steps are counted across epochs, not reset per epoch.

    Returns:
        None. The model's parameters are updated in place.
    """
    steps = 0
    running_loss = 0
    for e in range(epochs):
        # Model in training mode, dropout is on
        model.train()
        for images, labels in trainloader:
            steps += 1

            optimizer.zero_grad()

            # Call the module itself rather than .forward() so that any
            # registered forward hooks are run as well.
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()

            running_loss += loss.item()

            if steps % print_every == 0:
                print("Epoch: {}/{}.. ".format(e+1, epochs),
                      "Training Loss: {:.3f}.. ".format(running_loss/print_every))
                # Start a fresh average for the next reporting window.
                running_loss = 0


In [58]:
def predict(model, dataloader):
    """Return the predicted class index for every sample served by ``dataloader``.

    The model is switched to evaluation mode (and left in it) and run without
    gradient tracking. Predictions are the argmax over dimension 1 of the
    model output; no exponentiation is needed because ``exp`` is monotonic
    and therefore does not change which class scores highest.

    Args:
        model: network whose output has one row of class scores per sample.
        dataloader: iterable yielding ``(images, labels)`` batches; the
            labels are ignored.

    Returns:
        A 1-D ``torch.long`` tensor of predictions in loader order; empty
        when the loader yields no batches.
    """
    model.eval()
    batch_preds = []
    # Inference only: skip autograd bookkeeping to save time and memory.
    with torch.no_grad():
        for images, _ in dataloader:
            output = model(images)
            batch_preds.append(torch.argmax(output, dim=1))

    if not batch_preds:
        return torch.zeros(0, dtype=torch.long)
    # Concatenate once instead of growing a tensor batch by batch.
    return torch.cat(batch_preds)


## Training all the teacher models

Here we define and train the teachers

In [6]:
# Instantiate and train the models for each teacher
def train_models(num_teachers):
    """Create and train one fresh ``Net`` per teacher.

    Teacher number ``k`` (1-based) is fitted on ``teacher_loaders[k - 1]``
    using NLL loss and an Adam optimizer with a learning rate of 0.003.

    Args:
        num_teachers: how many teacher models to train.

    Returns:
        The list of trained models, in teacher order.
    """
    trained = []
    for teacher_number in range(1, num_teachers + 1):
        print("Training teacher {}".format(teacher_number))
        teacher = Net()
        loss_fn = nn.NLLLoss()
        adam = optim.Adam(teacher.parameters(), lr=0.003)
        train(teacher, teacher_loaders[teacher_number - 1], loss_fn, adam)
        trained.append(teacher)
    return trained

# Train one model per teacher, each on its own loader from teacher_loaders.
models = train_models(num_teachers) 

Training teacher 1




Epoch: 1/5..  Training Loss: 1.358.. 
Epoch: 2/5..  Training Loss: 0.654.. 
Epoch: 2/5..  Training Loss: 0.500.. 
Epoch: 3/5..  Training Loss: 0.442.. 
Epoch: 4/5..  Training Loss: 0.435.. 
Epoch: 4/5..  Training Loss: 0.396.. 
Epoch: 5/5..  Training Loss: 0.376.. 
Training teacher 2
Epoch: 1/5..  Training Loss: 1.352.. 
Epoch: 2/5..  Training Loss: 0.628.. 
Epoch: 2/5..  Training Loss: 0.514.. 
Epoch: 3/5..  Training Loss: 0.470.. 
Epoch: 4/5..  Training Loss: 0.395.. 
Epoch: 4/5..  Training Loss: 0.402.. 
Epoch: 5/5..  Training Loss: 0.371.. 
Training teacher 3
Epoch: 1/5..  Training Loss: 1.306.. 
Epoch: 2/5..  Training Loss: 0.651.. 
Epoch: 2/5..  Training Loss: 0.532.. 
Epoch: 3/5..  Training Loss: 0.442.. 
Epoch: 4/5..  Training Loss: 0.418.. 
Epoch: 4/5..  Training Loss: 0.348.. 
Epoch: 5/5..  Training Loss: 0.391.. 
Training teacher 4
Epoch: 1/5..  Training Loss: 1.339.. 
Epoch: 2/5..  Training Loss: 0.647.. 
Epoch: 2/5..  Training Loss: 0.492.. 
Epoch: 3/5..  Training Loss: 0.

## Aggregated teacher

This function predicts the labels for the whole dataset with each of the teachers, then returns all the predictions together with the label that wins the vote for each sample after Laplacian noise has been added to the vote counts.

In [124]:
import numpy as np

In [125]:
def aggregated_teacher(models, data_loader, epsilon=0.1, num_classes=10):
    """Label a dataset by a noisy majority vote over all teacher models.

    Every teacher predicts a class for every sample via ``predict``. For each
    sample the votes per class are counted, independent Laplace noise with
    scale ``1 / epsilon`` is added to every count, and the class with the
    highest noisy count becomes the sample's label.

    Args:
        models: non-empty sequence of trained teacher models.
        data_loader: loader handed to ``predict`` for every teacher; the
            number of samples is taken from the predictions themselves.
        epsilon: positive value; the Laplace noise scale is ``1 / epsilon``.
        num_classes: minimum number of vote bins per sample.

    Returns:
        A tuple ``(preds, labels)``: ``preds`` is an integer NumPy array of
        shape ``(num_samples, num_teachers)`` holding the raw teacher
        predictions, and ``labels`` is a list with one noisy-vote label per
        sample.

    Raises:
        ValueError: if ``models`` is empty or ``epsilon`` is not positive.
    """
    if len(models) == 0:
        raise ValueError("models must contain at least one teacher")
    if epsilon <= 0:
        raise ValueError("epsilon must be positive")

    # One row of predictions per teacher; the sample count comes from the
    # predictions instead of being hard-coded.
    teacher_preds = torch.stack([predict(model, data_loader) for model in models])
    preds = np.transpose(teacher_preds.cpu().numpy())

    beta = 1 / epsilon
    labels = []
    for image_preds in preds:
        # Count in floating point: adding the noise into an integer array
        # would truncate it to whole numbers and distort the distribution.
        label_counts = np.bincount(image_preds, minlength=num_classes).astype(float)
        label_counts += np.random.laplace(0, beta, size=label_counts.shape)
        labels.append(np.argmax(label_counts))

    return preds, labels

In [126]:
# Query every trained teacher on the student's data and aggregate their votes with noise.
teacher_models = models
preds, labels = aggregated_teacher(teacher_models, student_loader)



Pending: perform the PATE analysis.