In [None]:
# install PySyft if it is not already available (e.g. on a hosted/cloud runtime)
# %pip install syft

# import required libraries
import torch
import random
import numpy as np
from torch import nn
from torch import optim
import torch.nn.functional as F
from torch.utils.data import Subset
from torchvision import datasets, transforms
from syft.frameworks.torch.differential_privacy import pate

# Seed every RNG the notebook can draw from (Python, NumPy, PyTorch) so that
# shuffling, weight initialisation and dropout are repeatable across
# "Restart Kernel -> Run All".
SEED = 42
random.seed(SEED)
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when one is visible to PyTorch, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [None]:
# Load MNIST. Images are converted to tensors and normalised from [0, 1] to [-1, 1].
transform = transforms.Compose([transforms.ToTensor(),
                                transforms.Normalize((0.5,), (0.5,))])
mnist_trainset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
# The held-out split has to be loaded separately with train=False. The
# `test_data` / `test_labels` attributes of a train=True dataset are only
# deprecated aliases of `.data` / `.targets`, i.e. they return the training set.
mnist_testset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

# Raw (untransformed) image tensors and integer labels of the training split.
train_data = mnist_trainset.data
train_targets = mnist_trainset.targets

# Raw (untransformed) image tensors and integer labels of the test split.
test_data = mnist_testset.data
test_targets = mnist_testset.targets

In [None]:
# define function to split the training data into sub-datasets, one per teacher
# returns num_teachers data loaders, one to train each teacher model
# TODO: split into train and test
def splitTrainData(train_data, num_teachers, batch_size=64):
    """Partition a dataset into contiguous, near-equal shards, one per teacher.

    Arguments
    ---------
    train_data: any indexable object with a length (e.g. a torch Dataset,
        a tensor or a list). Pass a dataset whose items are (image, label)
        pairs if the resulting loaders must yield labelled batches.
    num_teachers: integer, number of shards / loaders to create
    batch_size: integer, batch size used by every returned DataLoader

    Returns
    -------
    A list of ``num_teachers`` shuffling DataLoaders. Shard sizes differ by at
    most one sample; the first ``len(train_data) % num_teachers`` shards get
    the extra sample.

    Raises
    ------
    ValueError: if ``num_teachers`` is smaller than 1 or larger than the
        number of samples (which would produce empty shards).
    """
    num_samples = len(train_data)
    if not 1 <= num_teachers <= num_samples:
        raise ValueError(
            "num_teachers must be between 1 and len(train_data) ({}), got {}".format(
                num_samples, num_teachers))

    shard_size, remainder = divmod(num_samples, num_teachers)
    teachers_train_data = []
    start = 0
    for teacher in range(num_teachers):
        stop = start + shard_size + (1 if teacher < remainder else 0)
        # Subset expects *positions* into train_data, not the samples themselves.
        shard = Subset(train_data, list(range(start, stop)))
        loader = torch.utils.data.DataLoader(shard, batch_size=batch_size, shuffle=True)
        teachers_train_data.append(loader)
        start = stop
    return teachers_train_data

In [None]:
# define function to take a chunk of the test data as private dataset
# returns reduced test data and private dataset
def createPrivateDataset(test_data):
    """Placeholder for carving a private dataset out of the test data.

    The split is not implemented yet: ``test_data`` is currently ignored and
    two fresh, empty lists are returned.

    Returns
    -------
    (reduced_test_data, private_data): both are empty lists for now.
    """
    # TODO: implement the actual split of test_data.
    return [], []

In [None]:
# define class to build linear classifier models for each teacher
class Classifier(nn.Module):
    """Fully connected feedforward classifier with a configurable hidden stack."""

    def __init__(self, input_size, output_size, hidden_layers, drop_p=0.5):
        ''' Assembles the network: input -> hidden layers -> output.

            Arguments
            ---------
            input_size: integer, number of input features
            output_size: integer, number of output classes
            hidden_layers: list of integers, width of each hidden layer
                (must contain at least one entry)
            drop_p: float, dropout probability applied after every hidden layer

        '''
        super().__init__()
        # Consecutive (fan_in, fan_out) pairs: input -> h0 -> h1 -> ...
        widths = [input_size, *hidden_layers]
        self.hidden_layers = nn.ModuleList(
            [nn.Linear(fan_in, fan_out) for fan_in, fan_out in zip(widths[:-1], widths[1:])]
        )
        self.output = nn.Linear(hidden_layers[-1], output_size)
        self.dropout = nn.Dropout(p=drop_p)

    def forward(self, x):
        ''' Forward pass; returns log-probabilities (log-softmax over dim 1) '''
        for layer in self.hidden_layers:
            # ReLU activation followed by dropout after each hidden layer
            x = self.dropout(F.relu(layer(x)))
        logits = self.output(x)
        return F.log_softmax(logits, dim=1)

In [None]:
# define function to train model given train and test datasets
def train(model, trainloader, testloader, criterion, optimizer, epochs=5, print_every=40):
    """Train ``model`` and periodically report loss/accuracy on ``testloader``.

    Arguments
    ---------
    model: the network to optimise; it is called on flattened image batches
    trainloader: iterable of (images, labels) training batches
    testloader: iterable of (images, labels) batches passed to ``validation``
    criterion: loss function taking (model output, labels)
    optimizer: optimizer already bound to ``model``'s parameters
    epochs: integer, number of passes over ``trainloader``
    print_every: integer, report progress every this many training steps

    Every batch is flattened to (batch_size, -1) before the forward pass. The
    flattening is not done in place, so the loader's tensors keep their shape.
    """
    steps = 0
    running_loss = 0
    for e in range(epochs):
        # Model in training mode, dropout is on
        model.train()
        for images, labels in trainloader:
            steps += 1
            # Flatten each sample; reshape returns a new view/copy rather than
            # resizing the caller's tensor and does not hard-code 784 features.
            images = images.reshape(images.shape[0], -1)
            optimizer.zero_grad()
            # Call the module itself (not .forward) so registered hooks run.
            output = model(images)
            loss = criterion(output, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()

            if steps % print_every == 0:
                # Evaluation mode (dropout off) and no autograd bookkeeping
                model.eval()
                with torch.no_grad():
                    test_loss, accuracy = validation(model, testloader, criterion)

                print("Epoch: {}/{}.. ".format(e+1, epochs),
                      "Training Loss: {:.3f}.. ".format(running_loss/print_every),
                      "Test Loss: {:.3f}.. ".format(test_loss/len(testloader)),
                      "Test Accuracy: {:.3f}".format(accuracy/len(testloader)))

                running_loss = 0
                # Back to training mode for the next batches
                model.train()

In [None]:
# define function to validate model using the reduced test data set
def validation(model, testloader, criterion):
    """Accumulate loss and accuracy of ``model`` over all batches of ``testloader``.

    Arguments
    ---------
    model: network returning per-class scores (e.g. log-probabilities)
    testloader: iterable of (images, labels) batches
    criterion: loss function taking (model output, labels)

    Returns
    -------
    (test_loss, accuracy): the *sums* over batches of the batch loss (float)
    and of the batch accuracy (0-dim tensor, or 0 for an empty loader).
    Divide both by the number of batches to obtain averages.

    The model's train/eval mode is left untouched; switch to ``model.eval()``
    before calling if dropout should be disabled.
    """
    accuracy = 0
    test_loss = 0
    # Evaluation never needs gradients, whatever the caller's context is.
    with torch.no_grad():
        for images, labels in testloader:
            # Flatten without resizing the loader's tensor in place and
            # without hard-coding the number of features.
            images = images.reshape(images.shape[0], -1)
            # Call the module itself (not .forward) so registered hooks run.
            output = model(images)
            test_loss += criterion(output, labels).item()
            # The predicted class is the highest-scoring one; exp() is
            # monotonic, so no conversion to probabilities is needed.
            predictions = output.argmax(dim=1)
            # Batch accuracy = fraction of correct predictions
            accuracy += (predictions == labels).float().mean()
    return test_loss, accuracy

In [None]:
# define function to perform PATE analysis
def runPateAnalysis(teacher_preds=None, indices=None, noise_eps=0.1, delta=1e-5):
    """Run PySyft's PATE privacy analysis and report both epsilon values.

    Arguments
    ---------
    teacher_preds: per-teacher predictions handed to ``pate.perform_analysis``.
        When omitted, the notebook-level global ``preds`` is used (legacy
        zero-argument call style).
    indices: aggregated labels handed to ``pate.perform_analysis``. When
        omitted, the notebook-level global ``indices`` is used.
    noise_eps: noise epsilon forwarded to the analysis
    delta: delta forwarded to the analysis

    Returns
    -------
    (data_dep_eps, data_ind_eps) as returned by ``pate.perform_analysis``.

    Raises
    ------
    NameError: if an argument is omitted and the corresponding global does
        not exist.
    """
    if teacher_preds is None or indices is None:
        # Legacy call without arguments: fall back to the notebook globals.
        notebook_ns = globals()
        try:
            if teacher_preds is None:
                teacher_preds = notebook_ns["preds"]
            if indices is None:
                indices = notebook_ns["indices"]
        except KeyError as missing:
            raise NameError(
                "name {} is not defined; pass teacher_preds and indices explicitly".format(missing)
            ) from None

    data_dep_eps, data_ind_eps = pate.perform_analysis(
        teacher_preds=teacher_preds, indices=indices, noise_eps=noise_eps, delta=delta)
    print("Data Independent Epsilon:", data_ind_eps)
    print("Data Dependent Epsilon:", data_dep_eps)
    return data_dep_eps, data_ind_eps

In [None]:
teachers_train_data = splitTrainData(train_data, num_teachers=10)


In [None]:
teacher1_trainloader = teachers_train_data[1]
teacher1_trainloader

In [None]:
teacher1_model = Classifier(784, 10, [512, 256, 128], drop_p=0.25)
teacher1_train_data = teachers_train_data[1]
teacher1_test_data = 
teacher1_trainloader = 
teacher1_testloader = 
teacher1_criterion = nn.NLLLoss()
teacher1_optimizer = optim.SGD(model.parameters(), lr=1e-4, momentum=0.9)
train(model, trainloader, testloader, criterion, optimizer, epochs=2)