# Differential Privacy with the Fashion-MNIST

The Fashion-MNIST is a dataset of Zalando's article images. Similarly as the original MNIST dataset, we have 10 classes to clasify in the Fashion-MNIST; 60.000 training images and 10.000 test images. We will see that although these databases have similar specifications, the DP guarantees of the PATE model are different. See the notebook `dp_mnist.ipynb`. As before, we import the basic libraries of Pytorch and Torchvision

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

import torchvision
import torchvision.transforms as transforms


torch.set_printoptions(linewidth=120) # Display options for output

# Extract -Transform - Load (ETL) Step

We extract, tranform and load the training data that is going to be used for the teacher's model. The last line splits the training data for each teacher.

In [2]:
# Extract, tranform and load the private (train) data

train_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=True
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)

teachers_num = 100                             # Define the number of teachers
teachers_batch_size = 30                       # Teachers batch size 
data_size = len(train_set) // teachers_num    # size of dataset for each teacher


teachers_set = torch.utils.data.random_split(train_set, [data_size]*teachers_num) 



### Defining the Network for the Teacher models

Our basic neural network has four hidden layers: two convolutional layers and two "linear" layers with ReLU activation functions. 

In [3]:
class Network(nn.Module): 
    
    """Network used to train the Teachers"""
    
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=12, kernel_size=5)
        
        self.fc1 = nn.Linear(in_features=12*4*4, out_features=120) # 4*4 is the output per channel of conv2 
        self.fc2 = nn.Linear(in_features=120, out_features=60)
        self.out = nn.Linear(in_features=60, out_features=10)
    
    def forward(self, t):
        # (1) input layer
        t = t
        
        # (2) hidden conv layer
        t = self.conv1(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        # (3) hidden conv layer
        t = self.conv2(t)
        t = F.relu(t)
        t = F.max_pool2d(t, kernel_size=2, stride=2)
        
        # (4) hidden linear layer
        t = t.reshape(-1, 12*4*4)  # flattens the output of the conv layers
        t = self.fc1(t)
        t = F.relu(t)
        
        # (5) hidden linear layer
        t = self.fc2(t)
        t = F.relu(t)
        
        # (6) output layer
        t = self.out(t)
        
        return t


We introduce some classes for managing the training processes and to have cleaner runs.

In [4]:
# DEFINE PARAMETERS AND NEEDED CLASSES

from itertools import product
from collections import namedtuple
from collections import OrderedDict

from torch.utils.data import DataLoader
from torch.utils.tensorboard import SummaryWriter
from IPython.display import display, clear_output

import pandas as pd
import json



class RunBuilder():
    @staticmethod
    def get_runs(params):

        Run = namedtuple('Run', params.keys())

        runs = []
        for v in product(*params.values()):
            runs.append(Run(*v))

        return runs
        

class RunManager():
    def __init__(self):

        self.epoch_count = 0
        self.epoch_loss = 0
        self.epoch_num_correct = 0


        self.run_params = None
        self.run_count = 0
        self.run_data = []

        self.network = None
        self.loader = None
        self.tb = None
        
    def begin_run(self, run, network, loader):


        self.run_params = run
        self.run_count += 1

        self.network = network
        self.loader = loader
        self.tb = SummaryWriter(comment=f'-{run}')

        images, labels = next(iter(self.loader))
        grid = torchvision.utils.make_grid(images)

        self.tb.add_image('images', grid)
        self.tb.add_graph(self.network, images)          

    def end_run(self):
        self.tb.close()
        self.epoch_count = 0

    def begin_epoch(self):

        self.epoch_count += 1
        self.epoch_loss = 0
        self.epoch_num_correct = 0
    
    def end_epoch(self):


        loss = self.epoch_loss / len(self.loader.dataset)
        accuracy = self.epoch_num_correct / len(self.loader.dataset)

        self.tb.add_scalar('Loss', loss, self.epoch_count)
        self.tb.add_scalar('Accuracy', accuracy, self.epoch_count)

        for name, param in self.network.named_parameters():
            self.tb.add_histogram(name, param, self.epoch_count)
            self.tb.add_histogram(f'{name}.grad', param.grad, self.epoch_count)
        
        results = OrderedDict()
        results["run"] = self.run_count
        results["epoch"] = self.epoch_count
        results['loss'] = loss
        results["accuracy"] = accuracy

        for keys,values in self.run_params._asdict().items(): 
            results[keys] = values
        self.run_data.append(results)
        df = pd.DataFrame.from_dict(self.run_data, orient='columns') # so we can have a formatted output
    
        clear_output(wait=True)  # only for jupyter notebook
        display(df)              # only for jupyter notebook # outdates the output instead of append
        
    def track_loss(self, loss):
        self.epoch_loss += loss.item() * self.loader.batch_size

    def track_num_correct(self, preds, labels):
        self.epoch_num_correct += self._get_num_correct(preds, labels)
    
    # Auxiliary function. Not intended to be used by outside callers
    def _get_num_correct(self, preds, labels):
        """ function that outputs the number of correct predictions"""
        return preds.argmax(dim=1).eq(labels).sum().item()
    
    def save(self, fileName):
        
        pd.DataFrame.from_dict(
            self.run_data, orient='columns'
        ).to_csv(f'{fileName}.csv')

        with open(f'{fileName}.json', 'w', encoding='utf-8') as f:
            json.dump(self.run_data, f, ensure_ascii=False, indent=4)

## Training with classes 

Run the next block to train the teachers ensemble. The results are available in `.cvs` and `.json` format.

In [5]:
# TRAINING with RunBuilder and RunMananger

from collections import OrderedDict

params = OrderedDict(
    lr = [.01],
    batch_size = [teachers_batch_size],
    teacher = list(range(0,teachers_num))
)

m= RunManager()
teachers = []  # List of Aggregated teachers (Curator's Model)

for run in RunBuilder.get_runs(params): 
    
    network = Network()
    teacher_loader = DataLoader(teachers_set[run.teacher], batch_size=run.batch_size)
    optimizer = optim.Adam(network.parameters(), lr=run.lr)
    
    m.begin_run(run, network, teacher_loader)

    for epoch in range(10):
        m.begin_epoch()
        for batch in teacher_loader:
            
            images, labels = batch # Get Batch
            preds = network(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss
            optimizer.zero_grad() # Zero Gradients
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights

            m.track_loss(loss)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()
    teachers.append(network)  
m.save('results_teacher')

Unnamed: 0,run,epoch,loss,accuracy,lr,batch_size,teacher
0,1,1,1.797394,0.300000,0.01,30,0
1,1,2,1.182544,0.541667,0.01,30,0
2,1,3,0.987065,0.598333,0.01,30,0
3,1,4,0.860477,0.651667,0.01,30,0
4,1,5,0.794237,0.660000,0.01,30,0
...,...,...,...,...,...,...,...
995,100,6,0.539354,0.788333,0.01,30,99
996,100,7,0.552582,0.776667,0.01,30,99
997,100,8,0.500931,0.805000,0.01,30,99
998,100,9,0.460467,0.815000,0.01,30,99


The proper `batch_size` was obtained after experimenting with different values in the `params` dictionary. We visualized the results of the runs with Tensorboard (below a snipped). In order to see this yourself, you just need to install [tensorboard](https://www.tensorflow.org/tensorboard/), run the previous block and then run in the terminal `tensorboard --logdir=runs`. You should be able to see the tensorboard on your web browser with `http://localhost:6006`. 

![Tensorboard_snipe](tb1.png "Tensorboard snipe")

## Aggregate Teacher

We do again the ETL Step for the test set

In [6]:
# Create the public dataset by using 90% of the Test data as train data for the student model
# and remaining 10% of the Test data as test data for the student model.

test_set = torchvision.datasets.FashionMNIST(
    root='./data'
    ,train=False
    ,download=True
    ,transform=transforms.Compose([
        transforms.ToTensor()
    ])
)



student_train_set, student_test_set = torch.utils.data.random_split(test_set, [9000,1000]) # this returns two subsets

student_train_loader = torch.utils.data.DataLoader(student_train_set, batch_size= 100)
student_test_loader = torch.utils.data.DataLoader(student_test_set, batch_size= 100)

In [7]:
# PREDICT FUNCTION

def predict(network_model, dataloader):
    """ This function predicts labels for a dataset 
        given the network model and dataloader as inputs. 
        Outputs a tensor with the probabilities (ps) of the prediction model.
        It outputs a rank-1 tensor of size: size(dataloader).
    """
    outputs = torch.zeros(0, dtype=torch.long)
    
    for images, labels in dataloader:
        output = network_model.forward(images)
        output = F.softmax(output, dim=1)
        ps = torch.argmax(output, dim=1)
        outputs = torch.cat((outputs, ps))
        
    return outputs

We will obtain a $(0.1, 0)-$differential private learning algorithm. Run the code below.

In [8]:
import numpy as np

epsilon = 0.2

def aggregated_teacher(teachers, dataLoader, epsilon):
    """ Take predictions from individual teacher model and 
        creates the true labels for the student after adding 
        laplacian noise to them.
        Returns a rank-2 tensor of teachers of size [len(teachers), 9000] and
        a numpy.ndarray of size 9000 with student labels.
    """
    preds_teachers = torch.torch.zeros((len(teachers), 9000), dtype=torch.long)
    
    for i, teacher in enumerate(teachers):
        results = predict(teacher, dataLoader)
        preds_teachers[i] = results
    
    labels = np.array([]).astype(int)
    for image_preds in np.transpose(preds_teachers):
        label_counts = np.bincount(image_preds, minlength=10)
        beta = 1 / epsilon

        for i in range(len(label_counts)):
            label_counts[i] += np.random.laplace(0, beta, 1)

        new_label = np.argmax(label_counts)
        labels = np.append(labels, new_label)
    
    return preds_teachers, labels


preds_teacher, student_labels = aggregated_teacher(teachers, student_train_loader, epsilon)

We relabel now the training data that is going to be used for training the student.

In [9]:
# Code for parsing images with labels - Student Data Loader

train_processing = torch.utils.data.DataLoader(student_train_set, batch_size= 9000)
student_images, t_labels = next(iter(train_processing))


student_tensor = torch.as_tensor(student_labels)
tensor_data = torch.utils.data.TensorDataset(student_images, student_tensor)
new_loader =torch.utils.data.DataLoader(tensor_data, batch_size= 100)

We have a look at how the new labels differ from the true ones.

In [10]:

print("The new labels are ", student_tensor, "an the old (true) labels are")

print(t_labels)

The new labels are  tensor([1, 4, 4,  ..., 7, 3, 4]) an the old (true) labels are
tensor([1, 4, 4,  ..., 9, 3, 3])


Finally, we train the student model with the new synthatic data, i.e. a relabeled dataset with respect to the teacher ensemble model).

In [20]:
# TRAINING Student with Data Generated by Teachers

params = OrderedDict(
    lr = [.01],
    batch_size = [30]
)

m = RunManager()

for run in RunBuilder.get_runs(params): 
    
    student = Network()
    optimizer = optim.Adam(student.parameters(), lr=run.lr)
    
    m.begin_run(run, student, new_loader)

    for epoch in range(10):
        m.begin_epoch()
        for batch in new_loader:
            
            images, labels = batch # Get Batch
            preds = student(images) # Pass Batch
            loss = F.cross_entropy(preds, labels) # Calculate Loss
            optimizer.zero_grad() # Zero Gradients
            loss.backward() # Calculate Gradients
            optimizer.step() # Update Weights

            m.track_loss(loss)
            m.track_num_correct(preds, labels)
        m.end_epoch()
    m.end_run()
m.save('results_student')

Unnamed: 0,run,epoch,loss,accuracy,lr,batch_size
0,1,1,0.758352,0.713111,0.01,30
1,1,2,0.258625,0.904111,0.01,30
2,1,3,0.206727,0.924556,0.01,30
3,1,4,0.192338,0.926333,0.01,30
4,1,5,0.179398,0.931333,0.01,30
5,1,6,0.173945,0.934889,0.01,30
6,1,7,0.15923,0.938222,0.01,30
7,1,8,0.157825,0.941222,0.01,30
8,1,9,0.145647,0.944222,0.01,30
9,1,10,0.14068,0.945556,0.01,30


The `student` network is ready to receive an arbitrary numbers of queries. It guarantees that no past, present or future attack can affect the privacy loss value of `0.1`. The accuracy of the student model in the training set lies between `94%` and `95%`. However, when we test on 1000 public images that belong to the original dataset we obtain only an accuracy of `77%`. Reducing the number of epochs did not improve the accuracy and in fact it reduced to `71%`. In either case, it is better than random guessing, which is `10%` in this case, but less optimal than the result of the original NMIST.

In [21]:
# Code for testing the accuracy of the student model after training.

correct = 0
total = 0
with torch.no_grad():
    for data in student_test_loader:
        images, labels = data
        outputs = student.forward(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the network on the 10000 test images: %d %%' % (
    100 * correct / total))

Accuracy of the network on the 10000 test images: 77 %
