imports

In [8]:
"""Private Training by Output Perturbation."""
import argparse

import numpy as np
from scipy.stats import ortho_group
import torch
from torch.distributions.gamma import Gamma
from torch import nn

from logistic_regression import nonprivate_logistic_regression
from utils import get_data_loaders

In [9]:
#!pip3 install tqdm

helper functions

In [10]:
def gamma_sample_pytorch_parameterization(concentration, rate):
    """Draw a single sample from a Gamma distribution, PyTorch-style.

    PyTorch's ``Gamma`` takes a concentration (shape) and a *rate*; both
    arguments are forwarded to it unchanged.
    """
    distribution = Gamma(concentration, rate)
    return distribution.sample()


def gamma_sample_chaudhuri_parameterization(concentration, scale):
    """Draw a single Gamma sample given a shape and a *scale*.

    This is the parameterization used by Chaudhuri and Monteleoni. The
    PyTorch-style sampler expects a rate, so the scale is inverted before
    delegating to it.
    """
    return gamma_sample_pytorch_parameterization(concentration, 1. / scale)


def random_unit_norm_vector(num_dims):
    """Sample a direction uniformly at random from the unit sphere.

    A standard normal vector is rotationally symmetric, so dividing it by its
    Euclidean norm yields a uniformly distributed unit vector. This costs
    O(num_dims) time and memory, whereas building a full random orthogonal
    matrix just to read off one column costs O(num_dims ** 3) time and
    O(num_dims ** 2) memory.

    Args:
        num_dims: dimensionality of the vector; must be at least 1.

    Returns:
        A 1-D ``torch.float32`` tensor of length ``num_dims`` with unit
        Euclidean norm.

    Raises:
        ValueError: if ``num_dims`` is smaller than 1.
    """
    if num_dims < 1:
        raise ValueError(
            'num_dims must be at least 1, got {}'.format(num_dims))

    # Drawn from NumPy's global random state.
    direction = np.random.standard_normal(num_dims)
    norm = np.linalg.norm(direction)
    while norm == 0.:
        # An all-zero draw has probability zero, but never divide by zero.
        direction = np.random.standard_normal(num_dims)
        norm = np.linalg.norm(direction)
    return torch.tensor(direction / norm, dtype=torch.float32)

your code

In [14]:
def private_logistic_regression(dset_loader, num_epochs, learning_rate,
    lmbda, epsilon, seed=None):
    """Train logistic regression and privatize it by output perturbation.

    The model is trained with ``nonprivate_logistic_regression`` and a single
    noise vector is added to its weight. The noise direction is uniform on the
    unit sphere and the noise norm is Gamma distributed with shape ``d`` and
    scale ``2 / (n * epsilon * lmbda)``, where ``d`` is
    ``dset_loader.dataset.num_pixels`` and ``n`` is ``len(dset_loader.dataset)``.

    Args:
        dset_loader: training data loader; its ``dataset`` must expose
            ``num_pixels`` and support ``len()``.
        num_epochs: forwarded to ``nonprivate_logistic_regression``.
        learning_rate: forwarded to ``nonprivate_logistic_regression``.
        lmbda: regularization strength; must be positive. It is forwarded to
            the non-private trainer and also sets the noise scale.
        epsilon: privacy parameter; must be positive. Smaller values give
            larger noise.
        seed: forwarded to ``nonprivate_logistic_regression``.

    Returns:
        A state_dict-style dict ``{'weight': tensor}`` holding the perturbed
        weight.

    Raises:
        ValueError: if ``epsilon`` or ``lmbda`` is not positive, or if the
            dataset is empty (the noise scale would be undefined).
    """
    # Validate before any work: each of these would otherwise end in a bare
    # ZeroDivisionError or an invalid (non-positive) Gamma rate.
    if epsilon <= 0:
        raise ValueError('epsilon must be positive, got {}'.format(epsilon))
    if lmbda <= 0:
        raise ValueError('lmbda must be positive, got {}'.format(lmbda))
    num_examples = len(dset_loader.dataset)
    if num_examples == 0:
        raise ValueError('dset_loader.dataset must not be empty')

    num_pixels = dset_loader.dataset.num_pixels

    # Noise = (uniformly random direction) * (Gamma-distributed norm).
    noise_scale = 2. / (num_examples * epsilon * lmbda)
    noise_direction = random_unit_norm_vector(num_pixels)
    noise_norm = gamma_sample_chaudhuri_parameterization(num_pixels, noise_scale)
    noise = noise_direction * noise_norm

    nonprivate_params = nonprivate_logistic_regression(
        dset_loader, num_epochs, learning_rate, lmbda, seed)

    # NOTE(review): assumes 'weight' is a torch tensor whose shape broadcasts
    # against a (num_pixels,) vector, e.g. (1, num_pixels) -- confirm against
    # nonprivate_logistic_regression.
    weight = nonprivate_params['weight']
    # Keep the noise on the weight's device and in its dtype so the sum is
    # valid wherever the model was trained.
    private_weight = weight + noise.to(device=weight.device, dtype=weight.dtype)

    return {'weight': private_weight}

main function

In [15]:
def _evaluate_params(params, test_loader, num_pixels):
    """Return (mean test loss, test accuracy in [0, 1]) for one state_dict."""
    model = nn.Linear(num_pixels, 1, bias=False)
    model.load_state_dict(params)
    model.eval()
    criterion = nn.BCEWithLogitsLoss()  # binary cross entropy
    num_test_examples = len(test_loader.dataset)

    test_loss = 0.
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            # Flatten to the model's input width rather than a hard-coded 28*28.
            images = images.reshape(-1, num_pixels)
            # Squeeze only the output-feature axis. A bare squeeze() would also
            # drop the batch axis when the final batch holds one example, and
            # the loss would then fail on a shape mismatch with the labels.
            logits = model(images).squeeze(-1)
            loss = criterion(logits, labels.float())
            # The criterion returns a per-batch mean; weight it by batch size
            # so the running sum is the mean over the whole test set.
            test_loss += loss.item() * len(images) / float(num_test_examples)
            predicted = (logits > 0.).long()
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return test_loss, float(correct) / float(total)


def main(n, epsilon, lmbda, epochs, batch_size, lr, data_seed, model_seed):
    """Train non-private and private models and print their test metrics.

    Both models are trained on ``loaders['train']`` with the same
    hyper-parameters and ``model_seed``, then evaluated on ``loaders['test']``.
    Test loss and accuracy (as a percentage) are printed for each.

    Args:
        n: forwarded to ``get_data_loaders`` (presumably the number of
            training examples -- confirm against utils.py).
        epsilon: privacy parameter for ``private_logistic_regression``.
        lmbda: regularization strength passed to both trainers.
        epochs: number of training epochs passed to both trainers.
        batch_size: forwarded to ``get_data_loaders``.
        lr: learning rate passed to both trainers.
        data_seed: forwarded to ``get_data_loaders``.
        model_seed: passed as ``seed`` to both trainers.
    """
    # load data
    loaders, _ = get_data_loaders(data_seed, batch_size, n)
    loaders.pop('neighbor')  # don't need this loader for this question

    # train model
    nonprivate_params = nonprivate_logistic_regression(
        loaders['train'], epochs, lr, lmbda, seed=model_seed)
    private_params = private_logistic_regression(
        loaders['train'], epochs, lr, lmbda, epsilon, seed=model_seed)

    # evaluate
    num_pixels = loaders['train'].dataset.num_pixels
    test_losses = dict()
    test_accs = dict()
    for name, params in (('nonprivate', nonprivate_params),
                         ('private', private_params)):
        test_loss, test_acc = _evaluate_params(
            params, loaders['test'], num_pixels)
        test_losses[name] = test_loss
        test_accs[name] = 100. * test_acc  # format as a percentage

    print('final test losses')
    print('nonprivate: {nonprivate:.2f}, private: {private:.2f}'
          .format(**test_losses))
    print('final test accs')
    print('nonprivate: {nonprivate:.2f}, private: {private:.2f}'
          .format(**test_accs))

arguments and main function call

In [28]:
# Run configuration: epsilon = 5, lambda = 1e-2, learning rate = 0.1.
N, BATCH_SIZE = 1000, 256
EPSILON, LMBDA = 5., 1e-2
EPOCHS, LR = 1000, .1
DATA_SEED = MODEL_SEED = 0
main(n=N, epsilon=EPSILON, lmbda=LMBDA, epochs=EPOCHS,
     batch_size=BATCH_SIZE, lr=LR, data_seed=DATA_SEED,
     model_seed=MODEL_SEED)

100%|██████████| 1000/1000 [02:07<00:00,  7.64it/s]
100%|██████████| 1000/1000 [02:11<00:00,  8.47it/s]

final test losses
nonprivate: 0.24, private: 0.26
final test accs
nonprivate: 97.00, private: 94.00





In [30]:
# Run configuration: epsilon = 5, lambda = 1e-2, learning rate = 0.01.
N, BATCH_SIZE = 1000, 256
EPSILON, LMBDA = 5., 1e-2
EPOCHS, LR = 1000, .01
DATA_SEED = MODEL_SEED = 0
main(n=N, epsilon=EPSILON, lmbda=LMBDA, epochs=EPOCHS,
     batch_size=BATCH_SIZE, lr=LR, data_seed=DATA_SEED,
     model_seed=MODEL_SEED)

100%|██████████| 1000/1000 [01:56<00:00,  8.73it/s]
100%|██████████| 1000/1000 [02:08<00:00,  7.72it/s]

final test losses
nonprivate: 0.24, private: 0.21
final test accs
nonprivate: 97.00, private: 97.00





In [32]:
# Run configuration: epsilon = 5, lambda = 1e-2, learning rate = 0.05.
N, BATCH_SIZE = 1000, 256
EPSILON, LMBDA = 5., 1e-2
EPOCHS, LR = 1000, .05
DATA_SEED = MODEL_SEED = 0
main(n=N, epsilon=EPSILON, lmbda=LMBDA, epochs=EPOCHS,
     batch_size=BATCH_SIZE, lr=LR, data_seed=DATA_SEED,
     model_seed=MODEL_SEED)

100%|██████████| 1000/1000 [01:57<00:00,  7.86it/s]
100%|██████████| 1000/1000 [02:02<00:00,  7.88it/s]

final test losses
nonprivate: 0.24, private: 0.20
final test accs
nonprivate: 97.00, private: 96.00





In [42]:
# Run configuration: epsilon = 2, lambda = 5e-4, 100 epochs, learning rate = 0.1.
N, BATCH_SIZE = 1000, 256
EPSILON, LMBDA = 2., 5e-4
EPOCHS, LR = 100, .1
DATA_SEED = MODEL_SEED = 0
main(n=N, epsilon=EPSILON, lmbda=LMBDA, epochs=EPOCHS,
     batch_size=BATCH_SIZE, lr=LR, data_seed=DATA_SEED,
     model_seed=MODEL_SEED)


  0%|          | 0/100 [00:00<?, ?it/s][A
  1%|          | 1/100 [00:00<00:10,  9.15it/s][A
  2%|▏         | 2/100 [00:00<00:10,  9.36it/s][A
  3%|▎         | 3/100 [00:00<00:10,  9.38it/s][A
  4%|▍         | 4/100 [00:00<00:10,  9.51it/s][A
  5%|▌         | 5/100 [00:00<00:10,  9.36it/s][A
  6%|▌         | 6/100 [00:00<00:09,  9.48it/s][A
  7%|▋         | 7/100 [00:00<00:09,  9.61it/s][A
  8%|▊         | 8/100 [00:00<00:09,  9.66it/s][A
  9%|▉         | 9/100 [00:00<00:09,  9.68it/s][A
 10%|█         | 10/100 [00:01<00:09,  9.70it/s][A
 12%|█▏        | 12/100 [00:01<00:08,  9.85it/s][A
 14%|█▍        | 14/100 [00:01<00:08,  9.96it/s][A
 15%|█▌        | 15/100 [00:01<00:08,  9.92it/s][A
 17%|█▋        | 17/100 [00:01<00:08, 10.01it/s][A
 18%|█▊        | 18/100 [00:01<00:08,  9.95it/s][A
 19%|█▉        | 19/100 [00:01<00:08,  9.88it/s][A
 20%|██        | 20/100 [00:02<00:08,  9.87it/s][A
 21%|██        | 21/100 [00:02<00:08,  9.67it/s][A
 22%|██▏       | 22/100 [00:0

final test losses
nonprivate: 0.09, private: 26.80
final test accs
nonprivate: 98.00, private: 26.00


#### Answers

True or False

1- False

2- False

3- False