Import statements

In [1]:
"""Empirical Sensitivity."""
import argparse
import os

import numpy as np
import torch
from torch import nn


import torchvision.transforms as transforms
from utils import get_data_loaders

In [2]:
from scipy.misc import logsumexp
import warnings
warnings.filterwarnings('ignore')


your code

In [3]:
def plot_hist(array_of_empirical_sensitivities, n, lmbda, name):
    """Save a histogram of empirical sensitivities under ``./figs`` and return its path.

    The histogram uses a log-scaled x axis and 20 bins. A dashed red vertical
    line marks the theoretical maximum sensitivity, computed here as
    ``2 / (n * lmbda)``.

    Args:
        array_of_empirical_sensitivities: np.ndarray of empirical sensitivities.
        n: value used in the theoretical bound (the caller passes the number
            of training examples); must be positive.
        lmbda: value used in the theoretical bound (presumably the
            regularization strength -- confirm against logistic_regression);
            must be positive.
        name: str used as the file stem and appended to the plot title.

    Returns:
        The path of the written image, ``./figs/<name>.histogram.png``.

    Raises:
        ValueError: if ``array_of_empirical_sensitivities`` is not a
            np.ndarray, ``name`` is not a str, or ``n`` / ``lmbda`` is not
            positive.
    """
    if not isinstance(array_of_empirical_sensitivities, np.ndarray):
        raise ValueError('array_of_empirical_sensitivities should be a np.ndarray.')
    if not isinstance(name, str):
        raise ValueError('name should be a str')
    # Reject values that would make the bound undefined (division by zero) or
    # negative (which cannot be drawn on a log axis) before doing any plotting.
    if n <= 0 or lmbda <= 0:
        raise ValueError('n and lmbda should be positive')

    import matplotlib
    matplotlib.use('Agg')  # file-only backend: figures are saved, never shown
    import matplotlib.pyplot as plt

    # NOTE(review): 2 / (n * lmbda) is the bound chosen by the author; it
    # presumably assumes a 1-Lipschitz loss -- confirm against the course notes.
    max_theoretical_sensitivity = 2 / (n * lmbda)

    num_bins = 20
    dirname = './figs'
    filename = os.path.join(dirname, name) + '.histogram.png'
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(dirname, exist_ok=True)

    fig, ax = plt.subplots()
    try:
        ax.set_xscale('log')
        ax.hist(array_of_empirical_sensitivities, num_bins,
                label='empirical sensitivities')
        ax.set_title('histogram of sensitivities: ' + name)
        ax.set_xlabel('sensitivity')
        ax.set_ylabel('count')
        ax.axvline(x=max_theoretical_sensitivity, color='r', linestyle='dashed',
                   linewidth=2, label='theoretical max sensitivity')
        ax.legend()
        fig.savefig(filename)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)
    return filename


def _save_neighbor_pair(plt, neighbor_pair, filename, title):
    """Draw the two images of one neighboring pair side by side and save them to ``filename``."""
    (img, label), (img_prime, label_prime) = neighbor_pair
    to_pil = transforms.ToPILImage()
    fig, axes = plt.subplots(1, 2)
    try:
        fig.suptitle(title)
        axes[0].imshow(to_pil(img))
        axes[0].set_title('label: {}'.format(label))
        axes[1].imshow(to_pil(img_prime))
        axes[1].set_title('label: {}'.format(label_prime))
        fig.savefig(filename)
    finally:
        # pyplot keeps every figure alive until it is closed explicitly.
        plt.close(fig)


def plot_extreme_neighbors(sensitivities, list_of_neighboring_examples, name):
    """Plots to disk the neighboring-example pairs with the most and least empirical sensitivity

    Note on the data structures used:
        sensitivities: a np.ndarray containing empirical sensitivities for each run
        list_of_neighboring_examples: a list of neighboring example pairs, one for each run. in other words:

        list_of_neighboring_examples = [
            neighboring_example_1,
            neighboring_example_2,
            ...
            neighboring_example_n,
            ]

        where each tuple in the list represents the data diff between the neighboring
        datasets and is formatted like this:

        neighboring_example_i = (
            (neighbor_img_i, neighbor_label_i),
            (neighbor_img_i_prime, neighbor_label_i_prime),
        )

        See utils.py if you are still confused.

    Args:
        sensitivities: np.ndarray of empirical sensitivities.
        list_of_neighboring_examples: non-empty list with one neighboring pair
            per entry of ``sensitivities``.
        name: str used as the file stem of both images.

    Returns:
        A tuple ``(minimum_path, maximum_path)`` with the paths
        ``./figs/<name>.minimum.png`` and ``./figs/<name>.maximum.png``.

    Raises:
        ValueError: if an argument has the wrong type, the list is empty, or
            the list and ``sensitivities`` do not have the same number of entries.
    """
    if not isinstance(sensitivities, np.ndarray):
        raise ValueError('sensitivies should be a np.ndarray.')
    # Check the container first so an empty or non-list argument is reported
    # as a ValueError instead of failing while indexing element 0.
    if not isinstance(list_of_neighboring_examples, list):
        raise ValueError('list_of_neighboring_examples should be a list of tuple pairs, where tuple contains img tensors')
    if not list_of_neighboring_examples:
        raise ValueError('list_of_neighboring_examples should not be empty')
    first_neighbor_pair = list_of_neighboring_examples[0]
    if not isinstance(first_neighbor_pair, tuple) \
            or not isinstance(first_neighbor_pair[0][0], torch.Tensor):
        raise ValueError('list_of_neighboring_examples should be a list of tuple pairs, where tuple contains img tensors')
    if not isinstance(name, str):
        raise ValueError('name should be a str')
    # argmin/argmax positions are used to index the list, so sizes must agree.
    if sensitivities.size != len(list_of_neighboring_examples):
        raise ValueError('sensitivities and list_of_neighboring_examples should have the same length')

    import matplotlib
    matplotlib.use('Agg')  # file-only backend: figures are saved, never shown
    import matplotlib.pyplot as plt

    dirname = './figs'
    filename1 = os.path.join(dirname, name) + '.minimum.png'
    filename2 = os.path.join(dirname, name) + '.maximum.png'
    # exist_ok avoids the check-then-create race of os.path.exists + makedirs.
    os.makedirs(dirname, exist_ok=True)

    minimum = int(np.argmin(sensitivities))
    maximum = int(np.argmax(sensitivities))

    _save_neighbor_pair(
        plt, list_of_neighboring_examples[minimum], filename1,
        'minimum sensitivity: {}'.format(sensitivities.flat[minimum]))
    _save_neighbor_pair(
        plt, list_of_neighboring_examples[maximum], filename2,
        'maximum sensitivity: {}'.format(sensitivities.flat[maximum]))

    filenames = filename1, filename2
    return filenames


def compute_empricial_sensivity(train_loader, neighbor_loader,
        num_epochs, learning_rate, lmbda, model_seed=None):
    """Return the L2 distance between weights fitted on two neighboring datasets.

    ``nonprivate_logistic_regression`` is called once per loader with the same
    ``num_epochs``, ``learning_rate``, ``lmbda`` and ``model_seed``. The
    ``'weight'`` entries of the two results are subtracted and the 2-norm of
    the difference is returned. Only the ``'weight'`` entry is compared; any
    other entries of the results are ignored.

    Args:
        train_loader: loader for the original dataset.
        neighbor_loader: loader for the neighboring dataset.
        num_epochs, learning_rate, lmbda, model_seed: forwarded unchanged to
            ``nonprivate_logistic_regression``.

    Returns:
        The result of ``torch.norm(..., p=2)`` on the weight difference.
    """
    from logistic_regression import nonprivate_logistic_regression

    # Fit both models first (train, then neighbor), then read their weights.
    fitted_models = [
        nonprivate_logistic_regression(loader, num_epochs, learning_rate, lmbda, model_seed)
        for loader in (train_loader, neighbor_loader)
    ]
    w_train, w_neighbor = [fitted['weight'] for fitted in fitted_models]

    return torch.norm(w_train - w_neighbor, p=2)

main function

In [4]:
def main(n, runs, epochs, lr, batch_size, model_seed, lmbda):
    """Measure empirical sensitivity over several data seeds and plot the results.

    For each ``data_seed`` in ``range(runs)`` this builds the 'train' and
    'neighbor' loaders with ``get_data_loaders``, computes the empirical
    sensitivity between them, and records it with the neighboring examples
    returned by ``get_data_loaders``. It then writes the histogram and the
    extreme-pair images and prints their paths.

    Args:
        n: passed to ``get_data_loaders`` as ``num_train`` and to ``plot_hist``.
        runs: number of data seeds to evaluate; must be at least 1.
        epochs, lr, lmbda, model_seed: forwarded to ``compute_empricial_sensivity``.
        batch_size: forwarded to ``get_data_loaders``.

    Raises:
        ValueError: if ``runs`` is smaller than 1.
    """
    # With no runs there is nothing to plot; fail here with a clear message
    # instead of deep inside the plotting helpers.
    if runs < 1:
        raise ValueError('runs should be at least 1')

    list_of_empirical_sensitivies = []
    list_of_neighboring_examples = []
    for data_seed in range(runs):
        loaders, neighboring_examples = get_data_loaders(data_seed, batch_size,
                num_train=n)
        sensitivity = compute_empricial_sensivity(
                loaders['train'], loaders['neighbor'],
                epochs, lr, lmbda, model_seed)
        # Store a plain Python float so the later np.array() conversion does
        # not depend on how NumPy handles a list of 0-d tensors.
        list_of_empirical_sensitivies.append(float(sensitivity))
        list_of_neighboring_examples.append(neighboring_examples)

    list_of_empirical_sensitivies = np.array(list_of_empirical_sensitivies)
    name = 'lambda={},n={}'.format(lmbda, n)
    filename = plot_hist(list_of_empirical_sensitivies, n, lmbda, name)
    print('see plot at', filename)

    filenames = plot_extreme_neighbors(list_of_empirical_sensitivies, list_of_neighboring_examples, name)
    print('see plots at {} and {}'.format(*filenames))

arguments and main function call

In [6]:
# Experiment configuration; every constant below is passed to main().
# TODO(student): run more times once your code works; something like 100
N, RUNS = 1000, 4
EPOCHS, LR, BATCH_SIZE = 100, 0.1, 256
MODEL_SEED = 0
LMBDA = 5e-4

main(
    n=N,
    runs=RUNS,
    epochs=EPOCHS,
    lr=LR,
    batch_size=BATCH_SIZE,
    model_seed=MODEL_SEED,
    lmbda=LMBDA,
)

100%|██████████| 100/100 [00:10<00:00,  9.92it/s]
100%|██████████| 100/100 [00:10<00:00,  8.89it/s]
100%|██████████| 100/100 [00:11<00:00,  9.70it/s]
100%|██████████| 100/100 [00:10<00:00, 10.21it/s]
100%|██████████| 100/100 [00:10<00:00,  9.82it/s]
100%|██████████| 100/100 [00:10<00:00,  9.88it/s]
100%|██████████| 100/100 [00:10<00:00,  9.67it/s]
100%|██████████| 100/100 [00:10<00:00,  9.19it/s]


see plot at ./figs/lambda=0.0005,n=1000.histogram.png
see plots at ./figs/lambda=0.0005,n=1000.minimum.png and ./figs/lambda=0.0005,n=1000.maximum.png


### Answers

1. What score should we use to compare the models?

We can choose between the accuracy and the loss function (used as a score via 1 - loss).

the Accuracy just give us the percentage of samp