import statements

In [1]:
"""Private Model Selection."""
import argparse
from glob import glob
import os
import random

import numpy as np
import torch
import torch.nn as nn
from torch.utils.data import DataLoader


from utils import get_data_loaders

helper code

In [2]:
def plot_probs(ndarray_of_probs, name):
    if not isinstance(ndarray_of_probs, np.ndarray):
        msg = 'ndarray_of_probs should be a np.ndarray. ' + \
                'Make sure to convert from torch.tensor if need be.'
        raise ValueError(msg)
    if not isinstance(name, str):
        raise ValueError('name should be a str')

    import matplotlib
    matplotlib.use('Agg')
    import matplotlib.pyplot as plt

    dirname = './figs'
    filename = os.path.join(dirname, name) + '.model-selection-probs.png'
    if not os.path.exists(dirname):
        os.makedirs(dirname)
    fig, ax = plt.subplots()
    model_idxs = np.arange(len(ndarray_of_probs))
    ax.bar(model_idxs, ndarray_of_probs)
    ax.set_xlabel('model idx')
    ax.set_ylabel('prob of being selected under Exp Mech')
    ax.set_title(name)
    ax.set_xticks(model_idxs)
    fig.savefig(filename)
    return filename


def load_models(num_pixels):
    """Randomly samples k pre-trained models parameters (from the list of ten)
    """
    list_of_model_filenames = glob('./pretrained_models/*.pt')
    list_of_model_filenames.sort()
    list_of_models = []
    for model_filename in list_of_model_filenames:
        model = nn.Linear(num_pixels, 1, bias=False)
        model.load_state_dict(torch.load(model_filename))
        list_of_models.append(model)
    return list_of_models

In [3]:
from scipy.misc import logsumexp
import warnings
warnings.filterwarnings('ignore')


your code

In [4]:
def compute_scores(list_of_models, test_loader):
    """Compute score (performance on private test data) for each model"""
    if not isinstance(list_of_models, list):
        raise ValueError('first argument should be a list')
    if not isinstance(test_loader, DataLoader):
        raise ValueError('second argument should be pytorch data loader')
    ############################################################################
    # TODO(student)
    #
    # your code here...
    #
    # You can look into logistic_regression.py to see how various training 
    # metrics are computed given the model
    #
    n= len(list_of_models)
    total= len(test_loader.dataset) 
    scores=np.zeros(n)
    criterion = nn.BCEWithLogitsLoss()
    for i in range(n):
        model= list_of_models[i]
        losses= 0
        #loss=0
        for j,(images, labels) in enumerate(test_loader):
            images = images.reshape(-1, 28*28)
            outputs = model(images)
            loss = criterion(outputs.squeeze(), labels.float())
            losses += loss * (len(images) / float(total))
        scores[i]=1.0-losses
    return torch.Tensor(scores) 
    ############################################################################
    
    
def exponential_mechanism(list_of_models, test_loader, epsilon):
    """Sample from model list, where sampling probability scales with test score
    
    Return both the sampled model and the sample index
    """
    if not isinstance(list_of_models, list):
        raise ValueError('first argument should be a list')
    if not isinstance(test_loader, DataLoader):
        raise ValueError('second argument should be pytorch data loader')

    scores = compute_scores(list_of_models, test_loader)
    num_test_examples = len(test_loader.dataset)
    n= scores.shape[0]

    ############################################################################
    # TODO(student)
    #
    # your code here...
    #
    # hint: you're exponential mechanism should somehow depend on the number of
    # training data in test loader
    
    #
    probs= scores * epsilon
    som= logsumexp(probs)

    sample_probs = np.array(np.exp(probs - som))
    prob= np.random.choice(sample_probs)
    
    sampled_idx,= np.where(sample_probs == prob)
    #print(sampled_idx)
    sampled_model= list_of_models[sampled_idx[0]]
    ############################################################################
    return sampled_model, sampled_idx, sample_probs

main function

In [5]:
BATCH_SIZE = 250
SEED = 3771

def main(n, epsilon):
    loaders, _ = get_data_loaders(SEED, BATCH_SIZE, 
            num_train=13006, num_test=n)
    num_pixels = loaders['train'].dataset.num_pixels
    models = load_models(num_pixels)

    private_best_model, private_best_model_idx, sample_probs \
            = exponential_mechanism(models, loaders['test'], epsilon)

    print('selected model', private_best_model_idx)
    #print('selected model:', private_best_model)
    name = 'eps={},n={}'.format(epsilon, n)
    filename = plot_probs(sample_probs, name)
    print('see plot at', filename)

arguments and main function call

In [6]:
N = 10
EPSILON = 1.
main(N, EPSILON)
# TODO(student): sweep over the required values for N and EPSILON and produce 
#                several plots

selected model [8]
see plot at ./figs/eps=1.0,n=10.model-selection-probs.png


In [7]:
# testons les valeurs

epsilons=[1,2,4]
sizes=[2,10,100,1000]

for epsilon in epsilons:
    for n in sizes:
        main(n,epsilon)

selected model [8]
see plot at ./figs/eps=1,n=2.model-selection-probs.png
selected model [0]
see plot at ./figs/eps=1,n=10.model-selection-probs.png
selected model [6]
see plot at ./figs/eps=1,n=100.model-selection-probs.png
selected model [0]
see plot at ./figs/eps=1,n=1000.model-selection-probs.png
selected model [2]
see plot at ./figs/eps=2,n=2.model-selection-probs.png
selected model [6]
see plot at ./figs/eps=2,n=10.model-selection-probs.png
selected model [1]
see plot at ./figs/eps=2,n=100.model-selection-probs.png
selected model [7]
see plot at ./figs/eps=2,n=1000.model-selection-probs.png
selected model [1]
see plot at ./figs/eps=4,n=2.model-selection-probs.png
selected model [5]
see plot at ./figs/eps=4,n=10.model-selection-probs.png
selected model [5]
see plot at ./figs/eps=4,n=100.model-selection-probs.png
selected model [9]
see plot at ./figs/eps=4,n=1000.model-selection-probs.png


### Answers



The aim of this section is to answer the question ”What model classifier yield a good result on the test data?”. 
From the plots above, the answer is not far-fetched. The test data private and using exponential mechanism, we observe
that despite the varying values for epsilon , the model idx 5 and 8 achieve high probability even with the changes in the number of test data. I do not see how increase in the test data could affect the probability because its considered
private in the first case.