# CS211: Data Privacy
## In-Class Exercise, 9/18/2020

In [1]:
# Load the data and libraries
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
# Matplotlib 3.6 renamed the bundled seaborn styles to 'seaborn-v0_8-*' and later
# releases dropped the old names, so fall back to the new name when the old one
# is not available. This keeps the cell runnable on both old and new Matplotlib.
_style_name = 'seaborn-whitegrid'
if _style_name not in plt.style.available:
    _style_name = 'seaborn-v0_8-whitegrid'
plt.style.use(_style_name)

def laplace_mech(v, sensitivity, epsilon):
    """Return ``v`` plus Laplace noise with scale ``sensitivity / epsilon``.

    Parameters:
        v: the true query answer to perturb.
        sensitivity: the sensitivity claimed for the query that produced ``v``.
        epsilon: the privacy parameter; it divides the sensitivity to give the noise scale.

    Returns:
        ``v`` with one zero-centred draw from ``np.random.laplace`` added to it.
    """
    noise = np.random.laplace(loc=0, scale=sensitivity / epsilon)
    return v + noise

# Course dataset (presumably the Adult census data augmented with PII, per the file name).
# The cells below read its 'Name' and 'Age' columns.
# NOTE: this is fetched over the network from the course repository on every run.
adult = pd.read_csv('https://github.com/jnear/cs211-data-privacy/raw/master/homework/adult_with_pii.csv')

## Question 1

Consider the following definition of a differencing attack (without differential privacy).

In [2]:
def differencing_attack():
    """Run the differencing attack using exact (non-private) sums.

    Returns the sum of ``adult['Age']`` over every row minus the same sum taken
    over only the rows whose 'Name' is not 'Karrie Trusslove'.
    """
    not_target = adult['Name'] != 'Karrie Trusslove'

    total_everyone = adult['Age'].sum()
    total_without_target = adult.loc[not_target, 'Age'].sum()
    return total_everyone - total_without_target

print('Differencing attack result:', differencing_attack())

Differencing attack result: 39


Implement a function `dp_differencing_attack` that performs the same attack, but attempts to satisfy differential privacy using the Laplace mechanism. Use the parameters `sensitivity=1` and `epsilon=1.0`.

In [11]:
def dp_differencing_attack():
    """Run the differencing attack with Laplace noise added to each sum.

    Both sums are passed through ``laplace_mech`` with ``sensitivity=1`` and
    ``epsilon=1.0``, the parameters the exercise prescribes for this question.

    Returns the noisy sum over all rows minus the noisy sum over the rows whose
    'Name' is not 'Karrie Trusslove'.
    """
    sensitivity, epsilon = 1, 1.0
    not_target = adult['Name'] != 'Karrie Trusslove'

    total_everyone = adult['Age'].sum()
    total_without_target = adult.loc[not_target, 'Age'].sum()

    # Noise is drawn for the full sum first, then for the filtered sum.
    noisy_everyone = laplace_mech(total_everyone, sensitivity, epsilon)
    noisy_without_target = laplace_mech(total_without_target, sensitivity, epsilon)
    return noisy_everyone - noisy_without_target

print('DP Differencing attack result:', dp_differencing_attack())

DP Differencing attack result: 38.35206843377091


In [12]:
# TEST CASE for question 1
# Draws 100 results from dp_differencing_attack and 100 reference values from
# Laplace(loc=39, scale=1), where 39 is the result printed by the non-private attack.
# The check passes when the Wasserstein distance between the two samples is below 2.
# NOTE: both samples are random and unseeded, so the measured distance varies per run.
dp_results = [dp_differencing_attack() for _ in range(100)]
spec = [np.random.laplace(loc=39, scale=1) for _ in range(100)]
assert stats.wasserstein_distance(dp_results, spec) < 2

## Question 2

What is the *sensitivity* of the `differencing_attack` query defined above, and why?

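If we assume that nobody can be older than 125, then the sensitivity is 125. The attack's output is the difference of two sums of ages, and adding or removing one person's row changes a sum of ages by at most that person's age, which is at most 125 under this assumption.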

## Question 3

Implement a corrected version of `dp_differencing_attack` that uses the correct sensitivity and thus correctly satisfies differential privacy.

In [9]:
def dp_differencing_attack_corrected():
    """Run the differencing attack with Laplace noise scaled to the age bound.

    Each sum of ages is released through ``laplace_mech`` with a sensitivity of
    125 (the assumed maximum age) and ``epsilon=1.0``. Ages are clipped to
    ``[0, 125]`` before summing so that the claimed sensitivity holds for any
    data; when every age already lies in that range the sums are unchanged.

    Note that two noisy sums are released, each with ``epsilon=1.0``; under
    sequential composition the pair costs 2.0 in total.

    Returns the noisy sum over all rows minus the noisy sum over the rows whose
    'Name' is not 'Karrie Trusslove'.
    """
    max_age = 125  # assumed upper bound on an age; also the sensitivity of each sum
    epsilon = 1.0

    # Enforce the bound the sensitivity relies on, instead of merely assuming it.
    ages = adult['Age'].clip(lower=0, upper=max_age)
    q1 = ages.sum()
    q2 = ages[adult['Name'] != 'Karrie Trusslove'].sum()

    a1 = laplace_mech(q1, max_age, epsilon)
    a2 = laplace_mech(q2, max_age, epsilon)

    return a1 - a2

print('DP Differencing attack result:', dp_differencing_attack_corrected())

DP Differencing attack result: 28.208094708621502


In [10]:
# TEST CASE for question 3
# Draws 100 results from dp_differencing_attack_corrected and 100 reference values
# from Laplace(loc=39, scale=125/2.0), then bounds the Wasserstein distance between
# the two samples from both sides: it must be above 50 and below 200.
# NOTE: both samples are random and unseeded, so the measured distance varies per run.
dp_results = [dp_differencing_attack_corrected() for _ in range(100)]
spec = [np.random.laplace(loc=39, scale=125/2.0) for _ in range(100)]
assert stats.wasserstein_distance(dp_results, spec) < 200
assert stats.wasserstein_distance(dp_results, spec) > 50