<a href="https://colab.research.google.com/github/bttrung/secure-private-ai-scholarship/blob/master/7__Differential_Privacy.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch

# the number of entries in our database
num_entries = 5000

db = torch.rand(num_entries) > 0.5
db


tensor([0, 1, 0,  ..., 0, 1, 1], dtype=torch.uint8)

In [0]:
def get_parallel_db(db, remove_index):

    return torch.cat((db[0:remove_index], 
                      db[remove_index+1:]))


In [3]:
get_parallel_db(db, 52352)

tensor([0, 1, 0,  ..., 0, 1, 1], dtype=torch.uint8)

In [0]:
def get_parallel_dbs(db):

    parallel_dbs = list()

    for i in range(len(db)):
        pdb = get_parallel_db(db, i)
        parallel_dbs.append(pdb)
    
    return parallel_dbs


In [0]:
pdbs = get_parallel_dbs(db)

In [0]:
def create_db_and_parallels(num_entries):
    
    db = torch.rand(num_entries) > 0.5
    pdbs = get_parallel_dbs(db)
    
    return db, pdbs


In [0]:
db, pdbs = create_db_and_parallels(5000)


In [0]:
def query(db):
    return db.sum()


In [0]:
full_db_result = query(db)

In [10]:
full_db_result

tensor(2477)

In [0]:
sensitivity = 0
for pdb in pdbs:
    pdb_result = query(pdb)
    
    db_distance = torch.abs(pdb_result - full_db_result)
    
    if(db_distance > sensitivity):
        sensitivity = db_distance


In [12]:
sensitivity

tensor(1)

In [0]:
def sensitivity(query, n_entries=1000):

    db, pdbs = create_db_and_parallels(n_entries)
    
    full_db_result = query(db)
    
    max_distance = 0
    for pdb in pdbs:
        pdb_result = query(pdb)

        db_distance = torch.abs(pdb_result - full_db_result)

        if(db_distance > max_distance):
            max_distance = db_distance
            
    return max_distance


In [0]:
def query(db):
    return db.float().mean()

In [15]:
sensitivity(query)


tensor(0.0005)

In [16]:
db, pdbs = create_db_and_parallels(20)
db

tensor([1, 1, 0, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1],
       dtype=torch.uint8)

In [0]:
# Calculate L1 Sensitivity For Threshold
def query(db, threshold=5):
    return (db.sum() > threshold).float()


In [18]:
for i in range(10):
    sens_f = sensitivity(query, n_entries=10)
    print(sens_f)

0
0
0
tensor(1.)
0
tensor(1.)
0
0
0
0


In [0]:
# A Basic Differencing Attack
db, _ = create_db_and_parallels(100)


In [0]:
pdb = get_parallel_db(db, remove_index=10)

In [21]:
db[10]


tensor(1, dtype=torch.uint8)

In [22]:
sum(db)


tensor(51, dtype=torch.uint8)

In [23]:
sum(db) - sum(pdb)

tensor(1, dtype=torch.uint8)

In [24]:
# differencing attack using mean query
(sum(db).float() / len(db)) - (sum(pdb).float() / len(pdb))

tensor(0.0049)

In [25]:
# differencing attack using threshold

(sum(db).float() > 49) - (sum(pdb).float()  > 49)

tensor(0, dtype=torch.uint8)

In [26]:
# Local Differential Privacy
db

tensor([0, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0,
        0, 0, 1, 0, 1, 1, 1, 0, 0, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 0,
        0, 1, 0, 1, 0, 0, 0, 0, 0, 1, 1, 1, 0, 0, 0, 0, 0, 1, 0, 0, 0, 1, 1, 0,
        1, 1, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1,
        1, 1, 1, 0], dtype=torch.uint8)

In [0]:
def query(db):

    true_result = torch.mean(db.float())
    
    first_coin_flip = (torch.rand(len(db)) < 0.5).float()
    second_coin_flip = (torch.rand(len(db)) < 0.5).float()

    augmented_database = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    db_result = torch.mean(augmented_database.float()) * 2 - 0.5
    
    return db_result, true_result


In [28]:
db, pdbs = create_db_and_parallels(10)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))


With Noise:tensor(0.7000)
Without Noise:tensor(0.8000)


In [29]:
db, pdbs = create_db_and_parallels(100)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))


With Noise:tensor(0.4400)
Without Noise:tensor(0.5400)


In [30]:
db, pdbs = create_db_and_parallels(1000)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))


With Noise:tensor(0.4440)
Without Noise:tensor(0.4960)


In [31]:
db, pdbs = create_db_and_parallels(10000)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))


With Noise:tensor(0.5086)
Without Noise:tensor(0.5037)


In [0]:
# Varying Amounts of Noise
def query(db, noise=0.2):
    
    true_result = torch.mean(db.float())

    first_coin_flip = (torch.rand(len(db)) < noise).float()
    second_coin_flip = (torch.rand(len(db)) < 0.5).float()

    augmented_database = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    sk_result = augmented_database.float().mean()

    private_result = ((sk_result / noise) - 0.5) * noise / (1 - noise)

    return private_result, true_result


In [33]:
db, pdbs = create_db_and_parallels(100)
private_result, true_result = query(db, noise=0.1)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))


With Noise:tensor(0.4111)
Without Noise:tensor(0.5600)


In [34]:
db, pdbs = create_db_and_parallels(100)
private_result, true_result = query(db, noise=0.2)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.5500)
Without Noise:tensor(0.4600)


In [35]:
db, pdbs = create_db_and_parallels(100)
private_result, true_result = query(db, noise=0.4)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))


With Noise:tensor(0.5333)
Without Noise:tensor(0.4600)


In [36]:
db, pdbs = create_db_and_parallels(100)
private_result, true_result = query(db, noise=0.8)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))


With Noise:tensor(0.9500)
Without Noise:tensor(0.6000)


In [37]:
db, pdbs = create_db_and_parallels(10000)
private_result, true_result = query(db, noise=0.8)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.4925)
Without Noise:tensor(0.5011)


In [38]:
# The Formal Definition of Differential Privacy
db, pdbs = create_db_and_parallels(100)

def query(db):
    return torch.sum(db.float())

def M(db):
    query(db) + noise

query(db)


tensor(50.)

In [0]:
epsilon = 0.0001

In [0]:
import numpy as np

In [0]:
db, pdbs = create_db_and_parallels(100)


In [0]:
def sum_query(db):
    return db.sum()


In [0]:
def laplacian_mechanism(db, query, sensitivity):
    
    beta = sensitivity / epsilon
    noise = torch.tensor(np.random.laplace(0, beta, 1))
    
    return query(db) + noise


In [0]:
def mean_query(db):
    return torch.mean(db.float())


In [45]:
laplacian_mechanism(db, sum_query, 1)


tensor([11692.6980], dtype=torch.float64)

In [46]:
laplacian_mechanism(db, mean_query, 1/100)

tensor([48.4913], dtype=torch.float64)

In [0]:
# Differential Privacy for Deep Learning
import numpy as np

In [48]:
pip install syft



In [0]:
num_teachers = 10 # we're working with 10 partner hospitals
num_examples = 10000 # the size of OUR dataset
num_labels = 10 # number of lablels for our classifier


In [0]:
preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int).transpose(1,0) # fake predictions


In [0]:
new_labels = list()
for an_image in preds:

    label_counts = np.bincount(an_image, minlength=num_labels)

    epsilon = 0.1
    beta = 1 / epsilon

    for i in range(len(label_counts)):
        label_counts[i] += np.random.laplace(0, beta, 1)

    new_label = np.argmax(label_counts)
    
    new_labels.append(new_label)


In [52]:
labels = np.array([9, 9, 3, 6, 9, 9, 9, 9, 8, 2])
counts = np.bincount(labels, minlength=10)
query_result = np.argmax(counts)
query_result

9

In [53]:
from syft.frameworks.torch.differential_privacy import pate


W0825 01:33:33.564055 140323830429568 secure_random.py:26] Falling back to insecure randomness since the required custom op could not be found for the installed version of TensorFlow. Fix this by compiling custom ops. Missing file was '/usr/local/lib/python3.6/dist-packages/tf_encrypted/operations/secure_random/secure_random_module_tf_1.14.0.so'
W0825 01:33:33.583203 140323830429568 deprecation_wrapper.py:119] From /usr/local/lib/python3.6/dist-packages/tf_encrypted/session.py:26: The name tf.Session is deprecated. Please use tf.compat.v1.Session instead.



In [54]:
num_teachers, num_examples, num_labels = (100, 100, 10)
preds = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int) #fake preds
indices = (np.random.rand(num_examples) * num_labels).astype(int) # true answers

preds[:,0:10] *= 0

data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=indices, noise_eps=0.1, delta=1e-5)

assert data_dep_eps < data_ind_eps




In [55]:
data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=indices, noise_eps=0.1, delta=1e-5)
print("Data Independent Epsilon:", data_ind_eps)
print("Data Dependent Epsilon:", data_dep_eps)

Data Independent Epsilon: 11.756462732485115
Data Dependent Epsilon: 1.52655213289881


In [0]:
preds[:,0:50] *= 0

In [58]:
data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds=preds, indices=indices, noise_eps=0.1, delta=1e-5, moments=20)
print("Data Independent Epsilon:", data_ind_eps)
print("Data Dependent Epsilon:", data_dep_eps)

Data Independent Epsilon: 11.756462732485115
Data Dependent Epsilon: 0.9029013677789843
