In [1]:
#Differential Privacy

In [1]:
#Creating a databases
import torch
num_entries = 5000
db = torch.rand(num_entries) > 0.5
print(db)

tensor([0, 0, 1,  ..., 0, 0, 1], dtype=torch.uint8)


In [2]:
#Creating a parallel database
def create_parallel_db(db, index):
    return torch.cat((db[:index], db[index+1:]))
pdb = create_parallel_db(db, 2)
print(pdb)

tensor([0, 0, 0,  ..., 0, 0, 1], dtype=torch.uint8)


In [3]:
#Function to create a database and all its parallel databases
def create_db_and_parallels(num_entries=5000):
    db = torch.rand(num_entries) > 0.5
    pdbs = list()
    for i in range(num_entries):
        pdb = create_parallel_db(db, i)
        pdbs.append(pdb)
    return db, pdbs
db, pdbs = create_db_and_parallels(5000)
# print(db)
print(pdbs)

[tensor([1, 0, 1,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 0, 1,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 1,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.uint8), tensor([1, 1, 0,  ..., 1, 0, 1], dtype=torch.

In [10]:
#Evaluating differential privacy of a function
db, pdbs = create_db_and_parallels()

In [11]:
#Define a sum query
def sum_query(db):
    return db.sum()

In [13]:
#Calculate sensitivity
sensitivity = 0
db_result = sum_query(db)
for pdb in pdbs:
    pdb_result = sum_query(pdb)
    if torch.abs(pdb_result - db_result) > sensitivity:
        sensitivity = torch.abs(pdb_result - db_result)
print(sensitivity)

tensor(1)


In [15]:
#Evaluating the privacy of a function
def sensitivity(query, num_entries = 1000):
    db, pdbs = create_db_and_parallels(num_entries)
    db_result = query(db)
    sensitivity = 0
    for pdb in pdbs:
        pdb_result = query(pdb)
        db_distance = torch.abs(pdb_result - db_result)
        if db_distance > sensitivity:
            sensitivity = db_distance
    return sensitivity

In [16]:
def mean_query(db):
    return db.float().mean()

In [18]:
#Sensitivity of a mean function
print(sensitivity(mean_query))

tensor(0.0005)


In [21]:
#Calculating the sensitivity for a threshold function
def threshold_query(db, threshold = 5):
    return (db.sum() > threshold).float()

In [22]:
for i in range(10):
    print(sensitivity(threshold_query, 10))

tensor(1.)
0
tensor(1.)
0
0
0
0
tensor(1.)
tensor(1.)
tensor(1.)


In [23]:
#Perform a differencing attack
db, pdbs = create_db_and_parallels(100)

In [24]:
db[10]

tensor(1, dtype=torch.uint8)

In [31]:
#Differencing attack using sum query
sum(db) - sum(pdbs[10])

tensor(1, dtype=torch.uint8)

In [38]:
#Differencing attack using mean query
(sum(db).float()/len(db)) - (sum(pdbs[10]).float()/len(pdbs[10])) > 0

tensor(1, dtype=torch.uint8)

In [44]:
#Differencing attack using threshold query
actual_sum = (sum(db)-1).float()
(sum(db).float() > actual_sum) - (sum(pdbs[10]).float() > actual_sum)

tensor(1, dtype=torch.uint8)

In [1]:
#Local Differential Privacy

In [5]:
db, _ = create_db_and_parallels(100)
db

tensor([1, 0, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 1, 0, 1, 0, 0, 1, 0,
        1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, 1,
        1, 1, 1, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 0, 1, 1, 1, 0,
        0, 0, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 1,
        1, 0, 0, 0], dtype=torch.uint8)

In [6]:
def query(db):
    true_result = torch.mean(db.float())
    
    first_flip = (torch.rand(len(db)) > 0.5).float()
    second_flip = (torch.rand(len(db)) > 0.5).float()
    
    augmented_db = (db.float() * first_flip) + (1-first_flip)*second_flip
    augmented_result = torch.mean(augmented_db.float())*2 - 0.5
    
    return true_result, augmented_result

In [7]:
tr, ar = query(db)
print(tr)
print(ar)

tensor(0.4500)
tensor(0.4800)


In [10]:
for i in [10, 100, 1000, 10000]:
    db, _ = create_db_and_parallels(i)
    tr, ar = query(db)
    print(tr)
    print(ar)

tensor(0.5000)
tensor(0.7000)
tensor(0.4900)
tensor(0.5200)
tensor(0.4970)
tensor(0.4340)
tensor(0.5063)
tensor(0.5028)


In [11]:
#Varying amount of noise
def query(db, noise):
    true_result = torch.mean(db.float())
    
    first_flip = (torch.rand(len(db))>noise).float()
    second_flip = (torch.rand(len(db))>0.5).float()
    
    augmented_db = (db.float() * first_flip) + (1 - first_flip) * second_flip
    
    sk_result = torch.mean(augmented_db.float())
    augmented_result = (sk_result/noise - 0.5) * noise / (1-noise)
    
    return true_result, augmented_result

In [12]:
for i in [0.1, 0.2, 0.4, 0.8]:
    db, _ = create_db_and_parallels(10000)
    tr, ar = query(db, i)
    print(tr)
    print(ar)

tensor(0.5017)
tensor(0.4978)
tensor(0.5011)
tensor(0.5006)
tensor(0.5053)
tensor(0.5003)
tensor(0.4960)
tensor(0.5005)


In [1]:
#Creating a differentially private query using formal definition of differential privacy

In [108]:
import numpy as np
epsilon = 0.01
def sum_query(db):
    return db.sum()

def laplacian_mechanism(db, query, sensitivity):
    beta = sensitivity / epsilon
    noise = torch.tensor(np.random.laplace(0, beta, 1))
    
    return query(db) + noise

In [118]:
db, _ = create_db_and_parallels(100)

laplacian_mechanism(db, sum_query, 1)

tensor([127.1333], dtype=torch.float64)

In [67]:
def mean_query(db):
    return db.float().mean()

In [123]:
laplacian_mechanism(db, mean_query, 1/100)

tensor([0.9494], dtype=torch.float64)

In [4]:
#Differential Privacy for Deep learning
import numpy as np

In [5]:
num_teachers = 10
num_examples = 10000
num_labels = 10

In [16]:
pred = (np.random.rand(num_teachers, num_examples) * num_labels).astype(int).transpose(1,0)
print(pred)
print(pred.shape)

[[5 8 8 ... 8 2 6]
 [5 3 5 ... 9 2 0]
 [6 1 0 ... 1 4 5]
 ...
 [7 9 0 ... 2 1 1]
 [7 5 9 ... 7 8 2]
 [0 8 5 ... 2 8 7]]
(10000, 10)


In [19]:
new_labels = list()
for image in pred:
    label_counts = np.bincount(image, minlength = num_labels)
    
    epsilon = 0.1
    beta = 1/epsilon
    
    for i in range(len(label_counts)):
        label_counts[i] += np.random.laplace(0, beta, 1)
    
    new_label = label_counts.argmax()
    new_labels.append(new_label)

In [22]:
new_labels

[0,
 5,
 0,
 9,
 8,
 6,
 5,
 5,
 9,
 3,
 2,
 8,
 8,
 0,
 1,
 8,
 1,
 9,
 1,
 9,
 3,
 6,
 5,
 2,
 6,
 8,
 4,
 6,
 0,
 9,
 0,
 8,
 7,
 0,
 6,
 3,
 7,
 3,
 1,
 8,
 9,
 6,
 0,
 1,
 7,
 2,
 5,
 6,
 3,
 0,
 4,
 3,
 9,
 0,
 1,
 9,
 1,
 2,
 6,
 8,
 3,
 1,
 9,
 2,
 6,
 7,
 3,
 9,
 8,
 0,
 7,
 9,
 1,
 4,
 3,
 6,
 3,
 1,
 3,
 4,
 3,
 5,
 5,
 8,
 7,
 4,
 4,
 2,
 9,
 5,
 2,
 4,
 4,
 3,
 4,
 1,
 3,
 4,
 4,
 7,
 7,
 2,
 6,
 7,
 5,
 2,
 0,
 6,
 2,
 4,
 1,
 0,
 6,
 7,
 8,
 6,
 6,
 6,
 6,
 9,
 7,
 9,
 0,
 4,
 3,
 8,
 6,
 0,
 8,
 5,
 2,
 7,
 7,
 0,
 8,
 7,
 7,
 8,
 6,
 3,
 5,
 4,
 9,
 9,
 9,
 0,
 4,
 5,
 1,
 2,
 9,
 2,
 9,
 6,
 1,
 2,
 0,
 5,
 0,
 6,
 5,
 3,
 9,
 6,
 4,
 4,
 3,
 6,
 9,
 0,
 6,
 7,
 7,
 6,
 5,
 3,
 8,
 3,
 7,
 6,
 7,
 7,
 9,
 6,
 5,
 3,
 1,
 0,
 0,
 8,
 6,
 9,
 0,
 9,
 3,
 0,
 1,
 0,
 3,
 7,
 9,
 4,
 3,
 2,
 2,
 4,
 6,
 3,
 5,
 4,
 9,
 9,
 5,
 9,
 5,
 3,
 3,
 0,
 5,
 8,
 4,
 4,
 1,
 9,
 1,
 8,
 9,
 5,
 7,
 3,
 9,
 2,
 9,
 7,
 7,
 1,
 6,
 1,
 1,
 2,
 1,
 6,
 4,
 0,
 4,
 9,
 2,
 3,
 0,
 2,


In [53]:
#PATE analysis
from syft.frameworks.torch.differential_privacy import pate

In [54]:
num_teachers, num_examples, num_labels = (100, 100, 10)
preds = (np.random.rand(num_teachers, num_examples)*num_labels).astype(int)
indices = (np.random.rand(num_examples)*num_labels).astype(int)

data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds = preds, indices = indices, noise_eps = 0.1, delta = 1e-5)
print(data_dep_eps)
print(data_ind_eps)

assert data_dep_eps < data_ind_eps

11.756462732485105
11.756462732485115


In [55]:
preds[:,0:5] *= 0
data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds = preds, indices = indices, noise_eps = 0.1, delta = 1e-5)
print(data_dep_eps)
print(data_ind_eps)

7.612485787172064
11.756462732485115


In [56]:
preds[:,0:50] *= 0
data_dep_eps, data_ind_eps = pate.perform_analysis(teacher_preds = preds, indices = indices, noise_eps = 0.1, delta = 1e-5, moments = 20)
print(data_dep_eps)
print(data_ind_eps)

0.9029013677789843
11.756462732485115
