In [1]:
#Differential Privacy

In [4]:
#Creating a databases
import torch
num_entries = 5000
db = torch.rand(num_entries) > 0.5
print(db)

tensor([1, 0, 1,  ..., 0, 1, 1], dtype=torch.uint8)


In [6]:
#Creating a parallel database
def create_parallel_db(db, index):
    return torch.cat((db[:index], db[index+1:]))
pdb = create_parallel_db(db, 2)
print(pdb)

tensor([1, 0, 0,  ..., 0, 1, 1], dtype=torch.uint8)


In [9]:
#Function to create a database and all its parallel databases
def create_db_and_parallels(num_entries=5000):
    db = torch.rand(num_entries) > 0.5
    pdbs = list()
    for i in range(num_entries):
        pdb = create_parallel_db(db, i)
        pdbs.append(pdb)
    return db, pdbs
db, pdbs = create_db_and_parallels(5000)
# print(db)
print(pdbs)

[tensor([0, 0, 1,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 1,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 1,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.uint8), tensor([1, 0, 0,  ..., 0, 1, 0], dtype=torch.

In [10]:
#Evaluating differential privacy of a function
db, pdbs = create_db_and_parallels()

In [11]:
#Define a sum query
def sum_query(db):
    return db.sum()

In [13]:
#Calculate sensitivity
sensitivity = 0
db_result = sum_query(db)
for pdb in pdbs:
    pdb_result = sum_query(pdb)
    if torch.abs(pdb_result - db_result) > sensitivity:
        sensitivity = torch.abs(pdb_result - db_result)
print(sensitivity)

tensor(1)


In [15]:
#Evaluating the privacy of a function
def sensitivity(query, num_entries = 1000):
    db, pdbs = create_db_and_parallels(num_entries)
    db_result = query(db)
    sensitivity = 0
    for pdb in pdbs:
        pdb_result = query(pdb)
        db_distance = torch.abs(pdb_result - db_result)
        if db_distance > sensitivity:
            sensitivity = db_distance
    return sensitivity

In [16]:
def mean_query(db):
    return db.float().mean()

In [18]:
#Sensitivity of a mean function
print(sensitivity(mean_query))

tensor(0.0005)


In [21]:
#Calculating the sensitivity for a threshold function
def threshold_query(db, threshold = 5):
    return (db.sum() > threshold).float()

In [22]:
for i in range(10):
    print(sensitivity(threshold_query, 10))

tensor(1.)
0
tensor(1.)
0
0
0
0
tensor(1.)
tensor(1.)
tensor(1.)


In [23]:
#Perform a differencing attack
db, pdbs = create_db_and_parallels(100)

In [24]:
db[10]

tensor(1, dtype=torch.uint8)

In [31]:
#Differencing attack using sum query
sum(db) - sum(pdbs[10])

tensor(1, dtype=torch.uint8)

In [38]:
#Differencing attack using mean query
(sum(db).float()/len(db)) - (sum(pdbs[10]).float()/len(pdbs[10])) > 0

tensor(1, dtype=torch.uint8)

In [44]:
#Differencing attack using threshold query
actual_sum = (sum(db)-1).float()
(sum(db).float() > actual_sum) - (sum(pdbs[10]).float() > actual_sum)

tensor(1, dtype=torch.uint8)