<a href="https://colab.research.google.com/github/adityaRakhecha/secure-and-private-ai-scholarship-challenge/blob/master/Lesson-5/lesson5.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import torch

# number of entries in our database
num_entries = 5000

# create raw database
db = torch.rand(num_entries) > 0.5
db

tensor([1, 1, 1,  ..., 0, 1, 1], dtype=torch.uint8)

**Generate Parallel Databases**

In [2]:
db[0:5]

tensor([1, 1, 1, 1, 0], dtype=torch.uint8)

In [0]:
remove_index = 2

In [0]:
# function to generate a parallel database with one entry removed
def get_parallel_db(db, remove_index):
  
  return torch.cat((db[0:remove_index],
                    db[remove_index+1:]))

In [5]:
get_parallel_db(db,2).shape

torch.Size([4999])

In [0]:
# function to generate parallel databases with one entry removed from each of the entries
def get_parallel_dbs(db):
  parallel_dbs = list()
  
  for i in range(len(db)):
    pdb = get_parallel_db(db, i)
    parallel_dbs.append(pdb)
    
  return parallel_dbs

In [0]:
pdbs = get_parallel_dbs(db)

In [8]:
len(pdbs)

5000

In [0]:
# function to create and generate parallel databases
def create_db_and_parallels(num_entries):
  
  db = torch.rand(num_entries) > 0.5
  pdbs = get_parallel_dbs(db)
  
  return db, pdbs

In [0]:
db, pdbs = create_db_and_parallels(5000)

**Create database query**

In [11]:
db

tensor([1, 1, 1,  ..., 0, 1, 0], dtype=torch.uint8)

In [0]:
def query(db):
  return db.sum()

In [0]:
full_db_result = query(db)

In [0]:
sensitivity = 0
for pdb in pdbs:
  pdb_result = query(pdb)
  
  db_distance = torch.abs(pdb_result - full_db_result)
  
  if(db_distance > sensitivity):
    sensitivity = db_distance

In [15]:
sensitivity

tensor(1)

-> Calculate sensitivity for the "mean" function.

In [0]:
def cal_sensitivity(query, n_entries=1000):
  
  db, pdbs = create_db_and_parallels(n_entries)
  
  full_db_result = query(db)
  
  max_distance = 0
  for pdb in pdbs:
    pdb_result = query(pdb)
    
    db_distance = torch.abs(pdb_result - full_db_result)
    
    if(db_distance > max_distance):
      max_distance = db_distance
      
    return max_distance

In [0]:
def query(db):
  return db.float().mean()

In [18]:
cal_sensitivity(query)

tensor(0.0005)

Calculate L1 sensitivity for threshold

In [0]:
def query(db, threshold=5):
  return (db.sum() > threshold).float()

In [20]:
for i in range(10):
  sens_f = cal_sensitivity(query, n_entries=10)
  print(sens_f)

0
0
0
0
0
0
0
tensor(1.)
0
0


Performing Differencing Attack

In [0]:
db, _ = create_db_and_parallels(100)

In [0]:
pdb = get_parallel_db(db, remove_index=10)

In [23]:
db[10]

tensor(0, dtype=torch.uint8)

In [24]:
sum(db)

tensor(54, dtype=torch.uint8)

In [25]:
# differencing attack using sum query
sum(db) - sum(pdb)

tensor(0, dtype=torch.uint8)

In [26]:
# differencing attack using threshold
(sum(db).float() > 49) - (sum(pdb).float() > 49)

tensor(0, dtype=torch.uint8)

**Local Differential Privacy**

In [27]:
db, pdbs = create_db_and_parallels(100)
db

tensor([0, 0, 0, 0, 0, 0, 1, 0, 1, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1,
        0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 0, 1, 0, 1, 0, 0, 0,
        1, 0, 0, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1,
        1, 1, 0, 1, 1, 1, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 1, 0, 1, 0, 0, 0,
        1, 0, 1, 1], dtype=torch.uint8)

In [0]:
def query(db):
  true_result = torch.mean(db.float())
  
  first_coin_flip = (torch.rand(len(db)) > 0.5).float()
  second_coin_flip = (torch.rand(len(db)) > 0.5).float()

  augmented_database = db.float() * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

  db_result = torch.mean(augmented_database.float()) * 2 - 0.5
  
  return db_result, true_result

In [37]:
db, pdbs = create_db_and_parallels(10)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.9000)
Without Noise:tensor(0.7000)


In [38]:
db, pdbs = create_db_and_parallels(100)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.4800)
Without Noise:tensor(0.4400)


In [39]:
db, pdbs = create_db_and_parallels(1000)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.4740)
Without Noise:tensor(0.4940)


In [44]:
db, pdbs = create_db_and_parallels(1000)
private_result, true_result = query(db)
print("With Noise:" + str(private_result))
print("Without Noise:" + str(true_result))

With Noise:tensor(0.4800)
Without Noise:tensor(0.4900)


**Varying Amounts of Noise**