In [3]:
import torch

#FUNCTION TO CREATE A PARALLEL DB: A COPY OF DB WITH THE ENTRY AT REMOVE_INDEX LEFT OUT
def get_parallel_db(db, remove_index):
    """Return a copy of `db` with the entry at `remove_index` left out.

    Args:
        db: 1-D tensor holding the database entries.
        remove_index: position of the entry to drop. It is used directly as a
            slice bound, so negative values are not normalized and an index
            past the end removes nothing.

    Returns:
        A new tensor made of the entries before and after `remove_index`.
    """
    entries_before = db[:remove_index]
    entries_after = db[remove_index + 1:]
    return torch.cat((entries_before, entries_after))

#FUNCTION TO CREATE PARALLEL DBS GIVEN A DB AS PARAMETER
def get_parallel_dbs(db):
    """Build every leave-one-out version of `db`.

    Args:
        db: 1-D tensor holding the database entries.

    Returns:
        A list with one tensor per entry of `db`; the i-th tensor is `db`
        with entry i removed (see `get_parallel_db`).
    """
    return [get_parallel_db(db, position) for position in range(len(db))]

#FUNCTION TO CREATE DB AND ITS PARALLELS
def create_db_and_parallels(num_entries):
    """Create a random binary database together with its parallel databases.

    Args:
        num_entries: number of entries in the generated database.

    Returns:
        (db, pdbs): `db` is a boolean tensor obtained by thresholding uniform
        random draws at 0.5; `pdbs` is the list produced by
        `get_parallel_dbs(db)`.
    """
    random_db = torch.rand(num_entries) > 0.5
    return random_db, get_parallel_dbs(random_db)

#FUNCTION TO EMPIRICALLY DETERMINE THE SENSITIVITY OF A QUERY FUNCTION
def sensitivity(query, n_entries):
    """Empirically measure how much `query` changes when one entry is removed.

    A random database of `n_entries` entries is generated, `query` is run on
    it and on each of its parallel databases, and the largest absolute
    difference from the full-database result is returned.

    Args:
        query: callable taking a database tensor. Its result is subtracted
            and compared directly, so it has to return a single scalar value
            or tensor rather than a tuple.
        n_entries: size of the random database to generate.

    Returns:
        The largest observed distance as a tensor, or the integer 0 when no
        parallel database produced a distance greater than 0.
    """
    db, parallel_dbs = create_db_and_parallels(n_entries)
    baseline = query(db)

    largest_distance = 0
    for parallel_db in parallel_dbs:
        distance = torch.abs(query(parallel_db) - baseline)
        # Keep the running maximum over all leave-one-out databases.
        if distance > largest_distance:
            largest_distance = distance

    return largest_distance


In [35]:
#QUERY FUNCTION
def query(db):
    """Estimate the mean of `db` via randomized response with two fair coins.

    For every entry a first coin is flipped: on 1 the true answer is kept, on
    0 the answer is replaced by a second, independent coin flip. The mean of
    the randomized answers is then de-skewed.

    Args:
        db: 1-D tensor of 0/1 (or boolean) answers.

    Returns:
        (db_result, true_result): the de-skewed noisy mean and the exact mean
        of `db`.
    """
    answers = db.float()
    n_entries = len(db)

    true_result = answers.mean()

    # Draw order is first coin, then second coin.
    first_coin_flip = (torch.rand(n_entries) > 0.5).float()
    second_coin_flip = (torch.rand(n_entries) > 0.5).float()

    # Where the first coin is 1 the honest answer survives ...
    honest_part = answers * first_coin_flip
    # ... and where it is 0 the second coin supplies a random answer.
    random_part = (1 - first_coin_flip) * second_coin_flip
    augmented_db = honest_part + random_part

    # Half the answers are honest and half average 0.5, so the expected
    # noisy mean is 0.5 * true_mean + 0.25; invert that: 2 * mean - 0.5.
    db_result = augmented_db.mean() * 2 - 0.5

    return db_result, true_result
    

In [41]:
# Build a 10-entry random database; `pdbs` holds its leave-one-out copies.
db, pdbs = create_db_and_parallels(10)
# Uncomment to inspect the true answers collected from people: print(db)

# Run the randomized-response query once.
private_result, true_result = query(db)

# print() stringifies its arguments itself, so no explicit str() is needed.
print("With Noise : ", private_result)
print("Without Noise : ", true_result)

With Noise :  tensor(0.5000)
Without Noise :  tensor(0.3000)


In [43]:
# Same experiment with 100 entries.
db, pdbs = create_db_and_parallels(100)
# Uncomment to inspect the true answers collected from people: print(db)

# Run the randomized-response query once.
private_result, true_result = query(db)

# print() stringifies its arguments itself, so no explicit str() is needed.
print("With Noise : ", private_result)
print("Without Noise : ", true_result)

With Noise :  tensor(0.5600)
Without Noise :  tensor(0.5000)


In [44]:
# Same experiment with 1000 entries.
db, pdbs = create_db_and_parallels(1000)
# Uncomment to inspect the true answers collected from people: print(db)

# Run the randomized-response query once.
private_result, true_result = query(db)

# print() stringifies its arguments itself, so no explicit str() is needed.
print("With Noise : ", private_result)
print("Without Noise : ", true_result)

With Noise :  tensor(0.4960)
Without Noise :  tensor(0.4960)


In [45]:
# Same experiment with 10000 entries.
db, pdbs = create_db_and_parallels(10000)
# Uncomment to inspect the true answers collected from people: print(db)

# Run the randomized-response query once.
private_result, true_result = query(db)

# print() stringifies its arguments itself, so no explicit str() is needed.
print("With Noise : ", private_result)
print("Without Noise : ", true_result)

With Noise :  tensor(0.4858)
Without Noise :  tensor(0.5010)


In [46]:
#QUERY FUNCTION WITH BIAS FOR FIRST COIN FLIP USING NOISE
def query(db, noise=0.5):
    """Estimate the mean of `db` via randomized response with a biased first coin.

    Each entry keeps its true answer with probability `1 - noise`; otherwise
    it is replaced by a fair coin flip. The mean of the randomized answers is
    then de-skewed to give an estimate of the true mean.

    Args:
        db: 1-D tensor of 0/1 (or boolean) answers.
        noise: probability that an answer is replaced by a random one; must
            satisfy 0 <= noise < 1. Defaults to 0.5, which reproduces the
            fair-coin `query(db)` defined earlier in the notebook, so
            one-argument calls keep working after this definition shadows it.

    Returns:
        (private_result, true_result): the de-skewed noisy mean and the exact
        mean of `db`.

    Raises:
        ValueError: if `noise` is outside [0, 1). At noise == 1 every answer
            is random and the true mean cannot be recovered.
    """
    if not 0 <= noise < 1:
        raise ValueError("noise must satisfy 0 <= noise < 1, got {}".format(noise))

    answers = db.float()
    true_result = torch.mean(answers)

    # first_coin_flip is 1 (answer honestly) with probability 1 - noise.
    first_coin_flip = (torch.rand(len(db)) > noise).float()
    second_coin_flip = (torch.rand(len(db)) > 0.5).float()

    # Honest answers where the first coin is 1, the second coin elsewhere.
    augmented_db = answers * first_coin_flip + (1 - first_coin_flip) * second_coin_flip

    skewed_result = augmented_db.mean()
    # E[skewed] = (1 - noise) * true_mean + noise * 0.5, solved for true_mean.
    # This is algebraically the same as ((skewed / noise) - 0.5) * noise / (1 - noise)
    # but does not divide by `noise`, so noise == 0 returns the exact mean
    # instead of NaN.
    private_result = (skewed_result - 0.5 * noise) / (1 - noise)

    return private_result, true_result
    

In [47]:
# Fresh 100-entry random database; `pdbs` holds its leave-one-out copies.
db, pdbs = create_db_and_parallels(100)
# Uncomment to inspect the true answers collected from people: print(db)

# Randomized response with noise = 0.1.
private_result, true_result = query(db, 0.1)

# print() stringifies its arguments itself, so no explicit str() is needed.
print("With Noise : ", private_result)
print("Without Noise : ", true_result)

With Noise :  tensor(0.5111)
Without Noise :  tensor(0.5300)


In [48]:
# Fresh 100-entry random database; `pdbs` holds its leave-one-out copies.
db, pdbs = create_db_and_parallels(100)
# Uncomment to inspect the true answers collected from people: print(db)

# Randomized response with noise = 0.3.
private_result, true_result = query(db, 0.3)

# print() stringifies its arguments itself, so no explicit str() is needed.
print("With Noise : ", private_result)
print("Without Noise : ", true_result)

With Noise :  tensor(0.4571)
Without Noise :  tensor(0.5500)


In [49]:
# Fresh 100-entry random database; `pdbs` holds its leave-one-out copies.
db, pdbs = create_db_and_parallels(100)
# Uncomment to inspect the true answers collected from people: print(db)

# Randomized response with noise = 0.5.
private_result, true_result = query(db, 0.5)

# print() stringifies its arguments itself, so no explicit str() is needed.
print("With Noise : ", private_result)
print("Without Noise : ", true_result)

With Noise :  tensor(0.4600)
Without Noise :  tensor(0.5800)


In [50]:
# Fresh 100-entry random database; `pdbs` holds its leave-one-out copies.
db, pdbs = create_db_and_parallels(100)
# Uncomment to inspect the true answers collected from people: print(db)

# Randomized response with noise = 0.7.
private_result, true_result = query(db, 0.7)

# print() stringifies its arguments itself, so no explicit str() is needed.
print("With Noise : ", private_result)
print("Without Noise : ", true_result)

With Noise :  tensor(0.7000)
Without Noise :  tensor(0.4400)


In [51]:
# Fresh 100-entry random database; `pdbs` holds its leave-one-out copies.
db, pdbs = create_db_and_parallels(100)
# Uncomment to inspect the true answers collected from people: print(db)

# Randomized response with noise = 0.9.
private_result, true_result = query(db, 0.9)

# print() stringifies its arguments itself, so no explicit str() is needed.
print("With Noise : ", private_result)
print("Without Noise : ", true_result)

With Noise :  tensor(0.1000)
Without Noise :  tensor(0.5400)


In [54]:
# Larger, 10000-entry random database; `pdbs` holds its leave-one-out copies.
db, pdbs = create_db_and_parallels(10000)
# Uncomment to inspect the true answers collected from people: print(db)

# Randomized response with noise = 0.8.
private_result, true_result = query(db, 0.8)

# print() stringifies its arguments itself, so no explicit str() is needed.
print("With Noise : ", private_result)
print("Without Noise : ", true_result)

With Noise :  tensor(0.4795)
Without Noise :  tensor(0.5017)


In [None]:
### GLOBAL DIFFERENTIAL PRIVACY ###

In [1]:
# Privacy budget read by laplacian_mechanism: its noise scale is sensitivity / epsilon.
epsilon = 0.5

In [2]:
import numpy as np

In [7]:
db, pdbs = create_db_and_parallels(100) # 100-entry random database; print(db) to inspect it for the demo
# DEMO: sum(db) -- the sensitivity of sum() is 1 for this db. For sum(db * 2) it would be 2, and so on.

tensor(54, dtype=torch.uint8)

In [8]:
def sum_query(db):
    """Return the sum of all entries of `db` (the result of `db.sum()`)."""
    total = db.sum()
    return total

In [11]:
def laplacian_mechanism(db, query, sensitivity, epsilon=None):
    """Run `query` on `db` and add Laplacian noise to its output.

    Args:
        db: database tensor passed straight to `query`.
        query: callable returning the value to be protected (e.g. `sum_query`).
        sensitivity: sensitivity of `query`; the noise scale is
            `sensitivity / epsilon`.
        epsilon: privacy budget. When omitted, the notebook-level `epsilon`
            variable is used, as before. Passing it explicitly makes the
            result independent of which cell last assigned that global.

    Returns:
        A float64 tensor of shape (1,) holding `query(db)` plus one Laplace
        noise sample.

    Raises:
        NameError: if `epsilon` is omitted and no notebook-level `epsilon`
            has been defined.
        ValueError: if the effective epsilon is not strictly positive.
    """
    if epsilon is None:
        # The parameter shadows the notebook-level name, so look the global
        # up explicitly. NameError is kept for the "never defined" case.
        try:
            epsilon = globals()["epsilon"]
        except KeyError:
            raise NameError("name 'epsilon' is not defined") from None

    if epsilon <= 0:
        raise ValueError("epsilon must be > 0, got {}".format(epsilon))

    # Scale of the Laplace distribution: a smaller epsilon gives larger noise.
    beta = sensitivity / epsilon
    noise = torch.tensor(np.random.laplace(0, beta, 1))

    return query(db) + noise

In [28]:
# NOTE(review): the very large recorded output looks like it was produced after epsilon
# was lowered to 1e-7 (execution count 26 precedes 28) -- confirm by re-running in order.
laplacian_mechanism(db, sum_query, 1) # sensitivity of sum() is 1 for this db; re-run to see the output vary with the noise

tensor([2732500.5971], dtype=torch.float64)

In [18]:
def mean_query(db):
    """Return the mean of `db` after casting its entries to float.

    For the 100-entry 0/1 database used in this notebook the sensitivity of
    this query is 1/100: a single entry can move the mean by at most 1/100.
    """
    as_float = db.float()
    return as_float.mean()

In [19]:
mean_query(db) # DEMO: exact mean, to compare against the output of the Laplacian mechanism

tensor(0.5400)

In [30]:
laplacian_mechanism(db, mean_query, 1/100) # re-run several times to see the noise vary

tensor([65038.7439], dtype=torch.float64)

In [26]:
epsilon = 0.0000001 # Re-run the Laplacian mechanism cells after this: the noise becomes enormous.
# A smaller epsilon allows less data leakage, so more noise is added (noise scale = sensitivity / epsilon).
# Epsilon is kept very small here to show that it caps the amount of data that can be leaked.