# Performing Differencing Attacks

Construct a database and demo how you can use two different sum queries to expose the value of the person represented by the row K (say, 10) in the db

In [None]:
!pip install syft==0.2.9

In [32]:

import torch

class CalculateSensitivity:
    "This is a class that contains reusable methods for initializing parallel dbs \
with number of entries. Use classObject.create_db_and_parallels(num_entries) to \
get db and parallel db list. Use classObject.sensitivity(query, num_entries, verbose) \
to find the sensitivity of the query function"

    def get_parallel_db(self, db, remove_index):
        return torch.cat((db[0:remove_index], db[remove_index+1:]))

    def get_parallel_dbs(self, db):
        parallel_dbs = list()
        for i in range(len(db)):
            pdb = self.get_parallel_db(db, i)
            parallel_dbs.append(pdb)
        return parallel_dbs

    def create_db_and_parallels(self, num_entries):
        db = torch.rand(num_entries).gt(0.5).to(torch.uint8)
        pdbs = self.get_parallel_dbs(db)
        return db, pdbs

    def sensitivity(self, query, num_entries, verbose):

        db, pdbs = self.create_db_and_parallels(num_entries)

        if verbose:
            print('Centralized data is: ', db)
            print('Parallel data are: ', pdbs)

        centralized_result = query(db)
        parallel_results = [query(temp_db) for temp_db in pdbs]

        if verbose:
            print('Centralized result: ', centralized_result)
            print('Parallel results: ', parallel_results)

        max_distance = 0
        for parallel_result in parallel_results:
            if max_distance < torch.abs(centralized_result - parallel_result): 
                max_distance = torch.abs(centralized_result - parallel_result)
        L1_sensitivity = max_distance
        return L1_sensitivity

# sens = CalculateSensitivity()
# sens.sensitivity(lambda x:x.sum(), num_entries=10, verbose=True)

In [35]:
sens = CalculateSensitivity()

db, _ = sens.create_db_and_parallels(num_entries=100)
pdb = sens.get_parallel_db(db=db, remove_index=10)

db[10]

tensor(1, dtype=torch.uint8)

Now we know that the 10th element of db is 1

Let's see if we can find it out using a query on pdb whose 10th element is removed.

In [36]:
# Differencing attack using sum query
sum(db) - sum(pdb)

tensor(1, dtype=torch.uint8)

In [40]:
# Differencing attack using mean query
def mean_query(db):
    return sum(db).float() / len(db)

# If mean query returns non zero, then the missing element was 1
print('1') if (mean_query(db) - mean_query(pdb)) > 0 else print('0')

1


In [53]:
# Differencing attack using threshold
# with threshold = sum(db).item()
sum(db).float() > (sum(db).item()-1) - sum(pdb).float() > (sum(db).item()-1)

tensor(False)

In [50]:
sum(db).float() > sum(db).item() - sum(pdb.float() > sum(db).item())

tensor(False)

So differential privacy should be immune to differential attacks.