In [17]:
import pandas as pd
import time
import matplotlib.pyplot as plt
import numpy as np
import opendp.prelude as dp
dp.enable_features("contrib")
dp.enable_features("floating-point")
from utilities import *

# Define Parameters 

Choices of Mechanism are 

1. Laplace Mechanism (central & pure DP) denoted as "laplace"
2. Stability Histogram (central & approximate DP) denoted as "stabilityhist"
3. Unary Encoding (LDP) denoted as "unaryencoding"
4. Randomised Response (LDP) denoted as "randresponse"
5. Optimised Local Hashing (LDP) denoted as "olh"
6. RAPPOR (LDP) denoted as "rappor"
7. Hadamarrd Mechanism (LDP) denoted as "hadamard"



In [18]:
#Level: county or ed
level = "county"

#Mechanism: laplace, stabilityhist, unaryencoding, randresponse, olh, hadamard, rappor
#Mechanism = "laplace"
#Mechanism_name = "Laplace Mechanism"

Mechanism = "unaryencoding"

#Path to data
path = "./"
save = True

max_influence = 2
epsilon = np.arange(0.5,5.5, 0.5)



In [19]:
if not (level == "county" or level == "ed"):
    raise Exception(f"The level does not equal county or ed. The currrent input is {level=}")


if not (Mechanism == "laplace" or Mechanism == "stabilityhist" or Mechanism == "unaryencoding"  or Mechanism == "randresponse" or Mechanism == "rappor"  or Mechanism == "olh" or Mechanism == "hadamard"): 
    raise Exception(f"The Mechanism is not supported or there is a typo with the input. Please check availiable Mechanisms are try again. /n  The current input is {Mechanism=}")

In [20]:
if level == "county":
    Level = level.capitalize()
if level == "ed":
    Level = level.upper()

#Outputs size=number of individuals, categories=list of all possible commutes, data_df = data where each row corresponds to an indidual, commutes = individuals commutes eg. commutes[1] = commute of individual 1 
size, categories, col_names, data_df, commutes = get_variables(path, level, Level)

if Mechanism == "stabilityhist":
    delta = 1/(2*size)
else:
    delta = 0 


if Mechanism == "olh":
    g = np.exp(budget[epsilon][0]) +  1
else:
    g = 0 


budget = [(e, delta) for e in epsilon]

## Compute Sensitive Counts

In [21]:
with open(f'commute_{level}_level_all.csv') as input_data:
    data = input_data.read()
    
#This is the dataset without differential privacy. 
histogram = (
    dp.t.make_split_dataframe(separator=",", col_names=col_names) >>
    dp.t.make_select_column(key=f"{Level}_commute", TOA=str) >>
    # Compute counts for each of the categories
    dp.t.then_count_by_categories(categories=categories)
)

sensitive_counts = histogram(data)

## Run Mechanism

In [22]:
if not (Mechanism == "laplace" or Mechanism == "stabilityhist" ):
    released_counts_client, elapsed_time_client = run_client(Mechanism, Level, budget, size, categories, commutes, sensitive_counts, g)
    released_counts, elapsed_time_server, all_rmse = run_server(Mechanism, Level, released_counts_client, sensitive_counts, size, budget, categories)
    total_elapsed_time = [sum(element) for element in zip(elapsed_time_client, elapsed_time_server)]
else: 
    released_counts, elapsed_time, all_rmse = run_central(Mechanism, col_names, Level, budget, max_influence, size, data, histogram, categories, sensitive_counts)

Starting Unary Encoding with an epsilon value of  0


KeyboardInterrupt: 

# Save File for Analysis

In [None]:
private_dataset_df = pd.DataFrame(categories, columns = [f'{Level} Level Commute'])
private_dataset_df['True Count'] =  sensitive_counts[:-1]
for i, name in enumerate(budget):
    private_dataset_df[f'Privacy {name[0]}'] = released_counts[i]

if save:
    private_dataset_df.to_csv(f'Data/{level}_{Mechanism}_dp_df.csv', sep=',', index=False, encoding='utf-8', mode='w')
    names = ("epsilon, delta, rmse, total_elapsed_time, elapsed_time_client, elapsed_time_server")
    np.savetxt(f'Data/rmse_{Mechanism}_{Level}.csv', [(budget[i][0], budget[i][1], all_rmse[i], total_elapsed_time[i], elapsed_time_client[i], elapsed_time_server[i]) for i in range(len(budget))] , header = names, delimiter=',')


In [None]:
print("Commute level counts:\n", sensitive_counts[0:10])
print("DP Commute level counts:\n", released_counts[0][0:10])

In [23]:
import xxhash


In [56]:
data=[1,2,3,4,5,5,6,7,8,9,10]

In [59]:
index = data
seed = 1
g = np.exp(budget[5][0]) +  1


In [60]:
prob_p = 1/2
prob_q = 1 / (np.exp(budget[5][0]) +  1)

In [61]:
def perturb(seed):
    # Taken directly from Wang (https://github.com/vvv214/LDP_Protocols/blob/master/olh.py#L55-L65)
    x = (xxhash.xxh32(str(index), seed=seed).intdigest() % g)
    y = x
    print(x)

    p_sample = np.random.random_sample()
    # the following two are equivalent
    # if p_sample > p:
    #     while not y == x:
    #         y = np.random.randint(0, g)
    if p_sample > prob_p:
        # perturb
        y = np.random.randint(0, g)

    return y

In [62]:
y = perturb(seed)

11.084987039795102


In [63]:
y

20