In [None]:
import os
import time

import matplotlib.pyplot as plt
import numpy as np
import opendp.prelude as dp
import pandas as pd

dp.enable_features("contrib")
dp.enable_features("floating-point")

from utilities import *

# Define Parameters 

The available choices of mechanism are:

1. Laplace Mechanism (central & pure DP) denoted as "laplace"
2. Stability Histogram (central & approximate DP) denoted as "stabilityhist"
3. Unary Encoding (LDP) denoted as "unaryencoding"
4. Randomised Response (LDP) denoted as "randresponse"
5. Optimised Local Hashing (LDP) denoted as "olh"
6. RAPPOR (LDP) denoted as "rappor"
7. Hadamard Mechanism (LDP) denoted as "hadamard"



In [None]:
# Run configuration: edit the values in this cell, then re-run the notebook.

# Aggregation level of the commute data: "county" or "ed".
level = "ed"

# Mechanism to run; one of:
# laplace, stabilityhist, unaryencoding, randresponse, olh, hadamard, rappor
Mechanism = "randresponse"

# Path to the data (passed to get_variables). Defaults to the original
# location; set the COMMUTE_DATA_DIR environment variable to point somewhere
# else without editing the notebook.
# NOTE(review): the default ends with "/", so an override should presumably
# keep a trailing separator too -- confirm against how get_variables uses it.
path = os.environ.get(
    "COMMUTE_DATA_DIR",
    "/Users/Ava/Library/CloudStorage/OneDrive-Personal/Thesis/Code/",
)

# When True, the results table and the RMSE/timing summary are written to CSV.
save = False

# Forwarded unchanged to run_dp.
# NOTE(review): presumably the maximum number of records one individual can
# contribute -- confirm against utilities.run_dp.
max_influence = 2

# Epsilon values to sweep: 0.5, 1.0, ..., 5.0 (the stop value 5.5 is exclusive).
epsilon = np.arange(0.5, 5.5, 0.5)



In [None]:
# Fail fast on unsupported configuration values, before any data is loaded.
# ValueError is a subclass of Exception, so existing `except Exception`
# handlers still catch these errors.
if level not in ("county", "ed"):
    raise ValueError(f"The level does not equal county or ed. The current input is {level=}")

if Mechanism not in (
    "laplace",
    "stabilityhist",
    "unaryencoding",
    "randresponse",
    "rappor",
    "olh",
    "hadamard",
):
    raise ValueError(
        "The Mechanism is not supported or there is a typo with the input. "
        "Please check the available Mechanisms and try again.\n"
        f"The current input is {Mechanism=}"
    )

In [None]:
# Display form of the level, used in column names and output file names:
# "ed" -> "ED", "county" -> "County".
if level == "ed":
    Level = level.upper()
elif level == "county":
    Level = level.capitalize()

# get_variables is not defined in this notebook. According to the original note it returns:
#   size       - number of individuals
#   categories - list of all possible commutes
#   col_names  - column names (passed on to the OpenDP dataframe splitter)
#   data_df    - data where each row corresponds to an individual
#   commutes   - each individual's commute, e.g. commutes[1] is the commute of individual 1
size, categories, col_names, data_df, commutes = get_variables(path, level, Level)

# Only the stability histogram (approximate DP) uses a non-zero delta.
delta = 1/(2*size) if Mechanism == "stabilityhist" else 0

# One (epsilon, delta) pair per epsilon value in the sweep.
budget = [(eps, delta) for eps in epsilon]

## Compute Sensitive Counts

In [None]:
# Read the whole CSV for the chosen level into one string; the OpenDP
# pipeline below does its own parsing.
with open(f'commute_{level}_level_all.csv') as csv_file:
    data = csv_file.read()

# Counting pipeline WITHOUT differential privacy, built stage by stage:
# split the CSV text into columns, select the commute column as strings,
# then count the rows in each category.
split_rows = dp.t.make_split_dataframe(separator=",", col_names=col_names)
pick_commute = dp.t.make_select_column(key=f"{Level}_commute", TOA=str)
count_commutes = dp.t.then_count_by_categories(categories=categories)
histogram = split_rows >> pick_commute >> count_commutes

# Exact (sensitive) counts -- the baseline the released counts are compared with.
sensitive_counts = histogram(data)

## Run Mechanism

In [None]:
# Run the selected mechanism once per (epsilon, delta) budget. run_dp is not
# defined in this notebook; its three results are indexed per budget entry
# by the cells that follow.
released_counts, elapsed_time, all_rmse = run_dp(
    Mechanism,
    col_names,
    Level,
    budget,
    max_influence,
    size,
    data,
    histogram,
    categories,
    commutes,
    sensitive_counts,
)

# Save File for Analysis

In [None]:
# Results table: one row per commute category, the true count, and one
# column of released counts per privacy budget (labelled by its epsilon).
private_dataset_df = pd.DataFrame(categories, columns=[f'{Level} Level Commute'])
# The last entry of sensitive_counts is dropped before it is stored.
# NOTE(review): presumably that entry is the count for values outside
# `categories` -- confirm against the OpenDP count_by_categories docs.
private_dataset_df['True Count'] = sensitive_counts[:-1]
for idx, (eps, _dlt) in enumerate(budget):
    private_dataset_df[f'Privacy {eps}'] = released_counts[idx]

if save:
    # Released counts for every budget, next to the true counts.
    private_dataset_df.to_csv(
        f'{level}_{Mechanism}_dp_df.csv',
        sep=',',
        index=False,
        encoding='utf-8',
        mode='w',
    )
    # One summary row per budget: epsilon, delta, RMSE and elapsed time.
    names = "epsilon, delta, rmse, elapsed_time"
    summary_rows = [
        (eps, dlt, all_rmse[k], elapsed_time[k])
        for k, (eps, dlt) in enumerate(budget)
    ]
    np.savetxt(f'rmse_{Mechanism}_{Level}.csv', summary_rows, header=names, delimiter=',')


In [None]:
# Quick sanity check: the first ten true counts next to the first ten
# released counts for the first privacy budget in the sweep.
print("Commute level counts:\n", sensitive_counts[:10])
print("DP Commute level counts:\n", released_counts[0][:10])