In [1]:
import pandas as pd
import time
import matplotlib.pyplot as plt
import numpy as np
import opendp.prelude as dp
dp.enable_features("contrib")
dp.enable_features("floating-point")
from utilities import *

  from pandas.core.computation.check import NUMEXPR_INSTALLED
  from pandas.core import (


# Define Parameters 

The available mechanisms are:

1. Laplace Mechanism (central & pure DP) denoted as "laplace"
2. Stability Histogram (central & approximate DP) denoted as "stabilityhist"
3. Direct Encoding (LDP) denoted as "directencoding"
4. Randomised Response (LDP) denoted as "randresponse"
5. Optimised Local Hashing (LDP) denoted as "olh"
6. RAPPOR (LDP) denoted as "rappor"
7. Hadamard Mechanism (LDP) denoted as "hadamard"



In [2]:
# ---- Run configuration ----

# Geographic granularity of the analysis: "county" or "ed".
level = "ed"

# Mechanism identifier plus the human-readable name used for plots.
# Supported identifiers: laplace, stabilityhist, directencoding, randresponse,
# olh, hadamard, rappor
# (for example: Mechanism = "laplace" with Mechanism_name = "Laplace Mechanism").
Mechanism, Mechanism_name = "stabilityhist", "Stability Histogram"

# Directory handed to get_variables() when loading the data description.
# NOTE(review): absolute, machine-specific path; consider making it configurable.
path = "/Users/Ava/Library/CloudStorage/OneDrive-Personal/Thesis/Code/"

# When True, the table of released counts is written to a CSV file.
save = False

# Passed through to run_dp(); presumably the maximum number of records a single
# individual can contribute. TODO confirm against utilities.run_dp.
max_influence = 2

# Privacy-loss values to sweep: 0.5, 1.0, ..., 5.0 (the stop value is excluded).
epsilon = np.arange(start=0.5, stop=5.5, step=0.5)


In [3]:

# Fail fast on unsupported configuration values.
# ValueError is a subclass of Exception, so any handler that caught the
# previous bare Exception still catches these.
if level not in ("county", "ed"):
    raise ValueError(f"The level does not equal county or ed. The current input is {level=}")

if Mechanism not in (
    "laplace",
    "stabilityhist",
    "directencoding",
    "randresponse",
    "rappor",
    "olh",
    "hadamard",
):
    # The original message contained the literal text "/n"; a real newline
    # escape is used here so the current value is reported on its own line.
    raise ValueError(
        "The Mechanism is not supported or there is a typo with the input. "
        "Please check the available Mechanisms and try again.\n"
        f"The current input is {Mechanism=}"
    )

In [4]:
# Display form of the level, used in column names: "County" or "ED".
if level == "county":
    Level = level.capitalize()
elif level == "ed":
    Level = level.upper()

# Dataset description returned by the project helper get_variables().
size, categories, col_names = get_variables(path, level, Level)

# Only the stability histogram runs with a non-zero delta, set to 1 / (2 * size);
# every other mechanism runs with delta = 0.
delta = 1/(2*size) if Mechanism == "stabilityhist" else 0

# One (epsilon, delta) pair per epsilon value in the sweep.
budget = [(eps, delta) for eps in epsilon]

# Read the raw CSV as a single string.
# NOTE(review): the file is opened relative to the working directory, not `path`.
# Confirm this is intended.
with open(f'commute_{level}_level_all.csv') as input_data:
    data = input_data.read()

## Compute Sensitive Counts

In [5]:
# Baseline without differential privacy: exact per-category counts.
# Stage 1: parse the CSV text into a dataframe with the given column names.
split_rows = dp.t.make_split_dataframe(separator=",", col_names=col_names)
# Stage 2: keep only the commute column for the chosen level, read as strings.
pick_commute = dp.t.make_select_column(key=f"{Level}_commute", TOA=str)
# Stage 3: count the records falling in each of the known categories.
count_categories = dp.t.then_count_by_categories(categories=categories)

histogram = split_rows >> pick_commute >> count_categories

# NOTE(review): later cells drop the last element with [:-1]; presumably it is
# OpenDP's extra bin for values outside `categories`. Confirm.
sensitive_counts = histogram(data)

## Run Mechanism

In [6]:
# Run the selected mechanism via the project helper run_dp(), which returns the
# released counts, the elapsed time and the RMSE values.
released_counts, elapsed_time, all_rmse = run_dp(Mechanism, col_names, Level, budget, max_influence, size, data, histogram, categories, sensitive_counts)

# Also publish the results under level- and mechanism-specific names
# (e.g. ed_released_counts_stabilityhist) so several runs can coexist in one kernel.
# globals() is used rather than locals(): writing into locals() is not a
# supported way to bind names and only worked here because the two are the
# same dictionary at module scope. Later lookups through locals() at the top
# level of a cell still find these names.
globals()[f"{level}_released_counts_{Mechanism}"] = released_counts
globals()[f"{level}_elapsed_time_{Mechanism}"] = elapsed_time
globals()[f"{level}_rmse_{Mechanism}"] = all_rmse

Starting Stability Histogram with an epsilon value of  0
Finished Stability Histogram with an epsilon value of  0
Starting Stability Histogram with an epsilon value of  1
Finished Stability Histogram with an epsilon value of  1
Starting Stability Histogram with an epsilon value of  2


# Save File for Analysis

In [None]:
# Build a table with one row per category: the category label, the true
# (non-private) count, and one released-count column per privacy budget.
private_dataset_df = pd.DataFrame(categories, columns=[f'{Level} Level Commute'])
# The last element of sensitive_counts is dropped before it is stored.
private_dataset_df['True Count'] = sensitive_counts[:-1]

for idx, pair in enumerate(budget):
    eps = pair[0]  # pair is (epsilon, delta); only epsilon labels the column
    private_dataset_df[f'Privacy {eps}'] = locals()[f"{level}_released_counts_{Mechanism}"][idx]

# Optionally persist the table for later analysis.
if save:
    output_csv = f'{level}_{Mechanism}_dp_df.csv'
    private_dataset_df.to_csv(output_csv, sep=',', index=False, encoding='utf-8')


In [None]:
# Show the true counts next to the counts released for the first budget entry;
# the last element of each sequence is left out.
print("Commute level counts:\n", sensitive_counts[:-1])
first_release = locals()[f"{level}_released_counts_{Mechanism}"][0]
print("DP Commute level counts:\n", first_release[:-1])