In [4]:
# Load the data and libraries
import pandas as pd
import numpy as np
import random
from scipy import stats
import matplotlib.pyplot as plt

def laplace_mech(v, sensitivity, epsilon):
    return v + np.random.laplace(loc=0, scale=sensitivity / epsilon)

def laplace_mech_vec(vec, sensitivity, epsilon):
    return [v + np.random.laplace(loc=0, scale=sensitivity / epsilon) for v in vec]

def gaussian_mech(v, sensitivity, epsilon, delta):
    return v + np.random.normal(loc=0, scale=sensitivity * np.sqrt(2*np.log(1.25/delta)) / epsilon)

def gaussian_mech_vec(vec, sensitivity, epsilon, delta):
    return [v + np.random.normal(loc=0, scale=sensitivity * np.sqrt(2*np.log(1.25/delta)) / epsilon)
            for v in vec]

def pct_error(orig, priv):
    return np.abs(orig - priv)/orig * 100.0

def range_query(df, col, a, b):
    return len(df[(df[col] >= a) & (df[col] < b)])

def workload_laplace(workload, epsilon, df, col):
    list_of_range_queries = []
    
    for (lb, ub) in workload:
        query = range_query(df, col, lb, ub)
        noisy_query = laplace_mech(query, 1, epsilon/len(workload))
        list_of_range_queries.append(noisy_query)
        
    return list_of_range_queries

def workload_gaussian(workload, epsilon, delta, df, col):
    list_of_range_queries = []
    
    for (lb, ub) in workload:
        query = range_query(df, col, lb, ub)
        noisy_query = gaussian_mech(query, 1, epsilon/len(workload), delta)
        list_of_range_queries.append(noisy_query)
        
    return list_of_range_queries

wine = pd.read_csv(r"C:\Users\thund\winequality-white.csv", sep=';')


# I have submitted the .csv file with this project, as the csv will not correctly be read
# when included as a link.

# wine dataset link: https://archive.ics.uci.edu/dataset/186/wine+quality

# wine = pd.read_csv('https://github.com/WadeBradford17/cs3110-final-project/blob/main/winequality-white.csv')

In [5]:
wine

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.27,0.36,20.7,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
1,6.3,0.30,0.34,1.6,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
2,8.1,0.28,0.40,6.9,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
3,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
4,7.2,0.23,0.32,8.5,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
...,...,...,...,...,...,...,...,...,...,...,...,...
4893,6.2,0.21,0.29,1.6,0.039,24.0,92.0,0.99114,3.27,0.50,11.2,6
4894,6.6,0.32,0.36,8.0,0.047,57.0,168.0,0.99490,3.15,0.46,9.6,5
4895,6.5,0.24,0.19,1.2,0.041,30.0,111.0,0.99254,2.99,0.46,9.4,6
4896,5.5,0.29,0.30,1.1,0.022,20.0,110.0,0.98869,3.34,0.38,12.8,7


In [6]:
random_lower_bounds_fixed_acidity = [random.randint(0, 100) for _ in range(100)]
random_lower_bounds_citric_acid = [random.randint(0, 100) for _ in range(100)]
random_lower_bounds_chlorides = [random.randint(0, 100) for _ in range(100)]
random_lower_bounds_total_sulfur_dioxide = [random.randint(0, 100) for _ in range(100)]
random_lower_bounds_pH = [random.randint(0, 100) for _ in range(100)]

random_workload_fixed_acidity = [(lb, random.randint(lb, 100)) for lb in random_lower_bounds_fixed_acidity]
random_workload_citric_acid = [(lb, random.randint(lb, 100)) for lb in random_lower_bounds_citric_acid]
random_workload_chlorides = [(lb, random.randint(lb, 100)) for lb in random_lower_bounds_chlorides]
random_workload_total_sulfur_dioxide = [(lb, random.randint(lb, 100)) for lb in random_lower_bounds_total_sulfur_dioxide]
random_workload_pH = [(lb, random.randint(lb, 100)) for lb in random_lower_bounds_pH]

real_answers_fixed_acidity = [range_query(wine, 'fixed acidity', lb, ub) for (lb, ub) in random_workload_fixed_acidity]
real_answers_citric_acid = [range_query(wine, 'citric acid', lb, ub) for (lb, ub) in random_workload_citric_acid]
real_answers_chlorides = [range_query(wine, 'chlorides', lb, ub) for (lb, ub) in random_workload_chlorides]
real_answers_total_sulfur_dioxide = [range_query(wine, 'total sulfur dioxide', lb, ub) for (lb, ub) in random_workload_total_sulfur_dioxide]
real_answers_pH = [range_query(wine, 'pH', lb, ub) for (lb, ub) in random_workload_pH]

In [8]:
print('RESULTS USING LAPLACE MECHANSISM')
print()

print('Laplace mech: fixed acidity', workload_laplace(random_workload_fixed_acidity, 1.0, wine, 'fixed acidity')[:5])
errors = [abs(r_a - l_a) for (r_a, l_a) in zip(real_answers_fixed_acidity, workload_laplace(random_workload_fixed_acidity, 1.0, wine, 'fixed acidity'))]
print('Average absolute error:', np.mean(errors))
print()

print('Laplace mech: citric acid', workload_laplace(random_workload_citric_acid, 1.0, wine, 'citric acid')[:5])
errors = [abs(r_a - l_a) for (r_a, l_a) in zip(real_answers_citric_acid, workload_laplace(random_workload_citric_acid, 1.0, wine, 'citric acid'))]
print('Average absolute error:', np.mean(errors))
print()

print('Laplace mech: chlorides', workload_laplace(random_workload_chlorides, 1.0, wine, 'chlorides')[:5])
errors = [abs(r_a - l_a) for (r_a, l_a) in zip(real_answers_chlorides, workload_laplace(random_workload_chlorides, 1.0, wine, 'chlorides'))]
print('Average absolute error:', np.mean(errors))
print()

print('Laplace mech: total sulfur dioxide', workload_laplace(random_workload_total_sulfur_dioxide, 1.0, wine, 'total sulfur dioxide')[:5])
errors = [abs(r_a - l_a) for (r_a, l_a) in zip(real_answers_total_sulfur_dioxide, workload_laplace(random_workload_total_sulfur_dioxide, 1.0, wine, 'total sulfur dioxide'))]
print('Average absolute error:', np.mean(errors))
print()

print('Laplace mech: pH', workload_laplace(random_workload_pH, 1.0, wine, 'pH')[:5])
errors = [abs(r_a - l_a) for (r_a, l_a) in zip(real_answers_pH, workload_laplace(random_workload_pH, 1.0, wine, 'pH'))]
print('Average absolute error:', np.mean(errors))
print()

RESULTS USING LAPLACE MECHANSISM

Laplace mech: fixed acidity [-28.668971508693396, -111.25781042793126, -19.962503410195588, 141.25624550064825, 61.57070346748017]
Average absolute error: 99.38464807618807

Laplace mech: citric acid [82.85065151043527, 80.43360460399043, 26.648128632717743, -71.96193309597527, -26.985010414176543]
Average absolute error: 91.6764921764764

Laplace mech: chlorides [46.12530870999628, 75.01850952616466, 81.0853355442966, 142.6265030716908, -158.42073329412256]
Average absolute error: 88.93523188426214

Laplace mech: total sulfur dioxide [105.10455952498778, 500.835598266626, 572.0095761430796, 159.82809885274838, 292.70583598286396]
Average absolute error: 86.53773237679793

Laplace mech: pH [18.293382359451467, 94.07847472805938, 173.67715058158012, 84.5164631624213, 15.102176702200268]
Average absolute error: 86.27816767951207



In [10]:
# and it’s less accurate than the Laplace mechanism
print('RESULTS USING GAUSSIAN MECHANSISM')
print()

print('Gaussian mech: fixed acidity', workload_gaussian(random_workload_fixed_acidity, 1.0, 1e-5, wine, 'fixed acidity')[:5])
errors = [abs(r_a - l_a) for (r_a, l_a) in zip(real_answers_fixed_acidity, workload_gaussian(random_workload_fixed_acidity, 1.0, 1e-5, wine, 'fixed acidity'))]
print('Average absolute error:', np.mean(errors))
print()

print('Gaussian mech: citric acid', workload_gaussian(random_workload_citric_acid, 1.0, 1e-5, wine, 'citric acid')[:5])
errors = [abs(r_a - l_a) for (r_a, l_a) in zip(real_answers_citric_acid, workload_gaussian(random_workload_citric_acid, 1.0, 1e-5, wine, 'citric acid'))]
print('Average absolute error:', np.mean(errors))
print()

print('Gaussian mech: chlorides', workload_gaussian(random_workload_chlorides, 1.0, 1e-5, wine, 'chlorides')[:5])
errors = [abs(r_a - l_a) for (r_a, l_a) in zip(real_answers_chlorides, workload_gaussian(random_workload_chlorides, 1.0, 1e-5, wine, 'chlorides'))]
print('Average absolute error:', np.mean(errors))
print()

print('Gaussian mech: total sulfur dioxide', workload_gaussian(random_workload_total_sulfur_dioxide, 1.0, 1e-5, wine, 'total sulfur dioxide')[:5])
errors = [abs(r_a - l_a) for (r_a, l_a) in zip(real_answers_total_sulfur_dioxide, workload_gaussian(random_workload_total_sulfur_dioxide, 1.0, 1e-5, wine, 'total sulfur dioxide'))]
print('Average absolute error:', np.mean(errors))
print()

print('Gaussian mech: pH', workload_gaussian(random_workload_pH, 1.0, 1e-5, wine, 'pH')[:5])
errors = [abs(r_a - l_a) for (r_a, l_a) in zip(real_answers_pH, workload_gaussian(random_workload_pH, 1.0, 1e-5, wine, 'pH'))]
print('Average absolute error:', np.mean(errors))
print()

RESULTS USING GAUSSIAN MECHANSISM

Gaussian mech: fixed acidity [258.123535481833, -349.4257999627463, -59.03812935390983, 192.52486928272026, -257.4166783538627]
Average absolute error: 384.06689172177

Gaussian mech: citric acid [307.927278169439, -550.2212307127728, -976.1756320737234, 264.4657253429463, -1251.4708674980304]
Average absolute error: 361.19700793501

Gaussian mech: chlorides [1120.7646319660935, -644.8276681219068, -62.109231713391196, 137.57260396625128, 671.9609833304081]
Average absolute error: 394.2435763796955

Gaussian mech: total sulfur dioxide [135.2667603313494, 403.7633844463201, 1207.1076292915795, 423.7201823258801, 203.37716754398144]
Average absolute error: 431.57163694991607

Gaussian mech: pH [967.5430066304217, 436.65310329149236, 28.123986468174675, -702.0639944232785, -731.6319845871087]
Average absolute error: 359.06406095416014

