# Final Project
Catie Crowell and Johnna Schulz

In [1]:
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

def laplace_mech(v, sensitivity, epsilon):
    return v + np.random.laplace(loc=0, scale=sensitivity / epsilon)

def gaussian_mech(v, sensitivity, epsilon, delta):
    return v + np.random.normal(loc=0, scale=sensitivity * np.sqrt(2*np.log(1.25/delta)) / epsilon)

def pct_error(orig, priv):
    return np.abs(orig - priv)/orig * 100.0

ocds = pd.read_csv('https://raw.githubusercontent.com/jjschulz/cs3110/main/ocd_patient_dataset.csv')
ocds

Unnamed: 0,Patient ID,Age,Gender,Ethnicity,Marital Status,Education Level,OCD Diagnosis Date,Duration of Symptoms (months),Previous Diagnoses,Family History of OCD,Obsession Type,Compulsion Type,Y-BOCS Score (Obsessions),Y-BOCS Score (Compulsions),Depression Diagnosis,Anxiety Diagnosis,Medications
0,1018,32,Female,African,Single,Some College,2016-07-15,203,MDD,No,Harm-related,Checking,17,10,Yes,Yes,SNRI
1,2406,69,Male,African,Divorced,Some College,2017-04-28,180,,Yes,Harm-related,Washing,21,25,Yes,Yes,SSRI
2,1188,57,Male,Hispanic,Divorced,College Degree,2018-02-02,173,MDD,No,Contamination,Checking,3,4,No,No,Benzodiazepine
3,6200,27,Female,Hispanic,Married,College Degree,2014-08-25,126,PTSD,Yes,Symmetry,Washing,14,28,Yes,Yes,SSRI
4,5824,56,Female,Hispanic,Married,High School,2022-02-20,168,PTSD,Yes,Hoarding,Ordering,39,18,No,No,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
1495,5374,38,Male,Hispanic,Divorced,College Degree,2019-01-10,53,MDD,No,Contamination,Washing,21,33,Yes,Yes,SSRI
1496,5013,19,Female,Hispanic,Divorced,Graduate Degree,2022-09-14,160,GAD,Yes,Hoarding,Praying,25,16,Yes,Yes,SSRI
1497,6089,40,Male,Asian,Married,Some College,2018-03-13,100,,Yes,Contamination,Counting,2,15,Yes,Yes,Benzodiazepine
1498,3808,37,Female,Caucasian,Married,Some College,2018-04-14,210,GAD,Yes,Contamination,Washing,16,7,Yes,No,Benzodiazepine


In [2]:
ocds=ocds.drop(['Patient ID','Medications','OCD Diagnosis Date'],axis=1)

In [3]:
def age_at_onset():
    total = 0
    for i in range(0, len(ocds)):
        # age at onset
        duration = ocds.at[i, 'Age'] - (ocds.at[i, 'Duration of Symptoms (months)']/12)
        total = total + duration
    final = total/(i+1)
    return final

# Queries (without noise)

In [4]:
ybocs_obsessions = ocds['Y-BOCS Score (Obsessions)'].mean()
ybocs_compulsions = ocds['Y-BOCS Score (Compulsions)'].mean()
avg_onset_age = age_at_onset()
crosstab_obsession_compulsion = pd.crosstab(ocds['Obsession Type'], ocds['Compulsion Type'])

print("Y-BOCS Score (Obsessions): " + str(ybocs_obsessions))
print("\nY-BOCS Score (Compulsions): " + str(ybocs_compulsions))
print("\nAverage Age at Symptom Onset: " + str(avg_onset_age))
# final query requires machine learning, but can start with 2d hist of obsession/compulsion
#print(ocds[['Obsession Type', 'Compulsion Type']].value_counts())
print("\nObsession/Compulsion Type: ")
crosstab_obsession_compulsion

Y-BOCS Score (Obsessions): 20.048

Y-BOCS Score (Compulsions): 19.626

Average Age at Symptom Onset: 36.63588888888893

Obsession/Compulsion Type: 


Compulsion Type,Checking,Counting,Ordering,Praying,Washing
Obsession Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Contamination,65,50,58,67,66
Harm-related,58,74,64,70,67
Hoarding,55,56,58,47,62
Religious,58,64,55,55,71
Symmetry,56,72,50,47,55


# Queries (with LaPlace noise, ε = 1)

In [5]:
def dp_laplace_mean(col, epsilon):
    noisy_sum = laplace_mech(ocds[col].sum(), ocds[col].max(), epsilon/2)
    noisy_count = laplace_mech(len(ocds), 1, epsilon/2)
    return noisy_sum/noisy_count

def dp_laplace_age_at_onset(epsilon):
    total = 0
    for i in range(0, len(ocds)):
        # age at onset
        duration = ocds.at[i, 'Age'] - (ocds.at[i, 'Duration of Symptoms (months)']/12)
        total = total + duration
    total = laplace_mech(total, ocds['Duration of Symptoms (months)'].max(), epsilon/2)
    final = total/(laplace_mech(len(ocds), 1, epsilon/2))
    return final

def dp_laplace_crosstab_obsession_compulsion(epsilon):
    ct = pd.crosstab(ocds['Obsession Type'], ocds['Compulsion Type'])
    noisy_ct = ct.applymap(lambda x : laplace_mech(x, 1, epsilon))
    return noisy_ct

In [169]:
epsilon = 1

ybocs_obsessions_laplace = dp_laplace_mean('Y-BOCS Score (Obsessions)', epsilon)
ybocs_compulsions_laplace = dp_laplace_mean('Y-BOCS Score (Compulsions)', epsilon)
avg_onset_age_laplace = dp_laplace_age_at_onset(epsilon)
crosstab_obsession_compulsion_laplace = dp_laplace_crosstab_obsession_compulsion(epsilon)
crosstab_error_laplace = (abs(crosstab_obsession_compulsion_laplace - crosstab_obsession_compulsion) / abs(crosstab_obsession_compulsion)) * 100

print("Y-BOCS Score (Obsessions): " + str(ybocs_obsessions))
print("Y-BOCS Score (Obsessions) Error: " + str(pct_error(ybocs_obsessions, ybocs_obsessions_laplace)))

print("\nY-BOCS Score (Compulsions): " + str(ybocs_compulsions_laplace))
print("Y-BOCS Score (Compulsions) Error: " + str(pct_error(ybocs_compulsions, ybocs_compulsions_laplace)))

print("\nAverage Age at Symptom Onset: " + str(avg_onset_age_laplace))
print("Average Age at Symptom Onset Error: " + str(pct_error(avg_onset_age, avg_onset_age_laplace)))

print("\nObsession/Compulsion Type: ")
crosstab_obsession_compulsion_laplace

Y-BOCS Score (Obsessions): 20.048
Y-BOCS Score (Obsessions) Error: 0.11177911396825634

Y-BOCS Score (Compulsions): 19.76482512368857
Y-BOCS Score (Compulsions) Error: 0.7073531218208975

Average Age at Symptom Onset: 36.268670956208915
Average Age at Symptom Onset Error: 1.002344814926504

Obsession/Compulsion Type: 


Compulsion Type,Checking,Counting,Ordering,Praying,Washing
Obsession Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Contamination,62.948702,48.851598,60.892062,68.619829,65.677834
Harm-related,57.300544,74.946462,64.466081,69.994109,67.348637
Hoarding,56.907739,55.739965,57.226307,46.965766,62.895153
Religious,57.917958,64.273016,58.337229,53.92183,71.492138
Symmetry,55.071838,72.881205,51.123448,47.254881,55.657516


In [170]:
print("Obsession/Compulsion Type Error: ")
crosstab_error_laplace

Obsession/Compulsion Type Error: 


Compulsion Type,Checking,Counting,Ordering,Praying,Washing
Obsession Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Contamination,3.155843,2.296804,4.986314,2.417656,0.48813
Harm-related,1.205959,1.279003,0.728251,0.008415,0.520354
Hoarding,3.468616,0.464348,1.333953,0.072838,1.443796
Religious,0.141451,0.426588,6.06769,1.960309,0.693153
Symmetry,1.657432,1.223895,2.246895,0.5423,1.195484


# Queries (with Gaussian noise, ε = 1 & δ = 1e-5)

In [6]:
def dp_gaussian_mean(col, epsilon, delta):
    noisy_sum = gaussian_mech(ocds[col].sum(), ocds[col].max(), epsilon/2, delta)
    noisy_count = gaussian_mech(len(ocds), 1, epsilon/2, delta)
    return noisy_sum/noisy_count

def dp_gaussian_age_at_onset(epsilon, delta):
    total = 0
    for i in range(0, len(ocds)):
        # age at onset
        duration = ocds.at[i, 'Age'] - (ocds.at[i, 'Duration of Symptoms (months)']/12)
        total = total + duration
    total = gaussian_mech(total, ocds['Duration of Symptoms (months)'].max(), epsilon/2, delta)
    final = total/(gaussian_mech(len(ocds), 1, epsilon/2, delta))
    return final

def dp_gaussian_crosstab_obsession_compulsion(epsilon, delta):
    ct = pd.crosstab(ocds['Obsession Type'], ocds['Compulsion Type'])
    noisy_ct = ct.applymap(lambda x : gaussian_mech(x, 1, epsilon, delta))
    return noisy_ct

In [7]:
epsilon = 1
delta = 1e-5

ybocs_obsessions_gaussian = dp_gaussian_mean('Y-BOCS Score (Obsessions)', epsilon, delta)
ybocs_compulsions_gaussian = dp_gaussian_mean('Y-BOCS Score (Compulsions)', epsilon, delta)
avg_onset_age_gaussian = dp_gaussian_age_at_onset(epsilon, delta)
crosstab_obsession_compulsion_gaussian = dp_gaussian_crosstab_obsession_compulsion(epsilon, delta)
crosstab_error_gaussian = (abs(crosstab_obsession_compulsion_gaussian - crosstab_obsession_compulsion) / abs(crosstab_obsession_compulsion)) * 100


print("Y-BOCS Score (Obsessions): " + str(ybocs_obsessions_gaussian))
print("Y-BOCS Score (Obsessions) Error: " + str(pct_error(ybocs_obsessions, ybocs_obsessions_gaussian)))

print("\nY-BOCS Score (Compulsions): " + str(ybocs_compulsions_gaussian))
print("Y-BOCS Score (Compulsions) Error: " + str(pct_error(ybocs_obsessions, ybocs_compulsions_gaussian)))

print("\nAverage Age at Symptom Onset: " + str(avg_onset_age_gaussian))
print("Average Age at Symptom Onset Error: " + str(pct_error(avg_onset_age, avg_onset_age_gaussian)))

print("\nObsession/Compulsion Type: ")
crosstab_obsession_compulsion_gaussian

Y-BOCS Score (Obsessions): 20.75921956648624
Y-BOCS Score (Obsessions) Error: 3.547583631715097

Y-BOCS Score (Compulsions): 19.96032023405345
Y-BOCS Score (Compulsions) Error: 0.437349191672719

Average Age at Symptom Onset: 36.0393056946513
Average Age at Symptom Onset Error: 1.6284119543188296

Obsession/Compulsion Type: 


Compulsion Type,Checking,Counting,Ordering,Praying,Washing
Obsession Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Contamination,55.40252,48.548723,58.961222,62.153074,61.886501
Harm-related,65.982252,69.500709,70.602516,79.647963,67.674169
Hoarding,62.560277,55.386305,60.661378,45.403238,70.752562
Religious,61.028273,67.125049,54.687463,45.911598,68.2532
Symmetry,58.944189,73.438347,49.909431,52.667737,60.932639


In [8]:
print("Obsession/Compulsion Type Error: ")
crosstab_error_gaussian

Obsession/Compulsion Type Error: 


Compulsion Type,Checking,Counting,Ordering,Praying,Washing
Obsession Type,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Contamination,14.765353,2.902553,1.65728,7.234217,6.232575
Harm-related,13.762504,6.080123,10.316432,13.782804,1.006222
Hoarding,13.745959,1.095884,4.588583,3.397367,14.117035
Religious,5.22116,4.882889,0.568248,16.524368,3.868733
Symmetry,5.25748,1.997705,0.181139,12.059015,10.786616


# Predicting Obsession Type from Compulsion Type

In [174]:
def predict_compulsion(compulsion_type):
    # take highest val count in row
    return ocds[ocds['Compulsion Type']== compulsion_type]['Obsession Type'].max()

def predict_prob(compulsion_type, obsession_type):
    # add val counts of full row and divide individual by sum
    val = len(ocds[(ocds['Compulsion Type'] == compulsion_type) & (ocds['Obsession Type'] == obsession_type)])
    total = len(ocds[ocds['Compulsion Type']== compulsion_type])
    return val/total

print("Individuals with the washing compulsion are likely to be obsessed with " + str(predict_compulsion('Washing')))
print("\nThe probability that an individual has both the washing compulsion and the symmetry obsession is " + str(predict_prob('Washing', 'Symmetry')))

Individuals with the washing compulsion are likely to be obsessed with Symmetry

The probability that an individual has both the washing compulsion and the symmetry obsession is 0.17133956386292834


# Predict Y-BOCS Score

In [9]:
def big_prediction_ybocs_obsession(age, gender, ethnicity, education):
    predict = ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]['Y-BOCS Score (Obsessions)'].mean()
    return predict
    
def big_prediction_ybocs_compulsion(age, gender, ethnicity, education):
    predict = ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]['Y-BOCS Score (Compulsions)'].mean()
    return predict

In [15]:
ybocs_obsession_predict = big_prediction_ybocs_obsession(32, 'Female', 'African', 'Some College')
ybocs_compulsion_predict = big_prediction_ybocs_compulsion(32, 'Female', 'African', 'Some College')

print("Predicted Y-BOCS (Obsessions) Score: " + str(ybocs_obsession_predict))
print("Predicted Y-BOCS (Compulsions) Score: " + str(ybocs_compulsion_predict))

Predicted Y-BOCS (Obsessions) Score: 17.333333333333332
Predicted Y-BOCS (Compulsions) Score: 10.333333333333334


In [16]:
def big_prediction_ybocs_obsession_laplace(age, gender, ethnicity, education, epsilon):
    total = ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]['Y-BOCS Score (Obsessions)'].sum()
    total = laplace_mech(total, ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]['Y-BOCS Score (Obsessions)'].max(), epsilon/2)
    final = total/(laplace_mech(len(ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]), 1, epsilon/2))
    return final

def big_prediction_ybocs_compulsion_laplace(age, gender, ethnicity, education, epsilon):
    total = ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]['Y-BOCS Score (Compulsions)'].sum()
    total = laplace_mech(total, ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]['Y-BOCS Score (Compulsions)'].max(), epsilon/2)
    final = total/(laplace_mech(len(ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]), 1, epsilon/2))
    return final

In [32]:
epsilon = 2

ybocs_obsession_predict_laplace = big_prediction_ybocs_obsession_laplace(32, 'Female', 'African', 'Some College', epsilon)
ybocs_compulsion_predict_laplace = big_prediction_ybocs_compulsion_laplace(32, 'Female', 'African', 'Some College', epsilon)

print("Predicted Y-BOCS (Obsessions) Score (with LaPlace noise): " + str(ybocs_obsession_predict_laplace))
print("Predicted Y-BOCS (Obsessions) Score Error: " + str(pct_error(ybocs_obsession_predict, ybocs_obsession_predict_laplace)))

print("\nPredicted Y-BOCS (Compulsions) Score (with LaPlace noise): " + str(ybocs_compulsion_predict_laplace))
print("Predicted Y-BOCS (Compulsions) Score Error: " + str(pct_error(ybocs_compulsion_predict, ybocs_compulsion_predict_laplace)))

Predicted Y-BOCS (Obsessions) Score (with LaPlace noise): 25.0561473523411
Predicted Y-BOCS (Obsessions) Score Error: 44.55469626350636

Predicted Y-BOCS (Compulsions) Score (with LaPlace noise): 10.822791666559617
Predicted Y-BOCS (Compulsions) Score Error: 4.736693547351125


In [18]:
def big_prediction_ybocs_obsession_gaussian(age, gender, ethnicity, education, epsilon, delta):
    total = ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]['Y-BOCS Score (Obsessions)'].sum()
    total = gaussian_mech(total, ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]['Y-BOCS Score (Obsessions)'].max(), epsilon/2, delta)
    final = total/(gaussian_mech(len(ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]), 1, epsilon/2, delta))
    return final

def big_prediction_ybocs_compulsion_gaussian(age, gender, ethnicity, education, epsilon, delta):
    total = ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]['Y-BOCS Score (Compulsions)'].sum()
    total = gaussian_mech(total, ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]['Y-BOCS Score (Compulsions)'].max(), epsilon/2, delta)
    final = total/(gaussian_mech(len(ocds[(ocds['Age'] == age) & (ocds['Gender'] == gender) & (ocds['Ethnicity'] == ethnicity) & (ocds['Education Level'] == education)]), 1, epsilon/2, delta))
    return final

In [51]:
epsilon = 2
delta = 1e-5

ybocs_obsession_predict_gaussian = big_prediction_ybocs_obsession_gaussian(32, 'Female', 'African', 'Some College', epsilon, delta)
ybocs_compulsion_predict_gaussian = big_prediction_ybocs_compulsion_gaussian(32, 'Female', 'African', 'Some College', epsilon, delta)


print("Predicted Y-BOCS (Obsessions) Score (with Gaussian noise): " + str(ybocs_obsession_predict_gaussian))
print("Predicted Y-BOCS (Obsessions) Score Error: " + str(pct_error(ybocs_obsession_predict, ybocs_obsession_predict_gaussian)))

print("\nPredicted Y-BOCS (Compulsions) Score (with Gaussian noise): " + str(ybocs_compulsion_predict_gaussian))
print("Predicted Y-BOCS (Compulsions) Score Error: " + str(pct_error(ybocs_compulsion_predict, ybocs_compulsion_predict_gaussian)))

Predicted Y-BOCS (Obsessions) Score (with Gaussian noise): -15.186696582296769
Predicted Y-BOCS (Obsessions) Score Error: 187.6155572055583

Predicted Y-BOCS (Compulsions) Score (with Gaussian noise): 68.49503401462653
Predicted Y-BOCS (Compulsions) Score Error: 562.8551678834824
