### Generate synthetic data using private marginals


In [22]:
# get dataset
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

def laplace_mech(v, sensitivity, epsilon):
    """Return ``v`` plus one draw of zero-centred Laplace noise.

    The noise scale is ``sensitivity / epsilon``; the draw comes from
    numpy's global random state.
    """
    noise_scale = sensitivity / epsilon
    noise = np.random.laplace(loc=0, scale=noise_scale)
    return v + noise

def gaussian_mech(v, sensitivity, epsilon, delta):
    """Return ``v`` plus one draw of zero-mean Gaussian noise.

    The standard deviation is
    ``sensitivity * sqrt(2 * ln(1.25 / delta)) / epsilon``; the draw comes
    from numpy's global random state.
    """
    root_term = np.sqrt(2 * np.log(1.25 / delta))
    sigma = sensitivity * root_term / epsilon
    noise = np.random.normal(loc=0, scale=sigma)
    return v + noise

def gaussian_mech_vec(vec, sensitivity, epsilon, delta):
    """Return a list with independent Gaussian noise added to each element.

    Every element of ``vec`` gets its own draw with standard deviation
    ``sensitivity * sqrt(2 * ln(1.25 / delta)) / epsilon``, taken in order
    from numpy's global random state. The result is a plain Python list.
    """
    def _perturb(value):
        sigma = sensitivity * np.sqrt(2 * np.log(1.25 / delta)) / epsilon
        return value + np.random.normal(loc=0, scale=sigma)

    return list(map(_perturb, vec))

def pct_error(orig, priv):
    """Return the percent error of ``priv`` relative to ``orig``.

    Computed as ``|orig - priv| / |orig| * 100``. The magnitude of ``orig``
    is used in the denominator so the result is non-negative even when the
    reference value is negative. Works element-wise on numpy arrays.

    ``orig == 0`` is not special-cased: the division then yields ``inf`` or
    ``nan`` through numpy's usual floating-point rules.
    """
    return np.abs(orig - priv) / np.abs(orig) * 100.0

# Load the bear-killings CSV from the project's GitHub repository into a DataFrame.
bear = pd.read_csv(filepath_or_buffer='https://raw.githubusercontent.com/jbennett979/Data_Privacy_FP/refs/heads/main/north_america_bear_killings.csv')

In [26]:
# Strip leading whitespace from every column name (the age header carries a stray space).
bear.columns = [name.lstrip() for name in bear.columns]
# Capitalize the age and gender column names in a single rename.
bear = bear.rename(columns={'age': 'Age', 'gender': 'Gender'})

# Parse Age as numeric; entries that cannot be parsed become NaN.
bear['Age'] = pd.to_numeric(bear['Age'], errors='coerce')

In [28]:
# calculates a differentially private one-way marginal for a given column
def dp_marginal(col, epsilon):
    """Return a noisy one-way marginal (probability per value) for ``bear[col]``.

    Steps:
      1. Count occurrences of each value present in the column
         (``value_counts`` skips NaN by default).
      2. Add Laplace noise to every count via ``laplace_mech`` with
         sensitivity 1 and the given ``epsilon``.
      3. Clip negative noisy counts to 0 and normalize so the result sums to 1.

    If every noisy count is clipped to zero, normalizing would be 0/0 and
    produce an all-NaN marginal; a uniform marginal over the observed values
    is returned instead so the result is always a usable distribution.

    Returns a pandas Series indexed by the column's observed values.
    """
    hist = bear[col].value_counts()
    noisy_hist = hist.apply(lambda x: laplace_mech(x, sensitivity=1, epsilon=epsilon))

    # clip once and reuse for both numerator and denominator
    clipped = noisy_hist.clip(lower=0)
    total = clipped.sum()

    if clipped.empty or total > 0:
        # convert hist to 1-way marginal
        return clipped / total

    # all mass was clipped away: fall back to a uniform distribution
    return pd.Series(1.0 / len(clipped), index=clipped.index, name=clipped.name)

# Example: noisy marginal over the Age column with epsilon = 0.1.
dp_marginal(col='Age', epsilon=0.1)

Age
40    0.073666
24    0.015750
19    0.000000
37    0.000000
31    0.009343
        ...   
23    0.017472
43    0.000000
8     0.001103
76    0.000000
52    0.021447
Name: count, Length: 64, dtype: float64