In [44]:
# Load the data and libraries
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

def laplace_mech(v, sensitivity, epsilon):
    return v + np.random.laplace(loc=0, scale=sensitivity / epsilon)

def gaussian_mech(v, sensitivity, epsilon, delta):
    return v + np.random.normal(loc=0, scale=sensitivity * np.sqrt(2*np.log(1.25/delta)) / epsilon)

def gaussian_mech_vec(vec, sensitivity, epsilon, delta):
    return [v + np.random.normal(loc=0, scale=sensitivity * np.sqrt(2*np.log(1.25/delta)) / epsilon)
            for v in vec]

In [45]:
# Load the data and libraries
import pandas as pd
import numpy as np

bear = pd.read_csv('https://raw.githubusercontent.com/jbennett979/Data_Privacy_FP/refs/heads/main/north_america_bear_killings.csv')

In [54]:
# Load data files
import numpy as np
import urllib.request
import io

#url_x_pre = pd.read_csv('https://raw.githubusercontent.com/jbennett979/Data_Privacy_FP/refs/heads/main/north_america_bear_killings_processed_x.csv')
#url_y_pre = pd.read_csv('https://raw.githubusercontent.com/jbennett979/Data_Privacy_FP/refs/heads/main/north_america_bear_killings_processed_y.csv')

#url_x_pre = url_x_pre.dropna()
#url_y_pre = url_y_pre.dropna()

#X = url_x_pre.to_numpy()
#y = url_y_pre.to_numpy()

x_pre = bear[[' age', 'Month', 'Year', 'Grizzly', 'Hikers', 'Only one killed']]
y_pre = bear['Hunter']

x_pre[['Grizzly', 'Hikers', 'Only one killed']] = x_pre[['Grizzly', 'Hikers', 'Only one killed']].replace(0, -1)
print(x_pre)

X = x_pre.to_numpy()
y = y_pre.to_numpy()


      age  Month  Year  Grizzly  Hikers  Only one killed
0      27      6  2017       -1      -1                1
1      16      6  2017       -1      -1                1
2      27      5  2015       -1      -1                1
3      22      9  2014       -1       1                1
4      36      5  2014       -1      -1                1
..    ...    ...   ...      ...     ...              ...
159     1     10  1908       -1      -1                1
160    18     11  1906       -1      -1                1
161     3      5  1901       -1      -1               -1
162     5      5  1901       -1      -1               -1
163     7      5  1901       -1      -1               -1

[164 rows x 6 columns]


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  x_pre[['Grizzly', 'Hikers', 'Only one killed']] = x_pre[['Grizzly', 'Hikers', 'Only one killed']].replace(0, -1)


In [55]:
# Split data into training and test sets
training_size = int(X.shape[0] * 0.8)

X_train = X[:training_size]
X_test = X[training_size:]

y_train = y[:training_size]
y_test = y[training_size:]

print('Train and test set sizes:', len(y_train), len(y_test))


Train and test set sizes: 131 33


In [56]:
# This is the gradient of the logistic loss
# The gradient is a vector that indicates the rate of change of the loss in each direction
def gradient(theta, xi, yi):
    exponent = yi * (xi.dot(theta))
    return - (yi*xi) / (1+np.exp(exponent))

def predict(xi, theta, bias=0):
    label = np.sign(xi @ theta + bias)
    return label

def accuracy(theta):
    return np.sum(predict(X_test, theta) == y_test)/X_test.shape[0]

def L2_clip(v, b):
    norm = np.linalg.norm(v, ord=2)
    
    if norm > b:
        return b * (v / norm)
    else:
        return v

def gradient_sum(theta, X, y, b):
    gradients = [L2_clip(gradient(theta, x_i, y_i), b) for x_i, y_i in zip(X,y)]
        
    # sum query
    # L2 sensitivity is b (by clipping performed above)
    return np.sum(gradients, axis=0)
   

def noisy_gradient_descent(iterations, epsilon, delta):
    theta = np.zeros(X_train.shape[1])
    b = 3

    noisy_count = laplace_mech(X_train.shape[0], 1, epsilon)

    for i in range(iterations):
        clipped_gradient_sum = gradient_sum(theta, X_train, y_train, b)
        noisy_gradient_sum = np.array(gaussian_mech_vec(clipped_gradient_sum, b, epsilon, delta))
        noisy_avg_gradient = noisy_gradient_sum / noisy_count
        theta = theta - noisy_avg_gradient

    return theta

theta = noisy_gradient_descent(10, 10.0, 1e-5)
print('Final accuracy:', accuracy(theta))

Final accuracy: 0.09090909090909091


In [57]:
def gaussian_mech_RDP_vec(vec, sensitivity, alpha, epsilon):
    sigma = np.sqrt((sensitivity**2 * alpha) / (2 * epsilon))
    return [v + np.random.normal(loc=0, scale=sigma) for v in vec]

def noisy_gradient_descent_RDP(iterations, alpha, epsilon_bar):
    epsilon_i = epsilon_bar/(iterations+1)
    theta = np.zeros(X_train.shape[1])
    noisy_count = gaussian_mech_RDP_vec(pd.Series([len(X_train)]), sensitivity=1, alpha=alpha, epsilon=epsilon_i)
    for i in range(iterations):
        grads = [gradient(theta, x_i, y_i) for x_i, y_i in zip(X_train, y_train)]
        b = 3 # clipping parameter (for the L2)
        clipped_grads = [L2_clip(g, b) for g in grads]
        sum_grad = np.sum(clipped_grads, axis=0)
        noisy_sum = gaussian_mech_RDP_vec(sum_grad, sensitivity=b, alpha=alpha, epsilon=epsilon_i)
        noisy_grad = np.array(noisy_sum) / noisy_count
        theta = theta - noisy_grad
    return theta

theta = noisy_gradient_descent_RDP(10, 20, 0.1)
print('Final accuracy:', accuracy(theta))

Final accuracy: 0.09090909090909091


  return - (yi*xi) / (1+np.exp(exponent))
