In [42]:
# Load the data and libraries
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt

def laplace_mech(v, sensitivity, epsilon):
    return v + np.random.laplace(loc=0, scale=sensitivity / epsilon)

def gaussian_mech(v, sensitivity, epsilon, delta):
    return v + np.random.normal(loc=0, scale=sensitivity * np.sqrt(2*np.log(1.25/delta)) / epsilon)

def gaussian_mech_vec(vec, sensitivity, epsilon, delta):
    return [v + np.random.normal(loc=0, scale=sensitivity * np.sqrt(2*np.log(1.25/delta)) / epsilon)
            for v in vec]

In [43]:
# Load the data and libraries
import pandas as pd
import numpy as np

bear = pd.read_csv('https://raw.githubusercontent.com/jbennett979/Data_Privacy_FP/refs/heads/main/north_america_bear_killings.csv')

In [46]:
# Load data files
import numpy as np
import urllib.request
import io

url_x_pre = pd.read_csv('https://raw.githubusercontent.com/jbennett979/Data_Privacy_FP/refs/heads/main/north_america_bear_killings_processed_x.csv')
url_y_pre = pd.read_csv('https://raw.githubusercontent.com/jbennett979/Data_Privacy_FP/refs/heads/main/north_america_bear_killings_processed_y.csv')

url_x_pre = url_x_pre.dropna()
url_y_pre = url_y_pre.dropna()
print(url_x_pre)

X = url_x_pre.to_numpy()
print(X)
y = url_y_pre.to_numpy()


     27   6  2017  -1  -1.1  -1.2  1
0    16   6  2017  -1    -1    -1  1
1    27   5  2015  -1    -1    -1  1
2    22   9  2014  -1    -1     1  1
3    36   5  2014  -1    -1    -1  1
4    64   6  2013  -1    -1    -1  1
..   ..  ..   ...  ..   ...   ... ..
160   1  10  1908  -1    -1    -1  1
161  18  11  1906  -1    -1    -1  1
162   3   5  1901  -1    -1    -1 -1
163   5   5  1901  -1    -1    -1 -1
164   7   5  1901  -1    -1    -1 -1

[165 rows x 7 columns]


ValueError: could not convert string to float: '\xa0None'

In [40]:
# Split data into training and test sets
training_size = int(X.shape[0] * 0.8)

X_train = X[:training_size]
X_test = X[training_size:]

y_train = y[:training_size]
y_test = y[training_size:]

print(X_train)

print('Train and test set sizes:', len(y_train), len(y_test))


[['16' 6 2017 -1 -1 -1 1]
 ['27' 5 2015 -1 -1 -1 1]
 ['22' 9 2014 -1 -1 1 1]
 ['36' 5 2014 -1 -1 -1 1]
 ['64' 6 2013 -1 -1 -1 1]
 ['61' 7 2011 -1 -1 -1 1]
 ['72' 6 2011 -1 -1 -1 1]
 ['24' 8 2010 -1 -1 -1 1]
 ['37' 11 2018 -1 -1 -1 -1]
 ['0.83' 11 2018 -1 -1 -1 -1]
 ['18' 10 2018 -1 -1 -1 1]
 ['37' 9 2018 -1 -1 -1 1]
 ['44' 6 2018 -1 1 -1 1]
 ['38' 6 2016 -1 1 -1 1]
 ['63' 8 2015 -1 1 -1 1]
 ['53' 9 2014 1 -1 -1 1]
 ['54' 9 2014 1 -1 -1 1]
 ['31' 9 2014 -1 1 -1 1]
 ['49' 8 2012 -1 1 1 1]
 ['54' 10 2012 -1 1 -1 1]
 ['59' 8 2011 -1 1 1 1]
 ['57' 7 2011 -1 1 -1 1]
 ['48' 7 2010 -1 -1 -1 1]
 ['70' 6 2010 -1 1 -1 1]
 ['31' 7 2018 -1 -1 -1 1]
 ['37' 10 2009 -1 -1 -1 1]
 ['74' 8 2009 -1 -1 -1 1]
 ['70' 5 2008 -1 -1 -1 1]
 ['31' 7 2007 -1 -1 -1 1]
 ['11' 6 2007 -1 -1 -1 1]
 ['6' 4 2006 -1 -1 -1 1]
 ['30' 9 2005 -1 -1 -1 1]
 ['69' 8 2005 -1 -1 -1 1]
 ['71' 6 2005 -1 -1 -1 1]
 ['77' 9 2002 -1 -1 -1 1]
 ['31' 9 2002 -1 -1 -1 1]
 ['0.42' 8 2002 -1 -1 -1 1]
 ['93' 8 2001 -1 -1 -1 1]
 ['18' 6 2001 -1

In [39]:
def predict(xi, theta, bias=0):
    label = np.sign(xi @ theta + bias)
    return label

def accuracy(theta):
    return np.sum(predict(X_test, theta) == y_test)/X_test.shape[0]

def L2_clip(v, b):
    norm = np.linalg.norm(v, ord=2)
    
    if norm > b:
        return b * (v / norm)
    else:
        return v
    
# This is the gradient of the logistic loss
# The gradient is a vector that indicates the rate of change of the loss in each direction
def gradient(theta, xi, yi):
    print(xi)
    print(yi)
    exponent = yi * (xi.dot(theta))
    return - (yi*xi) / (1+np.exp(exponent))

def noisy_gradient_descent(iterations, epsilon, delta):
    theta = np.zeros(X_train.shape[1])
    noisy_count = laplace_mech(len(X_train), sensitivity=1, epsilon=epsilon)
    for i in range(iterations):
        grads = [gradient(theta, x_i, y_i) for x_i, y_i in zip(X_train, y_train)]
        b = 3 # clipping parameter (for the L2)
        clipped_grads = [L2_clip(g, b) for g in grads]
        sum_grad = np.sum(clipped_grads, axis=0)
        noisy_sum = gaussian_mech_vec(sum_grad, sensitivity=b, epsilon=epsilon, delta=delta)
        noisy_grad = np.array(noisy_sum) / noisy_count
        theta = theta - noisy_grad
    return theta

theta = noisy_gradient_descent(10, 10.0, 1e-5)
print('Final accuracy:', accuracy(theta))

['16' 6 2017 -1 -1 -1 1]
[-1]


TypeError: can't multiply sequence by non-int of type 'float'