In [1]:
import sys
sys.path.append('./privatizers')
sys.path.append('./dataProviders')
sys.path.append('./adapters')

from sklearn import tree
from sklearn.datasets import fetch_covtype
from adapters import generalAdapter as adapter
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split
import numpy as np

# Fetch the covertype dataset via scikit-learn and unpack its feature
# matrix and target vector.
dp = fetch_covtype()
X, Y = dp.data, dp.target

# Hold out 30% of the rows for evaluation; the split is seeded so the same
# partition is produced on every run.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=100,
)


In [2]:
## Dataset (size-capped training subset)
# Use at most `desired_size` training rows, bounded by the number of rows the
# training split actually holds.
desired_size = 100
MAX_SIZE = len(X_train)
size = min(desired_size, MAX_SIZE)

# Samples and labels are cut with the same bound so rows stay aligned.
data_set = {
    'samples': X_train[:size],
    'labels': y_train[:size],
}

In [5]:
## Decision tree without privatization
# Baseline: fit on the raw (non-privatized) training subset and score on the
# held-out test split.
#
# random_state is fixed (same value as the train/test split) because
# scikit-learn randomly permutes the features at each split; without a seed
# the fitted tree -- and therefore the printed accuracy -- can change from
# one run to the next.
clf = tree.DecisionTreeClassifier(random_state=100)
clf.fit(data_set['samples'], data_set['labels'])

y_pred = clf.predict(X_test)

print("Accuracy is", accuracy_score(y_test,y_pred))

Accuracy is 0.600651734899945


In [4]:
## Privatization: Laplace privatizer
# For each privacy budget in `epsilon`, privatize the training samples and
# labels, fit a fresh decision tree on the privatized data, and report its
# accuracy on the test split.
from privatizers import laplacePrivatizer

# NOTE(review): the meaning of GeneralAdapter's arguments (7, 1) is not
# visible here; 7 presumably matches the number of cover-type classes --
# confirm against the adapter's definition.
ad = adapter.GeneralAdapter(7, 1)

dataInput = ad.fromRaw(data_set['samples'])
dataTarget = ad.fromRaw(data_set['labels'])

# This instance is only used to derive the sensitivities of the input data;
# every epsilon in the sweep below constructs its own privatizer.
privatizer = laplacePrivatizer.LaplacePrivatizer(1.0)

inputSensitivity = privatizer.getSensitivityList(dataInput)
# NOTE(review): a scalar is passed as `sensitivityList` for the targets while
# the inputs use the value returned by getSensitivityList -- verify that
# privatize() accepts both forms and that 7 is the intended label sensitivity.
targetSensitivity = 7

epsilon = [1.0, 1.5, 2.0, 2.5, 3.0]

# NOTE(review): the noise is presumably drawn inside LaplacePrivatizer and is
# not seeded here, so the accuracies below will still vary between runs --
# TODO confirm how to seed it.
for eps in epsilon:
    privatizer = laplacePrivatizer.LaplacePrivatizer(eps)
    privateData = privatizer.privatize(dataInput, sensitivityList = inputSensitivity)

    privateTargetsFloat = privatizer.privatize(dataTarget, sensitivityList = targetSensitivity)
    privateTargets = ad.toDiscreteValue(privateTargetsFloat)

    # Seeded (same value as the train/test split) so the tree itself does not
    # add run-to-run variation on top of the privatization noise.
    clf = tree.DecisionTreeClassifier(random_state=100)
    clf.fit(privateData, privateTargets)
    # NOTE(review): X_test is passed to predict() without going through
    # ad.fromRaw(); if fromRaw() changes the feature representation, training
    # and test inputs differ -- confirm.
    y_pred = clf.predict(X_test)

    print("Accuracy for epsilon = ", eps," is", accuracy_score(y_test,y_pred))

Accuracy for epsilon =  1.0  is 0.07245961079493299
Accuracy for epsilon =  1.5  is 0.46734440976684416
Accuracy for epsilon =  2.0  is 0.48324765926197905
Accuracy for epsilon =  2.5  is 0.4857490361666973
Accuracy for epsilon =  3.0  is 0.49909927483018174
