In [1]:
# Let's start with a simple example of using the scikit-learn library

import sys
sys.path.append('./privatizers')
sys.path.append('./dataProviders')
sys.path.append('./adapters')

from sklearn import svm
from dataProviders import breastCancerDataProvider as dataProvider
from adapters import breastCancerAdapter as adapter
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

# Pull every sample and its target out of the breast-cancer data provider.
dp = dataProvider.BreastCancerDP()
X, Y = dp.getAllData(), dp.getAllTargets()

# Hold out 30% of the rows for evaluation; the fixed random_state makes the
# split identical from one run to the next.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=100,
)


In [2]:
## Size adjustment
# Cap the training set at `desired_size` entries; when fewer are available,
# every training entry is kept.
desired_size = 100000
MAX_SIZE = len(X_train)
size = desired_size if desired_size < MAX_SIZE else MAX_SIZE

## Dataset
# Training samples and labels, each truncated to the first `size` entries.
data_set = {
    'samples': X_train[:size],
    'labels': y_train[:size],
}

In [3]:
## SVM without privatization
# Baseline: fit an SVC on the unmodified training data, then score its
# predictions on the held-out test split.
# fit() returns the estimator itself, so construction and training are chained.
clf = svm.SVC(gamma='scale').fit(data_set['samples'], data_set['labels'])

y_pred = clf.predict(X_test)
print("Accuracy is", accuracy_score(y_test, y_pred) * 100)

Accuracy is 90.64327485380117


In [7]:
## Privatization: Laplace privatizer
from privatizers import laplacePrivatizer

# epsilon is handed straight to the LaplacePrivatizer constructor.
# NOTE(review): presumably the privacy budget (larger value -> less noise);
# confirm against the LaplacePrivatizer implementation.
epsilon = 100.0
privatizer = laplacePrivatizer.LaplacePrivatizer(epsilon)

# Both the samples and the labels are converted with adapter.fromRaw and then
# privatized. Samples go first, labels second; keep that order.
# NOTE(review): if the privatizer draws random noise without a fixed seed, the
# accuracy printed below will vary between runs -- confirm.
privateData = privatizer.privatize(
    adapter.fromRaw(data_set['samples'])
)
privateTargetsFloat = privatizer.privatize(
    adapter.fromRaw(data_set['labels'])
)

# The privatized labels are passed through adapter.toBinaryInt before being
# used as classifier targets.
privateTargets = adapter.toBinaryInt(privateTargetsFloat)

# Same classifier configuration as the non-private run, trained on the
# privatized data. fit() returns the estimator itself.
clf = svm.SVC(gamma='scale').fit(privateData, privateTargets)

# Evaluation uses X_test / y_test as produced by the train/test split; they
# are not passed through the adapter or the privatizer here.
y_pred = clf.predict(X_test)
print("Accuracy is", accuracy_score(y_test, y_pred) * 100)


Accuracy is 88.88888888888889
