In [27]:
# Let's start with a simple example of the scikit lib use

import sys
sys.path.append('./privatizers')
sys.path.append('./dataProviders')
sys.path.append('./adapters')

from sklearn.neural_network import MLPClassifier
from dataProviders import breastCancerDataProvider as dataProvider
from adapters import breastCancerAdapter as adapter
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

dp = dataProvider.BreastCancerDP()

X = dp.getAllData()
Y = dp.getAllTargets()

X_train, X_test, y_train, y_test = train_test_split( X, Y, test_size = 0.3, random_state = 100)


In [28]:
## Dataset
data_set = {
    'samples': X,
    'labels': Y
}

## Size adjustment
MAX_SIZE = len(data_set['samples'])
desired_size = 10000
size = min(desired_size, MAX_SIZE)

data_set['samples'] = data_set['samples'][:size]
data_set['labels'] = data_set['labels'][:size]

In [29]:
## SVM without privatization
clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(data_set['samples'], data_set['labels'])  

y_pred = clf.predict(X_test)
print("Accuracy is", accuracy_score(y_test,y_pred)*100)

Accuracy is 40.35087719298245


In [33]:
## Privatization: Laplace privatizer
from privatizers import laplacePrivatizer

scale = 1000.0
privatizer = laplacePrivatizer.LaplacePrivatizer(scale)
privateData = privatizer.privatize(adapter.fromRaw(data_set['samples']))

privateTargetsFloat = privatizer.privatize(adapter.fromRaw(data_set['labels']))
privateTargets = adapter.toBinaryInt(privateTargetsFloat)

clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(privateData, privateTargets)

y_pred = clf.predict(X_test)
print("Accuracy is", accuracy_score(y_test,y_pred)*100)

Accuracy is 59.64912280701754


In [34]:
## Privatization: Exponential privatizer
from privatizers import exponentialPrivatizer

scale = 1000.0
privatizer = exponentialPrivatizer.ExponentialPrivatizer(scale)
privateData = privatizer.privatize(adapter.fromRaw(data_set['samples']))

privateTargetsFloat = privatizer.privatize(adapter.fromRaw(data_set['labels']))
privateTargets = adapter.toBinaryInt(privateTargetsFloat)

clf = MLPClassifier(solver='lbfgs', alpha=1e-5, hidden_layer_sizes=(5, 2), random_state=1)
clf.fit(privateData, privateTargets)

y_pred = clf.predict(X_test)
print("Accuracy is", accuracy_score(y_test,y_pred)*100)

Accuracy is 59.64912280701754
