In [1]:
import sys
sys.path.append('./privatizers')
sys.path.append('./dataProviders')
sys.path.append('./adapters')

from sklearn.neural_network import MLPClassifier
from sklearn.datasets import fetch_covtype
from adapters import generalAdapter as adapter
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

dp = fetch_covtype()

X = dp.data
Y = dp.target

In [2]:
# ## Size adjustment
# desired_size = 10000
# min_test_size = 0.3
# test_size = 0.3

# total_size = len(X)

# if (desired_size < min_test_size * total_size):
#     test_size = 1-(desired_size / total_size);

# X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = test_size)

# ## Dataset
# data_set = {
#     'samples': X_train,
#     'labels': y_train
# }

In [3]:
## Multiple Dataset sizes
desired_sizes = [10, 100, 1000, 10000]
data_sets = []

min_test_size = 0.2

total_size = len(X)

for desired_size in desired_sizes:
    test_size = 0.2
    if (desired_size < (1-min_test_size) * total_size):
        test_size = 1-(desired_size / total_size);

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = test_size)

    ## Dataset
    single_data_set = {
        'samples': X_train,
        'labels': y_train
    }
    data_sets.append(single_data_set)

In [4]:
for data_set in data_sets:
    ## SVM without privatization
    clf = MLPClassifier(solver='adam', hidden_layer_sizes=(100, 50), max_iter=1000)
    clf.fit(data_set['samples'], data_set['labels'])  

    y_pred = clf.predict(X_test)
    print(str(accuracy_score(y_test,y_pred)).replace('.', ','))

    ## With cross validation
    # scores = cross_val_score(clf, X, Y, cv=5)
    # for v in scores:
    #     print(str(v).replace('.', ','))

0,47236310270186965
0,46173810708006136
0,4777517810483843
0,6610876829208493


In [5]:
## Privatization: Laplace privatizer
from privatizers import laplacePrivatizer

for data_set in data_sets:
    ad = adapter.GeneralAdapter(7, 1)

    dataInput = ad.fromRaw(data_set['samples'])
    dataTarget = ad.fromRaw(data_set['labels'])

    privatizer = laplacePrivatizer.LaplacePrivatizer(1.0)

    inputSensitivity = privatizer.getSensitivityList(dataInput)
    targetSensitivity = 7

    epsilon = [1.0, 1.5, 2.0, 2.5, 3.0]

    privatizer = laplacePrivatizer.LaplacePrivatizer(1.0)
    privateData = privatizer.privatize(dataInput, sensitivityList = inputSensitivity)
    for i in epsilon:
        privatizer = laplacePrivatizer.LaplacePrivatizer(i)
        privateData = privatizer.privatize(dataInput, sensitivityList = inputSensitivity)

        privateTargetsFloat = privatizer.privatize(dataTarget, sensitivityList = targetSensitivity)
        privateTargets = ad.toDiscreteValue(privateTargetsFloat)

        clf = MLPClassifier(solver='adam', hidden_layer_sizes=(100, 50), max_iter=1000)
        clf.fit(privateData, privateTargets)

        y_pred = clf.predict(X_test)
        print(str(accuracy_score(y_test,y_pred)).replace('.', ','))

0,48776207855526676
0,41821187645793784
0,392412068397862
0,4720145986424103
0,43037974683544306
0,444938109882104
0,4408681428761567
0,3220230047704777
0,4139667817839205
0,4386195036181376
0,3495828458946572
0,36058261472613534
0,42873004420222344
0,4347421770470673
0,3144504844031299
0,3741129783612253
0,48698976553907797
0,36797300231869035
0,5078754912331089
0,5008195974865677
