In [1]:
import sys
sys.path.append('./privatizers')
sys.path.append('./dataProviders')
sys.path.append('./adapters')

from sklearn.naive_bayes import GaussianNB
from sklearn.datasets import fetch_covtype
from adapters import generalAdapter as adapter
from sklearn.metrics import accuracy_score
from sklearn.model_selection import train_test_split

from sklearn.datasets import load_iris


# dp = fetch_covtype()
dp = load_iris()

X = dp.data
Y = dp.target

In [2]:
# ## Size adjustment
# desired_size = 100
# min_test_size = 0.3
# test_size = 0.3

# total_size = len(X)

# if (desired_size < min_test_size * total_size):
#     test_size = 1-(desired_size / total_size);

# X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = test_size)

# ## Dataset
# data_set = {
#     'samples': X_train,
#     'labels': y_train
# }

In [3]:
## Multiple Dataset sizes
desired_sizes = [100]
data_sets = []

min_test_size = 0.2

total_size = len(X)

for desired_size in desired_sizes:
    test_size = 0.2
    if (desired_size < (1-min_test_size) * total_size):
        test_size = 1-(desired_size / total_size);

    X_train, X_test, y_train, y_test = train_test_split(X, Y, test_size = test_size)

    ## Dataset
    single_data_set = {
        'samples': X_train,
        'labels': y_train
    }
    data_sets.append(single_data_set)

In [4]:
## SVM without privatization
def run_without_privatization():
    for data_set in data_sets:
        gnb = GaussianNB()
        gnb.fit(data_set['samples'], data_set['labels'])

        y_pred = gnb.predict(X_test)
        print(str(accuracy_score(y_test,y_pred)).replace('.', ','))

        ## With cross validation
        # scores = cross_val_score(gnb, X, Y, cv=5)
        # for v in scores:
        #     print(str(v).replace('.', ','))

In [5]:
## Privatization: Laplace privatizer
from privatizers import laplacePrivatizer

def run_with_privatization():
    for data_set in data_sets:
        ad = adapter.GeneralAdapter(7, 1)

        dataInput = ad.fromRaw(data_set['samples'])
        dataTarget = ad.fromRaw(data_set['labels'])

        privatizer = laplacePrivatizer.LaplacePrivatizer(1.0)

        inputSensitivity = privatizer.getSensitivityList(dataInput)
        targetSensitivity = 7

    #     epsilon = [1.0, 1.5, 2.0, 2.5, 3.0]
    #     epsilon = [30.0, 20.0, 10.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.5, 0.01, 0.001]
        epsilon = [100.0, 90.0, 80.0, 70.0, 60.0, 50.0, 40.0, 30.0, 20.0, 10.0, 5.0, 4.0, 3.0, 2.0, 1.0, 0.5, 0.01, 0.001]

        privatizer = laplacePrivatizer.LaplacePrivatizer(1.0)
        privateData = privatizer.privatize(dataInput, sensitivityList = inputSensitivity)
        for i in epsilon:
            privatizer = laplacePrivatizer.LaplacePrivatizer(i)
            privateData = privatizer.privatize(dataInput, sensitivityList = inputSensitivity)

    #         privateTargetsFloat = privatizer.privatize(dataTarget, sensitivityList = targetSensitivity)
    #         privateTargets = ad.toDiscreteValue(privateTargetsFloat)
            privateTargets = dataTarget

            gnb = GaussianNB()
            gnb.fit(privateData, privateTargets)

            y_pred = gnb.predict(X_test)
            print(str(accuracy_score(y_test,y_pred)).replace('.', ','))

In [6]:
for i in range(5):
    print("Iteration ", i)
    print("Without")
    run_without_privatization()
    print("With")
    run_with_privatization()

Iteration  0
Without
0,9803921568627451
With
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9019607843137255
0,9607843137254902
0,7254901960784313
0,6274509803921569
0,5882352941176471
0,29411764705882354
0,29411764705882354
0,3333333333333333
Iteration  1
Without
0,9803921568627451
With
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9607843137254902
0,9019607843137255
0,8627450980392157
0,9019607843137255
0,803921568627451
0,37254901960784315
0,37254901960784315
0,3333333333333333
0,29411764705882354
Iteration  2
Without
0,9803921568627451
With
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803921568627451
0,9803