## Start

In [None]:
# ------------ Test Piecewise Mechanism ------------------------------ #
import sys, pathlib

# Directory one level above the current working directory
# (the folder that contains "test", per the original note)
project_root = pathlib.Path().resolve().parent

# Put it at the front of the module search path, but only if it is not
# already listed, so re-running this cell does not add duplicates
if sys.path.count(str(project_root)) == 0:
    sys.path[:0] = [str(project_root)]


from other_solutions.PM import *
from internal.initialize_data import *
from internal.normalizer import *
from internal.evaluation import *
import os
import progressbar  # pip install progressbar

# ---- Experiment configuration (overridable through environment variables) ----
# os.environ.get() returns a *string* whenever the variable is set, so the
# numeric settings are converted explicitly. Without the conversion,
# ALPHA * EPSILON raises TypeError (float * str) as soon as EPSILON is exported,
# and the seed / epsilon would reach PM_Class as text instead of numbers.
EPSILON = float(os.environ.get('EPSILON', 1))         # passed to PM_Class as its second argument
RADNOM_SEED = int(os.environ.get('RADNOM_SEED', 10))  # (sic) spelling kept: later cells use this name
# Only interpolated into the dataset file names, so it is left as provided
DATASET_NUMBER = os.environ.get('DATASET_NUMBER', 2)
# Forwarded to read_dataset(limited_number=...); presumably 0 means "no limit" — TODO confirm
LIMITED_NUMBER = int(os.environ.get('LIM', 0))
EVOLUTION_DOMAIN_SIZE = 360  # chosen for Syn.csv per the original note; not used in the cells shown here
ALPHA = 0.4
# ALPHA-scaled share of EPSILON ((sic) spelling kept for compatibility);
# not referenced by the cells shown here
epsiolon1 = ALPHA * EPSILON

## Initialize dataset

In [2]:
# Read the coarse data file and its matching domain file for the selected
# dataset number; the second value returned by read_dataset is not needed here
dataset, _ = read_dataset(
    f'../dataset/Data{DATASET_NUMBER}-coarse.dat',
    dataFrame=None,
    limited_number=int(LIMITED_NUMBER),
)
domains = attributes_domain(
    f'../dataset/Data{DATASET_NUMBER}-coarse.domain'
)
number_of_users = len(dataset)

# The mechanism object receives the length of the first record,
# the privacy budget and the random seed
pm_obj = PM_Class(
    len(dataset[0]),
    EPSILON,
    RADNOM_SEED,
)

# Quick sanity report of what was loaded
print('dataset[0] is', dataset[0])
print('number of users is', number_of_users)
print('k is', pm_obj.k)

Reading dataset ...
Reading domains ...
dataset[0] is [4, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0]
number of users is 45222
k is 1


## Normalize Dataset

In [3]:
# Rescale the dataset into [-1, 1] using the attribute domains
normalized_dataset = normalize_dataset(
    dataset,
    domains,
)

Normalizing dataset to [-1,1]


## Perturbation

In [4]:
print('Perturbation with Piecewise Mechanism ...')
# Perturb every normalized record with the mechanism object, keeping input order
retrieval_dataset = [
    pm_obj.perturb_tuple_PM(record) for record in normalized_dataset
]

Perturbation with Piecewise Mechanism ...


In [5]:
# Show the first normalized record next to its perturbed counterpart
print_table(
    normalized_dataset[0],
    retrieval_dataset[0],
    'Normalized Dataset',
    'Perturbed Dataset',
)

|   Normalized Dataset |   Perturbed Dataset |
|----------------------|---------------------|
|            -0.466667 |              0      |
|            -1        |              0      |
|            -1        |              0      |
|            -1        |              0      |
|             0.6      |              0      |
|            -1        |              0      |
|            -1        |              0      |
|            -1        |              0      |
|            -1        |              0      |
|            -1        |              0      |
|            -1        |              0      |
|            -1        |             -1.5289 |
|            -0.2      |              0      |
|            -1        |              0      |
|            -1        |              0      |


## Evaluation

In [6]:
# Map the perturbed records back through the attribute domains
denormalized = denormalize_dataset(retrieval_dataset, domains)

# MSE is computed between the normalized input and the perturbed output
mse = findMSE(normalized_dataset, retrieval_dataset)
print('MSE is', mse)

# Variation distance compares the original dataset with the denormalized one;
# only the second returned value (the average) is reported
_, avg = average_variation_distance(dataset, denormalized)
print('Average Variation Distance is', avg)

Denormaizing dataset ...
MSE is 0.8787913445058734
Average Variation Distance is 0.6021214601077911
