## Start

In [4]:
import sys, pathlib

# Path of the folder *above* "test"
project_root = pathlib.Path().resolve().parent

# Add it (only once) to sys.path
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

from internal.initialize_data import *
from internal.LOLOHA import *
from client.client import *
from server.server import *

import progressbar  # pip install progressbar

EPSILON = 1
RADNOM_SEED = 10
DATASET_NUMBER = 2
B = 0.005
DELTA = 0.001
LIMITED_NUMBER = 100
EVOLUTION_DOMAIN_SIZE = 360  # in order to Syn.csv
ALPHA = 0.4
epsiolon1 = ALPHA * EPSILON

## Initialize dataset

In [5]:
df = read_evolution_dataset('../dataset/Syn.csv')
dataset, evolution_dataset = read_dataset(f'../dataset/Data{DATASET_NUMBER}-coarse.dat', dataFrame=df, limited_number=LIMITED_NUMBER)
domains = attributes_domain(f'../dataset/Data{DATASET_NUMBER}-coarse.domain')
tau = len(evolution_dataset[0])
number_of_users = len(dataset)

print('dataset[0] is',dataset[0])
print('evolution_dataset[0][:10] is',evolution_dataset[0][:10])
print('tau is', tau)
print('number of users is', number_of_users)

Reading dataset ...
Append evolution data ...
Reading domains ...
dataset[0] is [4, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0]
evolution_dataset[0][:10] is [102, 56, 56, 56, 56, 145, 145, 145, 145, 145]
tau is 120
number of users is 100


## Real frequency for each data collection $t \in [\tau]$

In [6]:
dic_real_freq = compute_frequency(evolution_dataset, tau, EVOLUTION_DOMAIN_SIZE)
dic_real_freq[0]

array([0.  , 0.01, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.  ,
       0.  , 0.  , 0.01, 0.01, 0.  , 0.  , 0.  , 0.  , 0.  , 0.02, 0.01,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.01, 0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.  , 0.01,
       0.  , 0.  , 0.  , 0.  , 0.01, 0.01, 0.01, 0.  , 0.02, 0.01, 0.01,
       0.  , 0.  , 0.  , 0.01, 0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.01, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.01,
       0.01, 0.  , 0.  , 0.01, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.01, 0.  , 0.  , 0.02, 0.  , 0.  , 0.01, 0.01, 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.01, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.  , 0.02, 0.  ,
       0.  , 0.  , 0.01, 0.01, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
       0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.01, 0.

## Reduce domain size by hashing

In [7]:
g = compute_optimal_domain_size(EPSILON, ALPHA)
hashed_evolution_dataset, user_hash_functions = reduce_domain_dataset(evolution_dataset, g)

# Revise the domains (append evolution domain)
domains.append(list(range(g)))

# Client
client_obj = Client(EPSILON, RADNOM_SEED, B, DELTA)
# Server
server_obj = Server(domains)

print_table(evolution_dataset[0][:10], hashed_evolution_dataset[0][:10], 'evolution_dataset', 'hashed_evolution_dataset')

print('user hash functions are', user_hash_functions)

Reducing domain ...
|   evolution_dataset |   hashed_evolution_dataset |
|---------------------|----------------------------|
|                 102 |                          1 |
|                  56 |                          0 |
|                  56 |                          0 |
|                  56 |                          0 |
|                  56 |                          0 |
|                 145 |                          0 |
|                 145 |                          0 |
|                 145 |                          0 |
|                 145 |                          0 |
|                 145 |                          0 |
user hash functions are [7738027610190837472, 8626972237627649996, 5993720277083531080, 1055802978109315311, 5883490775981491677, 2822948385601967912, 4725253217072501770, 4612987033664339872, 5007681451284570534, 2647159966783034006, 7712471105073079460, 5957068449952623113, 2007053736414676656, 7698327011413103440, 7584104827084424359, 6731

## Perturbation with GRR

In [8]:
perturbed_evolution_dataset = perturbation_GRR(hashed_evolution_dataset, g, EPSILON, 0.2)

print_table(hashed_evolution_dataset[0][:10], perturbed_evolution_dataset[0][:10], 'hashed_evolution_dataset', 'perturbed_evolution_dataset')

Perturbation with GRR ...
|   hashed_evolution_dataset |   perturbed_evolution_dataset |
|----------------------------|-------------------------------|
|                          1 |                             0 |
|                          0 |                             1 |
|                          0 |                             0 |
|                          0 |                             0 |
|                          0 |                             0 |
|                          0 |                             1 |
|                          0 |                             0 |
|                          0 |                             0 |
|                          0 |                             1 |
|                          0 |                             1 |


## Normalize Dataset

In [9]:
# normalize to [-1,1]
normalized_dataset = normalize_dataset(dataset, domains)
normalized_evolution_dataset = normalize_dataset(perturbed_evolution_dataset, [list(range(g)) for _ in range(tau)])

print_table(perturbed_evolution_dataset[0][:10], normalized_evolution_dataset[0][:10], 'perturbed_evolution_dataset', 'normalized_evolution_dataset')

Normalizing dataset to [-1,1]
Normalizing dataset to [-1,1]
|   perturbed_evolution_dataset |   normalized_evolution_dataset |
|-------------------------------|--------------------------------|
|                             0 |                             -1 |
|                             1 |                              1 |
|                             0 |                             -1 |
|                             0 |                             -1 |
|                             0 |                             -1 |
|                             1 |                              1 |
|                             0 |                             -1 |
|                             0 |                             -1 |
|                             1 |                              1 |
|                             1 |                              1 |


## Wheel of Differential

In [10]:
print('Wheel of Differential ...')
retrieval_dataset = []
retrieval_evolutional_dataset = []

for data, data_e in zip(normalized_dataset, normalized_evolution_dataset):
    perturbed_data = client_obj.send_perturbed_avg_eigenvector(data, data_e)
    retrieval_data = server_obj.received_avg_eigenvector(perturbed_data)
    retrieval_dataset.append(retrieval_data[0][:-1])
    retrieval_evolutional_dataset.append(get_coloumn_dataset(retrieval_data, -1))


Wheel of Differential ...


## Evaluation

In [11]:
print_table([*normalized_dataset[0], normalized_evolution_dataset[0][0]], [*retrieval_dataset[0], retrieval_evolutional_dataset[0][0]],
            'normalized data', 'retrival data')

print('domain size of retrieval data is',len(retrieval_dataset[0]))
print('tau is', len(retrieval_evolutional_dataset[0]))

|   normalized data |   retrival data |
|-------------------|-----------------|
|         -0.466667 |       -0.467062 |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|          0.6      |        0.599605 |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -0.2      |       -0.200395 |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
domain size of retrieval data is 15
tau is 120


In [12]:
# denormalizing
denormalized = denormalize_dataset(retrieval_dataset, domains)
denormalized_evolution_dataset = denormalize_dataset(retrieval_evolutional_dataset, [list(range(g)) for _ in range(tau)])
rounded_evolution_dataset = round_dataset(denormalized_evolution_dataset)


Denormaizing dataset ...
Denormaizing dataset ...
Rounding dataset ...


In [13]:
print_table(dataset[0], denormalized[0], 'original', 'retrieved')
print_table(hashed_evolution_dataset[0][:10], rounded_evolution_dataset[0][:10], 'original evolution', 'retrieved evolution')

print('MSE is', findMSE(normalized_dataset, retrieval_dataset))
_, avg = average_variation_distance(dataset, denormalized)
print('Average Variation Distance is', avg)

|   original |    retrieved |
|------------|--------------|
|          4 |  3.99704     |
|          0 | -0.00118541  |
|          0 | -0.00296352  |
|          0 | -0.00296352  |
|         12 | 11.997       |
|          0 | -0.00118541  |
|          0 | -0.00256838  |
|          0 | -0.00098784  |
|          0 | -0.000790272 |
|          0 | -0.000197568 |
|          0 | -0.00296352  |
|          0 | -0.00296352  |
|          6 |  5.99704     |
|          0 | -0.00790272  |
|          0 | -0.000197568 |
|   original evolution |   retrieved evolution |
|----------------------|-----------------------|
|                    1 |                     0 |
|                    0 |                     1 |
|                    0 |                     0 |
|                    0 |                     0 |
|                    0 |                     0 |
|                    0 |                     1 |
|                    0 |                     0 |
|                    0 |                     0 |


## Evaluate Frequency Estimation

In [15]:
prog = progressbar.ProgressBar(maxval=tau)
prog.start()

dic_estimate_freq = []
for t in range(tau):
    dic_estimate_freq.append(LOLOHA_Aggregator(get_coloumn_dataset(rounded_evolution_dataset, t), user_hash_functions, EVOLUTION_DOMAIN_SIZE, EPSILON, epsiolon1, ALPHA))
    prog.update(t) 

prog.finish()

print_table(dic_real_freq[0][:10], dic_estimate_freq[0][:10], 'real frequency', 'estimate frequency')

print('MSE of frequency is', findMSE(dic_real_freq, dic_estimate_freq))

100% |########################################################################|

|   real frequency |   estimate frequency |
|------------------|----------------------|
|             0    |           0          |
|             0.01 |           0.00171821 |
|             0    |           0          |
|             0    |           0          |
|             0    |           0.00343643 |
|             0    |           0          |
|             0    |           0.0120275  |
|             0    |           0          |
|             0.01 |           0.00515464 |
|             0    |           0          |
MSE of frequency is 4.8033765512103453e-05



