## Start

In [1]:
from internal.initialize_data import *
from internal.LOLOHA import *
from client.client import *
from server.server import *

from rich.progress import Progress  # pip install rich

# Experiment configuration read by the cells below.
EPSILON = 1                # passed to reduce_domain, perturbation_GRR and Client (presumably the privacy budget -- TODO confirm)
RANDOM_SEED = 10           # seed value handed to reduce_domain and Client
RADNOM_SEED = RANDOM_SEED  # misspelled legacy name, kept as an alias because later cells still reference it
DATASET_NUMBER = 2         # selects dataset/Data{DATASET_NUMBER}-coarse.dat and the matching .domain file
B = 0.005                  # third argument to Client; semantics live in client.client
DELTA = 0.001              # fourth argument to Client; semantics live in client.client
GET_EVOLUTION = True       # second positional argument to read_dataset
LIMITED_NUMBER = 100       # forwarded to read_dataset as limited_number=

## Initialize dataset

In [2]:
# Read the coarse dataset selected by DATASET_NUMBER; read_dataset hands back
# the static records together with the per-record evolution sequences.
dataset, evolution_dataset = read_dataset(
    f'dataset/Data{DATASET_NUMBER}-coarse.dat',
    GET_EVOLUTION,
    limited_number=LIMITED_NUMBER,
)
# Attribute domains come from the companion .domain file.
domains = attributes_domain(f'dataset/Data{DATASET_NUMBER}-coarse.domain')

# Sanity peek: the first record and the first ten evolution values of that record.
print('dataset[0] is', dataset[0])
print('evolution_dataset[0][:10] is', evolution_dataset[0][:10])

Reading dataset ...
Append evolution data ...
Reading domains ...
dataset[0] is [4, 0, 0, 0, 12, 0, 0, 0, 0, 0, 0, 0, 6, 0, 0]
evolution_dataset[0][:10] is [102, 56, 56, 56, 56, 145, 145, 145, 145, 145]


## Reduce domain size by hashing

In [3]:
# Hash the evolution values into a smaller domain; `g` is the resulting domain size.
# NOTE(review): the meaning of the trailing 0.4 is defined inside reduce_domain -- confirm and name it.
hashed_evolution_dataset, g = reduce_domain(
    evolution_dataset,
    EPSILON,
    RADNOM_SEED,
    0.4,
)

# Extend the attribute domains with the hashed evolution domain 0..g-1.
# NOTE: this mutates `domains` in place, so re-running this cell without
# re-running the loading cell appends the evolution domain a second time.
domains.append([*range(g)])

# Client side: built from the privacy/configuration constants.
client_obj = Client(EPSILON, RADNOM_SEED, B, DELTA)
# Server side: built from the extended domain list.
server_obj = Server(domains)

# Show the first ten raw evolution values of record 0 next to their hashed values.
print_table(
    evolution_dataset[0][:10],
    hashed_evolution_dataset[0][:10],
    'evolution_dataset',
    'hashed_evolution_dataset',
)

Reducing domain ...
Optimal domain size is 2
|   evolution_dataset |   hashed_evolution_dataset |
|---------------------|----------------------------|
|                 102 |                          0 |
|                  56 |                          1 |
|                  56 |                          1 |
|                  56 |                          1 |
|                  56 |                          1 |
|                 145 |                          1 |
|                 145 |                          1 |
|                 145 |                          1 |
|                 145 |                          1 |
|                 145 |                          1 |


## Perturbation with GRR

In [4]:
# Perturb the hashed evolution values with GRR over a domain of size g.
# NOTE(review): the trailing 0.2 is interpreted by perturbation_GRR -- confirm its meaning and name it.
perturbed_evolution_dataset = perturbation_GRR(
    hashed_evolution_dataset,
    g,
    EPSILON,
    0.2,
)

# Compare the first ten hashed values of record 0 with their perturbed versions.
print_table(
    hashed_evolution_dataset[0][:10],
    perturbed_evolution_dataset[0][:10],
    'hashed_evolution_dataset',
    'perturbed_evolution_dataset',
)

Perturbation with GRR ...
|   hashed_evolution_dataset |   perturbed_evolution_dataset |
|----------------------------|-------------------------------|
|                          0 |                             1 |
|                          1 |                             0 |
|                          1 |                             1 |
|                          1 |                             1 |
|                          1 |                             1 |
|                          1 |                             0 |
|                          1 |                             1 |
|                          1 |                             1 |
|                          1 |                             0 |
|                          1 |                             0 |


## Normalize Dataset

In [5]:
# Map both datasets onto [-1, 1].
# Static attributes are normalized against `domains`.
normalized_dataset = normalize_dataset(dataset, domains)

# tau = number of evolution values stored for one record; every one of those
# positions shares the same hashed domain 0..g-1.
tau = len(evolution_dataset[0])
normalized_evolution_dataset = normalize_dataset(
    perturbed_evolution_dataset,
    [list(range(g)) for _ in range(tau)],
)

# First ten perturbed values of record 0 beside their normalized counterparts.
print_table(
    perturbed_evolution_dataset[0][:10],
    normalized_evolution_dataset[0][:10],
    'perturbed_evolution_dataset',
    'normalized_evolution_dataset',
)

Normalizing dataset to [-1,1]
Normalizing dataset to [-1,1]
|   perturbed_evolution_dataset |   normalized_evolution_dataset |
|-------------------------------|--------------------------------|
|                             1 |                              1 |
|                             0 |                             -1 |
|                             1 |                              1 |
|                             1 |                              1 |
|                             1 |                              1 |
|                             0 |                             -1 |
|                             1 |                              1 |
|                             1 |                              1 |
|                             0 |                             -1 |
|                             0 |                             -1 |


## Wheel of Differential

In [11]:
# Per-record results of the client -> server round trip.
retrieval_dataset, retrieval_evolutional_dataset = [], []

for data, data_e in zip(normalized_dataset, normalized_evolution_dataset):
    # Client perturbs one record (static part + evolution part) ...
    perturbed_data = client_obj.send_perturbed_avg_eigenvector(data, data_e)
    # ... and the server reconstructs a sequence of rows from it.
    retrieval_data = server_obj.received_avg_eigenvector(perturbed_data)

    # Static attributes: first reconstructed row without its last column.
    retrieval_dataset.append(retrieval_data[0][:-1])
    # Evolution values: the last column of every reconstructed row.
    retrieval_evolutional_dataset.append(
        [recovered_row[-1] for recovered_row in retrieval_data]
    )





## Evaluation

In [7]:
# Side-by-side view for record 0: static attributes followed by the first
# evolution value, before transmission (left) and after retrieval (right).
print_table(
    list(normalized_dataset[0]) + [normalized_evolution_dataset[0][0]],
    list(retrieval_dataset[0]) + [retrieval_evolutional_dataset[0][0]],
    'normalized data',
    'retrival data',
)

# Size checks on the retrieved record.
print('domain size of retrieval data is', len(retrieval_dataset[0]))
print('tau is', len(retrieval_evolutional_dataset[0]))

|   normalized data |   retrival data |
|-------------------|-----------------|
|         -0.466667 |       -0.467062 |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|          0.6      |        0.599605 |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|         -0.2      |       -0.200395 |
|         -1        |       -1.0004   |
|         -1        |       -1.0004   |
|          1        |        0.999605 |
domain size of retrieval data is 15
tau is 120


In [8]:
# Undo the [-1, 1] normalization on the retrieved data.
# Static attributes use `domains`.
denormalized = denormalize_dataset(retrieval_dataset, domains)
# Each of the tau evolution positions uses the hashed domain 0..g-1.
denormalized_evolution_dataset = denormalize_dataset(
    retrieval_evolutional_dataset,
    [list(range(g)) for _ in range(tau)],
)


Denormaizing dataset ...
Denormaizing dataset ...


In [9]:
# Record 0: original static attributes vs. their denormalized reconstruction.
print_table(dataset[0], denormalized[0], 'original', 'retrieved')
# Record 0: first ten hashed (pre-perturbation) evolution values vs. the reconstruction.
print_table(hashed_evolution_dataset[0][:10], denormalized_evolution_dataset[0][:10], 'original evolution', 'retrieved evolution')

# Error in normalized space between what was sent and what was retrieved.
print('MSE is', findMSE(normalized_dataset, retrieval_dataset))
# average_variation_distance returns a pair; only the second element (the average)
# is reported. The first is bound to a named throwaway rather than `_`, because
# assigning `_` at notebook top level shadows IPython's last-output variable.
_unused_first_result, avg = average_variation_distance(dataset, denormalized)
print('Average Variation Distance is', avg)

|   original |    retrieved |
|------------|--------------|
|          4 |  3.99704     |
|          0 | -0.00118541  |
|          0 | -0.00296352  |
|          0 | -0.00296352  |
|         12 | 11.997       |
|          0 | -0.00118541  |
|          0 | -0.00256838  |
|          0 | -0.00098784  |
|          0 | -0.000790272 |
|          0 | -0.000197568 |
|          0 | -0.00296352  |
|          0 | -0.00296352  |
|          6 |  5.99704     |
|          0 | -0.00790272  |
|          0 | -0.000197568 |
|   original evolution |   retrieved evolution |
|----------------------|-----------------------|
|                    0 |           0.999802    |
|                    1 |          -0.000204816 |
|                    1 |           0.999802    |
|                    1 |           0.999802    |
|                    1 |           0.999802    |
|                    1 |          -0.000204816 |
|                    1 |           0.999802    |
|                    1 |           0.999802    |
