# Debugging notebook

In [49]:
import math
import os
import time
from itertools import chain

import numpy as np
from scipy.special import comb

import testutilities
from data_generators import ContinuousGenerator
from netmechanism import FeaturesLattice, TargetsLattice

## Parameter setup

In [50]:
# General parameters
# batch_size is used to derive n_batches and is passed to the scoring and dataset-recovery helpers
batch_size = 3000
# NOTE(review): the previous remark here ("20 for test, 20 to train since test_frac is set to 0.5")
# no longer matches the values below (300 records, test_frac = 0.8) — confirm the intended split.
n_private = 300 # number of private records requested from ContinuousGenerator (passed as n)
# Forwarded to private_data.generate_data(test_frac = ...)
test_frac = 0.8

In [84]:
# Experiment specific
# dim is passed to the data generator (as d), to both lattices, and to the scoring helpers
dim = 2 # This won't really work for higher dimensions.
# Mesh quality parameters: num_points arguments for the features and targets lattices respectively
num_points_feat = 5
num_points_targ = 5
# Halved into scaled_epsilon when the utility scaling constant is computed
# (presumably the privacy budget — confirm against the mechanism's definition)
epsilon = 0.1

In [85]:
# Draw the private dataset (fixed seed of 23) and report the coefficients of its generating model
private_data = ContinuousGenerator(d=dim, n=n_private)
private_data.generate_data(test_frac=test_frac, seed=23)
print(
    "Coefficients of the model from which the private data was generated are",
    private_data.coefs,
)
# The private data's 'contribution' to the utility; later cells compare it with each synthetic candidate
F_tilde_x = testutilities.get_private_F_tilde(private_data)

Coefficients of the model from which the private data was generated are [[0.03459577]
 [0.89392521]]


In [86]:
# Visualise data
#%matplotlib tk
#private_data.plot_data()

In [87]:
# Generate the synthetic features and targets
# Features come from an L2 lattice built with num_points_feat points; the lattice object is kept
# in OutputLattice and its .points attribute is what the rest of the notebook uses.
OutputLattice = FeaturesLattice()
OutputLattice.generate_l2_lattice(dim = dim, num_points = num_points_feat)
features = OutputLattice.points
# Targets come from a separate lattice object built with num_points_targ points.
OutputLattice2 = TargetsLattice()
OutputLattice2.generate_lattice(dim = dim, num_points = num_points_targ)
targets = OutputLattice2.points

In [88]:
# Build the constant that turns a utility into a score: (epsilon / 2) * (inverse global sensitivity)
scaled_epsilon = epsilon / 2
# Inverse global sensitivity: half the number of rows in the private feature matrix
igs = private_data.features.shape[0] / 2
# Utility scaling constant
scaling_const = igs * scaled_epsilon

In [89]:
# Set other parameters necessary for the code to work
# Count the ways of choosing `dim` rows out of the feature lattice exactly (as a Python int) and
# use integer ceiling division, so the batch count cannot be thrown off by float rounding when
# the count is an exact multiple of batch_size.
n_feature_combinations = comb(features.shape[0], dim, exact=True)
n_batches = -(-n_feature_combinations // batch_size)
print("Number of batches is", n_batches)
experiment_name = 'test_struct_integrity'
# The experiments root defaults to the original author's local folder; set the
# THESIS_EXPERIMENTS_DIR environment variable to run the notebook on another machine.
experiments_root = os.environ.get(
    'THESIS_EXPERIMENTS_DIR',
    'C:/Users/alexc/OneDrive/Documents/GitHub/Thesis/Experiments',
).rstrip('/')
directory = experiments_root + '/' + experiment_name + '/OutcomeSpace'
# Base file name handed to the scoring helper, e.g. "s_eps01d2" for epsilon = 0.1 and dim = 2
base_filename_s = "s_eps" + str(epsilon).replace(".", "") + "d" + str(dim)

Number of batches is 1


## Run the experiment

In [90]:
t_start = time.time()
results = []
for batch_index in range(n_batches):
    results.append(
        testutilities.evaluate_sample_score(
            batch_index,
            features,
            targets,
            scaling_const,
            F_tilde_x,
            dim,
            batch_size,
            base_filename_s,
            directory,
        )
    )
# t_elapsed holds the end timestamp; the elapsed time is the difference printed below
t_elapsed = time.time()
print("Time elapsed for single core processing of this small case is..." + " " + str(t_elapsed - t_start))

# To Borja:
# The outcomes are processed in batches, and each batch appends one tuple to results. Each tuple holds:
# [0]: The maximum scaled utility for that batch (the scaling constant is scaling_const, computed above).
# [1]: A matrix with the scaled utilities for the batch. Within a fixed row X'X is the same; only X'y changes.
# [2]: np.sum(np.exp(scaled_utilities)), a partial sum that can be used to work out the partition function.
# [3]: A list of index tuples. The first entry is the batch index; the second and third are the row and column
#      of a maximum in the matrix of scaled utilities. Several combinations can maximise the scaled utility.
#      These indices are used to 'recover' the synthetic data sets and print them out.

Time elapsed for single core processing of this small case is... 0.003999948501586914


In [91]:
# Recover the synthetic datasets that yield maximum utility.
# The helper receives the per-batch results together with the lattices they were computed from;
# its return value is treated as a list of datasets by the cells below.
synthetic_datasets = testutilities.get_optimal_datasets(results, features, targets, batch_size, dim)

## Print the datasets

In [92]:
# To Borja: the datasets are printed here; each one is an element of the synthetic_datasets list.
# Change range_lim_low / range_lim_up to print only a specific slice of them.
range_lim_low, range_lim_up = 0, len(synthetic_datasets)
for index in range(range_lim_low, range_lim_up):
    print(synthetic_datasets[index])
# Total number of recovered datasets
print(len(synthetic_datasets))

[[ 0.  -0.5 -0.5]
 [-0.5  0.   0. ]]
[[ 0.  -0.5 -0.5]
 [ 0.5  0.   0. ]]
[[ 0.   0.5  0.5]
 [-0.5  0.   0. ]]
[[0.  0.5 0.5]
 [0.5 0.  0. ]]
4


In [93]:
# Print F_tilde_x, the private-data quantity returned by testutilities.get_private_F_tilde,
# so it can be compared by eye with the synthetic counterparts printed further down
print(F_tilde_x)

[[0.07376403 0.00409484 0.0062124 ]
 [0.00409484 0.11673498 0.10449401]]


In [94]:
# Calculate and print F_tilde_r, the synthetic counterpart of F_tilde_x, for every recovered dataset.
# Note: `dataset` stays bound to the last element once this loop finishes.
for dataset in synthetic_datasets:
    print(testutilities.get_synthetic_F_tilde(dataset, dim))
# Single-dataset inspection, left disabled: the hard-coded index 31 raises IndexError whenever
# fewer than 32 datasets were recovered (only 4 were recovered with the current settings).
# print (testutilities.get_synthetic_F_tilde(synthetic_datasets[31], dim))
# print (synthetic_datasets[31])

[[0.125 0.    0.   ]
 [0.    0.125 0.125]]
[[0.125 0.    0.   ]
 [0.    0.125 0.125]]
[[0.125 0.    0.   ]
 [0.    0.125 0.125]]
[[0.125 0.    0.   ]
 [0.    0.125 0.125]]


IndexError: list index out of range

In [80]:
# For each recovered dataset, print the negated largest absolute entry-wise difference between
# the private F_tilde_x and that dataset's own F_tilde (presumably its utility — confirm).
# The loop variable itself must be passed to the helper: using a stale `dataset` name left over
# from another cell would print the same value on every iteration.
for synthetic_dataset in synthetic_datasets:
    print(-np.max(np.abs(F_tilde_x - testutilities.get_synthetic_F_tilde(synthetic_dataset, dim))))

-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855
-0.07673498083068855


## Sanity checks

In [32]:
# Calculate the utilities and scores of the recovered datasets. They should be identical for all datasets
scores, scaled_utilities, utilities = testutilities.calculate_recovered_scores(synthetic_datasets, F_tilde_x, scaling_const, dim)
# Convert to arrays so each quantity can be compared against its first entry in a single call
scores = np.array(scores)
utilities = np.array(utilities)
scaled_utilities = np.array(scaled_utilities)
assert scores.size > 0, "no recovered datasets to check"
# Ensure all datasets give the same utility/score/scaled_utility.
# The comparison is made against the first entry itself rather than against 0.0: the relative
# tolerance is scaled by the magnitude of the reference value, so comparing a difference with
# zero would silently disable rtol and leave only the default absolute tolerance in effect.
assert np.allclose(scores, scores[0], rtol=1e-9), "recovered datasets have different scores"
assert np.allclose(utilities, utilities[0], rtol=1e-9), "recovered datasets have different utilities"
assert np.allclose(scaled_utilities, scaled_utilities[0], rtol=1e-9), "recovered datasets have different scaled utilities"

In [33]:
# Show the three per-dataset arrays checked above; within each array all entries are expected to match
print(scores)
print(utilities)
print(scaled_utilities)

[0.76070084 0.76070084]
[-0.02735151 -0.02735151]
[-0.27351511 -0.27351511]


In [161]:
# Check the maximum scaled utility matches with the calculated results
# NOTE(review): maxima_indices is not defined in any cell visible in this notebook; it looks like
# the indices of the batches in `results` that attain the overall maximum — confirm and define it
# in an earlier cell so this check survives Restart & Run All.
# Element [0] of each results tuple is the batch's maximum scaled utility. Collect them into an
# array so the arithmetic below does not depend on the element type of a plain Python list.
max_scaled_utilities = np.array([results[index][0] for index in maxima_indices])
# Compare against the reference value directly; comparing a difference with 0.0 would make the
# relative tolerance a no-op.
assert np.allclose(max_scaled_utilities, max_scaled_utilities[0], rtol=1e-9), "batch maxima disagree"
assert np.allclose(scaled_utilities, max_scaled_utilities[0], rtol=1e-9), "recovered scaled utilities do not match the batch maximum"