# Demo

In [1]:
# run for Jax GPU
from dpkernel import dpmmd, dphsic, human_readable_dict
from jax import random
import jax.numpy as jnp
from jax.config import config
config.update("jax_enable_x64", True)
X = jnp.array([1, 2])
X.device()

StreamExecutorGpuDevice(id=0, process_index=0, slice_index=0)

## dpMMD

In [2]:
# generate data for two-sample testing
m = 400
n = 500
d = 10
key = random.PRNGKey(0)
key, subkey = random.split(key)
subkeys = random.split(subkey, num=2)
X = random.uniform(subkeys[0], shape=(m, d))
Y = random.uniform(subkeys[1], shape=(n, d)) + 1

In [3]:
# compile function
epsilon = 30 / jnp.sqrt(n)
key, subkey = random.split(key)
output, dictionary = dpmmd(subkey, X, Y, epsilon, return_dictionary=True)

In [4]:
# Now the function runs fast for any inputs X and Y of the compiled shaped (500, 10) and any epsilon
# If the shape is changed, the function will need to be compiled again
# key = random.PRNGKey(1)  # different initialisation
subkeys = random.split(subkey, num=2)  # different randomness
X = random.uniform(subkeys[0], shape=(m, d))
Y = random.uniform(subkeys[1], shape=(n, d)) + 1 
%timeit output, dictionary = dpmmd(subkey, X, Y, epsilon, return_dictionary=True)

1.1 ms ± 142 ns per loop (mean ± std. dev. of 7 runs, 1,000 loops each)


In [5]:
# generate data and run test
# key = random.PRNGKey(2)  # different initialisation
subkeys = random.split(subkey, num=2)  # different randomness
X = random.uniform(subkeys[0], shape=(m, d))
Y = random.uniform(subkeys[1], shape=(n, d)) + 1
key, subkey = random.split(key)
output, dictionary = dpmmd(subkey, X, Y, epsilon, return_dictionary=True)

In [6]:
# output is a jax array consisting of either 0 or 1
output 

Array(1, dtype=int64)

In [7]:
# to convert it to an int use: 
output.item()

1

In [8]:
# test details can be returned in a dictionary
human_readable_dict(dictionary) # use to convert jax arrays to scalars
dictionary

{'Bandwidth': 3.1622776601683795,
 'DP delta': 0,
 'DP epsilon': 1.3416407864998738,
 'Kernel gaussian': True,
 'Non-privatised MMD V-statistic': 1.0303134075639377,
 'Number of permutations': 2000,
 'Privacy Laplace noise for MMD V-statistic': 0.0024464216479214897,
 'Privatised MMD V-statistic': 1.0327598292118592,
 'Privatised MMD quantile': 0.07304867614381999,
 'Privatised p-value': 0.0004997501382604241,
 'Privatised p-value threshold': 0.05,
 'Test level': 0.05,
 'dpMMD test reject': True}

In [9]:
# Various parameters of the test can be adjusted.
# See the docstring in dpkernel.py for details.
key, subkey = random.split(key)
dpmmd(
    subkey,
    X,
    Y,
    epsilon,
    delta=0,
    alpha=0.05,
    kernel="gaussian",
    bandwidth_multiplier=1,
    number_permutations=2000, 
    return_dictionary=False,
    min_mem_kernel=False,
    min_mem_permutations=False,
)

Array(1, dtype=int64)

## dpHSIC

In [10]:
# generate data for independence testing
n = 500
d_X = d_Y = 10
key = random.PRNGKey(0)
key, subkey = random.split(key)
subkeys = random.split(subkey, num=2)
X = random.uniform(subkeys[0], shape=(n, d_X))
Y = X + 0.01 * random.uniform(subkeys[1], shape=(n, d_Y))

In [11]:
# compile function
epsilon = 30 / jnp.sqrt(n)
key, subkey = random.split(key)
output, dictionary = dphsic(subkey, X, Y, epsilon, return_dictionary=True)

In [12]:
# Now the function runs fast for any inputs X and Y of the compiled shaped (500, 100) and any epsilon
# If the shape is changed, the function will need to be compiled again
# key = random.PRNGKey(1)  # different initialisation
subkeys = random.split(subkey, num=2)  # different randomness
# subkeys = random.split(subkey, num=2)
X = random.uniform(subkeys[0], shape=(n, d_X))
Y = X + 0.01 * random.uniform(subkeys[1], shape=(n, d_Y))
%timeit output, dictionary = dphsic(subkey, X, Y, epsilon, return_dictionary=True)

25.5 ms ± 2.26 µs per loop (mean ± std. dev. of 7 runs, 10 loops each)


In [13]:
# generate data and run test
# key = random.PRNGKey(2)  # different initialisation
subkeys = random.split(subkey, num=2)  # different randomness
X = random.uniform(subkeys[0], shape=(n, d_X))
Y = X + 0.01 * random.uniform(subkeys[1], shape=(n, d_Y))
key, subkey = random.split(key)
output, dictionary = dphsic(subkey, X, Y, epsilon, return_dictionary=True)

In [14]:
# output is a jax array consisting of either 0 or 1
output 

Array(1, dtype=int64)

In [15]:
# to convert it to an int use: 
output.item()

1

In [16]:
# test details can be returned in a dictionary
human_readable_dict(dictionary) # use to convert jax arrays to scalars
dictionary

{'Bandwidth X': 3.1622776601683795,
 'Bandwidth Y': 3.1622776601683795,
 'DP delta': 0,
 'DP epsilon': 1.3416407864998738,
 'Kernel X gaussian': True,
 'Kernel Y gaussian': True,
 'Non-privatised HSIC V-statistic': 0.04372783993115604,
 'Number of permutations': 2000,
 'Privacy Laplace noise for HSIC V-statistic': 0.005524549037481856,
 'Privatised HSIC V-statistic': 0.04925238896863789,
 'Privatised HSIC quantile': 0.03282314987336105,
 'Privatised p-value': 0.015992004424333572,
 'Privatised p-value threshold': 0.05,
 'Test level': 0.05,
 'dpHSIC test reject': True}

In [17]:
# Various parameters of the test can be adjusted.
# See the docstring in dpkernel.py for details.
key, subkey = random.split(key)
dphsic(
    subkey,
    X,
    Y,
    epsilon,
    delta=0,
    alpha=0.05,
    bandwidth_multiplier_X=1,
    bandwidth_multiplier_Y=1,
    kernel_X="gaussian",
    kernel_Y="gaussian",
    number_permutations=2000,
    return_dictionary=False,
    min_mem_kernel=False,
)

Array(1, dtype=int64)