In [1]:
from cookiemonster.epsilon_calculator import (
    get_epsilon_from_accuracy_for_counts,
    get_epsilon_for_high_probability_relative_error_wrt_prior,
    get_epsilon_for_relative_rmse_wrt_prior
)

In [2]:
import numpy as np
import plotly.express as px

In [3]:
# Scenario: the prior on the query result matches the true query result.
batch_size = 20_000
estimated_average_count = 0.005
sensitivity = 5
true_query_result = 100

# Prior on the query result: estimated per-record average times the batch size.
expected_result = estimated_average_count * batch_size

print(f"Expected result (prior): {expected_result}, true query result: {true_query_result}")

# Epsilon derived from the prior, requesting relative_error=0.05 with
# failure_probability=0.01.
epsilon = get_epsilon_for_high_probability_relative_error_wrt_prior(
    sensitivity=sensitivity,
    expected_result=expected_result,
    relative_error=0.05,
    failure_probability=0.01,
)

# Simulate repeated releases of the query with Laplace noise of scale
# sensitivity / epsilon. The generator is seeded locally so that
# Restart & Run All reproduces the same figure.
n_repetitions = 100
rng = np.random.default_rng(0)
repeated_noisy_results = rng.laplace(
    loc=true_query_result,
    scale=sensitivity / epsilon,
    size=n_repetitions,
)

px.scatter(
    x=np.arange(n_repetitions),
    y=repeated_noisy_results,
    range_y=[0, 200],
    labels={"x": "repetition", "y": "noisy query result"},
    title=f"Noisy results, prior = {expected_result}, epsilon = {float(epsilon):.3g}",
)

Expected result (prior): 100.0, true query result: 100


In [4]:
# Scenario: the prior overestimates the true query result by a factor of 10.
batch_size = 20_000
estimated_average_count = 0.05
sensitivity = 5
true_query_result = 100

# Prior on the query result: estimated per-record average times the batch size.
expected_result = estimated_average_count * batch_size

# If we overestimate the true result, we get noisy results!

print(f"Expected result (prior): {expected_result}, true query result: {true_query_result}")

# Epsilon derived from the (too large) prior, requesting relative_error=0.05
# with failure_probability=0.01.
epsilon = get_epsilon_for_high_probability_relative_error_wrt_prior(
    sensitivity=sensitivity,
    expected_result=expected_result,
    relative_error=0.05,
    failure_probability=0.01,
)

# Simulate repeated releases of the query with Laplace noise of scale
# sensitivity / epsilon. The generator is seeded locally so that
# Restart & Run All reproduces the same figure.
n_repetitions = 100
rng = np.random.default_rng(0)
repeated_noisy_results = rng.laplace(
    loc=true_query_result,
    scale=sensitivity / epsilon,
    size=n_repetitions,
)

px.scatter(
    x=np.arange(n_repetitions),
    y=repeated_noisy_results,
    range_y=[0, 200],
    labels={"x": "repetition", "y": "noisy query result"},
    title=f"Noisy results, prior = {expected_result}, epsilon = {float(epsilon):.3g}",
)

Expected result (prior): 1000.0, true query result: 100


In [5]:
# Scenario: the prior assumes an average count of 1 per record.
batch_size = 20_000
estimated_average_count = 1
sensitivity = 5
true_query_result = 100

# Prior on the query result: estimated per-record average times the batch size.
expected_result = estimated_average_count * batch_size

# The Turbo formula is implicitly assuming the average will be 1.
# That can be a huge overestimation if the true result happens to be low.
print(f"Expected result (prior): {expected_result}, true query result: {true_query_result}")

# Epsilon derived from the prior, requesting relative_error=0.05 with
# failure_probability=0.01.
epsilon = get_epsilon_for_high_probability_relative_error_wrt_prior(
    sensitivity=sensitivity,
    expected_result=expected_result,
    relative_error=0.05,
    failure_probability=0.01,
)

# Simulate repeated releases of the query with Laplace noise of scale
# sensitivity / epsilon. The generator is seeded locally so that
# Restart & Run All reproduces the same figure.
n_repetitions = 100
rng = np.random.default_rng(0)
repeated_noisy_results = rng.laplace(
    loc=true_query_result,
    scale=sensitivity / epsilon,
    size=n_repetitions,
)

# The y-range is fixed to [0, 200]; draws outside it are not visible.
px.scatter(
    x=np.arange(n_repetitions),
    y=repeated_noisy_results,
    range_y=[0, 200],
    labels={"x": "repetition", "y": "noisy query result"},
    title=f"Noisy results, prior = {expected_result}, epsilon = {float(epsilon):.3g}",
)

Expected result (prior): 20000, true query result: 100


In [6]:
# Scenario: epsilon computed by get_epsilon_from_accuracy_for_counts from the
# batch size alone (presumably the Turbo formula itself -- TODO confirm).
batch_size = 20_000
estimated_average_count = 1
sensitivity = 5
true_query_result = 100

# Only used for the printed summary below; it is not passed to the epsilon
# computation in this cell.
expected_result = estimated_average_count * batch_size

# The Turbo formula is implicitly assuming the average will be 1.
# That can be a huge overestimation if the true result happens to be low.
print(f"Expected result (prior): {expected_result}, true query result: {true_query_result}")

# Same numeric targets as the prior-based cells (0.05 and 0.01), passed here
# as `a` and `b`.
epsilon = get_epsilon_from_accuracy_for_counts(
    cap_value=sensitivity,
    n=batch_size,
    a=0.05,
    b=0.01,
)

# Simulate repeated releases of the query with Laplace noise of scale
# sensitivity / epsilon. The generator is seeded locally so that
# Restart & Run All reproduces the same figure.
n_repetitions = 100
rng = np.random.default_rng(0)
repeated_noisy_results = rng.laplace(
    loc=true_query_result,
    scale=sensitivity / epsilon,
    size=n_repetitions,
)

# The y-range is fixed to [0, 200]; draws outside it are not visible.
px.scatter(
    x=np.arange(n_repetitions),
    y=repeated_noisy_results,
    range_y=[0, 200],
    labels={"x": "repetition", "y": "noisy query result"},
    title=f"Noisy results, n = {batch_size}, epsilon = {float(epsilon):.3g}",
)

Expected result (prior): 20000, true query result: 100


In [7]:
# Scenario: the prior matches the true result, epsilon chosen from a relative
# RMSE target instead of a high-probability bound.
batch_size = 20_000
estimated_average_count = 0.005
sensitivity = 5
true_query_result = 100

# Prior on the query result: estimated per-record average times the batch size.
expected_result = estimated_average_count * batch_size

# RMSE works the same as the high-probability bound, but needs only one parameter.
# (It implicitly fixes beta = exp(-sqrt(2)), approx. 0.24, which is pretty big,
# so we would need to take a smaller error target to compensate.)
# Maybe simpler to use RMSE to combine bias and variance.

print(f"Expected result (prior): {expected_result}, true query result: {true_query_result}")

# Epsilon derived from the prior, requesting relative_error=0.05 (no failure
# probability parameter here).
epsilon = get_epsilon_for_relative_rmse_wrt_prior(
    sensitivity=sensitivity,
    expected_result=expected_result,
    relative_error=0.05,
)

# Simulate repeated releases of the query with Laplace noise of scale
# sensitivity / epsilon. The generator is seeded locally so that
# Restart & Run All reproduces the same figure.
n_repetitions = 100
rng = np.random.default_rng(0)
repeated_noisy_results = rng.laplace(
    loc=true_query_result,
    scale=sensitivity / epsilon,
    size=n_repetitions,
)

px.scatter(
    x=np.arange(n_repetitions),
    y=repeated_noisy_results,
    range_y=[0, 200],
    labels={"x": "repetition", "y": "noisy query result"},
    title=f"Noisy results (RMSE target), prior = {expected_result}, epsilon = {float(epsilon):.3g}",
)

Expected result (prior): 100.0, true query result: 100
