Implementation of differential privacy value anonymization using Laplace mechanism.
$ pip install diff-priv-laplace-python
The Laplace mechanism consists of adding noise, generated through the Laplace distribution and the privacy budget, to a value. The derived value is said to be "anonymized" if the privacy budget used is good enough.
The privacy budget `epsilon` defines how much privacy protection to apply.
- If `epsilon` is large, less noise is added; more information leaks, so there is less privacy protection.
- If `epsilon` is small (it must be larger than zero), more noise is added; less information leaks, so there is more privacy protection.
When using a data set one tends to issue multiple statistical queries, such that one output might be correlated with another. In doing so we reveal more and therefore leak more information, so less privacy protection is present. To address this problem, we divide the provided privacy budget by the number of queries performed, creating a query privacy budget. This query privacy budget is then used for each statistical query in order to strengthen the privacy protection.
Unlike the pessimistic approach of sequential composition, when using disjoint data sets we assume there isn't any correlation between statistical queries. Therefore, if we have a privacy budget for each query we choose the maximum one and use it for all queries.
The Laplace sanitizer is an extension to the Laplace mechanism that is usable if it's possible to decompose categorical data into disjoint/independent subsets (e.g. a histogram or a contingency table). Under these circumstances it's possible to use parallel composition statistical queries.
For complete API documentation, check out the Python docs.
# Example: query the mean and variance of a single 1-D array through
# DiffPrivSequentialStatisticsQuery, passing one overall privacy budget.
import numpy as np
from diffpriv_laplace import DiffPrivSequentialStatisticsQuery, DiffPrivStatisticKind
epsilon = 0.1
# The integers 0..19 followed by the value 100.0.
data = np.array(list(range(0, 20)) + [100.0])
# Several statistic kinds are requested at once by combining them with `|`.
kinds = DiffPrivStatisticKind.mean | DiffPrivStatisticKind.variance
results = DiffPrivSequentialStatisticsQuery.query(data, kinds, epsilon)
# Example: query the mean and variance of several data slices through
# DiffPrivSequentialStatisticsQuery.
import numpy as np
from diffpriv_laplace import DiffPrivSequentialStatisticsQuery, DiffPrivStatisticKind
epsilon = 0.1
# A 2-D array made of three identical rows (0..19 followed by 100.0).
data = np.array([list(range(0, 20)) + [100.0]] * 3)
# One combined kind entry per row of `data`.
kinds = [DiffPrivStatisticKind.mean | DiffPrivStatisticKind.variance] * 3
# NOTE(review): axis=1 presumably treats each row as its own slice - confirm
# against the API docs.
results = DiffPrivSequentialStatisticsQuery.query(data, kinds, epsilon, axis=1)
# Example: query the mean and variance of a single 1-D array through
# DiffPrivParallelStatisticsQuery (the parallel-composition counterpart of
# the sequential query class).
import numpy as np
from diffpriv_laplace import DiffPrivParallelStatisticsQuery, DiffPrivStatisticKind
epsilon = 0.1
# The integers 0..19 followed by the value 100.0.
data = np.array(list(range(0, 20)) + [100.0])
# Several statistic kinds are requested at once by combining them with `|`.
kinds = DiffPrivStatisticKind.mean | DiffPrivStatisticKind.variance
results = DiffPrivParallelStatisticsQuery.query(data, kinds, epsilon)
# Example: query the mean and variance of several data slices through
# DiffPrivParallelStatisticsQuery.
import numpy as np
from diffpriv_laplace import DiffPrivParallelStatisticsQuery, DiffPrivStatisticKind
epsilon = 0.1
# A 2-D array made of three identical rows (0..19 followed by 100.0).
data = np.array([list(range(0, 20)) + [100.0]] * 3)
# One combined kind entry per row of `data`.
kinds = [DiffPrivStatisticKind.mean | DiffPrivStatisticKind.variance] * 3
# NOTE(review): axis=1 presumably treats each row as its own slice - confirm
# against the API docs.
results = DiffPrivParallelStatisticsQuery.query(data, kinds, epsilon, axis=1)
# Example: anonymized counts over disjoint subsets of a data set using
# DiffPrivLaplaceSanitizer.
import numpy as np
from diffpriv_laplace import DiffPrivLaplaceSanitizer
epsilon = 0.1
# Ten signed floats: the five listed values repeated twice.
data = np.array([0.01, -0.01, 0.03, -0.001, 0.1] * 2)
def selector_positive(data):
    """Select the non-negative entries of *data*.

    Despite the name, zero is included: the test is ``data >= 0.0``.
    The comparison is element-wise for NumPy arrays, yielding a boolean
    mask; for a plain number it yields a single bool.
    """
    return data >= 0.0
def selector_negative(data):
    """Select the strictly negative entries of *data*.

    The comparison ``data < 0.0`` is element-wise for NumPy arrays,
    yielding a boolean mask; for a plain number it yields a single bool.
    """
    return data < 0.0
# The two selectors split the data into disjoint subsets: values >= 0.0 and
# values < 0.0.
selectors = [selector_positive, selector_negative]
# NOTE(review): presumably returns one anonymized count per selector -
# confirm against the API docs.
value = DiffPrivLaplaceSanitizer.count(data, selectors, epsilon)
The anonymized statistics.
# Example: anonymized count over a boolean array.
# NOTE(review): presumably counts the True entries - confirm against the
# API docs.
import numpy as np
from diffpriv_laplace import DiffPrivStatistics
epsilon = 0.1
# Ten booleans: the five listed values repeated twice.
data = np.array([True, False, True, False, True] * 2)
value = DiffPrivStatistics.count(data, epsilon)
# Example: anonymized count over numeric data using a condition function.
import numpy as np
from diffpriv_laplace import DiffPrivStatistics
epsilon = 0.1
# Ten signed floats: the five listed values repeated twice.
data = np.array([0.01, -0.01, 0.03, -0.001, 0.1] * 2)
def condition(data):
    """Select the non-negative entries of *data* (zero included).

    The comparison ``data >= 0.0`` is element-wise for NumPy arrays,
    yielding a boolean mask; for a plain number it yields a single bool.
    """
    return data >= 0.0
# The condition function is passed by keyword.
# NOTE(review): presumably only entries for which it holds are counted -
# confirm against the API docs.
value = DiffPrivStatistics.count(data, epsilon, condition=condition)
# Example: anonymized minimum of the integers 1..100.
import numpy as np
from diffpriv_laplace import DiffPrivStatistics
epsilon = 0.1
data = np.array(list(range(1, 101)))
value = DiffPrivStatistics.min(data, epsilon)
# Example: anonymized maximum of the integers 1..100.
import numpy as np
from diffpriv_laplace import DiffPrivStatistics
epsilon = 0.1
data = np.array(list(range(1, 101)))
value = DiffPrivStatistics.max(data, epsilon)
# Example: anonymized median of the integers 1..100.
import numpy as np
from diffpriv_laplace import DiffPrivStatistics
epsilon = 0.1
data = np.array(list(range(1, 101)))
value = DiffPrivStatistics.median(data, epsilon)
# Example: anonymized proportion over a boolean array.
# NOTE(review): presumably the fraction of True entries - confirm against
# the API docs.
import numpy as np
from diffpriv_laplace import DiffPrivStatistics
epsilon = 0.1
# Ten booleans: the five listed values repeated twice.
data = np.array([True, False, True, False, True] * 2)
value = DiffPrivStatistics.proportion(data, epsilon)
# Example: anonymized proportion over numeric data using a condition function.
import numpy as np
from diffpriv_laplace import DiffPrivStatistics
epsilon = 0.1
# Ten signed floats: the five listed values repeated twice.
data = np.array([0.01, -0.01, 0.03, -0.001, 0.1] * 2)
def condition(data):
    """Select the non-negative entries of *data* (zero included).

    The comparison ``data >= 0.0`` is element-wise for NumPy arrays,
    yielding a boolean mask; for a plain number it yields a single bool.
    """
    return data >= 0.0
# The condition function is passed by keyword.
# NOTE(review): presumably the proportion of entries for which it holds is
# measured - confirm against the API docs.
value = DiffPrivStatistics.proportion(data, epsilon, condition=condition)
# Example: anonymized sum of the integers 1..100.
import numpy as np
from diffpriv_laplace import DiffPrivStatistics
epsilon = 0.1
data = np.array(list(range(1, 101)))
value = DiffPrivStatistics.sum(data, epsilon)
# Example: anonymized mean of the integers 1..100.
import numpy as np
from diffpriv_laplace import DiffPrivStatistics
epsilon = 0.1
data = np.array(list(range(1, 101)))
value = DiffPrivStatistics.mean(data, epsilon)
# Example: anonymized variance of the integers 1..100.
import numpy as np
from diffpriv_laplace import DiffPrivStatistics
epsilon = 0.1
data = np.array(list(range(1, 101)))
value = DiffPrivStatistics.variance(data, epsilon)
The core Laplace mechanism used to construct the anonymized statistics.
# Example: create a Laplace mechanism instance bound to a privacy budget.
from diffpriv_laplace import DiffPrivLaplaceMechanism
epsilon = 0.1
anonymizer = DiffPrivLaplaceMechanism(epsilon)
# Example: anonymize an already-computed count value. The class method takes
# the privacy budget per call; the instance carries the budget it was
# constructed with.
from diffpriv_laplace import DiffPrivLaplaceMechanism
epsilon = 0.1
value = 32.0
# Using the class method
anonymized = DiffPrivLaplaceMechanism.anonymize_count_with_budget(value, epsilon)
# Using an instance
anonymizer = DiffPrivLaplaceMechanism(epsilon)
anonymized = anonymizer.anonymize_count(value)
# Example: anonymize an already-computed minimum value. The class method
# takes the privacy budget per call; the instance carries the budget it was
# constructed with.
from diffpriv_laplace import DiffPrivLaplaceMechanism
epsilon = 0.1
value = 32.0
# Using the class method
anonymized = DiffPrivLaplaceMechanism.anonymize_min_with_budget(value, epsilon)
# Using an instance
anonymizer = DiffPrivLaplaceMechanism(epsilon)
anonymized = anonymizer.anonymize_min(value)
# Example: anonymize an already-computed maximum value. The class method
# takes the privacy budget per call; the instance carries the budget it was
# constructed with.
from diffpriv_laplace import DiffPrivLaplaceMechanism
epsilon = 0.1
value = 32.0
# Using the class method
anonymized = DiffPrivLaplaceMechanism.anonymize_max_with_budget(value, epsilon)
# Using an instance
anonymizer = DiffPrivLaplaceMechanism(epsilon)
anonymized = anonymizer.anonymize_max(value)
# Example: anonymize an already-computed median value. The class method
# takes the privacy budget per call; the instance carries the budget it was
# constructed with.
from diffpriv_laplace import DiffPrivLaplaceMechanism
epsilon = 0.1
value = 32.0
# Using the class method
anonymized = DiffPrivLaplaceMechanism.anonymize_median_with_budget(value, epsilon)
# Using an instance
anonymizer = DiffPrivLaplaceMechanism(epsilon)
anonymized = anonymizer.anonymize_median(value)
# Example: anonymize an already-computed proportion value. The class method
# takes the privacy budget per call; the instance carries the budget it was
# constructed with.
from diffpriv_laplace import DiffPrivLaplaceMechanism
epsilon = 0.1
# NOTE(review): `n` is presumably the number of observations behind the
# statistic - confirm against the API docs.
n = 50.0
value = 32.0
# Using the class method
anonymized = DiffPrivLaplaceMechanism.anonymize_proportion_with_budget(value, n, epsilon)
# Using an instance
anonymizer = DiffPrivLaplaceMechanism(epsilon)
anonymized = anonymizer.anonymize_proportion(value, n)
# Example: anonymize an already-computed sum value. The class method takes
# the privacy budget per call; the instance carries the budget it was
# constructed with.
from diffpriv_laplace import DiffPrivLaplaceMechanism
epsilon = 0.1
# NOTE(review): `lower` / `upper` are presumably the bounds of the underlying
# data values - confirm against the API docs.
lower = 0.1
upper = 100.3
value = 32.0
# Using the class method
anonymized = DiffPrivLaplaceMechanism.anonymize_sum_with_budget(value, lower, upper, epsilon)
# Using an instance
anonymizer = DiffPrivLaplaceMechanism(epsilon)
anonymized = anonymizer.anonymize_sum(value, lower, upper)
# Example: anonymize an already-computed mean value. The class method takes
# the privacy budget per call; the instance carries the budget it was
# constructed with.
from diffpriv_laplace import DiffPrivLaplaceMechanism
epsilon = 0.1
# NOTE(review): `lower` / `upper` are presumably the bounds of the underlying
# data values and `n` the number of observations - confirm against the API
# docs.
lower = 0.1
upper = 100.3
n = 50.0
value = 32.0
# Using the class method
anonymized = DiffPrivLaplaceMechanism.anonymize_mean_with_budget(value, lower, upper, n, epsilon)
# Using an instance
anonymizer = DiffPrivLaplaceMechanism(epsilon)
anonymized = anonymizer.anonymize_mean(value, lower, upper, n)
# Example: anonymize an already-computed variance value. The class method
# takes the privacy budget per call; the instance carries the budget it was
# constructed with.
from diffpriv_laplace import DiffPrivLaplaceMechanism
epsilon = 0.1
# NOTE(review): `lower` / `upper` are presumably the bounds of the underlying
# data values and `n` the number of observations - confirm against the API
# docs.
lower = 0.1
upper = 100.3
n = 50.0
value = 32.0
# Using the class method
anonymized = DiffPrivLaplaceMechanism.anonymize_variance_with_budget(value, lower, upper, n, epsilon)
# Using an instance
anonymizer = DiffPrivLaplaceMechanism(epsilon)
anonymized = anonymizer.anonymize_variance(value, lower, upper, n)
Please open an issue for anything not on this list!