# Understanding Differentially Private mean

This notebook shows how to calculate a DP mean two ways.

I started writing it a few months ago. It's messy and not well-documented, but I'm including it in case useful. 

In [169]:
#import diffprivlib
import numpy as np
import pandas as pd
from random import randrange, uniform

from opendp.mod import enable_features
enable_features("contrib")

### Compare OpenDP mean with unknown N to mean using Exponential mechanism
# https://docs.opendp.org/en/stable/user/transformations/aggregation-mean.html

In [171]:
N = randrange(100,200)


bounds = (0.,30.)

x = [uniform(*bounds) for i in range(N)]

truemean = np.mean(x)
#print([round(i,1) for i in x])
print(np.mean(x),N)

15.676380217409728 119


In [175]:
## 'Naive' method using OpenDP
from opendp.transformations import make_count, make_clamp, make_bounded_resize
from opendp.measurements import make_base_discrete_laplace, make_base_laplace
from opendp.transformations import make_bounded_sum
from opendp.combinators import make_basic_composition

epsilon = 1.


In [173]:
def dpmean_naive(x: list[float], epsilon: float, bounds: list[float]) -> float:
    count_meas = make_count(TIA=float) >> make_base_discrete_laplace(4./epsilon)
    sum_meas = make_clamp(bounds) >> make_bounded_sum(bounds) >> make_base_laplace(4./3.*30./epsilon)

    dp_fraction_meas = make_basic_composition([
        sum_meas,
        count_meas
    ])

    dp_sum, dp_count = dp_fraction_meas(x)

    return dp_sum/dp_count, dp_fraction_meas

In [176]:

#print(f"dp count: {dp_count}; true count: {N}; difference: {dp_count - N}")

dpmean, dpmeas = dpmean_naive(x,epsilon,bounds)
print(f"dp mean: {dpmean:.2f}; true mean: {truemean:.2f}; difference: {dpmean - truemean:.2f}")
print("epsilon:", dpmeas.map(1))
#print(f"epsilon (count): {count_meas.map(1)}; epsilon (sum): {sum_meas.map(1)}")

dp mean: 15.45; true mean: 15.68; difference: -0.22
epsilon: 1.0000000069849195


In [164]:
## 'Resizing' method using OpenDP
from opendp.transformations import make_sized_bounded_mean
from opendp.measurements import make_base_discrete_laplace

dp_count = int(count_meas(x)*0.9)

mean_meas = (
    make_clamp(bounds) >>
    make_bounded_resize(dp_count, bounds, constant=15.) >>
    make_sized_bounded_mean(dp_count, bounds) >>
    make_base_laplace((30.*4./3.)/dp_count/epsilon)
)

dp_mean = mean_meas(x)

print(f"dp count: {dp_count}; true count: {N}; difference: {dp_count - N}")
print(f"dp mean: {dp_mean:.2f}; true mean: {truemean:.2f}; difference: {dp_mean - truemean:.2f}")
print(f"epsilon: {make_basic_composition([count_meas,mean_meas]).map(1)}")
print(f"epsilon (count): {count_meas.map(1)}; epsilon (mean): {mean_meas.map(1)}\n")


## What if you just guess the count?
guess_count = 100

mean_meas = (
    make_clamp(bounds) >>
    make_bounded_resize(guess_count, bounds, constant=15.) >>
    make_sized_bounded_mean(guess_count, bounds) >>
    make_base_laplace(30./guess_count/epsilon)
)

dp_mean2 = mean_meas(x)

print("Guess count method")
print(f"guess count: {guess_count}; true count: {N}; difference: {guess_count - N}")
print(f"dp mean: {dp_mean2:.2f}; true mean: {truemean:.2f}; difference: {(dp_mean2 - truemean)/truemean:.1%}")
print(f"epsilon: {mean_meas.map(1)}")
print(f"epsilon (count): {0.}; epsilon (mean): {mean_meas.map(1)}")




dp count: 128; true count: 152; difference: -24
dp mean: 14.25; true mean: 14.54; difference: -0.29
epsilon: 0.20000000000006202
epsilon (count): 0.05; epsilon (mean): 0.150000000000062

Guess count method
guess count: 100; true count: 152; difference: -52
dp mean: 16.37; true mean: 14.54; difference: 12.6%
epsilon: 0.20000000000006146
epsilon (count): 0.0; epsilon (mean): 0.20000000000006146
