Worksheet showing additional calculations for:
   http://www.win-vector.com/blog/2015/10/a-simple-differentially-private-procedure/

In [1]:
import sympy

In [2]:
# Goal of this cell: find the Laplace scale parameter "b" for which adding
# Laplace noise achieves epsilon differential privacy, then report the
# variance that this choice of "b" implies.
# Ingredients used below:
#  * MaclaurinSeries (used to form the estimates)
#  * The definition of differential privacy:
#      http://www.win-vector.com/blog/2015/10/a-simpler-explanation-of-differential-privacy/
#  * The CDF and variance of the Laplace distribution as functions of "b":
#      https://en.wikipedia.org/wiki/Laplace_distribution
# Symbols: n, bInv (the solve step below sets b = 1/bInv) and epsilon.
n, bInv, epsilon = sympy.symbols('n bInv epsilon')

def _snapTinyConstant(value, tol=1.0e-10):
    """Return 0 when value is a symbol-free constant with magnitude below tol.

    Any value that still contains free symbols, or whose magnitude is at
    least tol, is returned unchanged.
    """
    if len(value.free_symbols) != 0:
        return value
    try:
        magnitude = abs(float(value))
    except TypeError:
        # Symbol-free constants that have no real float form (for example
        # complex-valued ones) cannot be compared to tol; leave them as-is
        # instead of aborting the whole expansion.
        return value
    return 0 if magnitude < tol else value


def MaclaurinSeries(expr, var, order=1, tol=1.0e-10):
    """Truncated Maclaurin (Taylor about var = 0) expansion of expr.

    Parameters:
      expr:  sympy expression to expand.
      var:   sympy symbol to expand in.
      order: highest power of var kept in the result.  The default of 1
             gives the linear estimate f(0) + var*f'(0).
      tol:   symbol-free coefficients with absolute value below tol are
             replaced by exactly 0 (removes floating point residue).

    Returns:
      sum over k = 0..order of f^(k)(0) * var**k / k!, where each
      coefficient is simplified after substituting var = 0.

    Raises:
      ValueError: if order is negative.
    """
    if order < 0:
        raise ValueError("order must be non-negative")
    series = 0
    derivative = expr
    for k in range(order + 1):
        if k > 0:
            # Differentiate incrementally: the k-th derivative is the
            # derivative of the (k-1)-th one computed on the previous pass.
            derivative = sympy.diff(derivative, var)
        coefficient = _snapTinyConstant(
            sympy.simplify(derivative.subs(var, 0)), tol)
        series = series + coefficient*var**k/sympy.factorial(k)
    return series

# Note: cutPoint is defined here but not referenced by the expressions below.
cutPoint = 1/(2*n)

# Laplace tail term, written with the inverse scale bInv, and its complement.
LaplaceCDF2 = 0.5*sympy.exp(-bInv/n)
LaplaceCDF1 = 1 - LaplaceCDF2

# Log of the ratio of the two probabilities; this is the quantity that gets
# matched against epsilon.
logRat = sympy.log(LaplaceCDF1/LaplaceCDF2)

# Replace the log-ratio by its linear Maclaurin estimate in bInv.
pError = MaclaurinSeries(logRat, bInv)

# Solve estimate == epsilon for bInv, then invert to get the scale b.
bInvSolutions = sympy.solve(pError - epsilon, bInv)
b = 1/bInvSolutions[0]

# Variance of a Laplace distribution with scale b is 2*b^2.
variance = 2*b**2
print(variance)

8.0/(epsilon**2*n**2)


In [3]:
# Bootstrap version of the differential privacy example: estimate the
# sample size the method needs and the variance it produces.
#   Facts are taken from
#   http://www.win-vector.com/blog/2015/10/a-simple-differentially-private-procedure/
n, Z, epsilon = sympy.symbols('n Z epsilon')

# Setup: draw n/Z samples (with replacement) from a set of size n holding
# n-1 zeros and a single one.  An error occurs when the one is drawn at
# least once.
# Expected number of ones in the draw: (n/Z)*(1/n) = 1/Z.
# Markov's inequality then gives pError <= 1/Z.
pError = 1/Z

# Choose Z so that this bound equals epsilon.  From here on Z holds the
# solved expression rather than the bare symbol.
Z = sympy.solve(pError - epsilon, Z)[0]

# The number of ones appearing in the Bootstrap (with replacement) sample is
# Poisson with intensity equal to its mean, and that mean is 1/Z (above), so
# the count has mean 1/Z and variance 1/Z.  What is returned is a frequency,
# count/bootStrapSize = count/(n/Z).  Variance scales with the square of the
# divisor, giving (1/Z)/(n/Z)**2.
bootStrapSize = n/Z
countVariance = 1/Z
var = countVariance/bootStrapSize**2
print(var)

1/(epsilon*n**2)
