Our basic implementation is based on the [text-generation example of private-transformers](https://github.com/lxuechen/private-transformers/tree/main/examples/table2text). This file shows how to calculate the parameters of Bayesian Confidentiality under a redaction policy $\pi$.



In [1]:
import math
from autodp.autodp_core import Mechanism
from autodp import mechanism_zoo, transformer_zoo
from autodp.calibrator_zoo import generalized_eps_delta_calibrator
from autodp.mechanism_zoo import SubsampleGaussianMechanism


class DPSGDMech(Mechanism):
    """Noisy-SGD mechanism: a subsampled Gaussian mechanism composed ``niter`` times.

    Args:
        prob: sampling probability passed to the amplification-by-sampling
            transformer.
        sigma: ``sigma`` argument of the underlying ``GaussianMechanism``.
        niter: composition coefficient, i.e. how many times the subsampled
            step is repeated.
        name: label stored on the resulting mechanism.
    """

    def __init__(self, prob, sigma, niter, name='NoisySGD'):
        super().__init__()
        self.name = name
        self.params = {'prob': prob, 'sigma': sigma, 'niter': niter}

        # A single step: the Gaussian mechanism amplified by subsampling.
        # (The original author notes the transformer defaults to Poisson sampling.)
        amplify = transformer_zoo.AmplificationBySampling()
        gaussian = mechanism_zoo.GaussianMechanism(sigma=sigma)
        single_step = amplify(gaussian, prob, improved_bound_flag=True)

        # The whole run: that single step composed `niter` times.
        composed = transformer_zoo.Composition()([single_step], [niter])

        # Adopt the composed mechanism's RDP function as this mechanism's own.
        self.propagate_updates(composed.RenyiDP, type_of_update='RDP')

In [2]:
"""
Fix delta parameter to be the same, returns the epsilon parameter in Bayesian Confidentiality
Args:
    dpsgd_eps: epsilon parameter in DP SGD
    dpsgd_delta: delta parameter in DP SGD
    batch_size: batch size in training
    sample_size: total number of samples
    epochs: how many epochs
    gamma: 1 - empirical recall rates of the redaction policy. 
           e.g. Redaction policy mask 90% of sensitive data -> gamma = 1 - 0.9 = 0.1 
"""
dpsgd_eps = 2
dpsgd_delta = 1e-5
batch_size = 1000
sample_size = 10000
epochs = 10
gamma = 0.1

niter = sample_size / batch_size * epochs
sample_rate = batch_size / sample_size
general_calibrate = generalized_eps_delta_calibrator()
params = {'sigma': None, 'coeff': niter, 'prob': sample_rate}
mech = general_calibrate(SubsampleGaussianMechanism, dpsgd_eps, dpsgd_delta, [0, 1000], 
                         params=params, para_name='sigma', name='Subsampled_Gaussian')
sigma = mech.params['sigma']
dpsgd = DPSGDMech(prob=sample_rate, sigma=sigma, niter=niter)

confidentiality_delta = dpsgd_delta / gamma
old_eps = dpsgd.get_approxDP(delta=confidentiality_delta)
confidentiality_eps = math.log(1 + gamma * (math.e ** old_eps - 1))
print('DPSGD eps:', dpsgd_eps)
print('Confidentiality eps:', confidentiality_eps)

  w = xb - ((xb - xc) * tmp2 - (xb - xa) * tmp1) / denom


DPSGD eps: 2
Confidentiality eps: 0.3743881227244274
