In this notebook, we demonstrate the primary mechanisms in the `rem` module. After loading a dataset and specifying a workload of marginals, we consider three use cases:
- Measuring a workload of marginals and reconstructing answers to the same workload;
- Measuring a workload of marginals using Scalable MWEM and reconstructing answers to a larger workload of marginals;
- Measuring a workload of residuals using ResidualPlanner and reconstructing answers to a given workload of marginals.

For each use case, we use the following methods to reconstruct marginal answers:
- GReM-MLE
- GReM-MLE (Trunc)
- GReM-MLE (Trunc+Rescale)
- GReM-LNN
- EMP (only when marginals, rather than residuals, are measured)

### Setup

In [1]:
from rem.algebra import MarginalWorkload, VStack
from rem.cdp2adp import cdp_rho
from rem.reconstruction import gremMLE, gremLNN, emp
from rem.mechanism import scalableMWEM, residualPlanner
from rem.utils import importData, calcErrors
import itertools

In [2]:
# Load the dataset
# importData returns an object that exposes the records as `dat.df` (previewed
# below) and a `dat.domain`, which is passed to MarginalWorkload when the
# workload is built later in this notebook.
dat = importData('synthetic')
# Preview the first rows; the saved output shows three columns named 0, 1 and 2.
dat.df.head()

Unnamed: 0,0,1,2
0,2,0,0
1,2,0,1
2,1,0,0
3,2,1,0
4,2,2,0


In [3]:
# Set Parameters
# Privacy target (epsilon, delta); cdp_rho maps it to the budget `rho` that is
# handed to the measurement mechanisms below (presumably the zCDP parameter --
# confirm against rem.cdp2adp).
epsilon = 0.5
delta = 1e-9
rho = cdp_rho(epsilon, delta)

# Workload: every num_marg-way combination of attributes. Attributes are
# addressed by the string form of their column position ('0', '1', ...).
num_marg = 2
Q = list(itertools.combinations(map(str, range(dat.df.shape[1])), num_marg))
marginals = VStack([MarginalWorkload(attrs, dat.domain) for attrs in Q])

### Measure Full Workload of Marginals and Reconstruct Marginals

In [4]:
# Measure Marginal Workload
# The total budget rho is split evenly, so each of the len(Q) marginals is
# measured with rho / len(Q). With return_sigma=True the call returns a pair:
# the answers `y` and `y_sigmas` (presumably the per-marginal noise scales).
y, y_sigmas = marginals.getAnswers(
    dat,
    rho=rho / len(Q),
    return_sigma=True,
)
y

[tensor([12.4406, 36.3858,  3.1725, 49.1996, -6.9361, 37.1481,  9.5615, 53.3457,
         34.3743]),
 tensor([21.7902, 36.0701, 33.0252, -0.8881, 28.6148, 51.8473, 37.9684, 40.8163,
         21.3402]),
 tensor([-13.0345,  14.3716,  21.3542,  33.3547,  27.3334,  35.4241,  23.0510,
          24.7818,  18.8904])]

In [5]:
# Decompose marginal answers into Residual answers
# Yields the residual workload together with its answers and sigmas; these three
# objects are the inputs of the GReM reconstructions in the following cells.
residuals, res_answers, res_sigmas = marginals.decomposeIntoResiduals(
    y=y,
    sigma=y_sigmas,
)

In [6]:
# Reconstruct Marginal answers from Residual answers using GReM-MLE
gmle = gremMLE(
    target_marginals=marginals,
    residuals=residuals,
    res_answers=res_answers,
    res_sigmas=res_sigmas,
)
# Without post-processing the estimates may contain negative entries (see output).
gmle.getMarginals()

100%|██████████| 3/3 [00:00<00:00, 409.39it/s]


[tensor([10.8602, 45.1099,  8.3486, 41.1652, -4.6660, 35.8701,  1.9739, 56.0626,
         33.5431]),
 tensor([15.0768, 25.4375, 23.8044, -1.1475, 24.4362, 49.0805, 37.2622, 36.1909,
         18.1265]),
 tensor([-4.7407, 26.5845, 32.1555, 31.3440, 29.2418, 35.9208, 24.5883, 30.2383,
         22.9352])]

In [7]:
# Apply Truncation post-processing to the reconstructed marginals
# In the saved output below, the negative entries of the GReM-MLE estimate are
# replaced by 0 and every other entry is left unchanged.
gmle.getMarginals(postprocessing='trunc')

[tensor([10.8602, 45.1099,  8.3486, 41.1652,  0.0000, 35.8701,  1.9739, 56.0626,
         33.5431]),
 tensor([15.0768, 25.4375, 23.8044,  0.0000, 24.4362, 49.0805, 37.2622, 36.1909,
         18.1265]),
 tensor([ 0.0000, 26.5845, 32.1555, 31.3440, 29.2418, 35.9208, 24.5883, 30.2383,
         22.9352])]

In [8]:
# Apply Truncation and Rescaling post-processing to the reconstructed marginals
# In the saved output below, negative entries are zeroed and the remaining
# entries are scaled down, so each marginal again sums to (approximately) the
# total of the un-postprocessed GReM-MLE estimate.
gmle.getMarginals(postprocessing = 'trunc+rescale')

[tensor([10.6427, 44.2063,  8.1814, 40.3406,  0.0000, 35.1516,  1.9344, 54.9396,
         32.8712]),
 tensor([15.0014, 25.3103, 23.6853,  0.0000, 24.3140, 48.8350, 37.0758, 36.0099,
         18.0359]),
 tensor([ 0.0000, 26.0437, 31.5012, 30.7062, 28.6469, 35.1899, 24.0881, 29.6230,
         22.4686])]

In [9]:
# Reconstruct Marginal answers from Residual answers using GReM-LNN
glnn = gremLNN(
    target_marginals=marginals,
    residuals=residuals,
    res_answers=res_answers,
    res_sigmas=res_sigmas,
)
# Run the solver before reading the estimates; it prints its progress to the
# cell output. NOTE(review): the meaning of t is not visible from this
# notebook -- confirm against rem.reconstruction.gremLNN.solve.
glnn.solve(t=0.01)
glnn.getMarginals()

4016.70166015625 68.51028442382812
[-4.6548919677734375, -1.1363840103149414, -4.72962760925293]
[-4.6548919677734375, -1.1363840103149414, -4.72962760925293]
4017.192626953125 -0.6940033435821533
4017.321044921875 -1.093764066696167
4017.49169921875 -1.2896062135696411
4017.683349609375 -1.4394776821136475
4017.908935546875 -1.4865247011184692
4018.140625 -1.5083510875701904
4018.373046875 -1.509914755821228
4018.6044921875 -1.4953563213348389
4018.832275390625 -1.4681419134140015
4019.0556640625 -1.4311507940292358
4019.27197265625 -1.3867831230163574
4019.4814453125 -1.337017297744751
4019.682861328125 -1.283473014831543
4019.875732421875 -1.2275002002716064
4020.059326171875 -1.170173168182373
4020.234619140625 -1.1123738288879395
4020.40087890625 -1.0548038482666016
4020.558837890625 -0.998015284538269
4020.707763671875 -0.9424490928649902
4020.84814453125 -0.888432502746582
4020.980224609375 -0.8362045884132385
4021.104736328125 -0.7859576940536499
4021.220947265625 -0.7378002405

100%|██████████| 3/3 [00:00<00:00, 960.89it/s]


[tensor([1.1983e+01, 4.2855e+01, 9.3288e+00, 3.9082e+01, 1.9073e-06, 3.3644e+01,
         3.0970e+00, 5.3808e+01, 3.4523e+01]),
 tensor([1.4683e+01, 2.5558e+01, 2.3925e+01, 9.5367e-07, 2.4040e+01, 4.8685e+01,
         3.6869e+01, 3.6312e+01, 1.8247e+01]),
 tensor([-1.9073e-06,  2.4295e+01,  2.9866e+01,  2.9224e+01,  3.0380e+01,
          3.7059e+01,  2.2328e+01,  3.1235e+01,  2.3932e+01])]

In [10]:
# Efficient Marginal Reconstruction (Decomposition + GReM-MLE)
# Works directly from the noisy marginal answers `y` and `y_sigmas`, without the
# explicit decomposeIntoResiduals step used for `gmle` earlier in the notebook.
# The workload is passed twice -- presumably once as the target workload and once
# as the measured workload (TODO confirm argument order in rem.reconstruction.emp).
inf = emp(marginals, marginals, y, y_sigmas)
# In the saved output, these estimates coincide with the GReM-MLE estimates.
inf.getMarginals()

100%|██████████| 3/3 [00:00<00:00, 476.43it/s]


[tensor([10.8602, 45.1099,  8.3486, 41.1652, -4.6660, 35.8701,  1.9739, 56.0626,
         33.5431]),
 tensor([15.0768, 25.4375, 23.8044, -1.1475, 24.4362, 49.0805, 37.2622, 36.1909,
         18.1265]),
 tensor([-4.7407, 26.5845, 32.1555, 31.3440, 29.2418, 35.9208, 24.5883, 30.2383,
         22.9352])]

In [11]:
# Compute Error
# Reference answers for the target workload, requested with sigma = 0.
true_answers = marginals.getAnswers(dataset=dat, sigma=0)

# One calcErrors score per reconstruction. Each row is
# (label, fitted reconstruction, keyword arguments for getMarginals); rows are
# evaluated top to bottom, one getMarginals call followed by one calcErrors call.
supported_errors = {
    label: calcErrors(true_answers, model.getMarginals(**kwargs))
    for label, model, kwargs in (
        ('GReM-MLE', gmle, {}),
        ('GReM-MLE (Trunc)', gmle, {'postprocessing': 'trunc'}),
        ('GReM-MLE (Trunc+Rescale)', gmle, {'postprocessing': 'trunc+rescale'}),
        ('GReM-LNN', glnn, {}),
        ('EMP', inf, {}),
    )
}
supported_errors

100%|██████████| 3/3 [00:00<00:00, 893.36it/s]
100%|██████████| 3/3 [00:00<00:00, 1017.95it/s]


{'GReM-MLE': 12.17251530400029,
 'GReM-MLE (Trunc)': 11.781617906358505,
 'GReM-MLE (Trunc+Rescale)': 11.452325891565394,
 'GReM-LNN': 11.250283983018663,
 'EMP': 12.17251530400029}

### Measure Workload of Marginals using Scalable MWEM and Reconstruct Marginals

In [12]:
# Run Scalable MWEM
# Positional arguments: the marginal workload, the privacy budget rho, and 2 --
# presumably the number of rounds, since the outer progress bar in the saved
# output runs 2 iterations (TODO confirm against rem.mechanism.scalableMWEM).
scalable = scalableMWEM(marginals, rho, 2)
scalable.run(dat)
# Display the mechanism's own estimates for the workload.
scalable.getMarginals()

100%|██████████| 3/3 [00:00<00:00, 1377.44it/s]
 50%|█████     | 1/2 [00:00<00:00,  3.10it/s]

('1', '2') 101.86922 105.83386


100%|██████████| 2/2 [00:00<00:00, 955.10it/s]
100%|██████████| 2/2 [00:00<00:00,  5.94it/s]


('0', '2') 112.67417 132.05836


100%|██████████| 3/3 [00:00<00:00, 1068.43it/s]


[tensor([19.0191,  3.1911,  2.7695, 45.6221, 29.7942, 29.3726, 23.4609,  7.6329,
          7.2113]),
 tensor([ 13.5657, -14.9273,  26.3414,  42.7206,  51.6761,  10.3922,   3.4878,
          23.8229,  10.9943]),
 tensor([ -3.5516,  42.1748,  49.4789,  35.2754,  -3.2027,   8.5454,  28.0502,
          21.5995, -10.2964])]

In [13]:
# Measured Marginals
# In the saved output this is a 3-tuple: a VStack, a list of answer tensors, and
# a list of floats (presumably the sigma of each measurement -- confirm).
scalable.measured_marginal_output

(<rem.algebra.VStack at 0x7f83aca0f7c0>,
 [tensor([198.8847]),
  tensor([-17.2760,  41.4357,  40.2534,  21.5511,  -3.9417,  -0.6801,  14.3259,
           20.8604, -19.5219]),
  tensor([ 13.5963, -27.8820,  21.8732,  42.7512,  38.7214,   5.9240,   3.5184,
           10.8682,   6.5261])],
 [35.564040441541906, 23.70936029436127, 23.70936029436127])

In [14]:
# Measured Residuals
# A 3-tuple whose entries [0], [1] and [2] are passed to the GReM reconstructions
# in the following cells as residuals, res_answers and res_sigmas respectively.
scalable.measured_residual_output

(<rem.algebra.VStack at 0x7f83accc2b30>,
 [tensor([198.8847]),
  tensor([97.0067]),
  tensor([-39.7535,  38.3031]),
  tensor([47.4839,  1.2648]),
  tensor([-84.2045,   4.4440,  32.0274, -43.6440]),
  tensor([115.8966]),
  tensor([ 38.1583, -12.6157]),
  tensor([-79.8090,  66.4838]),
  tensor([ 37.4485, -82.5526,  11.3796,  28.4554])],
 [35.564040441541906,
  71.12808088308381,
  41.06581664478991,
  41.06581664478991,
  23.70936029436127,
  71.12808088308381,
  41.06581664478991,
  41.06581664478991,
  23.70936029436127])

In [15]:
# Reconstruct Marginal answers from Residual answers using GReM-MLE
# The inputs come from the residual measurements recorded by Scalable MWEM:
# entry 0 is the residual workload, entry 1 the answers, entry 2 the sigmas.
gmle2 = gremMLE(
    target_marginals=marginals,
    residuals=scalable.measured_residual_output[0],
    res_answers=scalable.measured_residual_output[1],
    res_sigmas=scalable.measured_residual_output[2],
)

100%|██████████| 3/3 [00:00<00:00, 872.12it/s]


In [16]:
# Reconstruct Marginal answers from Residual answers using GReM-LNN
# The inputs come from the residual measurements recorded by Scalable MWEM:
# entry 0 is the residual workload, entry 1 the answers, entry 2 the sigmas.
glnn2 = gremLNN(
    target_marginals=marginals,
    residuals=scalable.measured_residual_output[0],
    res_answers=scalable.measured_residual_output[1],
    res_sigmas=scalable.measured_residual_output[2],
)
# The solver prints its progress to the cell output.
glnn2.solve(t=0.01)

6265.67333984375 41.20879364013672
[-0.6427898406982422, -18.33963394165039, -13.708718299865723]
[-0.8639793395996094, -18.33963394165039, -27.287681579589844]
6267.60302734375 -7.742343902587891
6269.6318359375 -11.840568542480469
6272.09326171875 -14.289457321166992
6274.837890625 -15.86983871459961
6277.77294921875 -16.894620895385742
6280.81005859375 -17.483596801757812
6283.88330078125 -17.73309326171875
6286.97607421875 -17.687164306640625
6290.04833984375 -17.406742095947266
6293.04052734375 -16.972009658813477
6295.93115234375 -16.424814224243164
6298.7041015625 -15.798571586608887
6301.3525390625 -15.119800567626953
6303.86865234375 -14.409451484680176
6306.251953125 -13.68392562866211
6308.49951171875 -12.955977439880371
6310.61376953125 -12.236841201782227
6312.58837890625 -11.538542747497559
6314.4287109375 -10.86557388305664
6316.14208984375 -10.220879554748535
6317.73681640625 -9.606186866760254
6319.22021484375 -9.022375106811523
6320.59814453125 -8.469676971435547
6321

In [17]:
# Compute Error
# Reference answers for the target workload, requested with sigma = 0.
true_answers = marginals.getAnswers(dataset=dat, sigma=0)

# One calcErrors score per reconstruction. Each row is
# (label, fitted reconstruction, keyword arguments for getMarginals); rows are
# evaluated top to bottom, one getMarginals call followed by one calcErrors call.
scalableMWEM_errors = {
    label: calcErrors(true_answers, model.getMarginals(**kwargs))
    for label, model, kwargs in (
        ('GReM-MLE', gmle2, {}),
        ('GReM-MLE (Trunc)', gmle2, {'postprocessing': 'trunc'}),
        ('GReM-MLE (Trunc+Rescale)', gmle2, {'postprocessing': 'trunc+rescale'}),
        ('GReM-LNN', glnn2, {}),
    )
}
scalableMWEM_errors

100%|██████████| 3/3 [00:00<00:00, 1062.21it/s]


{'GReM-MLE': 14.359751383463541,
 'GReM-MLE (Trunc)': 13.175381978352865,
 'GReM-MLE (Trunc+Rescale)': 12.57287964997468,
 'GReM-LNN': 13.561056066442418}

### Measure Workload of Residuals using ResidualPlanner and Reconstruct Marginals

In [18]:
# Measure Residuals using ResidualPlanner and Reconstruct Marginals
# Note that the targets are given as the attribute tuples in Q, not as the
# `marginals` VStack used by the other mechanisms in this notebook.
rp = residualPlanner(
    data=dat,
    target_marginals=Q,
    rho=rho,
)
rp.getMarginals()

100%|██████████| 3/3 [00:00<00:00, 1008.73it/s]


[tensor([-3.9973, 29.2927, 15.9062, 14.5640, 12.7836, 30.8175, 16.2475, 38.3362,
         12.3611]),
 tensor([ 9.9337, 14.3769, 16.8910, 13.2826, 16.7298, 28.1526, 28.0793, 32.0943,
          6.7713]),
 tensor([18.8344, 12.5186, -4.5388, 11.4881, 39.8156, 29.1088, 20.9731, 10.8668,
         27.2449])]

In [19]:
# Reconstruct Marginal answers from Residual answers using GReM-MLE (equivalent to ResidualPlanner reconstruction)
# Inputs are the residual workload, answers and sigmas stored on `rp`; in the
# saved output the result matches rp.getMarginals().
gmle_rp = gremMLE(
    target_marginals=marginals,
    residuals=rp.residuals,
    res_answers=rp.z,
    res_sigmas=rp.z_sigmas,
)
gmle_rp.getMarginals()

100%|██████████| 3/3 [00:00<00:00, 917.19it/s]


[tensor([-3.9973, 29.2927, 15.9062, 14.5640, 12.7836, 30.8175, 16.2475, 38.3362,
         12.3611]),
 tensor([ 9.9337, 14.3769, 16.8910, 13.2826, 16.7298, 28.1526, 28.0793, 32.0943,
          6.7713]),
 tensor([18.8344, 12.5186, -4.5388, 11.4881, 39.8156, 29.1088, 20.9731, 10.8668,
         27.2449])]

In [20]:
# Reconstruct Marginal answers from Residual answers using GReM-LNN
# Inputs are the residual workload, answers and sigmas stored on `rp`.
glnn_rp = gremLNN(
    target_marginals=marginals,
    residuals=rp.residuals,
    res_answers=rp.z,
    res_sigmas=rp.z_sigmas,
)
# The solver prints its progress to the cell output.
glnn_rp.solve(t=0.01)
glnn_rp.getMarginals()

0.09000182896852493 49.983455657958984
[-3.963912010192871, 6.804634094238281, -4.505496025085449]
[-3.963912010192871, 0.0, -4.505496025085449]
0.33488786220550537 -0.6620253920555115
0.4839431345462799 -1.0005085468292236
0.6578367352485657 -1.0732994079589844
0.841347873210907 -1.116685152053833
1.0300742387771606 -1.136654019355774
1.2205305099487305 -1.1381536722183228
1.4099761247634888 -1.125265121459961
1.596300721168518 -1.1013364791870117
1.7778908014297485 -1.069114327430725
1.9535577297210693 -1.0308358669281006
2.1224422454833984 -0.9883168339729309
2.283963203430176 -0.9430276155471802
2.4377646446228027 -0.8961347341537476
2.583653688430786 -0.8485788106918335
2.721583366394043 -0.8010919690132141
2.8516170978546143 -0.7542476058006287
2.973904609680176 -0.708477258682251
3.088649272918701 -0.6641112565994263
3.1961097717285156 -0.6213862895965576
3.296570301055908 -0.5804668664932251
3.3903515338897705 -0.5414518117904663
3.477768898010254 -0.5044079422950745
3.55915570

100%|██████████| 3/3 [00:00<00:00, 990.39it/s]


[tensor([-1.4305e-06,  2.7534e+01,  1.4147e+01,  1.3058e+01,  1.3446e+01,
          3.1480e+01,  1.4741e+01,  3.8999e+01,  1.3024e+01]),
 tensor([10.0094, 14.4526, 17.2193, 13.1381, 16.5853, 28.2608, 27.9348, 31.9498,
          6.8795]),
 tensor([ 1.7057e+01,  1.0742e+01, -2.2650e-06,  1.2270e+01,  4.0597e+01,
          2.7112e+01,  2.1755e+01,  1.1649e+01,  2.5248e+01])]

In [21]:
# Compute Error
# Reference answers for the target workload, requested with sigma = 0.
true_answers = marginals.getAnswers(dataset=dat, sigma=0)

# One calcErrors score per reconstruction. Each row is
# (label, fitted object, keyword arguments for getMarginals); rows are
# evaluated top to bottom, one getMarginals call followed by one calcErrors call.
residualPlanner_errors = {
    label: calcErrors(true_answers, model.getMarginals(**kwargs))
    for label, model, kwargs in (
        ('ResidualPlanner', rp, {}),
        ('GReM-MLE', gmle_rp, {}),
        ('GReM-MLE (Trunc)', gmle_rp, {'postprocessing': 'trunc'}),
        ('GReM-MLE (Trunc+Rescale)', gmle_rp, {'postprocessing': 'trunc+rescale'}),
        ('GReM-LNN', glnn_rp, {}),
    )
}
residualPlanner_errors

100%|██████████| 3/3 [00:00<00:00, 942.82it/s]
100%|██████████| 3/3 [00:00<00:00, 994.70it/s]


{'ResidualPlanner': 9.655842816388166,
 'GReM-MLE': 9.655842251247831,
 'GReM-MLE (Trunc)': 9.339691162109375,
 'GReM-MLE (Trunc+Rescale)': 9.226512485080294,
 'GReM-LNN': 9.350058096426503}