In [11]:
import opendp.prelude as dp
import polars as pl

# Opt in to OpenDP's "contrib" feature flag before building anything.
dp.enable_features("contrib")


# Toy dataset: three region ids, two rows each.
NB_REGIONS = 3
NB_ROWS = 6

df = pl.DataFrame(
    {
        "region": [1, 1, 2, 2, 3, 3],
        "income": [1000.0, 1000.0, 2000.0, 2000.0, 3000.0, 3000.0],
    },
    schema={"region": pl.Int64, "income": pl.Float64},
)
lf = df.lazy()

# Minimal domain: one series domain per column, wrapped in a margin with no
# grouping keys (by=[]) that publishes lengths and caps the partition length
# at NB_ROWS.
lf_domain = dp.with_margin(
    dp.lazyframe_domain(
        [
            dp.series_domain("region", dp.atom_domain(T=dp.i64)),
            dp.series_domain("income", dp.atom_domain(T=dp.f64)),
        ]
    ),
    by=[],
    public_info="lengths",
    max_partition_length=NB_ROWS,
)

# Group-by query: average income per region, with noise of scale 1.0 added.
income_lower_bound = 1_000
income_upper_bound = 100_000
noisy_mean_income = pl.col("income").dp.mean(
    bounds=(income_lower_bound, income_upper_bound),
    scale=1.0,
)
plan = lf.group_by("region").agg([noisy_mean_income]).sort("income")

In [18]:
# Bad domain: the per-region margin does not declare max_num_partitions.
bad_domain = dp.with_margin(
    lf_domain,
    by=["region"],
    public_info="lengths",
    max_partition_length=NB_ROWS,
)

# Constructing the measurement and collecting a release both succeed with bad_domain...
bad_meas = dp.make_private_lazyframe(bad_domain, dp.symmetric_distance(), dp.max_divergence(T=float), plan)
bad_res = bad_meas(lf).collect()

# ...but evaluating the privacy map fails. That failure is the point of this
# cell, so catch and print it rather than letting it abort a
# "Restart Kernel -> Run All" before the following cells execute.
try:
    bad_epsilon = bad_meas.map(1)
except dp.OpenDPException as err:
    print(f"{type(err).__name__}: {err}")
else:
    # Not expected with this domain; show the value if the map ever succeeds.
    print(bad_epsilon)

OpenDPException: 
  FailedFunction("max_num_partitions must be known when the metric is not sensitive to ordering (SymmetricDistance)")

In [19]:
# Good domain: the per-region margin also declares max_num_partitions.
good_domain = dp.with_margin(
    lf_domain,
    by=["region"],
    public_info="lengths",
    max_partition_length=NB_ROWS,
    max_num_partitions=NB_REGIONS,
)

# Build the measurement, collect a release, then evaluate the privacy map.
good_meas = dp.make_private_lazyframe(
    good_domain,
    dp.symmetric_distance(),
    dp.max_divergence(T=float),
    plan,
)
good_res = good_meas(lf).collect()

good_meas.map(1)

4.796163466380676e-09