# OpenDP

In [1]:
import pandas as pd

df = pd.read_csv("https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv")
df.head(2)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE


## Logger in notebook

In [2]:
import opendp_polars.prelude as dp
import opendp_polars.transformations as trans
import opendp_polars.measurements as meas
from opendp_logger import make_load_json
from opendp_logger import enable_logging
from opendp.mod import enable_features
enable_logging()

dp.enable_features("contrib")

### Basic OpenDP

In [3]:
columns = ["species", "island", "bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g", "sex"]
bill_length_min, bill_length_max = 30.0, 60.0
nb_penguins = 343
avg_bill_length = 44.13

In [19]:
bill_length_transformation_pipeline = (
    trans.make_split_dataframe(separator=",", col_names=columns) >>
    trans.make_select_column(key="bill_length_mm", TOA=str) >>
    trans.then_cast_default(TOA=float) >>
    trans.then_clamp(bounds=(bill_length_min, bill_length_max)) >>
    trans.then_resize(size=nb_penguins, constant=avg_bill_length) >>
    trans.then_variance() >>
    meas.then_laplace(scale=5.0)
)

In [20]:
#to_ast(bill_length_transformation_pipeline)

In [21]:
client_pipeline = bill_length_transformation_pipeline.to_json()

In [22]:
opendp_pipe = make_load_json(client_pipeline)

In [23]:
cost = opendp_pipe.map(d_in=1)
cost

0.5247813411085608

In [24]:
release_data = opendp_pipe(df.to_csv())
release_data

9.833094510123821

In [25]:
import json
from functools import wraps
from polars import LazyFrame, DataFrame
import importlib
import pkg_resources

OPENDP_VERSION = pkg_resources.get_distribution("opendp_polars").version

__all__ = ["enable_logging"]
LOGGED_CLASSES = (
    dp.Transformation,
    dp.Measurement,
    dp.Function,
    dp.Domain,
    dp.Metric,
    dp.Measure,
    dp.PartialConstructor,
)
WRAPPED_MODULES = [
    "transformations",
    "measurements",
    "combinators",
    "domains",
    "metrics",
    "measures",
    "prelude",
    "core",
]


def wrap_func(f, module_name):
    @wraps(f)
    def wrapper(*args, **kwargs):
        chain = f(*args, **kwargs)
        if isinstance(chain, LOGGED_CLASSES):
            chain.log = {
                "_type": "constructor",
                "func": f.__name__,
                "module": module_name,
            }
            args and chain.log.setdefault("args", args)
            kwargs and chain.log.setdefault("kwargs", kwargs)
        return chain

    return wrapper


def to_ast(item):
    if isinstance(item, LOGGED_CLASSES):
        if not hasattr(item, "log"):
            msg = "invoke `opendp_logger.enable_logging()` before constructing your measurement"
            print(item)
            raise ValueError(msg)

        return to_ast(item.log)
    if isinstance(item, tuple):
        return [to_ast(e) for e in item]
    if isinstance(item, DataFrame):
        # TODO: extremely inefficient
        return {"_type": "DataFrame", "_item": item.lazy().serialize()}
    if isinstance(item, LazyFrame):
        return {"_type": "LazyFrame", "_item": item.serialize()}
    if isinstance(item, list):
        return {"_type": "list", "_items": [to_ast(e) for e in item]}
    if isinstance(item, dict):
        return {key: to_ast(value) for key, value in item.items()}
    if isinstance(item, (dp.RuntimeType, type)):
        return str(dp.RuntimeType.parse(item))
    return item


def to_json(chain, *args, **kwargs):
    return json.dumps(
        {"version": OPENDP_VERSION, "ast": chain.to_ast()}, *args, **kwargs
    )

WHITELIST = [
    "lazyframe_domain_with_counts",
    "dataframe_domain_with_counts",
    "l1",
    "l2",
    "transformation_function"
]

def enable_logging():
    for name in WRAPPED_MODULES:
        module = importlib.import_module(f"opendp_polars.{name}")
        for f in dir(module):
            is_constructor = f.startswith("make_") or f.startswith("then_")
            is_elem = any(f.endswith(s) for s in ["domain", "distance", "divergence"])
            if is_constructor or is_elem or f in WHITELIST:
                module.__dict__[f] = wrap_func(getattr(module, f), name)

    for cls in LOGGED_CLASSES:
        cls.to_ast = to_ast
        cls.to_json = to_json

    trans_shift_inner = dp.Transformation.__rshift__

    @wraps(trans_shift_inner)
    def trans_shift_outer(lhs: dp.Transformation, rhs):
        chain = trans_shift_inner(lhs, rhs)
        if isinstance(rhs, dp.PartialConstructor):
            chain.log = {"_type": "partial_chain", "lhs": lhs.log, "rhs": rhs.log}
        return chain

    dp.Transformation.__rshift__ = trans_shift_outer

    meas_shift_inner = dp.Measurement.__rshift__

    @wraps(meas_shift_inner)
    def meas_shift_outer(lhs: dp.Measurement, rhs):
        chain = meas_shift_inner(lhs, rhs)
        if isinstance(rhs, dp.PartialConstructor):
            chain.log = {"_type": "partial_chain", "lhs": lhs.log, "rhs": rhs.log}
        return chain

    dp.Measurement.__rshift__ = meas_shift_outer

    # only run once
    enable_logging.__code__ = (lambda: None).__code__

In [31]:
for name in WRAPPED_MODULES:
    module = importlib.import_module(f"opendp_polars.{name}")
    for f in dir(module):
        print(f)

ATOM_EQUIVALENCE_CLASSES
ATOM_MAP
AbsoluteDistance
Any
AnyDomain
AnyDomainPtr
AnyFunction
AnyMeasure
AnyMeasurement
AnyMeasurementPtr
AnyMetric
AnyObject
AnyObjectPtr
AnyQueryable
AnyTransformation
AnyTransformationPtr
AtomDomain
BoolPtr
Callable
CallbackFn
Carrier
ChangeOneDistance
DataFrame
Dict
DiscreteDistance
Domain
DomainDescriptor
ELEMENTARY_TYPES
ExtrinsicObject
FfiError
FfiResult
FfiResultPayload
FfiSlice
FfiSlicePtr
FixedSmoothedMaxDivergence
Function
GLOBAL_FEATURES
GenericType
HASHABLE_TYPES
HammingDistance
HashMap
Hashable
INTEGER_TYPES
INT_SIZES
InsertDeleteDistance
L1Distance
L2Distance
LazyFrame
List
MapDomain
MaxDivergence
Measure
Measurement
Metric
NUMERIC_TYPES
OpenDPException
OptionDomain
Optional
PRIMITIVE_TYPES
PartialConstructor
PrivacyMeasure
Queryable
RuntimeType
RuntimeTypeDescriptor
SMDCurve
SensitivityMetric
Sequence
Series
SeriesDomain
SmoothedMaxDivergence
String
SymmetricDistance
Transformation
TransitionFn
Tuple
Type
Union
UnknownType
UnknownTypeExceptio

### OpenDP Polars

In [26]:
import polars as pl

In [32]:
# Define dtype domain with bounds
lf_domain = dp.lazyframe_domain([
    dp.series_domain("species", dp.atom_domain(T=str)),
    dp.series_domain("island", dp.atom_domain(T=str)),
    dp.series_domain("bill_length_mm", dp.atom_domain(T=float, bounds=(30.0, 65.0))),
    dp.series_domain("bill_depth_mm", dp.atom_domain(T=float)),
    dp.series_domain("flipper_length_mm", dp.atom_domain(T=float)),
    dp.series_domain("body_mass_g", dp.atom_domain(T=float)),
    dp.series_domain("sex", dp.atom_domain(T=str))
])

In [40]:
domains = [
    dp.series_domain("B", dp.atom_domain(T=int)),
]
domain = dp.lazyframe_domain(domains)
json_domain = domain.to_json()
#json_domain = to_ast(domain)
json_domain

'{"version": "0.9.0.dev0", "ast": {"_type": "constructor", "func": "lazyframe_domain", "module": "prelude", "args": [{"_type": "list", "_items": [{"_type": "constructor", "func": "series_domain", "module": "prelude", "args": ["B", {"_type": "constructor", "func": "atom_domain", "module": "prelude", "kwargs": {"T": "i32"}}]}]}]}}'

In [41]:
domain = domain.with_counts(pl.LazyFrame({"B": [1], "counts": [50]}, schema_overrides={"B": pl.Int32, "counts": pl.UInt32}))
to_ast(domain)

LazyFrameDomain(B: i32; margins=[{"B"}])


ValueError: invoke `opendp_logger.enable_logging()` before constructing your measurement

In [35]:
total_counts = pl.LazyFrame({
    "counts": [2_032_543]
}, schema_overrides={"counts": pl.UInt32})

species_counts = pl.LazyFrame({
    "species": ["Adelie", "Chinstrap", "Gentoo"], 
    "counts": [152, 68, 124]
}, schema_overrides={"species": pl.Utf8, "counts": pl.UInt32})

lf_domain = lf_domain.with_counts(total_counts).with_counts(species_counts)

In [36]:
lf_domain.to_json()

ValueError: invoke `opendp_logger.enable_logging()` before constructing your measurement

In [26]:
# Prepare a list of candidates
candidates = [x * 5.0 for x in range(6, 14)]
print(candidates)

partition = ['species']

[30.0, 35.0, 40.0, 45.0, 50.0, 55.0, 60.0, 65.0]


In [27]:
metric = dp.symmetric_distance()
expr_domain = dp.expr_domain(lf_domain, grouping_columns=partition)
temperature = 1.0

In [48]:
def make_quantile_pipeline(quantile):
    # Create expression
    return (
        (dp.csv_domain(lf_domain), metric)
        >> dp.t.then_scan_csv()
        >> dp.t.then_groupby_stable(partition)
        # >> dp.m.then_private_agg(
        #     dp.c.make_basic_composition(
        #         [
        #             (expr_domain, dp.l1(metric))
        #             >> dp.t.then_col('bill_length_mm')
        #             >> dp.m.then_private_quantile_expr(candidates, temperature, quantile)
        #         ]
        #     )
        # )
        # >> dp.t.make_collect(lf_domain, metric)
    )

In [49]:
q25 = make_quantile_pipeline(0.25)
q50 = make_quantile_pipeline(0.5)
q75 = make_quantile_pipeline(0.75)

In [50]:
q25

<opendp_polars.mod.Transformation at 0x7f7db4511140>

In [52]:
to_ast(q25)

in ast
Transformation(
    input_domain   = CsvDomain(species: str, island: str, bill_length_mm: f64, bill_depth_mm: f64, flipper_length_mm: f64, body_mass_g: f64, sex: str),
    output_domain  = LazyGroupByDomain { lazy_frame_domain: LazyFrameDomain(species: str, island: str, bill_length_mm: f64, bill_depth_mm: f64, flipper_length_mm: f64, body_mass_g: f64, sex: str; margins=[{}, {"species"}]), grouping_columns: ["species"] },
    input_metric   = SymmetricDistance(),
    output_metric  = L1(SymmetricDistance)
)
item of logged classes
	 has log
in ast
{'_type': 'partial_chain', 'lhs': {'_type': 'constructor', 'func': 'make_scan_csv', 'module': 'transformations', 'kwargs': {'input_domain': CsvDomain(species: str, island: str, bill_length_mm: f64, bill_depth_mm: f64, flipper_length_mm: f64, body_mass_g: f64, sex: str), 'input_metric': SymmetricDistance(), 'cache': True, 'low_memory': False, 'rechunk': True}}, 'rhs': {'_type': 'constructor', 'func': 'then_groupby_stable', 'module': 'tran

ValueError: invoke `opendp_logger.enable_logging()` before constructing your measurement

In [60]:
for name in WRAPPED_MODULES:
    module = importlib.import_module(f"opendp.{name}")
    for f in dir(module):
        print(f)

ATOM_EQUIVALENCE_CLASSES
ATOM_MAP
AbsoluteDistance
AllDomain
Any
AnyMeasurement
AnyMeasurementPtr
AnyObject
AnyObjectPtr
AnyTransformation
AnyTransformationPtr
BoolPtr
BoundedDomain
Callable
CallbackFn
Carrier
ChangeOneDistance
DatasetMetric
Dict
Domain
ELEMENTARY_TYPES
FfiError
FfiResult
FfiResultPayload
FfiSlice
FfiSlicePtr
FixedSmoothedMaxDivergence
GLOBAL_FEATURES
GenericType
HammingDistance
HashMap
Hashable
INT_SIZES
InherentNullDomain
InsertDeleteDistance
L1Distance
L2Distance
List
MaxDivergence
Measurement
OpenDPException
OptionNullDomain
Optional
PrivacyMeasure
RuntimeType
RuntimeTypeDescriptor
SMDCurve
SensitivityMetric
Sequence
SizedDomain
SmoothedMaxDivergence
String
SymmetricDistance
Transformation
Tuple
Type
Union
UnknownType
UnknownTypeException
Vec
VectorDomain
ZeroConcentratedDivergence
__all__
__builtins__
__cached__
__doc__
__file__
__loader__
__name__
__package__
__spec__
assert_features
binary_search
binary_search_chain
binary_search_param
c_int_limits
c_to_py
check

ModuleNotFoundError: No module named 'opendp.domains'

In [62]:
import json
from functools import wraps
from polars import LazyFrame, DataFrame
import importlib
import pkg_resources

OPENDP_VERSION = pkg_resources.get_distribution("opendp_polars").version

__all__ = ["enable_logging"]
LOGGED_CLASSES = (
    dp.Transformation,
    dp.Measurement,
    dp.Function,
    dp.Domain,
    dp.Metric,
    dp.Measure,
    dp.PartialConstructor,
)
WRAPPED_MODULES = [
    "transformations",
    "measurements",
    "combinators",
    "domains",
    "metrics",
    "measures",
    "prelude",
    "core",
]

WHITELIST = [
    "lazyframe_domain_with_counts",
    "dataframe_domain_with_counts",
    "l1",
    "l2",
    "transformation_function"
]

def enable_logging():
    for name in WRAPPED_MODULES:
        module = importlib.import_module(f"opendp.{name}")
        for f in dir(module):
            is_constructor = f.startswith("make_") or f.startswith("then_")
            is_elem = any(f.endswith(s) for s in ["domain", "distance", "divergence"])
            if is_constructor or is_elem or f in WHITELIST:
                module.__dict__[f] = wrap_func(getattr(module, f), name)

    for cls in LOGGED_CLASSES:
        cls.to_ast = to_ast
        cls.to_json = to_json

    trans_shift_inner = dp.Transformation.__rshift__

    @wraps(trans_shift_inner)
    def trans_shift_outer(lhs: dp.Transformation, rhs):
        chain = trans_shift_inner(lhs, rhs)
        if isinstance(rhs, dp.PartialConstructor):
            chain.log = {"_type": "partial_chain", "lhs": lhs.log, "rhs": rhs.log}
        return chain

    dp.Transformation.__rshift__ = trans_shift_outer

    meas_shift_inner = dp.Measurement.__rshift__

    @wraps(meas_shift_inner)
    def meas_shift_outer(lhs: dp.Measurement, rhs):
        chain = meas_shift_inner(lhs, rhs)
        if isinstance(rhs, dp.PartialConstructor):
            chain.log = {"_type": "partial_chain", "lhs": lhs.log, "rhs": rhs.log}
        return chain

    dp.Measurement.__rshift__ = meas_shift_outer

    # only run once
    enable_logging.__code__ = (lambda: None).__code__

def wrap_func(f, module_name):
    @wraps(f)
    def wrapper(*args, **kwargs):
        chain = f(*args, **kwargs)
        if isinstance(chain, LOGGED_CLASSES):
            chain.log = {
                "_type": "constructor",
                "func": f.__name__,
                "module": module_name,
            }
            args and chain.log.setdefault("args", args)
            kwargs and chain.log.setdefault("kwargs", kwargs)
        return chain

    return wrapper

### OpenDP Context

In [21]:
!pip install opendp==0.8.0

Collecting opendp==0.8.0
  Downloading opendp-0.8.0-py3-none-any.whl (30.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.1/30.1 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: opendp
  Attempting uninstall: opendp
    Found existing installation: opendp 0.6.2
    Uninstalling opendp-0.6.2:
      Successfully uninstalled opendp-0.6.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
smartnoise-sql 1.0.0 requires opendp<0.7.0,>=0.6.0, but you have opendp 0.8.0 which is incompatible.[0m[31m
[0mSuccessfully installed opendp-0.8.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

In [67]:
from typing import List

In [68]:
context = dp.Context.compositor(
    data=[1, 2, 3],
    privacy_unit=dp.unit_of(contributions=1),
    privacy_loss=dp.loss_of(epsilon=3.0),
    domain=dp.domain_of(List[int]),
    split_evenly_over=1
)

In [70]:
dp_sum = context.query().clamp((0, 5)).sum().laplace().release()
dp_sum.release()

AttributeError: 'PartialConstructor' object has no attribute 'log'

## Logger in Server

In [1]:
from fso_sdd_demo.client import Client
import numpy as np

In [2]:
APP_URL = "http://sdd_server_dev:80" # Onyxia: "https://sdd-server-demo.lab.sspcloud.fr"
USER_NAME = "Dr. Antartica"
DATASET_NAME = "PENGUIN"
client = Client(url=APP_URL, user_name = USER_NAME, dataset_name = DATASET_NAME)

In [3]:
penguin_metadata = client.get_dataset_metadata()['']['Schema']['Table']
penguin_metadata

{'max_ids': 1,
 'row_privacy': True,
 'censor_dims': False,
 'species': {'type': 'string',
  'cardinality': 3,
  'categories': ['Adelie', 'Chinstrap', 'Gentoo']},
 'island': {'type': 'string',
  'cardinality': 3,
  'categories': ['Torgersen', 'Biscoe', 'Dream']},
 'bill_length_mm': {'type': 'float', 'lower': 30.0, 'upper': 65.0},
 'bill_depth_mm': {'type': 'float', 'lower': 13.0, 'upper': 23.0},
 'flipper_length_mm': {'type': 'float', 'lower': 150.0, 'upper': 250.0},
 'body_mass_g': {'type': 'float', 'lower': 2000.0, 'upper': 7000.0},
 'sex': {'type': 'string', 'cardinality': 2, 'categories': ['MALE', 'FEMALE']}}