# OpenDP

In [4]:
import pandas as pd

df = pd.read_csv("https://raw.githubusercontent.com/mwaskom/seaborn-data/master/penguins.csv")
df.head(2)

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE


## Logger in notebook

In [5]:
import opendp_polars.prelude as dp
import opendp_polars.transformations as trans
import opendp_polars.measurements as meas
from opendp_logger import make_load_json

dp.enable_features("contrib")

### Basic OpenDP

In [6]:
columns = ["species", "island", "bill_length_mm", "bill_depth_mm", "flipper_length_mm", "body_mass_g", "sex"]
bill_length_min, bill_length_max = 30.0, 60.0
nb_penguins = 343
avg_bill_length = 44.13

In [7]:
bill_length_transformation_pipeline = (
    trans.make_split_dataframe(separator=",", col_names=columns) >>
    trans.make_select_column(key="bill_length_mm", TOA=str) >>
    trans.then_cast_default(TOA=float) >>
    trans.then_clamp(bounds=(bill_length_min, bill_length_max)) >>
    trans.then_resize(size=nb_penguins, constant=avg_bill_length) >>
    trans.then_variance() >>
    meas.then_laplace(scale=5.0)
)

In [8]:
client_pipeline = bill_length_transformation_pipeline.to_json()

In [9]:
opendp_pipe = make_load_json(client_pipeline)

In [10]:
cost = opendp_pipe.map(d_in=1)
cost

0.5247813411085608

In [11]:
release_data = opendp_pipe(df.to_csv())
release_data

-15.265025718710955

### OpenDP Polars

In [22]:
import polars as pl

In [23]:
# Define dtype domain with bounds
lf_domain = dp.lazyframe_domain([
    dp.series_domain("species", dp.atom_domain(T=str)),
    dp.series_domain("island", dp.atom_domain(T=str)),
    dp.series_domain("bill_length_mm", dp.atom_domain(T=float, bounds=(30.0, 65.0))),
    dp.series_domain("bill_depth_mm", dp.atom_domain(T=float)),
    dp.series_domain("flipper_length_mm", dp.atom_domain(T=float)),
    dp.series_domain("body_mass_g", dp.atom_domain(T=float)),
    dp.series_domain("sex", dp.atom_domain(T=str))
])

In [24]:
total_counts = pl.LazyFrame({
    "counts": [2_032_543]
}, schema_overrides={"counts": pl.UInt32})

species_counts = pl.LazyFrame({
    "species": ["Adelie", "Chinstrap", "Gentoo"], 
    "counts": [152, 68, 124]
}, schema_overrides={"species": pl.Utf8, "counts": pl.UInt32})

lf_domain = lf_domain.with_counts(total_counts).with_counts(species_counts)

In [25]:
# Prepare a list of candidates
candidates = [x * 5.0 for x in range(6, 14)]
print(candidates)

partition = ['species']

[30.0, 35.0, 40.0, 45.0, 50.0, 55.0, 60.0, 65.0]


In [26]:
metric = dp.symmetric_distance()
expr_domain = dp.expr_domain(lf_domain, grouping_columns=partition)
temperature = 1.0

In [27]:
def make_quantile_pipeline(quantile):
    # Create expression
    return (
        (dp.csv_domain(lf_domain), metric)
        >> dp.t.then_scan_csv()
        >> dp.t.then_groupby_stable(partition)
        >> dp.m.then_private_agg(
            dp.c.make_basic_composition(
                [
                    (expr_domain, dp.l1(metric))
                    >> dp.t.then_col('bill_length_mm')
                    >> dp.m.then_private_quantile_expr(candidates, temperature, quantile)
                ]
            )
        )
        >> dp.t.make_collect(lf_domain, metric)
    )

In [28]:
q25 = make_quantile_pipeline(0.25)
q50 = make_quantile_pipeline(0.5)
q75 = make_quantile_pipeline(0.75)

In [29]:
q25

<opendp_polars.mod.Measurement at 0x7fed6f4b32c0>

In [30]:
client_pipeline = q25.to_json()

ValueError: invoke `opendp_logger.enable_logging()` before constructing your measurement

### OpenDP Context

In [21]:
!pip install opendp==0.8.0

Collecting opendp==0.8.0
  Downloading opendp-0.8.0-py3-none-any.whl (30.1 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m30.1/30.1 MB[0m [31m12.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hInstalling collected packages: opendp
  Attempting uninstall: opendp
    Found existing installation: opendp 0.6.2
    Uninstalling opendp-0.6.2:
      Successfully uninstalled opendp-0.6.2
[31mERROR: pip's dependency resolver does not currently take into account all the packages that are installed. This behaviour is the source of the following dependency conflicts.
smartnoise-sql 1.0.0 requires opendp<0.7.0,>=0.6.0, but you have opendp 0.8.0 which is incompatible.[0m[31m
[0mSuccessfully installed opendp-0.8.0
[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m23.0.1[0m[39;49m -> [0m[32;49m23.2.1[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49mpip install --upgrade pip[0m

In [67]:
from typing import List

In [68]:
context = dp.Context.compositor(
    data=[1, 2, 3],
    privacy_unit=dp.unit_of(contributions=1),
    privacy_loss=dp.loss_of(epsilon=3.0),
    domain=dp.domain_of(List[int]),
    split_evenly_over=1
)

In [70]:
dp_sum = context.query().clamp((0, 5)).sum().laplace().release()
dp_sum.release()

AttributeError: 'PartialConstructor' object has no attribute 'log'

## Logger in Server

In [1]:
from fso_sdd_demo.client import Client
import numpy as np

In [2]:
APP_URL = "http://sdd_server_dev:80" # Onyxia: "https://sdd-server-demo.lab.sspcloud.fr"
USER_NAME = "Dr. Antartica"
DATASET_NAME = "PENGUIN"
client = Client(url=APP_URL, user_name = USER_NAME, dataset_name = DATASET_NAME)

In [3]:
penguin_metadata = client.get_dataset_metadata()['']['Schema']['Table']
penguin_metadata

{'max_ids': 1,
 'row_privacy': True,
 'censor_dims': False,
 'species': {'type': 'string',
  'cardinality': 3,
  'categories': ['Adelie', 'Chinstrap', 'Gentoo']},
 'island': {'type': 'string',
  'cardinality': 3,
  'categories': ['Torgersen', 'Biscoe', 'Dream']},
 'bill_length_mm': {'type': 'float', 'lower': 30.0, 'upper': 65.0},
 'bill_depth_mm': {'type': 'float', 'lower': 13.0, 'upper': 23.0},
 'flipper_length_mm': {'type': 'float', 'lower': 150.0, 'upper': 250.0},
 'body_mass_g': {'type': 'float', 'lower': 2000.0, 'upper': 7000.0},
 'sex': {'type': 'string', 'cardinality': 2, 'categories': ['MALE', 'FEMALE']}}