## Minimal OpenDP example on the income dataset

(Just to check if it's working as intended)

In [1]:
from lomas_client.client import Client

APP_URL = "http://lomas_server_dev:80" # Onyxia: "https://lomas-server-demo.lab.sspcloud.fr"
USER_NAME = "Dr. FSO"
DATASET_NAME = "FSO_INCOME_SYNTHETIC"
client = Client(url=APP_URL, user_name = USER_NAME, dataset_name = DATASET_NAME)

In [2]:
income_metadata = client.get_dataset_metadata()
income_metadata

{'max_ids': 1,
 'region': {'type': 'int'},
 'eco_branch': {'type': 'int'},
 'profession': {'type': 'int'},
 'education': {'type': 'int'},
 'age': {'type': 'int'},
 'sex': {'type': 'int'},
 'income': {'type': 'float', 'lower': 1000, 'upper': 100000}}

In [3]:
NB_ROWS = 200
SEED = 0

df_dummy = client.get_dummy_dataset(
    nb_rows = NB_ROWS, 
    seed = SEED
)

print(df_dummy.shape)
df_dummy.head()

(200, 7)


Unnamed: 0,region,eco_branch,profession,education,age,sex,income
0,-7268,-8759,4470,-7097,2279,4470,99981.049234
1,799,903,-4654,-6748,-2700,1345,20528.687956
2,-155,-5597,-5224,-3552,9480,8039,53955.91164
3,9648,7582,3729,4274,-178,-7578,29734.556213
4,3123,9381,-5878,-2574,9842,8165,31113.182182


In [4]:
import opendp.prelude as dp
import opendp.transformations as trans
import opendp.measurements as meas

dp.enable_features("contrib")

columns = ["region", "eco_branch", "profession", "education", "age", "sex", "income"]

income_min = float(income_metadata["income"]["lower"])
income_max = float(income_metadata["income"]["upper"])

num_rows_pipeline = (
    trans.make_split_dataframe(separator=",", col_names=columns) >>
    trans.make_select_column(key="income", TOA=str) >>
    trans.then_count() >>
    meas.then_laplace(scale=0.5) # scale arbitrary
)

num_rows = client.opendp_query(
    opendp_pipeline = num_rows_pipeline,
)["query_response"]


In [5]:
income_average_pipeline = (
    trans.make_split_dataframe(separator=",", col_names=columns) >>
    trans.make_select_column(key="income", TOA=str) >>
    trans.then_cast_default(TOA=float) >>
    trans.then_clamp(bounds=(income_min, income_max)) >>
    trans.then_resize(size=num_rows, constant=1000.0) >> # Arbitrary constant
    trans.then_mean() >>
    meas.then_laplace(scale=0.5)
)

income_average = client.opendp_query(
    opendp_pipeline = income_average_pipeline,
    dummy=True
)

In [6]:
income_average

{'query_response': 1002.8797912105409}