# Quick Testing and Verify Observability

In [6]:
from lomas_client import Client
import opendp as dp
import opendp.transformations as trans
import opendp.measurements as meas

## Basic Queries

In [7]:
APP_URL = "http://lomas_server"
USER_NAME = "Dr. Antartica"
DATASET_NAME = "PENGUIN"
client = Client(url=APP_URL, user_name = USER_NAME, dataset_name = DATASET_NAME)

In [8]:
penguin_metadata = client.get_dataset_metadata()
penguin_metadata

{'max_ids': 1,
 'rows': 344,
 'row_privacy': True,
 'censor_dims': False,
 'columns': {'species': {'private_id': False,
   'nullable': False,
   'max_partition_length': None,
   'max_influenced_partitions': None,
   'max_partition_contributions': None,
   'type': 'string',
   'cardinality': 3,
   'categories': ['Adelie', 'Chinstrap', 'Gentoo']},
  'island': {'private_id': False,
   'nullable': False,
   'max_partition_length': None,
   'max_influenced_partitions': None,
   'max_partition_contributions': None,
   'type': 'string',
   'cardinality': 3,
   'categories': ['Torgersen', 'Biscoe', 'Dream']},
  'bill_length_mm': {'private_id': False,
   'nullable': False,
   'max_partition_length': None,
   'max_influenced_partitions': None,
   'max_partition_contributions': None,
   'type': 'float',
   'precision': 64,
   'lower': 30.0,
   'upper': 65.0},
  'bill_depth_mm': {'private_id': False,
   'nullable': False,
   'max_partition_length': None,
   'max_influenced_partitions': None,
   'm

In [12]:
df_dummy = client.get_dummy_dataset(nb_rows = 100, seed = 0)
df_dummy.head()

Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Gentoo,Biscoe,46.799577,16.196816,239.680123,3010.84047,FEMALE
1,Chinstrap,Dream,38.133052,14.875077,208.332005,6689.525543,MALE
2,Chinstrap,Torgersen,58.06582,19.725266,154.021822,2473.883392,MALE
3,Adelie,Torgersen,62.323556,14.951074,221.148682,2024.497075,FEMALE
4,Adelie,Dream,39.31456,18.776879,206.902585,3614.604018,MALE


In [13]:
ib = client.get_initial_budget()
print(ib)

tsb = client.get_total_spent_budget()
print(tsb)

rb = client.get_remaining_budget()
print(rb)

initial_epsilon=10.0 initial_delta=0.005
total_spent_epsilon=0.0 total_spent_delta=0.0
remaining_epsilon=10.0 remaining_delta=0.005


In [14]:
QUERY = "SELECT AVG(bill_length_mm) AS avg_bill_length_mm FROM df"
EPSILON = 0.5
DELTA = 1e-4
cost = client.smartnoise_sql.cost(query = QUERY, epsilon = EPSILON, delta = DELTA)
cost

CostResponse(epsilon=1.0, delta=4.999999999999449e-05)

In [15]:
dummy_res = client.smartnoise_sql.query(query = QUERY, epsilon = 100.0, delta = 0.99, dummy = True, nb_rows = 100, seed = 0)
dummy_res

QueryResponse(epsilon=200.0, delta=0.495, requested_by='Dr. Antartica', result=SmartnoiseSQLQueryResult(res_type=<DPLibraries.SMARTNOISE_SQL: 'smartnoise_sql'>, df=   avg_bill_length_mm
0           48.588302))

In [16]:
res = client.smartnoise_sql.query(query = QUERY, epsilon = EPSILON, delta = DELTA, dummy = False)
res

QueryResponse(epsilon=1.0, delta=4.999999999999449e-05, requested_by='Dr. Antartica', result=SmartnoiseSQLQueryResult(res_type=<DPLibraries.SMARTNOISE_SQL: 'smartnoise_sql'>, df=   avg_bill_length_mm
0           44.292243))

In [20]:
columns = list(penguin_metadata["columns"].keys())
bill_length_min = penguin_metadata['columns']['bill_length_mm']['lower']
bill_length_max = penguin_metadata['columns']['bill_length_mm']['upper']
bill_length_transformation_pipeline = (
    trans.make_split_dataframe(separator=",", col_names=columns) >>
    trans.make_select_column(key="bill_length_mm", TOA=str) >>
    trans.then_cast_default(TOA=float) >>
    trans.then_clamp(bounds=(bill_length_min, bill_length_max)) >>
    trans.then_resize(size=100, constant=44.6) >>
    trans.then_variance()
)

In [21]:
# No instruction for noise addition mechanism: Expect to fail !!!
client.opendp.query(opendp_pipeline = bill_length_transformation_pipeline, dummy=True)

ValidationError: 1 validation error for tagged-union[InvalidQueryExceptionModel,ExternalLibraryExceptionModel,UnauthorizedAccessExceptionModel,InternalServerExceptionModel]
  JSON input should be string, bytes or bytearray [type=json_type, input_value={'type': 'InvalidQueryExc...cessed in this server.'}, input_type=dict]
    For further information visit https://errors.pydantic.dev/2.10/v/json_type

In [22]:
var_bill_length_measurement_pipeline = (
    bill_length_transformation_pipeline >>
    meas.then_laplace(scale=5.0)
)
dummy_var_res = client.opendp.query(opendp_pipeline = var_bill_length_measurement_pipeline, dummy=True)
dummy_var_res

QueryResponse(epsilon=2.4500000000007707, delta=0.0, requested_by='Dr. Antartica', result=OpenDPQueryResult(res_type=<DPLibraries.OPENDP: 'opendp'>, value=99.65514137530252))

In [23]:
cost_res = client.opendp.cost(opendp_pipeline = var_bill_length_measurement_pipeline)
cost_res

CostResponse(epsilon=2.4500000000007707, delta=0.0)

In [24]:
var_res = client.opendp.query(opendp_pipeline = var_bill_length_measurement_pipeline)
var_res

QueryResponse(epsilon=2.4500000000007707, delta=0.0, requested_by='Dr. Antartica', result=OpenDPQueryResult(res_type=<DPLibraries.OPENDP: 'opendp'>, value=30.445058750420067))

## Many Queries

In [25]:
for i in range(0,10):
    print(i)
    ib = client.get_initial_budget()
    print(ib)
    tsb = client.get_total_spent_budget()
    print(tsb)
    rb = client.get_remaining_budget()
    print(rb)
    cost = client.smartnoise_sql.cost(query = QUERY, epsilon = EPSILON, delta = DELTA)
    print(cost)

0
initial_epsilon=10.0 initial_delta=0.005
total_spent_epsilon=3.4500000000007707 total_spent_delta=4.999999999999449e-05
remaining_epsilon=6.54999999999923 remaining_delta=0.004950000000000006
epsilon=1.0 delta=4.999999999999449e-05
1
initial_epsilon=10.0 initial_delta=0.005
total_spent_epsilon=3.4500000000007707 total_spent_delta=4.999999999999449e-05
remaining_epsilon=6.54999999999923 remaining_delta=0.004950000000000006
epsilon=1.0 delta=4.999999999999449e-05
2
initial_epsilon=10.0 initial_delta=0.005
total_spent_epsilon=3.4500000000007707 total_spent_delta=4.999999999999449e-05
remaining_epsilon=6.54999999999923 remaining_delta=0.004950000000000006
epsilon=1.0 delta=4.999999999999449e-05
3
initial_epsilon=10.0 initial_delta=0.005
total_spent_epsilon=3.4500000000007707 total_spent_delta=4.999999999999449e-05
remaining_epsilon=6.54999999999923 remaining_delta=0.004950000000000006
epsilon=1.0 delta=4.999999999999449e-05
4
initial_epsilon=10.0 initial_delta=0.005
total_spent_epsilon=3

In [26]:
for i in range(0,10):
    print(i)
    dummy_res = client.smartnoise_sql.query(query = QUERY, epsilon = 100.0, delta = 0.99, dummy = True, nb_rows = 100, seed = 0)
    print(dummy_res)

0
epsilon=200.0 delta=0.495 requested_by='Dr. Antartica' result=SmartnoiseSQLQueryResult(res_type=<DPLibraries.SMARTNOISE_SQL: 'smartnoise_sql'>, df=   avg_bill_length_mm
0            48.57558)
1
epsilon=200.0 delta=0.495 requested_by='Dr. Antartica' result=SmartnoiseSQLQueryResult(res_type=<DPLibraries.SMARTNOISE_SQL: 'smartnoise_sql'>, df=   avg_bill_length_mm
0           48.548767)
2
epsilon=200.0 delta=0.495 requested_by='Dr. Antartica' result=SmartnoiseSQLQueryResult(res_type=<DPLibraries.SMARTNOISE_SQL: 'smartnoise_sql'>, df=   avg_bill_length_mm
0           48.554348)
3
epsilon=200.0 delta=0.495 requested_by='Dr. Antartica' result=SmartnoiseSQLQueryResult(res_type=<DPLibraries.SMARTNOISE_SQL: 'smartnoise_sql'>, df=   avg_bill_length_mm
0           48.605091)
4
epsilon=200.0 delta=0.495 requested_by='Dr. Antartica' result=SmartnoiseSQLQueryResult(res_type=<DPLibraries.SMARTNOISE_SQL: 'smartnoise_sql'>, df=   avg_bill_length_mm
0           48.577882)
5
epsilon=200.0 delta=0.495 re