Initialization of the request function and constants, and getting the auth token

In [None]:
import os
from pprint import pprint

from requests import request

# Base URL every endpoint is appended to. Defaults to the local demo server;
# set DP_DEMO_API_ROOT in the environment to point the notebook elsewhere.
SERVER_API_ROOT = os.environ.get("DP_DEMO_API_ROOT", "http://localhost:8000/v2")

# Credentials posted to /login. The username stays fixed because "root" is also
# hardcoded in endpoint paths further down; the password can be supplied through
# DP_DEMO_ROOT_PASSWORD so a real secret never has to be typed into the notebook.
ROOT_USER = {
    "username": "root",
    "password": os.environ.get("DP_DEMO_ROOT_PASSWORD", "123"),
}

# Query-string suffixes appended to /queries/evaluate to choose the engine.
TUM_EVAL = "?engine=tumult"
OPEN_EVAL = "?engine=opendp"
GDP_EVAL = "?engine=googledp"

def make_request(endpoint, method, token=None, body=None, content_type="application/json"):
    """Send one HTTP request to the API server and return the response.

    Args:
        endpoint: Path appended verbatim to ``SERVER_API_ROOT`` (e.g. ``"/login"``).
        method: HTTP method name such as ``"GET"`` or ``"POST"``.
        token: Optional bearer token; when given it is sent in the
            ``Authorization`` header.
        body: Optional payload. It is passed as ``json=`` when ``content_type``
            is ``"application/json"`` and as ``data=`` for every other content type.
        content_type: Value of the ``Content-Type`` header.

    Returns:
        Whatever ``request(...)`` returns for the assembled arguments.
    """
    headers = {"Content-Type": content_type}
    if token is not None:
        headers["Authorization"] = f"Bearer {token}"

    request_args = {
        "url": f"{SERVER_API_ROOT}{endpoint}",
        "method": method,
        "headers": headers,
    }
    if body is not None:
        # Only "application/json" and "text/csv" used to be recognised, so a body
        # sent with any other content type was silently dropped. Anything that
        # is not JSON is now forwarded untouched as the raw request data.
        payload_key = "json" if content_type == "application/json" else "data"
        request_args[payload_key] = body

    return request(**request_args)

# Log in with ROOT_USER; the bearer token is read from the "token" field of the JSON reply.
login_response = make_request(endpoint="/login", method="POST", body=ROOT_USER)
# Report a rejected login as an HTTP error right here instead of as a
# KeyError on 'token' further down.
login_response.raise_for_status()
response = login_response.json()

TOKEN = response['token']

Creating the dataset

In [None]:
# Payload describing the "salaries" dataset: a four-column schema plus the
# privacy notion and the total epsilon budget of the dataset.
create_dataset_body = {
    "name": "salaries",
    "owner": "root",
    "schema": [
        {"name": "name", "type": {"name": "Text"}},
        {"name": "age", "type": {"name": "Int", "low": 18, "high": 100}},
        {
            "name": "job",
            "type": {
                "name": "Enum",
                "labels": [
                    "Accountant",
                    "Dentist",
                    "High School Teacher",
                    "Software Engineer",
                ],
            },
        },
        {"name": "salary", "type": {"name": "Int", "low": 0, "high": 100000}},
    ],
    "privacy_notion": "PureDP",
    "total_budget": {"epsilon": 50},
}


# POST the dataset definition and show the server's JSON reply.
response = make_request(
    endpoint="/datasets",
    method="POST",
    token=TOKEN,
    body=create_dataset_body,
)
pprint(response.json())

Uploading the data to the dataset

In [None]:
# Read the demo CSV as text. The handle is not called `csv`, so it does not
# shadow the standard-library `csv` module that a later cell imports.
with open("demo_data.csv") as demo_file:
    data = demo_file.read()

# Upload the encoded file contents as a text/csv body; the Response object
# itself (not a parsed body) is printed.
response = make_request(
    endpoint="/datasets/1/upload",
    method="POST",
    token=TOKEN,
    content_type="text/csv",
    body=data.encode(),
)
pprint(response)

Allocating budget to user

In [None]:
# Allocate epsilon=1 through /budgets/allocations/root/1
# (presumably user "root" on dataset 1 - confirm against the API).
allocate_budget_body = {"epsilon": 1}

response = make_request(
    endpoint="/budgets/allocations/root/1",
    method="POST",
    token=TOKEN,
    body=allocate_budget_body,
)
pprint(response)

Validating a count query

In [None]:
# Count query over the "age" column of dataset 1 with the Laplace mechanism,
# requesting an epsilon of 0.2.
VALIDATE_COUNT_BODY = {
    "budget": {"epsilon": 0.2},
    "dataset": 1,
    "query": [
        {"count": {"column": "age", "mech": "Laplace"}},
    ],
}

# Send the count query to the validate endpoint and show the JSON reply.
validate_count_response = make_request(
    endpoint='/queries/validate',
    method="POST",
    token=TOKEN,
    body=VALIDATE_COUNT_BODY,
)
pprint(validate_count_response.json())

Running evaluate on tumult, opendp and googledp connectors and counting the rows without DP using a for loop

In [None]:
# count no dp

import csv

# Count the data rows locally, without any differential privacy.
# DictReader takes the first line as the header, so it is not counted.
# newline='' is what the csv module documentation asks for on files handed to
# a reader; without it, line breaks inside quoted fields can be misread.
with open('demo_data.csv', mode='r', newline='') as csv_file:
    csv_no_dp = csv.DictReader(csv_file)
    n_rows = 0
    for row in csv_no_dp:
        n_rows += 1

print(f"Count no DP: {n_rows}")

# count opendp
# Laplace count over "age" on dataset 1, requesting epsilon 0.2.
ODP_COUNT_BODY = {
    "budget": {"epsilon": 0.2},
    "dataset": 1,
    "query": [
        {"count": {"column": "age", "mech": "Laplace"}},
    ],
}

# Evaluate the count with the engine selected by OPEN_EVAL.
ocd_resp = make_request(
    endpoint=f'/queries/evaluate{OPEN_EVAL}',
    method="POST",
    token=TOKEN,
    body=ODP_COUNT_BODY,
)
print("Count Opendp result:")
pprint(ocd_resp.json())

# count googledp
# Laplace count over "age" on dataset 1, requesting epsilon 0.2.
GDP_COUNT_BODY = {
    "budget": {"epsilon": 0.2},
    "dataset": 1,
    "query": [
        {"count": {"column": "age", "mech": "Laplace"}},
    ],
}

# Evaluate the count with the engine selected by GDP_EVAL.
gcd_resp = make_request(
    endpoint=f'/queries/evaluate{GDP_EVAL}',
    method="POST",
    token=TOKEN,
    body=GDP_COUNT_BODY,
)
print("Count GoogleDp result")
pprint(gcd_resp.json())

# count tumult
# Unlike the other count bodies in this cell, this one names no "column".
TUM_COUNT_BODY = {
    "budget": {"epsilon": 0.2},
    "dataset": 1,
    "query": [
        {"count": {"mech": "Laplace"}},
    ],
}

# Evaluate the count with the engine selected by TUM_EVAL.
tcb_resp = make_request(
    endpoint=f'/queries/evaluate{TUM_EVAL}',
    method="POST",
    token=TOKEN,
    body=TUM_COUNT_BODY,
)
print("Count Tumult result")
pprint(tcb_resp.json())


Evaluating a minimum query on the GoogleDP connector. (This should fail, as GoogleDP does not support min.)

In [None]:
# Minimum query over the "age" column of dataset 1, requesting epsilon 0.2.
MIN_BODY = {
    "budget": {"epsilon": 0.2},
    "dataset": 1,
    "query": [
        {"min": {"column": "age"}},
    ],
}

# Send the min query to the engine selected by GDP_EVAL and show the reply.
fail_min_resp = make_request(
    endpoint=f'/queries/evaluate{GDP_EVAL}',
    method="POST",
    token=TOKEN,
    body=MIN_BODY,
)
print("GoogleDP Min Query")
pprint(fail_min_resp.json())

Running validate on the min query to see if any connector supports min

In [None]:
# Send the same min query to the validate endpoint and show the JSON reply.
min_validate_resp = make_request(
    endpoint='/queries/validate',
    method="POST",
    token=TOKEN,
    body=MIN_BODY,
)
pprint(min_validate_resp.json())

Running min query on tumult connector

In [None]:
# Evaluate the min query with the engine selected by TUM_EVAL.
tminb_resp = make_request(
    endpoint=f'/queries/evaluate{TUM_EVAL}',
    method="POST",
    token=TOKEN,
    body=MIN_BODY,
)
print("Tumult Min Result")
pprint(tminb_resp.json())

We have now spent a total of 0.8 epsilon, so 0.2 epsilon of this user's allocation is left.
What happens if we want to make a query that costs 0.5 epsilon?

In [None]:
# Three-step query (filter, bin, count) on dataset 1 that requests 0.5 epsilon.
NOT_ENOUGH_EPS_EVAL = {
    "budget": {"epsilon": 0.5},
    "dataset": 1,
    "query": [
        {"filter": ["age > 20", "age < 60"]},
        {"bin": {"age": [20, 30, 40, 50, 60]}},
        {"count": {"column": "age", "mech": "Laplace"}},
    ],
}

# Submit the 0.5-epsilon query to the engine selected by GDP_EVAL and show the reply.
nee_eval = make_request(
    endpoint=f'/queries/evaluate{GDP_EVAL}',
    method="POST",
    token=TOKEN,
    body=NOT_ENOUGH_EPS_EVAL,
)
pprint(nee_eval.json())

Let's check how much budget the user has left

In [None]:
# GET /budgets/users/root and show the JSON reply.
budget_check_resp = make_request(
    endpoint='/budgets/users/root',
    method="GET",
    token=TOKEN,
)
pprint(budget_check_resp.json())

We see that the user only has 0.2 epsilon left on dataset 1
Let's do the query with 0.2 epsilon

In [None]:
# The same filter, bin and count steps on dataset 1, this time requesting 0.2 epsilon.
ENOUGH_EPS_EVAL = {
    "budget": {"epsilon": 0.2},
    "dataset": 1,
    "query": [
        {"filter": ["age > 20", "age < 60"]},
        {"bin": {"age": [20, 30, 40, 50, 60]}},
        {"count": {"column": "age", "mech": "Laplace"}},
    ],
}

# Submit the 0.2-epsilon query to the engine selected by GDP_EVAL and show the reply.
eee_eval = make_request(
    endpoint=f'/queries/evaluate{GDP_EVAL}',
    method="POST",
    token=TOKEN,
    body=ENOUGH_EPS_EVAL,
)
pprint(eee_eval.json())

Checking the budget of the user

In [None]:
# Query /budgets/users/root again and show the JSON reply.
budget_check_resp = make_request(
    endpoint='/budgets/users/root',
    method="GET",
    token=TOKEN,
)
pprint(budget_check_resp.json())