In [1]:
import getpass
import json
import os

from DataManager import DataManager

In [2]:
# Connect to the data manager.
# Credentials are read from the environment so they are never saved in the
# notebook; when a variable is unset (or empty) the user is prompted instead,
# with the password prompt hiding what is typed.
dm = DataManager()
dm_email = os.environ.get("DATA_MANAGER_EMAIL") or input("Data manager email: ")
dm_password = os.environ.get("DATA_MANAGER_PASSWORD") or getpass.getpass("Data manager password: ")
dm.connect(dm_email, dm_password)


In [None]:
# Ask the data manager for its projects and print each entry on its own line
response = dm.get_projects()
projects = response.json()
for project in projects:
    print(project)

In [None]:
# Ask the data manager for every scenario and print each entry on its own line
response = dm.get_scenarios()
all_scenarios = response.json()
for scenario in all_scenarios:
    print(scenario)

In [None]:
# Print the scenarios that belong to one project
# (the argument 1 is presumably a project id -- confirm against DataManager)
response = dm.get_scenarios_for_project(1)
project_scenarios = response.json()
for scenario in project_scenarios:
    print(scenario)

In [None]:
# List the variables for a specific project
response = dm.get_variables_for_project(1)

# Keep `response` as the response object and hold the parsed data under its
# own name, so the variable never changes type within the cell.
variable_groups = response.json()

# This endpoint has been decoded twice historically, i.e. the body is
# apparently a JSON string that itself contains JSON. Decode the inner layer
# only when the first pass yields a string, so the cell keeps working whether
# the body is encoded once or twice.
if isinstance(variable_groups, str):
    variable_groups = json.loads(variable_groups)

for var_group in variable_groups:
    print(var_group)

In [None]:
# What to build: the project, its scenarios, and the columns to include
project_name = "BabyBonds"
scenarios = ["Baseline_v1", "BabyBonds_v1"]
person_variables = [
    "perid",
    "race",
    "sex",
    "hispanic",
    "year_died",
    "birth_year",
]
family_variables = ["fam_id", "numkids"]

# Optional filters
birth_year_range = [1981, 2018]
year_range = [1950, 2100]

# Keyword arguments gathered in one place, then handed to the request
dataset_options = {
    "family_variables": family_variables,
    "person_variables": person_variables,
    "birth_year_range": birth_year_range,
    "year_range": year_range,
}
response = dm.generate_dataset(project_name, scenarios, **dataset_options)

# The reply carries the job id that the status cell uses afterwards
job_id = response.json()["job_id"]
print(f"Job ID: {job_id}")

In [None]:
# Look up the job, print every field of its status, and keep the download links
response = dm.get_dataset_status(job_id, "csv")  # only `csv` or `parquet` allowed
status = response.json()
for field, value in status.items():
    print(field, value)

family_url, person_url = status["family_url"], status["person_url"]

In [None]:
# Download the files locally to a specified location
# Only .zip (for csv files) or .pq allowed
# Each download gets its own destination: with a single shared path the
# person file would presumably replace the family file written just before it.
dm.download_file(family_url, "/path/to/family_data.zip")  # only .zip or .pq allowed
dm.download_file(person_url, "/path/to/person_data.zip")  # only .zip or .pq allowed

In [None]:
# Request a dataset, query its status, and download the files locally using one function
# This function checks when download files are ready every 5 seconds and downloads
# them to specified output directory to files called family_data and person_data

# Dataset specs
project_name = "BabyBonds"
scenarios = ["Baseline_v1", "BabyBonds_v1"]
person_variables = ["perid", "race", "sex", "hispanic", "year_died", "birth_year"]
family_variables = ["fam_id", "numkids"]
birth_year_range = [1981, 2018] # optional
year_range = [1950, 2100] # optional

# One assignment only, with no machine-specific absolute path: resolve the
# current user's Downloads folder so the cell runs for anyone. Change this to
# any local directory you prefer.
output_dir = os.path.expanduser("~/Downloads")
file_type = "csv" # `csv` or `parquet` only
dm.get_dataset(
    output_dir,
    file_type,
    project_name,
    scenarios,
    family_variables=family_variables,
    person_variables=person_variables,
    birth_year_range=birth_year_range,
    year_range=year_range
)
