# GEMINI query API tutorial

## Import modules and instantiate `GEMINIQuerier`.

In [1]:
"""GEMINI query API tutorial."""

import os

import cyclops.query.ops as qo
from cyclops.query import GEMINIQuerier


# Credentials are read from the environment so that real values never have to
# be typed into (and saved with) the notebook. Export GEMINI_DB_USER and
# GEMINI_DB_PASSWORD before starting the kernel; when they are unset, the
# placeholder values below are used instead.
querier = GEMINIQuerier(
    host="db.gemini-hpc.ca",
    database="delirium_v3_0_0",
    user=os.environ.get("GEMINI_DB_USER", "username"),
    password=os.environ.get("GEMINI_DB_PASSWORD", "password"),
)
# List all tables.
querier.list_tables()

2023-03-21 17:09:15,267 INFO cyclops.query.orm - Database setup, ready to run queries!


['information_schema.sql_implementation_info',
 'information_schema.sql_parts',
 'information_schema.sql_sizing',
 'information_schema.sql_features',
 'public.ip_administrative',
 'public.derived_variables',
 'public.diagnosis',
 'public.vitals',
 'public.intervention',
 'public.er_administrative',
 'public.imaging',
 'public.pharmacy',
 'public.lab',
 'public.room_transfer',
 'public.ip_scu',
 'public.lookup_ip_administrative',
 'public.lookup_ccsr',
 'public.hospital',
 'public.lookup_er_administrative',
 'public.blood_transfusion',
 'public.lookup_ip_scu',
 'public.lookup_diagnosis',
 'public.lookup_room_transfer',
 'public.lookup_imaging']

## Example 1a. Create a table with only one hospitalization per patient, keeping the most recent encounter for each patient. Sort the dataset by `patient_id_hashed` and `discharge_date_time` (most recent discharge first), and then keep the most recent record for each patient.

In [2]:
# Sort by patient id (ascending) and discharge time (descending), so that each
# patient's latest discharge comes first within their group of rows.
newest_first = qo.OrderBy(
    ["patient_id_hashed", "discharge_date_time"],
    ascending=[True, False],
)
# De-duplicate on the patient id, leaving one encounter per patient.
one_per_patient = qo.Distinct("patient_id_hashed")
ops = qo.Sequential(newest_first, one_per_patient)

# Apply the operations to the inpatient administrative table and fetch the rows.
encounters_qi = querier.public.ip_administrative(ops=ops)
encounters = encounters_qi.run()
print(f"{len(encounters)} rows extracted!")

2023-03-21 17:09:20,365 INFO cyclops.query.orm - Query returned successfully!
2023-03-21 17:09:20,366 INFO cyclops.utils.profile - Finished executing function run_query in 4.825447 s


189606 rows extracted!


## Example 1b. From the above set of encounters, take a subset of patients who were discharged between April 1, 2015 and March 31, 2016.

In [3]:
# Lower and upper bounds of the discharge window, given as date strings.
# NOTE(review): the upper bound carries no time component; if it resolves to
# midnight, discharges later on 2016-03-31 would be excluded — confirm.
discharged_after = qo.ConditionAfterDate("discharge_date_time", "2015-04-01")
discharged_before = qo.ConditionBeforeDate("discharge_date_time", "2016-03-31")
ops = qo.Sequential(discharged_after, discharged_before)

# Build on the existing `encounters_qi` query rather than starting from the
# table again. This rebinds `encounters_qi`, so re-running the cell stacks the
# same filters on top of the already-filtered query.
encounters_qi = querier.get_interface(encounters_qi.query, ops=ops)
encounters = encounters_qi.run()
print(f"{len(encounters)} rows extracted!")

2023-03-21 17:09:21,372 INFO cyclops.query.orm - Query returned successfully!
2023-03-21 17:09:21,373 INFO cyclops.utils.profile - Finished executing function run_query in 0.992901 s


17071 rows extracted!


## Example 1c. From the above set of encounters, get the total number of admissions for each hospital.

In [4]:
# Aggregation spec keyed by column name; the tuple is presumably
# (aggregation function, output column label) — confirm against the
# GroupByAggregate documentation.
count_per_hospital = {"hospital_id": ("count", "count")}
ops = qo.GroupByAggregate("hospital_id", count_per_hospital)

# Group the current `encounters_qi` query by hospital and fetch the result.
encounters_per_site_qi = querier.get_interface(encounters_qi.query, ops=ops)
encounters_per_site = encounters_per_site_qi.run()
print(f"{len(encounters_per_site)} rows extracted!")

2023-03-21 17:09:22,181 INFO cyclops.query.orm - Query returned successfully!
2023-03-21 17:09:22,182 INFO cyclops.utils.profile - Finished executing function run_query in 0.800708 s


7 rows extracted!


## Example 2a. How many sodium tests were placed between Apr 1, 2015 and May 31, 2015 at SMH?

In [5]:
# Encounters restricted to a single hospital; note this rebinds `encounters_qi`.
encounter_ops = qo.ConditionEquals("hospital_id", "SMH")
encounters_qi = querier.public.ip_administrative(ops=encounter_ops)

# Lab filters: collection-date window plus a substring match on the test name.
collected_after = qo.ConditionAfterDate("sample_collection_date_time", "2015-04-01")
collected_before = qo.ConditionBeforeDate("sample_collection_date_time", "2015-05-31")
sodium_only = qo.ConditionSubstring("lab_test_name_mapped", "sodium")
# The grouping column `hospital_id` presumably comes from the joined
# encounters query rather than from the lab table itself — confirm.
count_per_hospital = qo.GroupByAggregate(
    "hospital_id", {"hospital_id": ("count", "count")}
)
lab_ops = qo.Sequential(
    collected_after, collected_before, sodium_only, count_per_hospital
)

# Join the lab table to the SMH encounters on the encounter id, then run.
encounters_join = qo.JoinArgs(join_table=encounters_qi.query, on="genc_id")
lab_qi = querier.public.lab(ops=lab_ops, join=encounters_join)
sodium_tests = lab_qi.run()
print(f"{len(sodium_tests)} rows extracted!")
print(sodium_tests)

2023-03-21 17:09:59,972 INFO cyclops.query.orm - Query returned successfully!
2023-03-21 17:09:59,973 INFO cyclops.utils.profile - Finished executing function run_query in 37.740935 s


1 rows extracted!


Unnamed: 0,hospital_id,count
0,SMH,6053
