# TempoQL Demo on Full MIMIC-IV Dataset

Before running, please `cd` into this repo and run `pip install .` in the virtual environment of your choice. This will install required dependencies so that you can run this notebook.

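This example queries the full MIMIC-IV dataset hosted on Google BigQuery (the `physionet-data` project), so you need a GCP project with access to MIMIC-IV through PhysioNet. Set its ID in the `project_id` variable below. (A smaller MIMIC-IV-OMOP demo dataset is available [here](https://physionet.org/content/mimic-iv-demo-omop/0.9/), but it is not the dataset this notebook queries.)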

In [1]:
from tempo_ql import GenericDataset, formats, QueryEngine, FileVariableStore
import numpy as np
import os
import pandas as pd
import time

In [None]:
# GCP project in which to run queries - make sure it has access to MIMIC-IV through physionet.org
project_id = "ai-clinician"
# Name of a dataset within your project to store temporary results.
# Required if you plan to subset the data to run queries; leave as None otherwise.
scratch_dataset = None # "tempo_ql_scratch_mimic"
# Directory to store temporary variables
variable_store_dir = "mimiciv_data"

# Optional Gemini API key for the LLM-assisted authoring workflow.
# Lookup order: the local key file (kept out of the notebook so the secret is
# never committed), then the GEMINI_API_KEY environment variable, else None.
# A missing key file no longer aborts the whole configuration cell.
# NOTE(review): assumes the interactive widget tolerates api_key=None - confirm.
gemini_key_path = "gemini_key.txt"
if os.path.isfile(gemini_key_path):
    # Context manager guarantees the file handle is closed after reading.
    with open(gemini_key_path) as key_file:
        gemini_api_key = key_file.read().strip()
else:
    gemini_api_key = os.environ.get("GEMINI_API_KEY")

In [9]:
# Initialize query engine and variable store

# Only pass a scratch schema when a scratch dataset was configured above;
# otherwise the dataset is created with scratch_schema_name=None.
scratch_schema_name = f'{project_id}.{scratch_dataset}' if scratch_dataset is not None else None
dataset = GenericDataset(f'bigquery://{project_id}', formats.mimiciv(),
                        scratch_schema_name=scratch_schema_name)

# makedirs(exist_ok=True) creates any missing parent directories and avoids the
# check-then-create race of os.path.exists() followed by os.mkdir().
os.makedirs(variable_store_dir, exist_ok=True)
var_store = FileVariableStore(variable_store_dir)
query_engine = QueryEngine(dataset, variable_stores=[var_store])


In [16]:
# Run a TempoQL query for the "Respiratory Rate" concept with scope = chartevents.
# The result is left as the cell's last expression so the notebook displays it.
respiratory_rate_query = "{Respiratory Rate; scope = chartevents}"
query_engine.query(respiratory_rate_query)

<Events '220210: Respiratory Rate': 88998 values>
             id                time                 eventtype  value
0      30014281 2187-04-28 00:29:00  220210: Respiratory Rate     22
1      30014281 2187-04-28 01:00:00  220210: Respiratory Rate     20
2      30014281 2187-04-28 02:00:00  220210: Respiratory Rate     19
3      30014281 2187-04-28 03:00:00  220210: Respiratory Rate     20
4      30014281 2187-04-28 04:00:00  220210: Respiratory Rate     24
...         ...                 ...                       ...    ...
88993  39987311 2161-05-31 17:00:00  220210: Respiratory Rate     23
88994  39987311 2161-05-31 18:00:00  220210: Respiratory Rate     22
88995  39987311 2161-05-31 19:00:00  220210: Respiratory Rate     21
88996  39987311 2161-05-31 20:00:00  220210: Respiratory Rate     21
88997  39987311 2161-05-31 21:00:00  220210: Respiratory Rate     20

[88998 rows x 4 columns]

In [None]:
# Equivalent BigQuery SQL code, for comparison with the TempoQL query.

import pandas_gbq

# The SQL does three things:
#   1. matching_eventids: collect the distinct itemids in d_items whose label
#      is exactly 'Respiratory Rate'.
#   2. Select stay_id / charttime / itemid / value from chartevents, keeping only
#      rows whose stay_id appears in icustays and whose itemid was matched in (1).
#   3. Order the rows by stay and then by time.
respiratory_rate_sql = f"""
    WITH matching_eventids AS (
        SELECT DISTINCT d.itemid AS itemid FROM `physionet-data.mimiciv_3_1_icu.d_items` d
        WHERE d.label = 'Respiratory Rate'
    )
    SELECT ce.stay_id AS stay_id, 
                    ce.charttime AS time, 
                    ce.itemid AS eventtype,
                    ce.value AS value
                FROM `physionet-data.mimiciv_3_1_icu.chartevents` ce
                INNER JOIN `physionet-data.mimiciv_3_1_icu.icustays` stays
                ON ce.stay_id = stays.stay_id
                INNER JOIN matching_eventids 
                ON ce.itemid = matching_eventids.itemid
                ORDER BY stay_id, time ASC
"""

# Run the query in the configured GCP project and load the result as a DataFrame.
df = pandas_gbq.read_gbq(respiratory_rate_sql, project_id=project_id)
df

Downloading: 100%|[32m█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████[0m|[0m


Unnamed: 0,stay_id,time,eventtype,value
0,30014281,2187-04-28 00:29:00,220210,22
1,30014281,2187-04-28 01:00:00,220210,20
2,30014281,2187-04-28 02:00:00,220210,19
3,30014281,2187-04-28 03:00:00,220210,20
4,30014281,2187-04-28 04:00:00,220210,24
...,...,...,...,...
88993,39987311,2161-05-31 17:00:00,220210,23
88994,39987311,2161-05-31 18:00:00,220210,22
88995,39987311,2161-05-31 19:00:00,220210,21
88996,39987311,2161-05-31 20:00:00,220210,21


In [14]:
# Open the interactive TempoQL widget, passing the Gemini API key configured earlier.
# The widget is the cell's last expression so the notebook renders it.
tempo_ql_widget = query_engine.interactive(api_key=gemini_api_key)
tempo_ql_widget

TempoQLWidget(api_status='Configured', ids_length=1000, list_names=['Safety Measures', 'Heart Rate', 'Respirat…