In [None]:
from tempo_ql.generic import GenericDataset
from tempo_ql.evaluator import QueryEngine
from tempo_ql.generic.variable_store import VariableStore

import duckdb
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Tables that make up the dataset. The 'source' of each entry must be the name
# of a table present in the duckdb database. Every entry uses
# visit_occurrence_id as its id field.
TABLE_INFO = [
    # drug exposures: intervals, default value read from the 'quantity' column
    dict(
        source='drug_exposure',
        type='interval',
        id_field='visit_occurrence_id',
        concept_id_field='drug_source_concept_id',
        start_time_field='drug_exposure_start_datetime',
        end_time_field='drug_exposure_end_datetime',
        default_value_field='quantity',
        scope='Drug',
    ),
    # conditions: intervals with no default value field
    dict(
        source='condition_occurrence',
        type='interval',
        id_field='visit_occurrence_id',
        concept_id_field='condition_source_concept_id',
        start_time_field='condition_start_datetime',
        end_time_field='condition_end_datetime',
        scope='Condition',
    ),
    # procedures: point events with no default value field
    dict(
        source='procedure_occurrence',
        type='event',
        id_field='visit_occurrence_id',
        concept_id_field='procedure_source_concept_id',
        time_field='procedure_datetime',
        scope='Procedure',
    ),
    # observations: point events, default value read from 'value_as_string'
    dict(
        source='observation',
        type='event',
        id_field='visit_occurrence_id',
        concept_id_field='observation_source_concept_id',
        time_field='observation_datetime',
        default_value_field='value_as_string',
        scope='Observation',
    ),
    # measurements: point events, default value read from 'value_as_number'
    dict(
        source='measurement',
        type='event',
        id_field='visit_occurrence_id',
        concept_id_field='measurement_source_concept_id',
        time_field='measurement_datetime',
        default_value_field='value_as_number',
        scope='Measurement',
    ),
    # device exposures: intervals with no default value field
    dict(
        source='device_exposure',
        type='interval',
        id_field='visit_occurrence_id',
        concept_id_field='device_source_concept_id',
        start_time_field='device_exposure_start_datetime',
        end_time_field='device_exposure_end_datetime',
        scope='Device',
    ),
    # visits: a single 'Visit' interval type (no concept id field), plus the
    # start/end timestamps exposed as named attributes
    dict(
        source='visit_occurrence',
        type='interval',
        id_field='visit_occurrence_id',
        start_time_field='visit_start_datetime',
        end_time_field='visit_end_datetime',
        interval_type='Visit',
        scope='Visit',
        attributes={
            'Admit Time': dict(value_field='visit_start_datetime'),
            'Discharge Time': dict(value_field='visit_end_datetime'),
        },
    ),
    # person: attributes only (no 'type'); concept-valued columns are flagged
    # with convert_concept=True and given their own scope
    dict(
        source='person',
        id_field='visit_occurrence_id',
        attributes={
            'Gender': dict(
                value_field='gender_concept_id',
                convert_concept=True,
                scope='Gender',
            ),
            'Birth Date': dict(
                value_field='birth_datetime',
                convert_concept=False,
            ),
            'Race': dict(
                value_field='race_concept_id',
                convert_concept=True,
                scope='Race',
            ),
            'Ethnicity': dict(
                value_field='ethnicity_concept_id',
                convert_concept=True,
                scope='Ethnicity',
            ),
        },
    ),
]

# Define one or more vocabulary tables. Each entry names the table ('source'),
# the columns holding the concept id, concept name and scope, and the list of
# scopes whose concept mappings it provides.
#
# NOTE(review): TABLE_INFO also converts concepts in the 'Race' and 'Ethnicity'
# scopes, but no vocabulary below lists those scopes - confirm whether they
# should be added to the 'concept' entry.
VOCABULARY_INFO = [
    {
        'source': 'concept',
        'concept_id_field': 'concept_id',
        'concept_name_field': 'concept_name',
        'scope_field': 'domain_id',
        'scopes': ['Drug', 'Condition', 'Procedure', 'Observation', 'Measurement', 'Device']
    },
    {
        # hand-built mapping table; the field names below are the column names
        # of the gender_concepts DataFrame it is created from
        'source': 'gender_concept',
        'concept_id_field': 'concept_id',
        'concept_name_field': 'concept_name',
        'scope_field': 'scope',
        'scopes': ['Gender']
    }
]



In [None]:
# Load every CSV in base_path into the named in-memory DuckDB database
# ':memory:myconn' (one table per file, named after the file stem). The query
# engine is pointed at the same database name when it is initialized.
local_db = duckdb.connect(":memory:myconn")
base_path = 'tempo_ql/omop/mimic-iv-demo-data-in-the-omop-common-data-model-0.9/1_omop_data_csv'

# fail early with a clear message instead of silently loading nothing
csv_files = sorted(Path(base_path).glob('*.csv'))
if not csv_files:
    raise FileNotFoundError(
        f"no CSV files found in {base_path!r}; check the dataset path and the working directory"
    )

# reader options shared by every read_csv_auto call
csv_options = "header=true, ignore_errors=true, parallel=false"
# single quotes are doubled so a path containing one cannot break the SQL string literal
visit_csv_sql = f"{base_path}/visit_occurrence.csv".replace("'", "''")

for csv_file in csv_files:
    table_name = csv_file.stem
    # quote the identifier so stems that are not plain SQL identifiers still load
    table_sql = '"' + table_name.replace('"', '""') + '"'
    csv_sql = str(csv_file).replace("'", "''")
    # 'create or replace' keeps the cell re-runnable: the named in-memory
    # database keeps its tables between runs, so a plain 'create table' would
    # fail the second time
    if table_name == 'person':
        # join person to visit occurrence table so we can use visit_occurrence_id as the trajectory key.
        # The left join starts from visits, so every visit yields a row even when
        # no person record matches (the person columns are then NULL).
        local_db.execute(f"""
        create or replace table {table_sql} as
            select distinct p.*, v.visit_occurrence_id
            from read_csv_auto('{visit_csv_sql}', {csv_options}) v
            left join read_csv_auto('{csv_sql}', {csv_options}) p
            on v.person_id = p.person_id
        """)
    else:
        local_db.execute(
            f"create or replace table {table_sql} as select * from read_csv_auto('{csv_sql}', {csv_options})"
        )

# create the gender concept mapping table
gender_concepts = pd.DataFrame([
    {'concept_id': 8507, 'concept_name': 'Male', 'scope': 'Gender'},
    {'concept_id': 8532, 'concept_name': 'Female', 'scope': 'Gender'},
])
# the SQL refers to the Python variable `gender_concepts` by name, so the
# DataFrame variable must keep exactly this name
local_db.execute("create or replace table gender_concept as select * from gender_concepts")

# Set up the variable store (holds query results saved under a name) and the
# query engine over the tables loaded into the ':memory:myconn' database
var_store = VariableStore('duckdb:///:memory:varstore', table_prefix='tempo_var_')
omop_dataset = GenericDataset("duckdb:///:memory:myconn", TABLE_INFO, VOCABULARY_INFO)
query_engine = QueryEngine(omop_dataset, variable_stores=[var_store])

In [None]:
# Query the 'Visit' element. 'Visit' is the scope and interval_type assigned to
# the visit_occurrence table in TABLE_INFO (presumably one interval per visit - confirm).
query_engine.query("{Visit}")

In [None]:
# Look up data elements by name pattern rather than exact name: names matching
# /Temperature/ restricted to the 'Measurement' scope (the scope assigned to
# the measurement table in TABLE_INFO).
# NOTE(review): exact matching rules of `contains /.../` are defined by TempoQL - confirm.
query_engine.query("{name contains /Temperature/; scope = 'Measurement'}")

In [None]:
# we can now store variables by name...
# The query unions {Temperature Celsius} with {Temperature Fahrenheit} converted
# via (F - 32) * 5 / 9, and keeps only rows where #value < 50
# (presumably a plausibility cutoff in degrees Celsius - confirm).
var_store['SimpleTemperature'] = query_engine.query("union({Temperature Celsius}, ({Temperature Fahrenheit} - 32) * 5 / 9) where #value < 50")

In [None]:
# ... and then use their results later: the stored variable is referenced by
# its name inside the query (here the last SimpleTemperature before #now,
# evaluated every 4 hours)
query_engine.query("last SimpleTemperature before #now every 4 hours")

In [None]:
# example of creating Intervals from two Event sets; 'Admit Time' and
# 'Discharge Time' are the attributes declared on visit_occurrence in TABLE_INFO
query_engine.query("intervals({Admit Time}, {Discharge Time})")