In [7]:
import sys
import os

from tempo_ql.generic.dataset import GenericDataset
from tempo_ql.evaluator import QueryEngine
from tempo_ql.generic.variable_store import DatabaseVariableStore
import duckdb
import numpy as np
from pathlib import Path
import pandas as pd
import matplotlib.pyplot as plt

ModuleNotFoundError: No module named 'tempo_ql'

In [2]:
# define all the tables that will be part of the dataset. Each table should be
# present in the duckdb database with the name defined by the 'source' key
TABLE_INFO = [
    {
        'source': 'drug_exposure',
        'type': 'interval',
        'id_field': 'visit_occurrence_id',
        'concept_id_field': 'drug_source_concept_id',
        'start_time_field': 'drug_exposure_start_datetime',
        'end_time_field': 'drug_exposure_end_datetime',
        'default_value_field': 'quantity',
        'scope': 'Drug'
    },
    {
        'source': 'condition_occurrence',
        'type': 'interval',
        'id_field': 'visit_occurrence_id',
        'concept_id_field': 'condition_source_concept_id',
        'start_time_field': 'condition_start_datetime',
        'end_time_field': 'condition_end_datetime',
        'scope': 'Condition'
    },
    {
        'source': 'procedure_occurrence',
        'type': 'event',
        'id_field': 'visit_occurrence_id',
        'concept_id_field': 'procedure_source_concept_id',
        'time_field': 'procedure_datetime',
        'scope': 'Procedure'
    },
    {
        'source': 'observation',
        'type': 'event',
        'id_field': 'visit_occurrence_id',
        'concept_id_field': 'observation_source_concept_id',
        'time_field': 'observation_datetime',
        'default_value_field': 'value_as_string',
        'scope': 'Observation'
    },
    {
        'source': 'measurement',
        'type': 'event',
        'id_field': 'visit_occurrence_id',
        'concept_id_field': 'measurement_source_concept_id',
        'time_field': 'measurement_datetime',
        'default_value_field': 'value_as_number',
        'scope': 'Measurement'
    },
    {
        'source': 'device_exposure',
        'type': 'interval',
        'id_field': 'visit_occurrence_id',
        'concept_id_field': 'device_source_concept_id',
        'start_time_field': 'device_exposure_start_datetime',
        'end_time_field': 'device_exposure_end_datetime',
        'scope': 'Device'
    },
    {
        'source': 'visit_occurrence',
        'type': 'interval',
        'id_field': 'visit_occurrence_id',
        'start_time_field': 'visit_start_datetime',
        'end_time_field': 'visit_end_datetime',
        'interval_type': 'Visit',
        'scope': 'Visit',
        'attributes': {
            'Admit Time': {
                'value_field': 'visit_start_datetime'
            },
            'Discharge Time': {
                'value_field': 'visit_end_datetime'
            }
        }
    },
    {
        'source': 'person',
        'id_field': 'visit_occurrence_id',
        'attributes': {
            'Gender': {
                'value_field': 'gender_concept_id',
                'convert_concept': True,
                'scope': 'Gender'
            },
            'Birth Date': {
                'value_field': 'birth_datetime',
                'convert_concept': False
            },
            'Race': {
                'value_field': 'race_concept_id',
                'convert_concept': True,
                'scope': 'Race'
            },
            'Ethnicity': {
                'value_field': 'ethnicity_concept_id',
                'convert_concept': True,
                'scope': 'Ethnicity'
            }
        }
    }
]

# define one or more vocabulary tables. Each should have a concept id, concept name,
# and scope field and contain the concept mappings for one or more scopes
VOCABULARY_INFO = [
    {
        'source': 'concept',
        'concept_id_field': 'concept_id',
        'concept_name_field': 'concept_name',
        'scope_field': 'domain_id',
        'scopes': ['Drug', 'Condition', 'Procedure', 'Observation', 'Measurement', 'Device']
    },
    {
        'source': 'gender_concept',
        'scopes': ['Gender']
    }
]



In [3]:
local_db = duckdb.connect("local_qe")
base_path = '/Users/maziyong/Desktop/tempo_ql/mimic-iv-demo-data-in-the-omop-common-data-model-0.9/1_omop_data_csv'
for csv_file in Path(base_path).glob('*.csv'):
    table_name = csv_file.stem
    if table_name == 'person':
        # join person to visit occurrence table so we can use visit_occurrence_id as the trajectory key
        local_db.execute(f"""
        create table {table_name} as 
            select distinct p.*, v.visit_occurrence_id 
            from read_csv_auto('{base_path}/visit_occurrence.csv', header=true, ignore_errors=true, parallel=false) v
            left join read_csv_auto('{csv_file}', header=true, ignore_errors=true, parallel=false) p
            on v.person_id = p.person_id
        """)
    else:
        local_db.execute(f"create table {table_name} as select * from read_csv_auto('{csv_file}', header=true, ignore_errors=true, parallel=false)")

# create the gender concept mapping table
gender_concepts = pd.DataFrame([
    {'concept_id': 8507, 'concept_name': 'Male', 'scope': 'Gender'},
    {'concept_id': 8532, 'concept_name': 'Female', 'scope': 'Gender'},
])
local_db.execute("create table gender_concept as select * from gender_concepts")
local_db.close()

In [3]:
# Initialize query engine and variable store
var_store = DatabaseVariableStore('duckdb:///local_varstore', table_prefix='tempo_var_')
query_engine = QueryEngine(GenericDataset("duckdb:///local_qe", TABLE_INFO, VOCABULARY_INFO), variable_stores=[var_store])



In [10]:
Widget()

NameError: name 'Widget' is not defined

In [11]:
from tempo_ql.widget import Widget
import os

# # sample query: last {name contains /Temperature/i; scope = Measurement} before #now every 1 day
# w = Widget(dev=True, query_engine = query_engine)
# w

ModuleNotFoundError: No module named 'tempo_ql'

In [5]:
w.query_engine.get_last_sql_query()

'SELECT measurement.visit_occurrence_id AS id, measurement.measurement_datetime AS time, measurement.measurement_source_concept_id AS eventtype, measurement.value_as_number AS value \nFROM measurement \nWHERE measurement.measurement_source_concept_id IN (2000030033, 2000030106, 2000030018, 2000030092, 2000001021)'

In [8]:
b.values()

dict_values([{'result': <TimeIndex: 478 IDs, 3515 steps>
                       id             mintime
0    -9176297757944464068 2154-02-05 17:09:00
1    -9176297757944464068 2154-02-06 17:09:00
2    -9176297757944464068 2154-02-07 17:09:00
3    -9176297757944464068 2154-02-08 17:09:00
4    -9176297757944464068 2154-02-09 17:09:00
...                   ...                 ...
3510  9099373231105163027 2112-10-16 18:46:00
3511  9099373231105163027 2112-10-17 18:46:00
3512  9135233101578966180 2114-01-22 09:20:00
3513  9197703010583516730 2112-02-05 14:48:00
3514  9218061359648594772 2111-06-22 10:37:00

[3515 rows x 2 columns]}, {'result': <Events '2000030018, 2000001021, 2000030092, 2000030106': 4057 values>
                       id                time   eventtype  value
0    -9149771978458038515 2177-03-15 05:30:00  2000030018   98.5
1    -9149771978458038515 2177-03-15 10:00:00  2000030018   98.2
2    -9149771978458038515 2177-03-15 17:00:00  2000030018   98.1
3    -9149771978458038

In [6]:
a,b = w.query_engine.query("last {name contains /Temperature/i; scope = Measurement} before #now every 1 day",return_subqueries = True)
b

100%|█████████████████████████████| 1/1 [00:00<00:00,  7.94it/s]

Matching concepts: {'Measurement': [(2000030033, 'Skin Temperature'), (2000030106, 'Blood Temperature CCO (C)'), (2000030018, 'Temperature Fahrenheit'), (2000030092, 'Temperature Celsius'), (2000001021, 'Temperature|Blood|Blood Gas')]}
[(69, 74), (75, 76), (77, 80)] every 1 day
[(6, 10), (11, 19), (20, 34), (36, 41), (44, 55)] name contains /Temperature/i; scope = Measurement
[(0, 4), (6, 10), (11, 19), (20, 34), (36, 41), (44, 55), (57, 63), (64, 68)] last {name contains /Temperature/i; scope = Measurement} before #now
[(0, 4), (6, 10), (11, 19), (20, 34), (36, 41), (44, 55), (57, 63), (64, 68)] last {name contains /Temperature/i; scope = Measurement} before #now
[(0, 4), (6, 10), (11, 19), (20, 34), (36, 41), (44, 55), (57, 63), (64, 68), (69, 74), (75, 76), (77, 80)] last {name contains /Temperature/i; scope = Measurement} before #now every 1 day





{'every 1 day': {'result': <TimeIndex: 478 IDs, 3515 steps>
                         id             mintime
  0    -9176297757944464068 2154-02-05 17:09:00
  1    -9176297757944464068 2154-02-06 17:09:00
  2    -9176297757944464068 2154-02-07 17:09:00
  3    -9176297757944464068 2154-02-08 17:09:00
  4    -9176297757944464068 2154-02-09 17:09:00
  ...                   ...                 ...
  3510  9099373231105163027 2112-10-16 18:46:00
  3511  9099373231105163027 2112-10-17 18:46:00
  3512  9135233101578966180 2114-01-22 09:20:00
  3513  9197703010583516730 2112-02-05 14:48:00
  3514  9218061359648594772 2111-06-22 10:37:00
  
  [3515 rows x 2 columns]},
 'name contains /Temperature/i; scope = Measurement': {'result': <Events '2000030018, 2000001021, 2000030092, 2000030106': 4057 values>
                         id                time   eventtype  value
  0    -9149771978458038515 2177-03-15 05:30:00  2000030018   98.5
  1    -9149771978458038515 2177-03-15 10:00:00  2000030018   9

In [5]:
w.data.get_ids().value_counts()

person_id
-3908355835367628651    224
-4353160957725823366    203
 4668337230155062633    199
 4498126063475867818    172
 4352191084057402257    159
                       ... 
 3665089643642765251      3
 8527170356523164323      3
 2601314283911413076      2
 3912882389848878631      2
 5548892236933978704      1
Name: count, Length: 100, dtype: int64

In [3]:
v = w.data.get_values()
print(v)
print(v.index.nunique())
print(v.nunique())
print(v.index.value_counts())
print(v.value_counts().sort_index())

43324    117
43364     65
43401     74
43340    104
43376     65
        ... 
27143     64
29588     77
25971    123
26137     64
29510     79
Name: value, Length: 52709, dtype: int16
52709
223
43324    1
17662    1
18617    1
17712    1
18003    1
        ..
42175    1
41379    1
41898    1
42049    1
29510    1
Name: count, Length: 52709, dtype: int64
value
-23     1
-22     1
-19     1
-16     1
-8      1
       ..
 329    1
 330    1
 331    1
 354    1
 801    1
Name: count, Length: 223, dtype: int64


In [4]:
cate = w.query_engine.query("{Gender}")
bin = w.query_engine.query("{Gender} = \"8507\" ")
cont = w.query_engine.query("{name contains /Temperature/; scope = 'Measurement'}")

Matching concepts: {'measurement': [(2000030033, 'Skin Temperature'), (2000030106, 'Blood Temperature CCO (C)'), (2000030018, 'Temperature Fahrenheit'), (2000030092, 'Temperature Celsius'), (2000001021, 'Temperature|Blood|Blood Gas')]}


In [11]:
from utils import make_query_result_summary
make_query_result_summary(cate)

{'name': 'gender_concept_id',
 'values': {'type': 'categorical', 'counts': {'8507': 57, '8532': 43}}}

In [12]:
from utils import make_query_result_summary
make_query_result_summary(bin)

{'name': 'gender_concept_id',
 'values': {'type': 'binary', 'mean': np.float64(0.57)}}

In [13]:
from utils import make_query_result_summary
make_query_result_summary(cont)

{'name': '2000030018, 2000001021, 2000030092, 2000030106',
 'occurrences': {'type': 'continuous',
  'mean': np.float64(97.39832388464383),
  'std': np.float64(67.52390699175157),
  'hist': {0.0: 143,
   10.0: 284,
   20.0: 402,
   30.0: 305,
   40.0: 256,
   50.0: 277,
   60.0: 131,
   70.0: 152,
   80.0: 166,
   90.0: 0,
   100.0: 318,
   110.0: 119,
   120.0: 250,
   130.0: 0,
   140.0: 140,
   150.0: 316,
   160.0: 0,
   170.0: 172,
   180.0: 0,
   190.0: 199,
   200.0: 203,
   210.0: 0,
   220.0: 224}},
 'values': {'type': 'continuous',
  'mean': np.float64(88.19449844059463),
  'std': np.float64(23.124624586727734),
  'hist': {30.0: 681,
   40.0: 3,
   50.0: 0,
   60.0: 0,
   70.0: 0,
   80.0: 0,
   90.0: 3031,
   100.0: 342}}}

In [11]:
import sys
sys.path.append('/Users/maziyong/Desktop/tempo_ql/tempo-ql')
from tempo_ql.omop.dataset import OMOPDataset
from tempo_ql.evaluator import QueryEngine
import duckdb
from pathlib import Path

local_db = duckdb.connect("testdb")
for csv_file in Path('/Users/maziyong/Desktop/tempo_ql/mimic-iv-demo-data-in-the-omop-common-data-model-0.9/1_omop_data_csv').glob('*.csv'):
    table_name = csv_file.stem
    local_db.execute(f"create table {table_name} as select * from read_csv_auto('{csv_file}', header=true, ignore_errors=true, parallel=false)")



In [5]:
query_engine = QueryEngine(OMOPDataset("duckdb:////Users/maziyong/Desktop/tempo_ql/tempo_ql/tempo_ql/testdb"))



In [3]:
query_engine.query("{Gender}").get_ids()

Index([-9066461348710750663, -8993675534959689080, -8970844422700220177,
       -8928428202649726867, -8891617624507360381, -8769042030325953499,
       -8659404739579738033, -8492299714241840941, -8352232581952957278,
       -8254164865273971123, -8205283012979532608, -8090189584974691216,
       -7938198040010520706, -7671795861352464589, -7636167699948083600,
       -7437341330444582833, -7391666713304457659, -6681895148320589913,
       -6525152599927900344, -6289874722419061830, -6225647829918357531,
       -6022656226246460545, -5829006308524050971, -5342370696241135313,
       -4873075614181207858, -4502092208250381979, -4353160957725823366,
       -4234372750442829205, -4183220989401122518, -3908355835367628651,
       -3780452582396805474, -3611589607736625713, -3420195391796315831,
       -3210373572193940939, -3024822967781525875, -2575767131279873665,
       -2500070523286875699, -2312013739856114142, -2286362762396278035,
       -2067961723109232727, -1616052813658226820, 