In [64]:
import os
import sys

sys.path.append("..")

import re
from collections import OrderedDict

import config
import matplotlib
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sqlalchemy
from evidently import ColumnMapping
from evidently.dashboard import Dashboard
from evidently.dashboard.tabs import DataQualityTab
from evidently.model_profile import Profile
from evidently.model_profile.sections import DataQualityProfileSection
from sqlalchemy import func, select
from sqlalchemy.sql.expression import and_, or_

import cyclops
import cyclops.query_mimic as qm
import cyclops.query_utils as q_utils
from cyclops.orm import Database
from cyclops.processors.column_names import (
    ADMIT_TIMESTAMP,
    AGE,
    DIAGNOSIS_CODE,
    ENCOUNTER_ID,
    HOSPITAL_ID,
    LAB_TEST_NAME,
    LAB_TEST_RESULT_UNIT,
    LAB_TEST_RESULT_VALUE,
    LAB_TEST_TIMESTAMP,
    REFERENCE_RANGE,
)
from cyclops.processors.diagnosis_codes import DiagnosisProcessor
from cyclops.processors.feature_handler import FeatureHandler
from cyclops.processors.labs import LabsProcessor
from cyclops.queries import query_gemini_delirium_diagnosis, query_gemini_delirium_lab

%load_ext autoreload
%autoreload 2 # Load when external files are updated
%load_ext nb_black

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload
The nb_black extension is already loaded. To reload it, use:
  %reload_ext nb_black


<IPython.core.display.Javascript object>

# Setup ORM

In [65]:
import getpass

# Database user for this tutorial.
os.environ["USER"] = "postgres"
# Do not keep a password in the notebook source: reuse PGPASSWORD when it is
# already exported, otherwise prompt for it without echoing. Export PGPASSWORD
# beforehand for headless (non-interactive) runs.
if "PGPASSWORD" not in os.environ:
    os.environ["PGPASSWORD"] = getpass.getpass("PostgreSQL password: ")
# NOTE(review): USER/PGPASSWORD are presumably consumed when the config is read
# or the connection is created -- confirm against `config` / `cyclops.orm`.
cfg = config.read_config("../configs/default/*.yaml")
db = Database(cfg)

<IPython.core.display.Javascript object>

MIMIC EDA
See sample_code/delirium.ipynb

In sample_code/data_layer.ipynb:
from evidently.dashboard.tabs import DataQualityTab

# SQLAlchemy

SQLAlchemy is a Python SQL toolkit and Object Relational Mapper (ORM).

An ORM allows users to avoid writing queries as raw SQL strings, which become unmanageable as queries grow more complex.

In this tutorial, we'll introduce the basic functionalities provided by SQLAlchemy's ORM, as well as the functionality we've developed for simplifying and extending the use of this ORM.

Note: This tutorial focuses on querying from the database, not on modifying it, e.g., inserting or deleting rows/tables.

# Query objects

There are a few objects representing queries/relations to understand:
 - `cyclops.query_utils.DBTable` (DBTable)
  - A class defined in this repository used by `cyclops.orm.Database` to organize tables as attributes
 - `sqlalchemy.sql.schema.Table` (Table)
  - The SQLAlchemy table object
  - The DBTable object has attribute `data` which accesses the corresponding Table object
 - `sqlalchemy.sql.selectable.Select` (Select)
  - The SQLAlchemy object returned by function `sqlalchemy.select`
  - Executing queries requires them to be Select objects
 - `sqlalchemy.sql.selectable.Subquery` (Subquery)
  - The SQLAlchemy object returned by method `.subquery()`, e.g., on a `sqlalchemy.select` object
  - This is necessary when wanting to chain queries together

## Running queries

Queries can be executed with the function `db.run_query`, which returns a `pandas.DataFrame` object in which each column represents the attribute of the same name.

We've made this simpler by allowing users to pass in any of the query objects. Consider the following equivalent queries:

In [66]:
# DBTable: tables are reached as attributes of the Database object (schema, then table)
query = db.mimic_hosp.d_icd_diagnoses
type(query)

cyclops.query_utils.DBTable

<IPython.core.display.Javascript object>

In [67]:
# Run the DBTable directly; `limit=2` keeps the preview to two rows
db.run_query(query, limit=2)

2022-03-31 09:44:57,891 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:44:57,892 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.006134 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.006134 s


Unnamed: 0,icd_code,icd_version,long_title
0,10,9,Cholera due to vibrio cholerae
1,11,9,Cholera due to vibrio cholerae el tor


<IPython.core.display.Javascript object>

In [68]:
# Table: the underlying SQLAlchemy Table is exposed through the DBTable's `data` attribute
query = db.mimic_hosp.d_icd_diagnoses.data
type(query)

sqlalchemy.sql.schema.Table

<IPython.core.display.Javascript object>

In [69]:
# Run the Table object; the result matches the DBTable query above
db.run_query(query, limit=2)

2022-03-31 09:44:57,923 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:44:57,924 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.004669 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.004669 s


Unnamed: 0,icd_code,icd_version,long_title
0,10,9,Cholera due to vibrio cholerae
1,11,9,Cholera due to vibrio cholerae el tor


<IPython.core.display.Javascript object>

In [70]:
# Select: wrap the Table with `sqlalchemy.select`
query = select(db.mimic_hosp.d_icd_diagnoses.data)
type(query)

sqlalchemy.sql.selectable.Select

<IPython.core.display.Javascript object>

In [71]:
# Run the Select object; again the same two rows come back
db.run_query(query, limit=2)

2022-03-31 09:44:57,951 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:44:57,952 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.003538 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.003538 s


Unnamed: 0,icd_code,icd_version,long_title
0,10,9,Cholera due to vibrio cholerae
1,11,9,Cholera due to vibrio cholerae el tor


<IPython.core.display.Javascript object>

In [72]:
# Subquery: call `.subquery()` on a Select
query = select(db.mimic_hosp.d_icd_diagnoses.data).subquery()
type(query)

sqlalchemy.sql.selectable.Subquery

<IPython.core.display.Javascript object>

In [73]:
# Run the Subquery object; `db.run_query` accepts it like the other query types
db.run_query(query, limit=2)

2022-03-31 09:44:57,973 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:44:57,974 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.003724 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.003724 s


Unnamed: 0,icd_code,icd_version,long_title
0,10,9,Cholera due to vibrio cholerae
1,11,9,Cholera due to vibrio cholerae el tor


<IPython.core.display.Javascript object>

## Attribute access

Attributes, or columns, are accessed by name. How to access a column depends on the object.

In [74]:
# DBTable: a column is accessed directly as an attribute of the table object
col = db.mimic_hosp.d_icd_diagnoses.icd_code
type(col)

sqlalchemy.sql.schema.Column

<IPython.core.display.Javascript object>

There is no convenient attribute access for a `sqlalchemy.sql.schema.Table` object. This was largely the motivation for creating the `cyclops.query_utils.DBTable` object.

There is no direct attribute access for a `sqlalchemy.sql.selectable.Select` object. Instead, we must first convert it to a subquery.

In [75]:
# Subquery: columns are accessed through the `.c` collection
subquery = select(db.mimic_hosp.d_icd_diagnoses.data).subquery()
type(subquery.c.icd_code)

sqlalchemy.sql.schema.Column

<IPython.core.display.Javascript object>

Note the difference in attribute access between a DBTable and a subquery: with a subquery, we must go through `.c` before accessing a column.

# Querying

SQLAlchemy does not have implementations for many desired functionalities.

Here, we introduce `cyclops.query_utils` (imported as `q_utils`), which provides general-purpose functions we'll use throughout the remainder of the notebook to make building effective queries quick and simple.
 - Handling conditions: `equals_cond`, `in_list_condition`, `string_format_cond`, `substring_cond`, `startswith_cond`, `endswith_cond`
 - Attribute handling: `get_attributes`, `rename_attributes`, `reorder_attributes`, `drop_attributes`, `apply_to_attributes`
 - It automatically handles conversions between different query type objects, so users need not worry about what to pass in

## Conditions

Row conditions are represented by the `sqlalchemy.sql.elements.BinaryExpression` object.

In [76]:
# Comparing a column with a value builds a SQL expression object, not a Python bool
cond = db.mimic_hosp.diagnoses_icd.icd_code == "0011"
type(cond)

sqlalchemy.sql.elements.BinaryExpression

<IPython.core.display.Javascript object>

We can use method `where` to use a binary expression object to filter rows of a `sqlalchemy.sql.selectable.Select` object:

In [77]:
# Keep only the rows whose ICD code equals "0010", using `Select.where`
cond = db.mimic_hosp.d_icd_diagnoses.icd_code == "0010"
query = select(db.mimic_hosp.d_icd_diagnoses.data).where(cond)
db.run_query(query)

2022-03-31 09:44:58,025 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:44:58,026 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.016161 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.016161 s


Unnamed: 0,icd_code,icd_version,long_title
0,10,9,Cholera due to vibrio cholerae


<IPython.core.display.Javascript object>

Method `filter` may be used equivalently:

In [78]:
# Same condition as the previous cell, applied with `Select.filter` instead of `where`
cond = db.mimic_hosp.d_icd_diagnoses.icd_code == "0010"
query = select(db.mimic_hosp.d_icd_diagnoses.data).filter(cond)
db.run_query(query)

2022-03-31 09:44:58,059 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:44:58,060 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.015436 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.015436 s


Unnamed: 0,icd_code,icd_version,long_title
0,10,9,Cholera due to vibrio cholerae


<IPython.core.display.Javascript object>

More complex conditions have been handled in `cyclops.query_utils`, available for convenience.

Consider the substring condition:

In [79]:
# Keep rows whose title contains "Heart-"; with the default options the match ignores case
cond = q_utils.substring_cond(db.mimic_hosp.d_icd_diagnoses.long_title, "Heart-")
query = select(db.mimic_hosp.d_icd_diagnoses.data).where(cond)
db.run_query(query)

2022-03-31 09:44:58,170 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:44:58,172 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.097989 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.097989 s


Unnamed: 0,icd_code,icd_version,long_title
0,T863,10,Complications of heart-lung transplant
1,T8630,10,Unspecified complication of heart-lung transplant
2,T8631,10,Heart-lung transplant rejection
3,T8632,10,Heart-lung transplant failure
4,T8633,10,Heart-lung transplant infection
5,T8639,10,Other complications of heart-lung transplant
6,Z48280,10,Encounter for aftercare following heart-lung t...
7,Z954,10,Presence of other heart-valve replacement


<IPython.core.display.Javascript object>

Notice that we received results with "heart-" despite specifying "Heart-". This is because the function automatically assumed we wanted to ignore case. Different pre-processing assumptions are made for condition functions and can be explored in `cyclops.query_utils`.

These pre-processing assumptions are entirely optional. If case matters, we could simply specify this:

In [80]:
# Same substring search, but with `lower=False` so that case is respected
icd_lookup = db.mimic_hosp.d_icd_diagnoses
cond = q_utils.substring_cond(icd_lookup.long_title, "Heart-", lower=False)
query = select(icd_lookup.data).where(cond)
db.run_query(query)

2022-03-31 09:44:58,229 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:44:58,230 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.034915 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.034915 s


Unnamed: 0,icd_code,icd_version,long_title
0,T8631,10,Heart-lung transplant rejection
1,T8632,10,Heart-lung transplant failure
2,T8633,10,Heart-lung transplant infection


<IPython.core.display.Javascript object>

Assumptions may be made regarding case, trimming of whitespace, and type conversions.

## Joins

We can perform joins in SQLAlchemy.

For example, say we want to get a table back with patient diagnoses (found in `db.mimic_hosp.diagnoses_icd`), but include the titles of the diagnoses (found in `db.mimic_hosp.d_icd_diagnoses`).

When creating complex queries, many joins consist of at least one, if not two, subqueries. So, we'll demonstrate an inner join using a subquery:

In [81]:
# Build the lookup subquery in this cell so the join does not depend on a
# `subquery` variable left over from an earlier cell.
subquery = select(db.mimic_hosp.d_icd_diagnoses.data).subquery()

# Inner join: pair each patient diagnosis with the lookup row sharing its ICD code
query = select(db.mimic_hosp.diagnoses_icd.data, subquery).join(
    subquery, db.mimic_hosp.diagnoses_icd.icd_code == subquery.c.icd_code
)
db.run_query(query, limit=3)

2022-03-31 09:44:58,558 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:44:58,560 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.314942 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.314942 s


Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version,icd_code_1,icd_version_1,long_title
0,15998114,22987966,2,V6284,9,V6284,9,Suicidal ideation
1,19754677,27138064,8,71590,9,71590,9,"Osteoarthrosis, unspecified whether generalize..."
2,19754677,27138064,2,42833,9,42833,9,Acute on chronic diastolic heart failure


<IPython.core.display.Javascript object>

A cross product can be done by simply not using the `join` method:

In [82]:
# No `.join(...)` here: selecting from both sources without a join condition
# yields their cross product, so keep `limit` small.
subquery = select(db.mimic_hosp.d_icd_diagnoses.data).subquery()
query = select(db.mimic_hosp.diagnoses_icd.data, subquery)
db.run_query(query, limit=3)

2022-03-31 09:44:58,577 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:44:58,578 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.003934 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.003934 s


Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version,icd_code_1,icd_version_1,long_title
0,12926949,21005278,4,79311,9,10,9,Cholera due to vibrio cholerae
1,17995056,24718820,3,V6284,9,10,9,Cholera due to vibrio cholerae
2,17995056,24718820,5,25000,9,10,9,Cholera due to vibrio cholerae


<IPython.core.display.Javascript object>

It is smart to test queries with joins using a small `limit` in `db.run_query`, since cross products can create unreasonably large tables.

Notice that in the inner join we have duplicated, unwanted columns `icd_code_1`, `icd_version_1`. This is because we selected the entire `db.mimic_hosp.d_icd_diagnoses` table despite only wanting the `long_title` column.

We can fix this by selecting this column specifically:

In [83]:
# Join diagnoses with the ICD lookup, keeping only `long_title` from the lookup side
diagnoses = db.mimic_hosp.diagnoses_icd
subquery = select(db.mimic_hosp.d_icd_diagnoses.data).subquery()

codes_match = diagnoses.icd_code == subquery.c.icd_code
running_query = select(diagnoses.data, subquery.c.long_title).join(
    subquery, codes_match
)
db.run_query(running_query, limit=3)

2022-03-31 09:44:58,901 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:44:58,903 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.310452 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.310452 s


Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version,long_title
0,17190208,26495862,38,Y838,10,Other surgical procedures as the cause of abno...
1,19754677,27138064,8,71590,9,"Osteoarthrosis, unspecified whether generalize..."
2,19754677,27138064,2,42833,9,Acute on chronic diastolic heart failure


<IPython.core.display.Javascript object>

We can select any combination of tables and columns in this manner.

## Group by, Order by

We can use the `group_by` method to group by different columns:

In [84]:
# Count how many diagnosis rows exist for each ICD code
icd_code_col = db.mimic_hosp.diagnoses_icd.icd_code
query = select(icd_code_col, func.count(icd_code_col)).group_by(icd_code_col)
db.run_query(query, limit=3)

2022-03-31 09:44:59,298 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:44:59,299 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.383287 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.383287 s


Unnamed: 0,icd_code,count_1
0,20,8
1,29,1
2,30,34


<IPython.core.display.Javascript object>

The usual restriction applies: anything selected must either be inside an aggregate function or be included in the group by.

We can also take the previous query and order the rows in ascending or descending order:

In [85]:
# Descending: extends the grouped `query` built in the previous cell
query = query.order_by(db.mimic_hosp.diagnoses_icd.icd_code.desc())
db.run_query(query, limit=3)

2022-03-31 09:44:59,687 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:44:59,689 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.379678 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.379678 s


Unnamed: 0,icd_code,count_1
0,Z9989,8
1,Z9981,3050
2,Z993,1841


<IPython.core.display.Javascript object>

In [86]:
# Back to ascending: wrap the descending query in a subquery, then order the
# outer select by the subquery's `icd_code` column (accessed through `.c`)
subquery = query.subquery()
query = select(subquery).order_by(subquery.c.icd_code)
db.run_query(query, limit=3)

2022-03-31 09:45:00,122 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:45:00,124 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.414600 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.414600 s


Unnamed: 0,icd_code,count_1
0,20,8
1,29,1
2,30,34


<IPython.core.display.Javascript object>

## Attribute handling

Let's re-order the attributes in the previously defined query, say because we want `icd_code` and its corresponding title, `long_title`, next to one another:

In [87]:
# Reorder the columns of `running_query` (the join built earlier); the list
# names every existing attribute, in the desired order
running_query = q_utils.reorder_attributes(
    running_query,
    ["subject_id", "hadm_id", "seq_num", "icd_code", "long_title", "icd_version"],
)

db.run_query(running_query, limit=2)

2022-03-31 09:45:02,336 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:45:02,338 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 2.191950 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 2.191950 s


Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,long_title,icd_version
0,10158684,22072764,1,20,Typhoid fever,9
1,11816842,25448380,1,20,Typhoid fever,9


<IPython.core.display.Javascript object>

Note that all existing attributes should be passed into this function, otherwise we'll receive an error.

We can also drop any attributes we might want removed:

In [88]:
# Drop a single attribute, passed as a string
query_drop = q_utils.drop_attributes(running_query, "seq_num")
db.run_query(query_drop, limit=2)

2022-03-31 09:45:05,155 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:45:05,157 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 2.795270 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 2.795270 s


Unnamed: 0,subject_id,hadm_id,icd_code,long_title,icd_version
0,10158684,22072764,20,Typhoid fever,9
1,11816842,25448380,20,Typhoid fever,9


<IPython.core.display.Javascript object>

Like many of the `q_utils` functions which accept attributes, we may pass in a single attribute, or multiple:

In [89]:
# Drop several attributes at once, passed as a list of names
query_drop = q_utils.drop_attributes(running_query, ["seq_num", "long_title"])
db.run_query(query_drop, limit=2)

2022-03-31 09:45:05,239 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:45:05,241 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.060159 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.060159 s


Unnamed: 0,subject_id,hadm_id,icd_code,icd_version
0,12926949,21005278,79311,9
1,17995056,24718820,V6284,9


<IPython.core.display.Javascript object>

We can rename attributes:

In [90]:
# Rename attributes with a mapping of {old_name: new_name}
query_rename = q_utils.rename_attributes(running_query, {"long_title": "icd_title"})
db.run_query(query_rename, limit=2)

2022-03-31 09:45:07,433 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:45:07,434 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 2.170368 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 2.170368 s


Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_title,icd_version
0,10158684,22072764,1,20,Typhoid fever,9
1,11816842,25448380,1,20,Typhoid fever,9


<IPython.core.display.Javascript object>

### Applying functions

We need to be careful. Consider the diagnosis ICD code values:

In [91]:
# Fetch two raw rows of the ICD lookup table and keep them in `df` for inspection
query = db.mimic_hosp.d_icd_diagnoses
df = db.run_query(query, limit=2)
df

2022-03-31 09:45:07,461 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:45:07,462 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.004560 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.004560 s


Unnamed: 0,icd_code,icd_version,long_title
0,10,9,Cholera due to vibrio cholerae
1,11,9,Cholera due to vibrio cholerae el tor


<IPython.core.display.Javascript object>

In [92]:
# Look at the raw values: the rendered table hides the trailing whitespace
df["icd_code"].values

array(['0010   ', '0011   '], dtype=object)

<IPython.core.display.Javascript object>

There is lots of trailing whitespace which we may not have expected.

There are built-in functions in SQLAlchemy, such as `sqlalchemy.func.trim`, which can remove leading/trailing whitespace; however, it is difficult to apply these functions in-place. We have created functionality for this.

Here, we use `q_utils.trim_attributes` to trim leading/trailing whitespace from an attribute:

In [93]:
# Trim `icd_code` inside the query, then confirm the whitespace is gone
query = q_utils.trim_attributes(db.mimic_hosp.d_icd_diagnoses, "icd_code")
df = db.run_query(query, limit=2)
df["icd_code"].values

2022-03-31 09:45:07,486 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:45:07,487 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.004210 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.004210 s


array(['0010', '0011'], dtype=object)

<IPython.core.display.Javascript object>

It is smart to perform sanity checks:

In [94]:
# Sanity check on the raw column: compare each code with its stripped version.
# `t` is reused by the next cell.
t = db.mimic_hosp.d_icd_diagnoses
df = db.run_query(t, limit=5)
icd_codes_untrimmed = df["icd_code"].values.astype("str")
# Element-wise comparison; False means stripping changed the value
np.char.strip(icd_codes_untrimmed) == icd_codes_untrimmed  # Trim using NumPy

2022-03-31 09:45:07,498 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:45:07,499 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.002870 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.002870 s


array([False, False, False, False, False])

<IPython.core.display.Javascript object>

In [95]:
# Same check after trimming in the query (uses `t` from the previous cell)
query = q_utils.trim_attributes(t, "icd_code")
df = db.run_query(query, limit=5)
icd_codes_trimmed = df["icd_code"].values.astype("str")
# Element-wise comparison; True means stripping leaves the value unchanged
np.char.strip(icd_codes_trimmed) == icd_codes_trimmed  # Trim using NumPy

2022-03-31 09:45:07,512 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:45:07,513 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.003301 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.003301 s


array([ True,  True,  True,  True,  True])

<IPython.core.display.Javascript object>

Like many of the functions which accept attributes, we may pass in a single attribute, or multiple:

In [96]:
# Trim several attributes at once by passing a list of names
trimmed_cols = ["icd_code", "icd_version"]
query = q_utils.trim_attributes(db.mimic_hosp.d_icd_diagnoses, trimmed_cols)
df = db.run_query(query, limit=2)
df[trimmed_cols].values

2022-03-31 09:45:07,526 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:45:07,527 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.003709 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.003709 s


array([['0010', '9'],
       ['0011', '9']], dtype=object)

<IPython.core.display.Javascript object>

Note that certain functions will assume certain datatype conversions. For example, when trimming, the column would be converted to a string in order for this operation to be defined.

The ICD version was originally not a string:

In [97]:
# Query the untouched table to show the original (non-string) `icd_version` values
query = db.mimic_hosp.d_icd_diagnoses
df = db.run_query(query, limit=2)
df["icd_version"].values

2022-03-31 09:45:07,539 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:45:07,540 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.002712 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.002712 s


array([9, 9])

<IPython.core.display.Javascript object>

String conversions are made with `string_format_cond`, `substring_cond`, `startswith_cond`, `endswith_cond`. For example,

In [98]:
# Keep diagnoses whose `icd_version` starts with 1; the prefix is given as an int
diagnoses = db.mimic_hosp.diagnoses_icd
version_starts_with_1 = q_utils.startswith_cond(diagnoses.icd_version, 1)
query = select(diagnoses.data).where(version_starts_with_1)
db.run_query(query, limit=3)

2022-03-31 09:45:07,551 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:45:07,552 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.003051 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.003051 s


Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version
0,17190208,26495862,29,Y92239,10
1,17190208,26495862,38,Y838,10
2,17190208,26495862,20,K91840,10


<IPython.core.display.Javascript object>

There is also support for creating custom in-place functions using `q_utils.apply_to_attributes`.

Here we will use `sqlalchemy.func.lower` to convert the strings in a column to lowercase:

In [99]:
# Apply `func.lower` to the `long_title` attribute in place, lower-casing the titles
query = q_utils.apply_to_attributes(
    db.mimic_hosp.d_icd_diagnoses, "long_title", func.lower
)
db.run_query(query, limit=2)

2022-03-31 09:45:07,567 [1;37mINFO[0m cyclops.orm     - Query returned successfully!
[1;37mINFO[0m:cyclops.orm:Query returned successfully!
2022-03-31 09:45:07,568 [1;37mINFO[0m cyclops.utils.profile - Finished executing function wrapper_func in 0.004373 s
[1;37mINFO[0m:cyclops.utils.profile:Finished executing function wrapper_func in 0.004373 s


Unnamed: 0,icd_code,icd_version,long_title
0,10,9,cholera due to vibrio cholerae
1,11,9,cholera due to vibrio cholerae el tor


<IPython.core.display.Javascript object>

In particular, `q_utils.apply_to_attributes` accepts any function which takes a Column object as its only argument and similarly returns a Column object.