### This notebook takes a close look at the vitals data and performs some exploratory data analysis (EDA).

## Imports

In [12]:
import sys
sys.path.append("..")

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import sqlalchemy
from sqlalchemy import func, select, desc
from sqlalchemy.sql.expression import and_

import config
from cyclops.orm import DB

## Load config, setup ORM

In [13]:
# Load the configuration matched by the default YAML glob and hand it to the
# ORM wrapper; every query in this notebook goes through `db`.
CONFIG_GLOB = "../configs/default/*.yaml"
cfg = config.read_config(CONFIG_GLOB)
db = DB(cfg)

## Find out mapped vital measurements

In [20]:
"""Unique vital measurement names and counts. Get top k vitals measured.

`measurement_mapped` has common vital measurements mapped across sites by GEMINI.
`measurement_name` has the raw vitals names, which are far more numerous because most of them differ between sites and are unmapped.
"""

# Number of most-frequent vitals to display (and to reuse in later cells).
k = 11
UNIQUE_VITALS_NAMES = "unique_vitals_names"
UNIQUE_VITALS_COUNTS = "unique_vitals_counts"

# Lower-case the mapped name so that differently-cased spellings fall into the
# same group.
unique_vitals_names = func.lower(db.public.vitals.measurement_mapped)
names_column = unique_vitals_names.label(UNIQUE_VITALS_NAMES)
counts_column = func.count(unique_vitals_names).label(UNIQUE_VITALS_COUNTS)

# One row per mapped vital name, most frequently recorded first.
query = (
    select(names_column, counts_column)
    .group_by(UNIQUE_VITALS_NAMES)
    .order_by(desc(UNIQUE_VITALS_COUNTS))
)

vitals_counts = db.run_query(query)
print(f"Num. of unique vitals measurements: {len(vitals_counts)}")
print(vitals_counts.head(k))

Num. of unique vitals measurements: 12
   unique_vitals_names  unique_vitals_counts
0          temperature               1056374
1           heart rate                839616
2          systolic bp                837673
3         diastolic bp                837591
4    oxygen saturation                681903
5     respiratory rate                671321
6      oxygen_delivery                609249
7                  cam                472200
8                 fio2                445510
9     oxygen flow rate                335422
10              weight                120526


## Get the mapped vitals for patients with a delirium label

In [19]:
# Join the most frequently recorded mapped vitals onto the admission records
# flagged `gemini_cohort`, then report how many distinct encounters (genc_id)
# have at least one such vitals record.
# Relies on `vitals_counts`, `k` and `UNIQUE_VITALS_NAMES` from the counting
# cell earlier in the notebook.
GENC_ID = "genc_id"
PATIENT_ID = "patient_id"
VITALS_NAME = "vitals_name"

# The names in `vitals_counts` were lower-cased by the counting query, so they
# are compared against the lower-cased column here as well.
common_vitals = list(vitals_counts.head(k)[UNIQUE_VITALS_NAMES])
vitals_name = func.lower(db.public.vitals.measurement_mapped)
vitals_query = select(
    db.public.vitals.genc_id,
    vitals_name.label(VITALS_NAME),
).subquery()
# Look the column up by the same constant it was labelled with, so renaming
# VITALS_NAME cannot silently break the attribute access.
vitals_name_column = vitals_query.c[VITALS_NAME]

query = (
    select(
        db.public.ip_administrative.patient_id_hashed.label(PATIENT_ID),
        db.public.ip_administrative.genc_id,
        db.public.ip_administrative.hospital_id,
        db.public.ip_administrative.del_present,
        db.public.ip_administrative.gemini_cohort,
        vitals_name_column,
    )
    .join(
        vitals_query,
        db.public.ip_administrative.genc_id == vitals_query.c.genc_id,
    )
    .where(
        and_(
            vitals_name_column.in_(common_vitals),
            # Explicit SQL boolean constant instead of `== True`: same
            # comparison, but it is not flagged by linters (E712) and cannot
            # be mistaken for a Python truthiness check.
            db.public.ip_administrative.gemini_cohort == sqlalchemy.true(),
        )
    )
)
vitals = db.run_query(query)
# NOTE(review): the heading refers to the delirium label, but the filter is on
# `gemini_cohort`, not `del_present` - confirm this is the intended subset.
print(vitals[GENC_ID].nunique())


# TODO: No vitals data was returned for the delirium subset (0 unique genc_ids). Follow up with the data team to find out why.

0
