In [1]:
%load_ext autoreload
%autoreload 2

In [4]:
import pandas as pd
from proto.etl.config import SSHInfoEicu, DBInfoEicu
from proto.etl.utils import connect_to_db_via_ssh
conn = connect_to_db_via_ssh(SSHInfoEicu, DBInfoEicu)

In [5]:
query_schema = 'set search_path to eicu_crd;'
patientunitstayid = 2704494
query = query_schema + """
select *
from admissiondrug
where patientunitstayid = {}
order by drugoffset
""".format(patientunitstayid)

df = pd.read_sql_query(query, conn)
df.head()

Unnamed: 0,admissiondrugid,patientunitstayid,drugoffset,drugenteredoffset,drugnotetype,specialtytype,usertype,rxincluded,writtenineicu,drugname,drugdosage,drugunit,drugadmitfrequency,drughiclseqno
0,13383113,2704494,-87132,2153,Comprehensive Progress,eCM Primary,THC Physician,True,False,HYDRALAZINE HCL ...,0.0,,,89
1,13383111,2704494,-87132,2153,Comprehensive Progress,eCM Primary,THC Physician,True,False,COREG ...,0.0,,,13795
2,13383112,2704494,-87132,2153,Comprehensive Progress,eCM Primary,THC Physician,True,False,EXEMESTANE ...,0.0,,,20803
3,13383114,2704494,-87132,2153,Comprehensive Progress,eCM Primary,THC Physician,True,False,IMDUR ...,0.0,,,6341
4,13383116,2704494,-87132,2153,Comprehensive Progress,eCM Primary,THC Physician,True,False,PROTONIX ...,0.0,,,22008


In [5]:
query = query_schema + """
select 
  pt.hospitalid
  , count(pt.patientunitstayid) as number_of_patients
  , count(ad.patientunitstayid) as number_of_patients_with_admdrug
from patient pt
left join admissiondrug ad
  on pt.patientunitstayid = ad.patientunitstayid
group by pt.hospitalid
""".format(patientunitstayid)

df = pd.read_sql_query(query, con)
df['data completion'] = df['number_of_patients_with_admdrug'] / df['number_of_patients'] * 100.0
df.sort_values('number_of_patients_with_admdrug', ascending=False, inplace=True)
df.head(n=10)

Unnamed: 0,hospitalid,number_of_patients,number_of_patients_with_admdrug,data completion
54,420,342170,341168,99.707163
158,142,33887,33003,97.39133
12,382,29047,28871,99.394085
163,365,25941,25735,99.20589
72,281,25564,25092,98.153654
141,391,22992,22840,99.3389
6,394,23574,22485,95.380504
68,440,23644,22343,94.497547
100,283,20045,19724,98.398603
118,443,20072,18666,92.995217


In [6]:
query = query_schema + """
select *
from admissiondx
where patientunitstayid = {}
""".format(patientunitstayid)

df = pd.read_sql_query(query, con)
df.head()

Unnamed: 0,admissiondxid,patientunitstayid,admitdxenteredoffset,admitdxpath,admitdxname,admitdxtext
0,7981672,2704494,20,admission diagnosis|Non-operative Organ System...,Hematology,Hematology
1,7981671,2704494,20,admission diagnosis|Was the patient admitted f...,No,No
2,7981673,2704494,20,admission diagnosis|All Diagnosis|Non-operativ...,Thrombocytopenia,Thrombocytopenia


In [7]:
for i, row in df.iterrows():
    print('Row {}'.format(i+1))
    for c in ['admitdxpath', 'admitdxname', 'admitdxtext']:
        print('  {}: {}'.format(c, row[c]))

Row 1
  admitdxpath: admission diagnosis|Non-operative Organ Systems|Organ System|Hematology
  admitdxname: Hematology
  admitdxtext: Hematology
Row 2
  admitdxpath: admission diagnosis|Was the patient admitted from the O.R. or went to the O.R. within 4 hours of admission?|No
  admitdxname: No
  admitdxtext: No
Row 3
  admitdxpath: admission diagnosis|All Diagnosis|Non-operative|Diagnosis|Hematology|Thrombocytopenia
  admitdxname: Thrombocytopenia
  admitdxtext: Thrombocytopenia


In [8]:
query = query_schema + """
select *
from diagnosis
where patientunitstayid = {}
order by diagnosisoffset
""".format(patientunitstayid)

df = pd.read_sql_query(query, con)
df


Unnamed: 0,diagnosisid,patientunitstayid,activeupondischarge,diagnosisoffset,diagnosisstring,icd9code,diagnosispriority
0,32722297,2704494,False,48,renal|disorder of kidney|acute renal failure,"584.9, N17.9",Major
1,31935854,2704494,False,48,oncology|chest tumors|breast CA|female,"174.9, C50.919",Major
2,33818792,2704494,False,48,cardiovascular|shock / hypotension|hypotension,"458.9, I95.9",Primary
3,32391279,2704494,False,48,oncology|hematologic malignancy|leukemia,,Major
4,33010439,2704494,False,2153,renal|disorder of kidney|acute renal failure,"584.9, N17.9",Major
5,33098216,2704494,False,2153,cardiovascular|shock / hypotension|hypotension,"458.9, I95.9",Primary
6,32825172,2704494,False,2153,oncology|chest tumors|breast CA|female,"174.9, C50.919",Major
7,33368560,2704494,False,2153,oncology|hematologic malignancy|leukemia,,Major
8,33376417,2704494,True,2159,cardiovascular|shock / hypotension|hypotension,"458.9, I95.9",Primary
9,32981160,2704494,True,2159,oncology|chest tumors|breast CA|female,"174.9, C50.919",Major


In [9]:
icd9 = '250.'
query = query_schema + """
select diagnosisstring, icd9code
, count(distinct patientunitstayid) as n
from diagnosis
where icd9code like '%{}%'
group by diagnosisstring, icd9code
order by diagnosisstring, n desc
""".format(icd9)

df = pd.read_sql_query(query, con)
df

Unnamed: 0,diagnosisstring,icd9code,n
0,endocrine|glucose metabolism|DKA,"250.13, E10.1",5024
1,endocrine|glucose metabolism|DKA|with altered ...,"250.33, E10.11",333
2,endocrine|glucose metabolism|diabetes mellitus...,"250.00, E11.9",678
3,endocrine|glucose metabolism|diabetes mellitus...,"250.02, E11.65",791
4,endocrine|glucose metabolism|diabetes mellitus...,"250.01, E10.9",67
5,endocrine|glucose metabolism|diabetes mellitus...,"250.03, E10.65",211
6,endocrine|glucose metabolism|hyperosmolar nonk...,"250.23, E13.00",148
7,endocrine|glucose metabolism|hyperosmolar nonk...,"250.22, E11.01",5


In [11]:
dx = 'endocrine|glucose metabolism'
query = query_schema + """
select diagnosisstring, icd9code
, count(distinct patientunitstayid) as n
from diagnosis
where diagnosisstring like '%{}%'
group by diagnosisstring, icd9code
order by diagnosisstring, n desc
""".format(dx)

df = pd.read_sql_query(query, con)
df

Unnamed: 0,diagnosisstring,icd9code,n
0,endocrine|glucose metabolism|DKA,"250.13, E10.1",5024
1,endocrine|glucose metabolism|DKA|with altered ...,"250.33, E10.11",333
2,endocrine|glucose metabolism|diabetes mellitus,,14620
3,endocrine|glucose metabolism|diabetes mellitus...,,497
4,endocrine|glucose metabolism|diabetes mellitus...,,3263
5,endocrine|glucose metabolism|diabetes mellitus...,"250.00, E11.9",678
6,endocrine|glucose metabolism|diabetes mellitus...,"250.02, E11.65",791
7,endocrine|glucose metabolism|diabetes mellitus...,"250.01, E10.9",67
8,endocrine|glucose metabolism|diabetes mellitus...,"250.03, E10.65",211
9,endocrine|glucose metabolism|hyperglycemia,"790.6, R73.9",5485


In [13]:
patientunitstayid = 242380
query = query_schema + """
select *
from infusiondrug
where patientunitstayid = {}
order by infusionoffset
""".format(patientunitstayid)

df = pd.read_sql_query(query, con)
df.head()

Unnamed: 0,infusiondrugid,patientunitstayid,infusionoffset,drugname,drugrate,infusionrate,drugamount,volumeoffluid,patientweight
0,2001050,242380,59,Nitroglycerin (mcg/min),10,3,50,250,
1,1953142,242380,59,Heparin (units/hr),1000,20,25000,500,
2,1973839,242380,239,Nitroglycerin (mcg/min),20,6,50,250,
3,1951808,242380,239,Heparin (units/hr),1000,20,25000,500,
4,1977744,242380,419,Nitroglycerin (mcg/min),0,0,50,250,


In [16]:
query = query_schema + """
select labname, count(*) as n
from lab
group by labname
order by n desc
""".format(patientunitstayid)

lab = pd.read_sql_query(query, con)
print('{} total vlues for {} distinct labs.'.format(lab['n'].sum(), lab.shape[0]))
print('\nTop 25 labs by frequency:')
lab.head(25)

39132531 total vlues for 158 distinct labs.

Top 25 labs by frequency:


Unnamed: 0,labname,n
0,bedside glucose,3175835
1,potassium,1493261
2,sodium,1393205
3,glucose,1319496
4,Hgb,1298708
5,chloride,1283839
6,Hct,1279704
7,creatinine,1277760
8,BUN,1270484
9,calcium,1226978
