# Exploring MIMIC II Database with Holoviews and Bokeh


In [None]:
import numpy as np
import scipy.stats as ss
import pandas as pd
import holoviews as hv
import random
# Load the Bokeh plotting backend for HoloViews.
hv.extension('bokeh')

# Notebook-wide plot option requesting the Bokeh hover tool.
# NOTE(review): presumably meant to apply to both Curve and Scatter —
# confirm how the installed HoloViews version parses two element names
# that share one option group.
%opts Curve Scatter [tools=['hover']]


In [None]:
import pymysql.cursors

# Open the MySQL connection used by every query in this notebook.
# DictCursor is requested so rows come back keyed by column name.
# NOTE(review): the credentials are hard-coded — presumably local
# development defaults; confirm before sharing this notebook.
db_settings = dict(
    host='mysql',
    user='jovyan',
    password='jovyan',
    db='mimic2',
    charset='utf8mb4',
    cursorclass=pymysql.cursors.DictCursor,
)
connection = pymysql.connect(**db_settings)

# List the tables available in the mimic2 schema.
pd.read_sql("show tables", connection)

## Look at Age Distribution of Patients


In [None]:
# Per-patient age at death in whole years (days between dob and dod,
# divided by 365 and floored), together with sex and subject id.
age_query = '''
    SELECT FLOOR(DATEDIFF(dod, dob)/365) age_years, sex, subject_id
    from d_patients
    '''
df = pd.read_sql(age_query, connection)

# Preview the first rows.
df.head()

In [None]:
# Distinct ages at death recorded for male patients.
df.loc[df["sex"] == 'M', "age_years"].unique()

In [None]:
# Wrap the patient DataFrame in a HoloViews Table; the bare name on the
# last line makes the notebook display it.
age_table = hv.Table(df)
age_table

In [None]:
# Split the ages at death by sex using a boolean mask over the Table.
sex_values = age_table["sex"]
male_ages = age_table[sex_values == 'M']["age_years"]
female_ages = age_table[sex_values == 'F']["age_years"]

# One 20-bin histogram per sex.
male = hv.Histogram(np.histogram(male_ages, 20))
female = hv.Histogram(np.histogram(female_ages, 20))

# `+` places the two histograms next to each other; relabel both axes.
side_by_side = male + female
side_by_side.redim.label(x="age at death (years)", Frequency="Count")

In [None]:
%%opts Histogram (alpha=0.3)
(male*female).redim.label(x="age at death (years)", Frequency="Count")

In [None]:
# Print the HoloViews help text for the Histogram element.
hv.help(hv.Histogram)

In [None]:
# Draw 20 female and 20 male subject ids at random.
# np.random.choice samples with replacement by default, so an id can be
# drawn more than once and the distinct-subject count printed below may
# be smaller than 40.
females = np.random.choice(df[df.sex=='F']["subject_id"], 20)
males = np.random.choice(df[df.sex=='M']["subject_id"], 20)

female_ids = ", ".join(str(f) for f in females)
male_ids = ", ".join(str(m) for m in males)

# Join the two lists with an explicit separator. Plain concatenation
# (`female_ids + male_ids`) fuses the last female id and the first male
# id into one bogus number, e.g. "1, 2" + "3, 4" -> "1, 23, 4".
subject_ids = ", ".join([female_ids, male_ids])

# Chart events with itemid 51 (labelled "Arterial Blood Pressure" in the
# query) for the sampled subjects, keeping only rows where both numeric
# values are non-zero. `age` and `decade` are derived from the number of
# days between the patient's dob and the chart time.
df2 = pd.read_sql(
'''
select
    c.subject_id,
    p.dob,
    c.charttime,
    c.itemid,
    c.value1num,
    c.value2num,
    p.sex,
    FLOOR(DATEDIFF(c.charttime, p.dob)/365) age,
    FLOOR(DATEDIFF(c.charttime, p.dob)/3650)*10 decade
from d_patients p left join chartevents c on p.subject_id = c.subject_id
where
    c.itemid = 51
    -- Arterial Blood Pressure
    and value1num != 0
    and value2num != 0
    and p.subject_id in (%s)

    ''' % (subject_ids,), connection)

# Number of distinct subjects that actually returned rows.
print(len(df2.subject_id.unique()))
df2.head()

In [None]:
# Distinct subject ids present in the blood-pressure result.
df2["subject_id"].unique()

In [None]:
%%opts BoxWhisker [width=800 height=400 show_legend=False]
hv.BoxWhisker(df2, kdims=["sex", "decade"], vdims=["value1num"]).redim(value1num="systolic bp (mmHG)")

In [None]:
# Show the blood-pressure query result as a HoloViews Table.
hv.Table(df2)

In [None]:
%%opts Curve [width=600]
#agg = macro.aggregate('year', function=np.mean, spreadfn=np.std)
#(hv.Curve(agg) * hv.ErrorBars(agg, kdims=['year'], vdims=['growth', 'growth_std']))

bp = hv.Dataset(df2[['charttime', 'subject_id', 'sex', 'value1num', 'value2num']], kdims=['charttime', 'subject_id', 'sex'])

In [None]:
%%opts Bars [width=600] (alpha=0.35)

agg = bp.aggregate(["sex"], function=np.mean, spreadfn=np.std)
#bp.aggregate()

(hv.Bars(agg) * hv.ErrorBars(agg, kdims=['sex'], vdims=['value1num', 'value1num_std'])).redim(value1num="bp (mmHg)")*\
(hv.Bars(agg, kdims=['sex'], vdims=['value2num', 'value2num_std']) * hv.ErrorBars(agg, kdims=['sex'], vdims=['value2num', 'value2num_std'])).redim(value2num="diastolic bp")



In [None]:
%%opts Bars [width=600] (alpha=0.35)

agg = bp.sort("subject_id").aggregate(["subject_id"], function=np.mean, spreadfn=np.std)
#bp.aggregate()

(hv.Bars(agg) * hv.ErrorBars(agg, kdims=['subject_id'], vdims=['value1num', 'value1num_std'])).redim(value1num="bp (mmHg)")*\
(hv.Bars(agg, kdims=['subject_id'], vdims=['value2num', 'value2num_std']) * hv.ErrorBars(agg, kdims=['subject_id'], vdims=['value2num', 'value2num_std'])).redim(value2num="diastolic bp")



In [None]:
# Recompute the per-sex aggregate locally. The preceding cell rebinds
# `agg` to a per-subject aggregate, which carries no 'sex' dimension, so
# reusing `agg` here breaks when the notebook is run top to bottom.
agg_by_sex = bp.aggregate(["sex"], function=np.mean, spreadfn=np.std)

# Bars plus error bars for value1num.
systolic_panel = (
    hv.Bars(agg_by_sex)
    * hv.ErrorBars(agg_by_sex, kdims=['sex'], vdims=['value1num', 'value1num_std'])
).redim(value1num="systolic bp")

# Bars plus error bars for value2num.
diastolic_panel = (
    hv.Bars(agg_by_sex, kdims=['sex'], vdims=['value2num', 'value2num_std'])
    * hv.ErrorBars(agg_by_sex, kdims=['sex'], vdims=['value2num', 'value2num_std'])
).redim(value2num="diastolic bp")

# `+` shows the two panels side by side.
systolic_panel + diastolic_panel

In [None]:
%%opts Bars [width=600] (alpha=0.35)
(hv.Bars(agg, kdims=['sex'], vdims=['value2num', 'value2num_std']) * hv.ErrorBars(agg, kdims=['sex'], vdims=['value2num', 'value2num_std'])).redim(value2num="diastolic bp")

### Urine Output

In [None]:
# Join the sampled id lists with an explicit separator. Plain
# concatenation (`female_ids + male_ids`) fuses the last female id and
# the first male id into one bogus number, e.g. "1, 2" + "3, 4" ->
# "1, 23, 4".
urine_subject_ids = ", ".join([female_ids, male_ids])

# ioevents rows for the sampled subjects, restricted to the item ids
# listed in the query (urine output, per this section's heading) and
# capped at 100000 rows.
urinedf = pd.read_sql("""
SELECT
    p.subject_id,
    p.sex,
    ie.charttime,
    ie.volume,
    ie.volumeuom
FROM d_patients p left join ioevents ie  on p.subject_id = ie.subject_id

where ie.itemid in (55, 56, 57, 61, 65, 69, 85, 94, 96, 288, 405,
428, 473, 651, 715, 1922, 2042, 2068, 2111, 2119, 2130, 2366, 2463,
2507, 2510, 2592, 2676, 2810, 2859, 3053, 3175, 3462, 3519, 3966, 3987,
4132, 4253, 5927) and p.subject_id in (%s)


LIMIT 100000""" % (urine_subject_ids,), connection)
hv.Table(urinedf)

## Respiratory Rate

In [None]:
# Join the sampled id lists with an explicit separator. Plain
# concatenation (`female_ids + male_ids`) fuses the last female id and
# the first male id into one bogus number, e.g. "1, 2" + "3, 4" ->
# "1, 23, 4".
rr_subject_ids = ", ".join([female_ids, male_ids])

# chartevents rows with itemid 219, 615 or 618 (respiratory rate, per
# this section's heading) for the sampled subjects, capped at 100000 rows.
rrdf = pd.read_sql("""
SELECT
    p.subject_id,
    p.sex,
    c.charttime,
    c.value1num

FROM d_patients p left join chartevents c  on p.subject_id = c.subject_id

WHERE c.itemid in (219, 615, 618) and c.subject_id in (%s)

LIMIT 100000
""" % (rr_subject_ids,), connection)

# Order the readings chronologically before displaying them.
rrdf.sort_values(by=["charttime"], inplace=True)
hv.Table(rrdf)

In [None]:
# Join the sampled id lists with an explicit separator. Plain
# concatenation (`female_ids + male_ids`) fuses the last female id and
# the first male id into one bogus number, e.g. "1, 2" + "3, 4" ->
# "1, 23, 4".
hr_subject_ids = ", ".join([female_ids, male_ids])

# chartevents rows with itemid 211 for the sampled subjects, capped at
# 100000 rows. NOTE(review): presumably heart rate, going by the
# variable name and the "beats per minute" label used later — confirm.
hrdf = pd.read_sql("""
SELECT
    p.subject_id,
    p.sex,
    c.charttime,
    c.value1num

FROM d_patients p left join chartevents c  on p.subject_id = c.subject_id

WHERE c.itemid = 211 and c.subject_id in (%s)

LIMIT 100000
""" % (hr_subject_ids,), connection)
hv.Table(hrdf)

In [None]:
# Pick one subject id at random from the blood-pressure rows. The draw
# is over rows, so subjects with more readings are more likely.
subject_pool = df2["subject_id"]
selected_subject = random.choice(subject_pool)
print(selected_subject)

In [None]:
%%opts Curve [width=800 height=400 show_legend=False xrotation=45 legend=True]

systolic =\
    hv.Curve(df2[df2.subject_id==selected_subject], 
             kdims=["charttime"], 
             vdims=["value1num"]).redim(value1num="mmHg")
diastolic = \
    hv.Curve(df2[df2.subject_id==selected_subject], 
             kdims=["charttime"], 
             vdims=["value2num"]).redim(value2num="mmHg")
    
urine = \
    hv.Curve(urinedf[urinedf.subject_id==selected_subject], 
             kdims=["charttime"], 
             vdims=["volume"]).redim(volume="mL")
heartrate = hv.Curve(hrdf[hrdf.subject_id==selected_subject], 
                     kdims=["charttime"], vdims=["value1num"]).redim(value1num="beats per minute")
resprate = hv.Curve(rrdf[rrdf.subject_id==selected_subject], 
                     kdims=["charttime"], vdims=["value1num"]).redim(value1num="breaths per minute")


In [None]:
# Overlay the selected subject's two blood-pressure curves on shared axes.
systolic*diastolic

In [None]:
%%opts Curve [width=800 height=400 show_legend=False xrotation=45 show_legend=True]

(heartrate+resprate).cols(1)

In [None]:
# Overlay the respiratory-rate and heart-rate curves on shared axes.
(resprate*heartrate)

In [None]:
%%opts Histogram [width=400 height=400 show_legend=False](alpha=0.5)

bpm = hv.Histogram(np.histogram(heartrate["beats per minute"], 20)).redim(Frequency="count", x="heart rate")
rpm = hv.Histogram(np.histogram(resprate["breaths per minute"], 20)).redim(Frequency="count", x="rate (per minute)")
rpm*bpm

In [None]:
%%opts Spikes [width=800 height=400 show_legend=False xrotation=45 show_legend=True]

hv.Spikes(resprate)

In [None]:
# Column labels of the blood-pressure query result.
df2.columns

In [None]:
# Preview the blood-pressure rows without the dob and sex columns.
trimmed = df2.drop(["dob", "sex"], axis="columns")
trimmed.head()

In [None]:
%%opts Spikes [width=700 xrotation=45]

# Convert the blood-pressure Dataset into Spikes of value1num over chart
# time, grouped by subject id; the bare name on the last line displays
# the result.
spikes = bp.to(hv.Spikes, kdims='charttime', vdims='value1num', groupby='subject_id')
spikes