In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
from pandas_profiling import ProfileReport
from scipy import integrate
from tqdm.auto import tqdm

from src.cohort import ProcedureCohort
# from src.cohort import CohortMetrics
from src.cohort_metrics import Metric, tidy_flow, tidy_labs, OxygenContent, OxygenDelivery,OxygenConsumption
from src.swan import SWAN
from src.utils import rebin_time, tidy_meds
from tricorder.procedure_codesets import cabg_names, aortic_names, valve_surgeries

In [None]:
# Root directory of the SWAN data. Set the SWAN_DATA_DIR environment variable to
# point elsewhere; when it is unset the original hard-coded location is used.
swan = SWAN(os.environ.get('SWAN_DATA_DIR', '/Users/elijahc/data/compass/SWAN/'))
# eids = swan.procedures.sel(order_name=cabg_names).encounter_id.unique()

$C_{a}O_2 = 1.34 \cdot Hgb \cdot O_2Sat_a$

$C_{v}O_2 = 1.34 \cdot Hgb \cdot O_2Sat_v$

In [None]:
# Build the procedure cohort from the combined cabg_names, aortic_names and
# valve_surgeries lists, then register the two oxygen metrics on it.
pc = swan.create_procedure_cohort(procedures=cabg_names+aortic_names+valve_surgeries)
pc.add_continuous_metric(OxygenConsumption)
pc.add_continuous_metric(OxygenDelivery)

In [None]:
# Display the cohort's mechanical_ventilation_duration attribute.
pc.mechanical_ventilation_duration

In [None]:
# Compute the OxygenDelivery metric with sample=30, then pass it through pc.align_metric.
od = pc.metrics.OxygenDelivery.compute(sample=30)
od = pc.align_metric(od)

In [None]:
%matplotlib inline

In [None]:
# Re-bin the delivery metric with rebin_time(on='q8h'), keep -1 < btime < 100,
# attach the per-encounter delirium flag, and average DO2_I for each
# (post_op_delirium, encounter_id, btime) combination.
df = (
    rebin_time(od, on='q8h')
    .query('btime > -1 & btime < 100')
    .merge(pc.get_post_op_delirium(detail='encounter'), on='encounter_id', how='left')
    .query('name == "DO2_I"')
    .groupby(['post_op_delirium', 'encounter_id', 'btime'])
    .value.agg('mean')
    .reset_index()
)
df

In [None]:
# Line plot of value against btime, coloured by post_op_delirium.
g = sns.lineplot(x='btime',y='value',hue='post_op_delirium',data=df)
g.set_ylabel('DO2')
g.set_xlabel('hours')

In [None]:
sns.set_style('whitegrid')
# Number of distinct encounters behind each (btime, post_op_delirium) pair.
btimes = (
    df.groupby(['btime', 'post_op_delirium'])
    .encounter_id.nunique()
    .reset_index()
)
g = sns.barplot(x='btime', y='encounter_id', hue='post_op_delirium', data=btimes, dodge=False)
g.set_ylabel('Num unique encounters')

In [None]:
# List the transfusion names returned by searching for 'TRANSFUSE PLATELETS:'.
# swan.transfusion.search('TRANSFUSE RBC:')
swan.transfusion.search('TRANSFUSE PLATELETS:').values.tolist()

In [None]:
# Transfusion rows matching 'TRANSFUSE RBC:' for the cohort's encounters.
bp = swan.transfusion.sel(
    transfusion_name=swan.transfusion.search('TRANSFUSE RBC:'),
    # transfusion_name=swan.transfusion.search('ANE'),
    # transfusion_name=['TRANSFUSE RBC'],
    encounter_id=pc.eid)
# Keep only the text before the first space of number_of_units and cast it to int.
bp.number_of_units = bp.number_of_units.apply(lambda s: s.split(' ')[0]).astype(int)
# The original cell ended with the truncated attribute access `bp.transfu`, which
# raises AttributeError; show a preview of the parsed frame instead.
bp.head()
# bp.groupby(['encounter_id','transfusion_name']).number_of_units.value_counts()

In [None]:
# Display the cohort's procedure_info attribute.
pc.procedure_info

In [None]:
# Row counts per (encounter_id, transfusion_name) for the cohort's encounters.
swan.transfusion.sel(encounter_id=pc.eid).groupby(['encounter_id','transfusion_name']).count()

In [None]:
# TODO: preop GFR < 60
# (This note was bare text in a code cell, which is a SyntaxError when executed.)

In [None]:
# Drug-name lists used by later cells to filter flowsheet volume entries.
pressers = ['EPINEPHRINE','PHENYLEPHRINE','VASOPRESSIN','DOPAMINE','DOBUTAMINE']
seds = ['PROPOFOL','FENTANYL','MIDAZOLAM','DEXMEDETOMIDINE']
# Flowsheet names returned by searching for 'VOLUME (ML) '.
m_names = swan.flowsheet.search('VOLUME (ML) ')

In [None]:
# Keep only names whose text after 'VOLUME (ML)' is one of `pressers`.
# NOTE(review): this overwrites m_names, so any later use of m_names sees only
# the pressor subset rather than the full search result.
m_names = [n for n in m_names if n.split('VOLUME (ML)')[1].strip() in pressers]
meds = tidy_flow(swan.flowsheet.sel(display_name=m_names,encounter_id=pc.eid))
meds = pc.align_metric(meds)
# Rescale time to hours (assumes `time` is a timedelta after align_metric — TODO confirm).
meds.time = meds.time / np.timedelta64(1,'D')*24
meds = meds.query('time < 36 & time > 0')
# Sum of value per encounter and drug name inside that window.
tot_meds = meds.groupby(['encounter_id','name']).value.sum().reset_index()
# Left-join onto the distinct (encounter_id, post_op_delirium) pairs taken from `df`.
tot_meds = df[['encounter_id','post_op_delirium']].drop_duplicates().merge(tot_meds,on='encounter_id',how='left')
sns.set(rc={'figure.figsize':(15,8)})
sns.set_style('whitegrid')
sns.boxplot(y='name',x='value',hue='post_op_delirium',data=tot_meds)

In [None]:
# Sedative volume names. The search is repeated here instead of reusing m_names:
# the pressor cell overwrites m_names with the pressor-only subset, so filtering
# that list for sedatives always produced an empty selection.
s_names = [
    n for n in swan.flowsheet.search('VOLUME (ML) ')
    if n.split('VOLUME (ML)')[1].strip() in seds
]
meds = tidy_flow(swan.flowsheet.sel(display_name=s_names, encounter_id=pc.eid))
meds = pc.align_metric(meds)
# Rescale time to hours (assumes `time` is a timedelta after align_metric — TODO confirm).
meds.time = meds.time / np.timedelta64(1,'D')*24
# NOTE(review): the pressor cell uses `time < 36`; confirm that 38 is intended here.
meds = meds.query('time < 38 & time > 0')
# Sum of value per encounter and drug name inside that window.
tot_meds = meds.groupby(['encounter_id','name']).value.sum().reset_index()
# Left-join onto the distinct (encounter_id, post_op_delirium) pairs taken from `df`.
tot_meds = df[['encounter_id','post_op_delirium']].drop_duplicates().merge(tot_meds,on='encounter_id',how='left')
sns.set(rc={'figure.figsize':(15,8)})
sns.set_style('whitegrid')
sns.stripplot(y='name',x='value',dodge=True, hue='post_op_delirium',data=tot_meds)

In [None]:
# Sorted list of the values returned by swan.flowsheet.unique().
sorted(swan.flowsheet.unique().tolist())

In [None]:
from scipy import integrate


In [None]:
# Agents to look up in the medications table (this reassigns `seds`).
seds = ['DEXMEDETOMIDINE','PROPOFOL','FENTANYL','MIDAZOLAM','KETAMINE']
med_n = []
for n in seds:
    med_n.extend(swan.medications.search(n).values.tolist())
m = pc.align_metric(
    tidy_meds(swan.medications.sel(medication_name=med_n, encounter_id=pc.eid))
)
m['hours'] = m.time/np.timedelta64(1,'D')*24
# Trapezoidal integral of value over hours, restricted to 12 <= hours <= 36,
# computed separately for each (name, encounter_id).
m_seds = (
    m.query('hours <= 36 & hours >= 12')
    .groupby(['name','encounter_id'])
    .apply(lambda d: integrate.trapezoid(y=d.value, x=d.hours))
    .rename('dose')
    .reset_index()
)
# Reduce each medication name to the text before its first space.
m_seds.name = m_seds.name.apply(lambda s: s.split(' ')[0])
m_seds = pc.get_post_op_delirium(detail='encounter').merge(m_seds, on='encounter_id', how='left')
m_seds.name.value_counts()

In [None]:
# HEMATOCRIT labs, passed through pc.align_metric together with pc.icu_start.
hct = pc.labs(names=['HEMATOCRIT'])
hct = pc.align_metric(hct,pc.icu_start)
# For each encounter, the value at the smallest `time`.
hct.groupby('encounter_id').apply(lambda d: d.sort_values(by='time').set_index('time').value.values[0])
# hct.time = hct.time/np.timedelta64(1,'D')

In [None]:
# Box plots of dose by post_op_delirium, one panel per name, using rows with 0 < dose < 4000.
sns.catplot(y='dose',col='name',x='post_op_delirium',data=m_seds.query('dose > 0 & dose < 4000'),kind='box',sharey=False)

In [None]:
# Number of WEIGHT and HEIGHT flowsheet rows per encounter in the cohort.
swan.flowsheet.sel(display_name=['WEIGHT','HEIGHT'], encounter_id=pc.eid).groupby('encounter_id').display_name.value_counts().rename('count').reset_index()

In [None]:
# Same per-encounter WEIGHT/HEIGHT row counts, stored in `data`.
data=swan.flowsheet.sel(display_name=['WEIGHT','HEIGHT'],encounter_id=pc.eid).groupby('encounter_id').display_name.value_counts().rename('count').reset_index()

In [None]:
# Histogram over display_name of the per-encounter WEIGHT/HEIGHT count rows
# (the table is recomputed inline rather than read from `data`).
sns.histplot(data=swan.flowsheet.sel(display_name=['WEIGHT','HEIGHT'],encounter_id=pc.eid).groupby('encounter_id').display_name.value_counts().rename('count').reset_index(),
            x='display_name',multiple='dodge',element='bars',
           )

In [None]:
# Display the result of get_post_op_delirium with detail='encounter'.
pc.get_post_op_delirium(detail='encounter')

In [None]:
# DO2_I joined with mortality (left join) and with the per-encounter offset.
# NOTE(review): an earlier cell filters this metric with name == "DO2_I" (long
# form) while this one indexes od['DO2_I']; confirm which layout `od` has.
dat = (
    od['DO2_I']
    .reset_index()
    .merge(pc.mortality, on='encounter_id', how='left')
    .merge(pc.offset, on='encounter_id')
)
# hour shifted by offset * 24.
dat['rhour'] = dat['hour'] - (dat['offset']*24)
dat

In [None]:
# Rename death -> mortality and map its True/False values to display labels.
dat = dat.rename(columns={'death':'mortality'}).assign(
    mortality=lambda d: d.mortality.replace({True:'Non-Survivors',False:'Survivors'})
)

In [None]:
# Search lab names with an empty pattern.
# NOTE(review): presumably this lists every lab name — confirm against search().
swan.labs.search('')

In [None]:
# Two side-by-side panels: a time course and a distribution of DO2_I by mortality group.
fig, axs = plt.subplots(1,2,figsize=(16,4))
y='DO2_I'
# Column label with the metric's units appended in parentheses.
y_u = y+' ({})'.format(OxygenDelivery.units[y])
dat = dat.rename(columns={y:y_u})
# Left panel: y_u against rhour, coloured by mortality; x-axis limited to -24..120.
g = sns.lineplot(
    x='rhour',
    y=y_u,
    hue='mortality',data=dat, ax=axs[0])
g.set_xlim(-24,24*5)

# Right panel: histogram of y_u for rows with hour >= 18, with stat='probability'
# and common_norm=False.
sns.histplot(x=y_u, hue='mortality',data=dat.query('hour >= 18'),
         common_norm=False,stat='probability', ax=axs[1])

In [None]:
# Explicit halt for "Run All". The original cell held only the undefined name
# `asfd`, which stopped execution with a NameError.
# NOTE(review): presumably the cells below are scratch work; several of them use
# names that no visible cell above assigns. Confirm before removing this stop.
raise RuntimeError('Stop here: the cells below are not part of the main run.')

In [None]:
# Display the cohort's offset attribute.
pc.offset

In [None]:
# CARDIAC OUTPUT and CCO flowsheet rows for the encounters in `eids`, passed through tidy_flow.
# NOTE(review): `eids` is not assigned in any cell above this one in the visible notebook.
f= tidy_flow(swan.flowsheet.sel(display_name=['CARDIAC OUTPUT','CCO'],encounter_id=eids))

In [None]:
# time divided by one day (assumes `time` is a timedelta — TODO confirm).
f['day'] = f.time / np.timedelta64(1,'D')

# Round to whole hours, then store the difference from the smallest hour in the
# frame as a timedelta.
f['hour'] = (f.day*24).round().astype(int)
f['hour'] = pd.to_timedelta(f.hour-f.hour.min(), unit='hour')

In [None]:
# Mean of the remaining columns per (encounter_id, hour, name).
f.groupby(['encounter_id','hour','name']).mean()

In [None]:
# NOTE(review): `do` is not assigned in any visible cell; unless it is defined
# elsewhere this line fails with NameError on a fresh kernel.
do.compute(100).head(100)

In [None]:
class RespiratoryQuotient(Metric):
    """Metric subclass that lists, in REQUIRES, the lab components it needs.

    NOTE(review): no computation is implemented here; `requires` is a stub.
    """

    REQUIRES = {
        'labs': [
            'TCO2 VENOUS',
            'TCO2 ARTERIAL',
            'O2SAT ARTERIAL MEASURED',
            'O2SAT VENOUS MEASURED',
        ],
    }

    def requires(self):
        """Placeholder: does nothing and returns None."""
        return None

# NOTE(review): `eids` is not assigned in any cell above this one in the visible notebook.
rq = RespiratoryQuotient(db=swan, encounter_id=eids)

In [None]:
# Mean per (encounter_id, name) over the result of rq.db_sample(2).
rq.db_sample(2).groupby(['encounter_id','name']).mean()

In [None]:
# Distinct encounter ids in rq.db_fetch().
rq.db_fetch().encounter_id.unique()

In [None]:
# OxygenContent metric constructed for the encounters in `eids`.
cao2 = OxygenContent(swan,encounter_id=eids)
# eids = cao2.db_fetch().encounter_id.drop_duplicates().sample(500)
# cao2 = OxygenContent(swan, encounter_id=eids)

In [None]:
# Call compute_oxygen_content_AV with sample=5 and with_delivery=True, then display it.
# This reassigns `df`, which earlier cells used for the delirium analysis.
df = cao2.compute_oxygen_content_AV(sample=5, with_delivery=True)
# df['DO2'] = df.groupby(level='encounter_id').apply(lambda d: d.CaO2.interpolate()*d.CCO.interpolate()).values
df

In [None]:
# Melt CCO, CaO2, CvO2 and DO2 to long form keyed by encounter_id and hour.
df_long = pd.melt(df.reset_index(),id_vars=['encounter_id','hour'],value_vars=['CCO','CaO2','CvO2','DO2'],var_name='name')

# One panel per name: value against hour.
sns.catplot(col='name',x='hour',y='value',hue='name',data=df_long)

In [None]:
from tricorder.cohort import ProcedureCohort

In [None]:
# FIXME: the original call `swan.procedures.cohort(names=)` gave no value for
# `names` and was a SyntaxError. Left disabled until the intended list is supplied.
# swan.procedures.cohort(names=...)

In [None]:
# Display the cohort's mortality attribute.
pc.mortality

In [None]:
# Left-join pc.mortality onto `ca` by encounter_id.
# NOTE(review): `ca` is not assigned in any visible cell before this one.
ca = ca.merge(pc.mortality, on='encounter_id', how='left')

In [None]:
# TCO2 VENOUS and TCO2 ARTERIAL lab rows for the encounters in `eids`.
tc = swan.labs.sel(lab_component_name=['TCO2 VENOUS','TCO2 ARTERIAL'], encounter_id=eids)

In [None]:
# Pass the selected lab rows through tidy_labs (overwrites `tc`).
tc = tidy_labs(tc)

In [None]:
tc = tc.sort_values(['encounter_id','time'])
# Whole hours from `time` (assumes `time` is a timedelta — TODO confirm).
tc['hour'] = (tc.time/np.timedelta64(1,'D') * 24).round().astype(int)
# Hours since each encounter's smallest hour. transform('min') aligns on the row
# index; the original groupby.apply(...).values depended on positional order and
# can yield a wide frame (and fail on assignment) when there is one encounter.
tc['rhour'] = tc.hour - tc.groupby('encounter_id').hour.transform('min')

freqh = 12
# Right-closed bins (-1, 11], (11, 23], ... extended far enough to contain the
# largest rhour. Labels come from the edges (right edge + 1 -> 12, 24, ...), so
# their count always matches the number of bins. The original edges stopped
# short of the maximum, dropping the last samples, and raised ValueError
# whenever the maximum rhour was a multiple of freqh.
bn = np.arange(-1, tc.rhour.max() + freqh, freqh)
labels = bn[1:] + 1
tc['rhour_b'] = pd.cut(tc.rhour, bins=bn, labels=labels)
# Mean value per (encounter_id, rhour_b), one column per name.
tc_pv = pd.pivot_table(index=['encounter_id','rhour_b'],values='value', aggfunc='mean', columns='name',data=tc)
tc_pv = tc_pv.reset_index()

In [None]:
# Preview the first rows of the pivoted table.
tc_pv.head()

In [None]:
# Line plot of TCO2 ARTERIAL against rhour_b.
sns.lineplot(x='rhour_b',y='TCO2 ARTERIAL', data=tc_pv)

In [None]:
# Display the pivoted table.
tc_pv

In [None]:
# Left-join pc.mortality onto the pivoted table, then scatter TCO2 ARTERIAL
# against rhour_b coloured by death.
tc_pv = tc_pv.merge(pc.mortality, on='encounter_id', how='left')
sns.scatterplot(x='rhour_b',y='TCO2 ARTERIAL', hue='death',data=tc_pv)

In [None]:
# Rows of `binned` whose name is "TCO2 VENOUS".
# NOTE(review): `binned` is not assigned in any visible cell.
binned.query('name == "TCO2 VENOUS"')

In [None]:
# Search lab names for 'TC'.
swan.labs.search('TC')

In [None]:
# Search lab names for 'BIC'.
swan.labs.search('BIC')

In [None]:
# Distribution of value coloured by name, one panel per death value.
sns.displot(col='death', x='value', hue='name',data=ca,)

In [None]:
# Index `ca` by rday converted to a day-unit timedelta, sorted by that index.
c2 = ca.set_index(pd.to_timedelta(ca.rday.values,unit='D')).sort_index()
c2

In [None]:
# Integer bin index of rday (labels=False) using edges from -0.5 up to, but not
# including, the rounded maximum rday, in steps of 0.5.
c2['rday_b'] = pd.cut(c2.rday.values,np.arange(-0.5,c2.rday.round().max(),0.5),labels=False)

In [None]:
# Draw one encounter id at random (no random_state is set, so the choice changes
# between runs) and scatter its values against rday_b, coloured by name.
enc = c2.encounter_id.drop_duplicates().sample(1)
dat=c2[c2.encounter_id.isin(enc)]
sns.scatterplot(x='rday_b',y='value',hue='name',data=dat)

In [None]:
# Row counts per (encounter_id, name).
ca.groupby(['encounter_id','name']).count()

In [None]:
# Row counts per encounter_id.
ca.groupby('encounter_id').count()

In [None]:
# Frequency of each name in cao2.db_fetch().
cao2.db_fetch().name.value_counts()

In [None]:
# Fetch with sample=1 (this reassigns `df` again).
df = cao2.db_fetch(sample=1)

In [None]:
# Rows whose name contains 'HEMOGLOBIN'.
df[df.name.str.contains('HEMOGLOBIN')]

In [None]:
# Distinct values of name.
df.name.unique()

In [None]:
# Row counts per (encounter_id, time, name).
cao2.db_fetch().groupby(['encounter_id','time','name']).count()

In [None]:
# Distinct encounter ids among lab rows whose component is in cao2.lab_names.
l_e = swan.labs.sel(lab_component_name=cao2.lab_names).encounter_id.unique()

In [None]:
# Distinct encounter ids among flowsheet rows with display_name 'CCI'.
f_e = swan.flowsheet.sel(display_name=['CCI']).encounter_id.unique()

In [None]:
# Number of encounter ids present in both l_e and f_e.
len(np.intersect1d(l_e,f_e))

In [None]:
# Search lab names for 'O2'.
swan.labs.search('O2')

In [None]:
# Encounter ids that occur in both hgb and sao2.
# NOTE(review): `hgb` is not assigned in any visible cell, and `sao2` is only
# assigned in a later cell, so this depends on out-of-order execution.
eids = hgb.encounter_id.value_counts().index.intersection(sao2.encounter_id.value_counts().index)

In [None]:
# Number of sao2 rows per encounter_id.
sao2.encounter_id.value_counts()

In [None]:
# Lab rows whose component is FIO2 or O2SAT ARTERIAL MEASURED, then display them.
sao2 = swan.labs.sel(lab_component_name=['FIO2','O2SAT ARTERIAL MEASURED'])
sao2
# sao2.query('encounter_id == 169906456092')

In [None]:
# Line plot of value against hours, coloured by name, for one encounter of `hgb`.
# NOTE(review): `hgb` is not assigned in any visible cell.
sns.lineplot(
#     x='lab_collection_days_since_birth',
    x='hours',
    y='value',hue='name',data=hgb.query('encounter_id == 169906456092'))

In [None]:
# Row counts per lab_component_name.
hgb.groupby('lab_component_name').count()

In [None]:
# The values returned by swan.labs.unique(), as a list.
swan.labs.unique().tolist()

In [None]:
# Search flowsheet names for 'O2'.
swan.flowsheet.search('O2')

In [None]:
# Flowsheet rows with display_name 'PAP'.
swan.flowsheet.sel(display_name=['PAP'])