### Script for function to pull vitals data

This function loads chart events (specified by user-entered `event` variable) into a dataframe, filters events by ventilataed patients, determines the last reading before extubation, takes the mean of this value for any duplicate readings (happens when multiple sensors used simultanrously) and returns the data as a column in the master dataframe

#### impot useful libraries

In [2]:
import sys
sys.path.append("../extumate")

import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from extumate.config import data_dir, processed_dir

#### user-defined variables that change every time

In [3]:
event = "bloodpressure"
export_dir = processed_dir

datafile = data_dir+event
mean_export_file = export_dir+event
std_export_file = export_dir+"std_"+event
meanColumns = ['hadm_id',event]
stdColumns = ['hadm_id','std_'+event]

In [3]:
df = pd.read_feather(datafile)
df['valuenum']= pd.to_numeric(df['valuenum'])

In [4]:
# to convert fahrenheit to celcius: (F-32) x 5/9
df['valuenum'] = np.where(df['itemid']==223761,(df['valuenum']-32)*(5/9),df['valuenum'])

In [5]:
# convert height in cm to inches- divide by 2.74
df['valuenum'] = np.where(df['itemid']==226730,(df['valuenum']/2.54),df['valuenum'])

In [6]:
df[df['hadm_id']==27317316]

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valuenum,valueuom,warning,endtime,re_intub_class,time_on_vent
6054,10254097,27317316,35596154,2138-11-01 12:01:00,2138-11-01 12:35:00,220181,61,61.0,mmHg,0,2138-11-06 09:52:00,0,120.866667
6055,10254097,27317316,35596154,2138-11-01 08:29:00,2138-11-01 08:42:00,220181,85,85.0,mmHg,0,2138-11-06 09:52:00,0,120.866667
6056,10254097,27317316,35596154,2138-11-01 09:01:00,2138-11-01 09:58:00,220181,83,83.0,mmHg,0,2138-11-06 09:52:00,0,120.866667
6057,10254097,27317316,35596154,2138-11-01 10:01:00,2138-11-01 10:44:00,220181,65,65.0,mmHg,0,2138-11-06 09:52:00,0,120.866667
6058,10254097,27317316,35596154,2138-11-01 11:01:00,2138-11-01 12:35:00,220181,90,90.0,mmHg,0,2138-11-06 09:52:00,0,120.866667
...,...,...,...,...,...,...,...,...,...,...,...,...,...
6254,10254097,27317316,35596154,2138-11-06 20:01:00,2138-11-06 20:09:00,220181,77,77.0,mmHg,0,2138-11-06 09:52:00,0,120.866667
6255,10254097,27317316,35596154,2138-11-03 08:01:00,2138-11-03 08:09:00,220181,66,66.0,mmHg,0,2138-11-06 09:52:00,0,120.866667
6256,10254097,27317316,35596154,2138-11-03 09:01:00,2138-11-03 09:06:00,220181,73,73.0,mmHg,0,2138-11-06 09:52:00,0,120.866667
6257,10254097,27317316,35596154,2138-11-03 10:01:00,2138-11-03 10:35:00,220181,76,76.0,mmHg,0,2138-11-06 09:52:00,0,120.866667


#### Convert times to pandas datetime object, find the time difference between each event and the time of extubation ('endtime') and find the last time an event occurred before extubation

In [7]:
df['endtime'] = pd.to_datetime(df['endtime'])
df['charttime'] = pd.to_datetime(df['charttime'])
df['time_diff']= df['endtime']-df['charttime']

In [8]:
df['timediffhour'] = df['time_diff'] / np.timedelta64(1, 'h') # convert time to hours
df_pre = df.copy()
df_pre = df[df['timediffhour']>0 & (df['timediffhour']<2)]

In [9]:
df_pre['min']=df_pre.groupby('hadm_id')['timediffhour'].transform('min') # find the last reading 
            # time before extubation occured and place in a new channel ('min')

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  df_pre['min']=df_pre.groupby('hadm_id')['timediffhour'].transform('min') # find the last reading


In [10]:
last_value_pre = df_pre[df_pre['timediffhour'] == df_pre['min']] 
            # select only rows where 'timediffhour' is the same as the minimum time ('min')

In [11]:
last_value_pre

Unnamed: 0,subject_id,hadm_id,stay_id,charttime,storetime,itemid,value,valuenum,valueuom,warning,endtime,re_intub_class,time_on_vent,time_diff,timediffhour,min
79,10004235,24181354,30276431,2196-02-27 16:00:00,2196-02-27 16:06:00,225312,88,88.0,mmHg,0,2196-02-27 16:28:00,0,71.600000,0 days 00:28:00,0.466667,0.466667
169,10005348,25239799,31523640,2130-10-28 04:00:00,2130-10-28 04:04:00,220052,78,78.0,mmHg,0,2130-10-28 04:35:00,0,11.833333,0 days 00:35:00,0.583333,0.583333
229,10019003,27525946,30460871,2153-04-14 17:00:00,2153-04-14 17:29:00,220052,65,65.0,mmHg,0,2153-04-14 17:50:00,0,22.066667,0 days 00:50:00,0.833333,0.833333
632,10035631,29276678,35275147,2116-03-09 18:00:00,2116-03-09 18:03:00,220052,79,79.0,mmHg,0,2116-03-09 18:11:00,0,217.983333,0 days 00:11:00,0.183333,0.183333
783,10035747,27083519,39236053,2126-05-15 12:00:00,2126-05-15 12:04:00,220181,88,88.0,mmHg,0,2126-05-15 13:00:00,0,143.000000,0 days 01:00:00,1.000000,1.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2928988,19965610,28545396,30691619,2125-09-28 12:00:00,2125-09-28 12:48:00,220181,74,74.0,mmHg,0,2125-09-28 12:51:00,0,52.733333,0 days 00:51:00,0.850000,0.850000
2929066,19970491,22119205,30220330,2131-02-13 09:00:00,2131-02-13 09:21:00,220181,90,90.0,mmHg,0,2131-02-13 09:17:00,0,48.733333,0 days 00:17:00,0.283333,0.283333
2929129,19970491,20897702,32207420,2131-05-11 21:20:00,2131-05-11 21:26:00,220052,62,62.0,mmHg,0,2131-05-11 21:22:00,0,25.366667,0 days 00:02:00,0.033333,0.033333
2929265,19970491,29133530,38263194,2132-04-28 10:00:00,2132-04-28 10:01:00,220181,111,111.0,mmHg,0,2132-04-28 12:19:00,0,90.316667,0 days 02:19:00,2.316667,2.316667


In [12]:
avg_dupValues = last_value_pre.groupby('hadm_id')['valuenum'].mean() # where duplicate values exist,take the mean
meanBP = avg_dupValues.reset_index()
meanBP.columns = (meanColumns)

In [13]:
svents = pd.read_csv(export_dir +'sample_vents.csv')
mean_df = pd.merge(left = svents, right=meanBP, how = 'left', left_on='hadm_id', right_on='hadm_id')   

In [14]:
mean_df

Unnamed: 0,hadm_id,endtime,time_on_vent,re_intub_class,subject_id,admittime,dischtime,deathtime,admission_type,admission_location,discharge_location,insurance,language,marital_status,ethnicity,edregtime,edouttime,hospital_expire_flag,hours_to_death,bloodpressure
0,28038802,2185-12-20 09:59:00,108.800000,0,18917458,2185-12-15 00:17:00,2185-12-22 17:15:00,,EW EMER.,EMERGENCY ROOM,HOSPICE,Medicaid,ENGLISH,MARRIED,WHITE,2185-12-14 23:05:00,2185-12-15 01:31:00,0,,107.0
1,21790335,2140-03-11 13:44:00,33.000000,0,19704964,2140-03-09 14:44:00,2140-03-15 13:00:00,,URGENT,PHYSICIAN REFERRAL,HOME,Other,?,MARRIED,UNABLE TO OBTAIN,,,0,,82.0
2,24357541,2177-02-10 14:00:00,659.416667,0,19004463,2176-12-16 18:11:00,2177-02-15 17:00:00,,URGENT,TRANSFER FROM HOSPITAL,REHAB,Medicare,ENGLISH,MARRIED,WHITE,,,0,,102.0
3,22188993,2145-11-04 18:40:00,143.666667,0,11538389,2145-10-29 12:46:00,2145-11-09 17:30:00,2145-11-09 17:30:00,EW EMER.,EMERGENCY ROOM,DIED,Medicare,ENGLISH,MARRIED,UNKNOWN,2145-10-29 11:07:00,2145-10-29 14:00:00,1,-118.833333,72.0
4,21880799,2134-05-22 17:58:00,54.966667,0,16606203,2134-05-20 09:51:00,2134-05-22 18:00:00,,EW EMER.,EMERGENCY ROOM,CHRONIC/LONG TERM ACUTE CARE,Medicare,?,MARRIED,HISPANIC/LATINO,2134-05-20 06:11:00,2134-05-20 11:15:00,0,,65.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
16399,29960248,2146-12-09 14:23:00,73.466667,1,16691924,2146-11-30 16:09:00,2146-12-17 17:04:00,,DIRECT EMER.,CLINIC REFERRAL,SKILLED NURSING FACILITY,Medicare,ENGLISH,WIDOWED,WHITE,,,0,,64.5
16400,29962016,2135-10-30 11:45:00,218.083333,1,16796371,2135-10-21 08:16:00,2135-11-04 13:30:00,,EW EMER.,EMERGENCY ROOM,REHAB,Medicare,ENGLISH,SINGLE,BLACK/AFRICAN AMERICAN,2135-10-21 05:47:00,2135-10-21 09:45:00,0,,71.0
16401,29974575,2131-03-03 19:54:00,98.900000,1,10020944,2131-02-27 15:34:00,2131-03-13 17:01:00,,EW EMER.,EMERGENCY ROOM,SKILLED NURSING FACILITY,Medicare,ENGLISH,,UNKNOWN,2131-02-27 13:16:00,2131-02-27 16:40:00,0,,73.0
16402,29987115,2148-02-19 10:00:00,44.000000,1,19652570,2148-02-16 17:42:00,2148-02-20 19:00:00,2148-02-20 19:00:00,URGENT,TRANSFER FROM HOSPITAL,DIED,Other,ENGLISH,,UNKNOWN,,,1,-33.000000,122.0


In [15]:
mean_df.to_feather(mean_export_file)

### Getting variability over last 6 hours

In [16]:
df_var = df.copy()
df_var = df[(df['timediffhour']>0)&(df['timediffhour']<6)]

varBP = df_var.groupby('hadm_id')['valuenum'].std()
varBP = varBP.reset_index()
varBP.columns = (stdColumns)

In [17]:
df_var['itemid'].value_counts()

220052    69347
220181    48279
225312     6003
Name: itemid, dtype: int64

In [18]:
#plt.hist(merged_inner_var['timediffhour'])

In [19]:
std_df = pd.merge(left = svents, right=varBP, how = 'left', left_on='hadm_id', right_on='hadm_id')

In [20]:
#std_df[std_df['std_pulseox']==0]

In [21]:
#df_pre[df_pre['hadm_id']==21880799]

In [22]:
std_df.to_feather(std_export_file)