In [1]:
#import relevant packages
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import pyarrow as pa
import seaborn as sns
from datetime import timedelta, datetime

dir = Path(r"S:\Fackler_OSS_364376\data\IRB-364376-v1-230215")

In [2]:
# read ptsd records
fp = dir.joinpath('EHR', 'ptsd_record.csv.gz')
ptsd_record = pd.read_csv(fp, compression="gzip")
patients_ptsd = ptsd_record['pat_enc_csn_sid'].unique()

# read flow table of patient EHR records
fp = dir.joinpath('EHR', 'flowsheet.csv.gz')
data = pd.read_csv(fp, compression="gzip")
data = data.drop(columns = ['meas_comment', 'meas_template_id'])
# Note: pandas took 50 seconds to load the table. Consider porting to PySpark RDD

In [3]:
fp = dir.joinpath('EHR', 'd_flo_measures.csv.gz')
dict = pd.read_csv(fp, compression="gzip")

names = ["State Behavioral Scale",
"-3 Unresponsive", 
"-2 Responsive to noxious stimuli", 
"-1 Responsive to gentle touch or voice",
"0 Awake and Able to calm",
"+1 Restless and difficult to calm",
"+2 Agitated",
"State Behavioral Scale (SBS)",
"Achieved Level of Sedation",
"Sedation / Delirium",
"Richmond Agitation - Sedation Scale",
"Richmond agitation sedation scale",
"Richmond Agitation Sedation Scale (RASS)"]

# dict[dict['disp_name'].isin(names)]


In [4]:
# flow_meas_id for SBS and RASS
fmid = [304080016, 304080017, 304080018, 304080019, 304080020, 304080021]

sbs = data[data['meas_id'].isin(fmid)]
print(sbs.shape)
# 25878 entries

# calculate sbs score from offset
sbs['SBS'] = sbs['meas_id'] - 304080019
sbs = sbs.drop(columns=['meas_value', 'meas_id'])
sbs['recorded_time'] = pd.to_datetime(sbs['recorded_time'], format='%Y-%m-%d %H:%M:%S')
sbs_indiv = sbs.groupby('pat_enc_csn_sid')

# Identify patients with SBS and waveform data
patients_ehr = list(sbs_indiv.groups.keys())

patients = set(patients_ehr) & set(patients_ptsd)

print(len(patients_ehr))
print(len(patients_ptsd))
print(len(patients))

(25878, 5)
1351
748
581


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  if __name__ == '__main__':


# Inclusion Exclusion criteria
- patients on ventilator
        - contains MEDIBUSVITALS file (from Drager ventilator)
        - overlapping ventilator time in (vent_dur)
        - 
- patient on beta blocker
- patients on neuromuscular blockers

### Mechanical ventilators

levels of ventilator support
- 1: room air
- 2: supplemental O2
- 3: regular nasal cannula
- 4: high-flow nasal cannula
- 5: noninvasive positive pressure ventilation
- 6: conventional mechanical ventilation
- 7: high freq oscillation or jet ventilation 

In [5]:
fp = dir.joinpath('EHR', 'vent_dur.csv.gz')
vent_record = pd.read_csv(fp, compression="gzip")

mech_vent = [5, 6, 7]
vent_record = vent_record[vent_record['level'].isin(mech_vent)]
vent_record.head()

Unnamed: 0,osler_sid,pat_enc_csn_sid,seq,level,start_time,stop_time,last_recorded_time,dur_hr,post_hr
3,8D48836C-6F5F-4270-8DB6-A058C3607B24,1000005531,24,6,2018-10-31 07:50:00,2018-10-31 17:50:00,2018-10-31 17:49:00,10.0,0.016666
5,4EA242D1-FE29-4F21-B284-3AB7D08A31AF,1000000332,2,6,2017-06-06 10:39:00,2017-06-06 11:50:00,2017-06-06 10:51:00,1.183333,0.983333
11,047A349F-0410-4AA8-88A8-A781FAE1570E,1000000003,5,6,2016-07-08 21:38:00,2016-07-11 14:25:00,2016-07-11 08:15:00,64.783333,6.166666
12,047A349F-0410-4AA8-88A8-A781FAE1570E,1000000003,6,6,2016-07-11 14:25:00,2016-07-13 14:56:00,2016-07-13 08:02:00,48.516666,6.9
17,047A349F-0410-4AA8-88A8-A781FAE1570E,1000000579,2,6,2016-07-07 10:22:00,2016-07-07 14:45:00,2016-07-07 12:57:00,4.383333,1.8


In [6]:
patient_vent = set(vent_record['pat_enc_csn_sid'])
patients = patients - patient_vent
len(patients)
# 324 patients never on mechanical ventilators throughout stay

324

### Drug
exclude neuromuscular and beta blockers

- medication dictionary: d_med
- accm_med_admin
- anes_dur

In [7]:
fp = dir.joinpath('EHR', 'med_admin.csv.gz')
med_admin = pd.read_csv(fp, compression="gzip")

med_admin.head()

  exec(code_obj, self.user_global_ns, self.user_ns)


Unnamed: 0,osler_sid,pat_enc_csn_sid,order_med_sid,line,hosp_admsn_time,hosp_disch_time,medication_name,generic_name,medication_id,thera_classname,...,number_of_times,time_unit,now_yn,reason,mar_imm_link_id,mar_admin_dept,mar_ord_dat,pat_supplied_yn,sensitive_yn,mar_action_c
0,567070AE-DFC6-438F-90C7-D5045EE7C0C3,1000000152,3006487,10,2016-12-06 16:28:00,2016-12-16 12:08:00,"HEPARIN (PORCINE) (PF) 1,000 UNIT/500 ML IN 0....","heparin (porcine) (PF) 1,000 unit/500 mL in 0....",4089008.0,ANTICOAGULANTS,...,,,,,,JHH BLOOMBERG 4S,,,N,121
1,567070AE-DFC6-438F-90C7-D5045EE7C0C3,1000000152,3006487,11,2016-12-06 16:28:00,2016-12-16 12:08:00,"HEPARIN (PORCINE) (PF) 1,000 UNIT/500 ML IN 0....","heparin (porcine) (PF) 1,000 unit/500 mL in 0....",4089008.0,ANTICOAGULANTS,...,,,,,,JHH BLOOMBERG 4S,,,N,121
2,567070AE-DFC6-438F-90C7-D5045EE7C0C3,1000000152,3006487,12,2016-12-06 16:28:00,2016-12-16 12:08:00,"HEPARIN (PORCINE) (PF) 1,000 UNIT/500 ML IN 0....","heparin (porcine) (PF) 1,000 unit/500 mL in 0....",4089008.0,ANTICOAGULANTS,...,,,,,,JHH BLOOMBERG 4S,,,N,121
3,567070AE-DFC6-438F-90C7-D5045EE7C0C3,1000000152,3006487,13,2016-12-06 16:28:00,2016-12-16 12:08:00,"HEPARIN (PORCINE) (PF) 1,000 UNIT/500 ML IN 0....","heparin (porcine) (PF) 1,000 unit/500 mL in 0....",4089008.0,ANTICOAGULANTS,...,,,,,,JHH BLOOMBERG 4S,,,N,121
4,567070AE-DFC6-438F-90C7-D5045EE7C0C3,1000000152,3006487,14,2016-12-06 16:28:00,2016-12-16 12:08:00,"HEPARIN (PORCINE) (PF) 1,000 UNIT/500 ML IN 0....","heparin (porcine) (PF) 1,000 unit/500 mL in 0....",4089008.0,ANTICOAGULANTS,...,,,,,,JHH BLOOMBERG 4S,,,N,121


In [8]:
print(med_admin.keys())
print(med_admin['pharm_classname'].unique())

Index(['osler_sid', 'pat_enc_csn_sid', 'order_med_sid', 'line',
       'hosp_admsn_time', 'hosp_disch_time', 'medication_name', 'generic_name',
       'medication_id', 'thera_classname', 'pharm_classname',
       'pharm_subclassname', 'taken_time', 'ordering_date', 'order_end_time',
       'scheduled_time', 'saved_time', 'mar_time_source', 'mar_action',
       'user_sid', 'mar_doc_user_sid', 'mar_billing_prov_sid', 'route', 'sig',
       'site', 'dose_unit', 'infusion_rate', 'mar_inf_rate_unit',
       'mar_duration', 'duration_unit', 'frequency', 'freq_period',
       'freq_type', 'number_of_times', 'time_unit', 'now_yn', 'reason',
       'mar_imm_link_id', 'mar_admin_dept', 'mar_ord_dat', 'pat_supplied_yn',
       'sensitive_yn', 'mar_action_c'],
      dtype='object')
['HEPARIN AND RELATED PREPARATIONS' 'OPIOID ANALGESICS' nan
 'COMPOUNDING KIT' 'ANTIHISTAMINES - 1ST GENERATION'
 'ANTICHOLINERGICS,QUATERNARY AMMONIUM' 'ANTISERA'
 'CEPHALOSPORIN ANTIBIOTICS - 1ST GENERATION' 'OSMOTIC 

In [9]:
excl_drugs = ['NEUROMUSCULAR BLOCKING AGENTS', 'BETA-ADRENERGIC AGENTS', 'ALPHA/BETA-ADRENERGIC BLOCKING AGENTS', 'BETA-ADRENERGIC BLOCKING AGENTS']

med_admin_filter = med_admin[med_admin['pharm_classname'].isin(excl_drugs)]
# med_admin_filter['pharm_classname'].unique()
patients_med = set(med_admin_filter['pat_enc_csn_sid'])
ex_patients = patients - patients_med

In [10]:
ex_patients = np.array(list(ex_patients))
print(len(ex_patients))
np.save('./DONOTPUSH/patients', ex_patients)

287
