In [1]:
import numpy as np
import pandas as pd
import gzip

In [2]:
dx_icd = pd.read_csv('hosp/diagnoses_icd.csv.gz')
icu_pt = pd.read_csv('icu/icustays.csv.gz')
df_lab = pd.read_csv('hosp/labevents.csv.gz')
df_lab_id = pd.read_csv('hosp/d_labitems.csv.gz')

# ICU PT WITH > 24 H Data

In [4]:
icu_pt.head()

Unnamed: 0,subject_id,hadm_id,stay_id,first_careunit,last_careunit,intime,outtime,los
0,10000032,29079034,39553978,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),2180-07-23 14:00:00,2180-07-23 23:50:47,0.410266
1,10000980,26913865,39765666,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),2189-06-27 08:42:00,2189-06-27 20:38:27,0.497535
2,10001217,24597018,37067082,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2157-11-20 19:18:02,2157-11-21 22:08:00,1.118032
3,10001217,27703517,34592300,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2157-12-19 15:42:24,2157-12-20 14:27:41,0.948113
4,10001725,25563031,31205490,Medical/Surgical Intensive Care Unit (MICU/SICU),Medical/Surgical Intensive Care Unit (MICU/SICU),2110-04-11 15:52:22,2110-04-12 23:59:56,1.338588


In [6]:
icu_pt_24 = icu_pt[icu_pt['los']>=1]
icu_pt_24.head()

Unnamed: 0,subject_id,hadm_id,stay_id,first_careunit,last_careunit,intime,outtime,los
2,10001217,24597018,37067082,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2157-11-20 19:18:02,2157-11-21 22:08:00,1.118032
4,10001725,25563031,31205490,Medical/Surgical Intensive Care Unit (MICU/SICU),Medical/Surgical Intensive Care Unit (MICU/SICU),2110-04-11 15:52:22,2110-04-12 23:59:56,1.338588
5,10001884,26184834,37510196,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),2131-01-11 04:20:05,2131-01-20 08:27:30,9.171817
6,10002013,23581541,39060235,Cardiac Vascular Intensive Care Unit (CVICU),Cardiac Vascular Intensive Care Unit (CVICU),2160-05-18 10:00:53,2160-05-19 17:33:33,1.314352
8,10002155,23822395,33685454,Coronary Care Unit (CCU),Coronary Care Unit (CCU),2129-08-04 12:45:00,2129-08-10 17:02:38,6.178912


In [10]:
patient_id = icu_pt_24['subject_id'].drop_duplicates()

In [11]:
len(patient_id)

42264

In [12]:
patient_id[0:100]

2      10001217
4      10001725
5      10001884
6      10002013
8      10002155
         ...   
149    10019917
150    10019957
152    10020187
154    10020306
155    10020640
Name: subject_id, Length: 100, dtype: int64

# HF ICU PT WITH >24H

In [13]:
hfref_id = dx_icd[dx_icd['icd_code'].str.startswith('4282')|dx_icd['icd_code'].str.startswith('I502')].subject_id

In [14]:
hfpef_id = dx_icd[dx_icd['icd_code'].str.startswith('4283')|dx_icd['icd_code'].str.startswith('I503')].subject_id

In [15]:
hf_id = pd.concat([hfref_id, hfpef_id])

In [16]:
hf_id = hf_id.drop_duplicates()

In [17]:
len(hf_id)

20189

In [27]:
icu_hf_pt_24 = icu_pt_24[icu_pt_24["subject_id"].isin(hf_id)]

In [28]:
len(icu_hf_pt_24["subject_id"].unique())

11110

In [105]:
icu_hf_pt = icu_pt[icu_pt["subject_id"].isin(hf_id)]

In [106]:
len(icu_hf_pt["subject_id"].unique())

12426

In [109]:
sum(hfref_id.drop_duplicates().isin(icu_hf_pt_24["subject_id"]))

5971

In [108]:
sum(hfpef_id.drop_duplicates().isin(icu_hf_pt_24["subject_id"]))

6436

In [78]:
hfref_icd_24_id = icu_pt_24[icu_pt_24["subject_id"].isin(hfref_id)]["subject_id"]

In [79]:
hfpef_icd_24_id = icu_pt_24[icu_pt_24["subject_id"].isin(hfpef_id)]["subject_id"]

# HF ICU PT WITH PROBNP VALUE AND >24H DATA

In [29]:
df_lab_comp = pd.merge(df_lab, df_lab_id, on='itemid', how='left')

In [30]:
pt_with_pro_bnp = df_lab_comp[(df_lab_comp['itemid'] == 50963)]

In [32]:
# Using isin() to create a boolean mask where subject_id is present in pt_with_pro_bnp
is_in_pro_bnp = icu_hf_pt_24["subject_id"].isin(pt_with_pro_bnp["subject_id"])

# Using ~ to negate the boolean mask, giving us True where subject_id is NOT in pt_with_pro_bnp
not_in_pro_bnp = ~is_in_pro_bnp

# Selecting rows from icu_hf_pt where subject_id is NOT in pt_with_pro_bnp
icu_hf_pt_24_without_bnp = icu_hf_pt_24[not_in_pro_bnp]

icu_hf_pt_24_with_bnp = icu_hf_pt_24[is_in_pro_bnp]

In [57]:
pt_with_pro_bnp.head(2)

Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,order_provider_id,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments,label,fluid,category
5090,5099,10000935,25849114.0,55494709,50963,,2187-10-11 06:25:00,2187-10-11 18:35:00,___,617.0,pg/mL,0.0,226.0,abnormal,ROUTINE,"REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...",NTproBNP,Blood,Chemistry
5917,5927,10000980,,33821244,50963,,2188-01-03 13:18:00,2188-01-03 14:42:00,___,4571.0,pg/mL,0.0,624.0,abnormal,STAT,"REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...",NTproBNP,Blood,Chemistry


In [61]:
pt_with_pro_bnp[(pt_with_pro_bnp["subject_id"].isin(hfref_id)) & (pt_with_pro_bnp["valuenum"] > 200)].head()

Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,order_provider_id,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments,label,fluid,category
5917,5927,10000980,,33821244,50963,,2188-01-03 13:18:00,2188-01-03 14:42:00,___,4571.0,pg/mL,0.0,624.0,abnormal,STAT,"REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...",NTproBNP,Blood,Chemistry
6206,6216,10000980,,46753637,50963,,2189-06-27 06:48:00,2189-06-27 07:56:00,___,2826.0,pg/mL,0.0,624.0,abnormal,STAT,"REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...",NTproBNP,Blood,Chemistry
6891,6902,10000980,,88477575,50963,,2190-11-06 16:00:00,2190-11-06 18:12:00,___,4279.0,pg/mL,0.0,624.0,abnormal,STAT,"REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...",NTproBNP,Blood,Chemistry
7582,7593,10000980,,77976228,50963,,2191-05-30 12:40:00,2191-05-30 13:55:00,___,6440.0,pg/mL,0.0,624.0,abnormal,STAT,"REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...",NTproBNP,Blood,Chemistry
7725,7736,10000980,,52441038,50963,,2191-07-16 11:55:00,2191-07-16 13:37:00,___,10180.0,pg/mL,0.0,624.0,abnormal,STAT,"REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...",NTproBNP,Blood,Chemistry


### Number of ICU HFrEF pt with >24h data WITH PROBNP 

In [65]:
positive_pro_bnp = pt_with_pro_bnp[pt_with_pro_bnp["valuenum"] > 200]
negative_pro_bnp = pt_with_pro_bnp[pt_with_pro_bnp["valuenum"] <= 200]

In [80]:
len(positive_pro_bnp[positive_pro_bnp["subject_id"].isin(hfref_icd_24_id)]["subject_id"].drop_duplicates())

3721

In [81]:
len(negative_pro_bnp[negative_pro_bnp["subject_id"].isin(hfref_icd_24_id)]["subject_id"].drop_duplicates())

291

### Number of ICU HFpEF pt with >24h data WITH PROBNP 

In [82]:
len(positive_pro_bnp[positive_pro_bnp["subject_id"].isin(hfpef_icd_24_id)]["subject_id"].drop_duplicates())

4455

In [83]:
len(negative_pro_bnp[negative_pro_bnp["subject_id"].isin(hfpef_icd_24_id)]["subject_id"].drop_duplicates())

566

### Number of ICU pt with >24h data WITH PROBNP and CONFLICTING DX

In [86]:
len(positive_pro_bnp[(positive_pro_bnp["subject_id"].isin(hfref_icd_24_id)) & (positive_pro_bnp["subject_id"].isin\
                                                                               (hfpef_icd_24_id))]["subject_id"].drop_duplicates())

1110

In [87]:
len(negative_pro_bnp[(negative_pro_bnp["subject_id"].isin(hfref_icd_24_id)) & (negative_pro_bnp["subject_id"].isin\
                                                                               (hfpef_icd_24_id))]["subject_id"].drop_duplicates())

123

### Number of ICU HFrEF pt with >24h data WITH PROBNP > 200

In [62]:
len(pt_with_pro_bnp[(pt_with_pro_bnp["subject_id"].isin(hfref_id)) & (pt_with_pro_bnp["valuenum"] > 200)]\
    ["subject_id"].drop_duplicates())

6106

### Number of ICU HFrEF pt with >24h data WITH PROBNP <= 200

In [73]:
len(pt_with_pro_bnp[(pt_with_pro_bnp["subject_id"].isin(hfref_id)) & (pt_with_pro_bnp["valuenum"] <= 200)]\
    ["subject_id"].drop_duplicates())

541

### Number of ICU HFpEF pt with >24h data WITH PROBNP > 200

In [47]:
len(pt_with_pro_bnp[(pt_with_pro_bnp["subject_id"].isin(hfpef_id)) & (pt_with_pro_bnp["valuenum"] > 200)]\
    ["subject_id"].drop_duplicates())

4644

### Number of ICU HFpEF pt with >24h data WITH PROBNP <= 200

In [74]:
len(pt_with_pro_bnp[(pt_with_pro_bnp["subject_id"].isin(hfpef_id)) & (pt_with_pro_bnp["valuenum"] <= 200)]\
    ["subject_id"].drop_duplicates())

1092

### Number of ICU HF pt with >24h data WITH PROBNP and CONFLICTING ICD DX

In [52]:
len(icu_hf_pt_24_with_bnp[icu_hf_pt_24_with_bnp["subject_id"].isin(hfpef_id)&icu_hf_pt_24_with_bnp["subject_id"]\
                          .isin(hfref_id)].subject_id.unique())

1136

In [51]:
3865 + 4644 - 1136

7373

### Number of ICU HF pt with >24h data WITHOUT PROBNP

In [33]:
len(icu_hf_pt_24_without_bnp["subject_id"].drop_duplicates())

3737

### Number of ICU HFrEF pt with >24h data without PROBNP

In [110]:
sum(icu_hf_pt_24_without_bnp["subject_id"].drop_duplicates().isin(hfref_icd_24_id))

2106

### Number of ICU HFpEF pt with >24h data without PROBNP

In [111]:
sum(icu_hf_pt_24_without_bnp["subject_id"].drop_duplicates().isin(hfpef_icd_24_id))

1792

### Number of ICU pt with >24h data without PROBNP and  CONFLICTING ICD DX

In [41]:
len(icu_hf_pt_24_without_bnp[icu_hf_pt_24_without_bnp["subject_id"].isin(hfpef_id)&icu_hf_pt_24_without_bnp["subject_id"]\
                             .isin(hfref_id)].subject_id.unique())

161

In [42]:
2106+1792-161

3737

# ICU with 24h and absence of HF ICD codes with ProBNP

In [88]:
icu_non_hf_pt_24 = icu_pt_24[~(icu_pt_24["subject_id"].isin(hf_id))]

In [94]:
icu_non_hf_pt_24.head()

Unnamed: 0,subject_id,hadm_id,stay_id,first_careunit,last_careunit,intime,outtime,los
2,10001217,24597018,37067082,Surgical Intensive Care Unit (SICU),Surgical Intensive Care Unit (SICU),2157-11-20 19:18:02,2157-11-21 22:08:00,1.118032
4,10001725,25563031,31205490,Medical/Surgical Intensive Care Unit (MICU/SICU),Medical/Surgical Intensive Care Unit (MICU/SICU),2110-04-11 15:52:22,2110-04-12 23:59:56,1.338588
5,10001884,26184834,37510196,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),2131-01-11 04:20:05,2131-01-20 08:27:30,9.171817
10,10002348,22725460,32610785,Neuro Intermediate,Neuro Intermediate,2112-11-30 23:24:00,2112-12-10 18:25:13,9.792512
11,10002428,20321825,34807493,Medical Intensive Care Unit (MICU),Medical Intensive Care Unit (MICU),2156-04-30 21:53:00,2156-05-02 22:27:20,2.023843


In [93]:
dx_icd[dx_icd["subject_id"].isin(icu_non_hf_pt_24["subject_id"])].head()

Unnamed: 0,subject_id,hadm_id,seq_num,icd_code,icd_version
409,10001217,24597018,1,3240,9
410,10001217,24597018,2,3484,9
411,10001217,24597018,3,3485,9
412,10001217,24597018,4,5180,9
413,10001217,24597018,5,340,9


In [98]:
pt_with_pro_bnp[pt_with_pro_bnp["subject_id"].isin(icu_non_hf_pt_24["subject_id"])].head(2)

Unnamed: 0,labevent_id,subject_id,hadm_id,specimen_id,itemid,order_provider_id,charttime,storetime,value,valuenum,valueuom,ref_range_lower,ref_range_upper,flag,priority,comments,label,fluid,category
12842,12858,10001725,,58052149,50963,P76BRY,2111-05-16 12:40:00,2111-05-16 21:56:00,___,35.0,pg/mL,0.0,192.0,,ROUTINE,"REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...",NTproBNP,Blood,Chemistry
14858,14876,10001884,,4029327,50963,P46WR5,2128-07-15 12:39:00,2128-07-15 20:19:00,___,233.0,pg/mL,0.0,353.0,,ROUTINE,"REFERENCE VALUES VARY WITH AGE, SEX, AND RENAL...",NTproBNP,Blood,Chemistry


In [101]:
len(pt_with_pro_bnp[pt_with_pro_bnp["subject_id"].isin(icu_non_hf_pt_24["subject_id"])].drop_duplicates())

8507

In [112]:
len(icu_non_hf_pt_24[icu_non_hf_pt_24["subject_id"].isin(pt_with_pro_bnp["subject_id"])].drop_duplicates())

8337

In [113]:
len(icu_non_hf_pt_24[icu_non_hf_pt_24["subject_id"].isin(positive_pro_bnp["subject_id"])].drop_duplicates())

6516

In [114]:
len(icu_non_hf_pt_24[icu_non_hf_pt_24["subject_id"].isin(negative_pro_bnp["subject_id"])].drop_duplicates())

2213

# ICU with 24h and absence of HF ICD codes and without ProBNP

In [103]:
icu_24_non_hf_non_bnp = icu_non_hf_pt_24[~icu_non_hf_pt_24["subject_id"].isin(pt_with_pro_bnp["subject_id"])]

In [115]:
len(icu_24_non_hf_non_bnp["subject_id"].drop_duplicates())

25778