In [2]:
# Import libraries
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import psycopg2
pd.set_option('display.height', 1000)
pd.set_option('display.max_rows', 500)
pd.set_option('display.max_columns', 500)
pd.set_option('display.width', 1000)

# below imports are used to print out pretty pandas dataframes
from IPython.display import display, HTML

%matplotlib inline
plt.style.use('ggplot')

# information used to create a database connection
sqluser = 'ephlius'
dbname = 'mimic'
schema_name = 'mimiciii'

# Connect to postgres with a copy of the MIMIC-III database
con = psycopg2.connect(dbname=dbname, user=sqluser)

# the below statement is prepended to queries to ensure they select from the right schema
#query_schema = 'set search_path to ' + schema_name + ';'

height has been deprecated.



In [3]:
query = """
---------------------------------------------------------------------------------
--CO
--FEATURES: GENDER, AGE, ICU_LENGTH_OF_STAY, ICUSTAY_ID_ORDER, EXCLUSION_1ST_CARE_UNIT
--REMARD: 1. FIRST CARE UNIT IS NON 'MICU' IS MARKED WITH 1
---------------------------------------------------------------------------------
 WITH co AS
  (
   SELECT icu.subject_id, icu.hadm_id, icu.icustay_id, pat.gender
 , EXTRACT(EPOCH FROM outtime - intime)/60.0/60.0/24.0 as icu_length_of_stay
 , EXTRACT('epoch' from icu.intime - pat.dob) / 60.0 / 60.0 / 24.0 / 365.242 as age
 , RANK() OVER (PARTITION BY icu.subject_id ORDER BY icu.intime) AS icustay_id_order
 , CASE
    WHEN ICU.FIRST_CAREUNIT = 'MICU' THEN 0
    ELSE 1 END
    AS exclusion_1st_care_unit
   FROM icustays icu
   INNER JOIN patients pat
   ON icu.subject_id = pat.subject_id
 )
---------------------------------------------------------------------------------
--SERV
--FEATURES: CURRENT_SERVICE, SURGICAL
--REMARKS: SURGICAL RELATED SERVICES IS MARKED WITH 1 
---------------------------------------------------------------------------------
, serv AS
 (
   SELECT icu.hadm_id, icu.icustay_id, se.curr_service
 , CASE
    WHEN curr_service like '%SURG' then 1
    WHEN curr_service = 'ORTHO' then 1
    ELSE 0 END
   AS surgical
 , RANK() OVER (PARTITION BY icu.hadm_id ORDER BY se.transfertime DESC) as rank
   FROM icustays icu
   INNER JOIN services se
   ON icu.hadm_id = se.hadm_id
   AND se.transfertime < icu.intime + interval '12' hour
 )
---------------------------------------------------------------------------------
--ICD9
--FEATURES: ICD9 CODE, HM
--REMARKS: 1.ONLY INCLUDES HM PATIENTS ADMISSIONS
--         2. PRIMARY AND SECONDARY DIAGNOSIS IS INDICATED IN HM_INDICATOR: PRIMARY IS 1, SECONDARY IS 2
--         3. IF PATIENT PRIMARY DIAGNOSIS IS HM, THEN SECONDARY DIAGNOSIS IS IGNORED.
--         4. IF PATIENT PRIMARY DIAGNOSIS IS NOT HM AND ONLY SECONDARY DIAGNOSIS IS HM. THEN SECONDARY DIAGNOSIS IS RECORDED.
--         5. FOUR MAIN HM CATEGORYS: Lymphoma, Myeloma, Leukemia, Myelodysplastic syndrome
---------------------------------------------------------------------------------
, icd9_hm AS
 (
   SELECT hadm_id,icd9_code
 , CASE 
    WHEN
      ( icd9_code is not null
      AND
        (substring(icd9_code,1,3) IN ('200', '201', '202', '203', '204', '205', '206', '207', '208') 
         OR ICD9_CODE IN ('23872', '23873', '23874', '23875')
        )
      AND 
        seq_num = 1
       )
     THEN 1
     WHEN 
     ( icd9_code is not null
      AND
        (substring(icd9_code,1,3) IN ('200', '201', '202', '203', '204', '205', '206', '207', '208') 
         OR ICD9_CODE IN ('23872', '23873', '23874', '23875')
         )
     AND 
        seq_num != 1
       )
     THEN 2
     ELSE null 
     END AS HM_INDICATOR
    
    FROM DIAGNOSES_ICD
 )
 
 ----
 , HM_TYPE as(
   select hadm_id, HM_INDICATOR, ICD9_CODE
   ,  CASE
      WHEN substring(icd9_code,1,3) IN ('200', '201', '202')
      THEN 'lymphoma'
      
      WHEN substring(icd9_code,1,3) = '203'
      THEN 'myeloma'
      
      WHEN substring(icd9_code,1,3) IN ('204', '205', '206', '207', '208')
      THEN 'leukemia'
      
      WHEN ICD9_CODE IN ('23872', '23873', '23874', '23875')
      THEN 'myelodysplastic_syndrome'
      
      else null
      END as D_TYPE

    FROM icd9_hm
    where HM_INDICATOR is not null
   )
   , cleanup as (
   select hadm_id,
   min (HM_indicator) as indicator
   from hm_type
   group by hadm_id
   )
 
, icd9 as (
 SELECT HM_TYPE.HADM_ID, HM_INDICATOR, 
  MAX (d_type) as D_TYPE
 FROM HM_TYPE
 inner join cleanup
 on HM_TYPE.hadm_id=cleanup.hadm_id
 AND cleanup.indicator = HM_TYPE.HM_indicator
GROUP BY HM_TYPE.HADM_ID, HM_INDICATOR
ORDER BY HM_TYPE.HADM_ID, HM_INDICATOR
 )
 
 
---------------------------------------------------------------------------------
--FINAL_CO
--FEATURES: SELECT AGE, GENDER, ICUSTAY_ID_ORDER, ICU_LENGTH_OF_STAY, ICD9_CODE
--EXCLUSION: 1. LENGTH OF STAY <1 DAY
--           2. AGE < 16 YEARS
--           3. NOT FIRST ICU STAY
--           4. SURGICAL SERVICE
--           5. NON HM ADMISSIONS
---------------------------------------------------------------------------------
, FINAL_CO as (
  
   SELECT 
    HM_INDICATOR, D_TYPE, co.subject_id, co.hadm_id, co.icustay_id  
    ,co.age, co.gender, co.icustay_id_order, 
    co.icu_length_of_stay, 
    --icd9.icd9_code
   exclusion_1st_care_unit
  , CASE
        WHEN co.icu_length_of_stay < 1 then 1
    ELSE 0 END
    AS exclusion_los
  , CASE
        WHEN co.age < 16 then 1
    ELSE 0 END
    AS exclusion_age
  , CASE 
        WHEN co.icustay_id_order != 1 THEN 1
    ELSE 0 END 
    AS exclusion_first_stay
  , CASE
        WHEN serv.surgical = 1 THEN 1
    ELSE 0 END
    as exclusion_surgical 
    
   FROM co
   INNER JOIN serv
    ON  co.icustay_id = serv.icustay_id
    AND serv.rank = 1
   INNER join icd9
    on  co.hadm_id=icd9.hadm_id
 )

---------------------------------------------------------------------------------
--RBC_RESULT
--FEATURES: RBC_ITEMID, RBC_1
--REMARKS: RBC_1 IS MARKED WITH 1 FOR PATIENTS NEED RED BLOD CELL TRANSFUSION
---------------------------------------------------------------------------------
, RBC AS (
  SELECT subject_id, hadm_id, icustay_id, itemid as RBC_itemid
  FROM inputevents_mv
  WHERE(itemid in (225168,226368) AND amount>0 )
  
  UNION ALL

  SELECT subject_id, hadm_id, icustay_id,itemid as RBC_itemid
  FROM inputevents_cv
  WHERE (itemid in (30001,30104) AND amount>0 )
)

, RBC_result as (
   select subject_id, hadm_id, icustay_id,
   max(CASE WHEN RBC.RBC_itemid >0 then 1 ELSE 0 END)as RBC_1
   from RBC
   GROUP BY subject_id, hadm_id, icustay_id
  )

---------------------------------------------------------------------------------
--PLATELET_RESULT
--FEATURES: PLATELET_ITEMID, PLATELET_1
--REMARKS: PLATELET_1 IS MARKED WITH 1 FOR PATIENTS NEED PLATELET TRANSFUSION
---------------------------------------------------------------------------------
, platelet AS (
   select subject_id, hadm_id, icustay_id, itemid as platelet_itemid
   from inputevents_mv
   where (itemid in (225170,226369) AND amount>0 )
   
   UNION ALL
   
   select subject_id, hadm_id, icustay_id,itemid as platelet_itemid
   from inputevents_cv
   where (itemid = 30006 AND amount>0 )
)

, platelet_result as (
   select subject_id, hadm_id, icustay_id,
   max(CASE WHEN platelet.platelet_itemid >0 then 1 ELSE 0 END)as platelet_1
   from platelet
   GROUP BY subject_id, hadm_id, icustay_id
   ORDER BY subject_id, hadm_id, icustay_id
)
---------------------------------------------------------------------------------
--POSITIVE_BLOOD_RESULT
--FEATURES: P_BLOOD_CULTURE_1
--REMARKS: P_BLOOD_CULTURE_1 IS MARKED WITH 1 FOR PATIENTS WHO HAS POSITIVE BLOOD CULTURE
---------------------------------------------------------------------------------
, positive_blood_result as (

  select subject_id, hadm_id,
  max (case when (org_itemid is null AND ab_itemid is null) THEN 1 ELSE 0 END ) as p_blood_culture_1
  from microbiologyevents
  where (spec_type_desc = 'BLOOD CULTURE')
  GROUP BY subject_id, hadm_id

)

---------------------------------------------------------------------------------
--VASOPRESSORDURATIONS_RESULT
--FEATURES: VASOPRESSOR_1
--REMARKS: VASOPRESSOR_1 IS MARKED WITH 1 FOR PATIENTS WHO NEEDS VASOPRESSOR
---------------------------------------------------------------------------------
,  VASOPRESSORDURATIONS_result as (
   select icustay_id
   ,max(CASE WHEN VASOPRESSORDURATIONS.duration_hours>0 then 1 ELSE 0 END) as vasopressor_1 
   from VASOPRESSORDURATIONS
   GROUP BY icustay_id
   ORDER BY icustay_id
)
---------------------------------------------------------------------------------
--VENTIDURATIONS_RESULT
--FEATURES: VENTILATION_1
--REMARKS: VENTILATION_1 IS MARKED WITH 1 FOR PATIENTS WHO NEEDS MECHANICAL VENTILATION
---------------------------------------------------------------------------------
, ventdurations_result as (
  select icustay_id
  ,max(CASE WHEN ventdurations.duration_hours>0 then 1 ELSE 0 END) as ventilation_1
  from ventdurations
  GROUP BY icustay_id
  ORDER BY icustay_id
)
--------------------------------------------------------------------------------
--AST_RESULT
--FEATURES: AST
---------------------------------------------------------------------------------
, AST_result as (
   select subject_id, hadm_id, icustay_id, 
   max (ast) as AST
   from siqi_AST_ALT
   where ast is not null
   group by subject_id, hadm_id, icustay_id
   order by subject_id, hadm_id, icustay_id
)
-------------------------------------------------------------------------------
--ALT_RESULT
--FEATURES: AST
---------------------------------------------------------------------------------

, ALT_result as (
  select subject_id, hadm_id, icustay_id, 
  max (ALT) as ALT
  from siqi_AST_ALT
  where ALT is not null
  group by subject_id, hadm_id, icustay_id
  order by subject_id, hadm_id, icustay_id
)
---------------------------------------------------------------------------------
--ABG_result
--FEATURES: pH, pCO2, pO2, bicarbonate, baseexcess, spO2,lactate
--REMARKS: Arterial Blood Gas labtest result on the first day
---------------------------------------------------------------------------------
,  ABG_result as (
   select icustay_id
   --,max(CASE WHEN VASOPRESSORDURATIONS.duration_hours>0 then 1 ELSE 0 END) as vasopressor_1 
   , max(ph) as ph
   , max(pco2) as pco2
   , max(po2) as po2
   , max(bicarbonate) as bicarbonate
   , max(baseexcess) as baseexcess
   , max(spo2) as spo2
   , max(lactate) as lactate
   from bloodgasfirstdayarterial
   group by icustay_id
   ORDER BY icustay_id
)
---------------------------------------------------------------------------------
--comorbidity
--FEATURES: Hypertension, Congestive_heart_failure, cardiac_arrhythmias, 
--          chronic_pulmonary, depression, diabetes_uncomplicated, hypothyroidism,
--         renal_failure, rheumatoid_arthritis,liver_disease, peptic_ulcer
--REMARKS: co-morbidity table
---------------------------------------------------------------------------------
, comorbidity as (
   select hadm_id,
   hypertension, congestive_heart_failure, cardiac_arrhythmias,
   chronic_pulmonary, depression, diabetes_uncomplicated, hypothyroidism, renal_failure,
   rheumatoid_arthritis, liver_disease, peptic_ulcer
   from ELIXHAUSER_QUAN
)

-------------------------------------------------------------------------------
--FINALCO_WTH_SCORE_N_EXCLUSION
--FEATURES: EVERYTHING EXCEPT LAB TEST RESULT
-------------------------------------------------------------------------------
, finalco_wth_score_n_exclusion as (
  select 
  final_co.subject_id, final_co.hadm_id, final_co.icustay_id
  , final_co.HM_INDICATOR, final_co.D_TYPE
  
  ---AST, ALT
  , AST_RESULT.AST
  , ALT_RESULT.ALT
  
  ---Renal replacement therapy
  , siqi_rrt.rrt as RRT
  
  ---SOFA score
  , SOFA.sofa
  
  ---postive blood test
  , positive_blood_result.p_blood_culture_1
  
  ---Red blood cell transfusion
  , RBC_result.RBC_1
  
  ---Platelet transfusion
  ,platelet_result.platelet_1
   
  ---patient with vesopressor service
  , VASOPRESSORDURATIONS_result.vasopressor_1
 
  
  ---patient with mechanical ventilation service
  , ventdurations_result.ventilation_1
  
  ---four vital signs, heart rate, respiration, arterial blood preasure mean, body temperature
  , siqi_vital_signs.heart_rate
  , siqi_vital_signs.respiration
  , siqi_vital_signs.arterial_bp_mean
  , siqi_vital_signs.arterial_BP_systolic
  , siqi_vital_signs.arterial_BP_diastolic
  , siqi_vital_signs.temperature
  
  ---patient with ABG lab test
  , ABG_result.ph as abg_ph
  , ABG_result.pco2 as abg_pco2
  , ABG_result.po2 as abg_po2
  , ABG_result.bicarbonate as abg_bicarbonate
  , ABG_result.baseexcess as abg_baseexcess
  , ABG_result.spo2 as abg_spo2
  , ABG_result.lactate as abg_lactate
  
  ---comorbidity 
  , hypertension, congestive_heart_failure, cardiac_arrhythmias, chronic_pulmonary
  , depression, diabetes_uncomplicated, hypothyroidism, renal_failure
  , rheumatoid_arthritis, liver_disease, peptic_ulcer
   
  ,final_co.age, final_co.gender, final_co.icu_length_of_stay 


  FROM final_co
  left join SOFA
  on final_co.hadm_id = SOFA.hadm_id
  AND final_co.icustay_id=SOFA.icustay_id
  AND final_co.subject_id=SOFA.subject_id
  
  left join siqi_rrt
  on final_co.hadm_id = siqi_rrt.hadm_id
  AND final_co.icustay_id=siqi_rrt.icustay_id
  AND final_co.subject_id=siqi_rrt.subject_id
  
  left join AST_RESULT
  on  final_co.hadm_id = AST_RESULT.hadm_id
  AND final_co.icustay_id=AST_RESULT.icustay_id
  AND final_co.subject_id=AST_RESULT.subject_id
  
  left join ALT_RESULT
  on  final_co.hadm_id = ALT_RESULT.hadm_id
  AND final_co.icustay_id=ALT_RESULT.icustay_id
  AND final_co.subject_id=ALT_RESULT.subject_id
  
  left join positive_blood_result
  on  final_co.hadm_id = positive_blood_result.hadm_id
  AND final_co.subject_id=positive_blood_result.subject_id
  
  left join platelet_result
   on  final_co.hadm_id = platelet_result.hadm_id
  AND final_co.icustay_id=platelet_result.icustay_id
  AND final_co.subject_id=platelet_result.subject_id
  
  left join RBC_result
   on  final_co.hadm_id = RBC_result.hadm_id
  AND final_co.icustay_id=RBC_result.icustay_id
  AND final_co.subject_id=RBC_result.subject_id
  
  left join VASOPRESSORDURATIONS_result
  on final_co.icustay_id=VASOPRESSORDURATIONS_result.icustay_id
  
  left join ventdurations_result
  on final_co.icustay_id=ventdurations_result.icustay_id
  
  left join siqi_vital_signs
  on final_co.hadm_id = siqi_vital_signs.hadm_id
  AND final_co.icustay_id = siqi_vital_signs.icustay_id
  AND final_co.subject_id = siqi_vital_signs.subject_id
  
  left join ABG_result
  on final_co.icustay_id = ABG_result.icustay_id
  
  left join comorbidity
  on final_co.hadm_id = comorbidity.hadm_id
  
 where 
  exclusion_1st_care_unit = 0
  --AND exclusion_los = 0
  --AND exclusion_surgical = 0
  AND exclusion_age = 0
  AND exclusion_first_stay = 0
  order by hadm_id, icustay_id
  
  
 ) 
-------------------------------------------------------------------------------
--FINAL HM COHORT
-------------------------------------------------------------------------------
  select HM_lab_test.*
  , ast, alt, rrt, sofa
  , p_blood_culture_1,RBC_1,platelet_1,vasopressor_1,ventilation_1
  , heart_rate,respiration, arterial_bp_mean, arterial_BP_systolic, arterial_BP_diastolic, temperature
  , abg_ph, abg_pco2, abg_po2, abg_bicarbonate, abg_baseexcess, abg_spo2, abg_lactate
  , hypertension, congestive_heart_failure, cardiac_arrhythmias, chronic_pulmonary
  , depression, diabetes_uncomplicated, hypothyroidism, renal_failure
  , rheumatoid_arthritis, liver_disease, peptic_ulcer
  , age, gender, icu_length_of_stay
  , HM_INDICATOR, D_TYPE
  
  FROM finalco_wth_score_n_exclusion
  left join 
  HM_lab_test
  
  on  HM_lab_test.hadm_id = finalco_wth_score_n_exclusion.hadm_id
  AND HM_lab_test.icustay_id = finalco_wth_score_n_exclusion.icustay_id
  AND HM_lab_test.subject_id = finalco_wth_score_n_exclusion.subject_id
  
  ORDER BY HM_lab_test.subject_id, HM_lab_test.hadm_id, HM_lab_test.icustay_id

"""
data = pd.read_sql_query(query, con)


In [4]:
data['icustay_id'].unique().shape

(657,)

In [5]:
data.columns

Index(['subject_id', 'hadm_id', 'icustay_id', 'mort_icu', 'mort_hosp', 'mort_30', 'aniongap', 'albumin', 'bicarbonate', 'bilirubin', 'creatinine', 'chloride', 'glucose', 'hematocrit', 'hemoglobin', 'lactate', 'magnesium', 'phosphate', 'platelet', 'potassium', 'ptt', 'inr', 'pt', 'sodium', 'bun', 'wbc', 'calcium', 'freecalcium', 'ast', 'alt', 'rrt', 'sofa', 'p_blood_culture_1', 'rbc_1', 'platelet_1', 'vasopressor_1', 'ventilation_1', 'heart_rate', 'respiration', 'arterial_bp_mean', 'arterial_bp_systolic', 'arterial_bp_diastolic', 'temperature', 'abg_ph', 'abg_pco2', 'abg_po2', 'abg_bicarbonate', 'abg_baseexcess', 'abg_spo2', 'abg_lactate', 'hypertension', 'congestive_heart_failure', 'cardiac_arrhythmias', 'chronic_pulmonary', 'depression', 'diabetes_uncomplicated', 'hypothyroidism', 'renal_failure', 'rheumatoid_arthritis', 'liver_disease', 'peptic_ulcer', 'age', 'gender', 'icu_length_of_stay', 'hm_indicator', 'd_type'], dtype='object')

In [6]:
data.count()

subject_id                  657
hadm_id                     657
icustay_id                  657
mort_icu                    657
mort_hosp                   657
mort_30                     657
aniongap                    653
albumin                     439
bicarbonate                 653
bilirubin                   535
creatinine                  653
chloride                    653
glucose                     653
hematocrit                  654
hemoglobin                  654
lactate                     444
magnesium                   644
phosphate                   638
platelet                    653
potassium                   653
ptt                         631
inr                         631
pt                          631
sodium                      653
bun                         653
wbc                         653
calcium                     639
freecalcium                 218
ast                         429
alt                         428
rrt                         657
sofa    

In [7]:
data.to_csv("HM_cohort_wth_LabTest.csv")

In [8]:
data

Unnamed: 0,subject_id,hadm_id,icustay_id,mort_icu,mort_hosp,mort_30,aniongap,albumin,bicarbonate,bilirubin,creatinine,chloride,glucose,hematocrit,hemoglobin,lactate,magnesium,phosphate,platelet,potassium,ptt,inr,pt,sodium,bun,wbc,calcium,freecalcium,ast,alt,rrt,sofa,p_blood_culture_1,rbc_1,platelet_1,vasopressor_1,ventilation_1,heart_rate,respiration,arterial_bp_mean,arterial_bp_systolic,arterial_bp_diastolic,temperature,abg_ph,abg_pco2,abg_po2,abg_bicarbonate,abg_baseexcess,abg_spo2,abg_lactate,hypertension,congestive_heart_failure,cardiac_arrhythmias,chronic_pulmonary,depression,diabetes_uncomplicated,hypothyroidism,renal_failure,rheumatoid_arthritis,liver_disease,peptic_ulcer,age,gender,icu_length_of_stay,hm_indicator,d_type
0,31,128652,254478,1,1,1,9.0,2.7,27.0,0.9,0.9,95.0,110.0,30.0,10.6,1.40,1.6,2.7,109.0,3.3,29.4,1.2,13.2,128.0,13.0,6.90,8.2,1.12,32.0,44.0,0,2,1.0,,,1.0,1.0,54.0,12.0,89.0,132.0,65.0,98.699997,7.47,36.0,148.0,,3.0,98.000000,1.5,1,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,72.267095,M,7.937940,2,lymphoma
1,46,144073,268016,0,0,0,8.0,1.6,17.0,1.0,0.6,96.0,957.0,14.5,4.0,,1.7,5.2,232.0,2.5,27.4,1.2,13.7,118.0,13.0,2.20,5.8,,303.0,167.0,0,1,,1.0,,,,107.0,20.0,,,,98.099998,,,,,,,,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,73.999610,M,0.769039,2,myeloma
2,61,176332,252348,0,0,0,11.0,,23.0,0.3,0.4,110.0,95.0,21.3,7.5,,1.8,3.4,65.0,3.9,34.1,0.9,11.6,140.0,17.0,1.60,7.2,,9.0,17.0,0,4,1.0,1.0,1.0,,,96.0,22.0,,,,100.900002,,,,,,,,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,54.722615,M,2.559074,1,lymphoma
3,171,153112,274684,0,0,0,13.0,,26.0,,0.7,101.0,104.0,31.5,11.2,,2.1,4.1,271.0,4.3,26.3,1.1,13.1,136.0,10.0,8.50,9.1,,,,0,0,1.0,,,,,87.0,19.0,,,,98.599998,,,,,,,,0,0.0,0.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,61.982821,M,4.888414,2,myeloma
4,224,169158,255378,0,0,0,8.0,,37.0,1.0,0.6,94.0,115.0,24.4,8.5,,1.9,4.0,14.0,3.8,36.4,1.3,14.8,135.0,11.0,2.00,8.8,,,,0,6,,,1.0,,,70.0,24.0,,,,99.900002,,,,,,,,0,0.0,0.0,0.0,0.0,1,0.0,0.0,0.0,0.0,0.0,70.184531,M,1.104595,2,myelodysplastic_syndrome
5,298,119446,225523,0,0,0,14.0,,24.0,,0.6,103.0,116.0,24.4,8.2,,1.9,4.6,405.0,3.7,150.0,1.4,15.2,137.0,8.0,4.70,8.7,,,,0,0,1.0,1.0,,,,109.0,21.0,,,,98.900002,,,,,,,,0,0.0,1.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,72.896938,F,2.258981,2,lymphoma
6,370,123421,228402,0,0,0,11.0,,25.0,0.8,0.6,95.0,145.0,31.5,10.6,2.30,1.8,5.8,32.0,4.2,26.7,1.2,13.4,127.0,40.0,206.30,7.5,,7.0,17.0,0,4,,,1.0,,,95.0,15.0,53.0,72.0,42.0,99.300003,7.42,41.0,57.0,,,96.000000,,0,1.0,0.0,1.0,0.0,1,1.0,0.0,0.0,0.0,0.0,83.520116,F,2.575694,2,leukemia
7,404,166989,204729,1,1,1,34.0,,10.0,3.3,1.8,94.0,105.0,39.0,8.5,,,,,4.8,31.2,1.9,19.7,133.0,36.0,326.00,,,,,0,3,1.0,,,,,,,,,,,6.93,15.0,99.0,,,,,0,0.0,0.0,0.0,0.0,0,1.0,0.0,0.0,1.0,0.0,78.208078,M,0.422164,2,leukemia
8,419,111426,200674,0,0,0,13.0,3.3,28.0,0.6,0.6,102.0,169.0,29.7,11.1,2.40,2.0,1.6,120.0,3.7,25.8,1.3,14.9,139.0,7.0,15.10,8.1,,,,0,4,1.0,,,,,106.0,31.0,,,,98.000000,7.55,41.0,175.0,,10.0,95.000000,2.4,1,1.0,0.0,0.0,0.0,0,1.0,0.0,0.0,0.0,0.0,59.097153,F,1.231609,2,leukemia
9,466,150423,225985,0,1,1,8.0,2.1,31.0,,0.9,103.0,96.0,33.9,10.6,1.70,1.8,2.3,25.0,3.7,58.9,1.3,14.0,138.0,27.0,3.90,7.6,,,,0,7,1.0,,1.0,1.0,,136.0,12.0,,,,96.800003,,,,,,,,0,0.0,1.0,0.0,0.0,0,0.0,0.0,0.0,0.0,0.0,300.002116,M,2.483646,1,lymphoma


In [9]:
data[data['hm_indicator']==1].shape

(194, 66)

In [10]:
data[data['hm_indicator']==2].shape

(463, 66)

In [11]:
data['hadm_id'].unique().shape

(657,)

In [12]:
data['hadm_id'].shape

(657,)

In [1]:
data['bun']

NameError: name 'data' is not defined