In [2]:
# data wrangling
import pandas as pd

# plotting
import matplotlib.pyplot as plt

# other
import time

# sklearn
from sklearn.model_selection import train_test_split

# local files
from sofascores import compute_sofa

In [3]:
# Relative path to the HDF5 file that holds every table used below.
DATA_FILEPATH = "../data/all_hourly_data.h5"

# Read the three tables, one per HDF5 key, in the same order as the targets
# on the left-hand side.
patients, vitals_labs_mean, interventions = (
    pd.read_hdf(DATA_FILEPATH, key=table_key)
    for table_key in ("patients", "vitals_labs_mean", "interventions")
)

In [4]:
# First look at the patients table: its (rows, columns) shape, then the
# first five rows, each on its own line.
print(patients.shape, patients.head(), sep="\n")

(34472, 28)
                              gender              ethnicity        age  \
subject_id hadm_id icustay_id                                            
3          145834  211552          M                  WHITE  76.526792   
4          185777  294638          F                  WHITE  47.845047   
6          107064  228232          F                  WHITE  65.942297   
9          150750  220597          M  UNKNOWN/NOT SPECIFIED  41.790228   
11         194540  229441          F                  WHITE  50.148295   

                              insurance           admittime  \
subject_id hadm_id icustay_id                                 
3          145834  211552      Medicare 2101-10-20 19:08:00   
4          185777  294638       Private 2191-03-16 00:28:00   
6          107064  228232      Medicare 2175-05-30 07:15:00   
9          150750  220597      Medicaid 2149-11-09 13:06:00   
11         194540  229441       Private 2178-04-16 06:18:00   

                           

In [12]:
# Shape and first rows of the hourly vitals/labs table.
print(vitals_labs_mean.shape, vitals_labs_mean.head(), sep="\n")

# List every column label, one per line, so the available measurements
# can be scanned by eye.
for column in vitals_labs_mean.columns:
    print(column)

(2200954, 104)
LEVEL2                                 alanine aminotransferase albumin  \
Aggregation Function                                       mean    mean   
subject_id hadm_id icustay_id hours_in                                    
3          145834  211552     0                            25.0     1.8   
                              1                             NaN     NaN   
                              2                             NaN     NaN   
                              3                             NaN     NaN   
                              4                             NaN     NaN   

LEVEL2                                 albumin ascites albumin pleural  \
Aggregation Function                              mean            mean   
subject_id hadm_id icustay_id hours_in                                   
3          145834  211552     0                    NaN             NaN   
                              1                    NaN             NaN   
              

In [6]:
# Overview of the interventions table, printed in this order:
#   1. shape
#   2. first five rows
#   3. per-column non-null count after dropping every row that contains a NaN
#   4. number of distinct values per column
print(
    interventions.shape,
    interventions.head(),
    interventions.dropna().count(),
    interventions.nunique(),
    sep="\n",
)
# NOTE(review): if the counts in (3) equal the row count in (1), no row held a
# NaN; together with two distinct values per column in (4) that would suggest
# 0/1 (false/true) flags -- confirm against the printed output.

(2200954, 14)
                                        vent  vaso  adenosine  dobutamine  \
subject_id hadm_id icustay_id hours_in                                      
3          145834  211552     0            1     0          0           0   
                              1            1     1          0           0   
                              2            1     1          0           0   
                              3            1     1          0           0   
                              4            1     1          0           0   

                                        dopamine  epinephrine  isuprel  \
subject_id hadm_id icustay_id hours_in                                   
3          145834  211552     0                0            0        0   
                              1                1            0        0   
                              2                1            0        0   
                              3                0            0        0   
  

In [7]:
# Cardinality of the ethnicity column, followed by a blank line ...
print(f"number of unique ethnicities: {patients['ethnicity'].nunique()}\n")

# ... then the number of non-null `age` entries per ethnicity, used here as
# the per-category row count.
print("amount of rows with a certain ethnicity")
print(patients.groupby("ethnicity")["age"].count())

number of unique ethnicities: 41

amount of rows with a certain ethnicity
ethnicity
AMERICAN INDIAN/ALASKA NATIVE                                  15
AMERICAN INDIAN/ALASKA NATIVE FEDERALLY RECOGNIZED TRIBE        2
ASIAN                                                         545
ASIAN - ASIAN INDIAN                                           49
ASIAN - CAMBODIAN                                              10
ASIAN - CHINESE                                               166
ASIAN - FILIPINO                                               13
ASIAN - JAPANESE                                                6
ASIAN - KOREAN                                                 10
ASIAN - OTHER                                                   7
ASIAN - THAI                                                    3
ASIAN - VIETNAMESE                                             33
BLACK/AFRICAN                                                  25
BLACK/AFRICAN AMERICAN                                    

In [8]:
# Number of distinct insurance categories; the doubled newline reproduces the
# blank separator line before the table.
print("number of unique insurance:", patients["insurance"].nunique(), end="\n\n")

# Header line followed by the count of non-null `age` values per insurance
# category.
print(
    "amount of rows with a certain insurance",
    patients.groupby("insurance")["age"].count(),
    sep="\n",
)

number of unique insurance: 5

amount of rows with a certain insurance
insurance
Government     1050
Medicaid       2782
Medicare      18317
Private       11846
Self Pay        477
Name: age, dtype: int64


In [9]:
# Distribution of patients over the `diagnosis_at_admission` labels.
#
# The per-diagnosis counts (non-null `age` values in each group) are computed
# once and reused for the table and all four summary statistics, instead of
# re-running the identical groupby for every print.
diagnosis_counts = patients.groupby("diagnosis_at_admission")["age"].count()

print("number of unique diagnosis_at_admission:", patients.diagnosis_at_admission.nunique())
print()
print("amount of rows with a certain diagnosis_at_admission")
print(diagnosis_counts)

# Summary of how unevenly the labels are used (group sizes, not patient ages).
print("min count:", diagnosis_counts.min())
print("max count:", diagnosis_counts.max())
print("mean count:", diagnosis_counts.mean())
print("std count:", diagnosis_counts.std())

number of unique diagnosis_at_admission: 11352

amount of rows with a certain diagnosis_at_admission
diagnosis_at_admission
        MITRAL STENOSIS\MITRAL VALVE REPLACEMENT /SDA    1
   DUODENAL MASS/SDA                                     1
 ? SEROTONIN SYNDROME                                    1
 ABSENCE/SDA                                             1
 ADENOID CYSTIC CARCINOMA/SDA                            1
                                                        ..
WOUND INFECTION R/O SEPSIS                               1
WOUND INFECTION;HYPOTENSION                              1
WOUND TO RLE                                             1
ZENKER'S DIVERTICULUM/SDA                                1
ZYGOMATIC FRACTURE LEFT                                  1
Name: age, Length: 11352, dtype: int64
min count: 1
max count: 825
mean count: 3.0365574348132487
std count: 20.79460158089928


In [10]:
# How many distinct discharge locations appear in the patients table.
n_discharge_locations = patients["discharge_location"].nunique()
print(f"number of unique discharge_location: {n_discharge_locations}")
print()

# Non-null `age` entries per discharge location, used as the row count.
print("amount of rows with a certain discharge_location")
print(patients.groupby("discharge_location")["age"].count())

number of unique discharge_location: 17

amount of rows with a certain discharge_location
discharge_location
DEAD/EXPIRED                  3350
DISC-TRAN CANCER/CHLDRN H      451
DISC-TRAN TO FEDERAL HC         10
DISCH-TRAN TO PSYCH HOSP       372
HOME                         10129
HOME HEALTH CARE              9229
HOME WITH HOME IV PROVIDR       41
HOSPICE-HOME                   258
HOSPICE-MEDICAL FACILITY       107
ICF                             31
LEFT AGAINST MEDICAL ADVI      186
LONG TERM CARE HOSPITAL       1020
OTHER FACILITY                  43
REHAB/DISTINCT PART HOSP      3688
SHORT TERM HOSPITAL            353
SNF                           5203
SNF-MEDICAID ONLY CERTIF         1
Name: age, dtype: int64


In [11]:
# Build the three header lines (cardinality, blank separator, table title)
# and emit them in one call.
header_lines = (
    f"number of unique admission_type: {patients['admission_type'].nunique()}",
    "",
    "amount of rows with a certain admission_type",
)
print("\n".join(header_lines))

# Non-null `age` entries per admission type, used as the row count.
print(patients.groupby("admission_type")["age"].count())

number of unique admission_type: 3

amount of rows with a certain admission_type
admission_type
ELECTIVE      5705
EMERGENCY    27830
URGENT         937
Name: age, dtype: int64
