In [1]:
import pandas as pd
import numpy as np

from mimic3benchmark.readers import InHospitalMortalityReader

In [2]:
train_reader = InHospitalMortalityReader(dataset_dir='data/in-hospital-mortality/train',
                              listfile='data/in-hospital-mortality/train/listfile.csv')

test_reader = InHospitalMortalityReader(dataset_dir='data/in-hospital-mortality/test',
                              listfile='data/in-hospital-mortality/test/listfile.csv')

In [3]:
headers = ['Index', 'Hours', 'Capillary refill rate', 'Diastolic blood pressure', 'Fraction inspired oxygen', 'Glascow coma scale eye opening',
           'Glascow coma scale motor response', 'Glascow coma scale total', 'Glascow coma scale verbal response', 'Glucose', 'Heart Rate',
           'Height', 'Mean blood pressure', 'Oxygen saturation', 'Respiratory rate', 'Systolic blood pressure', 'Temperature', 'Weight', 'pH',
           'Label']

In [4]:
X = []

In [5]:
for i in range(train_reader.get_number_of_examples()):
    data = train_reader.read_example(i)
    index = np.array([[i] * data["X"].shape[0]]).T
    label = np.array([[data["y"]] * data["X"].shape[0]]).T
    tmp = np.concatenate((data["X"], label), axis=1)
    out = np.concatenate((index, tmp), axis=1)
    X.append(out)

In [6]:
for j in range(test_reader.get_number_of_examples()):
    data = test_reader.read_example(j)
    index = np.array([[i+j] * data["X"].shape[0]]).T
    label = np.array([[data["y"]] * data["X"].shape[0]]).T
    tmp = np.concatenate((data["X"], label), axis=1)
    out = np.concatenate((index, tmp), axis=1)
    X.append(out)

In [7]:
l = []
for x in X:
    l.append(x.shape[0])


In [8]:
np.mean(l)

96.06012583376697

In [9]:
all_data = np.vstack(X)
all_data.shape

(2030615, 20)

In [10]:
df = pd.DataFrame(all_data, index=None, columns=headers)
df

Unnamed: 0,Index,Hours,Capillary refill rate,Diastolic blood pressure,Fraction inspired oxygen,Glascow coma scale eye opening,Glascow coma scale motor response,Glascow coma scale total,Glascow coma scale verbal response,Glucose,Heart Rate,Height,Mean blood pressure,Oxygen saturation,Respiratory rate,Systolic blood pressure,Temperature,Weight,pH,Label
0,0,0.0,,,,,,,,128.0,,,,,,,,,7.47,0
1,0,0.0,,,,,,,,104.0,,,,,,,,,7.42,0
2,0,0.0,,,,,,,,184.0,,,,,,,,,7.49,0
3,0,0.0,,,,,,,,106.0,,,,,,,,,7.34,0
4,0,0.0,,,,,,,,,,,,100.0,10,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2030610,21137,48.0,,40.0,,,,,,,122,,58,100.0,21,114.0,,,,1
2030611,21137,48.0,,42.0,,,,,,267.0,117,,56,,23,103.0,,,,1
2030612,21137,48.0,,,,,,,,,,,,100.0,,,,,,1
2030613,21137,48.0,,44.0,,,,,,,126,,58,,22,111.0,,,,1


In [11]:
coma_scale_eye_opening_replacements = {
    "1 No Response": 1,
    "None": 1,
    "2 To pain": 2,
    "To Pain": 2,
    "3 To speech": 3,
    "To Speech": 3,
    "4 Spontaneously": 4,
    "Spontaneously": 4,
}
coma_scale_motor_replacements = {
    "1 No Response": 1,
    "No response": 1,
    "2 Abnorm extensn": 2,
    "Abnormal extension": 2,
    "3 Abnorm flexion": 3,
    "Abnormal Flexion": 3,
    "4 Flex-withdraws": 4,
    "Flex-withdraws": 4,
    "5 Localizes Pain": 5,
    "Localizes Pain": 5,
    "6 Obeys Commands": 6,
    "Obeys Commands": 6
}
coma_scale_verbal_replacements = {
    "No Response-ETT": 0,
    "1.0 ET/Trach": 0,
    "1 No Response": 1,
    "No Response": 1,
    "2 Incomp sounds": 2,
    "Incomprehensible sounds": 2,
    "3 Inapprop words": 3,
    "Inappropriate Words": 3,
    "4 Confused": 4,
    "Confused": 4,
    "5 Oriented": 5,
    "Oriented": 5,
}

In [24]:
def preprocess_coma_scales(data):
    to_replace = {
        "Glascow coma scale eye opening":
            coma_scale_eye_opening_replacements,
        "Glascow coma scale motor response":
            coma_scale_motor_replacements,
        "Glascow coma scale verbal response":
            coma_scale_verbal_replacements
    }
    coma_scale_columns = list(to_replace.keys())
    coma_scales = data[coma_scale_columns]
    coma_scales = coma_scales.astype(str)
    coma_scales = coma_scales.replace(
        to_replace=to_replace
    )
    # coma_scales = coma_scales.astype(float)
    data = data.copy()
    data[coma_scale_columns] = coma_scales
    return data

In [25]:
df = preprocess_coma_scales(df)

In [26]:
df

Unnamed: 0,Index,Hours,Capillary refill rate,Diastolic blood pressure,Fraction inspired oxygen,Glascow coma scale eye opening,Glascow coma scale motor response,Glascow coma scale total,Glascow coma scale verbal response,Glucose,Heart Rate,Height,Mean blood pressure,Oxygen saturation,Respiratory rate,Systolic blood pressure,Temperature,Weight,pH,Label
0,0,0.0,,,,,,,,128.0,,,,,,,,,7.47,0
1,0,0.0,,,,,,,,104.0,,,,,,,,,7.42,0
2,0,0.0,,,,,,,,184.0,,,,,,,,,7.49,0
3,0,0.0,,,,,,,,106.0,,,,,,,,,7.34,0
4,0,0.0,,,,,,,,,,,,100.0,10,,,,,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2030610,21137,48.0,,40.0,,,,,,,122,,58,100.0,21,114.0,,,,1
2030611,21137,48.0,,42.0,,,,,,267.0,117,,56,,23,103.0,,,,1
2030612,21137,48.0,,,,,,,,,,,,100.0,,,,,,1
2030613,21137,48.0,,44.0,,,,,,,126,,58,,22,111.0,,,,1


In [27]:
df.to_csv("mortality.csv")