In [78]:
from __future__ import absolute_import
from __future__ import print_function
import argparse
import os
import pandas as pd
import numpy as np
from mimic3benchmark.readers import InHospitalMortalityReader
from mimic3models import common_utils
from mimic3models.in_hospital_mortality.utils import save_results
from mimic3models.metrics import print_metrics_binary

In [9]:
def read_and_extract_features(reader, period, features):
    """Read every example from ``reader`` and extract features from the raw data.

    All ``reader.get_number_of_examples()`` examples are fetched in a single
    chunk with ``common_utils.read_chunk``. The chunk's ``'X'`` and
    ``'header'`` entries are then handed, together with ``period`` and
    ``features``, to ``common_utils.extract_features_from_rawdata``.

    Args:
        reader: object exposing ``get_number_of_examples()`` that
            ``common_utils.read_chunk`` can read from.
        period: forwarded unchanged to the feature extractor.
        features: forwarded unchanged to the feature extractor.

    Returns:
        A 3-tuple ``(X, y, names)``: the extractor's result followed by the
        chunk's ``'y'`` and ``'name'`` entries.
    """
    # For a quick smoke run, pass a small fixed count (e.g. 100) to read_chunk
    # instead of the full example count.
    n_examples = reader.get_number_of_examples()
    chunk = common_utils.read_chunk(reader, n_examples)
    extracted = common_utils.extract_features_from_rawdata(
        chunk['X'], chunk['header'], period, features)
    return extracted, chunk['y'], chunk['name']

In [15]:
# One reader per split, all with a 48.0 period length.
# Note that the validation reader reads from the *train* directory and differs
# from the train reader only in its list file.
train_reader = InHospitalMortalityReader(
    dataset_dir='data/in-hospital-mortality/train',
    listfile='data/in-hospital-mortality/train_listfile.csv',
    period_length=48.0,
)
val_reader = InHospitalMortalityReader(
    dataset_dir='data/in-hospital-mortality/train',
    listfile='data/in-hospital-mortality/val_listfile.csv',
    period_length=48.0,
)
test_reader = InHospitalMortalityReader(
    dataset_dir='data/in-hospital-mortality/test',
    listfile='data/in-hospital-mortality/test_listfile.csv',
    period_length=48.0,
)

In [17]:
# Only the labels and the example names are kept from each split; the
# extracted feature matrix (first tuple element) is discarded.
_, train_y, train_names = read_and_extract_features(train_reader, 'all', 'all')
_, val_y, val_names = read_and_extract_features(val_reader, 'all', 'all')
_, test_y, test_names = read_and_extract_features(test_reader, 'all', 'all')

# Column layout shared by the three output tables.
columns = [
    'age',
    'SBPmin', 'SBPmax',
    'Tempmin', 'Tempmax',
    'Respmin', 'Respmax',
    'ABEmin', 'ABEmax',
    'Lacmin', 'Lacmax',
    'SBEmin', 'SBEmax',
    'pCO2', 'pO2', 'K', 'HCO3', 'sO2',
    'PC', 'PCT', 'Glu', 'SBC',
    'M_label',
]

# Empty tables, one per split, to be filled by the cells below.
train_data = pd.DataFrame(columns=columns)
val_data = pd.DataFrame(columns=columns)
test_data = pd.DataFrame(columns=columns)

In [89]:
# Build the per-example summary tables: event statistics, label and age.
#
# Each table is assembled in a single pass from a list of records instead of
# being grown row by row, and the age is written while the record is built, so
# no chained assignment (frame['age'][i] = ...) is needed afterwards.

def _find_episode_file(root_dir, patient_id, max_episode=5):
    """Return the first existing ``<root_dir><patient_id>/episodeN.csv``.

    Episodes are probed in order N = 1 .. ``max_episode``.

    Args:
        root_dir: directory prefix, including the trailing slash.
        patient_id: sub-directory name of the patient.
        max_episode: highest episode number that is probed.

    Returns:
        The path of the first file that exists, or ``None`` when none of the
        probed files exists. Returning ``None`` (rather than keeping a
        variable from an earlier iteration) guarantees that a patient without
        an episode file never picks up another patient's file.
    """
    for episode in range(1, max_episode + 1):
        candidate = root_dir + patient_id + '/episode' + str(episode) + '.csv'
        if os.path.exists(candidate):
            return candidate
    return None


def _build_split_table(names, labels, timeseries_dir, root_dir, columns):
    """Build the summary table for one split.

    For every file name in ``names`` the CSV ``timeseries_dir + name`` is read
    and reduced to min/max statistics, the matching entry of ``labels`` is
    stored as ``'M_label'``, and ``'age'`` is taken from the first row of the
    ``'Age'`` column of the patient's episode file under ``root_dir``.

    Args:
        names: file names of the examples; the text before the first ``'_'``
            is used as the patient directory name.
        labels: one label per entry of ``names``, in the same order.
        timeseries_dir: directory prefix (with trailing slash) of the
            per-example CSV files.
        root_dir: directory prefix (with trailing slash) that holds one
            sub-directory per patient with ``episodeN.csv`` files.
        columns: column names, in order, of the resulting table.

    Returns:
        A float ``DataFrame`` with one row per example, indexed 0..n-1.
        Columns of ``columns`` that are not computed here are all-NaN.

    Raises:
        ValueError: if ``names`` and ``labels`` differ in length.
    """
    if len(names) != len(labels):
        raise ValueError('names and labels differ in length: %d vs %d'
                         % (len(names), len(labels)))

    records = []
    for name, label in zip(names, labels):
        events = pd.read_csv(timeseries_dir + name)
        systolic = events['Systolic blood pressure']
        temperature = events['Temperature']
        respiratory = events['Respiratory rate']
        # Keys that are left out become NaN when the frame is constructed,
        # which avoids spelling out NaN placeholders (np.NaN no longer exists
        # in NumPy 2.0).
        record = {
            'SBPmin': systolic.min(),
            'SBPmax': systolic.max(),
            'Tempmin': temperature.min(),
            'Tempmax': temperature.max(),
            'Respmin': respiratory.min(),
            'Respmax': respiratory.max(),
            'sO2': events['Oxygen saturation'].min(),
            'Glu': events['Glucose'].max(),
            'M_label': label,
        }

        # NOTE(review): only the part of the file name before the first '_' is
        # used, and the first existing episode1..5 file is read. If the name
        # also encodes an episode number it is ignored here - confirm that
        # this is intended.
        patient_id = name.split('_')[0]
        episode_path = _find_episode_file(root_dir, patient_id)
        if episode_path is None:
            print('No episode file found for patient ' + patient_id
                  + ' under ' + root_dir + '; age left as NaN')
        else:
            record['age'] = pd.read_csv(episode_path)['Age'].iloc[0]

        records.append(record)

    # dtype=float: every column, including the label, is stored as a float so
    # that missing values are plain NaN.
    return pd.DataFrame(records, columns=columns, dtype=float)


test_data = _build_split_table(test_names, test_y,
                               'data/in-hospital-mortality/test/',
                               'data/root/test/', columns)
train_data = _build_split_table(train_names, train_y,
                                'data/in-hospital-mortality/train/',
                                'data/root/train/', columns)
# The validation examples are read from the train directories.
val_data = _build_split_table(val_names, val_y,
                              'data/in-hospital-mortality/train/',
                              'data/root/train/', columns)

In [None]:
# Fill train_data['age'] from the first row of the 'Age' column of each
# patient's episode file. The preceding cell already populates this column
# with the same lookup, so this cell only recomputes it.
for i, file in enumerate(train_names):
    PID = file.split('_')[0]

    # Reset on every iteration: a patient without any episode file must not
    # silently reuse the file found for the previous patient.
    path = None
    for episode in range(1, 6):
        candidate = 'data/root/train/' + PID + '/episode' + str(episode) + '.csv'
        if os.path.exists(candidate):
            path = candidate
            break

    if path is None:
        print('No episode file found for patient ' + PID
              + ' under data/root/train/; age left unchanged')
        continue

    df = pd.read_csv(path)
    # Single .loc assignment instead of train_data['age'][i] = ...; the
    # chained form may write to a temporary copy and leave the frame untouched.
    train_data.loc[i, 'age'] = df['Age'][0]

In [None]:
# Report the size of each table, then persist all three as CSV files.
print(train_data.shape)
print(val_data.shape)
print(test_data.shape)

# to_csv does not create missing directories; make sure the target exists so
# the export cannot fail after all the processing above has finished.
output_dir = 'mimiciii_data'
if not os.path.isdir(output_dir):
    os.makedirs(output_dir)

for filename, table in (('test_data.csv', test_data),
                        ('val_data.csv', val_data),
                        ('train_data.csv', train_data)):
    table.to_csv(output_dir + '/' + filename)