# Train neural net
Using embeddings

In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load (or reload) the autoreload extension; mode 2 re-imports edited modules
# automatically before each cell is executed.
%reload_ext autoreload
%autoreload 2

In [2]:
from fastai.structured import *
from fastai.column_data import *

In [3]:
# Capture the kernel's current working directory (via the %pwd magic);
# the mimic3-benchmarks path below is built relative to it.
NB_DIR = %pwd

In [4]:
# Root of the raw MIMIC-III files; the interim and processed directories are
# addressed as siblings of it through `..`.
# NOTE: RAW_DATA already ends with '/', so every derived path contains '//'
# (see the sample path printed further down). POSIX resolves this as a single
# separator, so the paths still work.
RAW_DATA = '/data1/MIMIC-III/RAW/'
INTERIM_DATA = f'{RAW_DATA}/../interim/'
PROCESSED_DATA = f'{RAW_DATA}/../processed/'

In [5]:
# Path to the mimic3-benchmarks directory, one level above the notebook directory.
MIMIC3_BENCHMARK_LOCATION = f'{NB_DIR}/../mimic3-benchmarks/'

In [6]:
# Directory holding the processed in-hospital-mortality (IHM) data.
IHM_DATA = f'{PROCESSED_DATA}/in-hospital-mortality/'

# Explore data

In [7]:
# Train and test split directories of the in-hospital-mortality data.
train_dir = f'{IHM_DATA}/train/'
test_dir = f'{IHM_DATA}/test/'

In [8]:
# Label list file of each split (the train file's `stay` / `y_true` columns
# are previewed a few cells below).
train_label_file = f'{train_dir}/listfile.csv'
test_label_file = f'{test_dir}/listfile.csv'

In [9]:
# Load the training label table; it is looked up by `stay` in later cells.
train_label = pd.read_csv(train_label_file)

In [10]:
# Preview the first rows of the label table.
train_label.head()

Unnamed: 0,stay,y_true
0,44973_episode1_timeseries.csv,1
1,5250_episode1_timeseries.csv,0
2,25175_episode1_timeseries.csv,0
3,14159_episode1_timeseries.csv,1
4,7994_episode1_timeseries.csv,0


In [11]:
# Collect the paths of every CSV in the train directory whose name contains
# "episode". glob returns them in arbitrary (filesystem-dependent) order.
all_train_episodes = glob(f'{train_dir}/*episode*.csv')

In [12]:
# Number of training episode files found.
len(all_train_episodes)

17903

In [31]:
# Draw a reproducible sample of 5000 episode paths. Later cells index into this
# list by position, so the draw must be identical on every Restart & Run All:
#   * sort first, because glob returns paths in filesystem-dependent order;
#   * use a private, seeded RNG so the result does not depend on (or disturb)
#     the global `random` state.
sample_train_episodes = random.Random(42).sample(sorted(all_train_episodes), 5000)

In [32]:
# Show one of the sampled episode paths.
sample_train_episodes[5]

'/data1/MIMIC-III/RAW//../processed//in-hospital-mortality//train/57806_episode1_timeseries.csv'

In [33]:
# Load a single sampled episode to inspect its structure.
one_sample_train_episode = pd.read_csv(sample_train_episodes[5])

In [34]:
# Subject/episode id of the episode loaded above: the first two "_"-separated
# tokens of its file name (e.g. '57806_episode1'). Derived from the path rather
# than hard-coded, so it stays correct whichever episode ends up at index 5.
subjEp = "_".join(sample_train_episodes[5].split('/')[-1].split("_")[:2])

In [35]:
# Column dtypes and non-null counts of the sampled episode.
one_sample_train_episode.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 114 entries, 0 to 113
Data columns (total 18 columns):
Hours                                 114 non-null float64
Capillary refill rate                 0 non-null float64
Diastolic blood pressure              59 non-null float64
Fraction inspired oxygen              10 non-null float64
Glascow coma scale eye opening        13 non-null object
Glascow coma scale motor response     13 non-null object
Glascow coma scale total              0 non-null float64
Glascow coma scale verbal response    13 non-null object
Glucose                               43 non-null float64
Heart Rate                            61 non-null float64
Height                                0 non-null float64
Mean blood pressure                   59 non-null float64
Oxygen saturation                     66 non-null float64
Respiratory rate                      63 non-null float64
Systolic blood pressure               59 non-null float64
Temperature                   

In [99]:
# Display the sampled episode's rows (one row per timestamp in `Hours`).
one_sample_train_episode

Unnamed: 0,Hours,Capillary refill rate,Diastolic blood pressure,Fraction inspired oxygen,Glascow coma scale eye opening,Glascow coma scale motor response,Glascow coma scale total,Glascow coma scale verbal response,Glucose,Heart Rate,Height,Mean blood pressure,Oxygen saturation,Respiratory rate,Systolic blood pressure,Temperature,Weight,pH
0,0.474444,,,,,,,,1000.0,,,,,,,,,5.00
1,0.607778,,,,,,,,,,,,,,,,,7.28
2,1.341111,,,,,,,,604.0,,,,,,,,,
3,1.491111,,,,,,,,,,,,,,,,,7.32
4,1.641111,,,,,,,,,,,,,20.0,,,,
5,1.791111,,,,,,,,,70.0,,,,14.0,,,,
6,1.807778,,93.0,,,,,,,70.0,,114.0,,14.0,169.0,,,
7,1.824444,,,1.0,,,,,,,,,94.0,0.0,,,,
8,1.891111,,106.0,,Spontaneously,Obeys Commands,,No Response-ETT,,67.0,,94.0,100.0,17.0,182.0,,,
9,1.974444,,,,,,,,,70.0,,,94.0,17.0,,35.666667,,


# Add outcome column

In [86]:
def insert_outcome_column(subject, labels=None):
    """Load one episode CSV and append its label as an ``outcome`` column.

    Parameters
    ----------
    subject : str
        Path to an episode CSV. The file name (everything after the last
        ``/``) is matched against the ``stay`` column of the label table.
    labels : pandas.DataFrame, optional
        Label table with ``stay`` and ``y_true`` columns. Defaults to the
        notebook-level ``train_label`` frame.

    Returns
    -------
    pandas.DataFrame
        The episode data with a constant integer ``outcome`` column appended
        as the last column.

    Raises
    ------
    ValueError
        If the label table does not contain exactly one row for the file.
    """
    if labels is None:
        labels = train_label

    stay = subject.split('/')[-1]
    # Single .loc lookup instead of chained indexing.
    matches = labels.loc[labels['stay'] == stay, 'y_true']
    # Fail with a clear message instead of the opaque TypeError that
    # int(<Series>) raises for zero or multiple matches.
    if len(matches) != 1:
        raise ValueError(
            f"expected exactly one label row for stay {stay!r}, found {len(matches)}"
        )
    # int() on a Series is deprecated in recent pandas; take the scalar explicitly.
    outcome = int(matches.iloc[0])

    # The frame is freshly read and owned by this call, so no defensive copy is needed.
    df = pd.read_csv(subject)
    df.insert(len(df.columns), 'outcome', outcome)
    return df

In [87]:
# Pick one sampled episode and keep only its file name, which is the key
# used in the `stay` column of the label table.
subject = sample_train_episodes[6]
subj = subject.split('/')[-1]

In [88]:
# Look up the label row(s) for that file name: boolean row mask plus column
# selection in a single .loc call.
train_label.loc[train_label['stay'] == subj, 'y_true']

2083    0
Name: y_true, dtype: int64

In [93]:
# Try the outcome helper on one sampled episode.
upd_sample_2 = insert_outcome_column(sample_train_episodes[1])

In [95]:
# Check that `outcome` was appended as the last column.
upd_sample_2.head()

Unnamed: 0,Hours,Capillary refill rate,Diastolic blood pressure,Fraction inspired oxygen,Glascow coma scale eye opening,Glascow coma scale motor response,Glascow coma scale total,Glascow coma scale verbal response,Glucose,Heart Rate,Height,Mean blood pressure,Oxygen saturation,Respiratory rate,Systolic blood pressure,Temperature,Weight,pH,outcome
0,0.369444,,,,,,,,87.0,,,,,,,,,7.42,0
1,1.369444,,,,,,,,87.0,,,,,,,,,7.42,0
2,1.802778,,,,,,,,129.0,,,,,,,,,7.5,0
3,2.302778,,,,,,,,173.0,,,,,,,,,7.48,0
4,2.652778,,,,,,,,,,,,,,,,68.855266,,0


# Add subject-episode column

In [96]:
def insert_subj_episode(subject):
    """Read an episode CSV and prepend a ``subject_episode`` identifier column.

    The identifier is built from the file name of ``subject`` (the text after
    the last ``/``): its first two ``_``-separated tokens joined by ``_``,
    e.g. ``'54830_episode1_timeseries.csv'`` -> ``'54830_episode1'``.

    Parameters
    ----------
    subject : str
        Path to the episode CSV file.

    Returns
    -------
    pandas.DataFrame
        The file's contents with ``subject_episode`` as the first column,
        holding the same identifier on every row.
    """
    file_name = subject.rsplit('/', 1)[-1]
    name_tokens = file_name.split("_")
    episode_id = "_".join(name_tokens[:2])

    episode_df = pd.read_csv(subject)
    episode_df.insert(0, 'subject_episode', episode_id)
    return episode_df

In [55]:
# Try the subject-episode helper on one sampled episode.
upd_sample = insert_subj_episode(sample_train_episodes[6])

In [56]:
# Check that `subject_episode` was inserted as the first column.
upd_sample.head()

Unnamed: 0,subject_episode,Hours,Capillary refill rate,Diastolic blood pressure,Fraction inspired oxygen,Glascow coma scale eye opening,Glascow coma scale motor response,Glascow coma scale total,Glascow coma scale verbal response,Glucose,Heart Rate,Height,Mean blood pressure,Oxygen saturation,Respiratory rate,Systolic blood pressure,Temperature,Weight,pH
0,54830_episode1,2.326111,,,,,Flex-withdraws,,No Response-ETT,,,,,,20.0,,36.166667,,
1,54830_episode1,2.692778,,,1.0,,,,,,,,,,0.0,,,,
2,54830_episode1,2.776111,,,,,,,,,84.0,,,,15.0,,,,
3,54830_episode1,2.842778,,,,,,,,,,,,100.0,,,,,
4,54830_episode1,3.076111,,77.0,,,,,,,,,85.0,,,110.0,,,


In [57]:
# Read every sampled episode file into its own DataFrame, in sample order.
sample_dfs = list(map(pd.read_csv, sample_train_episodes))

In [64]:
# One frame per sampled episode, each tagged with its subject_episode id.
# A comprehension is used instead of a top-level for-loop so the loop variable
# does not rebind the notebook-global `subj` (a bare file name that the
# label-lookup cell reads) to a full path.
sample_dfs_subjEp = [
    insert_subj_episode(episode_path) for episode_path in sample_train_episodes
]

In [66]:
# Stack all tagged episode frames into one long frame.
# NOTE(review): concat keeps each frame's own row labels, so index values
# repeat across episodes; rows are told apart by `subject_episode`, not by index.
all_sample_dfs = pd.concat(sample_dfs_subjEp)

In [102]:
# Distinct values of the GCS motor-response column. The recorded output shows
# the same response spelled several ways (e.g. 'Obeys Commands' and
# '6 Obeys Commands').
all_sample_dfs['Glascow coma scale motor response'].unique()

array([nan, 'Localizes Pain', 'Obeys Commands', 'Abnormal extension', '6 Obeys Commands', '5 Localizes Pain',
       '1 No Response', 'Flex-withdraws', 'No response', '4 Flex-withdraws', 'Abnormal Flexion',
       '3 Abnorm flexion', '2 Abnorm extensn'], dtype=object)

In [67]:
# Preview the first 500 rows of the combined frame.
all_sample_dfs.head(500)

Unnamed: 0,subject_episode,Hours,Capillary refill rate,Diastolic blood pressure,Fraction inspired oxygen,Glascow coma scale eye opening,Glascow coma scale motor response,Glascow coma scale total,Glascow coma scale verbal response,Glucose,Heart Rate,Height,Mean blood pressure,Oxygen saturation,Respiratory rate,Systolic blood pressure,Temperature,Weight,pH
0,83860_episode1,0.135833,,,,,,,,,105.0,,,,20.0,,,,
1,83860_episode1,0.152500,,68.0,,,,,,,,,96.0,97.0,,169.0,,,
2,83860_episode1,0.319167,,71.0,,,,,,,105.0,,100.0,97.0,16.0,175.0,,,
3,83860_episode1,0.419167,,68.0,,,,,,,104.0,,100.0,96.0,18.0,182.0,37.222222,,
4,83860_episode1,0.435833,,,,,,,,,,,,,,,,98.50000,
5,83860_episode1,0.519167,,,,To Speech,Localizes Pain,,Confused,169.0,,,,,,,,,
6,83860_episode1,1.319167,,51.0,,,,,,,116.0,,69.0,98.0,23.0,119.0,37.777778,,
7,83860_episode1,2.319167,,57.0,,Spontaneously,Obeys Commands,,Confused,,119.0,,80.0,96.0,25.0,151.0,,,
8,83860_episode1,3.319167,,64.0,,,,,,,103.0,,89.0,97.0,24.0,150.0,,,
9,83860_episode1,3.385833,,,,,,,,,,,,,,,,,7.37


# TODO

- Decide how to represent each episode together with mortality
- Merge all training episodes together in one dataframe