In [1]:
import pickle
import numpy as np
import pandas as pd
from pandas import DataFrame
from pandas import read_csv
from matplotlib import pyplot
import matplotlib.pyplot as plt
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import BatchNormalization
#from keras_layer_normalization import LayerNormalization
from tensorflow.keras.layers import Conv1D
from tensorflow.keras.layers import MaxPooling1D
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import LearningRateScheduler
#from tensorflow.keras.layers import LayerNormalization

In [2]:
filepath= "processed_data/processedFrames.pkl"
OutputFrames = pickle.load(open(filepath, 'rb'))

In [3]:
admission_count = 0
rows = 0
rows_total = 0
df= DataFrame()
for key, chartFrame in OutputFrames.items():
    countFrame = chartFrame.drop(['firstICU', 'subject', 'Height (cm)', 'Non Invasive Blood Pressure mean', 'Non Invasive Blood Pressure systolic', 'Non Invasive Blood Pressure diastolic', 'Propofol','Fentanyl (Concentrate)', 'Midazolam (Versed)','Fentanyl','Dexmedetomidine (Precedex)','Morphine Sulfate','Hydromorphone (Dilaudid)','Lorazepam (Ativan)', 'SBT'], axis=1)
    if countFrame.isnull().values.any() == False:
        df = df.append(countFrame, ignore_index=True)
        rows = len(countFrame.index)
        admission_count = admission_count +1
        rows_total = rows_total + rows

In [4]:
df.head()

Unnamed: 0,timestamp,hadm,Admittype,Admdays,Ethnicity,Gender,Age,Admission Weight (Kg),Heart Rate,Respiratory Rate,...,Peak Insp. Pressure,O2 Flow,Plateau Pressure,Arterial O2 pressure,Arterial CO2 Pressure,Vented,Arterial Blood Pressure systolic,Arterial Blood Pressure diastolic,Arterial Blood Pressure mean,SBT_value
0,2161-10-11 12:00:00,196611.0,1.0,8.18,0.0,0.0,65.4,93.0,73.0,18.0,...,38.0,3.0,22.0,117.0,53.0,0.0,80.0,58.0,63.0,0.0
1,2161-10-11 13:00:00,196611.0,1.0,8.18,0.0,0.0,65.4,93.0,60.5,17.5,...,38.0,3.0,22.0,117.0,53.0,1.0,80.0,58.0,63.0,0.0
2,2161-10-11 14:00:00,196611.0,1.0,8.18,0.0,0.0,65.4,93.0,63.0,18.0,...,38.0,3.0,22.0,117.0,53.0,1.0,80.0,58.0,63.0,0.0
3,2161-10-11 15:00:00,196611.0,1.0,8.18,0.0,0.0,65.4,93.0,68.0,18.0,...,38.0,3.0,22.0,117.0,53.0,1.0,95.0,64.0,75.0,0.0
4,2161-10-11 16:00:00,196611.0,1.0,8.18,0.0,0.0,65.4,93.0,64.0,18.0,...,34.0,3.0,22.0,117.0,53.0,1.0,60.0,57.0,59.0,0.0


In [5]:
ventilator = df['Ventilator Mode'].unique()

In [6]:
ventilator.shape

(24,)

In [7]:
np.sort(ventilator)

array([ 0.,  1.,  2.,  6.,  7., 10., 11., 12., 13., 14., 15., 16., 17.,
       26., 30., 45., 46., 47., 48., 49., 50., 51., 53., 71.])

In [8]:
ventilator_dictstring = {0.:'PRVC/AC', 1.:'CMV', 2.:'CMV/ASSIST', 6.:'SIMV', 7.:'SIMV/PSV', 10.:'CPAP', 11.:'CPAP/PSV', 
                         12.:'MMV', 13.:'MMV/PSV', 14.:'PCV+', 15.:'SYNCHRON MASTER', 16.:'SYNCHRON SLAVE', 
                         17.:'Apnea Ventilation', 26.:'APRV', 30.:'Standby', 45.: 'PCV+/PSV', 46.:'SIMV/AutoFlow', 
                         47.:'SIMV/PSV/AutoFlow', 48.:'CMV/AutoFlow', 49.:'CMV/ASSIST/AutoFlow', 50.:'MMV/AutoFlow',
                         51.:'MMV/PSV/AutoFlow', 53.:'CPAP/PPS', 71.:'PCV+Assist'}

In [9]:
ventilator_dict = {0.: 0,  1.:1,  2.:2,  6.:3,  7.:4, 10.:5, 11.:6, 12.:7, 13.:8, 14.:9, 15.:10, 16.:11, 17.:12,
       26.:13, 30.:14, 45.:15, 46.:16, 47.:17, 48.:18, 49.:19, 50.:20, 51.:21, 53.:22, 71.:23}

In [10]:
convert_dict = {'Admittype': int, 
                'Ethnicity': int,
                'Gender': int,
                'SBT_value': int,
                'O2 saturation pulseoxymetry': int,
                'Peak Insp. Pressure': int
                
               }

In [11]:
df['ventilator_category'] = df['Ventilator Mode'].apply(lambda x:ventilator_dict[x] )

In [12]:
df = df.astype(convert_dict) 

In [13]:
df = df.rename(columns={'Respiratory Rate (spontaneous)': 'Respiratory Rate_spontaneous'})

In [14]:
df['Admittype'].dtype

dtype('int64')

In [15]:
df.to_csv('data/orginal_data.csv', index= False)

In [16]:
# now split into train/validation/test sets
import random
#unique_ids = [*OutputFrames]
unique_ids = df['hadm'].unique()
random.shuffle(unique_ids)
train_sample = 0.8
val_sample = 0.1
test_sample = 0.1
train_num = int(len(unique_ids) * 0.8)
val_num = int(len(unique_ids)*0.1) + train_num
train_ids = unique_ids[:train_num]
val_ids = unique_ids[train_num:val_num]
test_ids = unique_ids[val_num:]

In [17]:
train_set = DataFrame()
train_set = df.loc[df['hadm'].isin(train_ids)]

val_set = DataFrame()
val_set = df.loc[df['hadm'].isin(val_ids)]

test_set = DataFrame()
test_set = df.loc[df['hadm'].isin(test_ids)]

In [18]:
len(unique_ids), len(train_ids), len(test_ids), len(val_ids), rows_total, admission_count

(2685, 2148, 269, 268, 301964, 2685)

In [19]:
train_set = train_set.drop(['timestamp', 'hadm', 'Admdays', 'Ventilator Mode'], axis=1)
val_set = val_set.drop(['timestamp', 'hadm', 'Admdays', 'Ventilator Mode'], axis=1)
test_set = test_set.drop(['timestamp', 'hadm', 'Admdays', 'Ventilator Mode'], axis=1)

In [20]:
train_set.head()

Unnamed: 0,Admittype,Ethnicity,Gender,Age,Admission Weight (Kg),Heart Rate,Respiratory Rate,O2 saturation pulseoxymetry,Inspired O2 Fraction,PEEP set,...,O2 Flow,Plateau Pressure,Arterial O2 pressure,Arterial CO2 Pressure,Vented,Arterial Blood Pressure systolic,Arterial Blood Pressure diastolic,Arterial Blood Pressure mean,SBT_value,ventilator_category
187,1,0,0,63.9,113.0,85.4,16.25,100,100.0,5.0,...,12.0,19.0,103.0,35.0,0.0,112.4,59.6,77.2,0,2
188,1,0,0,63.9,113.0,90.5,15.75,100,100.0,5.0,...,12.0,19.0,90.0,43.0,1.0,106.5,56.25,71.0,0,2
189,1,0,0,63.9,113.0,91.5,17.5,100,70.0,10.0,...,12.0,23.0,167.0,39.0,1.0,106.0,56.5,71.0,0,19
190,1,0,0,63.9,113.0,90.0,18.0,100,70.0,10.0,...,12.0,23.0,111.0,35.0,1.0,106.0,58.0,73.0,0,19
191,1,0,0,63.9,113.0,94.0,18.0,100,50.0,10.0,...,12.0,23.0,111.0,35.0,1.0,95.0,56.0,68.0,0,6


In [21]:
# now get the y lable #
trainy = train_set['Vented']
valy = val_set['Vented']
testy = test_set['Vented']

In [22]:
train_set = train_set.drop(['Vented'], axis=1)
val_set = val_set.drop(['Vented'], axis=1)
test_set = test_set.drop(['Vented'], axis=1)

In [23]:
trainy.to_csv('data/trainy.csv', index=False)
valy.to_csv('data/valy.csv', index=False)
testy.to_csv('data/testy.csv', index=False)

In [24]:
train_set.to_csv('data/train_set.csv',index = False)
val_set.to_csv('data/val_set.csv', index = False)
test_set.to_csv('data/test_set.csv', index = False)

In [25]:
# make the range between 0 and 1
column = list(train_set.columns.values)
for col in column:
    minimum = df[col].min()
    maximum = df[col].max()
    train_set[col] = (train_set[col] - minimum)/(maximum-minimum)
    val_set[col] = (val_set[col] - minimum)/(maximum-minimum)
    test_set[col] = (test_set[col] - minimum)/(maximum-minimum)

In [26]:
train_set.to_csv('data/train_set_scaled.csv', index=False)
val_set.to_csv('data/val_set_scaled.csv', index=False)
test_set.to_csv('data/test_set_scaled.csv', index=False)

In [27]:
train_set.head()

Unnamed: 0,Admittype,Ethnicity,Gender,Age,Admission Weight (Kg),Heart Rate,Respiratory Rate,O2 saturation pulseoxymetry,Inspired O2 Fraction,PEEP set,...,Peak Insp. Pressure,O2 Flow,Plateau Pressure,Arterial O2 pressure,Arterial CO2 Pressure,Arterial Blood Pressure systolic,Arterial Blood Pressure diastolic,Arterial Blood Pressure mean,SBT_value,ventilator_category
187,1.0,0.0,0.0,0.160027,0.680437,0.367296,0.101562,1.0,1.0,0.333333,...,0.354839,0.628571,0.454545,0.25495,0.313433,0.393469,0.195885,0.19422,0.5,0.086957
188,1.0,0.0,0.0,0.160027,0.680437,0.399371,0.098437,1.0,1.0,0.333333,...,0.354839,0.628571,0.454545,0.222772,0.432836,0.369388,0.182099,0.176301,0.5,0.086957
189,1.0,0.0,0.0,0.160027,0.680437,0.40566,0.109375,1.0,0.7,0.666667,...,0.435484,0.628571,0.575758,0.413366,0.373134,0.367347,0.183128,0.176301,0.5,0.826087
190,1.0,0.0,0.0,0.160027,0.680437,0.396226,0.1125,1.0,0.7,0.666667,...,0.435484,0.628571,0.575758,0.274752,0.313433,0.367347,0.1893,0.182081,0.5,0.826087
191,1.0,0.0,0.0,0.160027,0.680437,0.421384,0.1125,1.0,0.5,0.666667,...,0.435484,0.628571,0.575758,0.274752,0.313433,0.322449,0.18107,0.16763,0.5,0.26087


In [None]:
############################################
############ find the class weight ##########
ynew = model.predict(train_set)

In [None]:
ynew = np.argmax(ynew, axis=1)

In [None]:
zerocount = 0
onecount = 0
for i in range(len(ynew)):
    if ynew[i] == 0:
        zerocount = zerocount +1
    else:
        onecount = onecount + 1

In [None]:
testy_count=read_csv('data/trainy.csv')

In [None]:
zero = testy_count[testy_count['Vented']==0]

In [None]:
zero.shape

In [None]:
one = testy_count[testy_count['Vented']==1]

In [None]:
one.shape
########################## use the real class lable ratio to update ##############

In [None]:
# summarize scores
def summarize_results(scores):
    m, s = np.mean(scores), np.std(scores)
    print('Accuracy: %.3f%% (+/-%.3f)' % (m, s))

In [None]:
scores = []
for i in range(3):
    model.fit(train_set, trainy, epochs=epochs, batch_size=batch_size, verbose=1)
    _, accuracy_val = model.evaluate(val_set, valy, batch_size=batch_size, verbose=0) 
    accuracy_val = accuracy_val *100
    print('>#%d: %.3f' % (i+1, accuracy_val))
    scores.append(accuracy_val)


In [None]:
summarize_results(scores)

In [None]:
model.summary()