In [1]:
import psycopg2
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import sklearn
import tensorflow as tf
import assets.helpers as hp
import assets.metrics as m
import importlib
from sklearn.model_selection import train_test_split


2024-05-21 04:51:01.826007: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-21 04:51:01.906253: I external/local_tsl/tsl/cuda/cudart_stub.cc:32] Could not find cuda drivers on your machine, GPU will not be used.
2024-05-21 04:51:02.198083: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [45]:
RANDOM_STATE = 42
# WINDOW_LENGTH = 16
WINDOW_LENGTH = 24
MIN_LOS_ICU = 24
CLIENT_COUNT = 4
USE_FL = False
VITAL_NAMES = ['heartrate', 'sysbp', 'diasbp', 'meanbp', 'resprate', 'tempc', 'spo2']
LAB_NAMES = ['albumin', 'bun', 'bilirubin', 'lactate', 'bicarbonate', 'bands', 'chloride', 'creatinine', 'glucose', 'hemoglobin', 'hematocrit', 'platelet', 'potassium', 'ptt', 'sodium', 'wbc']
FOLDER_SUFFIX = ''
MAX_TIMESTEPS = 10  # Set an upper limit for the number of timesteps

In [29]:
# Connect to db
conn = psycopg2.connect(host='localhost', port=5432, dbname='mimic', user='zainab', password='password')
cur = conn.cursor()

# Read vital signs
vitals = pd.read_sql_query(f'SELECT * FROM mimiciii.vitals_windowed_{WINDOW_LENGTH:d}h;', conn)

# Read in labs values
labs = pd.read_sql_query(f'SELECT * FROM mimiciii.labs_windowed_{WINDOW_LENGTH:d}h;', conn)
# Close the cursor and connection to so the server can allocate bandwidth to other requests
cur.close()
conn.close()

  vitals = pd.read_sql_query(f'SELECT * FROM mimiciii.vitals_windowed_{WINDOW_LENGTH:d}h;', conn)
  labs = pd.read_sql_query(f'SELECT * FROM mimiciii.labs_windowed_{WINDOW_LENGTH:d}h;', conn)


In [25]:
vitals_spec = tf.TensorSpec(
    shape=(None, len(VITAL_NAMES)),
    dtype=tf.dtypes.float64,
    name='vitals'
)
labs_spec = tf.TensorSpec(
    shape=(None, len(LAB_NAMES)),
    dtype=tf.dtypes.float64,
    name='labs'
)
label_spec = tf.TensorSpec(
    shape=1,
    dtype=tf.dtypes.float64,
    name='label'
)

In [46]:
# Convert datetime columns to seconds
for col in vitals.select_dtypes(include=['datetime64', 'timedelta64']).columns:
    vitals[col] = vitals[col].astype(int) / 10**9  # Convert to seconds

for col in labs.select_dtypes(include=['datetime64', 'timedelta64']).columns:
    labs[col] = labs[col].astype(int) / 10**9  # Convert to seconds

# Convert categorical columns to numeric using one-hot encoding
vitals = pd.get_dummies(vitals, drop_first=True)
labs = pd.get_dummies(labs, drop_first=True)

# Merge the vitals and labs data on the common key (icustay_id)
merged_data = pd.merge(vitals, labs, on='icustay_id', suffixes=('_vitals', '_labs'), how='inner')

# Ensure the labels are the same in both datasets
assert all(merged_data['label_death_icu_vitals'] == merged_data['label_death_icu_labs']), "Mismatch in labels between vitals and labs"

# Drop one of the label columns
merged_data = merged_data.drop(columns=['label_death_icu_labs'])

# Rename the remaining label column for consistency
merged_data = merged_data.rename(columns={'label_death_icu_vitals': 'label_death_icu'})


In [47]:
vitals_features = merged_data.filter(like='_vitals')
labs_features = merged_data.filter(like='_labs')
labels = merged_data['label_death_icu']

# Convert to NumPy arrays
X_vitals = vitals_features.to_numpy()
X_labs = labs_features.to_numpy()
y = labels.to_numpy()

# Calculate the number of timesteps that divides the total features without a remainder
def find_timesteps(total_features, max_timesteps):
    for timesteps in range(max_timesteps, 0, -1):
        if total_features % timesteps == 0:
            return timesteps
    return 1  # Fallback to 1 if no valid timesteps found

# Find suitable timesteps for vitals and labs
timesteps_vitals = find_timesteps(X_vitals.shape[1], MAX_TIMESTEPS)
timesteps_labs = find_timesteps(X_labs.shape[1], MAX_TIMESTEPS)

# Calculate number of features per timestep
num_features_vitals = X_vitals.shape[1] // timesteps_vitals
num_features_labs = X_labs.shape[1] // timesteps_labs

# Reshape data into 3D arrays (samples, timesteps, features)
X_vitals = X_vitals.reshape((-1, timesteps_vitals, num_features_vitals))
X_labs = X_labs.reshape((-1, timesteps_labs, num_features_labs))

# Split into training and test sets
X_vitals_train, X_vitals_test, X_labs_train, X_labs_test, y_train, y_test = train_test_split(
    X_vitals, X_labs, y, test_size=0.2, random_state=42)


In [55]:
class F1Score(tf.keras.metrics.Metric):
    def __init__(self, name='f1_score', **kwargs):
        super(F1Score, self).__init__(name=name, **kwargs)
        self.precision = tf.keras.metrics.Precision()
        self.recall = tf.keras.metrics.Recall()

    def update_state(self, y_true, y_pred, sample_weight=None):
        self.precision.update_state(y_true, y_pred, sample_weight)
        self.recall.update_state(y_true, y_pred, sample_weight)

    def result(self):
        precision = self.precision.result()
        recall = self.recall.result()
        return 2 * ((precision * recall) / (precision + recall + tf.keras.backend.epsilon()))

    def reset_states(self):
        self.precision.reset_states()
        self.recall.reset_states()

In [56]:
# Vital channel
inputs_vitals = tf.keras.Input(shape=vitals_spec.shape, name='Input_vitals')
mask_vitals = tf.keras.layers.Masking(mask_value=-2., name='mask_vitals')(inputs_vitals)
GRU_layer1_vitals = tf.keras.layers.GRU(16, return_sequences=True, name='GRU_layer1_vitals')(mask_vitals)
GRU_layer2_vitals = tf.keras.layers.GRU(16, return_sequences=True, name='GRU_layer2_vitals')(GRU_layer1_vitals)
GRU_layer3_vitals = tf.keras.layers.GRU(16, return_sequences=False, name='GRU_layer3_vitals')(GRU_layer2_vitals)

# Labs channel
inputs_labs = tf.keras.Input(shape=labs_spec.shape, name='Input_labs')
mask_labs = tf.keras.layers.Masking(mask_value=-2., name='mask_labs')(inputs_labs)
GRU_layer1_labs = tf.keras.layers.GRU(16, return_sequences=True, name='GRU_layer1_labs')(mask_labs)
GRU_layer2_labs = tf.keras.layers.GRU(16, return_sequences=True, name='GRU_layer2_labs')(GRU_layer1_labs)
GRU_layer3_labs = tf.keras.layers.GRU(16, return_sequences=False, name='GRU_layer3_labs')(GRU_layer2_labs)

# Concatenation of both branches
merge = tf.keras.layers.Concatenate()([GRU_layer3_vitals, GRU_layer3_labs])

outputs = tf.keras.layers.Dense(1, activation='sigmoid', name='output')(merge)

model = tf.keras.Model(inputs=[inputs_vitals, inputs_labs], outputs=outputs, name='RNN_model')
model.summary()


model.compile(optimizer='adam',
              loss='binary_crossentropy',
              metrics=['accuracy', tf.keras.metrics.Precision(), tf.keras.metrics.Recall(), F1Score()])


In [57]:
# Train the model
history = model.fit(
    x=[X_vitals_train, X_labs_train],
    y=y_train,
    epochs=20,
    batch_size=32,
    validation_data=([X_vitals_test, X_labs_test], y_test)
)

Epoch 1/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m7s[0m 17ms/step - accuracy: 0.8143 - f1_score: 0.0375 - loss: 0.4941 - precision_4: 0.0938 - recall_4: 0.0428 - val_accuracy: 0.8627 - val_f1_score: 0.0000e+00 - val_loss: 0.3972 - val_precision_4: 0.0000e+00 - val_recall_4: 0.0000e+00
Epoch 2/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8688 - f1_score: 0.0000e+00 - loss: 0.3841 - precision_4: 0.0000e+00 - recall_4: 0.0000e+00 - val_accuracy: 0.8627 - val_f1_score: 0.0000e+00 - val_loss: 0.3853 - val_precision_4: 0.0000e+00 - val_recall_4: 0.0000e+00
Epoch 3/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.8622 - f1_score: 0.0000e+00 - loss: 0.3884 - precision_4: 0.0000e+00 - recall_4: 0.0000e+00 - val_accuracy: 0.8627 - val_f1_score: 0.0000e+00 - val_loss: 0.3736 - val_precision_4: 0.0000e+00 - val_recall_4: 0.0000e+00
Epoch 4/20
[1m90/90[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m

In [58]:
# Evaluate the model
results = model.evaluate(
    x=[X_vitals_test, X_labs_test],
    y=y_test
)
print("Test Loss, Test Accuracy, Test Precision, Test Recall, Test F1-Score:", results)


[1m23/23[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 3ms/step - accuracy: 0.8705 - f1_score: 0.2891 - loss: 0.2865 - precision_4: 0.4877 - recall_4: 0.2164
Test Loss, Test Accuracy, Test Precision, Test Recall, Test F1-Score: [0.3102279007434845, 0.8613037467002869, 0.48571428656578064, 0.17171716690063477, 0.25373128056526184]
