# Neural Network Lab Classifier
-------------------------------------------
#### This neural network will use a dataset containing twenty-six separate input variables that are the initial blood lab tests taken upon a patient's admission to an Intensive Care Unit. Each input variable is an integer that represents that particular test's lab value. Tests that were not performed are represented by a zero. The neural network will take in these twenty-six input variables and attempt to predict whether the patient died while in the ICU.

In [1]:
# Import necessary dependencies
import pandas as pd
import numpy as np
import os
import tensorflow as tf
from keras.models import Sequential
from keras.utils import to_categorical
from keras.layers import Dense

Using TensorFlow backend.


# Data Pre-Processing
-------
#### Below is the process for combining the CSVs containing our desired information, and data cleaning so that it may be used in the neural network

In [2]:
# Import labsNew.csv and display
labsData = pd.read_csv("../Resources/labsNew.csv")

labsData.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,Albumin,Anion Gap,Bicarbonate,"Bilirubin, Total",Chloride,"Chloride, Whole Blood",Creatinine,Glucose_Blood_Gas,...,PT,PTT,Phosphate,Platelet Count,Potassium,"Potassium, Whole Blood",Sodium,"Sodium, Whole Blood",Urea Nitrogen,White Blood Cells
0,2,163353.0,0.0,0.0,0.0,9.3,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.1
1,3,145834.0,1.8,17.0,25.0,0.8,99.0,114.0,3.2,265.0,...,14.8,125.7,4.8,179.0,5.4,3.7,136.0,139.0,36.0,15.1
2,4,185777.0,2.8,17.0,24.0,2.2,97.0,0.0,0.5,0.0,...,12.3,31.3,3.2,207.0,3.1,0.0,135.0,0.0,9.0,9.7
3,5,178980.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,309.0,0.0,0.0,0.0,0.0,0.0,13.9
4,6,107064.0,2.7,17.0,16.0,0.2,107.0,95.0,3.5,106.0,...,12.5,55.2,4.1,198.0,4.9,4.2,135.0,135.0,86.0,22.7


In [3]:
# Import the PATIENTS.csv
patientsData = pd.read_csv('../../Resources/PATIENTS.csv')

# Exclude all columns from the dataframe except for SUBJECT_ID and EXPIRE_FLAG
patientsData = patientsData[['SUBJECT_ID','EXPIRE_FLAG']]

# Display the resulting dataframe
patientsData.head()

Unnamed: 0,SUBJECT_ID,EXPIRE_FLAG
0,249,0
1,250,1
2,251,0
3,252,0
4,253,0


In [4]:
# Join the labs data with the patient expire flag
joinedData = labsData.merge(patientsData, on='SUBJECT_ID')

# Display the joined dataframe
joinedData.head()

Unnamed: 0,SUBJECT_ID,HADM_ID,Albumin,Anion Gap,Bicarbonate,"Bilirubin, Total",Chloride,"Chloride, Whole Blood",Creatinine,Glucose_Blood_Gas,...,PTT,Phosphate,Platelet Count,Potassium,"Potassium, Whole Blood",Sodium,"Sodium, Whole Blood",Urea Nitrogen,White Blood Cells,EXPIRE_FLAG
0,2,163353.0,0.0,0.0,0.0,9.3,0.0,0.0,0.0,0.0,...,0.0,0.0,5.0,0.0,0.0,0.0,0.0,0.0,0.1,0
1,3,145834.0,1.8,17.0,25.0,0.8,99.0,114.0,3.2,265.0,...,125.7,4.8,179.0,5.4,3.7,136.0,139.0,36.0,15.1,1
2,4,185777.0,2.8,17.0,24.0,2.2,97.0,0.0,0.5,0.0,...,31.3,3.2,207.0,3.1,0.0,135.0,0.0,9.0,9.7,0
3,5,178980.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,309.0,0.0,0.0,0.0,0.0,0.0,13.9,0
4,6,107064.0,2.7,17.0,16.0,0.2,107.0,95.0,3.5,106.0,...,55.2,4.1,198.0,4.9,4.2,135.0,135.0,86.0,22.7,0


In [5]:
# Define the X and y variables for the neural net, being sure to drop the SUBJECT_ID and HADM_ID columns
X = joinedData.drop(['SUBJECT_ID','HADM_ID','EXPIRE_FLAG'], axis=1)
y = joinedData["EXPIRE_FLAG"]

# Train Test Split
---------
#### Below splits the above data into train and test groups for training the neural network

In [6]:
# Import sklearn and keras dependencies
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from keras.utils import to_categorical

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=1, stratify=y)

# Scale the X data
X_scaler = StandardScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

# Neural Network Model Creation
--------
#### Below is the construction and training process for the neural network. This neural network will be a deep, sequential neural net, employing relu and softmax activation functions, the adam optimizer and the sparse categorical crossentropy loss function. There will be four layers from input to output, with each hidden layer containing fourteen nodes.

### Note:
##### If you wish to test an existing model, and do not want to create a new model, do not run the next four cells. The four cells below are for building new models. Just below them is where one can load existing models for testing.

In [7]:
# Define the model and add all desired layers to the neural network
model = Sequential()
model.add(Dense(units=14, activation='relu', input_dim=26))
model.add(Dense(units=14, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [8]:
# Compile the model
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Set the parameters for fitting the model
model.fit(
    X_train_scaled,
    y_train,
    epochs=60,
    shuffle=True,
    verbose=2
)

Epoch 1/60
 - 7s - loss: 0.5609 - acc: 0.6906
Epoch 2/60
 - 4s - loss: 0.5327 - acc: 0.7136
Epoch 3/60
 - 4s - loss: 0.5250 - acc: 0.7187
Epoch 4/60
 - 4s - loss: 0.5207 - acc: 0.7211
Epoch 5/60
 - 4s - loss: 0.5179 - acc: 0.7238
Epoch 6/60
 - 4s - loss: 0.5156 - acc: 0.7249
Epoch 7/60
 - 4s - loss: 0.5139 - acc: 0.7246
Epoch 8/60
 - 4s - loss: 0.5119 - acc: 0.7280
Epoch 9/60
 - 4s - loss: 0.5112 - acc: 0.7296
Epoch 10/60
 - 4s - loss: 0.5096 - acc: 0.7302
Epoch 11/60
 - 4s - loss: 0.5086 - acc: 0.7313
Epoch 12/60
 - 4s - loss: 0.5079 - acc: 0.7307
Epoch 13/60
 - 4s - loss: 0.5075 - acc: 0.7305
Epoch 14/60
 - 4s - loss: 0.5064 - acc: 0.7311
Epoch 15/60
 - 4s - loss: 0.5061 - acc: 0.7328
Epoch 16/60
 - 4s - loss: 0.5055 - acc: 0.7309
Epoch 17/60
 - 4s - loss: 0.5051 - acc: 0.7317
Epoch 18/60
 - 4s - loss: 0.5046 - acc: 0.7341
Epoch 19/60
 - 4s - loss: 0.5043 - acc: 0.7340
Epoch 20/60
 - 4s - loss: 0.5035 - acc: 0.7338
Epoch 21/60
 - 4s - loss: 0.5034 - acc: 0.7349
Epoch 22/60
 - 4s - lo

<keras.callbacks.History at 0xb3ae914e0>

In [9]:
# Evaluate the model and print the loss and accuracy scores to check model performance
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 0.5104420580921719, Accuracy: 0.7282488986784141


In [10]:
# Compare the first five predictions to the actual values for evaluative purposes
predictions = model.predict_classes(X_test[:5])

print(f"Predicted classes: {predictions}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: [0 0 0 0 0]
Actual Labels: [1, 0, 1, 1, 0]


# Save New Model
---------
#### If a new model proves satisfactory, the below code will save it for future use. Be sure to give it a unique name so it won't overwrite an existing saved model.

In [17]:
model.save("NeuralNetworkModels/LabValueModel#1.h5")

# Existing Models
--------
#### Below is the code for loading an existing model for testing. Be sure to run the first six cells of the notebook before running the code below.

In [18]:
# To test a model, load the desired model and then run the cells below

# ENTER THE NAME OF THE DESIRED MODEL TO LOAD HERE
NNModel = "LabValueModel#1.h5"

from keras.models import load_model
first_model = load_model(f"NeuralNetworkModels/{NNModel}")

In [19]:
# Evaluate the model and print the loss and accuracy scores to check model performance
model_loss, model_accuracy = first_model.evaluate(
    X_test_scaled, y_test, verbose=2)
print(f"Loaded Model Neural Network - Loss: {model_loss}, Loaded Model Accuracy: {model_accuracy}")

Loaded Model Neural Network - Loss: 0.5104420580921719, Loaded Model Accuracy: 0.7282488986784141


In [20]:
# Create a confusion matrix for the model

# Predict the classes using the predict_classes method and create an array with the actual classes
predictions = first_model.predict_classes(X_test)
y_test_arr = list(y_test)

# Create placeholder variables for the confusion matrix, initialize them at zero
tp = 0
fp = 0
tn = 0
fn = 0

# Create a for loop that ranges the length of the test data
for i in range(0,14527):
    
    # Use conditionals to compare the predicted class to the actual class and increment the appropriate variable
    # depending on if the prediction was correct
    if predictions[i] == 0:
        if y_test_arr[i] == 0:
            tn += 1
        else:
            fn += 1
        
    elif predictions[i] == 1:
        if y_test_arr[i] == 0:
            fp += 1
        else:
            tp += 1

            
# Calculate the accuracy, precision and recall scores and save to variables
acc = (tp + tn) / 14528
pre = tp / (tp + fp)
rec = tp / (tp + fn)

# Calculate the f1 score
f1 = 2 * ((pre * rec) / (pre + rec))
  
# Print out the values to see the results of the confusion matrix
print(f"True Positives: {tp}")
print(f"False Positive: {fp}")
print(f"True Negatives: {tn}")
print(f"False Negatives: {fn}")
print(f"---------------------------------")
print(f"Accuracy: {round(acc,4)}")
print(f"Precision: {round(pre,4)}")
print(f"Recall: {round(rec,4)}")
print(f"f1 Score: {round(f1,4)}")

True Positives: 3448
False Positive: 1830
True Negatives: 7131
False Negatives: 2118
---------------------------------
Accuracy: 0.7282
Precision: 0.6533
Recall: 0.6195
f1 Score: 0.6359
