In [None]:
import pandas as pd 
import io
import numpy as np
import matplotlib as mpl
import matplotlib.pyplot as plt
from google.colab import files
from datetime import date, time, datetime
from google.colab import drive
from sklearn.model_selection import train_test_split
from sklearn import svm
from sklearn.metrics import accuracy_score
from sklearn.metrics import recall_score
from sklearn.metrics import precision_score
from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier

# Emergency Department Waiting Times 

### Data Upload

In [None]:
uploaded = files.upload()
df = pd.read_csv(io.BytesIO(uploaded['CleanedData-withAge-Total.csv']))

Saving CleanedData-withAge-Total.csv to CleanedData-withAge-Total.csv


### Data Clean Up

Helper functions to clean up Dataset, split into test and train, and function calls

In [None]:
def cleanDataset(myData):
    myData.drop(index=myData[myData['CTAS_1.0'] == 1].index, inplace=True)
    myData.drop(index=myData[myData['CTAS_2.0'] == 1].index, inplace=True)
    myData = myData.drop(["CTAS_3.0","CTAS_2.0","CTAS_1.0","CTAS_4.0","CTAS_5.0","PTN_SEX_U"],axis=1)
    myData = myData.dropna()
    for col in list(myData.columns):
        myList=[]
        myList.append(col)
        myData[myList] = myData[myList].apply(lambda x: (x - x.min()) / (x.max() - x.min()))
    return myData

In [None]:
def splitIntoTrainTest(myData):
    Xtrain, Xtest = train_test_split(myData, test_size=0.2)
    yTrain = Xtrain["Total_Time_Benchmark"]
    del Xtrain["Total_Time_Benchmark"]

    yTest = Xtest["Total_Time_Benchmark"]
    del Xtest["Total_Time_Benchmark"]
    return Xtrain, Xtest, yTrain, yTest

In [None]:
myData = cleanDataset(df)
Xtrain, Xtest, yTrain, yTest = splitIntoTrainTest(myData)

### Support Vector Machine Classifiers

In [None]:
def trainSVM(Xtrain, Xtest, yTrain, yTest, k):
    clf = svm.SVC(kernel=k,random_state=0,class_weight='balanced')
    clf.fit(Xtrain,yTrain)
    y_pred = clf.predict(Xtest)
    print("Accuracy:", accuracy_score(yTest, y_pred))
    print("Recall:", precision_score(yTest, y_pred))
    print("Specificity:", recall_score(yTest, y_pred))

In [None]:
trainSVM(Xtrain, Xtest, yTrain, yTest, 'linear')

Done Training
Accuracy: 0.6670949780490416
Recall: 0.5825786977145321
Specificity: 0.6972903225806452


In [None]:
trainSVM(Xtrain, Xtest, yTrain, yTest, 'poly')

Done Training
Accuracy: 0.7141021522646964
Recall: 0.642806352216165
Specificity: 0.6998709677419355


In [None]:
trainSVM(Xtrain, Xtest, yTrain, yTest, 'rbf')

Done Training
Accuracy: 0.7120676731984152
Recall: 0.642274472168906
Specificity: 0.6908387096774193


### Random Forest Classifier

In [None]:
def trainRF(Xtrain, Xtest, yTrain, yTest):
    clf=RandomForestClassifier(random_state = 42,class_weight='balanced')#class_weight='balanced'
    clf.fit(Xtrain,yTrain)
    y_pred = clf.predict(Xtest)
    print("Accuracy:", accuracy_score(yTest, y_pred))
    print("Recall:", precision_score(yTest, y_pred))
    print("Specificity:", recall_score(yTest, y_pred))

In [None]:
trainRF(Xtrain, Xtest, yTrain, yTest)

Done Training
Accuracy: 0.7230966912945711
Recall: 0.7069020866773675
Specificity: 0.568258064516129


### Decision Tree For Feature Selection

In [None]:
def trainDT(Xtrain, Xtest, yTrain, yTest):
    clf=DecisionTreeClassifier(class_weight='balanced')
    clf.fit(Xtrain,yTrain)
    y_pred = clf.predict(Xtest)
    print("Accuracy:", accuracy_score(yTest, y_pred))
    print("Recall:", precision_score(yTest, y_pred))
    print("Specificity:", recall_score(yTest, y_pred))
    myList=[]
    myList.extend(Xtrain.columns)
    print(myList)
    coefs_with_fns = sorted(zip(clf.feature_importances_, myList))
    df = pd.DataFrame(coefs_with_fns)
    df.columns = 'Coefficient', 'Feature'
    df.sort_values(by='coefficient')
    print(df)

In [None]:
trainDT(Xtrain, Xtest, yTrain, yTest)

Done Training
Accuracy: 0.6376485705107613
Recall: 0.5630619059851014
Specificity: 0.5656774193548387
['ARRIVAL_MODE_Air Ambulance', 'ARRIVAL_MODE_Ground Ambulance', 'ARRIVAL_MODE_No Ambulance', 'INIT_TREAT_LOC_GRP_Intake', 'INIT_TREAT_LOC_GRP_MET', 'INIT_TREAT_LOC_GRP_Main', 'INIT_LOC_GRP_Intake', 'INIT_LOC_GRP_MET', 'INIT_LOC_GRP_Main', 'INIT_LOC_GRP_WR Intake', 'INIT_LOC_GRP_WR MET', 'INIT_LOC_GRP_WR Main', 'INIT_LOC_GRP_WR Non ED', 'Number_of_Patients_In_Waiting_Before', 'Number_of_Patients_In_Waiting_Before_CTAS', 'Number_of_Patients_In_Waiting_After', 'Number_of_Patients_In_Waiting_After_CTAS', 'Number_of_Patients_Waiting_For_Discharge', 'Number_of_Patients_Waiting_For_Discharge_CTAS', 'Number_of_Patients_Waiting_For_Admission', 'Number_of_Patients_Waiting_For_Admission_CTAS', 'Number_of_Physicians_At_Arrival', 'Number_of_Physicians_At_3Hours_After', 'Number_of_Physicians_At_2Hours_After', 'Number_of_Physicians_At_1Hours_After', 'Number_of_Physicians_At_3Hours_Before', 'Number_of

### Artifical Neural Network

In [None]:
from keras.models import Sequential
from keras.layers import Activation, Dense
from tensorflow.keras import layers
from keras.callbacks import ModelCheckpoint, EarlyStopping, ReduceLROnPlateau, ModelCheckpoint
from keras.optimizers import Adam
from sklearn.utils import class_weight
from sklearn.model_selection import cross_validate
import tensorflow as tf
from sklearn.metrics import confusion_matrix,f1_score, precision_score, recall_score

In [None]:
def buildModel(dim):
    model = Sequential()
    model.add(Dense(32, input_dim=dim, activation='relu'))
    model.add(Dense(32, activation='relu'))
    model.add(Dense(16, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(8, activation='relu'))
    model.add(Dense(1, activation='sigmoid'))

    return model

In [None]:
def fitModel(myData, model, Xtrain, yTrain, Xtest, yTest):
  opt = Adam(lr=0.0001)
  class_weight_dict = dict(enumerate(class_weight.compute_class_weight('balanced', np.unique(yTrain), yTrain)))
  model.compile(loss='mse', optimizer=opt, metrics=["accuracy",tf.keras.metrics.Precision(), tf.keras.metrics.Recall()])
  callbacks = [
            EarlyStopping(patience=100, verbose=1),
            ReduceLROnPlateau(factor=0.50, patience=25, min_lr=0.0001, verbose=1),
            ModelCheckpoint(
                filepath="Test1.h5",
                verbose=1, save_best_only=True, save_weights_only=True, monitor="val_loss", mode='min')]
  model.fit(Xtrain, yTrain, epochs=150, batch_size=50,validation_split=0.20,verbose=1,shuffle=True,callbacks=callbacks,class_weight=class_weight_dict)
  #y_pred = model.predict(Xtest)
  """ scoring = ['accuracy', 'precision', 'recall', 'roc_auc']
  scores = cross_validate(model, Xtrain,yTrain, cv =5, scoring=scoring, return_train_score=False)
  print(sum(scores["test_accuracy"])/len(scores["test_accuracy"]))
  print(sum(scores["test_precision"])/len(scores["test_precision"]))
  print(sum(scores["test_recall"])/len(scores["test_recall"]))"""
  y_pred = model.predict_classes(Xtest, verbose=0)
  print("Accuracy:", accuracy_score(yTest, y_pred))
  print("Recall:", precision_score(yTest, y_pred))
  print("Specificity:", recall_score(yTest, y_pred))

In [None]:
model = buildModel(49)
fitModel(myData, model, Xtrain, yTrain, Xtest, yTest)

Epoch 1/150

Epoch 00001: val_loss improved from inf to 0.25065, saving model to Test1.h5
Epoch 2/150

Epoch 00002: val_loss improved from 0.25065 to 0.24722, saving model to Test1.h5
Epoch 3/150

Epoch 00003: val_loss improved from 0.24722 to 0.24229, saving model to Test1.h5
Epoch 4/150

Epoch 00004: val_loss improved from 0.24229 to 0.23534, saving model to Test1.h5
Epoch 5/150

Epoch 00005: val_loss improved from 0.23534 to 0.23487, saving model to Test1.h5
Epoch 6/150

Epoch 00006: val_loss improved from 0.23487 to 0.22925, saving model to Test1.h5
Epoch 7/150

Epoch 00007: val_loss improved from 0.22925 to 0.22842, saving model to Test1.h5
Epoch 8/150

Epoch 00008: val_loss improved from 0.22842 to 0.22178, saving model to Test1.h5
Epoch 9/150

Epoch 00009: val_loss improved from 0.22178 to 0.21704, saving model to Test1.h5
Epoch 10/150

Epoch 00010: val_loss improved from 0.21704 to 0.21339, saving model to Test1.h5
Epoch 11/150

Epoch 00011: val_loss improved from 0.21339 to 0.



Accuracy: 0.7006103437198844
Recall: 0.6162764771460424
Specificity: 0.7199791612399062


#### Feature Reduction for ANN Classifier

Following Model Has 42 features, reduced from 49 earlier

In [None]:
myData_reduced_42 = myData.drop(["ARRIVAL_MODE_Air Ambulance",'PTN_AGE_>100',"Labs_Ordered_Hour_After_Arrival",
                          "Number_of_Patients_In_Waiting_Before_CTAS","Number_of_Patients_In_Waiting_Before",
                          "PTN_AGE_1-10","INIT_LOC_GRP_WR Non ED"],axis=1)
Xtrain_42, Xtest_42, yTrain_42, yTest_42 = splitIntoTrainTest(myData_reduced_42)


In [None]:
model_42 = buildModel(42)
fitModel(myData_reduced_42, model_42,  Xtrain_42, yTrain_42, Xtest_42, yTest_42)

Epoch 1/150

Epoch 00001: val_loss improved from inf to 0.24951, saving model to Test1.h5
Epoch 2/150

Epoch 00002: val_loss improved from 0.24951 to 0.23984, saving model to Test1.h5
Epoch 3/150

Epoch 00003: val_loss improved from 0.23984 to 0.22907, saving model to Test1.h5
Epoch 4/150

Epoch 00004: val_loss improved from 0.22907 to 0.22041, saving model to Test1.h5
Epoch 5/150

Epoch 00005: val_loss improved from 0.22041 to 0.21767, saving model to Test1.h5
Epoch 6/150

Epoch 00006: val_loss improved from 0.21767 to 0.21565, saving model to Test1.h5
Epoch 7/150

Epoch 00007: val_loss improved from 0.21565 to 0.21415, saving model to Test1.h5
Epoch 8/150

Epoch 00008: val_loss improved from 0.21415 to 0.21400, saving model to Test1.h5
Epoch 9/150

Epoch 00009: val_loss improved from 0.21400 to 0.21273, saving model to Test1.h5
Epoch 10/150

Epoch 00010: val_loss improved from 0.21273 to 0.20625, saving model to Test1.h5
Epoch 11/150

Epoch 00011: val_loss improved from 0.20625 to 0.



Accuracy: 0.7047863796980405
Recall: 0.6242053789731051
Specificity: 0.6766498807315133


#### Feature Reduction for ANN Classifier

Following Model Has 13 features, reduced from 49 and 42 earlier

In [None]:
myData_reduced_13 = myData.drop(["Number_of_Patients_In_Waiting_Before_CTAS",
                        "Number_of_Patients_In_Waiting_Before",
                        "ARRIVAL_MODE_Air Ambulance",
                        "PTN_AGE_>100",
                        "PTN_AGE_1-10",
                        "Labs_Ordered_Hour_After_Arrival",
                        "INIT_LOC_GRP_WR Non ED",
                        "Labs_Ordered_Hour_Before_Arrival",
                        "INIT_LOC_GRP_Intake",
                        "INIT_LOC_GRP_MET",
                        "INIT_LOC_GRP_Main",
                        "DIs_Ordered_Hour_Before_Arrival",
                        "DIs_Ordered_Hour_After_Arrival",
                        "PTN_AGE_91-100",
                        "time_of_day_of_arrival_Morning Dawn",
                        "time_of_day_of_arrival_Morning Dusk",
                        "time_of_day_of_arrival_Midday",
                        "time_of_day_of_arrival_Night/Evening",
                        "PTN_AGE_11-20",
                        "INIT_LOC_GRP_WR MET",
                        "ARRIVAL_MODE_Ground Ambulance",
                        "PTN_AGE_81-90",
                        "PTN_AGE_71-80",
                        "ARRIVAL_MODE_No Ambulance",
                        "INIT_TREAT_LOC_GRP_Main",
                        "PTN_AGE_61-70",
                        "PTN_AGE_51-60",
                        "PTN_AGE_41-50",
                        "INIT_LOC_GRP_WR Intake",
                        "PTN_AGE_21-30",
                        "PTN_AGE_31-40",
                        "INIT_LOC_GRP_WR Main",
                        "INIT_TREAT_LOC_GRP_Intake",
                        "INIT_TREAT_LOC_GRP_MET",
                        "PTN_SEX_F",
                        "PTN_SEX_M"],axis=1)
Xtrain_13, Xtest_13, yTrain_13, yTest_13 = splitIntoTrainTest(myData_reduced_13)

In [None]:
model_13 = buildModel(13)
fitModel(myData_reduced_13, model_13, Xtrain_13, yTrain_13, Xtrain_13, yTrain_13)

Epoch 1/150

Epoch 00001: val_loss improved from inf to 0.24899, saving model to Test1.h5
Epoch 2/150

Epoch 00002: val_loss improved from 0.24899 to 0.24404, saving model to Test1.h5
Epoch 3/150

Epoch 00003: val_loss improved from 0.24404 to 0.23794, saving model to Test1.h5
Epoch 4/150

Epoch 00004: val_loss improved from 0.23794 to 0.22963, saving model to Test1.h5
Epoch 5/150

Epoch 00005: val_loss improved from 0.22963 to 0.22627, saving model to Test1.h5
Epoch 6/150

Epoch 00006: val_loss improved from 0.22627 to 0.22359, saving model to Test1.h5
Epoch 7/150

Epoch 00007: val_loss improved from 0.22359 to 0.21703, saving model to Test1.h5
Epoch 8/150

Epoch 00008: val_loss did not improve from 0.21703
Epoch 9/150

Epoch 00009: val_loss improved from 0.21703 to 0.21223, saving model to Test1.h5
Epoch 10/150

Epoch 00010: val_loss did not improve from 0.21223
Epoch 11/150

Epoch 00011: val_loss improved from 0.21223 to 0.20863, saving model to Test1.h5
Epoch 12/150

Epoch 00012: v



Accuracy: 0.6932426643820947
Recall: 0.5982332155477031
Specificity: 0.7720521172638436
