In [90]:
# Dependencies
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, MinMaxScaler
from tensorflow.keras.utils import to_categorical
import tensorflow
# tensorflow.keras.__version__

In [91]:
# Read in csv (already transformed)
hosp_df = pd.read_csv('..//..//data/hospitals/All_Hospitals.csv')

# Drop columns not need for modeling
df = hosp_df.drop(['Description', 'FAC_NO', 'FAC_NAME','FAC_STR_ADDR','FAC_CITY','FAC_PHONE','FAC_ADMIN_NAME','FAC_OPERATED_THIS_YR','FAC_OP_PER_BEGIN_DT','FAC_OP_PER_END_DT','FAC_PAR_CORP_NAME','FAC_PAR_CORP_BUS_ADDR','FAC_PAR_CORP_CITY','FAC_PAR_CORP_STATE','REPT_PREP_NAME','SUBMITTED_DT','REV_REPT_PREP_NAME','REVISED_DT','CORRECTED_DT','LICENSE_NO','LICENSE_EFF_DATE','LICENSE_EXP_DATE','LICENSE_STATUS','FACILITY_LEVEL','ASSEMBLY_DIST','SENATE_DIST','CONGRESS_DIST','CENS_TRACT','MED_SVC_STUDY_AREA','LA_COUNTY_SVC_PLAN_AREA','COUNTY'], axis=1)
# ,'RURAL_HEALTH_CLINIC']

# Format Categorical Data
df.TEACH_HOSP = df.TEACH_HOSP != "No"
df.TEACH_RURAL = df.TEACH_RURAL != "No"
df.TRAUMA_CTR = df.TRAUMA_CTR != 0
df.HEALTH_SVC_AREA = df.HEALTH_SVC_AREA.str[:2]
df.LIC_CAT = df.LIC_CAT == "General Acute Care Hospital"
df.LICEE_TOC = df.LICEE_TOC == "Non-Profit Corporation (including church-related)"
df.PRIN_SERVICE_TYPE = df.PRIN_SERVICE_TYPE == "General Medical / Surgical"
df.FAC_ACQUIRE_EQUIP_OVER_500K = df.FAC_ACQUIRE_EQUIP_OVER_500K != "No"
df.OFFER_ALTERNATE_BIRTH_PROG = df.OFFER_ALTERNATE_BIRTH_PROG != "No"
df.LIC_CARDIOLOGY_CARDIOVASCULAR_SURG_SERVICES = df.LIC_CARDIOLOGY_CARDIOVASCULAR_SURG_SERVICES != 0
df.OFFER_AMBULATORY_SURG_PROG = df.OFFER_AMBULATORY_SURG_PROG != "No"
df.LIC_ED_LEV_END = df.LIC_ED_LEV_END != 0
df.EMSA_TRAUMA_DESIGNATION_PEDIATRIC = df.EMSA_TRAUMA_DESIGNATION_PEDIATRIC != 0
df.EMER_DEPT_AMBULANCE_DIVERSION_HOURS = df.EMER_DEPT_AMBULANCE_DIVERSION_HOURS != "No"
df.LIC_ED_LEV_BEGIN = df.LIC_ED_LEV_BEGIN != 0
df.EMSA_TRAUMA_DESIGNATION = df.EMSA_TRAUMA_DESIGNATION != 0
df.OUTPATIENT_PALLIATIVE_CARE_SERV_OFFERED = df.OUTPATIENT_PALLIATIVE_CARE_SERV_OFFERED != 0
df.INPATIENT_PALLIATIVE_CARE_PROG_OFFERED = df.INPATIENT_PALLIATIVE_CARE_PROG_OFFERED != 0
df.INPATIENT_HOSPICE_PROG_OFFERED = df.INPATIENT_HOSPICE_PROG_OFFERED != 0
df.SHORT_DOYLE_SERVICES_OFFERED = df.SHORT_DOYLE_SERVICES_OFFERED != 0


# df.PRIMARY_NON_ENGLISH_LANG = df.PRIMARY_NON_ENGLISH_LANG == "Spanish"
df.replace(True, 1, inplace=True)

# Drop final coloumns
df.drop(df.iloc[:, 267:323], inplace=True, axis=1)

# Fill in all blanks with 0 (for deep learning)
df = df.fillna(0)

df.head()

Unnamed: 0,FAC_ZIP,FAC_PAR_CORP_ZIP,TRAUMA_CTR,TEACH_HOSP,TEACH_RURAL,LONGITUDE,LATITUDE,HEALTH_SVC_AREA,LIC_CAT,LICEE_TOC,...,CARD_CATH_PED_IP_THER_VST,CARD_CATH_PED_OP_THER_VST,CARDIAC_CATHETERIZATION_ADULT_INPAT_THERAPEUTIC_VISITS,CARDIAC_CATHETERIZATION_ADULT_OUTPAT_THERAPEUTIC_VISITS,CARDIAC_CATHETERIZATION_THERAPEUTIC_VISITS_TOT,INPATIENT_AVG_PER_SURGERY,OUTPATIENT_AVG_PER_SURGERY,FAC_ACQUIRE_EQUIP_OVER_500K,Target,Label
0,94501.0,94602,1.0,0.0,0.0,-122.253991,37.76266,5,1.0,0.0,...,0,0,0,0,0,105.0,75.2,1.0,0.059835,1.0
1,94705.0,95833,1.0,0.0,0.0,-122.25743,37.85645,5,1.0,1.0,...,0,0,0,0,0,127.6,75.8,1.0,0.012884,0.0
2,94609.0,0,1.0,0.0,0.0,-122.26747,37.83722,5,1.0,1.0,...,22,35,2,1,60,137.4,70.8,0.0,0.0,0.0
3,94578.0,94602,1.0,0.0,0.0,-122.11819,37.70648,5,1.0,0.0,...,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0
4,94704.0,95833,1.0,0.0,0.0,-122.26984,37.86373,5,1.0,1.0,...,0,0,0,0,0,0.0,0.0,0.0,0.0,0.0


In [92]:
# Retrive all possible inputs
features = df.iloc[:, 0:267]
features

Unnamed: 0,FAC_ZIP,FAC_PAR_CORP_ZIP,TRAUMA_CTR,TEACH_HOSP,TEACH_RURAL,LONGITUDE,LATITUDE,HEALTH_SVC_AREA,LIC_CAT,LICEE_TOC,...,CARDIOVASCULAR_SURG_OPER_ADULT_BYPASS_NOT_USED,CARDIOVASCULAR_SURG_OPER_BYPASS_NOT_USED_TOT,CARD_CATH_PED_IP_THER_VST,CARD_CATH_PED_OP_THER_VST,CARDIAC_CATHETERIZATION_ADULT_INPAT_THERAPEUTIC_VISITS,CARDIAC_CATHETERIZATION_ADULT_OUTPAT_THERAPEUTIC_VISITS,CARDIAC_CATHETERIZATION_THERAPEUTIC_VISITS_TOT,INPATIENT_AVG_PER_SURGERY,OUTPATIENT_AVG_PER_SURGERY,FAC_ACQUIRE_EQUIP_OVER_500K
0,94501.0,94602,1.0,0.0,0.0,-122.253991,37.762660,05,1.0,0.0,...,0,0,0,0,0,0,0,105.0,75.2,1.0
1,94705.0,95833,1.0,0.0,0.0,-122.257430,37.856450,05,1.0,1.0,...,0,0,0,0,0,0,0,127.6,75.8,1.0
2,94609.0,0,1.0,0.0,0.0,-122.267470,37.837220,05,1.0,1.0,...,3,34,22,35,2,1,60,137.4,70.8,0.0
3,94578.0,94602,1.0,0.0,0.0,-122.118190,37.706480,05,1.0,0.0,...,0,0,0,0,0,0,0,0.0,0.0,0.0
4,94704.0,95833,1.0,0.0,0.0,-122.269840,37.863730,05,1.0,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
473,95901.0,95991,1.0,0.0,0.0,-121.593602,39.138805,02,1.0,1.0,...,89,89,0,0,369,358,727,145.5,84.8,0.0
474,93703.0,93611,1.0,0.0,0.0,-119.779521,36.778140,09,1.0,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0.0
475,93940.0,93940,1.0,0.0,0.0,-121.892428,36.580786,08,1.0,1.0,...,0,0,0,0,0,0,0,0.0,0.0,0.0
476,95023.0,95023,1.0,0.0,0.0,-121.386700,36.835140,08,1.0,0.0,...,0,0,0,0,0,0,0,0.0,0.0,0.0


## Data Pre-Processing

In [93]:
# Define X and y
X = features
y = df["Label"]
print(X.shape, y.shape)

(478, 267) (478,)


In [94]:
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.33, random_state=101)

In [95]:
X_scaler = MinMaxScaler().fit(X_train)
X_train_scaled = X_scaler.transform(X_train)
X_test_scaled = X_scaler.transform(X_test)

In [96]:
# # Step 1: Label-encode data set
label_encoder = LabelEncoder()
label_encoder.fit(y_train)
encoded_y_train = label_encoder.transform(y_train)
encoded_y_test = label_encoder.transform(y_test)

In [97]:
# # Step 2: Convert encoded labels to one-hot-encoding
y_train_categorical = to_categorical(encoded_y_train)
y_test_categorical = to_categorical(encoded_y_test)

# Create a Deep Learning Model

In [98]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense

In [99]:
# Create model and add layers
model = Sequential()
model.add(Dense(units=100, activation='relu', input_dim=267))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=100, activation='relu'))
model.add(Dense(units=2, activation='softmax'))

In [100]:
tensorflow.keras.metrics.CategoricalAccuracy(
    name='categorical_accuracy', dtype=None
)

<tensorflow.python.keras.metrics.CategoricalAccuracy at 0x25101a0ccc8>

In [101]:
# Compile and fit the model
model.compile(optimizer='adam',
              loss='categorical_crossentropy',
              metrics=[tf.keras.metrics.CategoricalAccuracy()])
#               metrics=['accuracy'])

In [102]:
model.summary()

Model: "sequential_1"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_4 (Dense)              (None, 100)               26800     
_________________________________________________________________
dense_5 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_6 (Dense)              (None, 100)               10100     
_________________________________________________________________
dense_7 (Dense)              (None, 2)                 202       
Total params: 47,202
Trainable params: 47,202
Non-trainable params: 0
_________________________________________________________________


In [103]:
model.fit(
    X_train_scaled,
    y_train_categorical,
    validation_data=(X_test_scaled, y_test_categorical),
    epochs=100,
    shuffle=True,
    verbose=2
)

Train on 320 samples, validate on 158 samples
Epoch 1/100
320/320 - 0s - loss: 0.6200 - categorical_accuracy: 0.7000 - val_loss: 0.5260 - val_categorical_accuracy: 0.7911
Epoch 2/100
320/320 - 0s - loss: 0.5378 - categorical_accuracy: 0.7750 - val_loss: 0.4846 - val_categorical_accuracy: 0.7911
Epoch 3/100
320/320 - 0s - loss: 0.4689 - categorical_accuracy: 0.7750 - val_loss: 0.4608 - val_categorical_accuracy: 0.7911
Epoch 4/100
320/320 - 0s - loss: 0.4223 - categorical_accuracy: 0.7812 - val_loss: 0.4517 - val_categorical_accuracy: 0.7911
Epoch 5/100
320/320 - 0s - loss: 0.3865 - categorical_accuracy: 0.8094 - val_loss: 0.4408 - val_categorical_accuracy: 0.7975
Epoch 6/100
320/320 - 0s - loss: 0.3386 - categorical_accuracy: 0.8469 - val_loss: 0.4363 - val_categorical_accuracy: 0.7975
Epoch 7/100
320/320 - 0s - loss: 0.3036 - categorical_accuracy: 0.8687 - val_loss: 0.4372 - val_categorical_accuracy: 0.8165
Epoch 8/100
320/320 - 0s - loss: 0.2788 - categorical_accuracy: 0.9031 - val_lo

Epoch 65/100
320/320 - 0s - loss: 2.3648e-04 - categorical_accuracy: 1.0000 - val_loss: 1.5720 - val_categorical_accuracy: 0.8038
Epoch 66/100
320/320 - 0s - loss: 2.3069e-04 - categorical_accuracy: 1.0000 - val_loss: 1.5844 - val_categorical_accuracy: 0.8038
Epoch 67/100
320/320 - 0s - loss: 2.2147e-04 - categorical_accuracy: 1.0000 - val_loss: 1.5774 - val_categorical_accuracy: 0.8038
Epoch 68/100
320/320 - 0s - loss: 2.1259e-04 - categorical_accuracy: 1.0000 - val_loss: 1.5874 - val_categorical_accuracy: 0.8038
Epoch 69/100
320/320 - 0s - loss: 2.0511e-04 - categorical_accuracy: 1.0000 - val_loss: 1.6051 - val_categorical_accuracy: 0.8038
Epoch 70/100
320/320 - 0s - loss: 2.0016e-04 - categorical_accuracy: 1.0000 - val_loss: 1.6037 - val_categorical_accuracy: 0.8038
Epoch 71/100
320/320 - 0s - loss: 1.9041e-04 - categorical_accuracy: 1.0000 - val_loss: 1.6016 - val_categorical_accuracy: 0.8038
Epoch 72/100
320/320 - 0s - loss: 1.8524e-04 - categorical_accuracy: 1.0000 - val_loss: 1.

<tensorflow.python.keras.callbacks.History at 0x25103cd8648>

## Quantify our Trained Model

In [104]:
model_loss, model_accuracy = model.evaluate(
    X_test_scaled, y_test_categorical, verbose=3)
print(
    f"Normal Neural Network - Loss: {model_loss}, Accuracy: {model_accuracy}")

Normal Neural Network - Loss: 1.734631229050552, Accuracy: 0.8037974834442139


## Make Predictions

In [105]:
encoded_predictions = model.predict_classes(X_test_scaled[:5])
prediction_labels = label_encoder.inverse_transform(encoded_predictions)

In [106]:
print(f"Predicted classes: {prediction_labels}")
print(f"Actual Labels: {list(y_test[:5])}")

Predicted classes: [0. 0. 0. 1. 0.]
Actual Labels: [1.0, 0.0, 0.0, 1.0, 0.0]


In [107]:
from sklearn.metrics import confusion_matrix
y_true = y_test[:5]
y_pred = prediction_labels
confusion_matrix(y_true, y_pred)


array([[3, 0],
       [1, 1]], dtype=int64)

In [108]:
from sklearn.metrics import cohen_kappa_score
y_true = y_test[:5]
y_pred = prediction_labels
cohen_kappa_score(y_true, y_pred)

0.5454545454545454

In [109]:
from sklearn.metrics import classification_report
y_true = [0, 1, 2, 2, 0]
y_pred = [0, 0, 2, 1, 0]
target_names = ['class 0', 'class 1', 'class 3']
print(classification_report(y_true, y_pred, target_names=target_names))

              precision    recall  f1-score   support

     class 0       0.67      1.00      0.80         2
     class 1       0.00      0.00      0.00         1
     class 3       1.00      0.50      0.67         2

    accuracy                           0.60         5
   macro avg       0.56      0.50      0.49         5
weighted avg       0.67      0.60      0.59         5



In [116]:
predictions = model.predict_classes(X_test_scaled)
labels = label_encoder.inverse_transform(predictions)
actual = list(labels)
predicted = list(y_test)

In [120]:
from sklearn.metrics import confusion_matrix
y_true = actual
y_pred = predicted
confusion_matrix(y_true, y_pred)

array([[114,  20],
       [ 11,  13]], dtype=int64)

In [119]:
from sklearn.metrics import classification_report
y_true = actual
y_pred = predicted
target_names = ['class 0 - false', 'class 1 - true']
print(classification_report(y_true, y_pred, target_names=target_names))

                 precision    recall  f1-score   support

class 0 - false       0.91      0.85      0.88       134
 class 1 - true       0.39      0.54      0.46        24

       accuracy                           0.80       158
      macro avg       0.65      0.70      0.67       158
   weighted avg       0.83      0.80      0.82       158



In [112]:
# Create the SVC Model
from sklearn.svm import SVC 
model2 = SVC(kernel='linear')
model2.fit(X_train_scaled, y_train)

# Create the GridSearchCV model
from sklearn.model_selection import GridSearchCV
param_grid = {'C': [1, 5, 10, 50],
              'gamma': [0.0001, 0.0005, 0.001, 0.005]}
grid = GridSearchCV(model2, param_grid, verbose=3)

In [113]:
# Train the model with GridSearch
grid.fit(X_train_scaled, y_train)

Fitting 5 folds for each of 16 candidates, totalling 80 fits
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.781, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.875, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.875, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.719, total=   0.0s
[CV] C=1, gamma=0.0001 ...............................................
[CV] ................... C=1, gamma=0.0001, score=0.719, total=   0.0s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ................... C=1, gamma=0.0005, score=0.781, total=   0.0s
[CV] C=1, gamma=0.0005 ...............................................
[CV] ...........

[Parallel(n_jobs=1)]: Using backend SequentialBackend with 1 concurrent workers.
[Parallel(n_jobs=1)]: Done   1 out of   1 | elapsed:    0.0s remaining:    0.0s
[Parallel(n_jobs=1)]: Done   2 out of   2 | elapsed:    0.0s remaining:    0.0s



[CV] C=1, gamma=0.005 ................................................
[CV] .................... C=1, gamma=0.005, score=0.781, total=   0.0s
[CV] C=1, gamma=0.005 ................................................
[CV] .................... C=1, gamma=0.005, score=0.875, total=   0.0s
[CV] C=1, gamma=0.005 ................................................
[CV] .................... C=1, gamma=0.005, score=0.875, total=   0.0s
[CV] C=1, gamma=0.005 ................................................
[CV] .................... C=1, gamma=0.005, score=0.719, total=   0.0s
[CV] C=1, gamma=0.005 ................................................
[CV] .................... C=1, gamma=0.005, score=0.719, total=   0.0s
[CV] C=5, gamma=0.0001 ...............................................
[CV] ................... C=5, gamma=0.0001, score=0.766, total=   0.0s
[CV] C=5, gamma=0.0001 ...............................................
[CV] ................... C=5, gamma=0.0001, score=0.859, total=   0.0s
[CV] 

[CV] ................... C=50, gamma=0.001, score=0.828, total=   0.0s
[CV] C=50, gamma=0.001 ...............................................
[CV] ................... C=50, gamma=0.001, score=0.750, total=   0.0s
[CV] C=50, gamma=0.001 ...............................................
[CV] ................... C=50, gamma=0.001, score=0.781, total=   0.0s
[CV] C=50, gamma=0.005 ...............................................
[CV] ................... C=50, gamma=0.005, score=0.719, total=   0.0s
[CV] C=50, gamma=0.005 ...............................................
[CV] ................... C=50, gamma=0.005, score=0.797, total=   0.0s
[CV] C=50, gamma=0.005 ...............................................
[CV] ................... C=50, gamma=0.005, score=0.828, total=   0.0s
[CV] C=50, gamma=0.005 ...............................................
[CV] ................... C=50, gamma=0.005, score=0.750, total=   0.0s
[CV] C=50, gamma=0.005 ...............................................
[CV] .

[Parallel(n_jobs=1)]: Done  80 out of  80 | elapsed:    1.2s finished


GridSearchCV(cv=None, error_score=nan,
             estimator=SVC(C=1.0, break_ties=False, cache_size=200,
                           class_weight=None, coef0=0.0,
                           decision_function_shape='ovr', degree=3,
                           gamma='scale', kernel='linear', max_iter=-1,
                           probability=False, random_state=None, shrinking=True,
                           tol=0.001, verbose=False),
             iid='deprecated', n_jobs=None,
             param_grid={'C': [1, 5, 10, 50],
                         'gamma': [0.0001, 0.0005, 0.001, 0.005]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring=None, verbose=3)

In [114]:
print(grid.best_params_)
print(grid.best_score_)

{'C': 10, 'gamma': 0.0001}
0.815625


In [115]:
from sklearn.metrics import accuracy_score
from sklearn.metrics import make_scorer
scoring = {'accuracy': make_scorer(accuracy_score),
            'prec': 'precision'}
scoring

{'accuracy': make_scorer(accuracy_score), 'prec': 'precision'}