# Preliminaries

In [69]:
from google.colab import drive

drive.mount('/content/drive/')

Drive already mounted at /content/drive/; to attempt to forcibly remount, call drive.mount("/content/drive/", force_remount=True).


In [70]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

from keras.models import Sequential
from keras.layers import LSTM, GRU, Dense, Dropout
from keras.callbacks import EarlyStopping
from keras.utils import to_categorical

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import cross_val_score

from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import accuracy_score

In [146]:
dataset = pd.read_csv("/content/drive/MyDrive/SEM4/Machine Learning/Experiments/healthcare-dataset-stroke-data.csv")

print(dataset.shape)
dataset.head()

(5110, 12)


Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,9046,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,51676,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,,never smoked,1
2,31112,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
3,60182,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
4,1665,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1


In [147]:
dataset = dataset.drop(columns=['id'])
dataset = dataset[dataset['bmi'].notna()]
dataset = dataset[dataset.smoking_status != "Unknown"]
# dataset = dataset[dataset.gender != "Other"]

In [166]:
x = dataset.drop(columns=['stroke'])
y = dataset['stroke']

In [167]:
oneHotEncoding = ColumnTransformer(transformers = [('encoder', OneHotEncoder(), [0,4,5,6,9])], remainder='passthrough')
x = np.array(oneHotEncoding.fit_transform(x))
print(x)

[[  0.     1.     0.   ...   1.   228.69  36.6 ]
 [  0.     1.     0.   ...   1.   105.92  32.5 ]
 [  1.     0.     0.   ...   0.   171.23  34.4 ]
 ...
 [  1.     0.     0.   ...   0.   125.2   40.  ]
 [  1.     0.     0.   ...   0.    82.99  30.6 ]
 [  0.     1.     0.   ...   0.   166.29  25.6 ]]


In [168]:
x_train, x_test, y_train, y_test = train_test_split(x, y, test_size = 0.2)

In [169]:
x.shape

(3426, 20)

In [170]:
timesteps = 1
input_dim = 19

x_train_1 = np.resize(x_train, (x_train.shape[0], timesteps, input_dim))
x_test_1 = np.resize(x_test, (x_test.shape[0], timesteps, input_dim))
x_1 = np.resize(x, (x.shape[0], timesteps, input_dim))

In [171]:
y_train_1 = to_categorical(y_train)
y_test_1 = to_categorical(y_test)
y_1 = to_categorical(y)

print(y_1)

[[0. 1.]
 [0. 1.]
 [0. 1.]
 ...
 [1. 0.]
 [1. 0.]
 [1. 0.]]


In [172]:
print(y_1.shape)
print(y_train_1.shape)
print(y_test_1.shape)

(3426, 2)
(2740, 2)
(686, 2)


# Training LSTM

In [80]:
model = Sequential()
model.add(LSTM(128, return_sequences=True, input_shape=(timesteps, input_dim)))
model.add(Dropout(0.5))
model.add(LSTM(64))
model.add(Dropout(0.5))
model.add(Dense(2, activation='softmax'))

In [81]:
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [82]:
model.fit(x_train_1, y_train_1, epochs=100, batch_size=256, validation_split=0.1, callbacks=[EarlyStopping(monitor='val_loss', patience=7, min_delta=0.0001)])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100
Epoch 12/100
Epoch 13/100
Epoch 14/100
Epoch 15/100
Epoch 16/100
Epoch 17/100
Epoch 18/100
Epoch 19/100
Epoch 20/100
Epoch 21/100
Epoch 22/100


<keras.callbacks.History at 0x7f03b8f1ceb0>

In [83]:
y_pred_lstm = model.predict(x_test_1)
y_pred_lstm



array([[9.9997681e-01, 2.3099217e-05],
       [9.9946606e-01, 5.3403765e-04],
       [9.9780035e-01, 2.1996358e-03],
       ...,
       [9.9993682e-01, 6.3148575e-05],
       [9.9631602e-01, 3.6840253e-03],
       [9.9419397e-01, 5.8060046e-03]], dtype=float32)

In [62]:
accuracy_score(np.argmax(y_test_1, axis=1), np.argmax(y_pred_lstm, axis=1))

0.9387755102040817

In [96]:
print(classification_report(np.argmax(y_test_1, axis=1), np.argmax(y_pred_lstm, axis=1), zero_division=1))

              precision    recall  f1-score   support

           0       0.95      1.00      0.97       648
           1       1.00      0.00      0.00        37

    accuracy                           0.95       685
   macro avg       0.97      0.50      0.49       685
weighted avg       0.95      0.95      0.92       685



# Training GRU

In [173]:
modelGRU = Sequential()
modelGRU.add(GRU(128, return_sequences=True, input_shape=(timesteps, input_dim)))
modelGRU.add(Dropout(0.5))
modelGRU.add(GRU(64))
modelGRU.add(Dropout(0.5))
modelGRU.add(Dense(2, activation='softmax'))

In [174]:
modelGRU.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])

In [175]:
modelGRU.fit(x_train_1, y_train_1, epochs=100, batch_size=256, validation_split=0.2, callbacks=[EarlyStopping(monitor='val_loss', patience=7, min_delta=0.0001)])

Epoch 1/100
Epoch 2/100
Epoch 3/100
Epoch 4/100
Epoch 5/100
Epoch 6/100
Epoch 7/100
Epoch 8/100
Epoch 9/100
Epoch 10/100
Epoch 11/100


<keras.callbacks.History at 0x7f039f96c400>

In [176]:
modelGRU.evaluate(x_test_1,y_test_1)



[0.21375203132629395, 0.9460641145706177]

In [177]:
y_pred_gru = modelGRU.predict(x_test_1)
y_pred_gru



array([[9.9576080e-01, 4.2391648e-03],
       [9.9831051e-01, 1.6894217e-03],
       [9.9787503e-01, 2.1250199e-03],
       ...,
       [9.9783933e-01, 2.1606579e-03],
       [9.9953747e-01, 4.6261001e-04],
       [9.9776161e-01, 2.2384336e-03]], dtype=float32)

In [178]:
accuracy_score(np.argmax(y_test_1, axis=1), np.argmax(y_pred_gru, axis=1))

0.9460641399416909

In [179]:
print(classification_report(np.argmax(y_test_1, axis=1), np.argmax(y_pred_gru, axis=1), zero_division=1))

              precision    recall  f1-score   support

           0       0.95      1.00      0.97       649
           1       1.00      0.00      0.00        37

    accuracy                           0.95       686
   macro avg       0.97      0.50      0.49       686
weighted avg       0.95      0.95      0.92       686



In [180]:
x_1.shape

(3426, 1, 19)

In [181]:
y_pred_gru = modelGRU.predict(x_1)
y_pred_gru



array([[9.9644244e-01, 3.5574462e-03],
       [9.9833959e-01, 1.6603929e-03],
       [9.9820983e-01, 1.7901345e-03],
       ...,
       [9.9713802e-01, 2.8619380e-03],
       [9.9907315e-01, 9.2676835e-04],
       [9.9751115e-01, 2.4888024e-03]], dtype=float32)

In [182]:
y_pred_gru.shape

(3426, 2)

In [183]:
print(classification_report(np.argmax(y_1, axis=1), np.argmax(y_pred_gru, axis=1), zero_division=1))

              precision    recall  f1-score   support

           0       0.95      1.00      0.97      3246
           1       1.00      0.00      0.00       180

    accuracy                           0.95      3426
   macro avg       0.97      0.50      0.49      3426
weighted avg       0.95      0.95      0.92      3426

