# Preliminaries

In [None]:
import pandas as pd
import numpy as np

from sklearn.model_selection import train_test_split

from keras.callbacks import EarlyStopping
from keras.utils import to_categorical

from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder

from sklearn.neural_network import MLPClassifier
from sklearn.preprocessing import StandardScaler

from sklearn.metrics import accuracy_score, classification_report
from sklearn.model_selection import cross_val_score

from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import accuracy_score

In [None]:
# Read the raw stroke records from the CSV in the Colab working directory.
csv_path = "/content/healthcare-dataset-stroke-data.csv"
dataset = pd.read_csv(csv_path)

# Report the frame's dimensions, then preview its first rows.
print(dataset.shape)
dataset.head()

(5110, 12)


Unnamed: 0,id,gender,age,hypertension,heart_disease,ever_married,work_type,Residence_type,avg_glucose_level,bmi,smoking_status,stroke
0,9046,Male,67.0,0,1,Yes,Private,Urban,228.69,36.6,formerly smoked,1
1,51676,Female,61.0,0,0,Yes,Self-employed,Rural,202.21,,never smoked,1
2,31112,Male,80.0,0,1,Yes,Private,Rural,105.92,32.5,never smoked,1
3,60182,Female,49.0,0,0,Yes,Private,Urban,171.23,34.4,smokes,1
4,1665,Female,79.0,1,0,Yes,Self-employed,Rural,174.12,24.0,never smoked,1


# Preprocessing

In [None]:
# Clean the raw frame in three steps:
#   1. drop the 'id' column,
#   2. discard rows whose 'bmi' is missing,
#   3. discard rows whose smoking status is recorded as "Unknown".
# errors='ignore' keeps this cell re-runnable: on a second execution 'id' is
# already gone and a plain drop() would raise KeyError. The two filters are
# naturally idempotent.
# Rows with gender == "Other" are intentionally kept (no filter is applied).
dataset = dataset.drop(columns=['id'], errors='ignore')
dataset = dataset[dataset['bmi'].notna()]
dataset = dataset[dataset.smoking_status != "Unknown"]

In [None]:
# Separate the prediction target from the predictor columns.
target_column = 'stroke'
y = dataset[target_column]
x = dataset.drop(columns=[target_column])

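## Random oversampling

No resampling is applied in this section yet; the cell below only inspects the class balance of `stroke`.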

In [None]:
# Count the rows in each class of the target `y` (the 'stroke' column) to
# check how balanced the two classes are.
y.value_counts()

0    3246
1     180
Name: stroke, dtype: int64

## Shaping dataset

In [None]:
# One-hot encode the categorical predictors and pass every other column
# through unchanged. The encoded columns come first in the output, followed by
# the passthrough columns.
#
# Columns are selected by name instead of by position: the selection no longer
# depends on column order, and accidentally re-running this cell on the
# already-encoded array fails loudly instead of silently one-hot encoding
# numeric columns.
#
# sparse_threshold=0 forces a dense result, so np.array() always receives a
# regular 2-D array rather than a SciPy sparse matrix.
categorical_columns = ['gender', 'ever_married', 'work_type', 'Residence_type', 'smoking_status']
oneHotEncoding = ColumnTransformer(
    transformers=[('encoder', OneHotEncoder(), categorical_columns)],
    remainder='passthrough',
    sparse_threshold=0,
)
x = np.array(oneHotEncoding.fit_transform(x))
print(x)

[[  0.     1.     0.   ...   1.   228.69  36.6 ]
 [  0.     1.     0.   ...   1.   105.92  32.5 ]
 [  1.     0.     0.   ...   0.   171.23  34.4 ]
 ...
 [  1.     0.     0.   ...   0.   125.2   40.  ]
 [  1.     0.     0.   ...   0.    82.99  30.6 ]
 [  0.     1.     0.   ...   0.   166.29  25.6 ]]


In [None]:
# Hold out 20% of the rows for testing, with a fixed seed for reproducibility.
# stratify=y keeps the stroke / no-stroke ratio the same in both splits; the
# positive class is rare, so an unstratified split can noticeably skew the
# class ratio between train and test.
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.2, random_state=42, stratify=y
)

In [None]:
# Convert each label vector (train split, test split, full set) into its
# one-hot class-matrix form. The model is fitted on the 2-D targets, and the
# evaluation cells recover class ids with argmax over axis 1.
y_train_1, y_test_1, y_1 = (
    to_categorical(labels) for labels in (y_train, y_test, y)
)

# MLP training

In [None]:
# Multi-layer perceptron with one hidden layer of 100 units.
# early_stopping holds out 15% of the training data as an internal validation
# set and stops training once the validation score stops improving;
# verbose=True prints the loss and validation score for every iteration.
# random_state fixes the weight initialisation, the internal validation split
# and the batch shuffling, so a Restart & Run All reproduces the same model.
# NOTE(review): the features are fed to the network unscaled; StandardScaler
# is imported in this notebook but not applied here — confirm whether scaling
# was intended.
mlp = MLPClassifier(
    hidden_layer_sizes=(100,),
    max_iter=1000,
    early_stopping=True,
    validation_fraction=0.15,
    verbose=True,
    random_state=42,
)

In [None]:
# Train on the training split, using the one-hot (2-D) form of the labels.
# Because the estimator was created with verbose=True, per-iteration loss and
# validation score are printed below.
mlp.fit(x_train, y_train_1)

Iteration 1, loss = 1.01308089
Validation score: 0.917275
Iteration 2, loss = 0.63656100
Validation score: 0.941606
Iteration 3, loss = 0.45829713
Validation score: 0.953771
Iteration 4, loss = 0.45144203
Validation score: 0.951338
Iteration 5, loss = 0.42334063
Validation score: 0.951338
Iteration 6, loss = 0.41497264
Validation score: 0.951338
Iteration 7, loss = 0.41323940
Validation score: 0.953771
Iteration 8, loss = 0.41294204
Validation score: 0.941606
Iteration 9, loss = 0.41316443
Validation score: 0.951338
Iteration 10, loss = 0.39947047
Validation score: 0.951338
Iteration 11, loss = 0.39982741
Validation score: 0.953771
Iteration 12, loss = 0.39953297
Validation score: 0.956204
Iteration 13, loss = 0.39651724
Validation score: 0.956204
Iteration 14, loss = 0.40500759
Validation score: 0.956204
Iteration 15, loss = 0.42362259
Validation score: 0.956204
Iteration 16, loss = 0.40865363
Validation score: 0.953771
Iteration 17, loss = 0.38996219
Validation score: 0.953771
Iterat

In [None]:
import logging

# Send INFO-level records to a log file in the current working directory.
logging.basicConfig(filename='mlp_classifier.log', level=logging.INFO)


def log_callback(iteration, logs):
    """Write one training-iteration record to the log.

    Parameters
    ----------
    iteration : int
        1-based iteration number.
    logs : dict
        Must provide the float entries 'loss' and 'accuracy'.
    """
    logging.info("Iteration: %d - Loss: %f - Accuracy: %f" % (iteration, logs['loss'], logs['accuracy']))


# scikit-learn's MLPClassifier.fit() accepts no `callback` argument, so the
# training history is replayed from what the fitted estimator recorded:
# `loss_curve_` (one loss per iteration) and `validation_scores_` (one score
# per iteration, populated when early_stopping=True).
# The `mlp` trained earlier is reused on purpose: re-instantiating it here
# would discard the fitted model that the following cells call predict() on.
validation_scores = getattr(mlp, 'validation_scores_', None)
if validation_scores is None:
    validation_scores = []

for iteration, loss in enumerate(mlp.loss_curve_, start=1):
    # Fall back to NaN if no validation score exists for this iteration.
    if iteration <= len(validation_scores):
        val_accuracy = validation_scores[iteration - 1]
    else:
        val_accuracy = float('nan')
    log_callback(iteration, {'loss': loss, 'accuracy': val_accuracy})

# Evaluate on the held-out split. The model was fitted on one-hot targets, so
# both the reference labels and the predictions are reduced to class ids with
# argmax before scoring.
y_pred = mlp.predict(x_test)
accuracy = accuracy_score(np.argmax(y_test_1, axis=1), np.argmax(y_pred, axis=1))
logging.info("Final Accuracy: %f" % accuracy)

In [None]:
# Predict on the held-out test split; the evaluation cells below reduce the
# predictions to class ids with argmax over axis 1.
y_pred = mlp.predict(x_test)

In [None]:
# Test-set accuracy: reduce the one-hot reference labels and the 2-D
# predictions to class ids, then compare them.
accuracy_score(
    y_true=y_test_1.argmax(axis=1),
    y_pred=y_pred.argmax(axis=1),
)

0.9358600583090378

In [None]:
# Per-class precision / recall / F1 on the test split.
# zero_division=0 scores a class that is never predicted as precision 0.0.
# The previous zero_division=1 reported precision 1.00 for such a class, which
# hides the fact that the model produced no positive predictions at all.
print(classification_report(np.argmax(y_test_1, axis=1), np.argmax(y_pred, axis=1), zero_division=0))

              precision    recall  f1-score   support

           0       0.94      1.00      0.97       642
           1       1.00      0.00      0.00        44

    accuracy                           0.94       686
   macro avg       0.97      0.50      0.48       686
weighted avg       0.94      0.94      0.90       686



In [None]:
# Predict on the full encoded feature matrix `x`. This overwrites the test-set
# predictions held in `y_pred`, and `x` includes the rows the model was
# trained on, so scores computed from it are not a held-out estimate.
y_pred = mlp.predict(x)

In [None]:
# Per-class precision / recall / F1 over the full dataset (training rows
# included). zero_division=0 scores a class that is never predicted as
# precision 0.0; the previous zero_division=1 reported a misleading 1.00.
print(classification_report(np.argmax(y_1, axis=1), np.argmax(y_pred, axis=1), zero_division=0))

              precision    recall  f1-score   support

           0       0.95      1.00      0.97      3246
           1       1.00      0.00      0.00       180

    accuracy                           0.95      3426
   macro avg       0.97      0.50      0.49      3426
weighted avg       0.95      0.95      0.92      3426

