# Create a Web App that Predicts Stroke 

In [141]:
import pandas as pd
import numpy as np
import pickle

from sklearn.svm import SVC
from sklearn.pipeline import make_pipeline
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.compose import ColumnTransformer, make_column_transformer, make_column_selector as selector
from sklearn.impute import SimpleImputer
from sklearn.model_selection import cross_validate, train_test_split
from sklearn.metrics import classification_report
import pickle

# Work with Data

In [142]:
path = '/Users/volkhind/Desktop/test/healthcare-dataset-stroke-data .csv'

data = pd.read_csv(path)

In [143]:
data = data[['gender', 'age', 'hypertension', 'heart_disease', 'bmi', 'smoking_status', 'stroke']]
data.drop(data[data['gender']=='Other'].index, inplace=True)
data = data.drop(labels=[162, 245, 182], axis=0)

In [144]:
def change(row):
    if row == 1:
        return 'Yes'
    return 'No'
data.hypertension = data.hypertension.apply(change)
data.heart_disease = data.heart_disease.apply(change)

In [145]:
children = data.loc[data['age'] < 38]
a = np.array(children.index)
del_i = pd.Series(np.random.choice(a, size=700))
data = data.drop(del_i)

In [146]:
y = data.stroke
data = data.drop('stroke', axis=1)
X_train, X_test, y_train, y_test = train_test_split(data, y, test_size=0.15)

# Preprocessing and Creating Model

In [161]:
num_pipe = make_pipeline(
    StandardScaler(), SimpleImputer(strategy="mean", add_indicator=True))
cat_pipe = OneHotEncoder()

preprocessor_linear = make_column_transformer(
    (num_pipe, selector(dtype_include="number")),
    (cat_pipe, selector(dtype_include="category")),)
scores = {"balanced_accuracy": [], 'recall': []}
scoring = ["balanced_accuracy", 'recall']
model = make_pipeline(preprocessor_linear, SVC(kernel='linear', C=1, class_weight='balanced', probability=True))
cv_result = cross_validate(model, data, y, scoring=scoring, cv=10)
scores["balanced_accuracy"].append(cv_result["test_balanced_accuracy"].mean())
scores['recall'].append(cv_result['test_recall'].mean())
df_scores = pd.DataFrame(scores, index=['Models result'])
df_scores

Unnamed: 0,balanced_accuracy,recall
Models result,0.742992,0.772667


In [148]:
model.fit(X_train, y_train)
print(classification_report(y_test, model.predict(X_test)))

              precision    recall  f1-score   support

           0       0.98      0.68      0.80       652
           1       0.08      0.70      0.15        27

    accuracy                           0.68       679
   macro avg       0.53      0.69      0.48       679
weighted avg       0.95      0.68      0.78       679



In [152]:
model.fit(data, y)

# Saving Model

In [162]:
m = {'model': model}
with open('model.pkl', 'wb') as file:
    pickle.dump(m, file)
    

In [163]:
with open('model.pkl', 'rb') as file:
    df = pickle.load(file)