In [None]:
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import os
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import classification_report
from sklearn.svm import SVC
from imblearn.over_sampling import SMOTE

In [None]:
# Load the stroke dataset from its CSV file into a DataFrame
csv_path = "data/stroke_data.csv"
stroke_info_df = pd.read_csv(csv_path)
stroke_info_df

In [None]:
# Drop the 'Unnamed: 0' column (presumably a leftover index column from the CSV export).
# errors="ignore" keeps this cell idempotent: it reassigns `stroke_info_df`, so a
# second run would otherwise raise KeyError because the column is already gone.
stroke_info_df = stroke_info_df.drop(columns=["Unnamed: 0"], errors="ignore")
stroke_info_df

In [None]:
# One-hot encode the non-numeric columns with `pd.get_dummies`;
# the generated indicator columns are emitted as floats.
stroke_info_df = pd.get_dummies(data=stroke_info_df, dtype=float)
stroke_info_df.head(n=5)

In [None]:
# Show the dtype of every column in the encoded frame
stroke_info_df.dtypes

In [None]:
# Target vector: the "Stroke" column of the encoded frame
y = stroke_info_df.loc[:, "Stroke"]

In [None]:
# Feature matrix: every column except the "Stroke" target
X = stroke_info_df.drop(columns=["Stroke"])

In [None]:
# Show the names of the feature columns
X.columns

In [None]:
# Split data into training and testing sets.
# The split is stratified on the target so that both partitions keep the same
# class ratio; the classes are imbalanced (SMOTE is applied further down), and an
# unstratified split can leave the test set with too few positive cases to
# evaluate reliably.
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(
    X, y, stratify=y, random_state=42
)

In [None]:
# Build the scaler and fit it on the training split only, so that no
# statistics from the test split leak into the transformation.
scaler = StandardScaler()
X_scaler = scaler.fit(X_train)

# Apply the fitted scaler to both the training and the test features
X_train_scaled, X_test_scaled = (
    X_scaler.transform(split) for split in (X_train, X_test)
)

In [None]:
# Create a support vector machine linear classifer, and fit it to the training data
svm_model = SVC(kernel='linear')
svm_model.fit(X_train, y_train)

In [None]:
# Print the model score by using the test data
print(svm_model.score(X_test,y_test))

In [None]:
# Calculate the classification report
testing_predictions = svm_model.predict(X_test)

testing_report = classification_report(y_test, testing_predictions)
print(testing_report)

In [None]:
# Attach the predictions to a copy of the test features for row-by-row inspection
test_df = X_test.assign(predicted=testing_predictions)
test_df

In [None]:
from sklearn.metrics import confusion_matrix

# Confusion matrix of the true test labels against the test-set predictions
test_matrix = confusion_matrix(y_true=y_test, y_pred=testing_predictions)
test_matrix

In [None]:
# Apply SMOTE to the training data only
smote = SMOTE(random_state=42)
X_train_resampled, y_train_resampled = smote.fit_resample(X_train, y_train)

In [None]:
# Create a support vector machine linear classifer, and fit it to the training data
svm_model = SVC(kernel='linear')
svm_model.fit(X_train_resampled, y_train_resampled)

In [None]:
# Calculate the classification report
testing_predictions = svm_model.predict(X_test)

testing_report = classification_report(y_test, testing_predictions)
print(testing_report)

In [None]:
from sklearn.metrics import confusion_matrix
test_matrix = confusion_matrix(y_test,testing_predictions)

# Print the confusion matrix for the training data
test_matrix

In [None]:
testing_predictions
test_df = X_test.copy()
test_df['predicted'] = testing_predictions
test_df

In [None]:
test_df.iloc[4]

In [None]:
# Calculate the classification report
prediction = svm_model.predict(X_test)

testing_report = classification_report(y_test, prediction)
print(testing_report)

In [None]:
# Save the model and the fitted scaler
import pickle

# Make sure the output directory exists so open() does not fail on a fresh checkout
os.makedirs('models', exist_ok=True)

# Use context managers so each file handle is flushed and closed deterministically
# (the bare open() calls previously leaked the handles).
filename = 'models/svm_model.sav'
with open(filename, 'wb') as model_file:
    pickle.dump(svm_model, model_file)

filename = 'models/svm__scalar_model.sav'
with open(filename, 'wb') as scaler_file:
    pickle.dump(X_scaler, scaler_file)