In [7]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
from sklearn.linear_model import LogisticRegression
import os
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, roc_auc_score


df = pd.read_csv("data/processed/stroke_processed.csv")

X = df.drop("stroke", axis=1)
y = df["stroke"]

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42, stratify=y)


In [2]:
y_pred_majority = [0] * len(y_test)

# Evaluate
print("Majority Class Baseline Performance:")
print(f"Accuracy: {accuracy_score(y_test, y_pred_majority):.4f}")
print(classification_report(y_test, y_pred_majority, zero_division=0))


Majority Class Baseline Performance:
Accuracy: 0.9511
              precision    recall  f1-score   support

           0       0.95      1.00      0.97       972
           1       0.00      0.00      0.00        50

    accuracy                           0.95      1022
   macro avg       0.48      0.50      0.49      1022
weighted avg       0.90      0.95      0.93      1022



The majority class classifier achieved an overall accuracy of 95.1%, but this result is misleading because it predicts no stroke for every case. While accuracy appears high due to class imbalance, the model completely fails to identify stroke cases (precision, recall, and F1-score of 0.00 for class 1). This baseline highlights the need for a more balanced and informative model for stroke prediction.

In [3]:
from sklearn.linear_model import LogisticRegression

log_reg = LogisticRegression(max_iter=1000)
log_reg.fit(X_train, y_train)

y_pred_log = log_reg.predict(X_test)

print("Logistic Regression Baseline Performance:")
print(classification_report(y_test, y_pred_log, zero_division=0))


Logistic Regression Baseline Performance:
              precision    recall  f1-score   support

           0       0.95      1.00      0.98       972
           1       1.00      0.02      0.04        50

    accuracy                           0.95      1022
   macro avg       0.98      0.51      0.51      1022
weighted avg       0.95      0.95      0.93      1022



The logistic regression baseline model achieved an overall accuracy of 95%, similar to the majority class classifier. However, while it slightly improved stroke detection recall of 0.02 for class 1, it still struggles to correctly identify positive cases due to severe class imbalance. This indicates that additional preprocessing steps, such as class balancing or feature scaling, will be necessary to improve model performance.

In [9]:
os.makedirs("results", exist_ok=True)

y_pred_majority = np.zeros_like(y_test)   

maj_precision, maj_recall, maj_f1, _ = precision_recall_fscore_support(
    y_test, y_pred_majority, average="binary", pos_label=1, zero_division=0
)

maj_roc_auc = np.nan

y_pred_log = log_reg.predict(X_test)
y_proba_log = log_reg.predict_proba(X_test)[:, 1]

log_precision, log_recall, log_f1, _ = precision_recall_fscore_support(
    y_test, y_pred_log, average="binary", pos_label=1, zero_division=0
)

log_roc_auc = roc_auc_score(y_test, y_proba_log)


baseline_results = pd.DataFrame([
    {
        "Model": "Majority Baseline",
        "Train_time": np.nan,
        "Inference_time": np.nan,
        "Precision": maj_precision,
        "Recall": maj_recall,
        "F1": maj_f1,
        "ROC_AUC": maj_roc_auc
    },
    {
        "Model": "Logistic Regression Baseline",
        "Train_time": train_time_log if "train_time_log" in globals() else np.nan,
        "Inference_time": inference_time_log if "inference_time_log" in globals() else np.nan,
        "Precision": log_precision,
        "Recall": log_recall,
        "F1": log_f1,
        "ROC_AUC": log_roc_auc
    }
])

baseline_results.to_csv("results/baseline_metrics.csv", index=False)
baseline_results

Unnamed: 0,Model,Train_time,Inference_time,Precision,Recall,F1,ROC_AUC
0,Majority Baseline,,,0.0,0.0,0.0,
1,Logistic Regression Baseline,,,1.0,0.02,0.039216,0.84216
