<a href="https://colab.research.google.com/github/VyshNavi2563/My-Projects/blob/main/Cerebral_Stroke_Prediction.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Importing required libraries (everything this cell needs, grouped up front)
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler

# Load the dataset
df = pd.read_csv('cerebral_stroke_prediction.csv')

# Check for missing values.
# Printed explicitly: a bare expression in the middle of a cell is evaluated
# and then thrown away, so the counts would never be shown.
print(df.isna().sum())

# Handle missing values.
# 'age' and 'bmi' are filled with the column mean; 'gender', 'hypertension'
# and 'heart_disease' with the most frequent value (mode).
# The result is assigned back to `df` instead of calling
# `df[col].fillna(..., inplace=True)`: that chained in-place form is deprecated
# in pandas 2.x and does not update the frame under Copy-on-Write.
# NOTE(review): the fill values are computed over all rows, i.e. before the
# train/test split below.
df = df.fillna({
    'age': df['age'].mean(),
    'gender': df['gender'].mode()[0],
    'hypertension': df['hypertension'].mode()[0],
    'heart_disease': df['heart_disease'].mode()[0],
    'bmi': df['bmi'].mean(),
})

# Convert categorical variables to numerical variables.
# Series.map with a dict turns every value that is not one of its keys into NaN.
df['gender'] = df['gender'].map({'Male': 1, 'Female': 0})

# Split the dataset into training and testing sets *before* scaling, so the
# scaler below only ever sees training rows when it learns its statistics.
features = df[['age', 'bmi']]
y = df['stroke']
features_train, features_test, y_train, y_test = train_test_split(
    features, y, test_size=0.25, random_state=42
)

# Scale the numerical variables.
# The scaler is fitted on the training rows only and then applied unchanged to
# the test rows; fitting on the full dataset would leak test-set mean/variance
# into the training features.
scaler = StandardScaler()
X_train = scaler.fit_transform(features_train)
X_test = scaler.transform(features_test)

# Full scaled feature matrix (all rows, using the training-fitted scaler),
# kept under the names this cell has always defined.
scaled_df = scaler.transform(features)
X = scaled_df


In [None]:
from imblearn.over_sampling import SMOTE

# Oversampler with a fixed seed, so the resampled training set is the same
# on every run.
smote = SMOTE(random_state=42)

# Resample the training split only (X_test / y_test are not passed in);
# the resampled arrays replace X_train and y_train.
(X_train, y_train) = smote.fit_resample(X=X_train, y=y_train)


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from xgboost import XGBClassifier

# Seed passed to every estimator so that re-running this cell reproduces the
# same metrics instead of a fresh random draw each time.
MODEL_SEED = 42


def _mean_or_nan(flags):
    """Return the mean of a boolean array as a plain float, or NaN if it is empty."""
    return float(flags.mean()) if flags.size else float('nan')


def _binary_metrics(y_true, y_pred):
    """Compute accuracy, sensitivity and specificity for 0/1 labels.

    Parameters
    ----------
    y_true : array-like
        True labels; 1 is treated as the positive class, 0 as the negative class.
    y_pred : array-like
        Predicted labels, same length and order as ``y_true``.

    Returns
    -------
    dict
        ``{'accuracy', 'sensitivity', 'specificity'}`` as plain floats.
        A rate whose class does not occur in ``y_true`` is NaN.
    """
    # Work on positional NumPy arrays so a pandas index on y_true cannot
    # influence the element-wise comparison.
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)
    correct = y_true == y_pred
    return {
        'accuracy': _mean_or_nan(correct),
        # Share of true positives among the actual positives.
        'sensitivity': _mean_or_nan(correct[y_true == 1]),
        # Share of true negatives among the actual negatives.
        'specificity': _mean_or_nan(correct[y_true == 0]),
    }


# Create a list of models to evaluate
models = [
    LogisticRegression(random_state=MODEL_SEED),
    DecisionTreeClassifier(random_state=MODEL_SEED),
    RandomForestClassifier(random_state=MODEL_SEED),
    SVC(random_state=MODEL_SEED),
    XGBClassifier(random_state=MODEL_SEED),
]

# Train and evaluate each model; results are keyed by the estimator class name.
results = {}
for model in models:
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)

    # Calculate and store the evaluation metrics
    results[model.__class__.__name__] = _binary_metrics(y_test, y_pred)

# Print the results
for model_name, result in results.items():
    print(f'{model_name}: {result}')


LogisticRegression: {'accuracy': 0.7216589861751153, 'sensitivity': 0.7836538461538461, 'specificity': 0.7204472843450479}
DecisionTreeClassifier: {'accuracy': 0.9563133640552995, 'sensitivity': 0.1778846153846154, 'specificity': 0.9715279082879158}
RandomForestClassifier: {'accuracy': 0.951705069124424, 'sensitivity': 0.18269230769230768, 'specificity': 0.9667355760195452}
SVC: {'accuracy': 0.6937327188940092, 'sensitivity': 0.8269230769230769, 'specificity': 0.6911294869385454}
XGBClassifier: {'accuracy': 0.8706912442396313, 'sensitivity': 0.375, 'specificity': 0.8803796278894944}
