In [1]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler
import pickle

# Load the stroke dataset.
# NOTE(review): this is a machine-specific absolute path; point DATA_PATH at
# your local copy of the CSV before running on another machine.
DATA_PATH = r"C:\Users\13615\Downloads\archive\healthcare-dataset-stroke-data.csv"
df = pd.read_csv(DATA_PATH)

# Impute missing numeric values with the column mean.
# The filled columns are assigned back to the frame instead of calling
# fillna(inplace=True) on a column selection: that chained in-place form acts
# on an intermediate object, emits a FutureWarning on pandas 2.x and does not
# update `df` at all when Copy-on-Write is enabled.
# NOTE(review): the means are computed over every row, including rows that the
# later train/test split holds out for testing.
df = df.fillna({
    'bmi': df['bmi'].mean(),
    'avg_glucose_level': df['avg_glucose_level'].mean(),
})

# Optional: Drop rows with missing target values (if any)
df = df.dropna(subset=['stroke'])

# Feature Engineering: encode the two-valued categorical columns as 0/1.
binary_encodings = {
    'gender': {'Male': 1, 'Female': 0},
    'ever_married': {'Yes': 1, 'No': 0},
    'Residence_type': {'Urban': 1, 'Rural': 0},
}
for col, mapping in binary_encodings.items():
    df[col] = df[col].map(mapping)

# Series.map with a dict turns every value that is not one of its keys (and
# every missing value) into NaN. Such rows would carry NaN into the feature
# matrix, which scikit-learn's LogisticRegression rejects, so drop them
# explicitly and report how many were removed rather than failing later.
unmapped_rows = df[list(binary_encodings)].isna().any(axis=1)
if unmapped_rows.any():
    print(f"Dropping {int(unmapped_rows.sum())} row(s) with unmapped or missing binary categories")
    df = df.loc[~unmapped_rows].copy()

# One-hot encoding for smoking status and work type.
# drop_first=True omits the first category of each column, leaving it as the
# implicit baseline.
df = pd.get_dummies(df, columns=['smoking_status', 'work_type'], drop_first=True)

# Define feature columns (this list also fixes the column order of the
# feature matrix built from it).
feature_columns = [
    "gender", "age", "hypertension", "heart_disease", "ever_married", "Residence_type",
    "avg_glucose_level", "bmi",
    "smoking_status_formerly smoked", "smoking_status_never smoked", "smoking_status_smokes",
    "work_type_Never_worked", "work_type_Private", "work_type_Self-employed", "work_type_children"
]

# Ensure all feature columns are in the DataFrame: a dummy column is absent
# when its category never occurs in the data, so create it filled with 0.
# NOTE(review): this also silently creates an all-zero column for a misspelled
# feature name.
for col in feature_columns:
    if col not in df.columns:
        df[col] = 0  # Add missing columns with 0 values

# Assemble the label vector and the feature matrix (columns in the order
# given by feature_columns).
y = df['stroke']
X = df.loc[:, feature_columns]

# Hold out 20% of the rows for testing; the fixed seed makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training split only, then apply the
# same transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Persist the fitted scaler to disk.
with open("scaler.pkl", "wb") as scaler_file:
    pickle.dump(scaler, scaler_file)

# Fit a logistic-regression classifier with library-default settings on the
# scaled training split (fit returns the estimator itself).
model = LogisticRegression().fit(X_train, y_train)

# Persist the fitted model to disk.
with open("stroke_risk_model.pkl", "wb") as model_file:
    pickle.dump(model, model_file)

print("Model and Scaler trained and saved successfully!")





Model and Scaler trained and saved successfully!


In [2]:
# Sanity check: report how many input features each fitted object was trained on.
for caption, fitted in (
    ("Scaler n_features_in_:", scaler),
    ("Model n_features_in_:", model),
):
    print(caption, fitted.n_features_in_)


Scaler n_features_in_: 15
Model n_features_in_: 15
