<a href="https://colab.research.google.com/github/anishrashinkar-26/BML/blob/main/FINAL_LCA3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [12]:
import pandas as pd, numpy as np
from sklearn.model_selection import train_test_split, StratifiedKFold, cross_validate
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.impute import SimpleImputer
from sklearn.linear_model import LogisticRegression

# The data file is read with header=None, so column names are assigned here,
# in file order.
cols = [
    "Age", "Sex", "ChestPainType", "RestingBP", "Cholesterol", "FastingSugar",
    "RestingECG", "MaxHeartRate", "ExerciseAngina", "STDepression",
    "Slope", "MajorVessels", "Thalassemia", "Diagnosis"
]

# Treat the "?" sentinel as missing while parsing, rather than patching the
# frame in place afterwards.
df = pd.read_csv("./processed.cleveland.data", header=None, na_values="?")
df.columns = cols  # raises if the file does not have exactly len(cols) columns

# Force these two columns to numeric; any leftover non-numeric token becomes NaN
# and is handled later by the imputers.
for c in ["MajorVessels","Thalassemia"]:
    df[c] = pd.to_numeric(df[c], errors="coerce")

# A missing label would compare as False in `> 0` and be silently counted as
# "no disease", so fail loudly instead.
if df["Diagnosis"].isna().any():
    raise ValueError("Diagnosis column contains missing values; cannot derive HeartDisease labels")

# Binary target: 1 when Diagnosis is greater than 0, otherwise 0.
df["HeartDisease"] = (df["Diagnosis"] > 0).astype(int)

# Features exclude both the raw label and the derived target.
X, y = df.drop(columns=["Diagnosis","HeartDisease"]), df["HeartDisease"]

# Feature groups; each group is routed to its own preprocessing branch below.
num_cols = ["Age","RestingBP","Cholesterol","MaxHeartRate","STDepression"]
bin_cols = ["Sex","FastingSugar","ExerciseAngina"]
cat_cols = ["ChestPainType","RestingECG","Slope","MajorVessels","Thalassemia"]

# Numeric branch: fill gaps with the median, then standardise.
numeric_branch = Pipeline(steps=[
    ("impute", SimpleImputer(strategy="median")),
    ("scale", StandardScaler()),
])

# Categorical branch: fill gaps with the most frequent value, then one-hot
# encode; categories unseen during fit are ignored at transform time.
categorical_branch = Pipeline(steps=[
    ("impute", SimpleImputer(strategy="most_frequent")),
    ("onehot", OneHotEncoder(handle_unknown="ignore")),
])

# The binary columns are only imputed (most frequent value); they are neither
# scaled nor encoded.
preprocess = ColumnTransformer(transformers=[
    ("num", numeric_branch, num_cols),
    ("bin", SimpleImputer(strategy="most_frequent"), bin_cols),
    ("cat", categorical_branch, cat_cols),
])

# Preprocessing and classifier live in one pipeline, so the transformers are
# fitted together with the model on whatever data the pipeline is fitted on.
classifier = LogisticRegression(max_iter=200, class_weight="balanced")
pipe = Pipeline(steps=[("prep", preprocess), ("model", classifier)])
# Stratified 80/20 split; the 20% test portion is not used in this cell.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, stratify=y, random_state=42
)

# 10-fold stratified cross-validation on the training portion only.
cv = StratifiedKFold(n_splits=10, shuffle=True, random_state=42)
scores = cross_validate(
    pipe,
    X_train,
    y_train,
    cv=cv,
    scoring=["accuracy", "precision", "recall", "f1", "roc_auc"],
)

# Keep only the per-fold test scores (dropping timing entries) and average them.
metrics = {}
for key, fold_values in scores.items():
    if "test" in key:
        metrics[key.replace("test_", "").upper()] = np.mean(fold_values)

print("Final ouput is :  ")
for name, value in metrics.items():
    print(f"{name:12s}: {value:.3f}")


Final ouput is :  
ACCURACY    : 0.843
PRECISION   : 0.868
RECALL      : 0.802
F1          : 0.824
ROC_AUC     : 0.902
