<a href="https://colab.research.google.com/github/Usmantech125/heart-disease-prediction/blob/main/HeartDisease.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [16]:
import pandas as pd
from sklearn.model_selection import train_test_split, cross_val_score
from sklearn.preprocessing import StandardScaler
from sklearn.pipeline import Pipeline
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from xgboost import XGBClassifier

In [1]:
df = pd.read_csv('/content/heart.csv')

In [2]:
df.head()

Unnamed: 0,age,sex,cp,trestbps,chol,fbs,restecg,thalach,exang,oldpeak,slope,ca,thal,target
0,52,1,0,125,212,0,1,168,0,1.0,2,2,3,0
1,53,1,0,140,203,1,0,155,1,3.1,0,0,3,0
2,70,1,0,145,174,0,1,125,1,2.6,0,0,3,0
3,61,1,0,148,203,0,1,161,0,0.0,2,1,3,0
4,62,0,0,138,294,1,1,106,0,1.9,1,3,2,0


In [5]:
df.isnull().sum()

Unnamed: 0,0
age,0
sex,0
cp,0
trestbps,0
chol,0
fbs,0
restecg,0
thalach,0
exang,0
oldpeak,0


In [7]:
X = df.drop("target", axis=1)
y = df['target']

In [8]:
X_train, X_test, y_train, y_test = train_test_split(X,y, test_size=0.2, random_state=42)

In [20]:
pipelines = {
    "Logistic Regression" : Pipeline([
      ("scaler", StandardScaler()),
      ("model", LogisticRegression(max_iter=1000))
    ]),

    "Random Forest" : Pipeline([
        ("model", RandomForestClassifier(n_estimators=100, random_state=42))
    ]),

    "XGBoost" : Pipeline([
        ("model", XGBClassifier(eval_metric="logloss", random_state=42))
    ])
}

In [26]:
result = {}
for name, pipe in pipelines.items():
  scores = cross_val_score(pipe, X_train, y_train, cv=5, scoring ="accuracy")
  result[name] = scores.mean()

In [27]:
print("Model Comparison (Accuracy):")
for name, acc in result.items():
    print(f"{name}: {acc:.2f}")

Model Comparison (Accuracy):
Logistic Regression: 0.85
Random Forest: 0.98
XGBoost: 0.98


In [31]:
best_model_name = max(result, key=result.get)
best_pipeline = pipelines[best_model_name]
best_pipeline.fit(X_train, y_train)
test_accuarcy = best_pipeline.score(X_test, y_test)

print(f"Best Model: {best_model_name}")
print(f"Test Accuracy: {test_accuarcy:.2f}")

Best Model: Random Forest
Test Accuracy: 0.99
