In [53]:
import pandas as pd

# Load the cleaned heart-disease table; the path is relative, so it is resolved
# against the notebook's working directory.
DATA_FILE = "Heart_Disease_Cleaned.csv"
df = pd.read_csv(DATA_FILE)

# Show the first rows as the cell's output.
df.head()

Unnamed: 0,Age,Sex,Chest pain type,Cholesterol,EKG results,Max HR,Exercise angina,ST depression,Slope of ST,Number of vessels fluro,Thallium,Heart Disease
0,57,1,2,261,0,141,0,0.3,1,0,7,1
1,64,1,4,263,0,105,1,0.2,2,1,7,0
2,74,0,2,269,2,121,1,0.2,1,1,3,0
3,65,1,4,177,0,140,0,0.4,1,0,7,0
4,56,1,3,256,2,142,1,0.6,2,1,6,1


In [54]:
from sklearn.preprocessing import StandardScaler

# Split the frame into the label column (y) and all remaining columns (X).
y = df["Heart Disease"]
X = df.drop(columns=["Heart Disease"])

# Standardize every predictor column using statistics from all rows of X.
# NOTE(review): this fit happens before any train/validation/test split, so the
# scaling statistics include rows that later become held-out data. The later
# scaling cell rebuilds `scaler` on X_train only, and `x_scaled` is not used by
# the other cells visible in this notebook — confirm whether it is still needed.
scaler = StandardScaler().fit(X)
x_scaled = scaler.transform(X)


In [55]:
# Class balance of the target, reported as fractions of all rows.
df["Heart Disease"].value_counts(normalize=True)

Heart Disease
0    0.582569
1    0.417431
Name: proportion, dtype: float64

In [56]:
from sklearn.model_selection import train_test_split

# Label column vs. every other column of the frame.
y = df["Heart Disease"]
X = df.drop(columns=["Heart Disease"])

# Stage 1: keep 70% of the rows for training and set 30% aside,
# stratifying on y so both parts keep the same class ratio.
X_train, X_temp, y_train, y_temp = train_test_split(
    X,
    y,
    test_size=0.3,
    stratify=y,
    random_state=42,
)

# Stage 2: cut the set-aside rows in half to obtain the validation and test
# splits, again stratified and with the same fixed seed.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.5,
    stratify=y_temp,
    random_state=42,
)

# Each line prints the (rows, columns) shape tuple of the corresponding split.
print(f"Training set size: {X_train.shape} samples")
print(f"Validation set size: {X_val.shape} samples")
print(f"Test set size: {X_test.shape} samples")

Training set size: (152, 11) samples
Validation set size: (33, 11) samples
Test set size: (33, 11) samples


In [57]:
from sklearn.preprocessing import StandardScaler

# Learn the scaling statistics from the training rows only, so nothing from the
# validation or test rows influences them.
scaler = StandardScaler().fit(X_train)

# Apply that same fitted transform to all three splits.
X_train_s, X_val_s, X_test_s = (
    scaler.transform(split) for split in (X_train, X_val, X_test)
)


In [58]:
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score

# Class-weighted logistic regression trained on the standardized training features.
lr = LogisticRegression(max_iter=1000, class_weight="balanced")
lr.fit(X_train_s, y_train)

# Predicted labels for both held-out splits.
val_pred, test_pred = lr.predict(X_val_s), lr.predict(X_test_s)

lr_val_acc = accuracy_score(y_val, val_pred)
lr_test_acc = accuracy_score(y_test, test_pred)

print("Logistic Regression")
print("Validation Accuracy:", lr_val_acc)
print("Test Accuracy:", lr_test_acc)

Logistic Regression
Validation Accuracy: 0.7575757575757576
Test Accuracy: 0.8787878787878788


In [59]:
from sklearn.neighbors import KNeighborsClassifier

# 5-nearest-neighbours classifier trained on the standardized training features.
knn = KNeighborsClassifier(n_neighbors=5).fit(X_train_s, y_train)

# Accuracy on the standardized validation and test splits.
knn_val_acc = accuracy_score(y_val, knn.predict(X_val_s))
knn_test_acc = accuracy_score(y_test, knn.predict(X_test_s))

print("\nKNN")
print("Validation Accuracy:", knn_val_acc)
print("Test Accuracy:", knn_test_acc)


KNN
Validation Accuracy: 0.7575757575757576
Test Accuracy: 0.8484848484848485


In [60]:
from sklearn.svm import SVC

# Support-vector classifier with balanced class weights and otherwise library
# defaults, trained on the standardized training features.
svm = SVC(class_weight="balanced").fit(X_train_s, y_train)

# Accuracy on the standardized validation and test splits.
svm_val_acc = accuracy_score(y_val, svm.predict(X_val_s))
svm_test_acc = accuracy_score(y_test, svm.predict(X_test_s))

print("\nSVM")
print("Validation Accuracy:", svm_val_acc)
print("Test Accuracy:", svm_test_acc)


SVM
Validation Accuracy: 0.7878787878787878
Test Accuracy: 0.9090909090909091


In [61]:
from sklearn.ensemble import RandomForestClassifier

# 200-tree random forest with balanced class weights and a fixed seed.
# Unlike the other models in this notebook, it is trained and scored on the
# unscaled frames (X_train / X_val / X_test), not the standardized arrays.
rf = RandomForestClassifier(
    random_state=42,
    class_weight="balanced",
    n_estimators=200,
).fit(X_train, y_train)

rf_val_acc = accuracy_score(y_val, rf.predict(X_val))
rf_test_acc = accuracy_score(y_test, rf.predict(X_test))

print("\nRandom Forest")
print("Validation Accuracy:", rf_val_acc)
print("Test Accuracy:", rf_test_acc)



Random Forest
Validation Accuracy: 0.7878787878787878
Test Accuracy: 0.8787878787878788


In [62]:
import json

# Gather validation/test accuracy for every fitted model into one dictionary.
# Each tuple pairs a model with the feature matrices it is scored on: the
# standardized arrays for logistic regression, KNN and SVM, and the unscaled
# frames for the random forest.
model_results = {
    model_name: {
        "Validation Accuracy": accuracy_score(y_val, estimator.predict(val_features)),
        "Test Accuracy": accuracy_score(y_test, estimator.predict(test_features)),
    }
    for model_name, estimator, val_features, test_features in (
        ("Logistic Regression", lr, X_val_s, X_test_s),
        ("KNN", knn, X_val_s, X_test_s),
        ("SVM", svm, X_val_s, X_test_s),
        ("Random Forest", rf, X_val, X_test),
    )
}

# Pretty-print the collected scores as indented JSON.
print(json.dumps(model_results, indent=2))

{
  "Logistic Regression": {
    "Validation Accuracy": 0.7575757575757576,
    "Test Accuracy": 0.8787878787878788
  },
  "KNN": {
    "Validation Accuracy": 0.7575757575757576,
    "Test Accuracy": 0.8484848484848485
  },
  "SVM": {
    "Validation Accuracy": 0.7878787878787878,
    "Test Accuracy": 0.9090909090909091
  },
  "Random Forest": {
    "Validation Accuracy": 0.7878787878787878,
    "Test Accuracy": 0.8787878787878788
  }
}


In [63]:

import joblib

# Persist the fitted SVM together with the `scaler` object currently in the
# kernel, so the same preprocessing can be re-applied when the model is loaded.
for artifact, filename in (
    (svm, "svm_heart_disease_model.pkl"),
    (scaler, "scaler.pkl"),
):
    joblib.dump(artifact, filename)

print("SVM model saved as 'svm_heart_disease_model.pkl'")
print("Scaler saved as 'scaler.pkl'")

SVM model saved as 'svm_heart_disease_model.pkl'
Scaler saved as 'scaler.pkl'
