In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.model_selection import cross_val_score
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC
from sklearn.ensemble import RandomForestClassifier


In [2]:
# Load the raw dataset; the path is relative to the notebook's working directory.
df = pd.read_csv("dataset.csv")

In [3]:
# Preview the first rows of the raw frame to check the columns that were read.
df.head()

Unnamed: 0,age,sex,chest pain type,resting bp s,cholesterol,fasting blood sugar,resting ecg,max heart rate,exercise angina,oldpeak,ST slope,target
0,40,1,2,140,289,0,0,172,0,0.0,1,0
1,49,0,3,160,180,0,0,156,0,1.0,2,1
2,37,1,2,130,283,0,1,98,0,0.0,1,0
3,48,0,4,138,214,0,0,108,1,1.5,2,1
4,54,1,3,150,195,0,0,122,0,0.0,1,0


In [4]:
# Work on a copy of the raw frame that leaves out the 'sex' column.
df1 = df.drop(columns=["sex"])

In [5]:
# Preview the frame after the 'sex' column has been dropped.
df1.head()

Unnamed: 0,age,chest pain type,resting bp s,cholesterol,fasting blood sugar,resting ecg,max heart rate,exercise angina,oldpeak,ST slope,target
0,40,2,140,289,0,0,172,0,0.0,1,0
1,49,3,160,180,0,0,156,0,1.0,2,1
2,37,2,130,283,0,1,98,0,0.0,1,0
3,48,4,138,214,0,0,108,1,1.5,2,1
4,54,3,150,195,0,0,122,0,0.0,1,0


In [6]:
# Count missing values per column before modelling.
df1.isnull().sum()

age                    0
chest pain type        0
resting bp s           0
cholesterol            0
fasting blood sugar    0
resting ecg            0
max heart rate         0
exercise angina        0
oldpeak                0
ST slope               0
target                 0
dtype: int64

In [7]:
# Separate the label column from the predictors.
y = df1.loc[:, "target"]                    # Target variable
X = df1.drop("target", axis="columns")      # Features (every remaining column)

In [8]:
# Hold out 20% of the rows as a test set; the fixed seed keeps the split reproducible.
from sklearn.model_selection import train_test_split

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [9]:
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix

In [10]:
# Candidate classifiers, each wrapped in the same scale-then-fit pipeline so
# the scaler is fitted only on the data the pipeline itself is fitted on.
models = {
    label: Pipeline(steps=[("scaler", StandardScaler()), ("model", estimator)])
    for label, estimator in (
        ("Logistic Regression", LogisticRegression(max_iter=1000, random_state=42)),
        ("Random Forest", RandomForestClassifier(n_estimators=100, random_state=42)),
        ("SVM", SVC(random_state=42)),
    )
}

In [11]:
# Compare the candidates by mean 5-fold cross-validated accuracy on the training split.
for name, pipeline in models.items():
    fold_scores = cross_val_score(
        estimator=pipeline,
        X=X_train,
        y=y_train,
        cv=5,
        scoring="accuracy",
    )
    score = fold_scores.mean()
    print(f"{name}: {score:.4f}")

Logistic Regression: 0.8193
Random Forest: 0.8991
SVM: 0.8445


#### Choosing the Random Forest algorithm (it had the highest mean cross-validation accuracy, 0.8991):

In [12]:
from sklearn.model_selection import GridSearchCV

# Hyper-parameter candidates explored by the grid search.
param_grid = dict(
    n_estimators=[50, 100, 200],     # how many trees the forest grows
    max_depth=[None, 10, 20],        # depth limit per tree (None = no limit)
    min_samples_split=[2, 5, 10],    # samples a node needs before it may split
    min_samples_leaf=[1, 2, 4],      # samples every leaf must keep
)

In [13]:
# Search every combination in param_grid for a random forest, scoring each
# with 5-fold cross-validated accuracy; n_jobs=-1 runs the fits in parallel.
grid_search = GridSearchCV(
    estimator=RandomForestClassifier(random_state=42),
    param_grid=param_grid,
    cv=5,
    scoring="accuracy",
    n_jobs=-1,
)
grid_search.fit(X_train, y_train)

In [14]:
# Report the winning parameter combination and its mean cross-validated accuracy.
print(
    f"Best Parameters: {grid_search.best_params_}",
    f"Best Accuracy: {grid_search.best_score_:.4f}",
    sep="\n",
)

Best Parameters: {'max_depth': 10, 'min_samples_leaf': 1, 'min_samples_split': 2, 'n_estimators': 50}
Best Accuracy: 0.9044


In [15]:
# Take the final model straight from the grid search instead of retyping the
# best parameters by hand: hard-coded values silently go stale whenever the
# data or the parameter grid changes. GridSearchCV (refit=True by default)
# has already refit this estimator on the whole training split using
# best_params_ and the same random_state=42, so no extra fit is needed.
model = grid_search.best_estimator_
model

In [16]:
# Predict on the held-out test split and derive the evaluation artefacts.
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
conf_matrix = confusion_matrix(y_true=y_test, y_pred=y_pred)
class_report = classification_report(y_true=y_test, y_pred=y_pred)

In [17]:
# Accuracy of the final model on the held-out test split.
accuracy

0.9117647058823529

In [18]:
# Confusion matrix of the final model's predictions on the held-out test split.
conf_matrix

array([[ 95,  12],
       [  9, 122]])

In [19]:
def predict_heart_attack(features, estimator=None):
    """Predict the target class for a single record.

    Parameters
    ----------
    features : sequence of numbers
        One value per feature, in the same column order the estimator was
        fitted with. The values are reshaped into a single row.
    estimator : object with a ``predict`` method, optional
        Fitted classifier to use. Defaults to the notebook-level ``model``.

    Returns
    -------
    The first (and only) element of the estimator's prediction for the row.

    Raises
    ------
    ValueError
        If the estimator recorded feature names during fitting and the number
        of supplied values does not match the number of those names.
    """
    clf = model if estimator is None else estimator
    row = np.asarray(features).reshape(1, -1)

    # Estimators fitted on a DataFrame remember its column names; passing a
    # bare array to them triggers scikit-learn's "X does not have valid
    # feature names" warning. Rebuild a one-row frame with those names so the
    # input matches what the model was trained on.
    names = getattr(clf, "feature_names_in_", None)
    if names is None:
        return clf.predict(row)[0]

    if row.shape[1] != len(names):
        raise ValueError(
            f"Expected {len(names)} feature values ({', '.join(map(str, names))}), "
            f"got {row.shape[1]}."
        )

    record = pd.DataFrame(row, columns=list(names))
    return clf.predict(record)[0]

# Sample record, one value per feature in the column order of X:
# age, chest pain type, resting bp s, cholesterol, fasting blood sugar,
# resting ecg, max heart rate, exercise angina, oldpeak, ST slope.
user_input = [37, 2, 130, 283, 0, 1, 98, 0, 0, 1]


In [20]:
import warnings
warnings.simplefilter("ignore", UserWarning)


In [21]:
# Run the single-record prediction on the sample input defined above.
result = predict_heart_attack(user_input)

In [22]:
# Report the prediction in words: label 1 is shown as "Yes", anything else as "No".
print(f"Predicted Heart Attack Risk: {'Yes' if result == 1 else 'No'}")


Predicted Heart Attack Risk: No
