In [4]:
# Import necessary libraries
import pandas as pd

# Load the dataset
data = pd.read_csv('Heart_disease_cleveland_new.csv')

# Display basic information about the dataset
print("\nDataset Overview:")
print(data.info())

# Check for missing values (if any)
print("\nMissing Values Check:")
print(data.isnull().sum())

# Separate features (X) and target variable (y)
X = data.drop('target', axis=1)  # Features
y = data['target']               # Target (1 = Heart Disease, 0 = No Heart Disease)


print(f"\nFeature set shape: {X.shape}")
print(f"Target set shape: {y.shape}")



Dataset Overview:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 303 entries, 0 to 302
Data columns (total 14 columns):
 #   Column    Non-Null Count  Dtype  
---  ------    --------------  -----  
 0   age       303 non-null    int64  
 1   sex       303 non-null    int64  
 2   cp        303 non-null    int64  
 3   trestbps  303 non-null    int64  
 4   chol      303 non-null    int64  
 5   fbs       303 non-null    int64  
 6   restecg   303 non-null    int64  
 7   thalach   303 non-null    int64  
 8   exang     303 non-null    int64  
 9   oldpeak   303 non-null    float64
 10  slope     303 non-null    int64  
 11  ca        303 non-null    int64  
 12  thal      303 non-null    int64  
 13  target    303 non-null    int64  
dtypes: float64(1), int64(13)
memory usage: 33.3 KB
None

Missing Values Check:
age         0
sex         0
cp          0
trestbps    0
chol        0
fbs         0
restecg     0
thalach     0
exang       0
oldpeak     0
slope       0
ca          0
thal

In [6]:
# Import necessary libraries
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier

# Split the dataset into training and testing sets (70% train, 30% test)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.3, random_state=42)

# Initialize the K-Nearest Neighbors classifier with 5 neighbors
knn = KNeighborsClassifier(n_neighbors=5)

# Train the model on the training set
knn.fit(X_train, y_train)

print("\nModel training complete.")



Model training complete.


In [7]:
# Import the accuracy metric
from sklearn.metrics import accuracy_score

# Predict the target variable on the test set
y_pred = knn.predict(X_test)

# Calculate the model’s accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"\nModel Accuracy: {accuracy * 100:.2f}%")



Model Accuracy: 68.13%


In [10]:
import pandas as pd

# Introduction for the user
print("\nWelcome to the Heart Disease Risk Prediction System.")
print("Please provide the following information for risk assessment:")

# Collect user input with basic error handling
try:
    age = int(input("Age: "))
    sex = int(input("Sex (1 = Male, 0 = Female): "))
    cp = int(input("Chest Pain Type (0, 1, 2, 3): "))
    trestbps = int(input("Resting Blood Pressure (e.g., 120): "))
    chol = int(input("Cholesterol Level (e.g., 200): "))
    fbs = int(input("Fasting Blood Sugar > 120 mg/dl (1 = Yes, 0 = No): "))
    restecg = int(input("Resting ECG Results (0, 1, 2): "))
    thalach = int(input("Max Heart Rate Achieved: "))
    exang = int(input("Exercise-Induced Angina (1 = Yes, 0 = No): "))
    oldpeak = float(input("ST Depression Induced by Exercise: "))
    slope = int(input("Slope of Peak Exercise ST Segment (0, 1, 2): "))
    ca = int(input("Major Vessels Colored by Fluoroscopy (0-4): "))
    thal = int(input("Thalassemia (1 = Normal, 2 = Fixed Defect, 3 = Reversible Defect): "))

    # Gather inputs into a DataFrame for prediction
    user_data = pd.DataFrame([[age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal]],
                              columns=X.columns)  # Use the same feature names as X
    print("\nData entry successful.")

    # Step 1: Make a prediction using the trained model
    prediction = knn.predict(user_data)

    # Step 2: Display the prediction result
    if prediction[0] == 1:
        print("\nPrediction Result: High Risk of Heart Disease.")
    else:
        print("\nPrediction Result: Low Risk of Heart Disease.")

except ValueError:
    print("\nInvalid input! Please enter numeric values only.")




Welcome to the Heart Disease Risk Prediction System.
Please provide the following information for risk assessment:


Age:  20
Sex (1 = Male, 0 = Female):  1
Chest Pain Type (0, 1, 2, 3):  3
Resting Blood Pressure (e.g., 120):  100
Cholesterol Level (e.g., 200):  180
Fasting Blood Sugar > 120 mg/dl (1 = Yes, 0 = No):  1
Resting ECG Results (0, 1, 2):  1
Max Heart Rate Achieved:  40
Exercise-Induced Angina (1 = Yes, 0 = No):  1
ST Depression Induced by Exercise:  20
Slope of Peak Exercise ST Segment (0, 1, 2):  2
Major Vessels Colored by Fluoroscopy (0-4):  3
Thalassemia (1 = Normal, 2 = Fixed Defect, 3 = Reversible Defect):  2



Data entry successful.

Prediction Result: High Risk of Heart Disease.


In [10]:
import pandas as pd

# Introduction for the user
print("\nWelcome to the Heart Disease Risk Prediction System.")
print("Please provide the following information for risk assessment:")

# Collect user input with basic error handling
try:
    age = int(input("Age: "))
    sex = int(input("Sex (1 = Male, 0 = Female): "))
    cp = int(input("Chest Pain Type (0, 1, 2, 3): "))
    trestbps = int(input("Resting Blood Pressure (e.g., 120): "))
    chol = int(input("Cholesterol Level (e.g., 200): "))
    fbs = int(input("Fasting Blood Sugar > 120 mg/dl (1 = Yes, 0 = No): "))
    restecg = int(input("Resting ECG Results (0, 1, 2): "))
    thalach = int(input("Max Heart Rate Achieved: "))
    exang = int(input("Exercise-Induced Angina (1 = Yes, 0 = No): "))
    oldpeak = float(input("ST Depression Induced by Exercise: "))
    slope = int(input("Slope of Peak Exercise ST Segment (0, 1, 2): "))
    ca = int(input("Major Vessels Colored by Fluoroscopy (0-4): "))
    thal = int(input("Thalassemia (1 = Normal, 2 = Fixed Defect, 3 = Reversible Defect): "))

    # Gather inputs into a DataFrame for prediction
    user_data = pd.DataFrame([[age, sex, cp, trestbps, chol, fbs, restecg, thalach, exang, oldpeak, slope, ca, thal]],
                              columns=X.columns)  # Use the same feature names as X
    print("\nData entry successful.")

    # Step 1: Make a prediction using the trained model
    prediction = knn.predict(user_data)

    # Step 2: Display the prediction result
    if prediction[0] == 1:
        print("\nPrediction Result: High Risk of Heart Disease.")
    else:
        print("\nPrediction Result: Low Risk of Heart Disease.")

except ValueError:
    print("\nInvalid input! Please enter numeric values only.")




Welcome to the Heart Disease Risk Prediction System.
Please provide the following information for risk assessment:


Age:  20
Sex (1 = Male, 0 = Female):  1
Chest Pain Type (0, 1, 2, 3):  3
Resting Blood Pressure (e.g., 120):  100
Cholesterol Level (e.g., 200):  180
Fasting Blood Sugar > 120 mg/dl (1 = Yes, 0 = No):  1
Resting ECG Results (0, 1, 2):  1
Max Heart Rate Achieved:  40
Exercise-Induced Angina (1 = Yes, 0 = No):  1
ST Depression Induced by Exercise:  20
Slope of Peak Exercise ST Segment (0, 1, 2):  2
Major Vessels Colored by Fluoroscopy (0-4):  3
Thalassemia (1 = Normal, 2 = Fixed Defect, 3 = Reversible Defect):  2



Data entry successful.

Prediction Result: High Risk of Heart Disease.
