In [31]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.naive_bayes import GaussianNB
from sklearn.metrics import classification_report

# Fetch the heart-disease CSV hosted on GitHub into a DataFrame
url = 'https://raw.githubusercontent.com/AKRITI-07/mini_project/main/heart.csv'
df = pd.read_csv(url)

# Show the column index as a quick sanity check
print("Columns in dataset:", df.columns)

# Columns that are label-encoded below
categorical_columns = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

# One encoder per categorical column; the fitted encoders are kept so that
# prediction-time input can be encoded with the same mapping
label_encoders = {name: LabelEncoder() for name in categorical_columns}
for col, le in label_encoders.items():
    df[col] = le.fit_transform(df[col])

# Target is the HeartDisease column (1: has heart disease, 0: no heart disease);
# every remaining column is a feature
y = df['HeartDisease']
X = df.drop('HeartDisease', axis=1)

# Hold out 20% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

# Gaussian Naive Bayes classifier, fitted on the training split
model = GaussianNB()
model.fit(X_train, y_train)

# Predict the held-out rows and print the per-class metrics
y_pred = model.predict(X_test)
print("\nClassification Report:\n")
print(classification_report(y_test, y_pred))


# Long-form spellings offered by the prompts, mapped to the short codes the
# encoders were fitted on. The codes are assumed from the dataset's usual
# vocabulary (the recorded session accepted 'M', 'ATA', 'N') -- TODO confirm
# against label_encoders[col].classes_. A wrong entry here cannot produce a
# wrong encoding: _encode_category only accepts codes present in classes_.
_CATEGORY_ALIASES = {
    'Sex': {'male': 'M', 'female': 'F'},
    'ChestPainType': {
        'typical angina': 'TA',
        'atypical angina': 'ATA',
        'non-anginal pain': 'NAP',
        'asymptomatic': 'ASY',
    },
    'RestingECG': {
        'st-t wave abnormality': 'ST',
        'left ventricular hypertrophy': 'LVH',
    },
    'ExerciseAngina': {'yes': 'Y', 'no': 'N'},
}


def _encode_category(column, raw_value):
    """Return the integer code for *raw_value* using the fitted encoder of *column*.

    Matching ignores case and surrounding whitespace, and also accepts the
    long-form spellings listed in ``_CATEGORY_ALIASES``.

    Raises:
        ValueError: if the value matches none of the encoder's known labels.
    """
    encoder = label_encoders[column]
    # Case-insensitive index of the labels the encoder actually saw in fit().
    known = {str(label).lower(): label for label in encoder.classes_}

    key = raw_value.strip().lower()
    label = known.get(key)
    if label is None:
        alias = _CATEGORY_ALIASES.get(column, {}).get(key)
        if alias is not None:
            label = known.get(alias.lower())

    if label is None:
        raise ValueError(
            "Unrecognised value {!r} for {}; expected one of: {}".format(
                raw_value, column, ', '.join(str(c) for c in encoder.classes_)
            )
        )
    return encoder.transform([label])[0]


# Function to take user input and predict heart disease
def predict_heart_disease():
    """Prompt for one patient's details on stdin and print the model's prediction.

    Uses the module-level ``label_encoders`` (fitted per categorical column)
    and ``model`` (the fitted classifier). Categorical answers may be given
    either as the dataset's own labels or as the long-form names shown in the
    prompts, in any letter case.

    Returns:
        None. The verdict is printed.

    Raises:
        ValueError: if a numeric answer cannot be parsed, or a categorical
            answer is not a known label for its column.
    """
    print("\nEnter Patient Details for Prediction:")

    # User input for required features
    age = int(input("Enter Age: "))
    sex = input("Enter Sex (Male/Female): ")
    chest_pain = input("Enter Chest Pain Type (Typical Angina, Atypical Angina, Non-Anginal Pain, Asymptomatic): ")
    resting_bp = float(input("Enter Resting Blood Pressure (in mm Hg): "))
    cholesterol = float(input("Enter Serum Cholesterol (in mg/dl): "))
    fasting_bs = int(input("Fasting Blood Sugar > 120 mg/dl (Yes: 1, No: 0): "))
    resting_ecg = input("Enter Resting ECG results (Normal, ST-T wave abnormality, Left Ventricular Hypertrophy): ")
    max_hr = int(input("Enter Maximum Heart Rate Achieved: "))
    exercise_angina = input("Exercise Induced Angina (Yes/No): ")
    oldpeak = float(input("Enter ST depression induced by exercise: "))
    st_slope = input("Enter the slope of the peak exercise ST segment (Up, Flat, Down): ")

    # Single-row frame; keys are listed in the same order as the dataset's
    # feature columns so they line up with what the model was fitted on.
    user_data = pd.DataFrame({
        'Age': [age],
        'Sex': [_encode_category('Sex', sex)],
        'ChestPainType': [_encode_category('ChestPainType', chest_pain)],
        'RestingBP': [resting_bp],
        'Cholesterol': [cholesterol],
        'FastingBS': [fasting_bs],
        'RestingECG': [_encode_category('RestingECG', resting_ecg)],
        'MaxHR': [max_hr],
        'ExerciseAngina': [_encode_category('ExerciseAngina', exercise_angina)],
        'Oldpeak': [oldpeak],
        'ST_Slope': [_encode_category('ST_Slope', st_slope)],
    })

    # Predict heart disease
    prediction = model.predict(user_data)[0]

    # Output the prediction result
    if prediction == 1:
        print("\nPrediction: The patient is **likely** to have heart disease.")
    else:
        print("\nPrediction: The patient is **unlikely** to have heart disease.")


# Call function to take user input and make prediction. Guarded so the
# interactive prompts only run when this code is executed directly (script or
# notebook cell), not when it is imported from another module.
if __name__ == "__main__":
    predict_heart_disease()


Columns in dataset: Index(['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope',
       'HeartDisease'],
      dtype='object')

Classification Report:

              precision    recall  f1-score   support

           0       0.79      0.84      0.82        77
           1       0.88      0.84      0.86       107

    accuracy                           0.84       184
   macro avg       0.84      0.84      0.84       184
weighted avg       0.84      0.84      0.84       184


Enter Patient Details for Prediction:


Enter Age:  34
Enter Sex (Male/Female):  M
Enter Chest Pain Type (Typical Angina, Atypical Angina, Non-Anginal Pain, Asymptomatic):  ATA
Enter Resting Blood Pressure (in mm Hg):  140
Enter Serum Cholesterol (in mg/dl):  139
Fasting Blood Sugar > 120 mg/dl (Yes: 1, No: 0):  1
Enter Resting ECG results (Normal, ST-T wave abnormality, Left Ventricular Hypertrophy):  Normal
Enter Maximum Heart Rate Achieved:  140
Exercise Induced Angina (Yes/No):  N
Enter ST depression induced by exercise:  5
Enter the slope of the peak exercise ST segment (Up, Flat, Down):  Up



Prediction: The patient is **likely** to have heart disease.
