In [1]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder, StandardScaler
from sklearn.linear_model import LogisticRegression  # MaxEnt Implementation
from sklearn.metrics import classification_report

# Load dataset from GitHub
url = 'https://raw.githubusercontent.com/AKRITI-07/mini_project/main/heart.csv'
df = pd.read_csv(url)

# Display column names
print("Columns in dataset:", df.columns)

# Identify categorical columns
categorical_columns = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']

# Integer-encode every categorical column. The fitted encoder is kept per column
# so that new inputs can later be encoded with exactly the same label -> integer mapping.
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le  # Store encoders for future use

# Define features and target variable (features are still unscaled at this point)
X_raw = df.drop(columns=['HeartDisease'])  # Features
y = df['HeartDisease']  # Target variable

# Split BEFORE scaling: fitting the scaler on the full dataset would let the
# test rows influence the mean/variance used for preprocessing (data leakage)
# and make the evaluation below optimistic.
X_train_raw, X_test_raw, y_train, y_test = train_test_split(
    X_raw, y, test_size=0.2, random_state=42
)

# Standardize features (recommended for logistic regression) using statistics
# learned from the training rows only.
scaler = StandardScaler()
scaler.fit(X_train_raw)

# Keep the scaled data as DataFrames so column names and row indices are preserved
# (the model is then fitted with feature names, matching later DataFrame inputs).
X_train = pd.DataFrame(scaler.transform(X_train_raw), columns=X_raw.columns, index=X_train_raw.index)
X_test = pd.DataFrame(scaler.transform(X_test_raw), columns=X_raw.columns, index=X_test_raw.index)

# `X` remains the scaled full feature matrix, now scaled with the train-fitted statistics.
X = pd.DataFrame(scaler.transform(X_raw), columns=X_raw.columns, index=X_raw.index)

# Train the Maximum Entropy (Logistic Regression) model
maxent_model = LogisticRegression(max_iter=1000, solver='lbfgs')  # MaxEnt Model
maxent_model.fit(X_train, y_train)

# Make predictions on the held-out test set
y_pred = maxent_model.predict(X_test)

# Generate classification report
print("\n=== Classification Report ===\n", classification_report(y_test, y_pred))

# Interactive single-patient prediction built on the fitted encoders, scaler and model
def _category_prompt(text, col):
    """Build an input prompt listing the labels the fitted encoder for `col` accepts."""
    options = ", ".join(str(label) for label in label_encoders[col].classes_)
    return f"{text} ({options}): "


def _encode_category(col, raw):
    """Encode one user-typed label for `col`, ignoring case and surrounding whitespace.

    Raises:
        ValueError: propagated from ``LabelEncoder.transform`` when the label
            does not match any class the encoder was fitted on.
    """
    encoder = label_encoders[col]
    cleaned = raw.strip()
    # Map case-folded labels back to the exact spelling the encoder knows.
    by_folded = {str(label).casefold(): label for label in encoder.classes_}
    return encoder.transform([by_folded.get(cleaned.casefold(), cleaned)])[0]


def predict_heart_disease():
    """Prompt for one patient's details and print the model's heart-disease prediction.

    Reads eleven values via ``input()``, encodes the categorical ones with the
    module-level ``label_encoders``, scales the row with ``scaler`` and classifies
    it with ``maxent_model``. Column order is taken from ``X.columns``.

    Categorical prompts list the labels stored in each encoder's ``classes_``,
    so the options shown are always the ones that will actually be accepted.

    Returns:
        None. The result, or an error message for invalid input, is printed.
    """
    print("\nEnter patient details for prediction:")

    numeric_error = ("\nError: Invalid input for numeric variables. "
                     "Please enter finite numbers (and 0 or 1 for Fasting Blood Sugar).")

    # User inputs. int()/float() raise ValueError on non-numeric text; report it
    # instead of letting the traceback escape.
    try:
        age = int(input("Enter Age: "))
        sex = input(_category_prompt("Enter Sex", 'Sex'))
        chest_pain = input(_category_prompt("Enter Chest Pain Type", 'ChestPainType'))
        resting_bp = float(input("Enter Resting Blood Pressure (in mm Hg): "))
        cholesterol = float(input("Enter Serum Cholesterol (in mg/dl): "))
        fasting_bs = int(input("Fasting Blood Sugar > 120 mg/dl (Yes: 1, No: 0): "))
        resting_ecg = input(_category_prompt("Enter Resting ECG results", 'RestingECG'))
        max_hr = int(input("Enter Maximum Heart Rate Achieved: "))
        exercise_angina = input(_category_prompt("Exercise Induced Angina", 'ExerciseAngina'))
        oldpeak = float(input("Enter ST depression induced by exercise: "))
        st_slope = input(_category_prompt("Enter the slope of the peak exercise ST segment", 'ST_Slope'))
    except ValueError:
        print(numeric_error)
        return

    # float() happily parses "nan"/"inf", and the prompt only offers 0/1 for fasting sugar.
    if fasting_bs not in (0, 1) or not np.isfinite([resting_bp, cholesterol, oldpeak]).all():
        print(numeric_error)
        return

    # Encode categorical inputs
    try:
        sex_encoded = _encode_category('Sex', sex)
        chest_pain_encoded = _encode_category('ChestPainType', chest_pain)
        resting_ecg_encoded = _encode_category('RestingECG', resting_ecg)
        exercise_angina_encoded = _encode_category('ExerciseAngina', exercise_angina)
        st_slope_encoded = _encode_category('ST_Slope', st_slope)
    except ValueError:
        print("\nError: Invalid input for categorical variables. Please use the correct options.")
        return

    # Create input dataframe; values are listed in the same order as the training columns.
    user_data = pd.DataFrame([[age, sex_encoded, chest_pain_encoded, resting_bp, cholesterol, fasting_bs,
                               resting_ecg_encoded, max_hr, exercise_angina_encoded, oldpeak, st_slope_encoded]],
                             columns=X.columns)

    # Standardize input data with the already-fitted scaler (never re-fit on user input)
    user_scaled = pd.DataFrame(scaler.transform(user_data), columns=user_data.columns)

    # Make prediction
    prediction = maxent_model.predict(user_scaled)[0]

    # Display result
    if prediction == 1:
        print("\nPrediction: The patient is **likely** to have Heart Disease.")
    else:
        print("\nPrediction: The patient is **unlikely** to have Heart Disease.")

# Run one interactive prediction; execution waits for the user to answer each input() prompt.
predict_heart_disease()


Columns in dataset: Index(['Age', 'Sex', 'ChestPainType', 'RestingBP', 'Cholesterol', 'FastingBS',
       'RestingECG', 'MaxHR', 'ExerciseAngina', 'Oldpeak', 'ST_Slope',
       'HeartDisease'],
      dtype='object')

=== Classification Report ===
               precision    recall  f1-score   support

           0       0.78      0.88      0.83        77
           1       0.91      0.82      0.86       107

    accuracy                           0.85       184
   macro avg       0.84      0.85      0.85       184
weighted avg       0.85      0.85      0.85       184


Enter patient details for prediction:


Enter Age:  48
Enter Sex (Male/Female):  M
Enter Chest Pain Type (Typical Angina, Atypical Angina, Non-Anginal Pain, Asymptomatic):  NAP
Enter Resting Blood Pressure (in mm Hg):  115
Enter Serum Cholesterol (in mg/dl):  289
Fasting Blood Sugar > 120 mg/dl (Yes: 1, No: 0):  1
Enter Resting ECG results (Normal, ST-T wave abnormality, Left Ventricular Hypertrophy):  ST
Enter Maximum Heart Rate Achieved:  99
Exercise Induced Angina (Yes/No):  N
Enter ST depression induced by exercise:  1.5
Enter the slope of the peak exercise ST segment (Up, Flat, Down):  Down



Prediction: The patient is **likely** to have Heart Disease.
