In [15]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# Load the dataset
df = pd.read_csv('heart.csv')

# Display first few rows and data info
print(df.head())
print(df.info())

# Data Description
print(df.describe().T)

# --- Step 1: Drop impossible values ---
df = df[(df['RestingBP'] != 0) & (df['Cholesterol'] != 0)]

# --- Step 2: Handle negative values in Oldpeak ---
median_oldpeak = df['Oldpeak'].median()
df['Oldpeak'] = df['Oldpeak'].apply(lambda x: median_oldpeak if x < 0 else x)

# --- Step 3: Check for any remaining missing values ---
print("Missing values after cleaning:")
print(df.isnull().sum())

# --- Step 1: One-hot encode categorical features ---
categorical_features = ['Sex', 'ChestPainType', 'RestingECG', 'ExerciseAngina', 'ST_Slope']
encoder = OneHotEncoder(sparse_output=False)
X_cat = encoder.fit_transform(df[categorical_features])
encoded_feature_names = encoder.get_feature_names_out(categorical_features)
X_cat_df = pd.DataFrame(X_cat, columns=encoded_feature_names, index=df.index)
df_encoded = pd.concat([df.drop(columns=categorical_features), X_cat_df], axis=1)

# Prepare features and target
X = df_encoded.drop('HeartDisease', axis=1).values
y = df_encoded['HeartDisease'].values
feature_names = df_encoded.drop('HeartDisease', axis=1).columns

# Splitting into train and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Feature Scaling
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)


   Age Sex ChestPainType  RestingBP  Cholesterol  FastingBS RestingECG  MaxHR  \
0   40   M           ATA        140          289          0     Normal    172   
1   49   F           NAP        160          180          0     Normal    156   
2   37   M           ATA        130          283          0         ST     98   
3   48   F           ASY        138          214          0     Normal    108   
4   54   M           NAP        150          195          0     Normal    122   

  ExerciseAngina  Oldpeak ST_Slope  HeartDisease  
0              N      0.0       Up             0  
1              N      1.0     Flat             1  
2              N      0.0       Up             0  
3              Y      1.5     Flat             1  
4              N      0.0       Up             0  
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 918 entries, 0 to 917
Data columns (total 12 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Age    

In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
from sklearn.model_selection import GridSearchCV, StratifiedKFold

# --- KNN Model ---
def knn_tune_clf_hyperparameters(clf, param_grid, X_train, y_train, scoring='recall', n_splits=3):
    
    # Cross-validation strategy: maintains class balance in each fold
    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)

    # Grid search for best hyperparameters
    clf_grid = GridSearchCV(clf, param_grid, cv=cv, scoring=scoring, n_jobs=-1)

    # Fit model
    clf_grid.fit(X_train, y_train)

    # Extract best model and parameters
    best_model = clf_grid.best_estimator_
    best_params = clf_grid.best_params_

    return best_model, best_params

# Define your classifier
knn = KNeighborsClassifier()

# Define the hyperparameter grid
param_grid = {
    'n_neighbors': list(range(1, 21)),
    'weights': ['uniform', 'distance'],
    'metric': ['euclidean', 'manhattan'],
    'leaf_size': [10, 20, 30, 40],
}

# Call the tuning function
best_knn_model, best_knn_params = knn_tune_clf_hyperparameters(knn, param_grid, X_train, y_train)

print("Best KNN Parameters:", best_knn_params)
print(classification_report(y_train, best_knn_model.predict(X_train)))

# --- Random Forest Model ---
def rf_tune_clf_hyperparameters(clf, param_grid, X_train, y_train, scoring='recall', n_splits=3):
    
    # Cross-validation strategy: maintains class balance in each fold
    cv = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=0)

    # Grid search for best hyperparameters
    clf_grid = GridSearchCV(clf, param_grid, cv=cv, scoring=scoring, n_jobs=-1)

    # Fit model
    clf_grid.fit(X_train, y_train)

    # Extract best model and parameters
    best_model = clf_grid.best_estimator_
    best_params = clf_grid.best_params_

    return best_model, best_params

# Define your classifier
rf_model = RandomForestClassifier(random_state=42)

# Define the hyperparameter grid
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [None, 5, 10, 20],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
    'max_features': ['sqrt', 'log2']
}

# Call the tuning function
best_rf_model, best_rf_params = rf_tune_clf_hyperparameters(rf_model, param_grid, X_train, y_train)

print("Best Random Forest Parameters:", best_rf_params)

# Predict on test data using the best tuned model
y_pred = best_rf_model.predict(X_test)

# Evaluate accuracy
accuracy = accuracy_score(y_test, y_pred)
print(f"Test Accuracy of Best Random Forest Model: {accuracy:.4f}")
# Print out the classification_report
print(classification_report(y_test, y_pred))


# --- Logistic Regression Model ---
lr_model = LogisticRegression(max_iter=1000)
lr_model.fit(X_train_scaled, y_train)

# Logistic Regression Model Evaluation
lr_train_pred = lr_model.predict(X_train_scaled)
lr_test_pred = lr_model.predict(X_test_scaled)
lr_train_accuracy = accuracy_score(y_train, lr_train_pred)
lr_test_accuracy = accuracy_score(y_test, lr_test_pred)
print(f"Logistic Regression Train Accuracy: {lr_train_accuracy:.4f}")
print(f"Logistic Regression Test Accuracy: {lr_test_accuracy:.4f}")
print("Logistic Regression Classification Report:")
print(classification_report(y_test, lr_test_pred))


Best KNN Parameters: {'leaf_size': 10, 'metric': 'manhattan', 'n_neighbors': 3, 'weights': 'distance'}
Best Random Forest Parameters: {'max_depth': 5, 'max_features': 'sqrt', 'min_samples_leaf': 4, 'min_samples_split': 10, 'n_estimators': 50}
Test Accuracy of Best Random Forest Model: 0.9267
              precision    recall  f1-score   support

           0       0.92      0.93      0.92        71
           1       0.94      0.92      0.93        79

    accuracy                           0.93       150
   macro avg       0.93      0.93      0.93       150
weighted avg       0.93      0.93      0.93       150

Logistic Regression Train Accuracy: 0.8607
Logistic Regression Test Accuracy: 0.8933
Logistic Regression Classification Report:
              precision    recall  f1-score   support

           0       0.86      0.93      0.89        71
           1       0.93      0.86      0.89        79

    accuracy                           0.89       150
   macro avg       0.89      0.90 

In [20]:
import customtkinter as ctk
from tkinter import messagebox
import numpy as np

# GUI code
ctk.set_appearance_mode("dark")
ctk.set_default_color_theme("blue")

class HeartDiseasePredictor:
    def __init__(self):
        self.window = ctk.CTk()
        self.window.title("❤️ Heart Disease Prediction System")
        self.window.geometry("1000x800")
        self.window.resizable(True, True)
        
        # Model variables
        self.models = {
            "K-Nearest Neighbors": knn_model,
            "Random Forest": rf_model,
            "Logistic Regression": lr_model
        }
        self.current_model = "K-Nearest Neighbors"
        self.scaler = scaler
        
        # Create the UI
        self._create_ui()
        
    def _create_ui(self):
        """Create the main user interface"""
        main_frame = ctk.CTkFrame(self.window)
        main_frame.pack(fill="both", expand=True, padx=20, pady=20)
        
        title_label = ctk.CTkLabel(
            main_frame,
            text="❤️ Heart Health Risk Check",
            font=ctk.CTkFont(size=28, weight="bold")
        )
        title_label.pack(pady=(20, 10))
        
        info_label = ctk.CTkLabel(
            main_frame,
            text="📊 Check your heart now ",
            font=ctk.CTkFont(size=14, weight="bold"),
            text_color="orange"
        )
        info_label.pack(pady=(0, 20))
        
        self.scrollable_frame = ctk.CTkScrollableFrame(main_frame, height=400)
        self.scrollable_frame.pack(fill="both", expand=True, padx=20, pady=(0, 20))
        
        self._create_input_fields()
        self._create_prediction_section(main_frame)
    
    def _create_input_fields(self):
        # Input fields as provided by you
        self.input_fields = {
            "st_slope": {
                "label": "📈 Heart Response to Exercise",
                "type": "option",
                "values": ["Getting Better (Up)", "Staying Same (Flat)"],
                "default": "Getting Better (Up)"
            },
            "exercise_angina": {
                "label": "💔 Chest Pain During Exercise",
                "type": "option",
                "values": ["No Pain", "Yes, I Get Pain"],
                "default": "No Pain"
            },
            "chest_pain_type": {
                "label": "🫀 Type of Chest Discomfort",
                "type": "option",
                "values": ["No Symptoms (ASY)", "Unusual Chest Pain (ATA)", "Not Heart-Related Pain (NAP)"],
                "default": "No Symptoms (ASY)"
            },
            "oldpeak": {
                "label": "📊 Heart Stress Test Result",
                "type": "entry",
                "default": "0.0"
            },
            "max_hr": {
                "label": "💓 Fastest Heart Rate",
                "type": "entry",
                "default": "150"
            },
            "age": {
                "label": "🎂 Your Age",
                "type": "entry",
                "default": "50"
            },
            "sex": {
                "label": "👤 Gender",
                "type": "option",
                "values": ["Male", "Female"],
                "default": "Male"
            },
            "resting_bp": {
                "label": "🩺 Blood Pressure at Rest",
                "type": "entry",
                "default": "120"
            }
        }
        
        self.input_widgets = {}
        sorted_fields = sorted(self.input_fields.items(), key=lambda x: x[1]['label'])
        left_frame = ctk.CTkFrame(self.scrollable_frame)
        left_frame.pack(side="left", fill="both", expand=True, padx=(0, 10))
        right_frame = ctk.CTkFrame(self.scrollable_frame)
        right_frame.pack(side="right", fill="both", expand=True, padx=(10, 0))
        
        mid_point = len(sorted_fields) // 2
        for i, (field_name, field_info) in enumerate(sorted_fields):
            parent_frame = left_frame if i < mid_point else right_frame
            field_container = ctk.CTkFrame(parent_frame)
            field_container.pack(fill="x", padx=10, pady=5)
            label = ctk.CTkLabel(
                field_container,
                text=field_info["label"],
                font=ctk.CTkFont(size=12, weight="bold")
            )
            label.pack(anchor="w", padx=10, pady=(10, 5))
            if field_info["type"] == "entry":
                widget = ctk.CTkEntry(field_container)
                widget.insert(0, field_info["default"])
            else:
                widget = ctk.CTkOptionMenu(field_container, values=field_info["values"])
                widget.set(field_info["default"])
            widget.pack(fill="x", padx=10, pady=(0, 10))
            self.input_widgets[field_name] = widget
    
    def _create_prediction_section(self, parent):
        model_frame = ctk.CTkFrame(parent)
        model_frame.pack(fill="x", padx=20, pady=(0, 10))
        
        model_label = ctk.CTkLabel(
            model_frame,
            text="Select Prediction Model:",
            font=ctk.CTkFont(size=16, weight="bold")
        )
        model_label.pack(side="left", padx=(20, 10), pady=20)
        
        self.model_selector = ctk.CTkOptionMenu(
            model_frame,
            values=list(self.models.keys()),
            command=self._on_model_change
        )
        self.model_selector.pack(side="left", padx=10, pady=20)
        
        self.predict_button = ctk.CTkButton(
            model_frame,
            text="🔍 Predict Risk",
            command=self._predict,
            font=ctk.CTkFont(size=14, weight="bold"),
            height=40,
            width=150
        )
        self.predict_button.pack(side="right", padx=20, pady=20)
        
        self.results_frame = ctk.CTkFrame(parent)
        self.results_frame.pack(fill="x", padx=20, pady=(0, 20))
        
        self.results_label = ctk.CTkLabel(
            self.results_frame,
            text="Enter patient data and click 'Predict Risk' to see results",
            font=ctk.CTkFont(size=14)
        )
        self.results_label.pack(pady=20)
    
    def _on_model_change(self, selected_model):
        self.current_model = selected_model
        self.results_label.configure(
            text=f"Model changed to: {selected_model}\nEnter patient data and click 'Predict Risk' to see results"
        )
    
    def _get_input_values(self):
        try:
            feature_values = {}
            for field_name, widget in self.input_widgets.items():
                if isinstance(widget, ctk.CTkEntry):
                    feature_values[field_name] = float(widget.get())
                else:
                    feature_values[field_name] = widget.get()
            
            # Create an array with 20 features (matching the number of features after one-hot encoding)
            processed_values = [0.0] * 20
            
            # Feature transformations for user input
            # 1. Process "st_slope" (Heart Response to Exercise)
            if "Getting Better" in feature_values['st_slope']:
                processed_values[0] = 1.0  # ST_Slope_Up
            else:
                processed_values[1] = 1.0  # ST_Slope_Flat
            
            # 2. Process "exercise_angina" (Chest Pain During Exercise)
            if "Yes, I Get Pain" in feature_values['exercise_angina']:
                processed_values[2] = 1.0  # ExerciseAngina_Yes
            else:
                processed_values[3] = 1.0  # ExerciseAngina_No
            
            # 3. Process "chest_pain_type" (Chest Pain Type)
            chest_pain = feature_values['chest_pain_type']
            if "No Symptoms" in chest_pain:
                processed_values[4] = 1.0  # ChestPainType_ASY
            elif "Unusual Chest Pain" in chest_pain:
                processed_values[5] = 1.0  # ChestPainType_ATA
            elif "Not Heart-Related" in chest_pain:
                processed_values[6] = 1.0  # ChestPainType_NAP
            
            # 4. Other features (these should be mapped to the appropriate indices in `processed_values`)
            processed_values[7] = feature_values['oldpeak']  # Heart Stress Test Result
            processed_values[8] = feature_values['max_hr']  # Fastest Heart Rate
            processed_values[9] = feature_values['age']     # Age
            processed_values[10] = feature_values['resting_bp']  # Blood Pressure at Rest
            
            # 5. Process "sex" (Gender)
            if feature_values['sex'] == 'Female':
                processed_values[11] = 1.0  # Sex_Female
            else:
                processed_values[12] = 1.0  # Sex_Male

            # The rest of the features (make sure to add the proper values for each of the missing ones)
            # For example, you can add missing features here if needed
            
            # Return the input as a 2D array (matching the input shape expected by the model)
            return np.array(processed_values).reshape(1, -1)
        
        except ValueError as e:
            messagebox.showerror("Input Error", f"Please check your input values. Error: {str(e)}")
            return None
        except Exception as e:
            messagebox.showerror("Processing Error", f"Error processing inputs: {str(e)}")
            return None


    def _predict(self):
        input_data = self._get_input_values()
        if input_data is None:
            return
        
        model = self.models[self.current_model]
        if model is None:
            messagebox.showerror("Model Error", f"Model '{self.current_model}' is not available yet.")
            return
        
        try:
            input_scaled = self.scaler.transform(input_data)
            prediction = model.predict(input_scaled)[0]
            probability = model.predict_proba(input_scaled)[0] if hasattr(model, 'predict_proba') else [0.5, 0.5]
            risk_level = "HIGH RISK" if prediction == 1 else "LOW RISK"
            risk_color = "red" if prediction == 1 else "green"
            confidence = max(probability) * 100
            
            result_text = f"""
🏥 PREDICTION RESULTS
━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━

Model Used: {self.current_model}
Risk Assessment: {risk_level}
Confidence: {confidence:.1f}%

⚠️  IMPORTANT DISCLAIMER ⚠️
This prediction is for educational purposes only.
Always consult with healthcare professionals for proper medical diagnosis and treatment.
            """
            self.results_label.configure(
                text=result_text,
                text_color=risk_color if prediction == 1 else "lightgreen"
            )
        except Exception as e:
            messagebox.showerror("Prediction Error", f"Error making prediction: {str(e)}")
    
    def run(self):
        self.window.mainloop()

def integrate_trained_models(app, knn_model=None, rf_model=None, lr_model=None, scaler=None):
    """
    Integrate your trained models into the application.
    
    Args:
        app: HeartDiseasePredictor instance (GUI app).
        knn_model: Your trained KNN model.
        rf_model: Your trained Random Forest model.
        lr_model: Your trained Logistic Regression model.
        scaler: Your fitted StandardScaler.
    """
    if knn_model is not None:
        app.models["K-Nearest Neighbors"] = knn_model
    if rf_model is not None:
        app.models["Random Forest"] = rf_model
    if lr_model is not None:
        app.models["Logistic Regression"] = lr_model
    if scaler is not None:
        app.scaler = scaler

# Example usage
if __name__ == "__main__":
    app = HeartDiseasePredictor()
    
    # Integrate the trained models and scaler into the app
    integrate_trained_models(app, knn_model=best_knn_model, rf_model=best_rf_model, lr_model=lr_model, scaler=scaler)

    
    app.run()