# 2.6 Hyperparameter Tuning

**Random Forest Example with GridSearchCV**

In [113]:
from sklearn.metrics import accuracy_score, roc_auc_score
from sklearn.model_selection import GridSearchCV

# Candidate hyperparameters: 3 * 4 * 3 * 3 = 108 combinations. Each one is
# scored with 5-fold CV (540 fits in total), so this cell can take a while.
param_grid_rf = {
    'n_estimators': [100, 200, 300],
    'max_depth': [None, 5, 10, 15],
    'min_samples_split': [2, 5, 10],
    'min_samples_leaf': [1, 2, 4],
}

# Exhaustive search; candidates are ranked by mean cross-validated ROC AUC.
grid_rf = GridSearchCV(
    RandomForestClassifier(random_state=42),
    param_grid_rf,
    scoring='roc_auc',
    cv=5,
    n_jobs=-1,  # run the fits on all available cores
)
grid_rf.fit(X_train, y_train)

# Winning configuration and its mean CV score
print("✅ Best RF Parameters:", grid_rf.best_params_)
print("Best AUC Score (CV):", grid_rf.best_score_)

# Held-out evaluation: best_estimator_ is the winning configuration refitted
# on the full training set (GridSearchCV's default refit=True).
best_rf = grid_rf.best_estimator_
y_prob_rf = best_rf.predict_proba(X_test)[:, 1]  # column 1 = second class in classes_
y_pred_rf = best_rf.predict(X_test)

print("Test Accuracy:", accuracy_score(y_test, y_pred_rf))
print("Test AUC:", roc_auc_score(y_test, y_prob_rf))


✅ Best RF Parameters: {'max_depth': None, 'min_samples_leaf': 4, 'min_samples_split': 2, 'n_estimators': 200}
Best AUC Score (CV): 0.902979020979021
Test Accuracy: 0.8333333333333334
Test AUC: 0.921875


**SVM Example with RandomizedSearchCV**

In [114]:
from scipy.stats import uniform
from sklearn.model_selection import RandomizedSearchCV

# Search space sampled by RandomizedSearchCV.
# scipy's uniform(loc, scale) spans [loc, loc + scale], so C is drawn from [0.1, 10.1].
# gamma is only used by the 'rbf' and 'poly' kernels; with 'linear' it has no effect,
# even though it still shows up in best_params_.
param_dist_svm = {
    'C': uniform(0.1, 10),
    'gamma': ['scale', 'auto'],
    'kernel': ['rbf', 'linear', 'poly'],
}

# 20 random draws from the space above, each scored by 5-fold CV ROC AUC.
# probability=True is what makes predict_proba available on the fitted SVC.
rand_svm = RandomizedSearchCV(
    SVC(probability=True, random_state=42),
    param_dist_svm,
    n_iter=20,
    scoring='roc_auc',
    cv=5,
    random_state=42,  # fixes which candidates get sampled
    n_jobs=-1,
)
rand_svm.fit(X_train, y_train)

# Winning configuration and its mean CV score
print("✅ Best SVM Parameters:", rand_svm.best_params_)
print("Best AUC Score (CV):", rand_svm.best_score_)

# Held-out evaluation of the refitted best candidate.
# accuracy_score / roc_auc_score were imported in the Random Forest cell.
best_svm = rand_svm.best_estimator_
y_prob_svm = best_svm.predict_proba(X_test)[:, 1]  # column 1 = second class in classes_
y_pred_svm = best_svm.predict(X_test)

print("Test Accuracy:", accuracy_score(y_test, y_pred_svm))
print("Test AUC:", roc_auc_score(y_test, y_prob_svm))


✅ Best SVM Parameters: {'C': np.float64(9.588855372533333), 'gamma': 'auto', 'kernel': 'linear'}
Best AUC Score (CV): 0.8964475524475525
Test Accuracy: 0.85
Test AUC: 0.9162946428571428


# 2.7 Saving the Model

In [116]:
import os

import joblib
from sklearn.base import clone
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler

# Create folder if it doesn't exist
os.makedirs("models", exist_ok=True)

# Random Forest pipeline: scaler + a fresh, unfitted copy of the tuned forest.
# Pipeline fits its steps in place, so passing `best_rf` itself would silently
# refit the tuned estimator (the same object as grid_rf.best_estimator_) on
# scaled data. clone() copies the hyperparameters (including random_state) and
# leaves `best_rf` exactly as it was evaluated.
pipeline_rf = Pipeline([
    ('scaler', StandardScaler()),
    ('model', clone(best_rf))
])

# Fit pipeline. This is required, not optional: the StandardScaler above is
# unfitted, so the pipeline cannot predict until fit() has run.
pipeline_rf.fit(X_train, y_train)

# Save pipeline
joblib.dump(pipeline_rf, "models/heart_disease_rf_pipeline.pkl")
print("✅ Random Forest model pipeline saved successfully!")


✅ Random Forest model pipeline saved successfully!


In [117]:
from sklearn.base import clone

# Create folder if it doesn't exist (optional, already done)
os.makedirs("models", exist_ok=True)

# SVM pipeline: scaler + a fresh, unfitted copy of the tuned SVC.
# Pipeline fits its steps in place, so passing `best_svm` itself would silently
# refit the tuned estimator (the same object as rand_svm.best_estimator_) on
# scaled data. clone() keeps the tuned hyperparameters and leaves `best_svm`
# exactly as it was evaluated.
# NOTE(review): the randomized search fitted the bare SVC on X_train, while this
# pipeline standardizes first, so the CV/test scores printed for `best_svm` do
# not describe this pipeline. Whether X_train was already scaled upstream is not
# visible here; re-evaluate pipeline_svm on X_test before relying on it.
pipeline_svm = Pipeline([
    ('scaler', StandardScaler()),  # SVM needs scaling
    ('model', clone(best_svm))     # tuned hyperparameters, unfitted copy
])

# Fit pipeline on training data (fits the scaler, then the SVC on scaled features)
pipeline_svm.fit(X_train, y_train)

# Save pipeline to .pkl
joblib.dump(pipeline_svm, "models/heart_disease_svm_pipeline.pkl")
print("✅ SVM model pipeline saved successfully!")

✅ SVM model pipeline saved successfully!


# 2.8 Streamlit Web UI Development

In [123]:
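# Uncomment to install the web UI dependencies (run once per environment).
# %pip installs into the environment of the running kernel.
# %pip install streamlit pyngrok joblib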


In [None]:
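# app.py
# # Streamlit app source, kept commented out in this notebook. To run it, save the
# # code below (with the leading "# " removed) as app.py in the directory that
# # contains models/, because the model is loaded from a relative path.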
# import streamlit as st
# import pandas as pd
# import numpy as np
# import joblib
# import matplotlib.pyplot as plt
# import seaborn as sns

# # Load the saved model pipeline (choose RF or SVM)
# model = joblib.load("models/heart_disease_rf_pipeline.pkl")
# # model = joblib.load("models/heart_disease_svm_pipeline.pkl")  # optional

# st.set_page_config(page_title="Heart Disease Prediction", layout="wide")
# st.title("❤️ Heart Disease Risk Prediction")

# # --- Sidebar Input ---
# st.sidebar.header("Enter Your Health Data:")

# def user_input_features():
#     age = st.sidebar.slider('Age', 20, 100, 50)
#     sex = st.sidebar.selectbox('Sex', ['Male', 'Female'])
#     cp = st.sidebar.selectbox('Chest Pain Type', ['typical angina', 'atypical angina', 'non-anginal', 'asymptomatic'])
#     trestbps = st.sidebar.slider('Resting Blood Pressure (mmHg)', 80, 200, 120)
#     chol = st.sidebar.slider('Cholesterol (mg/dL)', 100, 400, 200)
#     fbs = st.sidebar.selectbox('Fasting Blood Sugar > 120 mg/dL', ['True', 'False'])
#     restecg = st.sidebar.selectbox('Resting ECG', ['normal', 'ST-T abnormality', 'left ventricular hypertrophy'])
#     thalach = st.sidebar.slider('Max Heart Rate Achieved', 60, 220, 150)
#     exang = st.sidebar.selectbox('Exercise Induced Angina', ['True', 'False'])
#     oldpeak = st.sidebar.slider('ST depression induced by exercise', 0.0, 10.0, 1.0)
#     slope = st.sidebar.selectbox('Slope of ST segment', ['upsloping', 'flat', 'downsloping'])
#     ca = st.sidebar.slider('Number of major vessels colored by fluoroscopy', 0, 3, 0)
#     thal = st.sidebar.selectbox('Thalassemia', ['normal', 'fixed defect', 'reversable defect'])

#     data = {
#         'age': age,
#         'sex': 1 if sex == 'Male' else 0,
#         'trestbps': trestbps,
#         'chol': chol,
#         'fbs': 1 if fbs == 'True' else 0,
#         'restecg_1': 1 if restecg == 'ST-T abnormality' else 0,
#         'restecg_2': 1 if restecg == 'left ventricular hypertrophy' else 0,
#         'thalach': thalach,
#         'exang': 1 if exang == 'True' else 0,
#         'oldpeak': oldpeak,
#         'slope_2': 1 if slope == 'flat' else 0,
#         'slope_3': 1 if slope == 'downsloping' else 0,
#         'ca': ca,
#         'thal_6.0': 1 if thal == 'fixed defect' else 0,
#         'thal_7.0': 1 if thal == 'reversable defect' else 0,
#         # Add cp dummy variables
#         'cp_2': 1 if cp == 'atypical angina' else 0,
#         'cp_3': 1 if cp == 'non-anginal' else 0,
#         'cp_4': 1 if cp == 'asymptomatic' else 0
#     }

#     features = pd.DataFrame(data, index=[0])
#     return features

# input_df = user_input_features()

# # --- Prediction ---
# st.subheader("Prediction Result")
# prediction = model.predict(input_df)
# prediction_proba = model.predict_proba(input_df)

# st.write("**Predicted Heart Disease:**", "Yes ❤️" if prediction[0]==1 else "No 💙")
# st.write("**Prediction Probability:**")
# st.write(f"Probability of Heart Disease: {prediction_proba[0][1]:.2f}")
# st.write(f"Probability of No Heart Disease: {prediction_proba[0][0]:.2f}")

# # --- Data Visualization (Optional) ---
# st.subheader("Heart Disease Trends (Sample Dataset)")

# # Load dataset for visualization
# # df_viz = pd.read_csv("data/heart_disease_uci.csv")  # optional
# # Placeholder: df_viz below is randomly generated, NOT the real dataset,
# # so the chart does not show an actual heart disease trend
# df_viz = pd.DataFrame({
#     'age': np.random.randint(29, 77, 100),
#     'num': np.random.choice([0,1], 100)
# })

# fig, ax = plt.subplots()
# sns.countplot(x='num', data=df_viz, ax=ax)
# ax.set_xticklabels(['No Disease', 'Heart Disease'])
# ax.set_title("Heart Disease Distribution")
# st.pyplot(fig)

In [121]:
# from pyngrok import ngrok
# import os

# # Kill any existing tunnels
# ngrok.kill()

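# # Set up a tunnel to the Streamlit port 8501
# # NOTE(review): pyngrok 5+ names this argument `addr` rather than `port`; passing
# # the port positionally (ngrok.connect(8501)) avoids the keyword. Confirm against
# # the installed pyngrok version.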
# public_url = ngrok.connect(port='8501')
# print("🔗 Public URL:", public_url)

# # Run Streamlit app in background
# !streamlit run app.py &>/dev/null &
