In [1]:
# 📘 Feature Selection Notebook (Step 3)

import pandas as pd
from sklearn.feature_selection import RFE, SelectKBest, chi2
from sklearn.linear_model import LogisticRegression

# Read the cleaned CSV, then separate the label column ('target') from the
# remaining columns, which serve as the predictor matrix.
df = pd.read_csv('../data/heart_cleaned.csv')
y = df['target']
X = df.drop(columns='target')

# ======================
# RFE
# ======================
print("🔷 RFE (Top Features):")
# Recursive feature elimination wrapped around a logistic-regression estimator,
# asked to keep five features. max_iter is set explicitly to 1000 so the
# solver gets a generous iteration budget.
model = LogisticRegression(max_iter=1000)
rfe = RFE(estimator=model, n_features_to_select=5).fit(X, y)
# The boolean support mask picks the surviving column labels out of X.
selected_rfe = X.columns[rfe.get_support()]
print("Selected Features by RFE:", selected_rfe.tolist())

# ======================
# Chi-Square
# ======================
print("🔷 Chi-Square Test (Top Features):")
# chi2 only accepts non-negative inputs, so every value below zero is floored
# at zero. clip() returns a new frame, leaving X itself untouched.
# NOTE(review): flooring discards how far below zero a value was; if the
# cleaned data is standardized, rescaling to a non-negative range may be a
# better fit — confirm against the cleaning step.
X_chi = X.clip(lower=0)

# Score every feature against the label with chi2 and keep the five best.
chi_selector = SelectKBest(score_func=chi2, k=5).fit(X_chi, y)
selected_chi = X.columns[chi_selector.get_support(indices=True)]
print("Selected Features by Chi-Square:", selected_chi.tolist())


🔷 RFE (Top Features):
Selected Features by RFE: ['cp', 'restecg', 'oldpeak', 'ca', 'thal']
🔷 Chi-Square Test (Top Features):
Selected Features by Chi-Square: ['cp', 'exang', 'oldpeak', 'ca', 'thal']
