In [25]:
import numpy as np
import pandas as pd
from sklearn.datasets import load_iris
from sklearn.feature_selection import SelectKBest, chi2, VarianceThreshold, RFE, SequentialFeatureSelector
from sklearn.linear_model import LogisticRegression
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.svm import SVC
from sklearn.preprocessing import StandardScaler

In [27]:
# Load dataset
data = load_iris()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = data.target


In [28]:
data.feature_names

['sepal length (cm)',
 'sepal width (cm)',
 'petal length (cm)',
 'petal width (cm)']

In [11]:
data.target

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
       1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
       2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2])

# 1. Filter Methods 

In [3]:

# Chi-Square Test (for categorical features, we discretize data)
X_chi2 = X.apply(lambda x: pd.cut(x, bins=3, labels=False))  # Convert to categorical
chi2_selector = SelectKBest(score_func=chi2, k=2)
X_chi2_selected = chi2_selector.fit_transform(X_chi2, y)
print("\nFilter Method (Chi-Square) - Selected Features:", chi2_selector.get_support())


Filter Method (Chi-Square) - Selected Features: [False False  True  True]


In [12]:
# Variance Threshold (removes low variance features)
var_thresh = VarianceThreshold(threshold=0.1)
X_var_selected = var_thresh.fit_transform(X)
print("Filter Method (Variance Threshold) - Remaining Features:", var_thresh.get_support())

Filter Method (Variance Threshold) - Remaining Features: [ True  True  True  True]


# 2. Wrapper Methods 


In [14]:
model = LogisticRegression()

In [16]:
# Recursive Feature Elimination (RFE)
rfe = RFE(model, n_features_to_select=3)
X_rfe_selected = rfe.fit_transform(X, y)
print("\nWrapper Method (RFE) - Selected Features:", rfe.get_support())


Wrapper Method (RFE) - Selected Features: [False  True  True  True]


STOP: TOTAL NO. OF ITERATIONS REACHED LIMIT.

Increase the number of iterations (max_iter) or scale the data as shown in:
    https://scikit-learn.org/stable/modules/preprocessing.html
Please also refer to the documentation for alternative solver options:
    https://scikit-learn.org/stable/modules/linear_model.html#logistic-regression
  n_iter_i = _check_optimize_result(


In [29]:
# Scale the data to improve convergence
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [30]:
# Recursive Feature Elimination (RFE)
rfe = RFE(model, n_features_to_select=2)
X_rfe_selected = rfe.fit_transform(X_scaled, y)
print("\nWrapper Method (RFE) - Selected Features:", rfe.get_support())


Wrapper Method (RFE) - Selected Features: [False False  True  True]


In [17]:
# Forward Selection
forward_selector = SequentialFeatureSelector(model, n_features_to_select=2, direction='forward')
forward_selector.fit(X, y)
print("Wrapper Method (Forward Selection) - Selected Features:", forward_selector.get_support())


Wrapper Method (Forward Selection) - Selected Features: [False False  True  True]


In [18]:

# Backward Elimination
backward_selector = SequentialFeatureSelector(model, n_features_to_select=2, direction='backward')
backward_selector.fit(X, y)
print("Wrapper Method (Backward Elimination) - Selected Features:", backward_selector.get_support())

Wrapper Method (Backward Elimination) - Selected Features: [False False  True  True]


# 3. Embedded Methods

In [19]:
# Lasso Regression (L1 Regularization)
lasso = LogisticRegression(penalty='l1', solver='liblinear')
lasso.fit(X, y)
print("\nEmbedded Method (Lasso) - Feature Importance:", lasso.coef_)


Embedded Method (Lasso) - Feature Importance: [[ 0.          2.52095427 -2.82990737  0.        ]
 [ 0.32855835 -1.79382712  0.66572511 -1.57254851]
 [-2.62318511 -2.50802931  3.26201459  4.61779287]]


In [21]:
# Random Forest Feature Importance
rf = RandomForestClassifier()
rf.fit(X, y)
print("Embedded Method (Random Forest) - Feature Importance:", rf.feature_importances_)
print(rf.max_features)

Embedded Method (Random Forest) - Feature Importance: [0.11675487 0.0171872  0.39681974 0.4692382 ]
sqrt


In [33]:
feature_importance = rf.feature_importances_
feature_names = rf.feature_names_in_

# Print feature importance with names
for name, importance in zip(feature_names, feature_importance):
    print(f"{name}: {importance}")

sepal length (cm): 0.11675487043845086
sepal width (cm): 0.017187197285106037
petal length (cm): 0.39681973620720656
petal width (cm): 0.4692381960692365


In [34]:
sorted_indices = np.argsort(rf.feature_importances_)[::-1]  # Sort in descending order
for i in sorted_indices[:2]:  # Get top 2 features
    print(f"{rf.feature_names_in_[i]}: {rf.feature_importances_[i]}")

petal width (cm): 0.4692381960692365
petal length (cm): 0.39681973620720656


# 1. Filter Methods (Preprocessing Step)
Selects features before training the model based on statistical tests.

Works independently of any machine learning model.

 **Examples:**
 
Correlation: Remove features highly correlated with others.

Variance Threshold: Remove features with very low variance.

Chi-Square Test (
𝜒
2
χ 
2
 ): Measures dependency between categorical features and target.

Mutual Information: Measures how much information one variable provides about another.

Best for: High-dimensional datasets, quick preprocessing.

# 2. Wrapper Methods (Model-Based)
Selects features by training a model and evaluating performance.

Computationally expensive but gives better results.

**Examples:**

Forward Selection: Start with no features, add them one by one based on performance.

Backward Elimination: Start with all features, remove the least important one by one.

Recursive Feature Elimination (RFE): Trains a model and removes the least important features iteratively.

Best for: Small to medium datasets where accuracy is more important than speed.

# 3. Embedded Methods (Built-in Model Feature Selection)
Feature selection is done while training the model.

More efficient than Wrapper Methods.

**Examples:**

Lasso Regression (L1 Regularization): Shrinks some feature weights to zero, effectively removing them.

Decision Tree-based Models (e.g., Random Forest, XGBoost): Automatically determine feature importance.

Best for: Large datasets where computational cost matters.

Which One to Use?
Feature Selection Method	When to Use?
Filter Methods	Large datasets, fast selection, before training.
Wrapper Methods	When accuracy is more important, for small datasets.
Embedded Methods	When using tree-based models or Lasso Regression.