In [3]:
#!pip install mlxtend

-----------------------
#### Sequential Forward Selection (SFS) 
------------------
- is a wrapper method used for feature selection. 
- In this method, features are added to the model one at a time, and at each step, the feature that improves the model performance the most is selected. 
- This process continues until a predefined number of features is reached or until further addition of features does not improve the performance.


In [18]:
# Import necessary libraries
import numpy as np
import pandas as pd
from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split

from sklearn.ensemble import RandomForestClassifier
from sklearn.tree import DecisionTreeClassifier
from mlxtend.feature_selection import SequentialFeatureSelector

In [11]:
# Load the breast cancer dataset
data = load_breast_cancer()
X = pd.DataFrame(data.data, columns=data.feature_names)
y = pd.Series(data.target, name='target')

In [7]:
X.shape

(569, 30)

In [3]:
# Split the dataset into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

In [19]:
# Create a random forest classifier (you can choose a different classifier)
clf = DecisionTreeClassifier()

In [20]:
# Sequential Forward Selection
sfs = SequentialFeatureSelector(clf, 
                                k_features= 'best', 
                                forward   = True, 
                                floating  = False, 
                                scoring   = 'accuracy', 
                                cv        = 5)

In [21]:
%%time
# Fit the Sequential Feature Selector to the training data
sfs = sfs.fit(X_train, y_train)

CPU times: total: 9.47 s
Wall time: 20.8 s


In [22]:
# Print the selected features at each step
print(pes}, Accuracy: {accuracy:.2%}")

Iteration-wise Feature Selection:
Step 1: Features - ['worst concave points'], Accuracy: 88.57%
Step 2: Features - ['worst radius', 'worst concave points'], Accuracy: 92.09%
Step 3: Features - ['worst radius', 'worst texture', 'worst concave points'], Accuracy: 94.51%
Step 4: Features - ['concavity error', 'worst radius', 'worst texture', 'worst concave points'], Accuracy: 95.16%
Step 5: Features - ['concavity error', 'fractal dimension error', 'worst radius', 'worst texture', 'worst concave points'], Accuracy: 94.73%
Step 6: Features - ['area error', 'concavity error', 'fractal dimension error', 'worst radius', 'worst texture', 'worst concave points'], Accuracy: 94.73%
Step 7: Features - ['mean radius', 'area error', 'concavity error', 'fractal dimension error', 'worst radius', 'worst texture', 'worst concave points'], Accuracy: 94.51%
Step 8: Features - ['mean radius', 'mean area', 'area error', 'concavity error', 'fractal dimension error', 'worst radius', 'worst texture', 'worst con

In [23]:

# Print the final selected features
selected_features = list(X_train.columns[list(sfs.k_feature_idx_)])
print("\nSelected Features:", selected_features)

# Evaluate the model with selected features on the test set
accuracy = sfs.k_score_
print(f"Model Accuracy with Selected Features: {accuracy:.2%}")


Selected Features: ['mean radius', 'mean texture', 'mean area', 'mean concave points', 'area error', 'concavity error', 'fractal dimension error', 'worst radius', 'worst texture', 'worst concave points']
Model Accuracy with Selected Features: 95.60%
