In [3]:
import numpy as np
from sklearn.datasets import make_regression
from sklearn.linear_model import LinearRegression
from sklearn.feature_selection import SelectKBest, f_regression
from itertools import combinations
from mlxtend.feature_selection import SequentialFeatureSelector

In [4]:
#Synthetic Data
X, y = make_regression(n_samples=1000, n_features=10, noise=0.3, random_state=0)

In [6]:
X

array([[-0.88718094,  0.58725938,  0.81267404, ..., -1.60183605,
        -0.93278904, -0.51029274],
       [-0.91798431, -0.20357067, -0.47336093, ...,  0.24622276,
        -0.6518565 ,  0.39710624],
       [ 0.05549223,  0.28005786,  0.4595849 , ...,  1.45580795,
         1.4849256 , -2.08663498],
       ...,
       [-0.14893382,  0.62317373,  1.07784157, ..., -1.84853034,
        -0.75127222, -0.5772585 ],
       [ 0.93610755,  1.20169786,  0.40665709, ..., -0.48024903,
         0.8091803 , -2.53455446],
       [ 0.66833899, -3.39229999, -1.34550773, ...,  0.10864712,
        -0.9531795 , -1.02370145]])

## 1. Forward Selection Method

In [7]:
best_features_forward = []
model  = LinearRegression()
for _ in range(X.shape[1]):
    best_score = 0
    best_features = None
    for feature in range(X.shape[1]):
        if feature in best_features_forward:
            continue
        selected_features = best_features_forward + [feature]
        X_subset = X[:, selected_features]
        model.fit(X_subset, y)
        score = model.score(X_subset, y)
        if score> best_score:
            best_score = score
            best_features = feature
    best_features_forward.append(best_features)
print("Forward selection best features", best_features_forward)

Forward selection best features [3, 8, 6, 4, 0, 9, 1, 7, 5, 2]


## 2. Backward Elimination

In [11]:
selected_features = list(range(X.shape[1]))
model = LinearRegression()
while len(selected_features) > 1:
    worst_score = 1
    worst_feature = None
    for feature in selected_features:
        subset = selected_features.copy()
        subset.remove(feature)
        X_subset = X[:, subset]
        model.fit(X_subset, y)
        score = model.score(X_subset, y)
        if score < worst_score:
            worst_score = score
            worst_feature = feature
    selected_features.remove(worst_feature)
print("Backward Elimination Best Features:", selected_features)

Backward Elimination Best Features: [7]


## 3. RFE

In [12]:
rfe = SequentialFeatureSelector(model, k_features=1, forward=False)
rfe.fit(X, y)
best_features_rfe = list(rfe.k_feature_idx_)
print("RFE Best Features", best_features_rfe)

RFE Best Features [3]


In [13]:
rfe = SequentialFeatureSelector(model, k_features=5, forward=False)
rfe.fit(X, y)
best_features_rfe = list(rfe.k_feature_idx_)
print("RFE Best Features", best_features_rfe)

RFE Best Features [0, 3, 4, 6, 8]


## 4. Exhaustive Feature Selection

In [16]:
best_score_exhaustive = 0
best_features_exhaustive = None
model = LinearRegression()
for k in range(1, X.shape[1] + 1):
    for subset in combinations(range(X.shape[1]), k):
        X_subset = X[:, subset]
        model.fit(X_subset, y)
        score = model.score(X_subset, y)
        if score > best_score_exhaustive:
            best_score_exhaustive = score
            best_features_exhaustive = subset
print("Exhaustive Feature Selection best features:",  best_features_exhaustive)

Exhaustive Feature Selection best features: (0, 1, 2, 3, 4, 5, 6, 7, 8, 9)
