# Forward Stepwise Selection

We show how forward stepwise selection can be used to choose a subset of features for a linear regression model.

In [1]:
import numpy as np
from sklearn import datasets

from mltools.glm import LinearRegression
from mltools.glm import FSSLinearRegression

In [2]:
# Fix the seed of NumPy's global random number generator so results are replicable
SEED = 100
np.random.seed(SEED)

## Load the Data

In [3]:
# Fetch the diabetes dataset from scikit-learn's `datasets` module
data = datasets.load_diabetes()

# Draw a single random ordering of the rows and apply it to both the
# feature matrix and the target vector so they stay aligned
n = len(data.target)
idx = np.random.permutation(n)
x, y = data.data[idx, :], data.target[idx]

# First n // 2 shuffled rows form the training set; the rest form the test set
x_train, x_test = x[:(n // 2), :], x[(n // 2):, :]
y_train, y_test = y[:(n // 2)], y[(n // 2):]

## Ordinary Linear Regression

In [4]:
%%time
# NOTE: the %%time cell magic must remain the first line of the cell; it
# reports the CPU and wall time taken by the statements below.
# Fit the ordinary linear regression model on the training half of the data.
model = LinearRegression()
model.fit(x_train, y_train)

CPU times: user 1.64 ms, sys: 682 µs, total: 2.32 ms
Wall time: 3.09 ms


In [5]:
# Evaluate the current `model` on the training half, the testing half, and the
# full shuffled data set (in that order)
mse_train, mse_test, mse_full = (
    model.mse(features, targets)
    for features, targets in ((x_train, y_train), (x_test, y_test), (x, y))
)

# One line of output per partition, each value rounded to three decimals
print(
    f"Training MSE:   {mse_train:.3f}",
    f"Testing MSE:    {mse_test:.3f}",
    f"Full data MSE:  {mse_full:.3f}",
    sep="\n",
)

Training MSE:   2500.425
Testing MSE:    3320.616
Full data MSE:  2910.521


## Forward Stepwise Selection

In [6]:
%%time
# NOTE: the %%time cell magic must remain the first line of the cell; it
# reports the CPU and wall time taken by the statements below.
# Fit the forward-stepwise-selection linear regression on the training half.
# `model` is rebound here, so the earlier LinearRegression fit is no longer
# reachable under this name.
# NOTE(review): f_threshold=4 is presumably the F-statistic cutoff a candidate
# feature must exceed to be added -- confirm against FSSLinearRegression.fit.
model = FSSLinearRegression()
model.fit(x_train, y_train, f_threshold=4)

CPU times: user 27.6 ms, sys: 4.21 ms, total: 31.8 ms
Wall time: 34.6 ms


In [7]:
# Show which features the fitted model kept.
# NOTE(review): presumably 0-based column indices into x -- confirm against
# FSSLinearRegression.indices.
print(f"Selected feature indices: {model.indices}")

Selected feature indices: [1 2 3 4 5 8]


In [8]:
# Evaluate the current `model` on the training half, the testing half, and the
# full shuffled data set (in that order)
mse_train, mse_test, mse_full = (
    model.mse(features, targets)
    for features, targets in ((x_train, y_train), (x_test, y_test), (x, y))
)

# One line of output per partition, each value rounded to three decimals
print(
    f"Training MSE:   {mse_train:.3f}",
    f"Testing MSE:    {mse_test:.3f}",
    f"Full data MSE:  {mse_full:.3f}",
    sep="\n",
)

Training MSE:   2534.766
Testing MSE:    3297.840
Full data MSE:  2916.303
