In [1]:
# Load libraries
import warnings
from sklearn.datasets import make_regression
from sklearn.feature_selection import RFECV
from sklearn import datasets, linear_model

In [2]:
# Suppress an annoying but harmless warning
warnings.filterwarnings(action="ignore", module="scipy",
message="^internal gelsd")

In [3]:
# Build a synthetic regression problem: 10,000 samples, 100 features, of which
# only 2 are informative. The fixed random_state makes the data reproducible.
# Only the features matrix and target vector are unpacked here; the true
# coefficients are not requested from make_regression.
features, target = make_regression(
    n_samples=10000,
    n_features=100,
    n_informative=2,
    random_state=1,
)

In [4]:
# Create a linear regression model; it is the estimator handed to RFECV in the
# next cell.
ols = linear_model.LinearRegression()

In [5]:
# Recursive feature elimination with cross-validation, using the linear
# regression as the estimator and negative mean squared error as the score.
rfecv = RFECV(
    estimator=ols,
    step=1,
    scoring="neg_mean_squared_error",
)

# fit() returns the fitted selector, so fitting and reducing the features
# matrix can be chained; the reduced matrix is displayed as the cell output.
rfecv.fit(features, target).transform(features)

array([[ 0.00850799,  0.7031277 ,  1.15422032, ..., -0.94579905,
        -1.13210747,  0.39544554],
       [-1.07500204,  2.56148527,  1.44423524, ..., -0.55990689,
         0.46847025, -0.66998145],
       [ 1.37940721, -1.77039484,  2.17806632, ...,  0.85638636,
         0.98059736, -0.17881197],
       ...,
       [-0.80331656, -1.60648007,  0.06790262, ..., -0.69079768,
        -0.40526417,  0.01579457],
       [ 0.39508844, -1.34564911, -0.05938279, ...,  0.1762367 ,
        -0.66344459, -1.21773707],
       [-0.55383035,  0.82880112, -0.75794736, ..., -0.58948827,
        -1.24820021,  0.79632039]])

In [6]:
# Boolean mask over the input features: True marks a feature that was kept
rfecv.support_

array([False, False, False, False, False,  True, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False,  True, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False, False, False, False, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
        True, False, False, False, False, False, False,  True, False,
       False, False, False, False,  True, False, False, False, False,
       False, False,  True, False, False, False, False, False, False,
       False])

In [7]:
# Rank of every input feature, from best (1) to worst; the features marked
# True in support_ are the ones with rank 1
rfecv.ranking_

array([22, 70, 92, 25, 28,  1, 11, 90, 84, 91, 37, 82,  2, 59, 93, 57, 88,
       64, 20, 77, 58, 50, 60, 81, 21, 69, 10, 36, 63, 71, 62, 67, 32, 29,
       14, 87, 83, 30, 38,  1, 55, 23, 12, 65, 31, 53, 94, 74, 89, 54, 61,
       24, 52, 15, 43,  5, 44,  9, 45, 68,  4, 85, 27, 47, 51,  1, 79, 41,
        8, 80, 42, 34,  1, 17, 76, 78, 49, 33,  7,  1, 48, 66,  6, 26, 75,
        1, 73, 19, 18, 13, 86, 46,  1,  3, 40, 56, 35, 16, 72, 39])