In [28]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

import warnings
# Suppress every warning for the rest of the session so cell output stays uncluttered.
# NOTE(review): this presumably also hides any warnings the estimators below would raise — confirm that is intended.
warnings.filterwarnings('ignore')

# EstimatorCV Objects for Efficient Parameter Search

- Built-in methods for hyper-parameter selection
- Whenever you want to adjust the parameters of an estimator, check whether there is a CV variant for a more efficient hyper-parameter search & evaluation

## Recursive Feature Elimination

In [3]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split

# Fetch the iris dataset, then hold out a test split (seed fixed so the split is repeatable)
iris = load_iris()

X_train, X_test, y_train, y_test = train_test_split(
    iris.data,
    iris.target,
    random_state=0,
)

In [6]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# Recursive feature elimination (RFE): drop features one at a time, each time
# removing the one the wrapped model considers least important.
# The wrapped model here is logistic regression; its fitted coefficients
# indicate how important each feature is.

# Keep 2 of the 4 iris features (i.e. eliminate the other 2)
feature_elimination_lr = RFE(
    estimator=LogisticRegression(C=100),
    n_features_to_select=2,
)

In [7]:
# Fit the selector on the training split and report its score on the held-out split
feature_elimination_lr.fit(X_train, y_train).score(X_test, y_test)

0.94736842105263153

In [10]:
# Rather than fixing the number of kept features by hand, let a cross-validated
# grid search choose it.
from sklearn.model_selection import GridSearchCV

# Candidate sizes run from a single feature up to every column of X_train.
# The upper bound is read from the data instead of being hard-coded, so the
# grid stays correct if the dataset changes (for iris this is still 1..4).
n_total_features = X_train.shape[1]
param_grid = {'n_features_to_select': range(1, n_total_features + 1)}

# 5-fold cross-validation over each candidate; the final score is on the held-out split
grid_search = GridSearchCV(feature_elimination_lr, param_grid, cv=5)
grid_search.fit(X_train, y_train)
grid_search.score(X_test, y_test)

0.97368421052631582

In [11]:
# Show which n_features_to_select value the cross-validated search selected
grid_search.best_params_

{'n_features_to_select': 4}

In [14]:
from sklearn.feature_selection import RFECV

# RFECV eliminates one feature at a time, scoring each step with cross-validation,
# and automatically keeps the best-scoring number of features.
# Same idea as the GridSearchCV-over-RFE approach, but more efficient.

rfecv = RFECV(estimator=LogisticRegression(C=100))
rfecv.fit(X_train, y_train)
rfecv.score(X_test, y_test)

0.97368421052631582

In [15]:
# Number of features RFECV decided to keep
rfecv.n_features_

4

## Efficient hyper-parameter selection for Lasso

In [19]:
from sklearn.datasets import make_regression

# Synthetic regression problem (noise=60); both seeds are fixed so results are repeatable.
# Note: this rebinds X_train / X_test / y_train / y_test, which held the iris split until now.
X, y = make_regression(random_state=0, noise=60)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    random_state=0,
)

In [21]:
from sklearn.linear_model import Lasso, LassoCV

# Baseline: Lasso with its default alpha
lasso = Lasso()
lasso.fit(X_train, y_train)
print("lasso score with default alpha: %f" % lasso.score(X_test, y_test))

# LassoCV chooses alpha itself via cross-validation (100 different values)
lassocv = LassoCV()
lassocv.fit(X_train, y_train)
print("lasso score with automatic alpha: %f" % lassocv.score(X_test, y_test))

lasso score with default alpha: 0.691687
lasso score with automatic alpha: 0.776175


In [29]:
# Search a logarithmic grid of 20 alpha values (1e-5 .. 1e1) with a generic GridSearchCV.
# In the recorded run LassoCV still has better performance.
# NOTE(review): this rebinds `grid_search`, replacing the RFE search object created earlier.
# NOTE(review): the recorded best alpha (10.0) sits on the upper edge of this grid —
# consider whether the range should extend further.
grid_search = GridSearchCV(
    estimator=Lasso(),
    param_grid={'alpha': np.logspace(-5, 1, num=20)},
)
grid_search.fit(X_train, y_train)
print("lasso score with grid-searched alpha: %f" % grid_search.score(X_test, y_test))

lasso score with grid-searched alpha: 0.755467


In [24]:
# Compare the alpha chosen by each search strategy.
# Requires `grid_search` to be the Lasso search (the one with an 'alpha' grid), so the
# Lasso grid-search cell must have been run before this one.
print("best alpha found by LassoCV: %f" % lassocv.alpha_)
print("best alpha found by GridSearchCV: %f" % grid_search.best_params_['alpha'])

best alpha found by LassoCV: 8.042118
best alpha found by GridSearchCV: 10.000000


In [25]:
# Timing baseline: a single Lasso fit with the default alpha
%timeit Lasso().fit(X_train, y_train)

1000 loops, best of 3: 1.45 ms per loop


In [26]:
# Timing: LassoCV fit, including its built-in cross-validated alpha selection
%timeit LassoCV().fit(X_train, y_train)

10 loops, best of 3: 59.3 ms per loop


In [30]:
# Timing: the generic grid search over alpha (refits the existing `grid_search` object in place)
%timeit grid_search.fit(X_train, y_train)

1 loop, best of 3: 218 ms per loop
