In [1]:
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline

# EstimatorCV for Efficient Parameter Search

## Recursive Feature Elimination

In [3]:
from sklearn.datasets import load_iris
# `sklearn.cross_validation` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `train_test_split` is provided by `sklearn.model_selection`.
from sklearn.model_selection import train_test_split

# Load iris and hold out a test split; the fixed seed keeps the split reproducible.
iris = load_iris()
X_train, X_test, y_train, y_test = train_test_split(
    iris.data, iris.target, random_state=0
)

In [4]:
from sklearn.feature_selection import RFE
from sklearn.linear_model import LogisticRegression

# Recursive feature elimination wrapped around a logistic regression,
# configured to keep exactly two features.
feature_elimination_lr = RFE(
    estimator=LogisticRegression(C=100),
    n_features_to_select=2,
)

In [5]:
# Fit on the training split, then display the score on the held-out split
# (`fit` returns the estimator itself, so the calls can be chained).
feature_elimination_lr.fit(X_train, y_train).score(X_test, y_test)

0.94736842105263153

In [7]:
# `sklearn.grid_search` was deprecated in scikit-learn 0.18 and removed in
# 0.20; `GridSearchCV` is provided by `sklearn.model_selection`.
from sklearn.model_selection import GridSearchCV

# Search over how many features RFE keeps (1 through 4) using 5-fold
# cross-validation on the training split, then score on the held-out split.
param_grid = {'n_features_to_select': list(range(1, 5))}
grid_search = GridSearchCV(feature_elimination_lr, param_grid, cv=5)
grid_search.fit(X_train, y_train)
grid_search.score(X_test, y_test)

0.97368421052631582

In [11]:
# Show which `n_features_to_select` value the grid search picked.
grid_search.best_params_

{'n_features_to_select': 4}

In [9]:
from sklearn.feature_selection import RFECV

# RFECV picks the number of features to keep by cross-validation on its own,
# so no outer grid search is needed.
rfecv = RFECV(LogisticRegression(C=100))
rfecv.fit(X_train, y_train)
rfecv.score(X_test, y_test)

0.97368421052631582

In [10]:
# Number of features RFECV decided to keep.
rfecv.n_features_

4

## Efficient hyper-parameter selection for Lasso

In [14]:
from sklearn.datasets import make_regression

# Noisy synthetic regression problem with fixed seeds for reproducibility.
# Note: this rebinds X_train / X_test / y_train / y_test, which previously
# held the iris split.
X, y = make_regression(random_state=0, noise=60)
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=0
)

In [17]:
from sklearn.linear_model import Lasso, LassoCV

# Baseline: Lasso with its default regularization strength.
lasso = Lasso()
lasso.fit(X_train, y_train)
print("Lasso score with default alpha: %f" % lasso.score(X_test, y_test))

# LassoCV selects alpha itself while fitting.
lassocv = LassoCV()
lassocv.fit(X_train, y_train)
print("Lasso score with automatic alpha: %f" % lassocv.score(X_test, y_test))

Lasso score with default alpha: 0.691687
Lasso score with automatic alpha: 0.776175


In [18]:
# Generic alternative to LassoCV: grid-search alpha over 20 log-spaced
# candidates between 1e-5 and 10.
# NOTE(review): the recorded best alpha equals the upper bound of this grid
# (10), so the grid may be too narrow -- consider extending it upward.
grid_search = GridSearchCV(
    Lasso(),
    param_grid={'alpha': np.logspace(-5, 1, 20)},
).fit(X_train, y_train)
print("Lasso score with grid-search alpha: %f" % grid_search.score(X_test, y_test))



Lasso score with grid-search alpha: 0.755467


In [20]:
# Compare the regularization strength chosen by each search strategy.
print("Best alpha found by lassoCV: %f" % (lassocv.alpha_,))
print("Best alpha found by GridSearch: %f" % (grid_search.best_params_['alpha'],))

Best alpha found by lassoCV: 8.042118
Best alpha found by GridSearch: 10.000000


In [21]:
# Baseline timing: a single Lasso fit with the default alpha.
%timeit Lasso().fit(X_train, y_train)

100 loops, best of 3: 13.5 ms per loop


In [22]:
# Timing for LassoCV, which selects alpha as part of its fit.
%timeit LassoCV().fit(X_train, y_train)

1 loop, best of 3: 607 ms per loop


In [23]:
# Timing for re-fitting the existing GridSearchCV-over-Lasso object
# (20 candidate alphas) on the same training data.
%timeit grid_search.fit(X_train, y_train)



1 loop, best of 3: 2.55 s per loop
