In [1]:
# This exercise was taken from
# http://scikit-learn.org/stable/tutorial/statistical_inference/model_selection.html

Remarks concerning this script:
This is my personal solution to the exercise. I realized that the two methods 
I used here give different results so I looked at the solution.
First, it seems that choosing to shuffle the observations
before splitting them into k folds has a non-negligible effect.
Also, it seems that, for some obscure reason, the official
solution only uses the first 150 observations (out of 442), which
also has an effect on the resulting optimal alpha.
Then it seems that the built-in method agrees with the manual one WITHOUT
shuffling, when the grid is provided for the parameter search.
When the grid is not provided, the alpha found is still very similar.
There are two other files related to this script: scikit-learn_ex4_var,
where I modified some parameters to see how it changed, and scikit-learn_ex4_sol,
which contains the official solution.

In [3]:
import numpy as np
from sklearn import datasets
from sklearn.linear_model import LassoCV
from sklearn.linear_model import Lasso
from sklearn.model_selection import KFold
from sklearn.model_selection import GridSearchCV

# Load the diabetes dataset that ships with scikit-learn.
diabetes = datasets.load_diabetes()

In [11]:
# Feature matrix and target vector taken from the loaded dataset.
X, y = diabetes.data, diabetes.target

In [38]:
# Candidate regularisation strengths: 30 log-spaced values from 1e-4 to 10**-0.5.
alphas = np.logspace(start=-4, stop=-0.5, num=30)
# Three folds taken in the original row order (no shuffling).
k_fold = KFold(shuffle=False, n_splits=3)

In [39]:
# First method: manual CV with KFold

In [40]:
# One slot per candidate alpha to hold its mean cross-validated score.
scores_1 = np.zeros(alphas.shape)
# Single estimator that the manual search re-parameterises for every alpha.
regr_1 = Lasso()

In [41]:
# Manual grid search: for each alpha, fit on every training split, score on the
# matching held-out split, and store the average of those scores.
for idx, alpha in enumerate(alphas):
    regr_1.set_params(alpha=alpha)
    fold_scores = [
        regr_1.fit(X[train], y[train]).score(X[test], y[test])
        for train, test in k_fold.split(X)
    ]
    scores_1[idx] = np.mean(fold_scores)

In [42]:
# Mean cross-validated score for each alpha, in the same order as `alphas`.
scores_1

array([ 0.48869329,  0.48869012,  0.48868584,  0.48868   ,  0.48867199,
        0.48866087,  0.48864527,  0.48862308,  0.48859094,  0.48854362,
        0.48847249,  0.48844691,  0.48858788,  0.48875045,  0.48894351,
        0.48910838,  0.4892294 ,  0.48933274,  0.48927415,  0.48912505,
        0.48904068,  0.48863356,  0.487985  ,  0.48721608,  0.4870226 ,
        0.4865495 ,  0.48504431,  0.48145065,  0.47617173,  0.46804211])

In [43]:
# Position of the highest mean score within the alpha grid.
scores_1.argmax()

17

In [44]:
# Alpha whose mean cross-validated score is the highest.
alpha_opt = alphas[scores_1.argmax()]

In [45]:
# Show the alpha selected by the manual search.
alpha_opt

0.011264816923358867

In [46]:
# Second method: Built-in CV for Lasso with same grid

In [47]:
# Built-in search over the same alpha grid as the manual loop.
# Pass the manual search's splitter explicitly so both methods evaluate the
# exact same folds.  LassoCV's default `cv` depends on the scikit-learn version
# (3-fold before 0.22, 5-fold since), so leaving it implicit would make this
# comparison silently diverge from the 3-fold manual loop on newer installs.
regr_2 = LassoCV(alphas=alphas, cv=k_fold, random_state=0)
regr_2.fit(X, y)

LassoCV(alphas=array([  1.00000e-04,   1.32035e-04,   1.74333e-04,   2.30181e-04,
         3.03920e-04,   4.01281e-04,   5.29832e-04,   6.99564e-04,
         9.23671e-04,   1.21957e-03,   1.61026e-03,   2.12611e-03,
         2.80722e-03,   3.70651e-03,   4.89390e-03,   6.46167e-03,
         8.53168e-03,   1...     7.88046e-02,   1.04050e-01,   1.37382e-01,   1.81393e-01,
         2.39503e-01,   3.16228e-01]),
    copy_X=True, cv=None, eps=0.001, fit_intercept=True, max_iter=1000,
    n_alphas=100, n_jobs=1, normalize=False, positive=False,
    precompute='auto', random_state=0, selection='cyclic', tol=0.0001,
    verbose=False)

In [48]:
# Alpha selected by LassoCV from the supplied grid.
regr_2.alpha_

0.011264816923358867

In [49]:
# Hold-out check: fit on the first 400 rows, score on the remaining rows.
# NOTE(review): 0.0112 is a hand-rounded copy of the alpha found by the
# searches (~0.01126); consider using `alpha_opt` directly.
train_rows, test_rows = slice(None, 400), slice(400, None)
regr_1.set_params(alpha=0.0112)
regr_1.fit(X[train_rows], y[train_rows])
regr_1.score(X[test_rows], y[test_rows])

0.69446544226528084

In [50]:
# Third method: Built-in CV for Lasso without parameter grid given

In [51]:
# Let LassoCV build its own alpha path, but pin the splitter to the same
# 3-fold, unshuffled KFold used by the manual search.  The default `cv` is
# version-dependent (3-fold before scikit-learn 0.22, 5-fold since), so an
# implicit default would change the folds, and possibly the selected alpha,
# depending on the installed version.
regr_3 = LassoCV(cv=k_fold, random_state=0)
regr_3.fit(X, y)
regr_3.alpha_

0.012291895087486161