[reference](https://towardsdatascience.com/linear-regression-models-4a3d14b8d368)

In [33]:
import pandas as pd
import numpy as np

In [34]:
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

# Fetch the diabetes regression data as a (features, target) pair
diabetes, target = load_diabetes(return_X_y=True)
# Wrap the feature matrix in a DataFrame so later cells can use `.columns`
diabetes = pd.DataFrame(diabetes)
# Input features (X) and regression target (y) used for modeling
X, y = diabetes, target
# Hold out 25% of the rows for testing; the fixed seed keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.25,
    random_state=27,
)

In [35]:
from sklearn.model_selection import cross_val_score
# function to get cross validation scores
def get_cv_scores(model):
    """Print the mean and standard deviation of cross-validated R^2 for ``model``.

    The estimator is scored with 5-fold cross-validation on the module-level
    ``X_train`` / ``y_train`` split. Nothing is returned; the two summary
    lines are written to stdout.

    Parameters
    ----------
    model : estimator
        The estimator object handed to ``cross_val_score``.
    """
    fold_scores = cross_val_score(model, X_train, y_train, cv=5, scoring='r2')
    # Report the average score first, then its spread across the folds
    for label, summarize in (('CV Mean: ', np.mean), ('STD: ', np.std)):
        print(label, summarize(fold_scores))

The low R-squared value indicates that our model is not very accurate. 

The standard deviation value indicates that we may be overfitting the training data.

In [36]:
from sklearn.linear_model import LinearRegression
# Fit an ordinary least-squares model on the training split
lr = LinearRegression()
lr.fit(X_train, y_train)
# Report cross-validated R^2 (mean and spread) for this estimator
get_cv_scores(lr)

CV Mean:  0.4758207516304269
STD:  0.14121306928200053


In [60]:
# Display the fitted intercept and coefficient array of the linear regression model
lr.intercept_, lr.coef_

(150.38723146856879,
 array([  22.14427241, -263.14388403,  590.24318898,  293.52984672,
        -671.6939331 ,  371.14603011,  113.81129467,  253.96139439,
         685.7075389 ,   67.93382428]))

In [52]:
from sklearn.linear_model import Ridge
# Fit a ridge regression on the training split using the default alpha=1
ridge = Ridge(alpha=1)
ridge.fit(X_train, y_train)
# Report cross-validated R^2 (mean and spread) for this estimator
get_cv_scores(ridge)

CV Mean:  0.38262318036179815
STD:  0.09902512898413976


In [61]:
# Display the fitted intercept and coefficient array of the ridge model
ridge.intercept_, ridge.coef_

(150.82466927160974,
 array([  40.16755296,  -73.80188618,  297.65447022,  175.01650003,
          25.89922481,   -5.8471405 , -143.36375708,  128.10951789,
         241.11305435,  110.58253269]))

In [55]:
from sklearn.model_selection import GridSearchCV

# Grid-search the ridge penalty strength over several orders of magnitude,
# ranking candidates by cross-validated R^2
alpha = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
param_grid = {'alpha': alpha}
grid = GridSearchCV(
    estimator=ridge,
    param_grid=param_grid,
    scoring='r2',
    verbose=1,
    n_jobs=-1,  # passed through to GridSearchCV to run fits in parallel
)
grid_result = grid.fit(X_train, y_train)
# Best mean CV score and the alpha that produced it
print('Best Score: ', grid_result.best_score_)
print('Best Params: ', grid_result.best_params_)

Fitting 5 folds for each of 7 candidates, totalling 35 fits
Best Score:  0.4763789786748541
Best Params:  {'alpha': 0.01}


In [56]:
from sklearn.linear_model import Lasso
# Fit a lasso regression on the training split using the default alpha=1
lasso = Lasso(alpha=1)
lasso.fit(X_train, y_train)
# Report cross-validated R^2 (mean and spread) for this estimator
get_cv_scores(lasso)

CV Mean:  0.35100225307809524
STD:  0.08727942579582965


In [62]:
# Display the fitted intercept and coefficient array of the lasso model
lasso.intercept_, lasso.coef_

(150.23398151174302,
 array([  0.        ,  -0.        , 446.07485255,   0.        ,
          0.        ,   0.        ,  -0.        ,   0.        ,
        320.40851635,   0.        ]))

In [44]:
# Grid-search the lasso penalty strength over several orders of magnitude,
# ranking candidates by cross-validated R^2
alpha = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
param_grid = {'alpha': alpha}
grid = GridSearchCV(
    estimator=lasso,
    param_grid=param_grid,
    scoring='r2',
    verbose=1,
    n_jobs=-1,
)
grid_result = grid.fit(X_train, y_train)
# Best mean CV score and the alpha that produced it
print('Best Score: ', grid_result.best_score_)
print('Best Params: ', grid_result.best_params_)

Fitting 5 folds for each of 7 candidates, totalling 35 fits
Best Score:  0.47604950330973816
Best Params:  {'alpha': 0.1}


In [45]:
# Print each feature's column label next to its coefficient from the fitted
# lasso model. zip pairs every label with its coefficient directly, so no
# positional index into `lasso.coef_` is needed.
for col, coef in zip(X_train.columns, lasso.coef_):
    print(f'{col}:  {coef}')

0:  0.0
1:  -0.0
2:  446.0748525493875
3:  0.0
4:  0.0
5:  0.0
6:  -0.0
7:  0.0
8:  320.4085163534719
9:  0.0


In [46]:
from sklearn.linear_model import ElasticNet
# Fit an elastic net on the training split using the defaults alpha=1, l1_ratio=0.5
elastic_net = ElasticNet(alpha=1, l1_ratio=0.5)
elastic_net.fit(X_train, y_train)
# Report cross-validated R^2 (mean and spread) for this estimator
get_cv_scores(elastic_net)

CV Mean:  -0.05139212673031255
STD:  0.0729799674615791


In [63]:
# Display the fitted intercept and coefficient array of the elastic net model
elastic_net.intercept_, elastic_net.coef_

(149.93555698842826,
 array([ 0.46343117,  0.        ,  3.68670551,  2.26301582,  0.63176403,
         0.34179573, -2.03015426,  2.33388253,  3.31622451,  1.91549509]))

In [47]:
# Grid-search the elastic net over both the overall penalty strength (alpha)
# and the L1/L2 mix (l1_ratio), ranking candidates by cross-validated R^2.
# NOTE(review): the recorded output of this cell shows repeated
# coordinate-descent warnings; presumably some candidates (e.g. l1_ratio=0 or
# very small alpha) do not converge - confirm, and consider a larger max_iter.
alpha = [0.001, 0.01, 0.1, 1, 10, 100, 1000]
l1_ratio = [0, 0.1, 0.2, 0.3, 0.4, 0.5, 0.6, 0.7, 0.8, 0.9, 1]
param_grid = {'alpha': alpha, 'l1_ratio': l1_ratio}
grid = GridSearchCV(
    estimator=elastic_net,
    param_grid=param_grid,
    scoring='r2',
    verbose=1,
    n_jobs=-1,
)
grid_result = grid.fit(X_train, y_train)
# Best mean CV score and the (alpha, l1_ratio) pair that produced it
print('Best Score: ', grid_result.best_score_)
print('Best Params: ', grid_result.best_params_)

Fitting 5 folds for each of 77 candidates, totalling 385 fits
Best Score:  0.4772267119772916
Best Params:  {'alpha': 0.001, 'l1_ratio': 0.8}


  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = cd_fast.enet_coordinate_descent(
  model = c