## Coding Exercise #0306

### 1. Regularized regressions:

In [1]:
import numpy as np
import pandas as pd
import os
import matplotlib.pyplot as plt
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.linear_model import Ridge, Lasso

#### 1.1. Read in data:

The dataset comes from the 1974 *Motor Trend* US magazine.

In [2]:
# Change to the directory where the data file is located.
# os.chdir(r'~~')                # Uncomment and replace the path with your own.

In [3]:
# Load the data file; column names are inferred from the CSV's header row (pandas default).
df = pd.read_csv('data_mtcars.csv')

In [4]:
# Dimensions of the frame as a (rows, columns) tuple.
df.shape

(32, 12)

In [5]:
# Preview the first five rows (head() defaults to n=5).
df.head()

Unnamed: 0,model,mpg,cyl,disp,hp,drat,wt,qsec,vs,am,gear,carb
0,Mazda RX4,21.0,6,160.0,110,3.9,2.62,16.46,0,1,4,4
1,Mazda RX4 Wag,21.0,6,160.0,110,3.9,2.875,17.02,0,1,4,4
2,Datsun 710,22.8,4,108.0,93,3.85,2.32,18.61,1,1,4,1
3,Hornet 4 Drive,21.4,6,258.0,110,3.08,3.215,19.44,1,0,3,1
4,Hornet Sportabout,18.7,8,360.0,175,3.15,3.44,17.02,0,0,3,2


In [6]:
# Response: miles per gallon.
Y = df['mpg']
# Predictors: every remaining column except the car name and the response itself.
X = df.drop(columns=['model', 'mpg'])

In [7]:
# Hold out 30% of the rows for testing; the fixed seed keeps the split reproducible.
X_train, X_test, Y_train, Y_test = train_test_split(
    X,
    Y,
    test_size=0.3,
    random_state=123,
)

#### 1.2. Ridge regression:

In [8]:
# Candidate regularization strengths: the integers 1 through 99.
lambdaRange = range(1, 100)

In [9]:
# Grid search over lambdaRange: fit Ridge on the training split, score R^2 on the
# testing split, and keep the lambda that gives the highest R^2.
# NOTE(review): lambda is chosen on the same split that is used to report R^2, so
# the printed scores are optimistic; a separate validation split or cross-validation
# would give an unbiased estimate.
bestRsq = -np.inf                                            # R^2 on held-out data can be negative, so start below any score.
bestLambda = 0.0                                             # To contain the best lambda.
for aLambda in lambdaRange:
    ridge = Ridge(alpha=aLambda)
    ridge.fit(X_train, Y_train)                              # Train with the training set.
    rsq = ridge.score(X_test, Y_test)                        # Calculate R^2 with the testing set.
    if rsq > bestRsq:                                        # If R^2 is better, replace bestRsq.
        bestRsq = rsq
        bestLambda = aLambda
        print("Lambda = ", bestLambda, " and  R^2 = ", np.round(rsq,3))
ridge = Ridge(alpha=bestLambda)                              # Final run with the bestLambda,
ridge.fit(X, Y)                                              # refit on the full dataset.

Lambda =  1  and  R^2 =  0.779
Lambda =  2  and  R^2 =  0.813
Lambda =  3  and  R^2 =  0.822
Lambda =  4  and  R^2 =  0.825
Lambda =  5  and  R^2 =  0.825


In [10]:
# Show the intercept of the final Ridge model (the one refit on the full dataset).
ridge.intercept_

28.587763266422805

In [11]:
# Show the fitted coefficients of the final Ridge model (one per column of X).
ridge.coef_

array([-0.51185256, -0.00768344, -0.01533332,  0.61667067, -1.39209019,
       -0.00709397,  0.11850925,  0.94390866,  0.71148287, -0.7038523 ])

NOTE: Ridge shrinks the coefficients toward 0, but even when $\lambda$ is large no coefficient becomes exactly 0.

#### 1.3. Lasso regression:

In [12]:
# Candidate regularization strengths: the integers 1 through 99.
lambdaRange = range(1, 100)

In [13]:
# Grid search over lambdaRange: fit Lasso on the training split, score R^2 on the
# testing split, and keep the lambda that gives the highest R^2.
# NOTE(review): lambda is chosen on the same split that is used to report R^2, so
# the printed scores are optimistic; a separate validation split or cross-validation
# would give an unbiased estimate.
bestRsq = -np.inf                                            # R^2 on held-out data can be negative, so start below any score;
                                                             # otherwise the final fit could fall back to lambda = 0.0 (no penalty).
bestLambda = 0.0                                             # To contain the best lambda.
for aLambda in lambdaRange:
    lasso = Lasso(alpha=aLambda)
    lasso.fit(X_train, Y_train)                              # Train with the training set.
    rsq = lasso.score(X_test, Y_test)                        # Calculate R^2 with the testing set.
    if rsq > bestRsq:                                        # If R^2 is better, replace bestRsq.
        bestRsq = rsq
        bestLambda = aLambda
        print("Lambda = ", bestLambda, " and  R^2 = ", np.round(rsq,3))
lasso = Lasso(alpha=bestLambda)                              # Final run with the bestLambda,
lasso.fit(X, Y)                                              # refit on the full dataset.

Lambda =  1  and  R^2 =  0.735
Lambda =  2  and  R^2 =  0.743
Lambda =  3  and  R^2 =  0.744
Lambda =  4  and  R^2 =  0.744
Lambda =  5  and  R^2 =  0.744
Lambda =  6  and  R^2 =  0.745
Lambda =  7  and  R^2 =  0.745
Lambda =  8  and  R^2 =  0.745
Lambda =  9  and  R^2 =  0.746
Lambda =  10  and  R^2 =  0.746
Lambda =  11  and  R^2 =  0.746
Lambda =  12  and  R^2 =  0.747
Lambda =  13  and  R^2 =  0.747
Lambda =  14  and  R^2 =  0.747
Lambda =  15  and  R^2 =  0.747
Lambda =  16  and  R^2 =  0.747
Lambda =  17  and  R^2 =  0.748
Lambda =  18  and  R^2 =  0.748
Lambda =  19  and  R^2 =  0.748
Lambda =  20  and  R^2 =  0.748
Lambda =  21  and  R^2 =  0.749
Lambda =  22  and  R^2 =  0.749
Lambda =  23  and  R^2 =  0.749
Lambda =  24  and  R^2 =  0.749
Lambda =  25  and  R^2 =  0.749
Lambda =  26  and  R^2 =  0.749
Lambda =  27  and  R^2 =  0.749
Lambda =  28  and  R^2 =  0.75
Lambda =  29  and  R^2 =  0.75
Lambda =  30  and  R^2 =  0.75
Lambda =  31  and  R^2 =  0.75
Lambda =  32  and  R^

In [14]:
# Show the intercept of the final Lasso model (the one refit on the full dataset).
lasso.intercept_

29.481705776221872

In [15]:
# Show the fitted coefficients of the final Lasso model (one per column of X).
lasso.coef_

array([-0.       , -0.0335117, -0.0113111,  0.       , -0.       ,
        0.       ,  0.       ,  0.       ,  0.       , -0.       ])

NOTE: Lasso can make some parameters exactly 0 when $\lambda$ is large enough.