### <font color = #0080ff> Various Regression Algorithms by Prashant Mittal
OLS, LASSO, Ridge, Elastic Net    
<font color= dark red> More can be learnt here<br>
https://scikit-learn.org/stable/modules/classes.html#module-sklearn.linear_model

Regressors
<img src="regression.png"/>

In [9]:
# Importing dependencies

import pandas as pd
import numpy as np
from sklearn import model_selection
from sklearn.linear_model import LinearRegression
from sklearn.linear_model import Ridge
from sklearn.linear_model import Lasso
from sklearn.linear_model import ElasticNet
np.set_printoptions(precision=3, suppress = True)

# The method r2_score is for calculating r-squared value
from sklearn.metrics import r2_score

from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error

# This is to import Python built in Toy datasets
from sklearn import datasets as dt
import os
# os.listdir()

In [4]:
# ------------------------------------------------------------
# Importing Boston housing dataset for illustration purposes
# ------------------------------------------------------------

boston = dt.load_boston()

# ------------------------------------
# Converting boston housing data to df
# ------------------------------------

boston2 = pd.DataFrame(boston.data, columns = boston.feature_names)
boston2['target'] = pd.Series(boston.target)
boston2


Unnamed: 0,CRIM,ZN,INDUS,CHAS,NOX,RM,AGE,DIS,RAD,TAX,PTRATIO,B,LSTAT,target
0,0.00632,18.0,2.31,0.0,0.538,6.575,65.2,4.0900,1.0,296.0,15.3,396.90,4.98,24.0
1,0.02731,0.0,7.07,0.0,0.469,6.421,78.9,4.9671,2.0,242.0,17.8,396.90,9.14,21.6
2,0.02729,0.0,7.07,0.0,0.469,7.185,61.1,4.9671,2.0,242.0,17.8,392.83,4.03,34.7
3,0.03237,0.0,2.18,0.0,0.458,6.998,45.8,6.0622,3.0,222.0,18.7,394.63,2.94,33.4
4,0.06905,0.0,2.18,0.0,0.458,7.147,54.2,6.0622,3.0,222.0,18.7,396.90,5.33,36.2
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
501,0.06263,0.0,11.93,0.0,0.573,6.593,69.1,2.4786,1.0,273.0,21.0,391.99,9.67,22.4
502,0.04527,0.0,11.93,0.0,0.573,6.120,76.7,2.2875,1.0,273.0,21.0,396.90,9.08,20.6
503,0.06076,0.0,11.93,0.0,0.573,6.976,91.0,2.1675,1.0,273.0,21.0,396.90,5.64,23.9
504,0.10959,0.0,11.93,0.0,0.573,6.794,89.3,2.3889,1.0,273.0,21.0,393.45,6.48,22.0


In [5]:
# -----------------------------------
# Feature space and target variable
# -----------------------------------

X = boston2.iloc[:,:-1].values
y = boston2['target'].values

# -----------------
# train-test split
# -----------------
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30, random_state=40)
print(X_train.shape); print(X_test.shape)

(354, 13)
(152, 13)


### <font color = #0080ff> OLS Regression (Multiple Linear Regression)

In [10]:
# -------------------------------------------
# OLS regression (Multiple linear regression)
# -------------------------------------------

lr = LinearRegression()

# Fitting the model on training set)
lr.fit(X_train, y_train)

# The following options format a numpy array and suppress scientific notation
np.set_printoptions(suppress=True)
np.set_printoptions(precision=3)


# R-square score for training
print("R-square for the training set is = ", format(lr.score(X_train, y_train), '.3f'))


# Coefficients from regression model
print("Model coeficients are = ", lr.coef_, '.3f')

# Predicting the test set
lr_pred= lr.predict(X_test)

# Calculating mean squared error and R-sq for the predictions
print(np.sqrt(mean_squared_error(y_test,lr_pred))) 
print("R-sq for test set is = ", format(r2_score(y_test, lr_pred), '.3f'))


R-square for the training set is =  0.743
Model coeficients are =  [ -0.102   0.039  -0.061   3.481 -17.46    3.664  -0.005  -1.371   0.251
  -0.009  -0.858   0.007  -0.497] .3f
5.641775532184683
R-sq for test set is =  0.722


### <font color = #0080ff>LASSO Regression (L1 Norm applied to the cost function)

<font><b>List of hyperparameters:<br></b>
class sklearn.linear_model.Lasso(alpha=1.0, *, fit_intercept=True, normalize=False, precompute=False, copy_X=True, max_iter=1000, tol=0.0001, warm_start=False, positive=False, random_state=None, selection='cyclic')

In [7]:
from sklearn import linear_model
lasso = linear_model.Lasso(alpha=0.1)
lasso.fit(X_train, y_train)


# R-square score for training
print("R-square for training set for LASSO = ", format(lasso.score(X_train, y_train), '.3f'))
print("")

# Coefficients for LASSO model
print("Model coeficients are = ", lasso.coef_, '.3f')
print("")

# Predict the test set
lasso_pred= lasso.predict(X_test)
# print(lasso_pred)

# Calculating mean squared error and R-sq for the predictions
print(np.sqrt(mean_squared_error(y_test,lasso_pred))) 
print("R-sq for test set for LASSO = ", format(r2_score(y_test, lasso_pred), '.3f'))


R-square for training set for LASSO =  0.728

Model coeficients are =  [-0.075  0.039 -0.11   1.682 -0.     3.507 -0.016 -1.05   0.209 -0.012
 -0.725  0.008 -0.547] .3f

5.726312002368792
R-sq for test set for LASSO =  0.713


### <font color = #0080ff>LASSO CV regression

In [11]:
from sklearn.linear_model import LassoCV
lassoCV = LassoCV(cv=10, random_state=0).fit(X_train, y_train)

# R-square score for training
print("R-square for training set for LASSO cross validation = ", format(lassoCV.score(X_train, y_train), '.3f'))
print("")

# Coefficients for LASSO model
print("Model coeficients are = ", lassoCV.coef_, '.3f')
print("")

# Predict the test set
lassoCV_pred=  lassoCV.predict(X_test)
# print(lasso_pred)

# Calculating mean squared error and R-sq for the predictions
print(np.sqrt(mean_squared_error(y_test,lassoCV_pred))) 
print("R-sq for test set for LASSO CV= ", format(r2_score(y_test, lassoCV_pred), '.3f'))


R-square for training set for LASSO cross validation =  0.694

Model coeficients are =  [-0.05286468  0.0410971  -0.07224345  0.         -0.          1.38008478
  0.00533236 -0.76634464  0.22168668 -0.01316231 -0.69044179  0.00673715
 -0.70532082] .3f

6.084584485176483
R-sq for test set for LASSO =  0.676


### <font color = #0080ff>RIDGE regression (L2 norm penalty applied to the cost function)

class sklearn.linear_model.Ridge(alpha=1.0, *, fit_intercept=True, normalize=False, copy_X=True, max_iter=None, tol=0.001, solver='auto', random_state=None)

In [12]:
from sklearn.linear_model import Ridge
ridge = Ridge(alpha=1.0)
ridge.fit(X_train, y_train)

# R-square score for training
print("R-square for training set for RIDGE = ", format(ridge.score(X_train, y_train), '.3f'))
print("")

# Coefficients for RIDGE model
print("Model coeficients are = ", ridge.coef_, '.3f')
print("")

# Predict the test set
ridge_pred= ridge.predict(X_test)

# Calculating mean squared error and R-sq for the predictions
print(np.sqrt(mean_squared_error(y_test,ridge_pred))) 
print("R-sq for test set for RIDGE = ", format(r2_score(y_test, ridge_pred), '.3f'))


R-square for training set for RIDGE =  0.740

Model coeficients are =  [-8.96773369e-02  3.87293612e-02 -9.46326932e-02  3.27853539e+00
 -8.83310616e+00  3.70498171e+00 -1.32155805e-02 -1.23740801e+00
  2.25516417e-01 -1.00845466e-02 -7.84860767e-01  7.35041802e-03
 -5.07459245e-01] .3f

5.659906078948198
R-sq for test set for RIDGE =  0.720


### <font color = #0080ff>Elastic Net (combination of Ridge and LASSO)
https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.ElasticNet.html#sklearn.linear_model.ElasticNet

class sklearn.linear_model.ElasticNet(alpha=1.0, *, l1_ratio=0.5, fit_intercept=True, normalize=False, precompute=False, max_iter=1000, copy_X=True, tol=0.0001, warm_start=False, positive=False, random_state=None, selection='cyclic')

In [51]:
from sklearn.linear_model import ElasticNet

enet = ElasticNet(random_state=0)
enet.fit(X_train, y_train)

# R-square score for training
print("R-square for training set for Elastic Net = ", format(enet.score(X_train, y_train), '.3f'))
print("")

# Coefficients for RIDGE model
print("Model coeficients are = ", enet.coef_, '.3f')
print("")

# Predict the test set
enet_pred= enet.predict(X_test)

# Calculating mean squared error and R-sq for the predictions
print(np.sqrt(mean_squared_error(y_test,enet_pred))) 
print("R-sq for test set for Elastic Net = ", format(r2_score(y_test, enet_pred), '.3f'))



R-square for training set for Elastic Net =  0.681

Model coeficients are =  [-0.0754756   0.04377314 -0.08908477  0.         -0.          0.71127287
  0.01274977 -0.72275539  0.24931762 -0.0137618  -0.68041582  0.00623504
 -0.74185153] .3f

6.235084507091816
R-sq for test set for Elastic Net =  0.660


### <font color = #0080ff> A general function to produce R-sq and MSE from Ridge or LASSO

In [22]:
# A general function for regression

from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge

def reg(x,i):
    model = x(alpha=i)
    model.fit(X_train, y_train)

    print("Model = ", x, " alpha = ",i) 
    print("R-square for training set= ", format(model.score(X_train, y_train), '.3f'))
    print("")

    # Predict the test set
    model_pred= model.predict(X_test)
    # Calculating mean squared error and R-sq for the predictions
    print('%.3f' % np.sqrt(mean_squared_error(y_test,model_pred))) 
    print("R-sq for test set for RIDGE = ", format(r2_score(y_test, model_pred), '.3f'))
    print("---------")

# --------------------------------------------------------
# Running the above function for various alpha values
# --------------------------------------------------------
for i in list(np.arange(0.1, 2, 0.2)):
    j = round(i,1)
    reg(Ridge,j)
    reg(Lasso,j)



Model =  <class 'sklearn.linear_model._ridge.Ridge'>  alpha =  0.1
R-square for training set=  0.743

5.642
R-sq for test set for RIDGE =  0.722
---------
Model =  <class 'sklearn.linear_model._coordinate_descent.Lasso'>  alpha =  0.1
R-square for training set=  0.728

5.726
R-sq for test set for RIDGE =  0.713
---------
Model =  <class 'sklearn.linear_model._ridge.Ridge'>  alpha =  0.3
R-square for training set=  0.742

5.645
R-sq for test set for RIDGE =  0.721
---------
Model =  <class 'sklearn.linear_model._coordinate_descent.Lasso'>  alpha =  0.3
R-square for training set=  0.717

5.808
R-sq for test set for RIDGE =  0.705
---------
Model =  <class 'sklearn.linear_model._ridge.Ridge'>  alpha =  0.5
R-square for training set=  0.742

5.649
R-sq for test set for RIDGE =  0.721
---------
Model =  <class 'sklearn.linear_model._coordinate_descent.Lasso'>  alpha =  0.5
R-square for training set=  0.708

5.929
R-sq for test set for RIDGE =  0.693
---------
Model =  <class 'sklearn.linear

In [23]:
# ------------------------------------
# A general function for regression
# ------------------------------------

from sklearn.linear_model import Lasso
from sklearn.linear_model import Ridge
from sklearn.linear_model import ElasticNet

def regdf(x,i, l1):
    if x!= ElasticNet:
        model = x(alpha=i)
    else:
        model = x(alpha=i, l1_ratio = l1)
    model.fit(X_train, y_train)
    rsqtrain= format(model.score(X_train, y_train), '.3f')
    
    # Predict the test set
    model_pred= model.predict(X_test)

    # Calculating mean squared error and R-sq for the predictions
    msetest = np.sqrt(mean_squared_error(y_test,model_pred))
    rsqtest = format(model.score(X_train, y_train), '.3f')
    return rsqtrain, msetest, rsqtest


rsqtrainlist = []
msetestlist = []
rsqtestlist = []
modellist = []
alphalist = []
for x in [Ridge, Lasso, ElasticNet]:
    for i in list(np.arange(0.1, 2, 0.2)):        
        for j in list(np.arange(0.1, 1, 0.2)): 
            a,b,c = regdf(x, round(i,2), round(j,2))
            rsqtrainlist.append(a)
            msetestlist.append(b)
            rsqtestlist.append(c)
            modellist.append(x)
            alphalist.append(round(i,2))
    
testdf = pd.DataFrame()
testdf['rsqtrain'] = rsqtrainlist
testdf['msetest'] = msetestlist
testdf['rsqtest'] = rsqtestlist
testdf['model'] = modellist
testdf['alpha'] = alphalist
testdf['Name'] = 'Prashant'


testdf

Unnamed: 0,rsqtrain,msetest,rsqtest,model,alpha,Name
0,0.743,5.641699,0.743,<class 'sklearn.linear_model._ridge.Ridge'>,0.1,Prashant
1,0.743,5.641699,0.743,<class 'sklearn.linear_model._ridge.Ridge'>,0.1,Prashant
2,0.743,5.641699,0.743,<class 'sklearn.linear_model._ridge.Ridge'>,0.1,Prashant
3,0.743,5.641699,0.743,<class 'sklearn.linear_model._ridge.Ridge'>,0.1,Prashant
4,0.743,5.641699,0.743,<class 'sklearn.linear_model._ridge.Ridge'>,0.1,Prashant
...,...,...,...,...,...,...
145,0.668,6.383720,0.668,<class 'sklearn.linear_model._coordinate_desce...,1.9,Prashant
146,0.661,6.441651,0.661,<class 'sklearn.linear_model._coordinate_desce...,1.9,Prashant
147,0.651,6.535825,0.651,<class 'sklearn.linear_model._coordinate_desce...,1.9,Prashant
148,0.637,6.658048,0.637,<class 'sklearn.linear_model._coordinate_desce...,1.9,Prashant
