# **Ridge & Lasso Regression**



In [33]:
# Import Library.
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [34]:
# Import Dataset.
dataset = pd.read_csv('50_Startups.csv')
dataset.head()

Unnamed: 0,R&D Spend,Administration,Marketing Spend,State,Profit
0,165349.2,136897.8,471784.1,New York,192261.83
1,162597.7,151377.59,443898.53,California,191792.06
2,153441.51,101145.55,407934.54,Florida,191050.39
3,144372.41,118671.85,383199.62,New York,182901.99
4,142107.34,91391.77,366168.42,Florida,166187.94


In [35]:
# Split dataset into features and target variables.
X = dataset.iloc[:, :-1].values
y = dataset.iloc[:, -1].values

# **Encoding Categorical Data.**

In [36]:
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import OneHotEncoder
ct = ColumnTransformer(transformers = [('encoder', OneHotEncoder(), [3])], remainder = 'passthrough')
X = np.array(ct.fit_transform(X))

print(X)

[[0.0 0.0 1.0 165349.2 136897.8 471784.1]
 [1.0 0.0 0.0 162597.7 151377.59 443898.53]
 [0.0 1.0 0.0 153441.51 101145.55 407934.54]
 [0.0 0.0 1.0 144372.41 118671.85 383199.62]
 [0.0 1.0 0.0 142107.34 91391.77 366168.42]
 [0.0 0.0 1.0 131876.9 99814.71 362861.36]
 [1.0 0.0 0.0 134615.46 147198.87 127716.82]
 [0.0 1.0 0.0 130298.13 145530.06 323876.68]
 [0.0 0.0 1.0 120542.52 148718.95 311613.29]
 [1.0 0.0 0.0 123334.88 108679.17 304981.62]
 [0.0 1.0 0.0 101913.08 110594.11 229160.95]
 [1.0 0.0 0.0 100671.96 91790.61 249744.55]
 [0.0 1.0 0.0 93863.75 127320.38 249839.44]
 [1.0 0.0 0.0 91992.39 135495.07 252664.93]
 [0.0 1.0 0.0 119943.24 156547.42 256512.92]
 [0.0 0.0 1.0 114523.61 122616.84 261776.23]
 [1.0 0.0 0.0 78013.11 121597.55 264346.06]
 [0.0 0.0 1.0 94657.16 145077.58 282574.31]
 [0.0 1.0 0.0 91749.16 114175.79 294919.57]
 [0.0 0.0 1.0 86419.7 153514.11 0.0]
 [1.0 0.0 0.0 76253.86 113867.3 298664.47]
 [0.0 0.0 1.0 78389.47 153773.43 299737.29]
 [0.0 1.0 0.0 73994.56 122782.75 3

# **Feature Scaling.**

In [37]:
from sklearn.preprocessing import StandardScaler
sc = StandardScaler()
X = sc.fit_transform(X)

print(X)

[[-7.17740563e-01 -6.85994341e-01  1.39326109e+00  2.01641149e+00
   5.60752915e-01  2.15394309e+00]
 [ 1.39326109e+00 -6.85994341e-01 -7.17740563e-01  1.95586034e+00
   1.08280658e+00  1.92360040e+00]
 [-7.17740563e-01  1.45773797e+00 -7.17740563e-01  1.75436374e+00
  -7.28257028e-01  1.62652767e+00]
 [-7.17740563e-01 -6.85994341e-01  1.39326109e+00  1.55478369e+00
  -9.63646307e-02  1.42221024e+00]
 [-7.17740563e-01  1.45773797e+00 -7.17740563e-01  1.50493720e+00
  -1.07991935e+00  1.28152771e+00]
 [-7.17740563e-01 -6.85994341e-01  1.39326109e+00  1.27980001e+00
  -7.76239071e-01  1.25421046e+00]
 [ 1.39326109e+00 -6.85994341e-01 -7.17740563e-01  1.34006641e+00
   9.32147208e-01 -6.88149930e-01]
 [-7.17740563e-01  1.45773797e+00 -7.17740563e-01  1.24505666e+00
   8.71980011e-01  9.32185978e-01]
 [-7.17740563e-01 -6.85994341e-01  1.39326109e+00  1.03036886e+00
   9.86952101e-01  8.30886909e-01]
 [ 1.39326109e+00 -6.85994341e-01 -7.17740563e-01  1.09181921e+00
  -4.56640246e-01  7.7610

# **Split dataset into Training set and Test set.**

In [38]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size = 0.2, random_state = 0)

# **Training the Multiple Linear Regression Model.**

[sklearn.linear_model.LinearRegression](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.LinearRegression.html)

In [39]:
from sklearn.linear_model import LinearRegression
regressor = LinearRegression().fit(X_train, y_train)

# Predicting the Test set results.
y_pred = regressor.predict(X_test)

# **Regression Metrics.**

[https://scikit-learn.org/stable/modules/model_evaluation.html#regression-metrics](https://scikit-learn.org/stable/modules/model_evaluation.html#regression-metrics)

The sklearn.metrics module implements several loss, score, and utility functions to measure regression performance. Some of those have been enhanced to handle the multioutput case: **Mean-Squared Error**, **Mean-Absolute Error**, **Explained Variance Score** and **R2 Score**.


*   [sklearn.metrics.mean_squared_error](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_error.html#sklearn.metrics.mean_squared_error)
*   [sklearn.metrics.mean_absolute_error](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_error.html#sklearn.metrics.mean_absolute_error)
*   [sklearn.metrics.mean_squared_log_error](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_squared_log_error.html#sklearn.metrics.mean_squared_log_error)
*   [sklearn.metrics.mean_absolute_percentage_error](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.mean_absolute_percentage_error.html#sklearn.metrics.mean_absolute_percentage_error)
*   [sklearn.metrics.r2_score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.r2_score.html#sklearn.metrics.r2_score)
*   [sklearn.metrics.explained_variance_score](https://scikit-learn.org/stable/modules/generated/sklearn.metrics.explained_variance_score.html#sklearn.metrics.explained_variance_score)

In [40]:
# Calculating the Mean-Squared Error.
from sklearn.metrics import mean_squared_error
print("Mean-Squared Error (MSE) is", mean_squared_error(y_test, y_pred))

# Calculating the Mean-Absolute Error.
from sklearn.metrics import mean_absolute_error
print("Mean-Absolute Error (MAE) is", mean_absolute_error(y_test, y_pred))

# Calculating the Mean-Squared Log Error.
from sklearn.metrics import mean_squared_log_error
print("Mean-Squared Log Error is", mean_squared_log_error(y_test, y_pred))

# Calculating the Explained Variance Score.
from sklearn.metrics import explained_variance_score
print("Explained Variance Score is", explained_variance_score(y_test, y_pred))

# Calculating the R2 Score.
from sklearn.metrics import r2_score
print("R2 Score is", r2_score(y_test, y_pred))

Mean-Squared Error (MSE) is 83502864.03257738
Mean-Absolute Error (MAE) is 7514.29365964061
Mean-Squared Log Error is 0.0071320014828482855
Explained Variance Score is 0.9469192858652778
R2 Score is 0.9347068473282425


# **Ridge Regression**

[sklearn.linear_model.Ridge](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Ridge.html)

In [41]:
from sklearn.linear_model import Ridge
from sklearn.model_selection import GridSearchCV

ridge = Ridge()
parameters = {'alpha':[1e-15, 1e-10, 1e-8, 1e-3, 1e-2, 1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 100]}
ridge_regressor = GridSearchCV(ridge, parameters, scoring = 'neg_mean_squared_error', cv = 10)
ridge_regressor.fit(X_train, y_train)

  overwrite_a=True).T
  overwrite_a=True).T
  overwrite_a=True).T
  overwrite_a=True).T


GridSearchCV(cv=10, error_score=nan,
             estimator=Ridge(alpha=1.0, copy_X=True, fit_intercept=True,
                             max_iter=None, normalize=False, random_state=None,
                             solver='auto', tol=0.001),
             iid='deprecated', n_jobs=None,
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.001, 0.01, 1, 5, 10,
                                   20, 30, 35, 40, 45, 50, 55, 100]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=0)

In [42]:
print(ridge_regressor.best_params_)
print(ridge_regressor.best_score_)

{'alpha': 0.01}
-125688467.07824266


In [43]:
# Predicting the Test set results.
ridge_pred = ridge_regressor.predict(X_test)

In [44]:
# Calculating the Mean-Squared Error.
from sklearn.metrics import mean_squared_error
print("Mean-Squared Error (MSE) is", mean_squared_error(y_test, ridge_pred))

# Calculating the Mean-Absolute Error.
from sklearn.metrics import mean_absolute_error
print("Mean-Absolute Error (MAE) is", mean_absolute_error(y_test, ridge_pred))

# Calculating the Mean-Squared Log Error.
from sklearn.metrics import mean_squared_log_error
print("Mean-Squared Log Error is", mean_squared_log_error(y_test, ridge_pred))

# Calculating the Explained Variance Score.
from sklearn.metrics import explained_variance_score
print("Explained Variance Score is", explained_variance_score(y_test, ridge_pred))

# Calculating the R2 Score.
from sklearn.metrics import r2_score
print("R2 Score is", r2_score(y_test, ridge_pred))

Mean-Squared Error (MSE) is 83654974.84968407
Mean-Absolute Error (MAE) is 7518.756152545879
Mean-Squared Log Error is 0.007134978449003458
Explained Variance Score is 0.9468199534447763
R2 Score is 0.9345879077574932


# **Lasso Regression**

[sklearn.linear_model.Lasso](https://scikit-learn.org/stable/modules/generated/sklearn.linear_model.Lasso.html)

In [45]:
from sklearn.linear_model import Lasso
from sklearn.model_selection import GridSearchCV
lasso = Lasso()
parameters = {'alpha':[1e-15, 1e-10, 1e-8, 1e-3, 1e-2, 1, 5, 10, 20, 30, 35, 40, 45, 50, 55, 100]}
lasso_regressor = GridSearchCV(lasso, parameters, scoring = 'neg_mean_squared_error', cv = 10)
lasso_regressor.fit(X_train, y_train)

  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)
  positive)


GridSearchCV(cv=10, error_score=nan,
             estimator=Lasso(alpha=1.0, copy_X=True, fit_intercept=True,
                             max_iter=1000, normalize=False, positive=False,
                             precompute=False, random_state=None,
                             selection='cyclic', tol=0.0001, warm_start=False),
             iid='deprecated', n_jobs=None,
             param_grid={'alpha': [1e-15, 1e-10, 1e-08, 0.001, 0.01, 1, 5, 10,
                                   20, 30, 35, 40, 45, 50, 55, 100]},
             pre_dispatch='2*n_jobs', refit=True, return_train_score=False,
             scoring='neg_mean_squared_error', verbose=0)

In [46]:
print(lasso_regressor.best_params_)
print(lasso_regressor.best_score_)

{'alpha': 100}
-123534532.97612849


In [47]:
# Predicting the Test set results.
lasso_pred = lasso_regressor.predict(X_test)

In [48]:
# Calculating the Mean-Squared Error.
from sklearn.metrics import mean_squared_error
print("Mean-Squared Error (MSE) is", mean_squared_error(y_test, lasso_pred))

# Calculating the Mean-Absolute Error.
from sklearn.metrics import mean_absolute_error
print("Mean-Absolute Error (MAE) is", mean_absolute_error(y_test, lasso_pred))

# Calculating the Mean-Squared Log Error.
from sklearn.metrics import mean_squared_log_error
print("Mean-Squared Log Error is", mean_squared_log_error(y_test, lasso_pred))

# Calculating the Explained Variance Score.
from sklearn.metrics import explained_variance_score
print("Explained Variance Score is", explained_variance_score(y_test, lasso_pred))

# Calculating the R2 Score.
from sklearn.metrics import r2_score
print("R2 Score is", r2_score(y_test, lasso_pred))

Mean-Squared Error (MSE) is 80646221.9395118
Mean-Absolute Error (MAE) is 7388.16193079923
Mean-Squared Log Error is 0.0068806889510902415
Explained Variance Score is 0.9485186499591459
R2 Score is 0.9369405332080266
