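**Ridge Regression from Scratch**

This notebook fits scikit-learn's `Ridge` (`alpha=0.1`, `solver='cholesky'`) on the diabetes dataset and then reproduces its coefficients, intercept, and $R^2$ score with a NumPy implementation of the closed-form solution $w = (X^\top X + \alpha I')^{-1} X^\top y$. Here $X$ carries a leading column of ones and $I'$ is the identity matrix with its first diagonal entry set to zero, so the intercept is not penalised.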

**Import Libraries**

In [1]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import random
import math
import time
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression, Ridge, Lasso
from sklearn.linear_model import Lasso
from sklearn.datasets import make_regression, load_diabetes
from sklearn.preprocessing import LabelEncoder, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.metrics import f1_score, precision_score, recall_score, confusion_matrix, mean_absolute_error, explained_variance_score

**Load Dataset**

In [2]:
# Load the diabetes regression dataset as a (features, target) pair of arrays.
X, y = load_diabetes(return_X_y=True)
# Print each shape on its own line. Joining the two print() calls with a comma
# would make the cell's last expression a tuple of their None return values.
print(X.shape)
print(y.shape)

(442, 10)
(442,)


(None, None)

In [3]:
# Peek at the first five feature rows and the first five targets.
print(X[:5])
print(y[:5])

[[ 0.03807591  0.05068012  0.06169621  0.02187239 -0.0442235  -0.03482076
  -0.04340085 -0.00259226  0.01990749 -0.01764613]
 [-0.00188202 -0.04464164 -0.05147406 -0.02632753 -0.00844872 -0.01916334
   0.07441156 -0.03949338 -0.06833155 -0.09220405]
 [ 0.08529891  0.05068012  0.04445121 -0.00567042 -0.04559945 -0.03419447
  -0.03235593 -0.00259226  0.00286131 -0.02593034]
 [-0.08906294 -0.04464164 -0.01159501 -0.03665608  0.01219057  0.02499059
  -0.03603757  0.03430886  0.02268774 -0.00936191]
 [ 0.00538306 -0.04464164 -0.03638469  0.02187239  0.00393485  0.01559614
   0.00814208 -0.00259226 -0.03198764 -0.04664087]]
[151.  75. 141. 206. 135.]


In [4]:
# Hold out 20% of the samples as a test set; random_state pins the split so reruns match.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=4)
# Bare last expression: the four shapes are shown as the cell's output.
X_train.shape, X_test.shape, y_train.shape, y_test.shape

((353, 10), (89, 10), (353,), (89,))

In [5]:
# Reference model: scikit-learn's Ridge with alpha=0.1 and the closed-form 'cholesky' solver.
reg = Ridge(alpha=0.1,solver='cholesky')
reg.fit(X_train,y_train)

In [6]:
# Predictions of the scikit-learn model on the held-out test set.
y_pred = reg.predict(X_test)
print(y_pred)

[ 79.11687293 110.53477702 167.91664171  56.09370852 176.28941373
 143.1207191  113.55542675 119.62484626  81.1578992   76.76131678
 100.57883353 186.51861928 176.92787856 129.16688066 158.03962815
 140.6919152  177.98896665 121.81303789 108.91384989 179.34966945
 216.2571452  177.21873025  55.02285248 221.15400516  64.88561557
 103.11020925 155.92973833 173.41810907  72.19426363  79.27174588
 189.87240518 112.43816609 246.03053579 180.68495859 102.98204948
 167.1093383  171.61286368 160.42495621 143.75222884 155.43807956
 192.81472846 170.21104819 233.02444667  76.45676401 232.59483513
 105.94563573 150.03909319  70.13429949 193.68093275 137.73383294
 112.56570315 104.42101972 153.7123577  216.2568939   62.56545543
 186.12773302 102.92375218  92.71474925 182.75690638 218.66313594
 125.02620187 102.93275892 165.7103638  240.39090443 110.34456127
 167.33199015 128.36423368 195.24736539 218.36239586 186.00021658
 146.86450096 157.40897721 157.98824545 213.813442   160.40285761
 203.28118

In [7]:
# R^2 of the scikit-learn predictions against the held-out targets.
r2_score(y_test,y_pred)

0.4693128853309805

In [8]:
# Weights and bias learned by the scikit-learn model, printed for later comparison.
print(reg.coef_)
print(reg.intercept_)

[  44.02063391 -241.69329987  452.98665299  332.04420177  -76.33686744
  -68.52143809 -164.98809083  149.96908118  431.61347417   58.51862681]
150.89053425602808


In [9]:
class RidgeRegressionScratch:
    """Ridge (L2-regularised) linear regression solved in closed form.

    The fitted parameters minimise ``||y - X w - b||^2 + alpha * ||w||^2``.
    The intercept ``b`` is excluded from the penalty.

    Parameters
    ----------
    alpha : float, default=0.1
        Regularisation strength multiplying the penalty on the coefficients.

    Attributes
    ----------
    coef_ : ndarray or None
        One weight per feature column; ``None`` until ``fit`` has been called.
    intercept_ : float or None
        Bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, alpha=0.1):
        self.alpha = alpha
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate ``coef_`` and ``intercept_`` from training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features.
        y_train : array-like with first dimension n_samples
            Training targets.

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D or the sample counts disagree.
        numpy.linalg.LinAlgError
            If the regularised normal-equation matrix is singular.
        """
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float)
        if X_train.ndim != 2:
            raise ValueError(
                "X_train must be 2-D (n_samples, n_features); got %d-D input" % X_train.ndim
            )
        if y_train.ndim == 0 or y_train.shape[0] != X_train.shape[0]:
            raise ValueError("X_train and y_train must contain the same number of samples")

        # Prepend a column of ones so the intercept is learned as weight 0.
        design = np.insert(X_train, 0, 1, axis=1)

        # Identity penalty with the top-left entry zeroed: the intercept is not shrunk.
        penalty = np.identity(design.shape[1])
        penalty[0, 0] = 0

        # Solve (X^T X + alpha * I') w = X^T y directly instead of forming an
        # explicit inverse; this is cheaper and numerically more accurate.
        gram = design.T @ design + self.alpha * penalty
        moment = design.T @ y_train
        result = np.linalg.solve(gram, moment)

        self.intercept_ = result[0]
        self.coef_ = result[1:]

    def predict(self, X_test):
        """Return predictions ``X_test @ coef_ + intercept_``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef_ is None or self.intercept_ is None:
            raise RuntimeError(
                "RidgeRegressionScratch is not fitted yet; call fit() before predict()."
            )
        return np.dot(X_test, self.coef_) + self.intercept_

In [10]:
# NOTE: this rebinds `reg`, replacing the scikit-learn model fitted earlier.
# The default alpha=0.1 matches the value passed to Ridge above.
reg = RidgeRegressionScratch()
reg.fit(X_train,y_train)

In [11]:
# Predictions of the scratch model on the same test set; `y_pred` is overwritten.
y_pred = reg.predict(X_test)
print(y_pred)

[ 79.11687293 110.53477702 167.91664171  56.09370852 176.28941373
 143.1207191  113.55542675 119.62484626  81.1578992   76.76131678
 100.57883353 186.51861928 176.92787856 129.16688066 158.03962815
 140.6919152  177.98896665 121.81303789 108.91384989 179.34966945
 216.2571452  177.21873025  55.02285248 221.15400516  64.88561557
 103.11020925 155.92973833 173.41810907  72.19426363  79.27174588
 189.87240518 112.43816609 246.03053579 180.68495859 102.98204948
 167.1093383  171.61286368 160.42495621 143.75222884 155.43807956
 192.81472846 170.21104819 233.02444667  76.45676401 232.59483513
 105.94563573 150.03909319  70.13429949 193.68093275 137.73383294
 112.56570315 104.42101972 153.7123577  216.2568939   62.56545543
 186.12773302 102.92375218  92.71474925 182.75690638 218.66313594
 125.02620187 102.93275892 165.7103638  240.39090443 110.34456127
 167.33199015 128.36423368 195.24736539 218.36239586 186.00021658
 146.86450096 157.40897721 157.98824545 213.813442   160.40285761
 203.28118

In [12]:
# Same three quantities reported for the scikit-learn model: R^2, coefficients, intercept.
print(r2_score(y_test,y_pred))
print(reg.coef_)
print(reg.intercept_)

0.4693128853309805
[  44.02063391 -241.69329987  452.98665299  332.04420177  -76.33686744
  -68.52143809 -164.98809083  149.96908118  431.61347417   58.51862681]
150.8905342560281
