In [11]:
#                       Implementation of Ridge Regression From Scratch (L2 Regularization)

# Mathematically, a new hyperparameter (alpha) is added to the denominator of the slope
# (shown here for the 2D, single-feature case), which shrinks the slope.
# A smaller slope means the target variable depends less on the input features, which reduces overfitting.

In [6]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [7]:
# Load the diabetes dataset as a (features, target) pair, then hold out 20% of
# the rows for testing; the fixed random_state keeps the split reproducible.
X, y = load_diabetes(return_X_y=True)
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=4,
)

In [12]:
#                                                Custom Class For Ridge Regression
class RidgeRegression:
    """Ridge (L2-regularised) linear regression fitted in closed form.

    The weights are the solution of the regularised normal equations
    ``(X^T X + alpha * I') w = X^T y``, where ``X`` carries a leading column
    of ones for the intercept and ``I'`` is the identity matrix with its
    first diagonal entry zeroed so that the intercept is not penalised.

    Parameters
    ----------
    alpha : float, default=0.1
        Regularisation strength added to the diagonal of ``X^T X``
        (for every entry except the intercept's).

    Attributes
    ----------
    coef_ : ndarray or None
        Feature weights; ``None`` until ``fit`` has been called.
    intercept_ : scalar or None
        Bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, alpha=0.1):
        self.alpha = alpha
        self.coef_ = None
        self.intercept_ = None

    def fit(self, X_train, y_train):
        """Estimate the intercept and coefficients from training data.

        Parameters
        ----------
        X_train : array-like, 2-D (rows are samples, columns are features)
        y_train : array-like of targets, one per row of ``X_train``

        Returns
        -------
        self : RidgeRegression
            The fitted estimator, so calls can be chained.

        Raises
        ------
        numpy.linalg.LinAlgError
            If the regularised system is singular.
        """
        # Prepend a column of ones so the intercept is learned as weight 0.
        design = np.insert(X_train, 0, 1, axis=1)

        # Identity with a zero in the top-left corner: penalise the feature
        # weights but leave the intercept unregularised.
        penalty = np.identity(design.shape[1])
        penalty[0, 0] = 0

        gram = np.dot(design.T, design) + self.alpha * penalty
        moment = np.dot(design.T, y_train)

        # Solve the linear system directly instead of forming an explicit
        # inverse: same solution, but cheaper and numerically more stable.
        result = np.linalg.solve(gram, moment)

        self.intercept_ = result[0]
        self.coef_ = result[1:]
        return self

    def predict(self, X_test):
        """Return predictions ``X_test @ coef_ + intercept_``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.coef_ is None or self.intercept_ is None:
            raise RuntimeError(
                "This RidgeRegression instance is not fitted yet; "
                "call fit() before predict()."
            )
        return np.dot(X_test, self.coef_) + self.intercept_

In [14]:
# Train the from-scratch model with its default alpha (0.1) on the training split.
rr = RidgeRegression()
rr.fit(X_train, y_train)
# Predict on the held-out split and report R^2 plus the learned parameters.
y_pred = rr.predict(X_test)
print('r2_score:',r2_score(y_test, y_pred))
print('Coefficients:',rr.coef_)
print('Intercept:',rr.intercept_)

r2_score: 0.4693128853309805
Coefficients: [  44.02063391 -241.69329987  452.98665299  332.04420177  -76.33686744
  -68.52143809 -164.98809083  149.96908118  431.61347417   58.51862681]
Intercept: 150.8905342560281
