In [8]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.datasets import load_diabetes
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error

In [3]:
X,y = load_diabetes(return_X_y=True)

In [5]:
X.shape

(442, 10)

In [6]:
y.shape

(442,)

In [7]:
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=41)

In [10]:
linearReg = LinearRegression()
linearReg.fit(X_train,y_train)
test_pred = linearReg.predict(X_test)
train_pred = linearReg.predict(X_train)
r2 = r2_score(y_test,test_pred)
print(f'r2 score:{r2}')

r2 score:0.5052064192833896


### Checking if the model is overfit or underfit

In [11]:
training_error = mean_absolute_error(y_train,train_pred)
testing_error = mean_absolute_error(y_test,test_pred)

In [14]:
Variance = np.abs(training_error - testing_error)

In [15]:
Variance

0.6304281798824505

In [16]:
training_error

43.370201892866355

In [17]:
testing_error

44.000630072748805

In [18]:
y_test[:10]

array([215., 248., 262., 292., 219.,  88., 172., 113., 150.,  83.])

In [19]:
test_pred[:10]

array([240.68494773, 189.59915445, 175.57815132, 205.07874966,
       136.97794201, 144.35675319, 148.80819735, 123.45451372,
       130.7523313 ,  70.1370028 ])

In [20]:
y_train[:10]

array([104.,  95.,  45., 292.,  88., 120., 232., 166., 182.,  31.])

In [21]:
train_pred[:10]

array([ 35.0694877 , 142.96712923,  30.26866321, 197.24818951,
       112.89433254, 161.05989546, 233.93419673, 208.0022485 ,
       138.39813951,  97.58033114])

### Applying the Ridge Regression to see if we are seeing any improvement in the model score

In [22]:
from sklearn.linear_model import Ridge

In [53]:
rr = Ridge(alpha=0.01)

In [54]:
rr.fit(X_train,y_train)

In [55]:
train_pred1 = rr.predict(X_train)
test_pred1 = rr.predict(X_test)

In [56]:
r2_score(y_test,test_pred1)

0.5167222617567644

In [57]:
train_error = mean_absolute_error(y_train,train_pred1)
train_error

43.480503652575315

In [58]:
test_error = mean_absolute_error(y_test,test_pred1)
test_error

43.41893075455325

In [59]:
y_test[:10]

array([215., 248., 262., 292., 219.,  88., 172., 113., 150.,  83.])

In [60]:
test_pred1[:10]

array([241.47413263, 191.35100071, 175.97237395, 199.87907777,
       135.67730663, 141.18268273, 147.10920452, 124.40207874,
       127.56471052,  74.30758512])

In [82]:
rr.coef_

array([ -33.35934766, -250.7407271 ,  541.9881913 ,  272.50543196,
       -487.51153617,  196.847143  ,  -56.07465871,  181.50985558,
        581.44437919,  120.27373541])

In [83]:
rr.intercept_

151.2927162961269

### Ridge Regression From Scratch

In [104]:
class CustomRidge:
    
    def __init__(self,alpha):
        self.alpha = alpha
        self.intercept_ = None
        self.coefficient = None
        
    def fit(self,X_train,y_train):
        X_train = np.insert(X_train,0,1,axis=1)
        X_train_transpose = X_train.T
        X_train = X_train
        identity_matrix = np.eye(X_train.shape[1])
        
        res = np.linalg.inv(np.dot(X_train_transpose,X_train) + (self.alpha * identity_matrix))
        res2 = np.dot(X_train_transpose,y_train)
        
        result = np.dot(res,res2)
        
        self.intercept_ = result[0]
        self.coefficient = result[1:]
        
        return result
    
    def predict(self,X_test):
        return np.dot(X_test,self.coefficient) + self.intercept_

In [105]:
myalg = CustomRidge(0.01)

In [106]:
print(myalg.fit(X_train,y_train))

[ 151.28841215  -33.36060276 -250.74105324  541.98908602  272.50610232
 -487.51881376  196.85392842  -56.07146671  181.50933031  581.44108753
  120.27529008]


In [107]:
y_pred = myalg.predict(X_test)

In [108]:
r2_score(y_test,y_pred)

0.516715430273572