#### Using the load_diabetes dataset for a multiple linear regression model (MLRM)

In [27]:
import numpy as np
from sklearn.datasets import load_diabetes

In [28]:
# Load the diabetes dataset as a (features, target) pair instead of a Bunch object.
x,y = load_diabetes(return_X_y=True)

In [5]:
# Display the feature matrix returned by load_diabetes.
x

array([[ 0.03807591,  0.05068012,  0.06169621, ..., -0.00259226,
         0.01990842, -0.01764613],
       [-0.00188202, -0.04464164, -0.05147406, ..., -0.03949338,
        -0.06832974, -0.09220405],
       [ 0.08529891,  0.05068012,  0.04445121, ..., -0.00259226,
         0.00286377, -0.02593034],
       ...,
       [ 0.04170844,  0.05068012, -0.01590626, ..., -0.01107952,
        -0.04687948,  0.01549073],
       [-0.04547248, -0.04464164,  0.03906215, ...,  0.02655962,
         0.04452837, -0.02593034],
       [-0.04547248, -0.04464164, -0.0730303 , ..., -0.03949338,
        -0.00421986,  0.00306441]])

In [6]:
# Display the target vector returned by load_diabetes.
y

array([151.,  75., 141., 206., 135.,  97., 138.,  63., 110., 310., 101.,
        69., 179., 185., 118., 171., 166., 144.,  97., 168.,  68.,  49.,
        68., 245., 184., 202., 137.,  85., 131., 283., 129.,  59., 341.,
        87.,  65., 102., 265., 276., 252.,  90., 100.,  55.,  61.,  92.,
       259.,  53., 190., 142.,  75., 142., 155., 225.,  59., 104., 182.,
       128.,  52.,  37., 170., 170.,  61., 144.,  52., 128.,  71., 163.,
       150.,  97., 160., 178.,  48., 270., 202., 111.,  85.,  42., 170.,
       200., 252., 113., 143.,  51.,  52., 210.,  65., 141.,  55., 134.,
        42., 111.,  98., 164.,  48.,  96.,  90., 162., 150., 279.,  92.,
        83., 128., 102., 302., 198.,  95.,  53., 134., 144., 232.,  81.,
       104.,  59., 246., 297., 258., 229., 275., 281., 179., 200., 200.,
       173., 180.,  84., 121., 161.,  99., 109., 115., 268., 274., 158.,
       107.,  83., 103., 272.,  85., 280., 336., 281., 118., 317., 235.,
        60., 174., 259., 178., 128.,  96., 126., 28

In [7]:
# Report the dimensions of the features and the target, one per line.
print(x.shape, y.shape, sep="\n")

(442, 10)
(442,)


#### Applying MLRM with scikit-learn

In [8]:
from sklearn.model_selection import train_test_split

In [9]:
# Hold out 25% of the rows for testing; the fixed random_state makes the split repeatable.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=2,
)

In [10]:
# Report the dimensions of the training and test feature matrices, one per line.
print(x_train.shape, x_test.shape, sep="\n")

(331, 10)
(111, 10)


In [12]:
from sklearn.linear_model import LinearRegression

In [13]:
# scikit-learn linear regression estimator with its default settings.
reg = LinearRegression()

In [14]:
# Fit the scikit-learn model on the training split.
reg.fit(x_train,y_train)

LinearRegression()

In [15]:
# Predict targets for the held-out test rows with the scikit-learn model.
y_pred = reg.predict(x_test)

In [11]:
from sklearn.metrics import r2_score

In [16]:
# R^2 of the predictions currently stored in y_pred against the true test targets.
r2_score(y_test,y_pred)

0.442960870613316

In [17]:
# Coefficients learned by the scikit-learn model, one per feature column.
reg.coef_

array([ -36.49214644, -194.10737013,  513.87510687,  355.03926144,
       -890.97772785,  591.66754489,  155.4763403 ,  146.44553573,
        846.83812282,   54.30338383])

In [18]:
# Intercept (bias term) learned by the scikit-learn model.
reg.intercept_

152.6589110011197

#### Writing a class for MLRM

In [21]:
class MLRM:
    """Multiple linear regression fitted by ordinary least squares.

    After ``fit`` is called, ``coef_`` holds one coefficient per feature
    column and ``intercept_`` holds the bias term. Both are ``None`` until
    then.
    """

    def __init__(self):
        # Per-feature coefficients; populated by fit().
        self.coef_ = None
        # Bias term; populated by fit().
        self.intercept_ = None

    def fit(self, x_train, y_train):
        """Estimate the intercept and coefficients from training data.

        Parameters
        ----------
        x_train : array-like of shape (n_samples, n_features)
            Training feature matrix (must be 2-D).
        y_train : array-like whose first dimension is n_samples
            Training targets.

        Returns
        -------
        None
            The estimates are stored in ``intercept_`` and ``coef_``.
        """
        # Prepend a column of ones so the first fitted beta is the intercept.
        design = np.insert(x_train, 0, 1, axis=1)

        # Solve the least-squares problem directly rather than forming
        # inv(X^T X): explicitly inverting the normal-equations matrix fails
        # (or returns garbage) when the design matrix is rank-deficient, e.g.
        # with collinear features, and it amplifies rounding error.
        # lstsq returns the same solution for well-conditioned data and the
        # minimum-norm solution otherwise.
        betas = np.linalg.lstsq(design, y_train, rcond=None)[0]

        self.intercept_ = betas[0]
        self.coef_ = betas[1:]

    def predict(self, x_test):
        """Return predictions ``x_test @ coef_ + intercept_``.

        Parameters
        ----------
        x_test : array-like of shape (n_samples, n_features)
            Feature rows to predict for; must have the same number of
            columns as the data passed to ``fit``.
        """
        y_pred = np.dot(x_test, self.coef_) + self.intercept_
        return y_pred

In [22]:
# Instantiate the hand-written MLRM class.
lr = MLRM()

In [23]:
# Fit the hand-written model on the same training split used for scikit-learn.
lr.fit(x_train,y_train)

In [24]:
# Preview the design matrix that MLRM.fit builds internally:
# a column of ones is inserted at index 0 to carry the intercept.
np.insert(x_train,0,1,axis=1)

array([[ 1.        , -0.05273755, -0.04464164, ...,  0.03430886,
         0.13237265,  0.00306441],
       [ 1.        ,  0.01991321,  0.05068012, ..., -0.00259226,
         0.00371174,  0.04034337],
       [ 1.        ,  0.01628068, -0.04464164, ..., -0.00259226,
         0.03723201, -0.0010777 ],
       ...,
       [ 1.        ,  0.06350368,  0.05068012, ..., -0.00259226,
         0.08449528, -0.01764613],
       [ 1.        , -0.05273755,  0.05068012, ...,  0.1081111 ,
         0.03605579, -0.04249877],
       [ 1.        ,  0.00175052,  0.05068012, ...,  0.1081111 ,
         0.06898221,  0.12732762]])

In [25]:
# Predict on the test split with the hand-written model.
# NOTE: this rebinds `y_pred`, replacing the scikit-learn predictions stored earlier.
y_pred = lr.predict(x_test)

In [26]:
# Display the hand-written model's test-set predictions.
y_pred

array([153.61797396, 210.5600601 , 125.2821184 , 106.69044349,
       258.93909756, 258.179781  , 116.46288091, 118.56032831,
        97.13544228, 194.01434313, 145.54185748, 173.79607098,
       178.74920613, 135.39059208, 292.41911507,  92.56938022,
       213.7227351 , 156.71628165, 135.81719098, 119.97882064,
       144.88164381, 172.65436203, 156.84921441, 177.68290933,
       131.2597003 , 225.44242039, 199.50874376,  99.57160613,
        47.90701932, 239.66747862, 246.95968085, 114.92867324,
        68.55181205,  96.66256098, 202.5861438 , 169.40594267,
       161.42729936, 195.57100434, 113.6013452 , 235.05442825,
       141.54151733, 120.01727557, 197.18822085, 195.87682624,
       176.54104644, 151.31491774, 164.83903154, 278.41942016,
       101.22092954, 168.20538188, 258.0101907 , 136.99061237,
       153.85276871, 110.36329404, 194.41308838,  79.41963756,
       125.12378348,  69.00776417, 157.13512366, 163.55898815,
       168.43390358, 159.70066937,  96.48681543, 239.29

#### Applying OLS with statsmodels

In [30]:
import statsmodels.api as sm 

In [32]:
# statsmodels does not add an intercept on its own, so prepend a constant column.
# NOTE: the result is assigned back to `x`, so `x` carries the extra column from here on.
x = sm.add_constant(x)

# Fit OLS on the full dataset (no train/test split) and print the regression table.
result = sm.OLS(endog=y, exog=x).fit()
print(result.summary())

                            OLS Regression Results                            
Dep. Variable:                      y   R-squared:                       0.518
Model:                            OLS   Adj. R-squared:                  0.507
Method:                 Least Squares   F-statistic:                     46.27
Date:                Fri, 21 Apr 2023   Prob (F-statistic):           3.83e-62
Time:                        20:11:48   Log-Likelihood:                -2386.0
No. Observations:                 442   AIC:                             4794.
Df Residuals:                     431   BIC:                             4839.
Df Model:                          10                                         
Covariance Type:            nonrobust                                         
                 coef    std err          t      P>|t|      [0.025      0.975]
------------------------------------------------------------------------------
const        152.1335      2.576     59.061      0.0