In [1]:
import pandas as pd
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.model_selection import train_test_split
from sklearn.metrics import r2_score
from sklearn.datasets import load_diabetes

In [2]:
X,y = load_diabetes(return_X_y=True,as_frame=True)

In [3]:
from yreport import data_health_report

In [4]:
s = data_health_report(X)

In [5]:
s.summary()

Data Health Score: 100.0/100
Rows: 442 | No_Columns: 10

Numeric Columns : ['age', 'bmi', 'bp', 's1', 's2', 's3', 's4', 's5', 's6', 'sex']
Categorical Columns : []
DateTime Columns : []

Missing Percentage: 


Recommendations:
- encoding: {}
- missing: {}

Numeric Diagnostics:
- age: skew=-0.23, outliers=0.0%, no transform needed
- bmi: skew=0.6, outliers=0.68%, no transform needed
- bp: skew=0.29, outliers=0.0%, no transform needed
- s1: skew=0.38, outliers=1.81%, no transform needed
- s2: skew=0.44, outliers=1.58%, no transform needed
- s3: skew=0.8, outliers=1.58%, no transform needed
- s4: skew=0.73, outliers=0.45%, no transform needed
- s5: skew=0.29, outliers=0.9%, no transform needed
- s6: skew=0.21, outliers=2.04%, no transform needed
- sex: skew=0.13, outliers=0.0%, no transform needed


In [6]:
# First, establish a baseline using MLR (scikit-learn's LinearRegression)
X_train,X_test,y_train,y_test = train_test_split(X,y,test_size=0.2,random_state=2)

In [7]:
# Baseline model: fit scikit-learn's LinearRegression on the training split,
# predict the held-out rows, and report the R^2 score for later comparison.
mlr = LinearRegression().fit(X_train, y_train)
y_pred = mlr.predict(X_test)
print(f'r2 score is {r2_score(y_test,y_pred)}')

r2 score is 0.43993386615689667


In [8]:
mlr.coef_

array([  -9.15865318, -205.45432163,  516.69374454,  340.61999905,
       -895.5520019 ,  561.22067904,  153.89310954,  126.73139688,
        861.12700152,   52.42112238])

In [9]:
mlr.intercept_

151.88331005254167

In [10]:
# now let's make our own class for MLR with batch GD

In [45]:
class YGD:
    """Multiple linear regression trained with batch gradient descent.

    Every epoch computes predictions for the whole training set once and
    then moves the intercept and all coefficients one step along the
    negative gradient of the mean squared error.

    Parameters
    ----------
    epoch : int
        Number of full passes over the training data.
    lr : float
        Learning rate (step size) applied to each gradient update.

    Attributes
    ----------
    intercept_ : float
        Bias term; 0 until ``fit`` is called.
    coef_ : numpy.ndarray or None
        One weight per feature; ``None`` until ``fit`` is called.
    """

    def __init__(self, epoch, lr):
        self.epoch = epoch
        self.lr = lr
        self.intercept_ = 0
        # The weight vector is sized in fit() from the data actually passed
        # in, so constructing the estimator no longer depends on a
        # notebook-global X_train being defined (and having the right width).
        self.coef_ = None

    def fit(self, X_train, y_train):
        """Learn the intercept and coefficients from the training data.

        Parameters are re-initialised on every call (intercept 0, all
        coefficients 1), so re-running the fit cell gives the same result
        instead of continuing from leftover weights.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
            Training features (DataFrame or ndarray).
        y_train : array-like of shape (n_samples,)
            Training targets (Series or ndarray).

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D, is empty, or its row count does not
            match the length of ``y_train``.
        """
        # Work on plain float arrays so pandas index alignment cannot
        # interfere with the arithmetic below.
        X = np.asarray(X_train, dtype=float)
        y = np.asarray(y_train, dtype=float).ravel()

        if X.ndim != 2:
            raise ValueError("X_train must be 2-dimensional (n_samples, n_features)")
        n_samples, n_features = X.shape
        if n_samples == 0:
            raise ValueError("X_train must contain at least one sample")
        if y.shape[0] != n_samples:
            raise ValueError("X_train and y_train must have the same number of samples")

        self.intercept_ = 0.0
        self.coef_ = np.ones(n_features)

        for _ in range(self.epoch):
            # Both gradients use the same residuals, i.e. the parameters are
            # updated simultaneously from the state at the start of the epoch.
            residual = y - (self.intercept_ + X @ self.coef_)

            # d(MSE)/d(intercept)
            grad_intercept = -2 / n_samples * residual.sum()
            # d(MSE)/d(coef), one entry per feature
            grad_coef = -2 / n_samples * (residual @ X)

            self.intercept_ = self.intercept_ - self.lr * grad_intercept
            self.coef_ = self.coef_ - self.lr * grad_coef

    def predict(self, X_test):
        """Return predictions ``X_test @ coef_ + intercept_``.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)
            Rows to predict.

        Returns
        -------
        numpy.ndarray
            One predicted value per row of ``X_test``.

        Raises
        ------
        ValueError
            If called before ``fit``.
        """
        if self.coef_ is None:
            raise ValueError("This YGD instance is not fitted yet; call fit() first")
        return np.dot(np.asarray(X_test, dtype=float), self.coef_) + self.intercept_

In [46]:
# Instantiate the custom batch-GD regressor: 500 epochs, learning rate 0.1.
ymlr = YGD(epoch=500, lr=0.1)

In [47]:
ymlr.fit(X_train,y_train)

In [48]:
ymlr.coef_

array([ 50.61655097,  -0.92864384, 165.01987168, 124.52845806,
        40.90380061,  25.32364879, -96.29788197,  93.94879167,
       157.87930964,  87.31014873])

In [49]:
ymlr.intercept_

151.53233878908244