In [3]:
from sklearn.linear_model import LinearRegression
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.metrics import r2_score
from sklearn.model_selection import train_test_split

In [6]:
# Load the diabetes dataset directly as a (features, target) pair of arrays.
x, y = load_diabetes(return_X_y=True)

In [9]:
# Feature matrix dimensions: (number of samples, number of features).
x.shape

(442, 10)

In [10]:
# Target vector length; expected to equal the number of rows in x.
y.shape

(442,)

In [11]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
x_train, x_test, y_train, y_test = train_test_split(
    x,
    y,
    test_size=0.2,
    random_state=2,
)

In [12]:
# Sanity-check the split: one shape per line, in the order
# x_train, x_test, y_train, y_test.
print(
    *(part.shape for part in (x_train, x_test, y_train, y_test)),
    sep="\n",
)

(353, 10)
(89, 10)
(353,)
(89,)


In [13]:
# Reference model: scikit-learn's LinearRegression with default settings.
reg = LinearRegression()

In [14]:
# Fit the reference model on the training split only.
reg.fit(x_train, y_train)

LinearRegression()

In [15]:
# Predict on the held-out rows; the bare name on the last line renders the array.
y_pred = reg.predict(x_test)
y_pred

array([154.1235067 , 204.81721599, 124.92988001, 106.09339576,
       258.53035681, 256.32953702, 118.75258786, 119.52147402,
       101.50717468, 190.54137158, 141.70360267, 172.51631204,
       174.34089304, 134.81184017, 294.13950798,  94.11403289,
       211.97052873, 156.49984762, 134.20709632, 119.62534726,
       148.88045343, 165.00378118, 151.09977307, 176.03719872,
       133.27651748, 221.29531227, 197.17482787,  96.15923158,
        50.26531577, 230.48342249, 242.06266394, 114.1153262 ,
        67.0785352 ,  94.53179042, 201.21593262, 167.05306138,
       159.87838519, 192.78601513, 114.49291816, 233.4825497 ,
       140.82309666, 121.06814332, 192.27431013, 191.13157307,
       179.1698153 , 148.35140027, 163.47610288, 276.81203359,
       100.17813072, 164.11265163, 255.81074398, 136.94979051,
       152.37507828, 107.92662528, 194.21661635,  77.35015426,
       118.50951725,  68.38527563, 154.29094022, 162.48905632,
       168.36590928, 156.8764705 ,  97.13958436, 238.17

In [16]:
# R^2 of the reference model on the held-out split.
r2_score(y_true=y_test, y_pred=y_pred)

0.4399387660024645

In [17]:
# Learned parameters of the reference model: intercept first, then the
# per-feature coefficients on the next line.
print(reg.intercept_, reg.coef_, sep="\n")

151.88334520854633
[  -9.16088483 -205.46225988  516.68462383  340.62734108 -895.54360867
  561.21453306  153.88478595  126.73431596  861.12139955   52.41982836]


### Stochastic gradient descent from scratch

In [43]:
class SGDRegressor:
    """Linear regression trained with plain stochastic gradient descent.

    The model is ``y_hat = x @ coef_ + intercept_``.  Every update uses the
    squared-error gradient of a single training row drawn uniformly at random
    (with replacement).

    Parameters
    ----------
    epochs : int, default=50
        Number of passes; each pass performs ``n_samples`` single-row updates.
    lr : float, default=0.01
        Constant learning rate (step size).
    random_state : int or None, default=None
        Seed for a private ``np.random.RandomState`` used to draw rows, making
        ``fit`` reproducible.  With ``None`` the global NumPy RNG is used, so
        ``np.random.seed(...)`` still controls the run.

    Attributes
    ----------
    intercept_ : float or None
        Learned bias term; ``None`` until ``fit`` has been called.
    coef_ : ndarray of shape (n_features,) or None
        Learned weights; ``None`` until ``fit`` has been called.
    """

    def __init__(self, epochs=50, lr=0.01, random_state=None):
        self.epochs = epochs
        self.lr = lr
        self.random_state = random_state
        self.intercept_ = None
        self.coef_ = None

    def fit(self, x_train, y_train):
        """Estimate ``intercept_`` and ``coef_`` from the training data.

        Parameters
        ----------
        x_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,) or (n_samples, 1)

        Returns
        -------
        self : SGDRegressor
            The fitted estimator, so calls can be chained.

        Raises
        ------
        ValueError
            If ``x_train`` is not 2-D or the number of targets differs from
            the number of rows.

        Notes
        -----
        Prints the final intercept and coefficients when training finishes.
        """
        # Coerce to plain arrays so positional indexing below also works for
        # lists, pandas objects with a shuffled index, and column-vector targets.
        x_train = np.asarray(x_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float).ravel()

        if x_train.ndim != 2:
            raise ValueError(
                "x_train must be 2-D (n_samples, n_features); got %d-D input"
                % x_train.ndim
            )
        n_samples, n_features = x_train.shape
        if y_train.shape[0] != n_samples:
            raise ValueError(
                "x_train has %d rows but y_train has %d targets"
                % (n_samples, y_train.shape[0])
            )

        # A private generator gives reproducible draws without touching the
        # global RNG; the default keeps the global stream.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        self.intercept_ = 0.0
        self.coef_ = np.ones(n_features)

        for _ in range(self.epochs):
            for _ in range(n_samples):
                idx = rng.randint(0, n_samples)
                row = x_train[idx]

                y_hat = np.dot(row, self.coef_) + self.intercept_

                # Gradient of (y - y_hat)^2: -2*(y - y_hat) w.r.t. the
                # intercept, and that same factor times the row w.r.t. coef_.
                grad_scale = -2 * (y_train[idx] - y_hat)

                self.intercept_ = self.intercept_ - (self.lr * grad_scale)
                # Parenthesised so the floating-point result matches the
                # lr * gradient form exactly.
                self.coef_ = self.coef_ - (self.lr * (grad_scale * row))

        print(self.intercept_, self.coef_)
        return self

    def predict(self, x_test):
        """Return ``x_test @ coef_ + intercept_`` for each row of ``x_test``.

        ``fit`` must have been called first so the parameters are set.
        """
        return np.dot(np.asarray(x_test, dtype=float), self.coef_) + self.intercept_

In [44]:
# From-scratch model with its default hyper-parameters (epochs=50, lr=0.01).
sgd = SGDRegressor()

In [45]:
# Seed NumPy's global RNG right before training: fit() draws its training rows
# at random, so without a seed the printed parameters (and the R^2 computed
# from them) change on every run of this cell.
np.random.seed(42)
sgd.fit(x_train, y_train)

157.8386710537039 [  51.30412707  -70.70974718  360.38134243  251.55868363   17.14652881
  -30.32241519 -171.44243447  129.06898414  321.50265317  124.87168865]


In [47]:
# Predict with the from-scratch model. NOTE: this rebinds y_pred, replacing the
# LinearRegression predictions stored under the same name earlier.
y_pred = sgd.predict(x_test)
y_pred

array([159.5960274 , 198.01040044, 145.09384079, 114.75817103,
       250.59225917, 242.94230216, 113.40179235, 120.43787019,
        99.20324247, 191.2168494 , 165.59778038, 178.1268071 ,
       189.96854894, 155.97502953, 260.8687545 ,  98.56884692,
       192.28371876, 145.27824633, 145.4531213 , 144.50658611,
       139.41549019, 195.49447083, 171.72677454, 183.61502487,
       132.55758875, 225.27987071, 201.07074639, 126.39528879,
        69.51063093, 246.69998791, 242.31638088, 124.95006243,
        81.08534145, 116.10404165, 205.15605813, 172.02113337,
       172.72279911, 201.51336176, 121.36804084, 237.80297217,
       142.62031884, 132.75139968, 191.599458  , 191.82236787,
       176.37450547, 152.04001953, 179.07536239, 283.61244615,
       123.27984952, 193.96552005, 242.26691977, 137.36043497,
       150.11762453, 154.72411462, 195.14975521, 119.16094592,
       160.66665011,  92.19380756, 164.3475108 , 150.52675944,
       169.42790209, 176.9907208 , 116.90108932, 211.78

In [48]:
# R^2 of the from-scratch SGD model on the held-out split.
r2_score(y_true=y_test, y_pred=y_pred)

0.43010856580711687