In [50]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn.datasets import load_diabetes

In [51]:
## Load dataset
X,Y=load_diabetes(return_X_y=True)

In [52]:
## Train Test Split
from sklearn.model_selection import train_test_split
X_train,X_test,Y_train,Y_test=train_test_split(X,Y,test_size=0.2,random_state=2)

In [53]:
## Apply Simple Linear Regression
from sklearn.linear_model import LinearRegression
LR=LinearRegression()

## Fit
LR.fit(X_train,Y_train)

## Print Coefficients and Intercepts
print("Coefficients=",LR.coef_)
print("Intercept=",LR.intercept_)

#Prediction
Y_pred=LR.predict(X_test)

# Metrics
from sklearn.metrics import mean_absolute_error,mean_squared_error,r2_score
mae=mean_absolute_error(Y_test,Y_pred)
mse=mean_squared_error(Y_test,Y_pred)
rmse=np.sqrt(mse)
print("Mean Absolute Error=",mae)
print("Mean Squared Error=",mse)
print("Root Mean Squared Error=",rmse)

score=r2_score(Y_test,Y_pred)
print("R2 Score=",score)

Coefficients= [  -9.15865318 -205.45432163  516.69374454  340.61999905 -895.5520019
  561.22067904  153.89310954  126.73139688  861.12700152   52.42112238]
Intercept= 151.88331005254167
Mean Absolute Error= 45.21303419046903
Mean Squared Error= 3094.4566715660626
Root Mean Squared Error= 55.627840795469155
R2 Score= 0.4399338661568968


In [65]:
## Stochastic Gradient Descent Function

class SGDRegressor:

    ## Constructor
    def __init__(self,learning_rate=0.01,epochs=10):
        self.coefficient_=None
        self.intercept_=None
        self.lr=learning_rate
        self.epochs=epochs

    ## Fit method
    def fit(self,X_train,Y_train):
        ## Initialize coefficients i.e. Intercept to 0 and coeffients will be 10 for 10 columns initialized to 1
        self.intercept_=0
        self.coefficient_=np.ones(X_train.shape[1])

        ## In each epoch for each rows coefficients will get updated
        for i in range(self.epochs): 
            for j in range(X_train.shape[0]): ## X_train.shape[0]-->353 rows
                idx=np.random.randint(0,X_train.shape[0]) ## In SGD rows are picked up randomly so generating a random row number each time
                
                y_hat=np.dot(X_train[idx],self.coefficient_)+self.intercept_ ## Same as y_hat=mx+c for each row and here y_hat is a single number
                
                ## Calculation of intercept derivative for updating intercept
                intercept_der=-2 * (Y_train[idx]-y_hat)

                ## Update intercept based on covergence algorithm
                self.intercept_=self.intercept_-(self.lr * intercept_der)

                ## Calculation of coeffient derivative
                coef_der = -2 * np.dot((Y_train[idx] - y_hat),X_train[idx])

                ## Update intercept based on covergence algorithm
                self.coefficient_=self.coefficient_ - (self.lr * coef_der)

        print("Coefficients=",self.coefficient_)
        print("Intercept=",self.intercept_)


    def predict(self,X_test):
        return np.dot(X_test,self.coefficient_) + self.intercept_ 

In [86]:
sgd = SGDRegressor(learning_rate=0.01,epochs=60)

In [87]:
Y_train.shape ## (353,)
#Y_train.reshape(X_train.shape[0],1)

(353,)

In [91]:
## We will fit the data and also will understand the time taken
import time as t

start=t.time()
sgd.fit(X_train,Y_train)
print("Time Taken=",t.time()-start)

Coefficients= [  49.82657493  -94.10194419  379.20139126  275.58070889   14.37527351
  -42.58163346 -181.07957223  136.75111061  355.68256538  120.56102506]
Intercept= 145.71490133496943
Time Taken= 0.9095494747161865


As observed above time taken is very less for stochastic gradient descent

In [89]:
## Prediction
Y_pred=sgd.predict(X_test)

In [90]:
## Score
score=r2_score(Y_test,Y_pred)
print("Score=",score)

Score= 0.44025953492688696


Everytime we run the algorithm we get little varied result of coefficient and learning rate as the rows are picked randomely

Below helpers Used for writing SGDRegressor 

In [17]:
#X_train.shape ## (353,10)
#X_train.shape[1] ## 10
np.ones(X_train.shape[1])

array([1., 1., 1., 1., 1., 1., 1., 1., 1., 1.])

In [19]:
## This will generate random number between 0 and 353 each time
np.random.randint(0,X_train.shape[0])

134

In [31]:
X_train.iloc[42]

age    0.056239
sex   -0.044642
bmi   -0.057941
bp    -0.007977
s1     0.052093
s2     0.049103
s3     0.056003
s4    -0.021412
s5    -0.028323
s6     0.044485
Name: 200, dtype: float64

In [33]:
np.dot(X_train.iloc[42],np.ones(X_train.shape[1]))+0

0.09762897536168796

Sklearn Implementation

In [92]:
## Import SGD Regressor
from sklearn.linear_model import SGDRegressor

In [93]:
## Max_iter=epochs
## eta0= initial value of learning rate
## Learning rate is constant i.e. eta0
reg=SGDRegressor(max_iter=100,learning_rate='constant',eta0=0.01)

In [94]:
reg.fit(X_train,Y_train)



In [95]:
Y_pred=reg.predict(X_test)

In [96]:
print("Score=",r2_score(Y_test,Y_pred))

Score= 0.432942487026887
