In [5]:
#                               Implementation of Stochastic Gradient Descent From Scratch

# In Batch GD, parameters (coefficients & intercept) are updated only once per epoch, using the average gradient of all training examples.
# But in Stochastic GD, parameters are updated n times, for n rows, in an epoch.
# These two approaches affect the convergence speed and the path the algorithm takes to reach the minimum.
# Batch GD (BGD) can be slower, resource-intensive, and sometimes error-prone for large datasets.

In [6]:
import numpy as np
from sklearn.datasets import load_diabetes
from sklearn.model_selection import train_test_split

In [7]:
# Load the diabetes regression data as a (features, target) pair of arrays
# and show the shape of each, one per line.
X, y = load_diabetes(return_X_y=True)
print(X.shape, y.shape, sep="\n")

(442, 10)
(442,)


In [8]:
# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split identical across runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [11]:
class StochasticGradientDescent:
    """Linear regression trained with stochastic gradient descent (SGD).

    Every parameter update uses the squared-error gradient of a single,
    randomly drawn training row (sampled with replacement). One epoch
    performs as many updates as there are training rows.

    Parameters
    ----------
    learning_rate : float, default=0.01
        Step size applied to each single-row gradient.
    epochs : int, default=100
        Number of passes; each pass performs ``n_samples`` updates.
    random_state : int or None, default=None
        Seed for the row sampling. ``None`` draws from NumPy's global
        generator (so ``np.random.seed`` still controls the run); an integer
        gives this estimator its own reproducible generator.

    Attributes
    ----------
    coef_ : numpy.ndarray of shape (n_features,) or None
        Fitted weights; ``None`` until ``fit`` has been called.
    intercept_ : float or None
        Fitted bias term; ``None`` until ``fit`` has been called.
    """

    def __init__(self, learning_rate=0.01, epochs=100, random_state=None):
        self.coef_ = None
        self.intercept_ = None
        self.lr = learning_rate
        self.epochs = epochs
        self.random_state = random_state

    def fit(self, X_train, y_train):
        """Estimate ``coef_`` and ``intercept_`` from the training data.

        Parameters
        ----------
        X_train : array-like of shape (n_samples, n_features)
        y_train : array-like of shape (n_samples,)

        Raises
        ------
        ValueError
            If ``X_train`` is not 2-D or the number of targets does not match
            the number of rows.

        Notes
        -----
        Prints the fitted intercept and coefficients when training finishes.
        """
        # Coerce to plain arrays so rows are always selected by position.
        # Indexing a pandas Series/DataFrame with an integer is label-based
        # and would pair rows with the wrong targets after a shuffle.
        X_train = np.asarray(X_train, dtype=float)
        y_train = np.asarray(y_train, dtype=float).ravel()

        if X_train.ndim != 2:
            raise ValueError("X_train must be 2-D (n_samples, n_features)")
        n_samples, n_features = X_train.shape
        if y_train.shape[0] != n_samples:
            raise ValueError("X_train and y_train must have the same number of rows")

        # Global generator by default (unchanged behaviour), private one when seeded.
        if self.random_state is None:
            rng = np.random
        else:
            rng = np.random.RandomState(self.random_state)

        # init your coefs
        self.intercept_ = 0
        self.coef_ = np.ones(n_features)

        for _ in range(self.epochs):
            for _ in range(n_samples):
                idx = rng.randint(0, n_samples)
                x_i = X_train[idx]

                # Both gradients are evaluated with the same residual, i.e.
                # with the parameters as they were before this update.
                residual = y_train[idx] - (np.dot(x_i, self.coef_) + self.intercept_)

                intercept_der = -2 * residual
                coef_der = -2 * (residual * x_i)

                self.intercept_ = self.intercept_ - (self.lr * intercept_der)
                self.coef_ = self.coef_ - (self.lr * coef_der)

        print(self.intercept_, self.coef_)

    def predict(self, X_test):
        """Return the linear predictions ``X_test @ coef_ + intercept_``.

        Parameters
        ----------
        X_test : array-like of shape (n_samples, n_features)

        Returns
        -------
        numpy.ndarray of shape (n_samples,)
        """
        return np.dot(np.asarray(X_test, dtype=float), self.coef_) + self.intercept_

In [12]:
# Step size 0.01, 40 passes over the training rows.
sgd = StochasticGradientDescent(learning_rate=0.01, epochs=40)

In [13]:
# Seed NumPy's global generator so the randomly drawn training rows, and
# therefore the fitted parameters and downstream score, are the same on
# every Restart & Run All.
np.random.seed(42)
sgd.fit(X_train, y_train)

162.92426917987265 [  62.4124295   -79.16091132  331.83985533  227.03130724   13.30408176
  -23.05358367 -164.10724786  143.24348738  269.1389418   146.19239456]


In [16]:
# Predict targets for the held-out rows with the fitted model.
y_pred = sgd.predict(X_test)

In [17]:
from sklearn.metrics import r2_score

# Coefficient of determination (R^2) of the predictions on the held-out rows.
r2_score(y_true=y_test, y_pred=y_pred)

0.4206236983700927