In [1]:
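# Steps of one gradient-descent iteration for linear regression:
# forward pass
# loss fn
# gradients
# updating parameters (weights and bias) using the gradients
#
# y_hat = X.w + b
# error = y_hat - y
# J  = (1/2m) * Σ(error**2)
# dw = (1/m) * X.T.error      (vectorised form of (1/m) * Σ(x_i * error_i))
# db = (1/m) * Σ(error)
# w  = w - alpha * dw
# b  = b - alpha * db
#
# Note: the loss printed during training below is mean(error**2) = 2*J;
# dw and db are the gradients of J as defined above.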

In [2]:
import numpy as np

class LinearRegressionGD:
    """Linear regression trained with full-batch gradient descent.

    Model: ``y_hat = X @ weights + bias``. Every iteration uses all rows of
    ``X`` to compute the gradients ``dw = X.T @ error / m`` and
    ``db = sum(error) / m`` and takes one step of size ``alpha``.

    Parameters
    ----------
    alpha : float, default 0.1
        Learning rate (step size of each update).
    iterations : int, default 1000
        Number of gradient-descent steps performed by ``fit``.

    Attributes
    ----------
    weights : numpy.ndarray of shape (n_features,) or None
        Learned coefficients; ``None`` until ``fit`` has been called.
    bias : float or None
        Learned intercept; ``None`` until ``fit`` has been called.
    """

    def __init__(self, alpha = 0.1, iterations = 1000):
        self.alpha = alpha
        self.iterations = iterations
        self.weights = None
        self.bias = None

    def fit(self, X, y):
        """Fit weights and bias on ``(X, y)``, starting from zeros.

        Prints the mean squared error every 1000 iterations.

        Parameters
        ----------
        X : array-like of shape (n_instances, n_features)
        y : array-like of shape (n_instances,) or (n_instances, 1)

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, or ``y`` does not contain exactly
            one target per row of ``X``.
        """
        X = np.asarray(X, dtype=float)
        # Flatten y: a column vector (n, 1) would otherwise broadcast against
        # the (n,) predictions into an (n, n) error matrix and corrupt the fit.
        y = np.asarray(y, dtype=float).reshape(-1)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
        n_instances, n_features = X.shape
        if n_instances == 0:
            raise ValueError("X must contain at least one row")
        if y.shape[0] != n_instances:
            raise ValueError(
                f"X has {n_instances} rows but y has {y.shape[0]} values"
            )

        self.weights = np.zeros(n_features)
        self.bias = 0.0
        for i in range(self.iterations):
            # forward pass
            pred_y = np.dot(X, self.weights) + self.bias
            error = pred_y - y
            # The loss is only needed for the progress message, so compute it
            # on logging iterations only.
            if i % 1000 == 0:
                loss = np.mean(error**2)
                print(f"loss at {i} iteration is {loss}")
            # gradients of J = (1/2m) * sum(error**2)
            dw = np.dot(X.T, error) / n_instances
            db = np.sum(error) / n_instances
            # parameter update
            self.weights = self.weights - self.alpha * dw
            self.bias = self.bias - self.alpha * db

    def predict(self, X):
        """Return ``X @ weights + bias`` for each row of ``X``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError("Model is not fitted yet; call fit() before predict()")
        return np.dot(np.asarray(X, dtype=float), self.weights) + self.bias
            
            

In [3]:
class LinearRegressionSGD:
    """Linear regression trained with mini-batch stochastic gradient descent.

    Model: ``y_hat = X @ weights + bias``. Each iteration (epoch) shuffles the
    rows, walks through them in chunks of ``batch_size`` and performs one
    gradient step of size ``alpha`` per chunk.

    Parameters
    ----------
    alpha : float, default 0.1
        Learning rate (step size of each update).
    iterations : int, default 1000
        Number of passes over the full data set.
    batch_size : int, default 64
        Number of rows per mini-batch; the last batch of a pass may be smaller.
    random_state : int or None, default None
        Seed for the per-epoch shuffling. With ``None`` the global
        ``np.random`` state is used (so ``np.random.seed`` still controls it);
        with an integer, ``fit`` uses its own generator and is reproducible.

    Attributes
    ----------
    weights : numpy.ndarray of shape (n_features,) or None
        Learned coefficients; ``None`` until ``fit`` has been called.
    bias : float or None
        Learned intercept; ``None`` until ``fit`` has been called.
    """

    def __init__(self, alpha = 0.1, iterations = 1000, batch_size = 64, random_state = None):
        self.alpha = alpha
        self.iterations = iterations
        self.weights = None
        self.bias = None
        self.batch_size = batch_size
        self.random_state = random_state

    def fit(self, X, y):
        """Fit weights and bias on ``(X, y)``, starting from zeros.

        Parameters
        ----------
        X : array-like of shape (n_instances, n_features)
        y : array-like of shape (n_instances,) or (n_instances, 1)

        Raises
        ------
        ValueError
            If ``X`` is not 2-D, is empty, ``y`` does not contain exactly one
            target per row of ``X``, or ``batch_size`` is smaller than 1.
        """
        # Convert to plain arrays first: positional fancy indexing such as
        # X[indices] is label-based (or fails) on pandas objects.
        X = np.asarray(X, dtype=float)
        # Flatten y: a column vector (n, 1) would otherwise broadcast against
        # the (batch,) predictions into a 2-D error matrix.
        y = np.asarray(y, dtype=float).reshape(-1)
        if X.ndim != 2:
            raise ValueError(f"X must be 2-dimensional, got shape {X.shape}")
        n_instances, n_features = X.shape
        if n_instances == 0:
            raise ValueError("X must contain at least one row")
        if y.shape[0] != n_instances:
            raise ValueError(
                f"X has {n_instances} rows but y has {y.shape[0]} values"
            )
        if self.batch_size < 1:
            # A negative step would make the batch loop empty (no training at
            # all); zero would fail inside range() with an unrelated message.
            raise ValueError(f"batch_size must be >= 1, got {self.batch_size}")

        if self.random_state is None:
            # Keep using the global RNG so existing np.random.seed(...) calls
            # continue to control the shuffling.
            permutation = np.random.permutation
        else:
            permutation = np.random.default_rng(self.random_state).permutation

        self.weights = np.zeros(n_features)
        self.bias = 0.0
        # Mini-batch SGD
        for _ in range(self.iterations):
            # Shuffle rows so every pass sees different batches.
            indices = permutation(n_instances)
            X_shuffled = X[indices]
            y_shuffled = y[indices]

            for start in range(0, n_instances, self.batch_size):
                X_batch = X_shuffled[start: start + self.batch_size]
                y_batch = y_shuffled[start: start + self.batch_size]
                batch_len = X_batch.shape[0]  # last batch may be shorter
                # forward pass
                pred_y = np.dot(X_batch, self.weights) + self.bias
                error = pred_y - y_batch
                # gradients averaged over the batch
                dw = np.dot(X_batch.T, error) / batch_len
                db = np.sum(error) / batch_len
                # parameter update
                self.weights = self.weights - self.alpha * dw
                self.bias = self.bias - self.alpha * db

    def predict(self, X):
        """Return ``X @ weights + bias`` for each row of ``X``.

        Raises
        ------
        RuntimeError
            If called before ``fit``.
        """
        if self.weights is None:
            raise RuntimeError("Model is not fitted yet; call fit() before predict()")
        return np.dot(np.asarray(X, dtype=float), self.weights) + self.bias

In [4]:
from sklearn.datasets import load_diabetes
import pandas as pd
from sklearn.model_selection import train_test_split

# Load the diabetes regression data set as one DataFrame (features + "target").
data = load_diabetes(as_frame=True)
df = data.frame

# Separate the target column from the features and convert both to NumPy arrays.
y = df["target"].to_numpy()
X = df.drop(columns="target").to_numpy()

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split stable.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, random_state=42, test_size=0.2
)

In [5]:
# Train the full-batch gradient-descent model, then predict on the held-out rows.
# fit() prints the training loss every 1000 iterations.
model = LinearRegressionGD(iterations=10000, alpha=0.1)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred

loss at 0 iteration is 29711.32294617564
loss at 1000 iteration is 4008.446876737857
loss at 2000 iteration is 3443.577918490817
loss at 3000 iteration is 3214.7373024153208
loss at 4000 iteration is 3094.0715404302573
loss at 5000 iteration is 3022.883922921272
loss at 6000 iteration is 2979.024610480903
loss at 7000 iteration is 2951.4107485581694
loss at 8000 iteration is 2933.7448439816944
loss at 9000 iteration is 2922.2725655888


array([142.6302984 , 177.20486571, 141.80764598, 289.4203295 ,
       124.91078374,  98.61468745, 252.10688087, 191.55248404,
        88.41342437, 115.47377347,  96.36088178, 154.81313019,
        65.94589262, 209.02420815, 105.54079599, 135.52234042,
       223.22697556, 246.25827849, 193.69242364, 213.23282734,
       200.6117166 ,  89.42938074,  76.44105239, 188.46120071,
       153.95262118, 164.45986524, 187.73749881, 176.24680176,
        51.95783925, 117.07065057, 179.72530895,  94.349441  ,
       133.32434145, 181.91734578, 173.0727751 , 189.56252606,
       126.94837779, 123.68325753, 152.72348896,  61.44699847,
        81.66610406, 112.12475683, 158.76526185, 153.75551598,
       173.69280551,  66.10559619,  82.62400755, 106.033354  ,
        61.8114628 , 154.91951309, 152.68083722,  65.89075468,
       116.87810138, 109.79443004, 169.6528556 , 154.73169551,
        98.78389552, 203.01842262, 115.27793949,  69.01949261,
       183.19516116, 195.93706305, 141.38339331, 111.70

In [6]:
# Seed NumPy's global RNG so the per-epoch shuffling inside
# LinearRegressionSGD.fit (np.random.permutation) — and therefore the
# predictions below — are identical on every Restart & Run All.
np.random.seed(42)

# Train the mini-batch SGD model and predict on the held-out rows.
model = LinearRegressionSGD(alpha = 0.1, iterations = 10000, batch_size = 64)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred

array([140.02391869, 181.06406872, 139.63486175, 294.09671694,
       120.53435429,  92.83219428, 256.81544439, 186.93637495,
        82.96436759, 110.30232398,  94.48208409, 162.29058985,
        63.35622276, 204.83853236,  97.98450406, 131.70729427,
       221.39269973, 246.24981071, 195.83395185, 214.15194185,
       207.22578566,  88.12797285,  71.43656602, 187.89553712,
       156.05118857, 161.58665137, 189.40833807, 176.38028163,
        49.22225833, 109.77676548, 180.06205054,  90.90254587,
       130.75215218, 180.05427188, 172.59018719, 190.94836095,
       121.58175075, 117.01777586, 144.54540224,  60.27961179,
        73.86005244, 107.11248367, 161.45065145, 148.35161138,
       175.15494031,  64.96761919,  77.95627919, 106.35979545,
        57.88736047, 160.96960268, 156.88986522,  64.97264762,
       113.16926382, 107.44938423, 169.2680961 , 159.94304841,
        93.68412279, 207.49782277, 117.59819114,  68.00173573,
       184.30934252, 202.42387291, 141.15418331, 104.35

In [7]:
from sklearn.linear_model import LinearRegression
# Reference fit with scikit-learn's LinearRegression on the same train/test split.
# fit() returns the estimator itself, so construction and fitting can be chained.
model = LinearRegression().fit(X_train, y_train)
y_pred = model.predict(X_test)
y_pred

array([139.5475584 , 179.51720835, 134.03875572, 291.41702925,
       123.78965872,  92.1723465 , 258.23238899, 181.33732057,
        90.22411311, 108.63375858,  94.13865744, 168.43486358,
        53.5047888 , 206.63081659, 100.12925869, 130.66657085,
       219.53071499, 250.7803234 , 196.3688346 , 218.57511815,
       207.35050182,  88.48340941,  70.43285917, 188.95914235,
       154.8868162 , 159.36170122, 188.31263363, 180.39094033,
        47.99046561, 108.97453871, 174.77897633,  86.36406656,
       132.95761215, 184.53819483, 173.83220911, 190.35858492,
       124.4156176 , 119.65110656, 147.95168682,  59.05405241,
        71.62331856, 107.68284704, 165.45365458, 155.00975931,
       171.04799096,  61.45761356,  71.66672581, 114.96732206,
        51.57975523, 167.57599528, 152.52291955,  62.95568515,
       103.49741722, 109.20751489, 175.64118426, 154.60296242,
        94.41704366, 210.74209145, 120.2566205 ,  77.61585399,
       187.93203995, 206.49337474, 140.63167076, 105.59