Authored by Arqam Patel. 

CC BY 4.0 License

In [1]:
import numpy as np

$$ \hat{y}_i = m x_i + c$$

$$ L = \frac{1}{N} \sum (y_i - \hat{y}_i )^2  = \frac{1}{N} \sum (y_i - mx_i - c)^2 $$

$$ \frac{dL}{dm} = - \frac{2}{N} \sum (y_i - \hat{y}_i) x_i $$

$$ \frac{dL}{dc} = - \frac{2}{N} \sum (y_i - \hat{y}_i) $$

In [17]:
class NeuralSLR():
    """Simple linear regression, y_hat = m*x + c, fitted by gradient descent.

    The loss is the mean squared error between predictions and targets.
    `m` (slope) and `c` (intercept) are the learnable parameters; `lr` is
    the step size used by every update.
    """

    def __init__(self, lr = 0.1):
        """Start both parameters at zero.

        Args:
            lr: learning rate, i.e. the factor applied to the gradient in
                each gradient-descent step.
        """
        # Parameters
        self.m = 0.
        self.c = 0.

        # NOT A PARAMETER. This is what's called a hyperparameter.
        self.lr = lr

    # makes the forward pass i.e. computes model's predictions given the current parameters
    def forward(self, data):
        """Return the predictions m*data + c for the current parameters."""
        return self.m*data + self.c

    # computes the loss given current parameters
    def loss(self, data, true):
        """Return the mean squared error between forward(data) and `true`."""
        # N must come from the batch that was passed in, not from a
        # notebook-level array, so the model works on any dataset.
        N = len(data)
        pred = self.forward(data)
        return 1/N * np.sum((pred - true)**2)

    # computes gradients wrt params
    def grad(self, data, true):
        """Return (dL/dm, dL/dc), the gradients of the MSE loss.

        dL/dm = -2/N * sum((true - pred) * data)
        dL/dc = -2/N * sum(true - pred)
        """
        N = len(data)
        pred = self.forward(data)
        residual = true - pred
        dldm = -2/N * np.sum(residual*data)
        dldc = -2/N * np.sum(residual)
        return dldm, dldc

    # performs single grad descent step
    def step(self, data, true):
        """Move m and c one step of size lr against their gradients."""
        dldm, dldc = self.grad(data, true)
        self.m = self.m - self.lr*dldm
        self.c = self.c - self.lr*dldc

    # just a wrapper function to conveniently perform grad descent multiple times
    def train(self, data, true, n_steps, verbose = True):
        """Run `n_steps` gradient-descent steps on (data, true).

        Args:
            data: inputs passed to forward().
            true: target values compared against the predictions.
            n_steps: number of calls to step().
            verbose: when True (the default), print the step number, the
                updated m and c, and the loss after every step.
        """
        for i in range(n_steps):
            if verbose:
                print(f"Step no {i}")
            self.step(data, true)
            if verbose:
                print(f"m: {self.m}")
                print(f"c: {self.c}")
                print(f"Loss {self.loss(data, true)}")
        

In [20]:
# Synthetic training set: 40 standard-normal inputs whose targets lie on the
# line y = 5x - 1, perturbed by a small amount of Gaussian noise.
x = np.random.randn(40)
noise = np.random.randn(40) / 100
y = 5 * x - 1 + noise

In [57]:
# display the generated inputs
x

array([ 0.03175921,  0.44806265,  0.19392662, -1.16852062,  0.05050875,
       -1.75192881, -0.02878816,  0.86087693,  1.69576665,  0.61179224,
       -0.49353732, -0.39522401,  1.26148996,  0.46887088,  1.32129613,
       -0.85409231, -0.44458541, -2.30406965, -1.42827572,  0.9172523 ,
       -0.11919319, -0.68664254,  0.07233606,  0.57468015, -0.73574885,
        0.31318194,  1.92184232, -0.12083301,  1.24597831, -0.19361131,
       -1.6018749 ,  0.36796196, -0.16712353, -0.76188552, -1.12055819,
       -0.45243497, -1.33295597, -0.20193059,  1.13357446, -0.71531842])

In [58]:
# display the generated (noisy) targets
y

array([ -0.82793079,   1.23618882,  -0.02689421,  -6.84205638,
        -0.74778569,  -9.7536547 ,  -1.16972151,   3.30212752,
         7.46909761,   2.06838191,  -3.4699038 ,  -2.96896461,
         5.30482791,   1.3384159 ,   5.60115392,  -5.28745568,
        -3.21838186, -12.53606868,  -8.13000479,   3.58799181,
        -1.60278286,  -4.44325782,  -0.63225428,   1.85862149,
        -4.66314865,   0.56661144,   8.60308913,  -1.61371708,
         5.25467805,  -1.96121569,  -9.01008994,   0.81863379,
        -1.84197863,  -4.79259935,  -6.62571367,  -3.25874329,
        -7.67069634,  -2.0260326 ,   4.67541111,  -4.55627991])

In [18]:
# instantiate the model with its default learning rate spelled out
model1 = NeuralSLR(lr = 0.1)

In [21]:
# run 100 gradient-descent steps; m and c should converge towards the
# true parameters we used to generate the data
model1.train(data = x, true = y, n_steps = 100)

Step no 0
m: 4.99902128170198
c: -0.9992428118105702
Loss 0.00013996169856231612
Step no 1
m: 4.999283527985586
c: -0.9996729325173602
Loss 0.000137692586938622
Step no 2
m: 4.999489692605844
c: -1.0000123244166244
Loss 0.00013628261365388722
Step no 3
m: 4.99965174621118
c: -1.0002801393680307
Loss 0.0001354064788417823
Step no 4
m: 4.999779108154706
c: -1.0004914841073493
Loss 0.00013486205576065057
Step no 5
m: 4.999879189591526
c: -1.0006582750411561
Loss 0.00013452375091176663
Step no 6
m: 4.9999578210792075
c: -1.000789912339498
Loss 0.00013431352481079383
Step no 7
m: 5.000019589223949
c: -1.0008938115389256
Loss 0.00013418288593054791
Step no 8
m: 5.000068101703146
c: -1.0009758227855257
Loss 0.0001341017026843343
Step no 9
m: 5.000106195889412
c: -1.001040561474878
Loss 0.00013405125173386503
Step no 10
m: 5.00013610306701
c: -1.00109166902133
Loss 0.00013401989851199067
Step no 11
m: 5.000159577684148
c: -1.001132018527393
Loss 0.00013400041326644386
Step no 12
m: 5.000177999

In [56]:
# btw you didn't really need grad descent because Linear Regression generally has an analytical solution
# Design matrix: one column holding the inputs and one column of ones for the
# intercept. The ones column is sized from x instead of a hard-coded 40.
X = np.column_stack((x, np.ones_like(x)))
# lstsq solves the least-squares problem directly, so X.T @ X never has to be
# inverted explicitly (the explicit inverse is numerically less stable).
# params keeps the (2, 1) layout: first row is the slope, second the intercept.
params, *_ = np.linalg.lstsq(X, np.expand_dims(y, axis = 1), rcond = None)
print(params)

[[ 5.00024471]
 [-1.00128367]]
