GRADIENT DESCENT

In [7]:
import numpy as np  # do everything with numpy
# f = w * x such as f = 2 * x
X = np.array([1,2,3,4], dtype = np.float32)
y = np.array([2,4,6,8], dtype = np.float32)   #2 * x

w = 0.0

#model prediction
def forward(x):
    return w * x  #forward pass

#loss
def loss(y, y_pred):
    return ((y - y_pred)**2).mean()

#gradient , MSE = 1/N * (w*x -y)**2
# dJ/dw = 1/N 2x (w*x - y)
def gradient(x,y, y_pred):
    return np.dot(2*x, y_pred - y).mean()

print(f'Prediction before training: f(5) = {forward(5):.3f}')

#training
learning_rate = 0.01
n_iters = 5

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    print(f'y pred = {y_pred}')

    #loss
    l = loss(y, y_pred)
    print(f'loss = {l}')

    #gradients
    dw = gradient(X, y, y_pred)
    print(f'gradient = {dw}')
    #update weights
    w -= learning_rate * dw
    print(f'weight = {w}')
    print("******************************")
    if epoch % 5 == 0:
        pass
        #print(f'epoch {epoch + 1}: w  = {w:.3f}, loss = {l:.8f}')
    
print(f"Prediction after the training: f(5) = {forward(5):.3f}")

Prediction before training: f(5) = 0.000
y pred = [0. 0. 0. 0.]
loss = 30.0
gradient = -120.0
weight = 1.2
******************************
y pred = [1.2       2.4       3.6000001 4.8      ]
loss = 4.799999237060547
gradient = -48.0
weight = 1.68
******************************
y pred = [1.68 3.36 5.04 6.72]
loss = 0.7680001854896545
gradient = -19.200002670288086
weight = 1.8720000267028807
******************************
y pred = [1.872 3.744 5.616 7.488]
loss = 0.1228799968957901
gradient = -7.679999828338623
weight = 1.948800024986267
******************************
y pred = [1.9488 3.8976 5.8464 7.7952]
loss = 0.019660834223031998
gradient = -3.072002649307251
weight = 1.9795200514793394
******************************
Prediction after the training: f(5) = 9.898


In [4]:
import torch  # do everything with torch
# f = w * x such as f = 2 * x
X = torch.tensor([1,2,3,4], dtype = torch.float32)
y = torch.tensor([2,4,6,8], dtype = torch.float32)   #2 * x

w = torch.tensor(0.0, dtype = torch.float32, requires_grad=True)

#model prediction
def forward(x):
    return w * x  #forward pass

#loss
def loss(y, y_pred):
    return ((y - y_pred)**2).mean()


print(f'Prediction before training: f(5) = {forward(5):.3f}')

#training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)

    #loss
    l = loss(y, y_pred)

    #gradients = backward pass
    l.backward()  # dl/dw

    #update weights
    with torch.no_grad():
        w -= learning_rate * w.grad

    # zero gradients : you need to empty the gradients before the next iter.
    w.grad.zero_()  #underscore at the end means it will modify, make changes

    if epoch % 10 == 0:
        print(f'epoch {epoch + 1}: w  = {w:.3f}, loss = {l:.8f}')
    
print(f"Prediction after the training: f(5) = {forward(5):.3f}")

Prediction before training: f(5) = 0.000
epoch 1: w  = 0.300, loss = 30.00000000
epoch 11: w  = 1.665, loss = 1.16278565
epoch 21: w  = 1.934, loss = 0.04506890
epoch 31: w  = 1.987, loss = 0.00174685
epoch 41: w  = 1.997, loss = 0.00006770
epoch 51: w  = 1.999, loss = 0.00000262
epoch 61: w  = 2.000, loss = 0.00000010
epoch 71: w  = 2.000, loss = 0.00000000
epoch 81: w  = 2.000, loss = 0.00000000
epoch 91: w  = 2.000, loss = 0.00000000
Prediction after the training: f(5) = 10.000


TRAINING PIPELINE

In [None]:
# 1) Design model (input, output size, forward pass)
# 2) Construct loss and optimizer
# 3) Training loop
#   - forward pass: compute prediction
#   - backward pass: gradients
#   - update weights

# thats the whole pipeline

In [38]:
from sklearn.linear_model import LinearRegression
import torch.nn as nn
 # initalization is random, you get different result everytime you run
X = torch.tensor([[1],[2],[3],[4]], dtype = torch.float32)
y = torch.tensor([[2],[4],[6],[8]], dtype = torch.float32)   #2 * x
# no need to define weights, pytorch module knows the parameters
X_test = torch.tensor([5], dtype = torch.float32)

n_samples, n_features = X.shape
print(X.shape)
print(n_samples,n_features)

input_size = n_features
output_size = n_features

class My_Model(nn.Module): #linear regression, dummy example
    def __init__(self,input_dim,output_dim):
        super(LinearRegression, self).__init__()
        #define layers
        self.lin = nn.Linear(input_dim, output_dim)

    def forward(self, x):
        return self.lin(x)


# model = nn.Linear(input_size, output_size)
model = My_Model(input_size, output_size)



print(f'Prediction before training: f(5) = {model(X_test).item():.3f}')

# loss which is provided by pytorch
loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr = learning_rate)

learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)

    #loss
    l = loss(y, y_pred)

    #gradients = backward pass
    l.backward()  # dl/dw

    #no need to update weights manually, use optimizer
    optimizer.step()

    # zero gradients : you need to empty the gradients before the next iter.
    optimizer.zero_grad()  
    if epoch % 10 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch + 1}: w  = {w[0][0].item():.3f}, loss = {l:.8f}') 
print(f"Prediction after the training: f(5) = {model(X_test).item():.3f}")  

torch.Size([4, 1])
4 1


TypeError: super(type, obj): obj must be an instance or subtype of type