In [1]:
import torch

In [2]:
import numpy as np

# everything manual

In [5]:
# f = w * x
# f = 2 * x

X = np.array([1,2,3,4], dtype=np.float32)
Y = np.array([2,4,6,8], dtype=np.float32)

W = 0.0

# model prediction
def forward(x):
    return W * x

# loss = MSE
def loss(y, y_hat):
    return ((y - y_hat)**2).mean()

# gradient
# MSE = 1/N * (w*x -y)**2
# dJ/dw = 1/N 2x (w*x - y)
def gradient(x, y, y_hat):
    return np.dot(2*x, y_hat - y).mean()

print(f'prediction before training: {forward(5):.3f}')

# training
lr = 0.01
n_iters = 10
for i in range(n_iters):
    y_hat = forward(X)
    l = loss(Y, y_hat)

    # gradient
    dw = gradient(X, Y, y_hat)

    W -= lr * dw

    if i % 1 == 0:
        print(f'epoch: {i+1} loss: {l:.8f}')

print(f'prediction after training: {forward(5):.3f}')





prediction before training: 0.000
epoch: 1 loss: 30.00000000
epoch: 2 loss: 4.80000067
epoch: 3 loss: 0.76800019
epoch: 4 loss: 0.12288000
epoch: 5 loss: 0.01966083
epoch: 6 loss: 0.00314574
epoch: 7 loss: 0.00050332
epoch: 8 loss: 0.00008053
epoch: 9 loss: 0.00001288
epoch: 10 loss: 0.00000206
prediction after training: 9.999


# replace manual gradient computation using torch

In [14]:
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

W = torch.tensor(0.0, requires_grad=True)

# model prediction
def forward(x):
    return W * x

# loss = MSE
def loss(y, y_hat):
    return ((y - y_hat)**2).mean()

print(f'prediction before training: {forward(5):.3f}')

# training
lr = 0.01
n_iters = 20
for i in range(n_iters):
    y_hat = forward(X)
    l = loss(Y, y_hat) # loss computed

    # compute gradient of current tensor wrt graph leaves
    l.backward()

    with torch.no_grad():
        W -= W.grad * lr

    W.grad.zero_()
    
    if i % 1 == 0:
        print(f'epoch: {i+1} loss: {l:.8f}')

print(f'prediction after training: {forward(5):.3f}')

prediction before training: 0.000
epoch: 1 loss: 30.00000000
epoch: 2 loss: 21.67499924
epoch: 3 loss: 15.66018772
epoch: 4 loss: 11.31448650
epoch: 5 loss: 8.17471695
epoch: 6 loss: 5.90623236
epoch: 7 loss: 4.26725292
epoch: 8 loss: 3.08308983
epoch: 9 loss: 2.22753215
epoch: 10 loss: 1.60939169
epoch: 11 loss: 1.16278565
epoch: 12 loss: 0.84011245
epoch: 13 loss: 0.60698116
epoch: 14 loss: 0.43854395
epoch: 15 loss: 0.31684780
epoch: 16 loss: 0.22892261
epoch: 17 loss: 0.16539653
epoch: 18 loss: 0.11949898
epoch: 19 loss: 0.08633806
epoch: 20 loss: 0.06237914
prediction after training: 9.612


# replace manual loss calculation and optimization

In [17]:
import torch
import torch.nn as nn

X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

W = torch.tensor(0.0, requires_grad=True)

# model prediction
def forward(x):
    return W * x

# loss = MSE
loss = nn.MSELoss()
optim = torch.optim.SGD([W], lr=0.01)

print(f'prediction before training: {forward(5):.3f}')

# training
n_iters = 20
for i in range(n_iters):
    y_hat = forward(X)
    l = loss(Y, y_hat) # loss computed

    # compute gradient of current tensor wrt graph leaves
    l.backward()

    optim.step() # update weights

    optim.zero_grad() # reset gradient accumulation to zero
    
    if i % 1 == 0:
        print(f'epoch: {i+1} loss: {l:.8f}')

print(f'prediction after training: {forward(5):.3f}')

prediction before training: 0.000
epoch: 1 loss: 30.00000000
epoch: 2 loss: 21.67499924
epoch: 3 loss: 15.66018772
epoch: 4 loss: 11.31448650
epoch: 5 loss: 8.17471695
epoch: 6 loss: 5.90623236
epoch: 7 loss: 4.26725292
epoch: 8 loss: 3.08308983
epoch: 9 loss: 2.22753215
epoch: 10 loss: 1.60939169
epoch: 11 loss: 1.16278565
epoch: 12 loss: 0.84011245
epoch: 13 loss: 0.60698116
epoch: 14 loss: 0.43854395
epoch: 15 loss: 0.31684780
epoch: 16 loss: 0.22892261
epoch: 17 loss: 0.16539653
epoch: 18 loss: 0.11949898
epoch: 19 loss: 0.08633806
epoch: 20 loss: 0.06237914
prediction after training: 9.612


# replace forward method with pytorch model now

In [24]:
import torch
import torch.nn as nn

X = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
Y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)

X_test = torch.tensor([5.0], dtype=torch.float)

n_samples, n_features = X.shape

model = nn.Linear(n_features, n_features)

# loss = MSE
loss = nn.MSELoss()
optim = torch.optim.SGD(model.parameters(), lr=0.01)

print(f'prediction before training: {model(X_test).item():.3f}')

# training
n_iters = 20
for i in range(n_iters):
    y_hat = model(X)
    l = loss(Y, y_hat) # loss computed

    # compute gradient of current tensor wrt graph leaves
    l.backward()

    optim.step() # update weights

    optim.zero_grad() # reset gradient accumulation to zero
    
    if i % 1 == 0:
        w, b = model.parameters()
        print(f'epoch: {i+1} w: {w.item()} loss: {l:.8f} ')

print(f'prediction after training: {model(X_test).item():.3f}')

prediction before training: -2.798
epoch: 1 w: -0.34948939085006714 loss: 45.28266525 
epoch: 2 w: -0.0425642728805542 loss: 31.53902054 
epoch: 3 w: 0.2133583426475525 loss: 22.00188828 
epoch: 4 w: 0.4267953634262085 loss: 15.38357258 
epoch: 5 w: 0.604841411113739 loss: 10.79056358 
epoch: 6 w: 0.753406286239624 loss: 7.60287619 
epoch: 7 w: 0.8774126768112183 loss: 5.39031696 
epoch: 8 w: 0.980961263179779 loss: 3.85438204 
epoch: 9 w: 1.0674679279327393 loss: 2.78794670 
epoch: 10 w: 1.1397778987884521 loss: 2.04729176 
epoch: 11 w: 1.2002614736557007 loss: 1.53269255 
epoch: 12 w: 1.250892996788025 loss: 1.17495275 
epoch: 13 w: 1.2933169603347778 loss: 0.92605811 
epoch: 14 w: 1.3289034366607666 loss: 0.75269306 
epoch: 15 w: 1.3587934970855713 loss: 0.63174039 
epoch: 16 w: 1.3839378356933594 loss: 0.54715943 
epoch: 17 w: 1.4051282405853271 loss: 0.48782015 
epoch: 18 w: 1.423024296760559 loss: 0.44599921 
epoch: 19 w: 1.4381754398345947 loss: 0.41633755 
epoch: 20 w: 1.451039

# practice

In [27]:
X = np.array([1,2,3,4], dtype=np.float32)
Y = np.array([2,4,6,8], dtype=np.float32)

W = 0.0

def forward(x):
    return W * x

def loss(y_hat, y):
    return ((y_hat - y) ** 2).mean()

def gradient(X, Y, y_hat):
    return np.dot(2 * X, y_hat - Y).mean()


n_iters = 10
lr = 0.01

print(f'before model training: {forward(5):.4f}')

for i in range(n_iters):
    y_hat = forward(X)
    l = loss(y_hat, Y)

    dw = gradient(X, Y, y_hat)
    W -= lr * dw

    if i % 1 == 0:
        print(f'epoch: {i+1} W: {W:.4f} loss:{l:.4f} forward: {forward(5)}')

print(f'after model training: {forward(5):.4f}')

before model training: 0.0000
epoch: 1 W: 1.2000 loss:30.0000 forward: 5.999999523162842
epoch: 2 W: 1.6800 loss:4.8000 forward: 8.399999618530273
epoch: 3 W: 1.8720 loss:0.7680 forward: 9.359999656677246
epoch: 4 W: 1.9488 loss:0.1229 forward: 9.743999481201172
epoch: 5 W: 1.9795 loss:0.0197 forward: 9.897600173950195
epoch: 6 W: 1.9918 loss:0.0031 forward: 9.959039688110352
epoch: 7 W: 1.9967 loss:0.0005 forward: 9.98361587524414
epoch: 8 W: 1.9987 loss:0.0001 forward: 9.993446350097656
epoch: 9 W: 1.9995 loss:0.0000 forward: 9.9973783493042
epoch: 10 W: 1.9998 loss:0.0000 forward: 9.99895191192627
after model training: 9.9990


In [43]:
import torch
import torch.nn as nn

lr = 0.01


loss = nn.MSELoss()

X_test = torch.tensor([5.0], dtype=torch.float)
X = torch.tensor([[1],[2],[3],[4]], dtype=torch.float)
Y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float)

class LinearModel(nn.Module):
    def __init__(self, in_features, out_features):
        super().__init__()
        self.l1 = nn.Linear(in_features, out_features)

    def forward(self, x):
        return self.l1(x)
    
n_samples, n_features = X.shape
model = LinearModel(n_features, n_features)

print(f'before training: {model(X_test).item()}')

optim = torch.optim.SGD(model.parameters(), lr=lr)

n_iters = 30
for epoch in range(n_iters):
    y_hat = model(X)
    l = loss(y_hat, Y)

    l.backward() # compute gradients
    
    optim.step() # update the weights

    optim.zero_grad() # reset the gradient accumulation

    if epoch % 1 == 0:
        print(f'epoch: {epoch + 1} loss: {l:.4f}')


print(f'after training: {model(X_test).item()}')
    


before training: -4.255657196044922
epoch: 1 loss: 66.1850
epoch: 2 loss: 45.9244
epoch: 3 loss: 31.8660
epoch: 4 loss: 22.1112
epoch: 5 loss: 15.3426
epoch: 6 loss: 10.6459
epoch: 7 loss: 7.3870
epoch: 8 loss: 5.1258
epoch: 9 loss: 3.5567
epoch: 10 loss: 2.4680
epoch: 11 loss: 1.7125
epoch: 12 loss: 1.1883
epoch: 13 loss: 0.8246
epoch: 14 loss: 0.5722
epoch: 15 loss: 0.3971
epoch: 16 loss: 0.2756
epoch: 17 loss: 0.1913
epoch: 18 loss: 0.1328
epoch: 19 loss: 0.0922
epoch: 20 loss: 0.0640
epoch: 21 loss: 0.0445
epoch: 22 loss: 0.0309
epoch: 23 loss: 0.0215
epoch: 24 loss: 0.0149
epoch: 25 loss: 0.0104
epoch: 26 loss: 0.0073
epoch: 27 loss: 0.0051
epoch: 28 loss: 0.0036
epoch: 29 loss: 0.0025
epoch: 30 loss: 0.0018
after training: 9.920428276062012


In [54]:
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

W = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

def forward(x):
    return W * x

def loss(y_hat, y):
    return ((y_hat - y) ** 2).mean()

n_iters = 20
lr = 0.01

print(f'before model training: {forward(5):.4f}')

for i in range(n_iters):
    y_hat = forward(X)
    l = loss(y_hat, Y)

    # dw = gradient(X, Y, y_hat)
    l.backward()
    dw = W.grad
    with torch.no_grad():
        W -= lr * dw

    W.grad.zero_() # reset gradient accumulation

    if i % 1 == 0:
        print(f'epoch: {i+1} W: {W:.4f} loss:{l:.4f} forward: {forward(5)}')

print(f'after model training: {forward(5):.4f}')

before model training: 0.0000
epoch: 1 W: 0.3000 loss:30.0000 forward: 1.4999998807907104
epoch: 2 W: 0.5550 loss:21.6750 forward: 2.7749996185302734
epoch: 3 W: 0.7717 loss:15.6602 forward: 3.8587496280670166
epoch: 4 W: 0.9560 loss:11.3145 forward: 4.779937267303467
epoch: 5 W: 1.1126 loss:8.1747 forward: 5.562946796417236
epoch: 6 W: 1.2457 loss:5.9062 forward: 6.228504657745361
epoch: 7 W: 1.3588 loss:4.2673 forward: 6.794229030609131
epoch: 8 W: 1.4550 loss:3.0831 forward: 7.275094985961914
epoch: 9 W: 1.5368 loss:2.2275 forward: 7.683830738067627
epoch: 10 W: 1.6063 loss:1.6094 forward: 8.031255722045898
epoch: 11 W: 1.6653 loss:1.1628 forward: 8.326567649841309
epoch: 12 W: 1.7155 loss:0.8401 forward: 8.577583312988281
epoch: 13 W: 1.7582 loss:0.6070 forward: 8.790945053100586
epoch: 14 W: 1.7945 loss:0.4385 forward: 8.97230339050293
epoch: 15 W: 1.8253 loss:0.3168 forward: 9.126458168029785
epoch: 16 W: 1.8515 loss:0.2289 forward: 9.257489204406738
epoch: 17 W: 1.8738 loss:0.16

In [59]:
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

W = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

def forward(x):
    return W * x

lr = 0.01

loss = nn.MSELoss()
optim = torch.optim.SGD([W], lr=lr)

n_iters = 20

print(f'before model training: {forward(5):.4f}')

for i in range(n_iters):
    y_hat = forward(X)
    l = loss(y_hat, Y)

    l.backward() # compute gradient
    optim.step() # update weights
    optim.zero_grad() # reset accumulated gradients

    if i % 1 == 0:
        print(f'epoch: {i+1} W: {W:.4f} loss:{l:.4f} forward: {forward(5)}')

print(f'after model training: {forward(5):.4f}')

before model training: 0.0000
epoch: 1 W: 0.3000 loss:30.0000 forward: 1.4999998807907104
epoch: 2 W: 0.5550 loss:21.6750 forward: 2.7749996185302734
epoch: 3 W: 0.7717 loss:15.6602 forward: 3.8587496280670166
epoch: 4 W: 0.9560 loss:11.3145 forward: 4.779937267303467
epoch: 5 W: 1.1126 loss:8.1747 forward: 5.562946796417236
epoch: 6 W: 1.2457 loss:5.9062 forward: 6.228504657745361
epoch: 7 W: 1.3588 loss:4.2673 forward: 6.794229030609131
epoch: 8 W: 1.4550 loss:3.0831 forward: 7.275094985961914
epoch: 9 W: 1.5368 loss:2.2275 forward: 7.683830738067627
epoch: 10 W: 1.6063 loss:1.6094 forward: 8.031255722045898
epoch: 11 W: 1.6653 loss:1.1628 forward: 8.326567649841309
epoch: 12 W: 1.7155 loss:0.8401 forward: 8.577583312988281
epoch: 13 W: 1.7582 loss:0.6070 forward: 8.790945053100586
epoch: 14 W: 1.7945 loss:0.4385 forward: 8.97230339050293
epoch: 15 W: 1.8253 loss:0.3168 forward: 9.126458168029785
epoch: 16 W: 1.8515 loss:0.2289 forward: 9.257489204406738
epoch: 17 W: 1.8738 loss:0.16