In [1]:
import sys
import torch

# Display the path of the Python interpreter this kernel is running on.
sys.executable

'/home/wenhan/Documents/ML-Projects/torch-proj/.venv/bin/python'

In [2]:
# Sample a 5x3 tensor of uniform random values as a quick smoke test.
x = torch.rand((5, 3))
print(x)

# Last expression of the cell: reports whether a CUDA device is usable.
torch.cuda.is_available()

tensor([[0.0235, 0.2433, 0.3121],
        [0.9126, 0.6306, 0.7220],
        [0.0261, 0.5044, 0.3942],
        [0.8609, 0.9280, 0.5298],
        [0.6101, 0.1042, 0.9497]])


True

# Autograd

## Gradient compute

In [3]:
# Leaf tensor drawn from a standard normal; requires_grad=True asks autograd
# to track the operations applied to it.
x = torch.randn(size=(3,), requires_grad=True)
x

tensor([-0.4106,  0.9740, -0.4584], requires_grad=True)

In [4]:
# Adding a constant produces a new tensor whose grad_fn records the addition.
y = torch.add(x, 2)
y

tensor([1.5894, 2.9740, 1.5416], grad_fn=<AddBackward0>)

In [5]:
# z1 = 2 * y^2, built from element-wise products so autograd can trace it.
y_squared = y * y
z1 = y_squared * 2
z1

tensor([ 5.0521, 17.6891,  4.7531], grad_fn=<MulBackward0>)

In [6]:
# z1 is a vector, so backward() needs an explicit `gradient` argument: the
# vector v of the vector-Jacobian product v^T * (dz1/dx). It has the same
# shape as z1; creating it with z1's dtype avoids feeding a float64 tensor
# into a float32 graph and relying on an implicit cast.
v = torch.tensor([0.1, 1.0, 0.001], dtype=z1.dtype)
z1.backward(v)  # the argument can be omitted only when the output is a scalar

In [7]:
# Gradient stored on the leaf x by z1.backward(v).
# For z1 = 2 * (x + 2)**2 this equals v * 4 * (x + 2).
x.grad

tensor([6.3574e-01, 1.1896e+01, 6.1664e-03])

In [8]:
# Scalar output: backward() needs no explicit gradient argument.
z2 = (y * y * 2).mean()
z2.backward()
# NOTE: backward() accumulates into x.grad instead of overwriting it. If an
# earlier backward pass already populated x.grad (as z1.backward(v) does in a
# previous cell), the value shown here is the sum of both gradients, not
# d(z2)/dx alone.
x.grad

tensor([ 2.7549, 15.8612,  2.0616])

## Skip Gradient compute

In [9]:
# Fresh leaf tensor with gradient tracking switched on; the following cells
# show ways to work with it without tracking.
x = torch.randn(3).requires_grad_(True)
x

tensor([-1.1107, -0.8836,  1.1742], requires_grad=True)

In [10]:
# detach() gives a tensor with the same values as x that does not require grad.
y2 = x.detach()
torch.eq(y2, x)

tensor([True, True, True])

In [11]:
# Operations executed inside no_grad() are not recorded by autograd,
# so y3 carries no grad_fn even though x requires gradients.
with torch.no_grad():
    y3 = torch.add(x, 2)
print(y3)

tensor([0.8893, 1.1164, 3.1742])


# Backprop

In [12]:
# One training example (x, y) and a single trainable scalar weight.
x = torch.tensor(1.0)
y = torch.tensor(2.0)
weight = torch.tensor(1.0, requires_grad=True)

# Forward pass - linear regression without a bias term, squared-error loss.
y_hat = weight * x
loss = (y_hat - y).pow(2)
print(loss)

# Backward pass: fills weight.grad with d(loss)/d(weight).
loss.backward()
print(weight.grad)

# Next step would be: update the weight and repeat.

tensor(1., grad_fn=<PowBackward0>)
tensor(-2.)


# Gradient Descent

## Manual linear regression

In [268]:
import numpy as np

# Fix the global NumPy seed so the random initial weight is reproducible.
np.random.seed(1)

# Toy data set generated from the target function f(x) = 5 * x.
in_x = np.arange(1, 5)
out_y = 5 * in_x
x = in_x.astype(np.float64)
y = out_y.astype(np.float64)
N = len(x)  # number of samples

# Random starting weight; indexing the length-1 draw yields a NumPy scalar.
w = np.random.randn(1)[0]
x, y, w, N

(array([1., 2., 3., 4.]), array([ 5., 10., 15., 20.]), 1.6243453636632417, 4)

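Mean squared error loss $f$ for the linear model $XB$ with targets $Y$, and its gradient with respect to $B$:

$$
\begin{aligned}
f &= \frac{1}{N}(XB - Y)^\top (XB - Y) = \frac{1}{N}\left[(B^\top X^\top - Y^\top)(XB - Y)\right] \\
  &= \frac{1}{N}\left[B^\top X^\top X B - B^\top X^\top Y - Y^\top X B + Y^\top Y\right] \\
  &= \frac{1}{N}\left[B^\top X^\top X B - 2\,Y^\top X B + Y^\top Y\right] \\
\frac{df}{dB} &= \frac{1}{N}\left[2\,X^\top X B - 2\,X^\top Y\right]
\end{aligned}
$$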

In [269]:
def forward(x, w):
    """Return the prediction of the bias-free linear model, ``w * x``."""
    prediction = w * x
    return prediction

def loss(x, w, y):
    """Mean squared error of the model ``w * x`` against the targets ``y``.

    The average is taken over the samples that are actually passed in, so the
    result does not depend on a module-level sample count.

    Args:
        x: Input values (scalar or array-like).
        w: Scalar weight of the linear model.
        y: Target values, broadcastable against ``x``.

    Returns:
        The mean of the squared residuals ``(y - w * x) ** 2``.
    """
    residuals = np.asarray(y) - np.asarray(x) * w
    return np.mean(residuals ** 2)

def gradient(x, w, y):
    """Derivative of the mean squared error of ``w * x`` with respect to ``w``.

    Implements ``(2 * x.x * w - 2 * y.x) / n`` where ``n`` is the number of
    samples in ``x``, so the result does not depend on a module-level sample
    count.

    Args:
        x: 1-D array of input values.
        w: Scalar weight of the linear model.
        y: 1-D array of target values, same length as ``x``.

    Returns:
        The scalar gradient d(loss)/d(w).
    """
    n_samples = np.size(x)
    return (2 * np.dot(x, x) * w - 2 * np.dot(y, x)) / n_samples

In [270]:
# Prediction for the input 5 using the initial weight w, before any training.
print(f'initial pred: f(5) = {forward(5, w):.3f}')

initial pred: f(5) = 8.122


In [271]:
# Loss and gradient evaluated at the initial weight w.
loss(x, w, y), gradient(x, w, y)

(85.46283167866389, -50.63481954505137)

In [273]:
lr = 0.01  # learning rate (step size)
max_iter = 100  # number of gradient-descent steps

# Train on a copy so the initial weight `w` is left untouched.
w_copy = w.copy()
for epoch in range(max_iter):
    # Forward pass (not used further: loss() and gradient() work from x and w directly).
    y_pred = forward(x, w_copy)

    # Both quantities are evaluated at the current weight, before the update.
    grad_w = gradient(x, w_copy, y)
    loss_val = loss(x, w_copy, y)

    # Gradient-descent step: move against the gradient.
    w_copy -= lr * grad_w

    # Log every 10th epoch: w is the updated weight, loss is the pre-update value.
    if not epoch % 10:
        print(f'epoch {epoch+1}: w = {w_copy:.3f}, loss = {loss_val:.8f}')
print(f'final pred: f(5) = {forward(5, w_copy):.3f}')

epoch 1: w = 2.131, loss = 85.46283168
epoch 11: w = 4.435, loss = 3.31249928
epoch 21: w = 4.889, loss = 0.12839092
epoch 31: w = 4.978, loss = 0.00497637
epoch 41: w = 4.996, loss = 0.00019288
epoch 51: w = 4.999, loss = 0.00000748
epoch 61: w = 5.000, loss = 0.00000029
epoch 71: w = 5.000, loss = 0.00000001
epoch 81: w = 5.000, loss = 0.00000000
epoch 91: w = 5.000, loss = 0.00000000
final pred: f(5) = 25.000


## PyTorch

In [278]:
import torch.nn as nn

In [293]:
# Arrange inputs and targets as 2-D column tensors of shape (n_samples, 1).
X = torch.tensor(in_x, dtype=torch.float32).reshape(-1, 1)
y = torch.tensor(out_y, dtype=torch.float32).reshape(-1, 1)
print('Input and output size:', X.shape, y.shape)

n_samples, n_features = X.shape

class LinearRegression(nn.Module):
    """Linear model implemented as a single ``nn.Linear`` layer."""

    def __init__(self, input_dim, output_dim):
        """Create the layer mapping ``input_dim`` features to ``output_dim`` outputs."""
        super().__init__()
        self.lin = nn.Linear(in_features=input_dim, out_features=output_dim)

    def forward(self, X):
        """Apply the linear layer to ``X`` and return the predictions."""
        prediction = self.lin(X)
        return prediction

# Alternative without a custom class:
# model = nn.Linear(in_features=n_features, out_features=y.shape[1])
# The output width is taken from the targets rather than from the number of
# input features; the two only coincide here because both are 1.
model = LinearRegression(input_dim=n_features, output_dim=y.shape[1])

lr = 0.1
max_iter = 300
loss = nn.MSELoss()
optimizer = torch.optim.SGD(params=model.parameters(), lr=lr)
for epoch in range(max_iter):
    # Forward pass; nn.MSELoss is called as (input, target), i.e. prediction first.
    y_pred = model(X)
    loss_val = loss(y_pred, y)

    # Backward pass, parameter update, then reset the accumulated gradients
    # so the next iteration starts from zero.
    loss_val.backward()
    optimizer.step()
    optimizer.zero_grad()

    if epoch % 30 == 0:
        # Unpacks the two parameter tensors; weights is 2-D, hence the [0][0] index.
        weights, bias = model.parameters()
        print(f'epoch {epoch+1}: w = {weights[0][0].item():.3f}, loss = {loss_val.item():.8f}')

# Inference only: no autograd graph is needed, and the input is shaped
# (1, n_features) like the rows of the training data.
with torch.no_grad():
    final_pred = model(torch.tensor([[5.0]], dtype=torch.float32))
print(f'final pred: f(5) = {final_pred.item():.3f}')

Input and output size: torch.Size([4, 1]) torch.Size([4, 1])
epoch 1: w = 7.137, loss = 134.40914917
epoch 31: w = 4.837, loss = 0.04077616
epoch 61: w = 4.935, loss = 0.00658181
epoch 91: w = 4.974, loss = 0.00106242
epoch 121: w = 4.989, loss = 0.00017149
epoch 151: w = 4.996, loss = 0.00002768
epoch 181: w = 4.998, loss = 0.00000447
epoch 211: w = 4.999, loss = 0.00000072
epoch 241: w = 5.000, loss = 0.00000012
epoch 271: w = 5.000, loss = 0.00000002
final pred: f(5) = 25.000
