In [1]:
import torch
import numpy as np

### Tensor Basics

In [24]:
a = torch.ones(5)
print(a)

tensor([1., 1., 1., 1., 1.])


In [25]:
b = a.numpy()
print(b)

[1. 1. 1. 1. 1.]


In [26]:
# If the tensor values are saved on cpu both the tensor and the numpy array copy of the tensor
# will point to the same value
# so changing one, changes the other
a.add_(1)
print(a)
print(b)

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [27]:
c = np.ones(5)
d = torch.from_numpy(c)
print(c)
print(d)

[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


In [28]:
# Numpy can handle only CPU tensors
if torch.cuda.is_available():
    device = torch.device("cuda")
    x = torch.ones(5, device=device)
    y = torch.ones(5).to(device)
    z = x + y
    try:
        w = z.numpy()
    except:
        print("Numpy can handle only CPU tensors. Moving z to CPU.")
        w = z.to("cpu")

Numpy can handle only CPU tensors. Moving z to CPU.


In [29]:
# By default requires_grad=False
# Setting to True tell pytorch cu calculate the gradient for x to be used later in optimizations
x = torch.ones(5, requires_grad=True)
print(x)

tensor([1., 1., 1., 1., 1.], requires_grad=True)


### Gradient Calculation With Autograd

In [30]:
x = torch.rand(3, requires_grad=True)
print(x)

tensor([0.5311, 0.3700, 0.6382], requires_grad=True)


In [31]:
# Because we set requires_grad=True, the gradient function is set (grad_fn=<AddBackward0>) -> dy/dx
y = x + 2
print(y)

tensor([2.5311, 2.3700, 2.6382], grad_fn=<AddBackward0>)


In [32]:
z = y*y*2
print(z)

tensor([12.8134, 11.2340, 13.9199], grad_fn=<MulBackward0>)


In [33]:
z = z.mean()
print(z)

tensor(12.6558, grad_fn=<MeanBackward0>)


In [34]:
# Calculate the gradient
# backward() takes no argument because z at this point (the mean) is a scalar
z.backward() # dz/dx
print(x.grad)

tensor([3.3749, 3.1600, 3.5176])


In [35]:
w = y*y*2
print(w)

tensor([12.8134, 11.2340, 13.9199], grad_fn=<MulBackward0>)


In [36]:
# In the background this is a Vector Jacobian product
v = torch.tensor([0.1, 1.0, 0.001], dtype=torch.float32)
w.backward(v)

In [38]:
print(x.grad)

tensor([ 4.3873, 12.6401,  3.5281])


In [39]:
# Prevent tracking the gradients

# x.requires_grad_(False)
# x.detach()
# with torch.no_grad():

In [40]:
x.requires_grad_(False)
print(x)

tensor([0.5311, 0.3700, 0.6382])


In [41]:
y = x.detach()
print(y)

tensor([0.5311, 0.3700, 0.6382])


In [42]:
with torch.no_grad():
    y = x + 2
    print(y)

tensor([2.5311, 2.3700, 2.6382])


In [43]:
# Whenever we call the backward function the gradient of the tensor will be
# summed up (accumulated) into .grad attribute

In [58]:
weights = torch.ones(4, requires_grad=True)

In [53]:
# When we computed for range(1) we obtained [3., 3., 3., 3.]
# but when we change to range(2) it accumulated the gradient so
# at the first pass we got [3., 3., 3., 3.], but at the second pass we got [6., 6., 6., 6.]
# to prevent the accumulation we must empty the gradient
for epoch in range(2):
    model_output = (weights*3).sum()
    
    model_output.backward()
    
    print(weights.grad)
    
    weights.grad.zero_()

tensor([3., 3., 3., 3.])
tensor([3., 3., 3., 3.])


### Backpropagation

In [2]:
x = torch.tensor(1.0)
y = torch.tensor(2.0)

w = torch.tensor(1.0, requires_grad=True)

In [3]:
# forward pass
y_hat = w * x
loss = (y_hat - y)**2
print(loss)

tensor(1., grad_fn=<PowBackward0>)


In [4]:
# backward pass
loss.backward()
print(w.grad)

tensor(-2.)


### Gradient Descent with Autograd and Backpropagation

In [2]:
# MANUAL IMPLEMENTATION with NUMPY
# f = w * x
# f = 2 * x

X = np.array([1,2,3,4], dtype=np.float32)
Y = np.array([2,4,6,8], dtype=np.float32)

w = 0.0

In [7]:
# model prediction
def forward(x):
    return w * x

# loss = MSE
def loss(y, y_pred):
    return ((y_pred - y)**2).mean()

# gradient
# MSE = 1/N * (w * x - y)**2
# dJ/dw = 1/N 2x (w*x - y)
def gradient(x, y, y_pred):
    return np.dot(2*x, y_pred-y).mean()

In [8]:
print(f'prediction before training: f(5) = {forward(5):.3f}')

prediction before training: f(5) = 0.000


In [10]:
# Training
learning_rate = 0.01
n_iters = 30

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    # loss
    l = loss(Y, y_pred)
    
    # gradients
    dw = gradient(X,Y,y_pred)
    
    # update weights
    w -= learning_rate * dw
    
    if epoch % 2 == 0:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {l:.8f}')

print(f'prediction after training: f(5) = {forward(5):.3f}')

epoch 1: w = 2.000, loss = 0.00000033
epoch 3: w = 2.000, loss = 0.00000001
epoch 5: w = 2.000, loss = 0.00000000
epoch 7: w = 2.000, loss = 0.00000000
epoch 9: w = 2.000, loss = 0.00000000
epoch 11: w = 2.000, loss = 0.00000000
epoch 13: w = 2.000, loss = 0.00000000
epoch 15: w = 2.000, loss = 0.00000000
epoch 17: w = 2.000, loss = 0.00000000
epoch 19: w = 2.000, loss = 0.00000000
epoch 21: w = 2.000, loss = 0.00000000
epoch 23: w = 2.000, loss = 0.00000000
epoch 25: w = 2.000, loss = 0.00000000
epoch 27: w = 2.000, loss = 0.00000000
epoch 29: w = 2.000, loss = 0.00000000
prediction after training: f(5) = 10.000


In [16]:
# IMPLEMENTATION with TORCH

X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [17]:
# Training
learning_rate = 0.01
n_iters = 100

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    # loss
    l = loss(Y, y_pred)
    
    # gradients = backward pass
    l.backward() # dL/dw
    
    # update weights
    with torch.no_grad():
        w -= learning_rate * w.grad
    
    # zero gradients
    w.grad.zero_()
    
    if epoch % 10 == 0:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {l:.8f}')

print(f'prediction after training: f(5) = {forward(5):.3f}')

epoch 1: w = 0.300, loss = 30.00000000
epoch 11: w = 1.665, loss = 1.16278565
epoch 21: w = 1.934, loss = 0.04506890
epoch 31: w = 1.987, loss = 0.00174685
epoch 41: w = 1.997, loss = 0.00006770
epoch 51: w = 1.999, loss = 0.00000262
epoch 61: w = 2.000, loss = 0.00000010
epoch 71: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 91: w = 2.000, loss = 0.00000000
prediction after training: f(5) = 10.000


### Training Pipeline: Model, Loss and Optimizer

In [18]:
# 1) Design model (input, output size, forward pass)
# 2) Construct loss and optimizer
# 3) Training loop
#    - forward pass: compute prediction
#    - backward pass: gradients
#    - update weights

In [19]:
import torch.nn as nn

In [20]:
X = torch.tensor([1,2,3,4], dtype=torch.float32)
Y = torch.tensor([2,4,6,8], dtype=torch.float32)

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

In [23]:
learning_rate = 0.01
n_iters = 200

loss = nn.MSELoss()
optimizer = torch.optim.SGD([w], lr=learning_rate)

In [25]:
# Training

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = forward(X)
    
    # loss
    l = loss(Y, y_pred)
    
    # gradients = backward pass
    l.backward() # dL/dw
    
    # update weights
    optimizer.step()
    
    # zero gradients
    optimizer.zero_grad()
    
    if epoch % 20 == 0:
        print(f'epoch {epoch + 1}: w = {w:.3f}, loss = {l:.8f}')

print(f'prediction after training: f(5) = {forward(5):.3f}')

epoch 1: w = 2.000, loss = 0.00000000
epoch 21: w = 2.000, loss = 0.00000000
epoch 41: w = 2.000, loss = 0.00000000
epoch 61: w = 2.000, loss = 0.00000000
epoch 81: w = 2.000, loss = 0.00000000
epoch 101: w = 2.000, loss = 0.00000000
epoch 121: w = 2.000, loss = 0.00000000
epoch 141: w = 2.000, loss = 0.00000000
epoch 161: w = 2.000, loss = 0.00000000
epoch 181: w = 2.000, loss = 0.00000000
prediction after training: f(5) = 10.000


In [30]:
# FULL PyTorch Method

# We reshape the inputs to make them 2D Arrays
X = torch.tensor([[1],[2],[3],[4]], dtype=torch.float32)
Y = torch.tensor([[2],[4],[6],[8]], dtype=torch.float32)

n_samples, n_features = X.shape
print(n_samples, n_features)

input_size = n_features
output_size = n_features

X_test = torch.tensor([5], dtype=torch.float32)

4 1


In [31]:
model = nn.Linear(input_size, output_size)

print(f'prediction before training: f(5) = {model(X_test).item():.3f}')

prediction before training: f(5) = 0.834


In [36]:
learning_rate = 0.01
n_iters = 500

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [38]:
# Training

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)
    
    # loss
    l = loss(Y, y_pred)
    
    # gradients = backward pass
    l.backward() # dL/dw
    
    # update weights
    optimizer.step()
    
    # zero gradients
    optimizer.zero_grad()
    
    if epoch % 50 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch + 1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')

print(f'prediction after training: f(5) = {model(X_test).item():.3f}')

epoch 1: w = 1.970, loss = 0.00128762
epoch 51: w = 1.974, loss = 0.00095405
epoch 101: w = 1.978, loss = 0.00070689
epoch 151: w = 1.981, loss = 0.00052376
epoch 201: w = 1.984, loss = 0.00038808
epoch 251: w = 1.986, loss = 0.00028754
epoch 301: w = 1.988, loss = 0.00021305
epoch 351: w = 1.990, loss = 0.00015785
prediction after training: f(5) = 9.981


In [39]:
# Custom made class
class LinearRegression(nn.Module):
    def __init__(self, input_dim, output_dim):
        super(LinearRegression, self).__init__()
        # define layers
        self.lin = nn.Linear(input_dim, output_dim)
        
    def forward(self, x):
        return self.lin(x)

model = LinearRegression(input_size, output_size)

In [40]:
learning_rate = 0.01
n_iters = 500

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

In [41]:
# Training

for epoch in range(n_iters):
    # prediction = forward pass
    y_pred = model(X)
    
    # loss
    l = loss(Y, y_pred)
    
    # gradients = backward pass
    l.backward() # dL/dw
    
    # update weights
    optimizer.step()
    
    # zero gradients
    optimizer.zero_grad()
    
    if epoch % 50 == 0:
        [w, b] = model.parameters()
        print(f'epoch {epoch + 1}: w = {w[0][0].item():.3f}, loss = {l:.8f}')

print(f'prediction after training: f(5) = {model(X_test).item():.3f}')

epoch 1: w = 0.391, loss = 22.33718300
epoch 51: w = 1.721, loss = 0.11292330
epoch 101: w = 1.760, loss = 0.08366893
epoch 151: w = 1.793, loss = 0.06199339
epoch 201: w = 1.822, loss = 0.04593320
epoch 251: w = 1.847, loss = 0.03403363
epoch 301: w = 1.868, loss = 0.02521677
epoch 351: w = 1.887, loss = 0.01868407
epoch 401: w = 1.902, loss = 0.01384371
epoch 451: w = 1.916, loss = 0.01025733
prediction after training: f(5) = 9.851
