# Tensor Import

In [2]:
import torch

# 1. Tensor Basics

In [3]:
# 0-d (scalar) tensor
x = torch.tensor(5)
print(x)

# 1-d tensor built from a Python sequence
x = torch.tensor([1, 2])
print(x)

# 2x3 tensor created with torch.empty
x = torch.empty((2, 3))
print(x)

tensor(5)
tensor([1, 2])
tensor([[0., 0., 0.],
        [0., 0., 0.]])


In [4]:
# check size: print the result of the .size() method and of the .shape attribute
print("x.size(): ", x.size())
print("x.shape: ", x.shape)

x.size():  torch.Size([2, 3])
x.shape:  torch.Size([2, 3])


In [5]:
# check data type (the element type stored in the tensor)
print(x.dtype)

torch.float32


In [6]:
# requires_grad=True tells PyTorch that gradients will have to be calculated
# for this tensor later, during the optimization steps -- e.g. because it is
# a variable of the model that should be optimized.
x = torch.tensor([5.5, 3.0], requires_grad=True)
print(x)

tensor([5.5000, 3.0000], requires_grad=True)


## Operations with Tensors

In [7]:
# Element-wise arithmetic on two tensors of the same shape.
# The function forms used here are equivalent to the +, -, *, / operators.
x = torch.ones(2, 3)
y = torch.rand(2, 3)

z = torch.add(x, y)  # addition
z = torch.sub(x, y)  # subtraction
z = torch.mul(x, y)  # multiplication
z = torch.div(x, y)  # division (z keeps only this last result)

for operand in (x, y, z):
    print(operand)



tensor([[1., 1., 1.],
        [1., 1., 1.]])
tensor([[0.4585, 0.3084, 0.5104],
        [0.4450, 0.6249, 0.2298]])
tensor([[2.1812, 3.2427, 1.9591],
        [2.2473, 1.6001, 4.3510]])


In [8]:
# Slicing: index a 2-d tensor as [row, column]
x = torch.rand(3, 3)
print(x)

first_column = x[:, 0]
first_row = x[0, :]
top_left = x[0, 0]

print("x[:, 0]", first_column)
print("x[0, :]", first_row)
print("x[0, 0]", top_left)

# .item() returns the actual value when the tensor holds only 1 element
print("x[0, 0].item()", top_left.item())

tensor([[0.6832, 0.0078, 0.5288],
        [0.3430, 0.2727, 0.4996],
        [0.2094, 0.9740, 0.7433]])
x[:, 0] tensor([0.6832, 0.3430, 0.2094])
x[0, :] tensor([0.6832, 0.0078, 0.5288])
x[0, 0] tensor(0.6832)
x[0, 0].item() 0.6832377910614014


In [9]:
# Reshape with Tensor.view()
x = torch.rand(4, 4)
y = x.view(-1)     # flatten all 16 elements into one dimension
z = x.view(-1, 8)  # with -1, PyTorch determines the necessary size automatically

for reshaped in (x, y, z):
    print(reshaped)

tensor([[0.8314, 0.6465, 0.3504, 0.5952],
        [0.4828, 0.6971, 0.6084, 0.3003],
        [0.2078, 0.6992, 0.2212, 0.7253],
        [0.0794, 0.7376, 0.3740, 0.3098]])
tensor([0.8314, 0.6465, 0.3504, 0.5952, 0.4828, 0.6971, 0.6084, 0.3003, 0.2078,
        0.6992, 0.2212, 0.7253, 0.0794, 0.7376, 0.3740, 0.3098])
tensor([[0.8314, 0.6465, 0.3504, 0.5952, 0.4828, 0.6971, 0.6084, 0.3003],
        [0.2078, 0.6992, 0.2212, 0.7253, 0.0794, 0.7376, 0.3740, 0.3098]])


## NumPy

Converting a Torch Tensor to a NumPy array and vice versa

In [10]:
# Torch tensor -> NumPy array
a = torch.ones(5)
print(a)

b = a.numpy()
for shown in (b, type(b)):
    print(shown)

tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
<class 'numpy.ndarray'>


In [11]:
# Careful: if the tensor is on the CPU (not the GPU), the tensor `a` and the
# NumPy array `b` share the same memory location, so changing one
# will also change the other.
a.add_(1)
print(a)
print(b)

tensor([2., 2., 2., 2., 2.])
[2. 2. 2. 2. 2.]


In [12]:
# numpy to torch with .from_numpy(x), or torch.tensor() to copy it
import numpy as np

a = np.ones(5)
b = torch.from_numpy(a)  # shares the same memory location as a
c = torch.tensor(a)      # copies the data of a
for value in (a, b, c):
    print(value)

# again be careful when modifying: the in-place change shows up in b, not in c
a += 1
for value in (a, b, c):
    print(value)

[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


## GPU Support

By default, all tensors are created on the CPU. But we can also move them to the GPU (if one is available), or create them directly on the GPU.

In [13]:
# use the GPU ("cuda") when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

x = torch.rand(2, 2).to(device) # move tensors to GPU device if available

# a small optimization: pass device= at creation time instead of moving afterwards
x = torch.rand(2, 2, device=device)  # or directly create them on GPU if available

# 2. Autograd

The autograd package provides automatic differentiation for all operations on Tensors. Generally speaking, torch.autograd is an engine for computing the vector-Jacobian product. It computes partial derivatives while applying the chain rule.

In [14]:
# requires_grad = True -> tracks all operations on the tensor
x = torch.rand(3, requires_grad=True)
y = x + 2

# y was created as the result of an operation, so it has a grad_fn attribute.
# grad_fn references the Function that created the tensor.
print(x) # created by the user -> grad_fn is None
print(y)
print(y.grad_fn)

tensor([0.6541, 0.5788, 0.0460], requires_grad=True)
tensor([2.6541, 2.5788, 2.0460], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x10f86fb50>


In [15]:
# more operations on y: element-wise y * y * 3 ...
z = y * y * 3
print(z)

# ... then reduce z to a single value with the mean
z = z.mean()
print(z)

tensor([21.1331, 19.9506, 12.5577], grad_fn=<MulBackward0>)
tensor(17.8805, grad_fn=<MeanBackward0>)


In [16]:
# Compute the gradients with backpropagation.
# When we finish our computation we can call .backward() and have all the gradients computed automatically.
# The gradient for this tensor will be accumulated into the .grad attribute.
# It is the partial derivative of the function w.r.t. the tensor.

z.backward()
print(x.grad) # dz/dx

# !!! Careful !!! backward() accumulates the gradient for this tensor into the .grad attribute.
# !!! We need to be careful during optimization: reset the gradients, e.g. with optimizer.zero_grad() !!!

tensor([5.3082, 5.1576, 4.0919])


## Stop a tensor from tracking history:

For example during the training loop when we want to update our weights, or after training during evaluation. These operations should not be part of the gradient computation. To prevent this, we can use:

* `x.requires_grad_(False)`
* `x.detach()`
* wrap in `with torch.no_grad():`

In [17]:
# .requires_grad_(...) changes the existing flag in-place.
# Run the same computation with the flag off and then on to compare.
a = torch.rand(2, 2)
for track_gradients in (False, True):
    a.requires_grad_(track_gradients)
    b = (a * a).sum()
    print(a.requires_grad)
    print(b.grad_fn)

False
None
True
<SumBackward0 object at 0x107f05750>


In [18]:
# .detach(): get a new Tensor with the same content but no gradient computation.
a = torch.rand(2, 2, requires_grad=True)
b = a.detach()

# a keeps requiring gradients; the detached b does not
print(a.requires_grad)
print(b.requires_grad)

True
False


In [19]:
# Wrap the computation in 'with torch.no_grad():'
a = torch.rand(2, 2, requires_grad=True)
print(a.requires_grad)

with torch.no_grad():
    b = a ** 2

# b was computed inside the no_grad block
print(b.requires_grad)

True
False


## Gradient Descent Autograd
Linear Regression example:

$f(x) = w * x + b$

here : `f(x) = 2 * x`

In [20]:
# Linear regression
# f = w * x  + b
# here : f = 2 * x

# Training data: inputs 1..8 and targets that follow f(x) = 2 * x
x = torch.arange(1, 9, dtype=torch.float32)
y = 2 * x

# The single weight to learn, starting at zero
w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)


def forward(x):
    """Model output: the input ``x`` scaled by the module-level weight ``w``."""
    prediction = w * x
    return prediction


def loss(y, y_pred):
    """Mean squared error between targets ``y`` and predictions ``y_pred``."""
    squared_error = (y_pred - y) ** 2
    return squared_error.mean()

In [21]:
X_test = 12.0

# Training hyperparameters
learning_rate = 0.01
epochs = 100

print(f'Prediction before training: f({X_test}) = {forward(X_test).item():.3f}')

for epoch in range(epochs):
    # forward pass, then the loss for the current weight
    y_pred = forward(x)
    l = loss(y, y_pred)

    # backward pass: calculates the gradient dl/dw
    l.backward()

    # update the weight inside no_grad
    with torch.no_grad():
        w -= learning_rate * w.grad

    # zero the gradient after updating
    w.grad.zero_()

    completed = epoch + 1
    if completed % 10 == 0:
        print(f'epoch {completed}: w = {w.item():.3f}, loss = {l.item():.3f}')

print(f'Prediction after training: f({X_test}) = {forward(X_test).item():.3f}')

Prediction before training: f(12.0) = 0.000
epoch 10: w = 1.998, loss = 0.000
epoch 20: w = 2.000, loss = 0.000
epoch 30: w = 2.000, loss = 0.000
epoch 40: w = 2.000, loss = 0.000
epoch 50: w = 2.000, loss = 0.000
epoch 60: w = 2.000, loss = 0.000
epoch 70: w = 2.000, loss = 0.000
epoch 80: w = 2.000, loss = 0.000
epoch 90: w = 2.000, loss = 0.000
epoch 100: w = 2.000, loss = 0.000
Prediction after training: f(12.0) = 24.000


# 3. Model, Loss & Optimizer

A typical PyTorch pipeline looks like this:

1. Design model (input, output, forward pass with different layers)
2. Construct loss and optimizer
3. Training loop:
  - Forward = compute prediction and loss
  - Backward = compute gradients
  - Update weights

In [22]:
import torch
import torch.nn as nn

# Linear regression
# f = w * x  + b
# here : f = 2 * x

# 0) Training samples, SHAPE is important: one row per sample, one column per feature
x = torch.arange(1, 9, dtype=torch.float32).view(-1, 1)
y = 2 * x

n_samples, n_features = x.shape
print(f"n_samples = {n_samples}, n_features = {n_features}")

# 0) Create a TEST sample
X_test = torch.tensor([5.0], dtype=torch.float32)

n_samples = 8, n_features = 1


In [23]:
# 1) Design Model, the model has to implement the forward pass!
class LinearRegression(nn.Module):
    """Model consisting of a single ``nn.Linear`` layer."""

    def __init__(self, input_size, output_size):
        super().__init__()
        # the only layer, built from the given input and output sizes
        self.linear = nn.Linear(in_features=input_size, out_features=output_size)

    def forward(self, x):
        """Apply the linear layer to ``x`` and return the result."""
        prediction = self.linear(x)
        return prediction

# Input and output size of the linear layer both equal the number of features
input_size = output_size = n_features

model = LinearRegression(input_size, output_size)

# Inference only: no_grad avoids recording an autograd graph for the prediction
with torch.no_grad():
    prediction_before = model(X_test).item()
print(f'Prediction before training: f({X_test.item()}) = {prediction_before:.3f}')

# 2) Define loss and optimizer
learning_rate = 0.01
epochs = 100

loss = nn.MSELoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

# 3) Training loop
for epoch in range(epochs):
    # predict = forward pass with our model
    y_pred = model(x)

    # loss
    l = loss(y_pred, y)

    # calculate gradients = backward pass
    l.backward()

    # update weights
    optimizer.step()

    # zero the gradients after updating
    optimizer.zero_grad()

    if (epoch + 1) % 10 == 0:
        # Read the weight straight from the layer rather than unpacking
        # model.parameters() into `w, b`: that unpacking rebound the global
        # `w` used by forward() in the autograd section, and `b` was unused.
        weight = model.linear.weight[0, 0].item()
        print(f'epoch {epoch + 1}: w = {weight:.3f}, loss = {l.item():.3f}')

with torch.no_grad():
    prediction_after = model(X_test).item()
print(f'Prediction after training: f({X_test.item()}) = {prediction_after:.3f}')

Prediction before training: f(5.0) = 2.579
epoch 10: w = 1.977, loss = 0.003
epoch 20: w = 1.979, loss = 0.003
epoch 30: w = 1.980, loss = 0.003
epoch 40: w = 1.981, loss = 0.002
epoch 50: w = 1.981, loss = 0.002
epoch 60: w = 1.982, loss = 0.002
epoch 70: w = 1.983, loss = 0.002
epoch 80: w = 1.984, loss = 0.002
epoch 90: w = 1.984, loss = 0.002
epoch 100: w = 1.985, loss = 0.002
Prediction after training: f(5.0) = 10.009


# 4. Neural Network
GPU, Datasets, DataLoader, Transforms, Neural Network, Training & Evaluation