### 1. Tensors


In [1]:
import torch
import numpy as np

# torch.empty(size): uninitiallized
x = torch.empty(1) # scalar
print("empty(1):", x)
x = torch.empty(3) # vector
print("empty(3):",x)
x = torch.empty(2, 3) # matrix
print("empty(2,3):",x)
x = torch.empty(2, 2, 3) # tensor, 3 dimensions
#x = torch.empty(2,2,2,3) # tensor, 4 dimensions
print("empty(2, 2, 3):",x)

# torch.rand(size): random numbers [0, 1]
x = torch.rand(5, 3)
print("rand(5,3):", x)

# torch.zeros(size), fill with 0
# torch.ones(size), fill with 1
x = torch.zeros(5, 3)
print("zeros(5,3):", x)

empty(1): tensor([3.8706e+25])
empty(3): tensor([0., 0., 0.])
empty(2,3): tensor([[0., 0., 0.],
        [0., 0., 0.]])
empty(2, 2, 3): tensor([[[0., 0., 0.],
         [0., 0., 0.]],

        [[0., 0., 0.],
         [0., 0., 0.]]])
rand(5,3): tensor([[0.8392, 0.1522, 0.9456],
        [0.1599, 0.0984, 0.0883],
        [0.2543, 0.7842, 0.8470],
        [0.0730, 0.0693, 0.4106],
        [0.3078, 0.0744, 0.3951]])
zeros(5,3): tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])


In [2]:
# Check Size 
print("Size : " , x.size())
print("Shape : " , x.shape)

Size :  torch.Size([5, 3])
Shape :  torch.Size([5, 3])


In [3]:
# Check Datatype
print(x.dtype)

# Spcify types, float32 default
x = torch.zeros(5,3, dtype=torch.float16)
print(x)

# Check Type
print(x.dtype)

torch.float32
tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]], dtype=torch.float16)
torch.float16


In [4]:
# Construct from data 
x  = torch.tensor([5, 5, 3 ])
print(x, x.dtype)

tensor([5, 5, 3]) torch.int64


In [5]:
# Requires_grad argument 
# This will tell pytorch that it will need to calcuate the gradient for this tensor 
# Later in your optimization steps 
# i.e. this is a variable in your model that you want to optimize 

x = torch.tensor([5.5,3], requires_grad=True)
print(x) 


tensor([5.5000, 3.0000], requires_grad=True)


In [6]:
# Operations
x = torch.ones(2, 2)
y = torch.rand(2, 2)

# Elementwise addition
z = x + y
torch.add(x,y)
# torch.add(x,y)

#In plane addition, everything with a  trailing underscore is a inplace operation
#i.e. it will modify the variable
# y.add_(x)

print(x)
print(y)
print("Addition : ", z)

z = x - y 
torch.sub(x, y)
print("Subtraction : ", z)

z = x * y 
torch.mul(x, y)
print("Multiplication : ", z)

z = x / y 
torch.div(x, y)
print("Division : ", z)


tensor([[1., 1.],
        [1., 1.]])
tensor([[0.8190, 0.5136],
        [0.9709, 0.9184]])
Addition :  tensor([[1.8190, 1.5136],
        [1.9709, 1.9184]])
Subtraction :  tensor([[0.1810, 0.4864],
        [0.0291, 0.0816]])
Multiplication :  tensor([[0.8190, 0.5136],
        [0.9709, 0.9184]])
Division :  tensor([[1.2210, 1.9472],
        [1.0299, 1.0889]])


In [7]:
# Slicing 
x = torch.rand(5, 3)
print(x)
print("X[: , 0] ", x[: , 0 ])
print("X[1, :] ", x[1, :  ])
print("X[1, 1] ", x[1, 1])

print("x[1, 1].item() ", x[1,1].item())

tensor([[0.4594, 0.4578, 0.0236],
        [0.7371, 0.0307, 0.0077],
        [0.7398, 0.1254, 0.2807],
        [0.7672, 0.6725, 0.4940],
        [0.8361, 0.6619, 0.7829]])
X[: , 0]  tensor([0.4594, 0.7371, 0.7398, 0.7672, 0.8361])
X[1, :]  tensor([0.7371, 0.0307, 0.0077])
X[1, 1]  tensor(0.0307)
x[1, 1].item()  0.03069692850112915


In [8]:
# Reshape with torch.view()
x = torch.randn(4, 4) 
y = x.view(16) # Shapes the x as (16, ) in y
z = x.view(-1, 8) # The -1 tells PyTorch to infer the appropriate dimension automatically based on the total number of elements

print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


### NumPy

#### Converting a Torch to a NumPy array and vice versa is very easy.


In [9]:
import torch

a = torch.ones(5)
print(a)

b = a.numpy()
print(b)
print(type(b))


tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]
<class 'numpy.ndarray'>


In [10]:
# Numpy to torch with .from_numpy(x), or torch.tensor() to copy it 
a = np.ones(5)
b = torch.from_numpy(a)
c = torch.tensor(a)
print(a)
print(b)
print(c)

# Again be careful when modifying
a += 1 
print(a)
print(b)
print(c)

[1. 1. 1. 1. 1.]
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)
[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)
tensor([1., 1., 1., 1., 1.], dtype=torch.float64)


### GPU Support

#### By default are created on the CPU. But we can also move them to the GPU (if it's available), or create them direcly on the GPU


In [11]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

x = torch.rand(2, 2).to(device) # Move tensors to GPU device 
# x = a.to("cpu")
# x = x.to('cuda')

x = torch.rand(2, 2, device=device) # or directly create them on GPU 

### 2. Autograd

The autograd package provides automatic differentiation for all operations on Tensors. Generally speaking, torch.autograd is an engine for computing the vector-Jacobian product. It computes partial derivatives while applying the chain rule.


In [12]:
# requires_grad = True --> tracks all operations on the tensor. 
x = torch.randn(3, requires_grad=True )
y = x + 2 

# y was created as a result of an operation, so it has a grad_fn attribute
# grad_fn: references a Function that has created the Tensor
print(x)
print(y)
print(y.grad_fn)

tensor([-0.1052,  0.8526,  0.8189], requires_grad=True)
tensor([1.8948, 2.8526, 2.8189], grad_fn=<AddBackward0>)
<AddBackward0 object at 0x0000021C98110F70>


In [13]:
# Do more operations of y 
z = y * y * 3
print(z)
z = z.mean()
print(z)

tensor([10.7711, 24.4112, 23.8383], grad_fn=<MulBackward0>)
tensor(19.6735, grad_fn=<MeanBackward0>)


In [14]:
# Let's compute the graidients with backpropogation
# When we finish our computation we can  call .backward() and have all the gradients computer automatically. 
# The gradient for this tensor will be accumulated into .grad attribute. 
# It is the partial derivate of the function w.r.t. the tensor

z = y * y * 3
print(z)
z = z.mean()
print(z)


# ! ! ! Careful ! ! ! backward() accumulate the gradient for this tensor into .grad attribute. 
# ! ! ! We need to be careful during optimization ! ! ! optimizer.zero_grad()

tensor([10.7711, 24.4112, 23.8383], grad_fn=<MulBackward0>)
tensor(19.6735, grad_fn=<MeanBackward0>)


In [15]:
z.backward()
print(x.grad)

tensor([3.7896, 5.7051, 5.6378])


## Stop a tensor from tracking history:

For example during the training loop when we want to update our weights, or after training during evaluation. These operation part of the gradient computation. To prevent this, we can use:

- x.requires_grad(False)
- x.detach()
- wrap in with torch.no_grad():


In [16]:
# .requires_grad(...) changes on existing falg in-place 
a = torch.randn(2, 2)
b = (a*a).sum()
print(a.requires_grad)
print(b.grad_fn)

a.requires_grad_(True)
b = (a*a).sum()
print(a.requires_grad)
print(b.grad_fn)

False
None
True
<SumBackward0 object at 0x0000021C978B0AC0>


In [17]:
# .deatch(): get a new tensor with the same content but no gradient computation: 
a = torch.randn(2, 2, requires_grad=True)
b = a.detach()
print(a.requires_grad)
print(b.requires_grad)

True
False


In [18]:
# wrap in 'with torch.no_grad()'
a = torch.randn(2, 2, requires_grad=True)
print(a.requires_grad)
with torch.no_grad(): 
    b = a** 2
    print(b.requires_grad)

True
False


### Gradient Descent Autograd

Linear Regression example:

- _f(x) = w \* x + b_


In [3]:
# Linear regression 
# f = w * x + b
# here : f = 2 * x

X = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8], dtype=torch.float32)  
Y = torch.tensor([2, 4, 6, 8, 10, 12, 14, 16], dtype=torch.float32)  

w = torch.tensor(0.0, dtype=torch.float32, requires_grad=True)

# model output 
def forward(x): 
    return w * x 

# loss = MSE 
def loss(y, y_pred): 
    return ((y_pred - y)** 2).mean()

x_test = 5.0
print(f"Prediction before traning: f({x_test}) = {forward(x_test).item():.3f}")


Prediction before traning: f(5.0) = 0.000


In [4]:
# Training 
learning_rate = 0.1 
n_epocha = 100

for epoch in range(n_epocha): 
    # predict = forward pass 
    y_pred = forward(X)

    # Loss 
    l = loss(Y, y_pred)

    # calcuate the gradients = backward pass 
    l.backward()

    # Update weights 
    # w.data = w.data - learning_rate * w.grad 
    with torch.no_grad(): 
        w -= learning_rate * w.grad

    # zero the gradients after updating 
    w.grad.zero_()

    if(epoch+1) % 10 ==0 : 
        print(f'epoch {epoch + 1}: w = {w.item():.3f}, loss = {l.item():.3f}')

    print(f'Prediction after training: f({x_test}) = {forward(x_test).item():.3f}')



Prediction after training: f(5.0) = 51.000
Prediction after training: f(5.0) = -158.100
Prediction after training: f(5.0) = 699.210
Prediction after training: f(5.0) = -2815.761
Prediction after training: f(5.0) = 11595.620
Prediction after training: f(5.0) = -47491.047
Prediction after training: f(5.0) = 194764.297
Prediction after training: f(5.0) = -798482.562
Prediction after training: f(5.0) = 3273829.750
epoch 10: w = -2684530.000, loss = 10932253097984.000
Prediction after training: f(5.0) = -13422650.000
Prediction after training: f(5.0) = 55032920.000
Prediction after training: f(5.0) = -225634912.000
Prediction after training: f(5.0) = 925103232.000
Prediction after training: f(5.0) = -3792922880.000
Prediction after training: f(5.0) = 15550984192.000
Prediction after training: f(5.0) = -63759032320.000
Prediction after training: f(5.0) = 261412028416.000
Prediction after training: f(5.0) = -1071789375488.000
Prediction after training: f(5.0) = 4394336911360.000
epoch 20: w =