In [1]:
# https://uvadlc-notebooks.readthedocs.io/en/latest/tutorial_notebooks/tutorial2/Introduction_to_PyTorch.html


In [1]:
## Standard libraries
import os
import math
import numpy as np
import time

## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf') # For export
from matplotlib.colors import to_rgba
import seaborn as sns
sns.set()

import warnings
warnings.filterwarnings("ignore")
## Progress bar
from tqdm.notebook import tqdm


In [2]:
import torch
torch.__version__

'1.13.0+cpu'

In [3]:
# Seed PyTorch's global random number generator so the random tensors
# created in the cells below are reproducible from run to run.
torch.manual_seed(42)

<torch._C.Generator at 0x29475a1d8f0>

In [4]:
# Allocate a [2, 3, 4] float tensor WITHOUT initialising its memory: the values
# are whatever was already in the buffer (often, but not reliably, zeros).
# torch.empty states that intent explicitly; the legacy torch.Tensor(2, 3, 4)
# constructor does the same thing but makes it look like a zero tensor.
x = torch.empty(2, 3, 4)
print(x)


tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])


In [5]:
# Create a float tensor from a (nested) list.
# torch.tensor is the documented factory for building tensors from data. The
# explicit dtype keeps the float32 result that the legacy torch.Tensor(...)
# constructor produced; without it, integer input would give an int64 tensor.
x = torch.tensor([[1, 2], [3, 4]], dtype=torch.float32)
print(x)


tensor([[1., 2.],
        [3., 4.]])


In [8]:
# Sample a tensor of shape [2, 3, 4] with entries drawn uniformly from [0, 1)
x = torch.rand((2, 3, 4))
print(x)


tensor([[[0.8823, 0.9150, 0.3829, 0.9593],
         [0.3904, 0.6009, 0.2566, 0.7936],
         [0.9408, 0.1332, 0.9346, 0.5936]],

        [[0.8694, 0.5677, 0.7411, 0.4294],
         [0.8854, 0.5739, 0.2666, 0.6274],
         [0.2696, 0.4414, 0.2969, 0.8317]]])


In [9]:
# Query the dimensions of x in three equivalent ways.
shape = x.shape  # attribute access
print("Shape:", shape)

size = x.size()  # method call; yields the same torch.Size value
print("Size:", size)

dim1, dim2, dim3 = size  # torch.Size unpacks like a tuple (x must have exactly 3 dims)
print("Size:", dim1, dim2, dim3)

Shape: torch.Size([2, 3, 4])
Size: torch.Size([2, 3, 4])
Size: 2 3 4


## Tensor to Numpy, and Numpy to Tensor

In [10]:
# NumPy -> PyTorch: torch.from_numpy keeps the array's dtype, so an integer
# array becomes an integer tensor rather than a float one.
# NOTE(review): per the PyTorch docs the tensor shares memory with np_arr, so
# modifying one would modify the other -- confirm before relying on a copy.
np_arr = np.array([[1,2],[3,4]])
tensor = torch.from_numpy(np_arr)

print("Numpy array:", np_arr)
print("PyTorch tensor:", tensor)

Numpy array: [[1 2]
 [3 4]]
PyTorch tensor: tensor([[1, 2],
        [3, 4]], dtype=torch.int32)


In [11]:
# PyTorch -> NumPy: .numpy() converts the tensor [0, 1, 2, 3] into an ndarray
# holding the same values.
tensor = torch.arange(4)
np_arr = tensor.numpy()

print("PyTorch tensor:", tensor)
print("Numpy array:", np_arr)

PyTorch tensor: tensor([0, 1, 2, 3])
Numpy array: [0 1 2 3]


## Operations

In [12]:
# Element-wise addition of two random [2, 3] tensors; the sum is bound to the
# new name y.
x1 = torch.rand(2, 3)
x2 = torch.rand(2, 3)
y = x1 + x2

print("X1", x1)
print("X2", x2)
print("Y", y)


X1 tensor([[0.1053, 0.2695, 0.3588],
        [0.1994, 0.5472, 0.0062]])
X2 tensor([[0.9516, 0.0753, 0.8860],
        [0.5832, 0.3376, 0.8090]])
Y tensor([[1.0569, 0.3448, 1.2448],
        [0.7826, 0.8848, 0.8151]])


In [13]:
# In-place addition: methods with a trailing underscore (add_) modify the
# tensor they are called on instead of returning a separate result.
x1 = torch.rand(2, 3)
x2 = torch.rand(2, 3)
print("X1 (before)", x1)
print("X2 (before)", x2)

# After this call x2 holds x1 + x2, while x1 is left unchanged.
x2.add_(x1)
print("X1 (after)", x1)
print("X2 (after)", x2)


X1 (before) tensor([[0.5779, 0.9040, 0.5547],
        [0.3423, 0.6343, 0.3644]])
X2 (before) tensor([[0.7104, 0.9464, 0.7890],
        [0.2814, 0.7886, 0.5895]])
X1 (after) tensor([[0.5779, 0.9040, 0.5547],
        [0.3423, 0.6343, 0.3644]])
X2 (after) tensor([[1.2884, 1.8504, 1.3437],
        [0.6237, 1.4230, 0.9539]])


In [14]:
# Changing the shape of a tensor.
x = torch.arange(6)  # 1-D tensor with the values 0..5
print("X", x)

x = x.view(2, 3)  # Same six values arranged as a 2x3 matrix (row by row)
print("X", x)

x = x.permute(1, 0)  # Swap dimensions 0 and 1, giving the 3x2 transpose
print("X", x)


X tensor([0, 1, 2, 3, 4, 5])
X tensor([[0, 1, 2],
        [3, 4, 5]])
X tensor([[0, 3],
        [1, 4],
        [2, 5]])


Other commonly used operations include matrix multiplications, which are essential for neural networks. Quite often, we have an input vector $\mathbf{x}$, which is transformed using a learned weight matrix $\mathbf{W}$. There are multiple ways and functions to perform matrix multiplication, some of which we list below:

torch.matmul: Performs the matrix product over two tensors, where the specific behavior depends on the dimensions. If both inputs are matrices (2-dimensional tensors), it performs the standard matrix product. For higher dimensional inputs, the function supports broadcasting (for details see the documentation). Can also be written as a @ b, similar to numpy.

torch.mm: Performs the matrix product over two matrices, but doesn’t support broadcasting (see documentation)

torch.bmm: Performs the matrix product with support for a batch dimension. If the first tensor $T$ is of shape ($b\times n\times m$), and the second tensor $R$ is of shape ($b\times m\times p$), the output $O$ is of shape ($b\times n\times p$), and has been calculated by performing $b$ matrix multiplications of the submatrices of $T$ and $R$: $O_i = T_i @ R_i$

torch.einsum: Performs matrix multiplications and more (i.e. sums of products) using the Einstein summation convention. Explanation of the Einstein sum can be found in assignment 1.

Usually, we use torch.matmul or torch.bmm. We can try a matrix multiplication with torch.matmul below.

In [15]:
# Input matrix for the matmul example: the values 0..5 arranged as 2 rows x 3 columns.
x = torch.arange(6)
x = x.view(2, 3)
print("X", x)


X tensor([[0, 1, 2],
        [3, 4, 5]])


In [16]:
# We can also stack multiple operations in a single line:
# arange(9) creates the values 0..8, and view(3, 3) arranges them as a 3x3 matrix.
W = torch.arange(9).view(3, 3)
print("W", W)


W tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])


In [17]:
# Standard matrix product of the matrices x and W defined in the previous cells.
h = torch.matmul(x, W) # Verify the result by calculating it by hand too!
print("h", h)

h tensor([[15, 18, 21],
        [42, 54, 66]])


# Indexing
We often have the situation where we need to select a part of a tensor. Indexing works just like in numpy, so let’s try it:

In [18]:
# Example tensor for indexing: the values 0..11 arranged as 3 rows x 4 columns.
x = torch.arange(12).view(3, 4)
print("X", x)


X tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])


In [19]:
# One index or slice per dimension, as in NumPy (x is the 3x4 tensor from the previous cell).
print(x[:, 1])   # Second column
print(x[0])      # First row
print(x[:2, -1])  # First two rows, last column
print(x[1:3, :])  # Rows 1 and 2 -- for this 3-row tensor, the last two rows


tensor([1, 5, 9])
tensor([0, 1, 2, 3])
tensor([3, 7])
tensor([[ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])


# Dynamic Computation Graph and Backpropagation
One of the main reasons for using PyTorch in Deep Learning projects is that we can automatically get gradients/derivatives of functions that we define. We will mainly use PyTorch for implementing neural networks, and they are just fancy functions. If we use weight matrices in our function that we want to learn, then those are called the parameters or simply the weights.

If our neural network would output a single scalar value, we would talk about taking the derivative, but you will see that quite often we will have multiple output variables (“values”); in that case we talk about gradients. It’s a more general term.

Given an input $\mathbf{x}$, we define our function by manipulating that input, usually by matrix-multiplications with weight matrices and additions with so-called bias vectors. As we manipulate our input, we are automatically creating a computational graph. This graph shows how to arrive at our output from our input. PyTorch is a define-by-run framework; this means that we can just do our manipulations, and PyTorch will keep track of that graph for us. Thus, we create a dynamic computation graph along the way.

So, to recap: the only thing we have to do is to compute the output, and then we can ask PyTorch to automatically get the gradients.

Note: Why do we want gradients? Consider that we have defined a function, a neural net, that is supposed to compute a certain output $y$ for an input vector $\mathbf{x}$. We then define an error measure that tells us how wrong our network is; how bad it is in predicting output $y$ from input $\mathbf{x}$. Based on this error measure, we can use the gradients to update the weights $\mathbf{W}$ that were responsible for the output, so that the next time we present input $\mathbf{x}$ to our network, the output will be closer to what we want.

The first thing we have to do is to specify which tensors require gradients. By default, when we create a tensor, it does not require gradients.

In [20]:
# A freshly created tensor does not track gradients by default
x = torch.ones(3)
print(x.requires_grad)


False


In [21]:
# requires_grad_() (trailing underscore = in-place) switches gradient tracking
# on for the existing tensor x.
x.requires_grad_(True)
print(x.requires_grad)


True


In order to get familiar with the concept of a computation graph, we will create one for the following function:

$$
y = \frac{1}{|x|}\sum_i \left[(x_i + 2)^2 + 3\right]
$$

You could imagine that "x" are our parameters, and we want to optimize (either maximize or minimize) the output "y". For this, we want to obtain the gradients $\partial y / \partial \mathbf{x}$. For our example, we’ll use $\mathbf{x}=[0,1,2]$ as our input.

In [22]:
# Gradients can only be tracked for floating-point (or complex) tensors, hence
# the explicit float32 dtype instead of arange's default integer type.
x = torch.arange(3, dtype=torch.float32, requires_grad=True)
print("X", x)

X tensor([0., 1., 2.], requires_grad=True)


In [23]:
# Build y = mean((x + 2)^2 + 3) one operation at a time, so that every
# intermediate result becomes its own node in the computation graph.
a = x + 2     # shift each element by the constant 2
b = a ** 2    # square element-wise
c = b + 3     # add the constant 3
y = c.mean()  # average over the elements -> scalar output
print("Y", y)


Y tensor(12.6667, grad_fn=<MeanBackward0>)


![graph](https://uvadlc-notebooks.readthedocs.io/en/latest/_images/pytorch_computation_graph.svg)

We calculate "a" based on the input "x" and the constant 2, "b" is "a" squared, and so on. The visualization is an abstraction of the dependencies between inputs and outputs of the operations we have applied. Each node of the computation graph has automatically defined a function for calculating the gradients with respect to its inputs, grad_fn. You can see this when we printed the output tensor y. This is why the computation graph is usually visualized in the reverse direction (arrows point from the result to the inputs). We can perform backpropagation on the computation graph by calling the function backward() on the last output, which effectively calculates the gradients for each tensor that has the property requires_grad=True:

In [24]:
# Backpropagate from the scalar output y; afterwards x.grad holds dy/dx.
y.backward()


x.grad will now contain the gradient $\partial y/ \partial \mathbf{x}$, and this gradient indicates how a change in "x" will affect output "y" given the current input $\mathbf{x}=[0,1,2]$:

In [25]:
# Analytically dy/dx_i = 2 * (x_i + 2) / 3, so x = [0, 1, 2] gives [4/3, 2, 8/3].
print(x.grad)

tensor([1.3333, 2.0000, 2.6667])


https://uvadlc-notebooks.readthedocs.io/en/latest/tutorial_notebooks/tutorial2/Introduction_to_PyTorch.html