In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
x = torch.rand(2,2,2)
print(x)

tensor([[[0.6836, 0.4573],
         [0.2590, 0.3863]],

        [[0.0009, 0.9299],
         [0.8156, 0.6432]]])


## Introduction to Tensors

### Creating Tensors

In [None]:
# scalar
scalar = torch.tensor(7)
scalar, scalar.ndim, scalar.shape, 

(0, torch.Size([]), tensor(7))

In [None]:
# vector
vector = torch.tensor([7.7])
vector, vector.ndim, vector.shape

(tensor([7.7000]), 1, torch.Size([1]))

In [25]:
# MATRIX
MATRIX = torch.tensor([[7, 8], [9,10]])
MATRIX, MATRIX[1], MATRIX.shape, MATRIX.ndim

(tensor([[ 7,  8],
         [ 9, 10]]),
 tensor([ 9, 10]),
 torch.Size([2, 2]),
 2)

In [31]:
# TENSOR
TENSOR = torch.tensor([[[1,2,3], [4,5,6], [7, 8, 9]]])
TENSOR, TENSOR.ndim, TENSOR.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 3,
 torch.Size([1, 3, 3]))

### Random Tensors

Why random tensors? Neural networks initialise tensors with random data and adjust them to better represent data.

In [38]:
# Create a random tensor of size (3, 4)
random_tensor = torch.rand(3, 3, 3, 3)
random_tensor

tensor([[[[0.3478, 0.4269, 0.5097],
          [0.7949, 0.4568, 0.5523],
          [0.5491, 0.2325, 0.1627]],

         [[0.1410, 0.7776, 0.2026],
          [0.2902, 0.9564, 0.6760],
          [0.2530, 0.5852, 0.0578]],

         [[0.0614, 0.1904, 0.6139],
          [0.4099, 0.5323, 0.9206],
          [0.6166, 0.6665, 0.4051]]],


        [[[0.3201, 0.0082, 0.3964],
          [0.3311, 0.7768, 0.2907],
          [0.8414, 0.9017, 0.0705]],

         [[0.3092, 0.6817, 0.0287],
          [0.0539, 0.6596, 0.2148],
          [0.8429, 0.3417, 0.4057]],

         [[0.5903, 0.9446, 0.1981],
          [0.3517, 0.8444, 0.3161],
          [0.4075, 0.5707, 0.4147]]],


        [[[0.1466, 0.6863, 0.1460],
          [0.1526, 0.1460, 0.3912],
          [0.4474, 0.3698, 0.8433]],

         [[0.1146, 0.2835, 0.0316],
          [0.7052, 0.0745, 0.7167],
          [0.8514, 0.1994, 0.7471]],

         [[0.6232, 0.5056, 0.4288],
          [0.7923, 0.7522, 0.8340],
          [0.6648, 0.6780, 0.0672]]]])

In [35]:
# Create a random tensor with similar shape to an image tensor
random_image_size_tensor = torch.rand(size = (224, 224, 3)) #height, width, color channels R, G, B
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

### Zeros and Ones

In [39]:
# Create a tensor or all zeros
zeros = torch.zeros(size = (3, 4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [40]:
# Create a tensor of all ones
ones = torch.ones(size=(3,4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [41]:
ones.dtype, zeros.dtype

(torch.float32, torch.float32)

### Creating a range of tensors and tensors-like

In [49]:
# Using torch.arange
one_to_ten = torch.arange(0, 10)
one_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# With step 2
one_to_ten_step2 = torch.arange(0, 10, 2)
one_to_ten_step2

tensor([0, 2, 4, 6, 8])

In [51]:
# Creating tensors like
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes

In [61]:
# Float 32 tensor
float_32_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=None, device=None, requires_grad=False)
float_32_tensor, float_32_tensor.dtype

(tensor([3., 6., 9.]), torch.float32)

In [62]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor, float_16_tensor.dtype

(tensor([3., 6., 9.], dtype=torch.float16), torch.float16)

In [64]:
float_x_tensor = float_16_tensor * float_32_tensor
float_16_tensor.dtype

torch.float16

### Getting information from tensors

1. Tensors not the right datatype - to get datatype from a tensor, use tensor.dtype
2. Tensors not the right shape - to get shape from a tensor, use tensor.shape
3. Tensors not on the right device - to get device from a tensor, use tensor.device

In [65]:
# Create tensor
some_tensor = torch.rand(3,4)
some_tensor

tensor([[0.7151, 0.8338, 0.9438, 0.5988],
        [0.9785, 0.7065, 0.5650, 0.3623],
        [0.0356, 0.0908, 0.7729, 0.7001]])

In [None]:
print(some_tensor)
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Device tensor is on: {some_tensor.device}")

tensor([[0.7151, 0.8338, 0.9438, 0.5988],
        [0.9785, 0.7065, 0.5650, 0.3623],
        [0.0356, 0.0908, 0.7729, 0.7001]])
Datatype of tensor: torch.float32
Shape of tensor: torch.Size([3, 4])
Device tensor is on: cpu


### Manipulating Tensors (tensor operations)

Tensor operations include:
* Addition
* Subtraction
* Multiplication (element-wise)
* Division
* Matrix multiplication

In [86]:
# Create a tensor and add 100 to it
tensor = torch.tensor([1, 2, 3])
tensor + 100

tensor([101, 102, 103])

In [71]:
# Multiply tensor by 10
tensor * 10

tensor([10, 20, 30])

In [72]:
# Subtract 10
tensor - 10

tensor([-9, -8, -7])

In [74]:
#Pytorch inbuilt function
torch.mul(tensor, 10), torch.add(tensor, 10)

(tensor([10, 20, 30]), tensor([11, 12, 13]))

### Matrix multiplication

Two main ways: element-wise multiplication and matrix multiplication

Two main rules that performing matrix multiplication needs to satisfy:
1. The **inner** dimensions much match
* `(3, 2) @ (3, 2)` won't work
* `(2, 3) @ (3, 2)` will work
* `(3, 2) @ (2, 3)` will work

2. The resulting matrix has the shape of the **outer** dimensions.
* `(2, 3) @ (3, 2) -> (2, 2)`
* `(3, 2) @ (2, 3) -> (3, 3)` 

In [78]:
# Element wise multiplication
print(tensor, "*", tensor)
print(f"Equals: {tensor * tensor}")

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


In [79]:
# Matrix multiplication
print(tensor, "@", tensor)
print(f"Equals: {torch.matmul(tensor, tensor)}")

tensor([1, 2, 3]) @ tensor([1, 2, 3])
Equals: 14


In [None]:
#Matrix multiplication with a for loop
%%time
value = 0
for i in range(len(tensor)):
    value += tensor[i] * tensor[i]
print(value)

tensor(14)
CPU times: user 2.06 ms, sys: 543 μs, total: 2.6 ms
Wall time: 1.83 ms


In [None]:
# Matrix multiplication with torch
%%time
torch.matmul(tensor, tensor)

CPU times: user 253 μs, sys: 67 μs, total: 320 μs
Wall time: 245 μs


tensor(14)

### One of the most common errors in deep learning: shape errors

In [153]:
# Shapes for matrix multiplication
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]])

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]])

torch.mm(tensor_A, tensor_B) #torch.mm alias for torch.matmul

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [154]:
tensor_A.shape, tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

#### To fix our tensor shape issues, we can manipulate the shape of one of our tensors using transpose

In [155]:
tensor_B.T, tensor_B

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 tensor([[ 7, 10],
         [ 8, 11],
         [ 9, 12]]))

In [156]:
tensor_B.T, tensor_B.T.shape

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 torch.Size([2, 3]))

In [158]:
# The matrix multiplication works only when tensor_B is transposed
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}")
print(f"New shapes: tensor_A = {tensor_A.shape}, tensor_B.T = {tensor_B.T.shape}")
print(f"Multiplying: = {tensor_A.shape} @ {tensor_B.T.shape} <- inner dimenstions must match")
print("Output: \n")
output = torch.matmul(tensor_A, tensor_B.T)
print(output)
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])
New shapes: tensor_A = torch.Size([3, 2]), tensor_B.T = torch.Size([2, 3])
Multiplying: = torch.Size([3, 2]) @ torch.Size([2, 3]) <- inner dimenstions must match
Output: 

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

Output shape: torch.Size([3, 3])


### Finding the mix, max, mean, sum etc. (tensor aggregation)

In [163]:
# Create a tensor
x = torch.arange(0, 100, 10)
x, x.dtype

(tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]), torch.int64)

In [None]:
# Find the min and max
torch.min(x), x.min(), torch.max(x), x.max()

(tensor(0), tensor(0), tensor(90), tensor(90))

In [None]:
#Find the mean - torch.mean() function requires a tensor of float32 datatype to work
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

In [167]:
# Find the sum
torch.sum(x), x.sum()

(tensor(450), tensor(450))

### Finding the positional mix and max

In [172]:
print(f"The min value of {x} is: {torch.min(x)} and is found at index: {torch.argmin(x)}.")
print(f"The max value of {x} is: {torch.max(x)} and is found at index: {torch.argmax(x)}.")

The min value of tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]) is: 0 and is found at index: 0.
The max value of tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]) is: 90 and is found at index: 9.


### Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - return a view of an input tensor of a certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on of each other (vstack) or side by side (hstack)
* Squeezing - removes all `1` dimensions from a tensor
* Unsqeezing - add a `1` dimensions from a tensor
* Permute - Return a view of the input with dimensions permuted (swapped) in a certain way

In [206]:
# Create a tensor
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [207]:
# Add an extra dimension
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [208]:
# Change the view
z = x.view(1, 9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [209]:
# Changing z change x, becausea view of a tensor shares the same memory as the original tensor
z[:, 0] = 5
x

tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])

In [193]:
# Stack tensors on top of another
x_stacked = torch.stack([x, x, x, x], dim=1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [199]:
x_vstacked = torch.vstack((x, x, x, x))
x_vstacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [200]:
x_hstacked = torch.hstack((x, x, x, x))
x_hstacked

tensor([5., 2., 3., 4., 5., 6., 7., 8., 9., 5., 2., 3., 4., 5., 6., 7., 8., 9.,
        5., 2., 3., 4., 5., 6., 7., 8., 9., 5., 2., 3., 4., 5., 6., 7., 8., 9.])

In [210]:
# Squeeze tensors - torch.squeeze removes all single dimensions from a given tensor
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

x_squeezed = torch.squeeze(x_reshaped)
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
Previous shape: torch.Size([1, 9])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
New shape: torch.Size([9])


In [214]:
# torch.unsqueeze() - adds a single dimension to a target tensor at a specific dim
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

x_unsqueezed = torch.unsqueeze(x_squeezed, dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")


Previous tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
Previous shape: torch.Size([9])

New tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
New shape: torch.Size([1, 9])


In [217]:
# torch.permute - rearranges the dimensions of a target tensor in a specific order
x_original = torch.rand(size=(224, 224, 3))

# permute original tensor to rearrange the axis order
x_permuted = x_original.permute(2, 0, 1)
x_permuted.shape


torch.Size([3, 224, 224])

### Selecting data from tensors (indexing)

In [219]:
# Create a tensor
x = torch.arange(1, 10).reshape(1, 3, 3)
x

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [None]:
# Indexing on dim=0
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [None]:
# Index on middle bracket (dim=1)
x[0][0]

tensor([1, 2, 3])

In [222]:
# Index on innermost bracket - dim=2
x[0][0][0]

tensor(1)

### Pytorch and numpy

* Data in numpy, want in Pytorch tensor -> `torch.from_numpy(ndarray)`
* Pytorch tensor -> numpy -> torch.Tensor.numpy()

In [None]:
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array) #warning: when converting from numpy pytorch converts to float64, which is default in numpy
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [224]:
# Change the value of array, what will this do to `tensor`
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [225]:
# Tensor to numpy
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [228]:
# Change tensor, what happens to `numpy_tensor`
tensor = tensor + 1
tensor, numpy_tensor

(tensor([3., 3., 3., 3., 3., 3., 3.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

### Reproducibility

How a NN learns:

`start with random numbers -> tensor operations -> update random numbers to fit data better -> again -> again`

To reduce the randomness in NNs and Pytorch, **random seed*** is used.

In [230]:
# Create two random tensors
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(random_tensor_A)
print(random_tensor_B)
print(random_tensor_A==random_tensor_B)

tensor([[0.2528, 0.5544, 0.4865, 0.5038],
        [0.1558, 0.1096, 0.7807, 0.7445],
        [0.8389, 0.2262, 0.4634, 0.9680]])
tensor([[0.5418, 0.2601, 0.4719, 0.3170],
        [0.7519, 0.2610, 0.2992, 0.0283],
        [0.7936, 0.5974, 0.0839, 0.2943]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [232]:
# Some random but reproducible tensons
RANDOM_SEED = 42

torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(3,4)

torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(3,4)

print(random_tensor_C)
print(random_tensor_D)
print(random_tensor_C==random_tensor_D)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Exercises

1. Documentation reading - A big part of deep learning (and learning to code in general) is getting familiar with the documentation of a certain framework you're using. We'll be using the PyTorch documentation a lot throughout the rest of this course. So I'd recommend spending 10-minutes reading the following (it's okay if you don't get some things for now, the focus is not yet full understanding, it's awareness). See the documentation on torch.Tensor and for torch.cuda

2. Create a random tensor with shape (7, 7).

In [7]:
import torch
tensor_A = torch.rand(7,7)
tensor_A

tensor([[0.9333, 0.2997, 0.5934, 0.6239, 0.0764, 0.0200, 0.5656],
        [0.3645, 0.6906, 0.3817, 0.0113, 0.9969, 0.7291, 0.8481],
        [0.6581, 0.9672, 0.5460, 0.4964, 0.9768, 0.3085, 0.8073],
        [0.4406, 0.9605, 0.1642, 0.4159, 0.0509, 0.8450, 0.8912],
        [0.3927, 0.4300, 0.0332, 0.3801, 0.0948, 0.3879, 0.5515],
        [0.2096, 0.4941, 0.0833, 0.7099, 0.9590, 0.3361, 0.4785],
        [0.0083, 0.5969, 0.2617, 0.6456, 0.8047, 0.1709, 0.8152]])

3. Perform a matrix multiplication on the tensor from 2 with another random tensor with shape (1, 7) (hint: you may have to transpose the second tensor).

In [8]:
tensor_B = torch.rand(1,7)
tensor_C = torch.matmul(tensor_A, tensor_B.T)
tensor_C

tensor([[1.9954],
        [1.4974],
        [2.3325],
        [1.8206],
        [1.1587],
        [1.3232],
        [1.3596]])

4. Set the random seed to 0 and do exercises 2 & 3 over again.


In [10]:
RANDOM_SEED = 0
torch.manual_seed(RANDOM_SEED)
tensor_A = torch.rand(7,7)

torch.manual_seed(RANDOM_SEED)
tensor_B = torch.rand(1, 7)

tensor_C = torch.matmul(tensor_A, tensor_B.T)
tensor_C

tensor([[1.5985],
        [1.1173],
        [1.2741],
        [1.6838],
        [0.8279],
        [1.0347],
        [1.2498]])

5. Speaking of random seeds, we saw how to set it with torch.manual_seed() but is there a GPU equivalent? (hint: you'll need to look into the documentation for torch.cuda for this one). If there is, set the GPU random seed to 1234.

############ No GPU :(

6. Create two random tensors of shape (2, 3) and send them both to the GPU (you'll need access to a GPU for this). Set torch.manual_seed(1234) when creating the tensors (this doesn't have to be the GPU random seed).

In [None]:
torch.manual_seed(1234)
A = torch.rand(2,3)
B = torch.rand(2,3)

tensor([[0.3647, 0.4709],
        [0.5184, 0.5617]])

7. Perform a matrix multiplication on the tensors you created in 6 (again, you may have to adjust the shapes of one of the tensors).

In [6]:
C = torch.mm(A, B.T)
C, C.shape

(tensor([[0.3647, 0.4709],
         [0.5184, 0.5617]]),
 torch.Size([2, 2]))

8. Find the maximum and minimum values of the output of 7.

In [4]:
torch.min(C), torch.max(C)

(tensor(0.3647), tensor(0.5617))

9. Find the maximum and minimum index values of the output of 7.

In [8]:
torch.argmin(C), torch.argmax(C),

(tensor(0), tensor(3))

10. Make a random tensor with shape (1, 1, 1, 10) and then create a new tensor with all the 1 dimensions removed to be left with a tensor of shape (10). Set the seed to 7 when you create it and print out the first tensor and it's shape as well as the second tensor and it's shape.

In [9]:
torch.manual_seed(7)
A = torch.rand(1, 1, 1, 10)
B = torch.squeeze(A)
A.shape, B.shape

(torch.Size([1, 1, 1, 10]), torch.Size([10]))