In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

1.13.1+cpu


## Introduction to tensor
### Creating tensors

PyTorch tensors are created using `torch.tensor()` ([tensor docs](https://pytorch.org/docs/stable/tensors.html))

In [2]:
# Scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [3]:
scalar.ndim

0

In [4]:
# Get tensor back as python int
scalar.item()

7

In [5]:
# Vector
vector = torch.tensor([7,1])
vector

tensor([7, 1])

In [6]:
vector.ndim

1

In [7]:
vector.shape

torch.Size([2])

In [8]:
# MATRIX 
MATRIX = torch.tensor([[7,8],
                       [9,10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [9]:
MATRIX.ndim

2

In [10]:
MATRIX.shape

torch.Size([2, 2])

In [11]:
MATRIX[1]

tensor([ 9, 10])

In [12]:
# TENSOR: two 3x3 matrices stacked along the first dimension -> shape (2, 3, 3)
TENSOR = torch.tensor([
    [[1, 2, 3],
     [3, 6, 9],
     [2, 4, 5]],
    [[1, 2, 3],
     [4, 5, 6],
     [7, 8, 9]],
])
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]],

        [[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])

In [13]:
TENSOR.ndim

3

In [14]:
TENSOR.shape

torch.Size([2, 3, 3])

In [15]:
TENSOR[1]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

### Random Tensor

Random tensors are important because the way neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.  
`random numbers -> look at data -> update numbers -> look at data -> update numbers`

`torch.rand()` ([random tensor docs](https://pytorch.org/docs/stable/generated/torch.rand.html))

In [16]:
# Create a random tensor of size (3,4)
random_tensor = torch.rand(3,4)
random_tensor

tensor([[0.7273, 0.7981, 0.3164, 0.9137],
        [0.7035, 0.4104, 0.4715, 0.6636],
        [0.8538, 0.3484, 0.9460, 0.9742]])

In [17]:
random_tensor.ndim

2

In [18]:
# Random tensor with the same shape as an image tensor: [height, width, color_channels]
rand_image_tensor = torch.rand(size=(224, 224, 3))
(rand_image_tensor.ndim, rand_image_tensor.shape)

(3, torch.Size([224, 224, 3]))

### Zero and Ones Tensors

In [19]:
zeros = torch.zeros(1,2,3)
zeros

tensor([[[0., 0., 0.],
         [0., 0., 0.]]])

In [20]:
ones = torch.ones(2,4)
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [21]:
ones.dtype

torch.float32

### Creating a range of tensors and tensor-like

In [22]:
# Use torch.arange() (torch.range() is deprecated); the end value is exclusive,
# so (1, 11) yields the integers 1 through 10
one_to_ten = torch.arange(1, 11)

In [23]:
torch.arange(0,1050,50)

tensor([   0,   50,  100,  150,  200,  250,  300,  350,  400,  450,  500,  550,
         600,  650,  700,  750,  800,  850,  900,  950, 1000])

In [24]:
# Creating tensors like
ten_zeros = torch.zeros_like(one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes
**Note:** Tensor datatypes are one of the 3 big sources of errors you'll run into with PyTorch & deep learning:
1. Tensors not right datatype
2. Tensors not right shape
3. Tensors not right device

In [25]:
# Float 32 tensor: with dtype=None the datatype is inferred (float32 here, as the output shows)
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,           # datatype of the tensor (e.g. torch.float32 or torch.float16)
    device=None,          # device the tensor is stored on
    requires_grad=False,  # whether or not to track gradients for operations on this tensor
)
float_32_tensor

tensor([3., 6., 9.])

In [26]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [27]:
float_32_tensor * float_16_tensor

tensor([ 9., 36., 81.])

### Getting information from tensors
`tensor.dtype`  
`tensor.shape`  
`tensor.device`  

In [28]:
# Create a tensor
some_tensor = torch.rand(3, 4)

# Find out details about it
print(some_tensor)
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Device tensor is stored on: {some_tensor.device}") # will default to CPU

tensor([[0.2639, 0.2619, 0.4620, 0.1869],
        [0.0962, 0.3684, 0.6433, 0.6809],
        [0.1543, 0.4587, 0.8502, 0.9599]])
Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


### Manipulating tensors (operations)

- addition
- subtraction
- multiplication (element-wise)
- division
- matrix multiplication

In [29]:
# Create a tensor of values and add a number to it
tensor = torch.tensor([1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [30]:
# Multiply it by 10
tensor * 10

tensor([10, 20, 30])

In [31]:
# Subtract and reassign
tensor = tensor - 10
tensor

tensor([-9, -8, -7])

In [32]:
# Add and reassign
tensor = tensor + 10
tensor

tensor([1, 2, 3])

In [33]:
# Can also use torch functions
torch.multiply(tensor, 10)

tensor([10, 20, 30])

In [34]:
# Element-wise multiplication (each element multiplies its equivalent, index 0->0, 1->1, 2->2)
print(tensor, "*", tensor)
print("Equals:", tensor * tensor)

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


### Matrix Multiplication
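`torch.matmul()` performs matrix multiplication. For two 2-D matrices you can also use `torch.mm()`, which gives the same result but only accepts 2-D inputs and does not broadcast.  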
 1. The inner dimension must match.
 - (3, 2) @ (3, 2) won't work
 - (2, 3) @ (3, 2) will work
 2. The resulting matrix has the shape of the outer dimensions.
 - (3, 2) @ (2, 3) -> (3, 3)

In [35]:
# Matrix multiplication
torch.matmul(tensor, tensor)

tensor(14)

In [36]:
# Can also use the "@" symbol for matrix multiplication, though not recommended
tensor @ tensor

tensor(14)

In [37]:
%%time
# Matrix multiplication by hand: for a 1-D tensor this is the dot product,
# i.e. the sum of the element-wise products.
# (Avoid doing tensor operations with Python for loops where you can, they are
# computationally expensive: compare this cell's timing with torch.matmul().)
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]
value

CPU times: user 1.31 ms, sys: 0 ns, total: 1.31 ms
Wall time: 1.1 ms


tensor(14)

In [38]:
%%time
torch.matmul(tensor, tensor)

CPU times: user 49 µs, sys: 0 ns, total: 49 µs
Wall time: 51.7 µs


tensor(14)

### One of the most common errors in deep learning (shape errors)

In [39]:
# Two (3, 2) matrices: multiplying them directly fails because the inner
# dimensions (2 and 3) do not match.
tensor_A = torch.tensor(
    [[1, 2],
     [3, 4],
     [5, 6]],
    dtype=torch.float32,
)

tensor_B = torch.tensor(
    [[7, 10],
     [8, 11],
     [9, 12]],
    dtype=torch.float32,
)

# torch.matmul(tensor_A, tensor_B)  # (this will error)

In [40]:
# View tensor_A and tensor_B.T
print(tensor_A)
print(tensor_B.T)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])


In [41]:
# The operation works when tensor_B is transposed
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
print("Output:\n")
output = torch.matmul(tensor_A, tensor_B.T)
print(output) 
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions match

Output:

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Output shape: torch.Size([3, 3])


### Finding the min, max, mean, sum, etc (aggregation)

In [42]:
# Create a tensor
x = torch.arange(0, 100, 10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [43]:
print(f"Minimum: {x.min()}")
print(f"Maximum: {x.max()}")
# print(f"Mean: {x.mean()}") # this will error
print(f"Mean: {x.type(torch.float32).mean()}") # won't work without float datatype
print(f"Sum: {x.sum()}")

Minimum: 0
Maximum: 90
Mean: 45.0
Sum: 450


In [44]:
# You can also do the same as above with torch methods.
torch.max(x), torch.min(x), torch.mean(x.type(torch.float32)), torch.sum(x)

(tensor(90), tensor(0), tensor(45.), tensor(450))

### Positional min/max
You can also find the index of a tensor where the max or minimum occurs with `torch.argmax()` and `torch.argmin()` respectively.

In [45]:
# Tensor holding 10, 20, ..., 90
tensor = torch.arange(10, 100, 10)
print(f"Tensor: {tensor}")

# Positions (indices) of the largest and smallest values
index_of_max = tensor.argmax()
index_of_min = tensor.argmin()
print(f"Index where max value occurs: {index_of_max}")
print(f"Index where min value occurs: {index_of_min}")

Tensor: tensor([10, 20, 30, 40, 50, 60, 70, 80, 90])
Index where max value occurs: 8
Index where min value occurs: 0


### Reshaping, stacking, squeezing and unsqueezing
   
Oftentimes you'll want to reshape or change the dimensions of your tensors without actually changing the values inside them.

To do so some popular methods are:

| Method | One-line description |
| ----- | ----- |
| [`torch.reshape(input, shape)`](https://pytorch.org/docs/stable/generated/torch.reshape.html#torch.reshape) | Reshapes `input` to `shape` (if compatible); can also use `torch.Tensor.reshape()`. |
| [`torch.Tensor.view(shape)`](https://pytorch.org/docs/stable/generated/torch.Tensor.view.html) | Returns a view of the original tensor in a different `shape` but shares the same data as the original tensor. |
| [`torch.stack(tensors, dim=0)`](https://pytorch.org/docs/stable/generated/torch.stack.html) | Concatenates a sequence of `tensors` along a new dimension (`dim`); all `tensors` must be the same size. |
| [`torch.squeeze(input)`](https://pytorch.org/docs/stable/generated/torch.squeeze.html) | Squeezes `input` to remove all the dimensions of size `1`. |
| [`torch.unsqueeze(input, dim)`](https://pytorch.org/docs/stable/generated/torch.unsqueeze.html) | Returns `input` with a dimension of size `1` added at `dim`. |
| [`torch.permute(input, dims)`](https://pytorch.org/docs/stable/generated/torch.permute.html) | Returns a *view* of the original `input` with its dimensions permuted (rearranged) to `dims`. |

In [46]:
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [47]:
# Add an extra dimension
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [48]:
# Change view (keeps same data as original but changes view)
# See more: https://stackoverflow.com/a/54507446/7900723
z = x.view(1, 9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [49]:
# Changing z changes x because a view of a tensor shares the same memory as the original
z[:,0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [50]:
# Stack four copies of x along a new dimension: with dim=1 each copy becomes a column,
# giving shape (9, 4) (dim=0 would instead stack the copies on top of each other as rows)
x_stacked = torch.stack([x,x,x,x], dim=1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [51]:
print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# Remove extra dimension from x_reshaped
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
Previous shape: torch.Size([1, 9])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
New shape: torch.Size([9])


In [52]:
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

## Add an extra dimension with unsqueeze
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
Previous shape: torch.Size([9])

New tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
New shape: torch.Size([1, 9])


In [53]:
# Random tensor laid out as [height, width, color_channels]
x_original = torch.rand(224, 224, 3)

# Rearrange the axes so the last one comes first (old axis 2 -> 0, 0 -> 1, 1 -> 2)
x_permuted = torch.permute(x_original, (2, 0, 1))

print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}") # [color_channels, height, width]

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


In [54]:
x_original[0,0,0]= 728218
x_original[0,0,0],x_permuted[0,0,0]

(tensor(728218.), tensor(728218.))

## Indexing (selecting data from tensors)

Indexing in PyTorch is similar to indexing with NumPy.

In [55]:
# Create a (1, 3, 3) tensor holding 1..9 (torch is already imported in the first cell)
x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [56]:
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [57]:
x[0][0]

tensor([1, 2, 3])

In [58]:
x[0][0][0],x[0][2][2] # first and last value

(tensor(1), tensor(9))

In [59]:
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension 
x[0, 0, :] # same as x[0][0]

tensor([1, 2, 3])

## PyTorch tensors & NumPy

* Data in NumPy, want in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

In [60]:
# NumPy array to tensor (torch and numpy are already imported in the first cell)
array = np.arange(1.0, 8.0)
# Warning: NumPy's default float dtype is float64 and from_numpy keeps it,
# so the tensor is float64 rather than PyTorch's usual float32
tensor = torch.from_numpy(array)

array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [61]:
# Change the value of array
# (`array + 1` builds a new array and rebinds the name `array` to it, so `tensor`
# keeps the old values, as the output shows)
# NOTE(review): per the torch.from_numpy docs the tensor shares memory with the source
# array, so an in-place update such as `array += 1` would change `tensor` too - confirm
# which behaviour this cell is meant to demonstrate

array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [62]:
# Tensor to numpy array
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()

tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility (trying to take random out of random)

In short how a neural network learns:

`start with random numbers -> tensor operations -> update random numbers to try and make them better representations of the data -> again -> again ...`

To reduce the randomness in neural networks and PyTorch comes the concept of a **random seed**.  
Essentially what the random seed does is "flavour" the randomness.

In [66]:
# Create two random tensors
random_tensor_A = torch.rand(3,4)
random_tensor_B = torch.rand(3,4)

print(random_tensor_A)
print(random_tensor_B)
print(random_tensor_A==random_tensor_B)


tensor([[0.3747, 0.8058, 0.5193, 0.5689],
        [0.6559, 0.1160, 0.3792, 0.0591],
        [0.9354, 0.7521, 0.6917, 0.9389]])
tensor([[0.7579, 0.3521, 0.8172, 0.8151],
        [0.3359, 0.1704, 0.9753, 0.9939],
        [0.0259, 0.8252, 0.5074, 0.9714]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [67]:
# Random but reproducible tensors: re-seed right before each draw so both
# draws start from the same generator state
RANDOM_SEED = 42

torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(size=(3, 4))

torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(size=(3, 4))

for shown in (random_tensor_C, random_tensor_D, random_tensor_C == random_tensor_D):
    print(shown)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


**Resource:** What we've just covered only scratches the surface of reproducibility in PyTorch. For more on reproducibility in general and random seeds, I'd check out:  
* [The PyTorch reproducibility documentation](https://pytorch.org/docs/stable/notes/randomness.html) (a good exercise would be to read through this for 10 minutes; even if you don't understand it now, being aware of it is important).  
* [The Wikipedia random seed page](https://en.wikipedia.org/wiki/Random_seed) (this'll give a good overview of random seeds and pseudorandomness in general).  

In [69]:
# Setup device agnostic code: use the GPU when CUDA is available, otherwise the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cpu'

## Putting tensors (and models) on the GPU

The reason we want our tensors/models on the GPU is because using a GPU results in faster computation.

In [70]:
tensor = torch.tensor([1,2,3])

print(tensor, tensor.device)

tensor([1, 2, 3]) cpu
