In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(f"PyTorch version: {torch.__version__}")

# Check PyTorch has access to MPS (Metal Performance Shaders, Apple's GPU backend)
print(f"Is MPS (Metal Performance Shader) built? {torch.backends.mps.is_built()}")
print(f"Is MPS available? {torch.backends.mps.is_available()}")

# Pick the best available accelerator, using the same priority order as the
# device-agnostic cell in the GPU section of this notebook: CUDA, then MPS, then CPU.
if torch.cuda.is_available():
    device = "cuda"
elif torch.backends.mps.is_available():
    device = "mps"
else:
    device = "cpu"

# Every tensor created without an explicit `device=` argument now lands on `device`.
torch.set_default_device(torch.device(device))
print(f"Using device: {device}")

PyTorch version: 2.2.0
Is MPS (Metal Performance Shader) built? True
Is MPS available? True
Using device: mps


## 00. PyTorch fundamentals

In [2]:
print(torch.__version__)

2.2.0


## Intro to tensors

### Creating tensors

In [3]:
# Scalar: a single value stored in a zero-dimensional tensor
scalar = torch.tensor(7)
scalar

tensor(7, device='mps:0')

In [4]:
# ndim is the number of dimensions; a scalar has none
scalar.ndim

0

In [5]:
# .item() extracts the value of a single-element tensor as a plain Python number
scalar.item()

7

In [6]:
# Vector: a one-dimensional tensor (one level of square brackets)
vector = torch.tensor([7, 7])
vector

tensor([7, 7], device='mps:0')

In [7]:
vector.ndim

1

In [8]:
vector.shape

torch.Size([2])

In [9]:
# Matrix: a two-dimensional tensor (two levels of square brackets), here 2 rows x 2 columns
MATRIX = torch.tensor([[7, 8],
                       [9, 10]])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]], device='mps:0')

In [10]:
MATRIX.ndim

2

In [11]:
# Indexing the first dimension with 1 selects the second row
MATRIX[1]

tensor([ 9, 10], device='mps:0')

In [12]:
MATRIX.shape

torch.Size([2, 2])

In [13]:
# Tensor: three levels of square brackets give three dimensions —
# here a single 3x3 matrix wrapped in an outer dimension of size 1
TENSOR = torch.tensor([[[1, 2, 3],
                        [3, 6, 9],
                        [2, 5, 4]]])
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 5, 4]]], device='mps:0')

In [14]:
TENSOR.ndim

3

In [15]:
TENSOR.shape

torch.Size([1, 3, 3])

In [16]:
# Index 0 of the outermost dimension is the whole 3x3 matrix
TENSOR[0]

tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 5, 4]], device='mps:0')

In [17]:
# Chained indexing: first the 3x3 matrix, then its row at index 2 (the last row)
TENSOR[0][2]

tensor([2, 5, 4], device='mps:0')

### Random tensors
Why random tensors?

Random tensors are important because the way many neural nets learn is that they start with tensors full of random numbers, then adjust those random numbers to better represent the data.

`Start with random number -> look at data -> update random numbers -> look at data -> update random numbers`

In [18]:
# Random tensor with 3 rows and 4 columns, values drawn uniformly from [0, 1)
random_tensor = torch.rand(size=(3, 4))
random_tensor

tensor([[0.8634, 0.1199, 0.4663, 0.6736],
        [0.6518, 0.8340, 0.6629, 0.7174],
        [0.9251, 0.2244, 0.6262, 0.0065]], device='mps:0')

In [19]:
random_tensor.ndim

2

In [20]:
# Random tensor laid out like an image: height x width x color channels
random_image_size_tensor = torch.rand(224, 224, 3)
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

In [21]:
random_tensor2 = torch.rand(size=(11, 22))

## Zeros and ones

In [22]:
# Column of zeros: 3 rows, 1 column
zeros = torch.zeros(3, 1)
zeros

tensor([[0.],
        [0.],
        [0.]], device='mps:0')

In [23]:
random_tensor

tensor([[0.8634, 0.1199, 0.4663, 0.6736],
        [0.6518, 0.8340, 0.6629, 0.7174],
        [0.9251, 0.2244, 0.6262, 0.0065]], device='mps:0')

In [24]:
# Broadcasting: the (3, 1) column of zeros is stretched across the 4 columns of
# random_tensor, so the element-wise product is a (3, 4) tensor of zeros
zeros * random_tensor

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]], device='mps:0')

In [25]:
# Tensor filled with ones: 3 rows, 4 columns
ones = torch.ones(3, 4)
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], device='mps:0')

In [26]:
ones.dtype

torch.float32

### Creating a range and tensors-like

In [27]:
# Range: integers from 1 up to (but not including) 11, in steps of 1
one_to_ten = torch.arange(start=1, end=11, step=1)
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10], device='mps:0')

In [28]:
# Creating tensor-like :
# zeros_like builds a tensor of zeros with the same shape and dtype as its input
# (here: ten integer zeros, matching one_to_ten)
ten_zeros = torch.zeros_like(one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], device='mps:0')

### Tensor datatypes

**Note:** tensor datatypes are one of the 3 big sources of errors we run into with PyTorch and DL :
1. Tensors not the right datatype
2. Tensors not in the right shape
3. Tensors not on the correct device

In [29]:
# Float32 tensor, with three commonly used creation arguments spelled out:
#   dtype         - datatype of the elements
#   device        - where the tensor lives (the `device` chosen in the setup cell)
#   requires_grad - whether operations on this tensor are tracked for gradients
float_32_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=torch.float32,
                               device=torch.device(device),
                               requires_grad=False)
float_32_tensor

tensor([3., 6., 9.], device='mps:0')

In [30]:
float_32_tensor.dtype

torch.float32

In [31]:
# Convert to half precision: .type(dtype) returns the tensor's values in the requested dtype
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], device='mps:0', dtype=torch.float16)

In [32]:
# Mixing float16 and float32 in one operation promotes the result to float32
(float_16_tensor * float_32_tensor).dtype

torch.float32

In [33]:
int32_tensor = torch.tensor([3, 6, 9], dtype=torch.int32)
int32_tensor

tensor([3, 6, 9], device='mps:0', dtype=torch.int32)

In [34]:
# Mixing float32 and int32 also promotes the result to float32
(float_32_tensor * int32_tensor).dtype

torch.float32

### Getting information from tensors

1. Tensors not the right datatype - call `tensor.dtype`
2. Tensors not in the right shape - call `tensor.shape`
3. Tensors not on the correct device - call `tensor.device`

In [35]:
some_tensor = torch.rand(3, 4)
some_tensor

tensor([[0.8246, 0.3041, 0.9930, 0.1678],
        [0.1009, 0.5247, 0.7202, 0.2021],
        [0.1380, 0.7613, 0.6582, 0.0247]], device='mps:0')

In [36]:
# Report the three attributes worth checking first when a tensor operation fails:
# datatype, shape and device.

print(some_tensor)
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Device of tensor: {some_tensor.device}")

tensor([[0.8246, 0.3041, 0.9930, 0.1678],
        [0.1009, 0.5247, 0.7202, 0.2021],
        [0.1380, 0.7613, 0.6582, 0.0247]], device='mps:0')
Datatype of tensor: torch.float32
Shape of tensor: torch.Size([3, 4])
Device of tensor: mps:0


### Manipulating tensors (tensor operations)

Tensor operations include:
* Addition
* Subtraction
* Multiplication (element-wise)
* Division
* Matrix multiplication

In [37]:
# Create a float32 tensor to use in the arithmetic examples below and confirm its dtype:
tensor = torch.tensor([1, 2, 3], dtype=torch.float32)
tensor.dtype

torch.float32

In [38]:
# Multiply tensor by 10 (returns a new tensor; `tensor` itself is not modified) :
tensor * 10

tensor([10., 20., 30.], device='mps:0')

In [39]:
# Subtract : 
tensor - 10

tensor([-9., -8., -7.], device='mps:0')

In [40]:
# Try out PyTorch built-in functions : torch.mul is the function form of the `*` operator
torch.mul(tensor, 10)

tensor([10., 20., 30.], device='mps:0')

In [41]:
torch.add(tensor, 10)

tensor([11., 12., 13.], device='mps:0')

In [42]:
torch.div(tensor, 0.5)

tensor([2., 4., 6.], device='mps:0')

### Matrix multiplication

Two main ways of performing multiplication in neural networks and deep learning :
1. Element-wise multiplication
2. Matrix multiplication (dot product)
 
More information on multiplying matrices - https://www.mathsisfun.com/algebra/matrix-multiplying.html

There are two main rules that performing matrix multiplication must satisfy :
1. The **inner dimensions** must match (inner dimensions means the dimensions closest to the **@** operator):
* `(3, 2) @ (3, 2)` won't work
* `(2, 3) @ (3, 2)` will work
* `(3, 2) @ (2, 3)` will work
2. The resulting matrix has the shape of the **outer dimensions**:
* `(2, 3) @ (3, 2)` -> `(2, 2)`
* `(3, 2) @ (2, 3)` -> `(3, 3)`

In [43]:
# Element-wise multiplication: each element is multiplied by the element at the same position
print(tensor, "*", tensor)
print(f"Equals: {tensor * tensor}")
# The operands are not modified by the multiplication
print(tensor)


tensor([1., 2., 3.], device='mps:0') * tensor([1., 2., 3.], device='mps:0')
Equals: tensor([1., 4., 9.], device='mps:0')
tensor([1., 2., 3.], device='mps:0')


In [44]:
tensor.dtype

torch.float32

In [45]:
# Matrix multiplication: for two 1-D tensors torch.matmul returns their dot product
# (a zero-dimensional tensor)
torch.matmul(tensor, tensor)

tensor(14., device='mps:0')

In [46]:
# Matrix multiplication by hand :
1 * 1 + 2 * 2 + 3 * 3

14

In [47]:
%time
value = 0
for i in range(len(tensor)):
    value += tensor[i] * tensor[i]
    
print(value)

CPU times: user 1e+03 ns, sys: 1 µs, total: 2 µs
Wall time: 2.62 µs
tensor(14., device='mps:0')


In [48]:
%time
torch.matmul(tensor, tensor)

CPU times: user 1 µs, sys: 0 ns, total: 1 µs
Wall time: 1.91 µs


tensor(14., device='mps:0')

### One of the most common errors in deep learning : shape errors

In [49]:
# Shapes for matrix multiplications :
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]], dtype=torch.float32)

# (3, 2) @ (3, 2) fails because the inner dimensions (2 and 3) do not match.
# The error is the point of this cell, so catch and display it rather than
# letting it abort a "Restart Kernel -> Run All".
try:
    torch.mm(tensor_A, tensor_B)
except RuntimeError as shape_error:
    print(f"RuntimeError: {shape_error}")


RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x2)

In [None]:
tensor_A.shape, tensor_B.shape

To fix our tensor shape issues, we can manipulate the shape of one of our tensors using `transpose`

A **transpose** switches the axis or dimensions of a given tensor.

In [None]:
# .T is the transpose: the (3, 2) tensor_B becomes (2, 3); tensor_B itself keeps its shape
tensor_B.T, tensor_B

In [None]:
# (3, 2) @ (2, 3) -> (3, 3): with tensor_B transposed the inner dimensions match
torch.matmul(tensor_A, tensor_B.T) # tensor_B.T holds the same values as tensor_B, with its two axes swapped

### Finding the min, max, mean, sum, etc. (tensor aggregation)


In [None]:
# Integers 0, 10, ..., 90 to aggregate over
x = torch.arange(start=0, end=100, step=10)
x

In [None]:
# Find the min : 
torch.min(x), x.min()

In [None]:
#Find the max :
torch.max(x), x.max()

In [None]:
# Find the mean :
# x holds integers and torch.mean needs a floating-point (or complex) input,
# so cast to float32 first. Function form and method form are shown side by side.
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

In [None]:
# Find the sum : 
torch.sum(x), x.sum()

### Finding positional min and max in a tensor (the indices of the min and max inside the tensor)

In [None]:
# Index positions (not the values) of the smallest and largest elements,
# using the method form first and then the function form
x.argmin(), x.argmax(), torch.argmin(x), torch.argmax(x)

### Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - return a view of an input tensor of a certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack) or next to each other (hstack)
* Squeeze - remove all `1` dimensions from a tensor
* Unsqueeze - add a `1` dimension to a tensor
* Permute - return a view of the input with dimensions permuted (swapped) in a certain way

In [None]:
# Let's create a tensor (float arguments, so arange produces the floats 1.0 ... 9.0) :
x = torch.arange(1., 10.)
x, x.shape

In [None]:
# Add an extra dimension : the same 9 elements arranged as 1 row of 9
# (the new shape must hold exactly as many elements as the original)
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape

In [None]:
# Change the view: look at the same 9 elements of x as a 3x3 matrix (no data is copied)
z = x.view(3, 3)
z, z.shape

In [None]:
# Changing z changes x (because a view of a tensor shares the same memory as the original tensor)
# Here the first column of z is overwritten with 5.
z[:, 0] = 5
z, x

In [None]:
# Stack tensors on top of each other : four copies of x joined along a new leading dimension (dim=0)
x_stacked = torch.stack([x, x, x, x], dim=0)
x_stacked

In [None]:
# Squeeze - returns a tensor with all dimensions of size 1 removed
x_reshaped, x_reshaped.shape

In [None]:
# squeeze() drops every dimension of size 1, so the (1, 9) tensor becomes (9,)
x_reshaped.squeeze(), x_reshaped.squeeze().shape

In [None]:
# Unsqueeze - adds a dimension of size 1 to a target tensor at a specific dim
# (dim=0 puts the new dimension in front)
x_unsqueezed = torch.unsqueeze(x, dim=0)
x_unsqueezed, x_unsqueezed.shape

In [None]:
# torch.permute returns a view with the dimensions in a new order; like any view
# it shares memory with the original tensor.
x_original = torch.rand(224, 224, 3)  # image-like data: [height, width, color channels]

# New order (2, 0, 1): the color channels move from the last dimension to the first.
x_permuted = torch.permute(x_original, (2, 0, 1))

print(f"Previous shape : {x_original.shape}")
print(f"Permuted shape : {x_permuted.shape}")

## Indexing (selecting data from tensors)

Indexing with PyTorch is similar to indexing with NumPy

In [50]:
# The integers 1..9 arranged as one 3x3 matrix inside an outer dimension of size 1
x = torch.reshape(torch.arange(1, 10), (1, 3, 3))
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]], device='mps:0'),
 torch.Size([1, 3, 3]))

In [51]:
# Let's index on our new tensor :
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]], device='mps:0')

In [54]:
# Index on the middle bracket, dim 1
x[0][0]

tensor([1, 2, 3], device='mps:0')

In [53]:
# Index on the most inner bracket : 
x[0][0][0]

tensor(1, device='mps:0')

In [55]:
x[0][2][2]

tensor(9, device='mps:0')

In [56]:
# You can also use ":" to select "all" of a target dimension :
# here everything in dim 0 and index 0 of dim 1 (the first row, still wrapped in dim 0)
x[:, 0]

tensor([[1, 2, 3]], device='mps:0')

In [57]:
# Get all values of dims 0 and 1, but only index 1 of dim 2 (the middle column) :

x[:, :, 1]

tensor([[2, 5, 8]], device='mps:0')

In [58]:
# Get all values of dim 0, but only index 1 of dims 1 and 2
# (the centre element, returned inside a one-element tensor)
x[:, 1, 1]

tensor([5], device='mps:0')

In [59]:
# All of dim 0, index 1 of dim 1, all of dim 2 (the middle row)
x[:, 1, :]

tensor([[4, 5, 6]], device='mps:0')

In [60]:
# Get index 0 of the 0th and 1st dims and all values of the 2nd dim :
x[0, 0, :]

tensor([1, 2, 3], device='mps:0')

In [61]:
x

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]], device='mps:0')

In [62]:
# Index on x to return 9 
print(x[0][2][2])

# Index on x to return 3,6,9
print(x[:, :, 2])

tensor(9, device='mps:0')
tensor([[3, 6, 9]], device='mps:0')


## Pytorch tensors and NumPy

NumPy is a popular scientific Python numerical computing library.

And because of this PyTorch has functionality to interact with it.

* Data in NumPy, want in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

NumPy's default data type is float64, while PyTorch's is float32, so watch out for datatype issues.

In [None]:
# NumPy array -> PyTorch tensor
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array) # from_numpy keeps the array's dtype, so the tensor is float64 (NumPy's default) rather than PyTorch's default float32
array, tensor

In [None]:
# Change the value of the array: what does it do to `tensor`? Nothing here, because
# `array + 1` builds a brand-new array and rebinds the name `array` to it.
# NOTE: torch.from_numpy shares memory with its source array, so an in-place
# update (e.g. `array += 1`) WOULD be visible through `tensor`.
array = array + 1
array, tensor

In [None]:
# Tensor to np array :
tensor = torch.ones(7)
numpy_tensor = tensor.cpu().numpy() # .numpy() works on CPU tensors only, so copy off the default device (MPS here) first
tensor, numpy_tensor

## Reproducibility - trying to take the random out of random

In short how a neural net learns is :
`start with random numbers -> tensor operations -> update random numbers to try and make better representation of the data -> repeat -> repeat -> ...`

To reduce the randomness in NNs and Pytorch comes the concept of **random seed**.

Essentially what the random seed does is flavor the randomness.

In [None]:
# Two independent draws from the random number generator
random_tensor_A = torch.rand(size=(3, 4))
random_tensor_B = torch.rand(size=(3, 4))

print(random_tensor_A, random_tensor_B, sep="\n")

# Element-wise equality check between the two draws
print(torch.eq(random_tensor_A, random_tensor_B))

In [None]:
# Random but reproducible tensors: re-seeding immediately before each draw
# restarts the generator from the same state, so both draws are identical.
RANDOM_SEED = 42

torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(size=(3, 4))

torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(size=(3, 4))

print(random_tensor_C, random_tensor_D, sep="\n")

# Element-wise equality check between the two seeded draws
print(torch.eq(random_tensor_C, random_tensor_D))

## Running tensors and PyTorch on the GPU

### Getting a GPU

1. Google Colab (Free, Pro, Pro+)
2. Use your own GPU - takes setup and requires $$$
3. Cloud computing - GCP, AWS, Azure etc.

For 2 and 3, PyTorch + GPU drivers take some time to set up.

In [None]:
# Device-agnostic setup: prefer CUDA, then Apple's MPS, and fall back to the CPU.
device = (
    "cuda" if torch.cuda.is_available()
    else "mps" if torch.backends.mps.is_available()
    else "cpu"
)

# Tensors created without an explicit `device=` argument now land on `device`.
torch.set_default_device(torch.device(device))

### Moving tensors from GPU to CPU e.g. for using it to turn into NumPy:

In [None]:
# No device is passed, so the tensor is created on the default device selected above
# (a GPU whenever CUDA or MPS is available)
tensor_on_GPU = torch.ones(1,5)
tensor_on_GPU

In [None]:
# .to("cpu") gives a tensor that lives on the CPU, ready for e.g. .numpy()
tensor_on_CPU = tensor_on_GPU.to("cpu")
tensor_on_CPU, tensor_on_CPU.device