# 00. PyTorch Fundamentals

In [1]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

device = torch.device("mps") if (torch.mps.is_available()) else torch.device("cpu")
print(torch.__version__)

2.8.0


## Intro to Tensors
### Creating Tensors

In [2]:
# scalar
scalar = torch.tensor(7)
print(scalar)
print(scalar.ndim) # number of dimensions
print(scalar.item()) # get the value of a tensor as a standard python number
print(scalar.dtype) # data type of the tensor

tensor(7)
0
7
torch.int64


In [3]:
# Vector
vector = torch.tensor([7, 7])
print(vector)
print(vector.ndim)
print(vector.shape)
print(vector[0], vector[1])
print(vector.dtype)

tensor([7, 7])
1
torch.Size([2])
tensor(7) tensor(7)
torch.int64


In [4]:
# MATRIX
matrix = torch.tensor([[7, 8],
                       [9, 10]])
print(matrix)
print(matrix.ndim)
print(matrix.shape)
print(matrix[0], matrix[1])
print(matrix.dtype)

tensor([[ 7,  8],
        [ 9, 10]])
2
torch.Size([2, 2])
tensor([7, 8]) tensor([ 9, 10])
torch.int64


In [5]:
# Tensor
tensor = torch.tensor([[[1, 2, 3],
                        [4, 5, 6],
                        [7, 8, 9]]])
print(tensor)
print(tensor.ndim)
print(tensor.shape)
print(tensor[0])
print(tensor.dtype)

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])
3
torch.Size([1, 3, 3])
tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
torch.int64


### Random Tensors

Why Random Tensors?

Random tensors are important because the way many NN learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.

```Start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers```

In [6]:
# Create a random tensor of size (3,4)
random_tensor = torch.rand(3,4)
print(random_tensor)
print(random_tensor.shape)
print(random_tensor.dtype)
print(random_tensor.ndim)

tensor([[0.7034, 0.2769, 0.5494, 0.4902],
        [0.6289, 0.2099, 0.6295, 0.9087],
        [0.0486, 0.2553, 0.8498, 0.7837]])
torch.Size([3, 4])
torch.float32
2


In [7]:
# Create a random tensor with similar shape to an image
random_image_tensor = torch.rand(size=(3, 224, 224)) # color channels (RGB), height, width
print(random_image_tensor.shape)
print(random_image_tensor.ndim)
print(random_image_tensor.dtype)

torch.Size([3, 224, 224])
3
torch.float32


In [8]:
# Create a tensor of all zeros
zeros = torch.zeros(size=(3,4))
print(zeros)

# Create a tensor of all ones
ones = torch.ones(size=(3,4))
print(ones)

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])


### Creating a range of tensors and tensors-like

In [9]:
# Using torch.arange()
range_tensor = torch.arange(start=0, end=10, step=2)
print(range_tensor)

tensor([0, 2, 4, 6, 8])


In [10]:
# Creating tensors-like
like_tensor = torch.zeros_like(range_tensor)
print(like_tensor)

image_like_tensor = torch.rand_like(random_image_tensor)
print(image_like_tensor.shape)
print(image_like_tensor.dtype)

float_16_tensor = image_like_tensor.type(torch.float16).to(device)
print(float_16_tensor.dtype)
print(float_16_tensor.device)

tensor([0, 0, 0, 0, 0])
torch.Size([3, 224, 224])
torch.float32
torch.float16
mps:0


## Manipulating Tensors
Operations could include:
* Addition
* Subtraction
* Multiplication (element-wise)
* Division
* Matrix Multiplication

In [11]:
tensor = torch.tensor([1, 2, 3])
print(tensor, tensor.dtype)
tensor += 10
print(tensor, tensor.dtype)
tensor -= 10
print(tensor, tensor.dtype)
tensor *= 10
print(tensor, tensor.dtype)
tensor **= 2
print(tensor, tensor.dtype)

tensor([1, 2, 3]) torch.int64
tensor([11, 12, 13]) torch.int64
tensor([1, 2, 3]) torch.int64
tensor([10, 20, 30]) torch.int64
tensor([100, 400, 900]) torch.int64


In [12]:
# Pytroch inbuilt functions
print(f"{torch.abs(torch.tensor([-1, -2, -3])) = }")
print(f"{torch.sin(torch.tensor([0, np.pi/2, np.pi])) = }")
print(f"{torch.log(torch.tensor([1, np.e, np.e**2])) = }")
print(f"{torch.exp(torch.tensor([0, 1, 2])) = }")
print(f"{torch.sqrt(torch.tensor([1, 4, 9, 16])) = }")
print(f"{torch.round(torch.tensor([1.2, 1.5, 1.7])) = }")
print(f"{torch.sum(torch.tensor([1, 2, 3, 4, 5])) = }")
print(f"{torch.mean(torch.tensor([1, 2, 3, 4, 5]), dtype=torch.float32) = }")
print(f"{torch.max(torch.tensor([1, 2, 3, 4, 5])) = }")
print(f"{torch.min(torch.tensor([1, 2, 3, 4, 5])) = }")
print(f"{torch.argmax(torch.tensor([1, 2, 3, 4, 5])) = }")
print(f"{torch.argmin(torch.tensor([1, 2, 3, 4, 5])) = }")


torch.abs(torch.tensor([-1, -2, -3])) = tensor([1, 2, 3])
torch.sin(torch.tensor([0, np.pi/2, np.pi])) = tensor([ 0.0000e+00,  1.0000e+00, -8.7423e-08])
torch.log(torch.tensor([1, np.e, np.e**2])) = tensor([0.0000, 1.0000, 2.0000])
torch.exp(torch.tensor([0, 1, 2])) = tensor([1.0000, 2.7183, 7.3891])
torch.sqrt(torch.tensor([1, 4, 9, 16])) = tensor([1., 2., 3., 4.])
torch.round(torch.tensor([1.2, 1.5, 1.7])) = tensor([1., 2., 2.])
torch.sum(torch.tensor([1, 2, 3, 4, 5])) = tensor(15)
torch.mean(torch.tensor([1, 2, 3, 4, 5]), dtype=torch.float32) = tensor(3.)
torch.max(torch.tensor([1, 2, 3, 4, 5])) = tensor(5)
torch.min(torch.tensor([1, 2, 3, 4, 5])) = tensor(1)
torch.argmax(torch.tensor([1, 2, 3, 4, 5])) = tensor(4)
torch.argmin(torch.tensor([1, 2, 3, 4, 5])) = tensor(0)


### Matrix Multiplication

Two main ways of performing multiplication are:
1. Element-wise multiplication
2. Matrix Multiplication (dot product)

The two main rules of performing matrix multiplication needs to satisfy:
1. The **inner dimensions** must match.
2. The resulting matrix has the dimensions of the **outer dimensions**.

In [13]:
# Element wise multiplication
print(torch.mul(torch.tensor([1, 2, 3]), torch.tensor([4, 5, 6])))
print(torch.tensor([1, 2, 3]) * torch.tensor([4, 5, 6]))

tensor([ 4, 10, 18])
tensor([ 4, 10, 18])


In [14]:
# Matrix multiplication
print(torch.matmul(
    torch.tensor([[1, 2],
                [3, 4]]),                 
    torch.tensor([[5, 6],
                [7, 8]])))
print(
    torch.tensor([[1, 2],
                [3, 4]]) @ 
    torch.tensor([[5, 6],
                [7, 8]]))

tensor([[19, 22],
        [43, 50]])
tensor([[19, 22],
        [43, 50]])


### Shape Error: One of the most common errors in deep learning.

In [15]:
# Shapes for matrix multiplication
tensor_A = torch.tensor([[1, 2, 3],
                        [4, 5, 6]]) # (2, 3)
tensor_B = torch.tensor([[7, 8],
                        [9, 10],    
                        [11, 12]]) # (3, 2)
tensor_C = torch.tensor([[1, 2, 3],
                        [4, 5, 6]]) # (2, 3)
print(tensor_A.shape, tensor_B.shape, tensor_C.shape)
# print(torch.mm(tensor_A, tensor_B))
# print(tensor_A @ tensor_B)
print(torch.matmul(tensor_A, tensor_B))
# print(torch.matmul(tensor_A, tensor_C)) # Shape Error

torch.Size([2, 3]) torch.Size([3, 2]) torch.Size([2, 3])
tensor([[ 58,  64],
        [139, 154]])


To fix tensor shape issues, we can manipulate one of our tensors using a transpose.
A **transpose** switches the axes or dimensions of a given tensor.

In [16]:
print(tensor_C.T)
print(tensor_C.T.shape)
print(torch.matmul(tensor_A, tensor_C.T))

tensor([[1, 4],
        [2, 5],
        [3, 6]])
torch.Size([3, 2])
tensor([[14, 32],
        [32, 77]])


In [17]:
print(f"Original shapes: {tensor_A.shape} and {tensor_C.shape}")
print(f"New Shapes: {tensor_A.shape} (same as before) and {tensor_C.T.shape} (transposed)")
print(f"Multiplying: {tensor_A.shape} and {tensor_C.T.shape} <- Inner dimensions must match")
output = torch.matmul(tensor_A, tensor_C.T)
print(f"{output = }")
print(f"Output Shape: {output.shape}")

Original shapes: torch.Size([2, 3]) and torch.Size([2, 3])
New Shapes: torch.Size([2, 3]) (same as before) and torch.Size([3, 2]) (transposed)
Multiplying: torch.Size([2, 3]) and torch.Size([3, 2]) <- Inner dimensions must match
output = tensor([[14, 32],
        [32, 77]])
Output Shape: torch.Size([2, 2])


## Reshaping, stacking, squeezing and unsqueezing tensors

* Reshaping - Reshapes an input tensor to a defined shape
* View - Return a view of an input tensor of a certain shape but keep the same memory as the original tensor
* Stacking - Combine multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - Removes all `1` dimensions from a tensor.
* Unsqueeze - Adds a `1` dimension to a target tensor.
* Permute - Return a view of the input with dimensions permuted (swapped) in a certain way

In [18]:
x = torch.arange(1., 9.)
print(x, x.shape)

tensor([1., 2., 3., 4., 5., 6., 7., 8.]) torch.Size([8])


In [19]:
x_reshaped = x.reshape(2, 2, 2)
print(x_reshaped, x_reshaped.shape)

tensor([[[1., 2.],
         [3., 4.]],

        [[5., 6.],
         [7., 8.]]]) torch.Size([2, 2, 2])


In [20]:
# Change the view
x_view = x.view(2, 4)
print(x_view, x_view.shape)

tensor([[1., 2., 3., 4.],
        [5., 6., 7., 8.]]) torch.Size([2, 4])


### Changing anything in the view will change the original tensor, as they share the same memory

In [21]:
x_view[:, 0] = 100
print(x)
print(x_view)
print(x_reshaped)

tensor([100.,   2.,   3.,   4., 100.,   6.,   7.,   8.])
tensor([[100.,   2.,   3.,   4.],
        [100.,   6.,   7.,   8.]])
tensor([[[100.,   2.],
         [  3.,   4.]],

        [[100.,   6.],
         [  7.,   8.]]])


In [22]:
# Stack some tensors together
x_stacked = torch.stack([x, x, x, x], dim=0)
print(x_stacked)
print(x_stacked.shape)


tensor([[100.,   2.,   3.,   4., 100.,   6.,   7.,   8.],
        [100.,   2.,   3.,   4., 100.,   6.,   7.,   8.],
        [100.,   2.,   3.,   4., 100.,   6.,   7.,   8.],
        [100.,   2.,   3.,   4., 100.,   6.,   7.,   8.]])
torch.Size([4, 8])


In [23]:
x_stacked = torch.stack([x, x, x, x], dim=1)
print(x_stacked)
print(x_stacked.shape)

tensor([[100., 100., 100., 100.],
        [  2.,   2.,   2.,   2.],
        [  3.,   3.,   3.,   3.],
        [  4.,   4.,   4.,   4.],
        [100., 100., 100., 100.],
        [  6.,   6.,   6.,   6.],
        [  7.,   7.,   7.,   7.],
        [  8.,   8.,   8.,   8.]])
torch.Size([8, 4])


In [24]:
x_stacked = torch.vstack([x, x, x, x])
print(x_stacked)
print(x_stacked.shape)

tensor([[100.,   2.,   3.,   4., 100.,   6.,   7.,   8.],
        [100.,   2.,   3.,   4., 100.,   6.,   7.,   8.],
        [100.,   2.,   3.,   4., 100.,   6.,   7.,   8.],
        [100.,   2.,   3.,   4., 100.,   6.,   7.,   8.]])
torch.Size([4, 8])


In [25]:
x_stacked = torch.hstack([x, x, x, x])
print(x_stacked)
print(x_stacked.shape)

tensor([100.,   2.,   3.,   4., 100.,   6.,   7.,   8., 100.,   2.,   3.,   4.,
        100.,   6.,   7.,   8., 100.,   2.,   3.,   4., 100.,   6.,   7.,   8.,
        100.,   2.,   3.,   4., 100.,   6.,   7.,   8.])
torch.Size([32])


In [26]:
# Squeeze and Unsqueeze
zeros = torch.zeros(10)
print(zeros)
print(zeros.shape)

zeros_unsqueezed = zeros.unsqueeze(dim=0)
print(zeros_unsqueezed)
print(zeros_unsqueezed.shape)

zeros_unsqueezed = zeros.unsqueeze(dim=1)
print(zeros_unsqueezed)
print(zeros_unsqueezed.shape)

zeros_squeezed = zeros_unsqueezed.squeeze()
print(zeros_squeezed)
print(zeros_squeezed.shape)   

tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
torch.Size([10])
tensor([[0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]])
torch.Size([1, 10])
tensor([[0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.],
        [0.]])
torch.Size([10, 1])
tensor([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])
torch.Size([10])


In [27]:
# Permute - Rearranges the dimensions of a target tensor in a specified order
random_image_tensor = torch.rand(size=(224, 224, 3)) # height, width, color channels (RGB)
print(random_image_tensor.shape)
# Permute the dimensions to (color channels, height, width)
permuted_image_tensor = random_image_tensor.permute(2, 0, 1)
print(permuted_image_tensor.shape)

random_image_tensor[0, 0, 2] = 256
print(random_image_tensor[0, 0, 2])
print(permuted_image_tensor[2, 0, 0])

torch.Size([224, 224, 3])
torch.Size([3, 224, 224])
tensor(256.)
tensor(256.)


## Indexing (selecting data with tensors)

Indexing with PyTorch is the same as indexing with NumPy

In [28]:
x = torch.arange(1, 10).reshape(1, 3, 3)
print(x)
print(x.shape)

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]])
torch.Size([1, 3, 3])


In [29]:
# Let's index on our new tensor
print(x[0]) # first dimension
print(x[0][0]) # second dimension
print(x[0][0][0]) # third dimension

print(x[0][2][2].item())

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])
tensor([1, 2, 3])
tensor(1)
9


In [30]:
# We can also use ":" to select all of a particular dimension
print(x[:, 0]) # all of the first dimension, first row of second dimension, all of third dimension

tensor([[1, 2, 3]])


In [31]:
# Get all values of 0th and 1st dimesnions but only 1st value of 2nd dimension
print(x[:, :, 0])

tensor([[1, 4, 7]])


In [32]:
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension
print(x[0, 0, :])

tensor([1, 2, 3])


In [33]:
print(x[0, 2, 2])
print(x[:, :, 2])

tensor(9)
tensor([[3, 6, 9]])


## PyTorch tensors and NumPy

NumPy is a popular scientific Python numerical computing library, and so PyTorch has functionality to interact with it.
* Data in NumPy, want in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor, want in NumPy -> `torch.Tensor.numpy()`


In [34]:
# NumPy to PyTorch tensor
array = np.arange(1, 8)
print(array, type(array))
tensor_from_array = torch.from_numpy(array)
print(tensor_from_array, type(tensor_from_array))
print(tensor_from_array.dtype)

[1 2 3 4 5 6 7] <class 'numpy.ndarray'>
tensor([1, 2, 3, 4, 5, 6, 7]) <class 'torch.Tensor'>
torch.int64


In [35]:
# PyTorch tensor to NumPy
tensor = torch.ones(7)
print(tensor, type(tensor))
array_from_tensor = tensor.numpy()
print(array_from_tensor, type(array_from_tensor))
print(array_from_tensor.dtype)

tensor([1., 1., 1., 1., 1., 1., 1.]) <class 'torch.Tensor'>
[1. 1. 1. 1. 1. 1. 1.] <class 'numpy.ndarray'>
float32


## Reproducability (trying to takr random out of random)

In short, how a neural network learns:
`start with random numbers -> tensor ops -> update random nums top make them better representations of the data -> repeat -> repeat ...`

But for experiments, we value reproducability, so we can share our work. To reduce this randomness, we have the concept of a **random seed**. A random seed essentially flavours the randomness.

In [36]:
# Create two random tensors
rand_tensor_A = torch.rand(3, 4)
rand_tensor_B = torch.rand(3, 4)
print(rand_tensor_A)
print(rand_tensor_B)
print(rand_tensor_A == rand_tensor_B)

tensor([[0.7821, 0.2064, 0.9102, 0.0175],
        [0.2933, 0.2940, 0.5169, 0.2002],
        [0.5145, 0.3285, 0.7504, 0.0979]])
tensor([[0.0018, 0.9942, 0.7109, 0.0684],
        [0.6971, 0.5029, 0.5866, 0.7717],
        [0.6290, 0.6543, 0.0935, 0.1168]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [37]:
# Let's make some random tensors with the same random seed, so they are reproducible
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)
rand_tensor_C = torch.rand(3, 4)
rand_tensor_D = torch.rand(3, 4)

print(rand_tensor_C)
print(rand_tensor_D)
print(rand_tensor_C == rand_tensor_D)

torch.manual_seed(RANDOM_SEED)
rand_tensor_E = torch.rand(3, 4)

print(rand_tensor_E)
print(rand_tensor_C == rand_tensor_E)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8694, 0.5677, 0.7411, 0.4294],
        [0.8854, 0.5739, 0.2666, 0.6274],
        [0.2696, 0.4414, 0.2969, 0.8317]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Utilizing GPUs and TPUs
For faster computations

In [41]:
rand_tensor_A = rand_tensor_A.to(device)
rand_tensor_B = rand_tensor_B.to(device)
rand_tensor_A.device
rand_tensor_B.device

device(type='mps', index=0)

## Exercise Solutions

In [49]:
rand_tensor_seven_by_seven = torch.rand(7, 7, device=device)
rand_tensor_one_by_seven = torch.rand(1, 7, device=device)
print(rand_tensor_seven_by_seven.shape)
print(rand_tensor_one_by_seven.shape)

mat_mul_op = torch.matmul(rand_tensor_seven_by_seven, rand_tensor_one_by_seven.T)
print(mat_mul_op.shape)

torch.Size([7, 7])
torch.Size([1, 7])
torch.Size([7, 1])


In [59]:
random_seed_gen = torch.Generator(device=device).manual_seed(0)
rand_tensor_seven_by_seven = torch.rand(7, 7, device=device, generator=random_seed_gen)
rand_tensor_one_by_seven = torch.rand(1, 7, device=device, generator=random_seed_gen)
print(rand_tensor_seven_by_seven.shape)
print(rand_tensor_one_by_seven.shape)

mat_mul_op = torch.matmul(rand_tensor_seven_by_seven, rand_tensor_one_by_seven.T)
print(mat_mul_op)
print(mat_mul_op.shape)

torch.Size([7, 7])
torch.Size([1, 7])
tensor([[1.2568],
        [1.2396],
        [1.5435],
        [1.1623],
        [1.1126],
        [1.2704],
        [0.5872]], device='mps:0')
torch.Size([7, 1])


In [70]:
random_seed_gen = torch.Generator(device=device).manual_seed(1234)
first_tensor = torch.rand(2, 3, device=device, generator=random_seed_gen)
second_tensor = torch.rand(2, 3, device=device, generator=random_seed_gen)
print(first_tensor)
print(second_tensor)

mat_mul_op = torch.matmul(first_tensor, second_tensor.T)
print(mat_mul_op)

tensor([[0.4794, 0.7608, 0.8283],
        [0.8029, 0.6423, 0.0597]], device='mps:0')
tensor([[0.0223, 0.9878, 0.2711],
        [0.9123, 0.7672, 0.8879]], device='mps:0')
tensor([[0.9868, 1.7565],
        [0.6685, 1.2782]], device='mps:0')


In [66]:
print(f"{torch.max(mat_mul_op) = }")
print(f"{torch.min(mat_mul_op) = }")
print(f"{torch.argmax(mat_mul_op) = }")
print(f"{torch.argmin(mat_mul_op) = }")

torch.max(mat_mul_op) = tensor(1.7565, device='mps:0')
torch.min(mat_mul_op) = tensor(0.6685, device='mps:0')
torch.argmax(mat_mul_op) = tensor(1, device='mps:0')
torch.argmin(mat_mul_op) = tensor(2, device='mps:0')


In [72]:
random_seed_gen = torch.Generator(device=device).manual_seed(7)
large_rand_tensor = torch.rand(1, 1, 1, 10, device=device, generator=random_seed_gen)
print(large_rand_tensor)
print(large_rand_tensor.shape)

squeezed_tensor = large_rand_tensor.squeeze()
print(squeezed_tensor)
print(squeezed_tensor.shape)

tensor([[[[0.7544, 0.7100, 0.7095, 0.4036, 0.5627, 0.0098, 0.0498, 0.7344,
           0.9472, 0.2643]]]], device='mps:0')
torch.Size([1, 1, 1, 10])
tensor([0.7544, 0.7100, 0.7095, 0.4036, 0.5627, 0.0098, 0.0498, 0.7344, 0.9472,
        0.2643], device='mps:0')
torch.Size([10])
