<a href="https://colab.research.google.com/github/WilliamKyaww/PyTorch-for-Deep-Learning-Machine-Learning/blob/main/Pytorch_Fundamentalsipynb.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import time

import matplotlib.pyplot as plt  # `plt` conventionally aliases the pyplot submodule, not the top-level package
import numpy as np
import pandas as pd
import torch

# Show the installed PyTorch version so the recorded outputs can be matched to it.
print(torch.__version__)

2.6.0+cu124


## **Basics**

In [None]:
# Scalar: a tensor built from a single number (no brackets, so no dimensions)
scalar = torch.tensor(7)
scalar

tensor(7)

In [None]:
# Vector: a 1-D tensor built from a flat list of two values
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [None]:
# Number of dimensions: one pair of brackets -> 1
vector.ndim

1

In [None]:
# Shape: a single dimension containing 2 elements
vector.shape

torch.Size([2])

In [None]:
# MATRIX: a 2-D tensor assembled from two rows of two integers each
MATRIX = torch.tensor([
    [7, 8],
    [9, 10],
])
MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [None]:
# Two levels of nested brackets -> 2 dimensions
MATRIX.ndim

2

In [None]:
# Shape: 2 rows by 2 columns
MATRIX.shape

torch.Size([2, 2])

In [None]:
# TENSOR: a 3-D tensor - one outer block wrapping a single 3x3 matrix
TENSOR = torch.tensor([
    [
        [1, 2, 3],
        [3, 6, 9],
        [2, 4, 5],
    ],
])
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [None]:
# Three levels of nested brackets -> 3 dimensions
TENSOR.ndim

3

In [None]:
# Shape: 1 block containing 3 rows of 3 values
TENSOR.shape

torch.Size([1, 3, 3])

In [None]:
# TENSOR2: a 3-D tensor holding two 3x3 matrices (the second is filled with 2s)
TENSOR2 = torch.tensor([
    [
        [1, 2, 3],
        [3, 6, 9],
        [2, 4, 5],
    ],
    [
        [2, 2, 2],
        [2, 2, 2],
        [2, 2, 2],
    ],
])
TENSOR2

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]],

        [[2, 2, 2],
         [2, 2, 2],
         [2, 2, 2]]])

In [None]:
# TENSOR2 is also 3-dimensional; only the size of its first dimension differs from TENSOR
TENSOR2.ndim

3

In [None]:
# Shape: 2 blocks, each with 3 rows of 3 values
TENSOR2.shape

torch.Size([2, 3, 3])

In [None]:
# Indexing the first dimension returns the single 3x3 matrix inside TENSOR
TENSOR[0]

tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 4, 5]])

In [None]:
# Index 1 along the first dimension selects the second 3x3 matrix (all 2s)
TENSOR2[1]

tensor([[2, 2, 2],
        [2, 2, 2],
        [2, 2, 2]])

## **Random Tensors**

In [None]:
# Random tensor of shape (3, 4): 3 sets (rows) of 4 values each
random_tensor = torch.rand(size=(3, 4))
random_tensor

tensor([[0.3154, 0.7472, 0.6469, 0.4001],
        [0.1791, 0.3100, 0.4079, 0.0695],
        [0.4123, 0.9489, 0.3028, 0.1239]])

In [None]:
# A (3, 4) tensor has 2 dimensions
random_tensor.ndim

2

In [None]:
# Random tensor shaped like an image: (height, width, colour channels)
random_imagine_size_tensor = torch.rand(224, 224, 3)
random_imagine_size_tensor.shape, random_imagine_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

## **Zeroes and Ones**


In [None]:
# 3x4 tensor in which every element is zero
zeroes = torch.zeros(3, 4)
zeroes

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [None]:
# 3x4 tensor in which every element is one
ones = torch.ones(3, 4)
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [None]:
# No dtype was requested when creating `ones`, so it shows the default here (float32)
ones.dtype

torch.float32

## **Creating a range of tensors and tensors-like**

In [None]:
# Integers 1 through 10 inclusive. `end` is exclusive, so it has to be 11;
# the previous arange(0, 10) produced 0..9, which contradicted the variable name.
one_to_ten = torch.arange(start=1, end=11)
one_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# Multiples of 5 from 0 up to (but not including) 100.
# Deliberately NOT named `range`: rebinding that name shadows Python's builtin
# `range()` for the rest of the kernel session, so any later
# `for i in range(...)` loop fails with "'Tensor' object is not callable".
multiples_of_five = torch.arange(start=0, end=100, step=5)
multiples_of_five

tensor([ 0,  5, 10, 15, 20, 25, 30, 35, 40, 45, 50, 55, 60, 65, 70, 75, 80, 85,
        90, 95])

In [None]:
# Create a tensor of zeros that copies the shape of an existing tensor, without spelling the shape out
ten_zeros = torch.zeros_like(one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## **Tensor Datatype**

**Note**: Using the wrong tensor datatype is one of the three most common sources of errors in PyTorch and deep learning:

1. Tensors not right datatype
2. Tensors not right shape
3. Tensors not on the right device


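Data Types:
1. 32-bit floats use more memory and are more precise, but computations are slower
2. 16-bit floats use less memory and are less precise, but computations are faster

There are many more datatypes (for example 64-bit floats and various integer types).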

Device:
1. Default is cpu
2. Can be cuda (to use Nvidia GPUs)

In [None]:
# Spell out torch.tensor's main keyword arguments, all left at their defaults:
#   dtype         - what datatype the tensor is (None lets PyTorch infer it; float32 here)
#   device        - what device the tensor is on (None uses the default)
#   requires_grad - whether or not to track gradients with this tensor's operations
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,
    device=None,
    requires_grad=False,
)
float_32_tensor, float_32_tensor.dtype

(tensor([3., 6., 9.]), torch.float32)

In [None]:
# .type() returns the tensor cast to the requested dtype - here half precision (float16)
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [None]:
# torch.half is another name for torch.float16, so the result has the same dtype as above
float_16_tensor = float_32_tensor.type(torch.half)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [None]:
# Mixing float16 and float32 works here: the result comes back as float32
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

In [None]:
# Integer tensor with an explicitly requested 32-bit dtype
int_32_tensor = torch.tensor([3,6,9], dtype = torch.int32)
int_32_tensor

tensor([3, 6, 9], dtype=torch.int32)

In [None]:
# Mixing float32 and int32 also works here: the result comes back as float32
float_32_tensor * int_32_tensor

tensor([ 9., 36., 81.])

### Tensor Attributes

1. Tensors not right datatype - to get datatype from a tensor, use "tensor.dtype"
2. Tensors not right shape - to get shape from a tensor, use "tensor.shape"
3. Tensors not on the right device - to get device from a tensor, use "tensor.device"

In [None]:
# Random (3, 4) tensor whose attributes are inspected in the next cell
some_tensor = torch.rand(size=(3, 4))
some_tensor

tensor([[0.3220, 0.2235, 0.6204, 0.6512],
        [0.2135, 0.2428, 0.3805, 0.4231],
        [0.8671, 0.0744, 0.2227, 0.0035]])

In [None]:
# Inspect the three attributes behind the most common tensor errors:
# datatype (.dtype), shape (.shape) and device (.device)
print(some_tensor)
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Device tensor is on: {some_tensor.device}")

tensor([[0.3220, 0.2235, 0.6204, 0.6512],
        [0.2135, 0.2428, 0.3805, 0.4231],
        [0.8671, 0.0744, 0.2227, 0.0035]])
Datatype of tensor: torch.float32
Shape of tensor: torch.Size([3, 4])
Device tensor is on: cpu


### Manipulating Tensors - Tensor Operations

Tensor operations include:
- Addition
- Subtraction
- Multiplication (element-wise)
- Division
- Matrix Multiplication

In [None]:
# Create a tensor and add 10 to every element; the sum is displayed but not
# assigned, so `tensor` itself keeps its values
tensor = torch.tensor([1,2,3])
tensor + 10

tensor([11, 12, 13])

In [None]:
# Multiply every element by 10 and display the result (`tensor` is not reassigned)
tensor * 10

tensor([10, 20, 30])

In [None]:
# Confirm that `tensor` still holds its original values
tensor

tensor([1, 2, 3])

In [None]:
# Subtract 10 from every element
tensor - 10

tensor([-9, -8, -7])

In [None]:
# Divide every element by 10 (the integer tensor yields a float result)
tensor / 10

tensor([0.1000, 0.2000, 0.3000])

In [None]:
# PyTorch's built-in functions give the same results as the *, +, - and / operators
print(torch.mul(tensor, 10))
print(torch.add(tensor, 10))
print(torch.sub(tensor, 10))
print(torch.div(tensor, 10))

tensor([10, 20, 30])
tensor([11, 12, 13])
tensor([-9, -8, -7])
tensor([0.1000, 0.2000, 0.3000])


## Matrix Multiplication

Two main ways of performing multiplication in neural networks and deep learning:

1. Element-wise Multiplication
2. Matrix Multiplication (dot product)

There are two main rules that matrix multiplication needs to satisfy:
1. The **inner dimensions** must match:
* `(3, 2) @ (3,2)` won't work
* `(2,3) @ (3,2)` will work
* `(3,2) @ (2,3)` will work

2. The resulting matrix has the shape of the **outer dimensions**:
* `(2,3) @ (3,2)` -> `(2,2)`
* `(3,2) @ (2,3)` -> `(3,3)`

Note: @ refers to matrix multiplication (dot product)

In [None]:
# Won't work - intended: (3, 2) @ (3, 2) has mismatched inner dimensions (2 vs 3).
# The error is caught and displayed so the demonstration is visible and the
# notebook still runs top to bottom.
try:
    torch.matmul(torch.rand(3, 2), torch.rand(3, 2))
except RuntimeError as error:
    print(f"RuntimeError: {error}")

In [None]:
# Will work: (3, 2) @ (2, 3) - inner dimensions match (2 and 2), result is (3, 3)
torch.matmul(torch.rand(3,2), torch.rand(2,3))

tensor([[0.3560, 0.0438, 0.7806],
        [0.6965, 0.3724, 1.0073],
        [0.5199, 0.3449, 0.6305]])

In [None]:
# Will work: (2, 3) @ (3, 2) - inner dimensions match (3 and 3), result is (2, 2)
torch.matmul(torch.rand(2,3), torch.rand(3,2))

tensor([[0.3792, 0.1035],
        [0.8022, 0.7795]])

In [None]:
# Element-wise multiplication: each element is multiplied by the element in the same position
print(tensor, "*", tensor)
print(f"Equals: {tensor * tensor}")

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


In [None]:
# Matrix multiplication with the built-in function: for two 1-D tensors this is
# the dot product, 1*1 + 2*2 + 3*3 = 14
torch.matmul(tensor, tensor)

tensor(14)

In [None]:
# `tensor` is unchanged by the multiplication
tensor

tensor([1, 2, 3])

In [None]:
# The same dot product written out by hand with plain Python integers
1*1 + 2*2 + 3*3

14

In [None]:
# `@` is the operator form of matrix multiplication
tensor @ tensor

tensor(14)

In [None]:
# Display the vector used by the timing cells that follow
tensor

tensor([1, 2, 3])

In [None]:
## Won't work - unintended (not sure why)
%%time
value = 0
for i in range(len(tensor)):
    value += tensor[i] * tensor[i]
value

TypeError: 'Tensor' object is not callable

In [None]:
%%time
# Time the built-in matrix multiplication on the same vector
torch.matmul(tensor,tensor)

CPU times: user 1.07 ms, sys: 12 µs, total: 1.08 ms
Wall time: 4 ms


tensor(14)

## One of the most common errors in deep learning is shape errors

In [None]:
# Two matrices that both have shape (3, 2), used to demonstrate a shape error
tensor_A = torch.tensor([[1, 2], [3, 4], [5, 6]])
tensor_B = torch.tensor([[7, 10], [8, 11], [9, 12]])

tensor_A.shape, tensor_B.shape


(torch.Size([3, 2]), torch.Size([3, 2]))

In [None]:
# Won't work (intended): tensor_A and tensor_B are both (3, 2), so their inner
# dimensions (2 and 3) do not match. The error is caught and displayed so the
# notebook still runs top to bottom.
try:
    torch.mm(tensor_A, tensor_B)  # for 2-D tensors torch.mm behaves the same as torch.matmul
except RuntimeError as error:
    print(f"RuntimeError: {error}")

We cannot multiply these two tensors because their inner dimensions do not match: `(3, 2) @ (3, 2)` pairs an inner dimension of 2 with one of 3.

To fix our tensor shape issues, we can manipulate the shape of one of our tensors using a **transpose**

A **transpose** switches the axes or dimensions of a given tensor

In [None]:
# tensor_B before transposing: shape (3, 2)
tensor_B, tensor_B.shape

(tensor([[ 7, 10],
         [ 8, 11],
         [ 9, 12]]),
 torch.Size([3, 2]))

In [None]:
# .T swaps the two dimensions: (3, 2) becomes (2, 3)
tensor_B.T, tensor_B.T.shape

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 torch.Size([2, 3]))

In [None]:
# With tensor_B transposed to (2, 3) the inner dimensions match, so the
# multiplication works and the result takes the outer dimensions: (3, 3)

print(f"Original shapes: \n tensor_A = {tensor_A.shape}, \n tensor_B = {tensor_B.shape}")
print(f"\nNew shapes: \n tensor_A = {tensor_A.shape}, \n tensor_B = {tensor_B.T.shape}")

print(f"\nMultiplying: {tensor_A.shape} @ {tensor_B.T.shape} - inner dimensions must match")
print(f"\nOutput:\n {torch.matmul(tensor_A, tensor_B.T)}")

print(f"\nOutput shape: {torch.matmul(tensor_A, tensor_B.T).shape}")


Original shapes: 
 tensor_A = torch.Size([3, 2]), 
 tensor_B = torch.Size([3, 2])

New shapes: 
 tensor_A = torch.Size([3, 2]), 
 tensor_B = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) @ torch.Size([2, 3]) - inner dimensions must match

Output:
 tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

Output shape: torch.Size([3, 3])


## Finding the min, max, mean, sum etc (tensor aggregation)

In [None]:
# Integer tensor 0, 10, ..., 90 used for the aggregation examples
x = torch.arange(start=0, end=100, step=10)

In [None]:
# Find the min - the function form and the method form give the same result
torch.min(x), x.min() # both syntax works

(tensor(0), tensor(0))

In [None]:
# Find the max - again available as a function and as a method
torch.max(x), x.max()

(tensor(90), tensor(90))

In [None]:
# Find the mean - won't work (intended)
# torch.mean() needs a floating-point (or complex) dtype, but x holds integers,
# so the call raises. The error is caught and displayed so the notebook still
# runs top to bottom.
try:
    torch.mean(x), x.mean()
except RuntimeError as error:
    print(f"RuntimeError: {error}")

In [None]:
# The datatype of the tensor
x.dtype

# int64 is "Long"
# torch.mean() only accepts floating-point (or complex) dtypes, so it rejects integer dtypes such as Long

torch.int64

In [None]:
# torch.mean() needs a floating-point dtype (not float32 specifically), so cast the tensor to float32 first
torch.mean(x.type(torch.float32)), x.type(torch.float32).mean()

(tensor(45.), tensor(45.))

In [None]:
# Find the sum - unlike the mean, this works directly on the integer tensor
torch.sum(x), x.sum()

(tensor(450), tensor(450))

## Finding the positional min and max

In [None]:
# Integer tensor 1, 11, ..., 91 used for the argmin/argmax examples
x = torch.arange(start=1, end=100, step=10)
x

tensor([ 1, 11, 21, 31, 41, 51, 61, 71, 81, 91])

In [None]:
# argmin() returns the index at which the minimum value occurs, not the value itself
x.argmin()

tensor(0)

In [None]:
# Index 0 (the argmin result) holds the minimum value, 1
x[0]

tensor(1)

In [None]:
# argmax() returns the index at which the maximum value occurs
x.argmax()

tensor(9)

In [None]:
# Index 9 (the argmax result) holds the maximum value, 91
x[9]

tensor(91)

## Reshaping, viewing, stacking, squeezing, unsqueezing, permuting tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - return a view of an input tensor of certain shape, but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack - vertical stack) or side by side (hstack - horizontal stack)
* Squeezing - removes all dimensions of size `1` from a tensor
* Unsqueezing - adds a dimension of size `1` to a target tensor
* Permuting - return a view of the input with dimensions permuted (swapped) in a certain way

In [None]:
# Float tensor holding 1.0 through 9.0, used for the reshaping examples
import torch

x = torch.arange(start=1.0, end=10.0)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [None]:
# Add an extra dimension of size 1 in front: (9,) -> (1, 9)
x_reshaped = x.reshape(1,9)
x_reshaped, x_reshaped.shape


(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
# Alternative reshape to (3, 3); left commented out so that x_reshaped keeps
# the (1, 9) shape shown in the squeeze example later on
# x_reshaped = x.reshape(3,3)

In [None]:
# Change the view: z presents the same data as x with shape (1, 9)
z = x.view(1,9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
# Changing z changes x, because a view of a tensor shares the same memory as the original input
z[:,0] = 5
z, x

# Because we changed the first value of z, the first value of x changed as well

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [None]:
# Stack four copies of x on top of each other: each copy becomes a row
x_stacked = torch.stack([x, x, x, x])
x_stacked

# dim is not given here, so the default (dim=0) is used

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [None]:
# Stack four copies of x side by side: with dim=1 each copy becomes a column
x_stacked = torch.stack([x, x, x, x], dim = 1)
x_stacked

# Here dim is set explicitly to 1 (it is not the default), giving 9 rows of 4 values

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [None]:
# torch.squeeze() - removes all dimensions of size 1 from a target tensor
# https://pytorch.org/docs/stable/generated/torch.squeeze.html

print(f"Previous tensor: {x_reshaped}")
print(f"Previous shape: {x_reshaped.shape}")

# Remove the size-1 dimension from x_reshaped: (1, 9) -> (9,)
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor:{x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
Previous shape: torch.Size([1, 9])

New tensor:tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
New shape: torch.Size([9])


In [None]:
# torch.unsqueeze() - adds a dimension of size 1 to a target tensor at a specific dim (dimension)
print(f"Previous tensor: {x_squeezed}")
print(f"Previous shape: {x_squeezed.shape}")

# Add the extra dimension back at position 0: (9,) -> (1, 9)
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print(f"\nNew tensor: {x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

Previous tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
Previous shape: torch.Size([9])

New tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
New shape: torch.Size([1, 9])


In [None]:
# torch.permute - returns the target tensor with its dimensions rearranged in the given order
x_original = torch.rand(224, 224, 3)  # (height, width, colour channels)

# Move the colour-channel axis to the front: old dim 2 -> 0, old dim 0 -> 1, old dim 1 -> 2
x_permuted = torch.permute(x_original, dims=(2, 0, 1))
# The method form, x_original.permute(2, 0, 1), gives the same result

print(f"Original shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}") # colour channels, height, width

Original shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


In [None]:
# Since permute returns a view, x_permuted shares the same memory as x_original.
# Therefore, writing through x_permuted also changes x_original.

x_permuted[0, 0, 0] = 100

x_original[0, 0, 0]

tensor(100.)

## Indexing (selecting data from tensors)

Indexing with PyTorch is similar to indexing with NumPy.

In [None]:
# Build a (1, 3, 3) tensor holding the integers 1 through 9

import torch

x = torch.arange(start=1, end=10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [None]:
# Index on the first (outermost) bracket (dim=0)
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [None]:
# Index on the middle bracket (dim=1): the first row of the 3x3 matrix
x[0][0]
# x[0, 0] works too!

tensor([1, 2, 3])

In [None]:
# Let's index on the last (innermost) bracket (dim=2): a single element
x[0][0][0]

tensor(1)