# 00. PyTorch Fundamentals

In [1]:
import numpy as np
import torch

In [2]:
# Report the PyTorch build and whether a CUDA device is usable.
print(f'PyTorch version: {torch.__version__}')
print(f'CUDA available: {torch.cuda.is_available()}')
# get_device_name(0) raises on machines without CUDA, so only query it when a GPU is present
gpu_name = torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'N/A (CPU only)'
print(f'GPU name: {gpu_name}')

PyTorch version: 2.5.1+cu121
CUDA available: True
GPU name: NVIDIA GeForce RTX 4060 Ti


In [3]:
print('Hello I am Learning PyTorch!')

Hello I am Learning PyTorch!


## Introduction to Tensors

### Creating Tensors

In [4]:
# Scalar

scalar = torch.tensor(7)
scalar

tensor(7)

In [5]:
scalar.ndim

0

In [6]:
# Get tensor back as Python
scalar.item()

7

In [7]:
# Vector 
vector = torch.tensor([7,7])
vector

tensor([7, 7])

In [8]:
vector.ndim

1

In [9]:
vector.shape

torch.Size([2])

In [10]:
# MATRIX
MATRIX = torch.tensor([[7,8],
                       [8,9]])
MATRIX

tensor([[7, 8],
        [8, 9]])

In [11]:
MATRIX.ndim

2

In [12]:
MATRIX[0]

tensor([7, 8])

In [13]:
MATRIX[1]

tensor([8, 9])

In [14]:
MATRIX.shape

torch.Size([2, 2])

In [15]:
#TENSOR
TENSOR = torch.tensor([[[1,2,3],
                        [3,6,9],
                        [2,4,6]]])
TENSOR

tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 6]]])

In [16]:
TENSOR.ndim

3

In [17]:
TENSOR[0]

tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 4, 6]])

In [18]:
TENSOR.shape

torch.Size([1, 3, 3])

### Random Tensors

Why random Tensors?

Random tensors are important because neural networks learn by starting with tensors full of random numbers and then adjusting those random numbers to better represent the data.

`Start with random numbers -> Look at data -> update random numbers -> Look at Data -> update random numbers`

In [19]:
# Create a random tensor of size (3,4)
random_tensor = torch.rand(3,4)
random_tensor

tensor([[0.9750, 0.8483, 0.3081, 0.1440],
        [0.2912, 0.8292, 0.5094, 0.3943],
        [0.1684, 0.8902, 0.8992, 0.8973]])

In [20]:
random_tensor.ndim

2

In [21]:
#create a random tensor with similar shape to an image tensor
random_image_size_tensor = torch.rand(size=(224,224,3)) # height, width, colour channel (R,G,B)
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

## Zeros and Ones

In [22]:
#Create a tensor of all zeros
zeros = torch.zeros(size=(3,4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [23]:
#Create a tensor of all ones
ones = torch.ones(size=(3,4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [24]:
ones.dtype

torch.float32

## Range of tensors and tensors-like

In [25]:
# Use torch.arange() (torch.range() is deprecated); the end value is exclusive
one_to_ten = torch.arange(1,11)
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [26]:
one_to_ten = torch.arange(start=1,end=11)
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [27]:
one_to_ten = torch.arange(start=1,end=11,step=2)
one_to_ten

tensor([1, 3, 5, 7, 9])

In [28]:
# Creating tensors like
ten_zeros = torch.zeros_like(one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0])

## Tensor Datatypes

**Note**: Tensor datatypes are one of the 3 big sources of errors that you'll run into with PyTorch and Deep Learning:
1. Tensors not right datatype
2. Tensors not right shape
3. Tensors not on the right device

Precision in computing-->
[Link](https://en.wikipedia.org/wiki/Precision_(computer_science))

In [29]:
#Float 32 Tensor
# The keyword arguments below are spelled out with their default values for illustration.
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None,  # None -> dtype is inferred from the data (float32 for Python floats)
                               device=None,  # None -> tensor is created on PyTorch's default device
                               requires_grad=False)  # do not track gradients for this tensor
float_32_tensor

tensor([3., 6., 9.])

In [30]:
float_32_tensor.dtype

torch.float32

In [31]:
# Create a float 16 Tensor
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [32]:
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

In [33]:
# Create a tensor
some_tensor = torch.rand(3,4)
some_tensor

tensor([[0.0488, 0.9646, 0.7208, 0.5527],
        [0.0358, 0.8431, 0.0239, 0.2613],
        [0.8031, 0.4196, 0.5411, 0.0121]])

In [34]:
#Find the details about the tensor
print(some_tensor)
print(f'Datatype of tensor: {some_tensor.dtype}')
print(f'Shape of tensor: {some_tensor.shape}')
print(f'Device tensor is on: {some_tensor.device}')

tensor([[0.0488, 0.9646, 0.7208, 0.5527],
        [0.0358, 0.8431, 0.0239, 0.2613],
        [0.8031, 0.4196, 0.5411, 0.0121]])
Datatype of tensor: torch.float32
Shape of tensor: torch.Size([3, 4])
Device tensor is on: cpu


### Manipulating Tensors (tensor operations)

Tensor operations include:
* Addition
* Subtraction
* Multiplication
* Division
* Matrix Multiplication

In [35]:
tensor = torch.rand(3,4)
tensor

tensor([[0.6738, 0.3895, 0.9136, 0.1445],
        [0.5223, 0.0860, 0.3727, 0.1176],
        [0.4496, 0.6156, 0.1067, 0.9887]])

In [36]:
#Addition
tensor + 10

tensor([[10.6738, 10.3895, 10.9136, 10.1445],
        [10.5223, 10.0860, 10.3727, 10.1176],
        [10.4496, 10.6156, 10.1067, 10.9887]])

In [37]:
#Multiplication
tensor * 10

tensor([[6.7379, 3.8955, 9.1355, 1.4446],
        [5.2230, 0.8595, 3.7273, 1.1765],
        [4.4965, 6.1560, 1.0674, 9.8866]])

In [38]:
#Subtraction
tensor - 10

tensor([[-9.3262, -9.6105, -9.0864, -9.8555],
        [-9.4777, -9.9140, -9.6273, -9.8824],
        [-9.5504, -9.3844, -9.8933, -9.0113]])

In [39]:
#Pytorch in-built functions
torch.mul(tensor, 10)

tensor([[6.7379, 3.8955, 9.1355, 1.4446],
        [5.2230, 0.8595, 3.7273, 1.1765],
        [4.4965, 6.1560, 1.0674, 9.8866]])

## Matrix Multiplication

Two main ways of performing multiplication in neural networks and deep learning
1. Element-wise multiplication
2. Dot Product

In [40]:
#Element wise multiplication
tensor = torch.tensor([1,2,3])
print(tensor, "*", tensor)

tensor([1, 2, 3]) * tensor([1, 2, 3])


In [41]:
print(f'Equals: {tensor*tensor}')

Equals: tensor([1, 4, 9])


In [42]:
# Matrix Multiplication using pytorch function
torch.matmul(tensor,tensor)

tensor(14)

## One of the most common errors in deep learning: shape errors

In [43]:
# Deliberate shape mismatch: tensor_A is 3x2 and tensor_B is 3x3, so the inner
# dimensions (2 and 3) differ and matmul raises a RuntimeError.
# The error is caught and printed so the notebook still runs top to bottom.
tensor_A = torch.tensor([[1,2],
                         [3,4],
                         [5,6]])
tensor_B = torch.tensor([[7,8,9],
                         [10,11,12],
                         [13,14,15]])
try:
    torch.matmul(tensor_A, tensor_B)
except RuntimeError as error:
    print(f'{type(error).__name__}: {error}')

RuntimeError: mat1 and mat2 shapes cannot be multiplied (3x2 and 3x3)

### Inner dimensions should match

* Transpose - swaps the rows and columns of a matrix. `tensor_A` is 3x2 and `tensor_B` is 3x3, so the inner dimensions (2 and 3) do not match. Transposing `tensor_A` with `tensor_A.T` makes it 2x3, so the inner dimensions (3 and 3) now match.

In [44]:
#Using transpose method
torch.matmul(tensor_A.T, tensor_B)

tensor([[102, 111, 120],
        [132, 144, 156]])

In [45]:
# Create a 3x2 tensors
tensor_A = torch.rand(3,2)
tensor_B = torch.rand(3,2)
tensor_A.shape, tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

### Exercise:
Apply a matrix multiplication in tensor_A and tensor_B
Question:
What do you think will happen?
Reason:

## Tensor Aggregation 
### Finding Mean, Max, Min, Sum and etc.

In [46]:
#Create a tensor
x = torch.arange(0,100)
x

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10, 11, 12, 13, 14, 15, 16, 17,
        18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31, 32, 33, 34, 35,
        36, 37, 38, 39, 40, 41, 42, 43, 44, 45, 46, 47, 48, 49, 50, 51, 52, 53,
        54, 55, 56, 57, 58, 59, 60, 61, 62, 63, 64, 65, 66, 67, 68, 69, 70, 71,
        72, 73, 74, 75, 76, 77, 78, 79, 80, 81, 82, 83, 84, 85, 86, 87, 88, 89,
        90, 91, 92, 93, 94, 95, 96, 97, 98, 99])

In [47]:
# Find the min
torch.min(x)

tensor(0)

In [48]:
# Find the max
torch.max(x)

tensor(99)

In [49]:
#Find the mean
# torch.mean() rejects integer (Long) tensors. The error is the point of this cell,
# so it is caught and printed to keep Restart & Run All working.
try:
    torch.mean(x)
except RuntimeError as error:
    print(f'{type(error).__name__}: {error}')

RuntimeError: mean(): could not infer output dtype. Input dtype must be either a floating point or complex dtype. Got: Long

In [50]:
x.dtype

torch.int64

>The data type is int64

[Reference](https://docs.pytorch.org/docs/stable/tensor_attributes.html)

> Convert the Long Datatype to `floating point or complex.`

`torch.mean()` input must be floating point or complex. see Reference
[Reference](https://docs.pytorch.org/docs/stable/tensor_attributes.html)

In [51]:
# Convert the datatype to floating point (float32) before taking the mean
torch.mean(x.type(torch.float32))

tensor(49.5000)

In [52]:
#convert to complex
torch.mean(x.type(torch.cfloat))

tensor(49.5000+0.j)

In [53]:
#Convert to float64
torch.mean(x.type(torch.double))

tensor(49.5000, dtype=torch.float64)

In [54]:
#Finding the sum
torch.sum(x)

tensor(4950)

In [55]:
#Finding the position of the minimum value
torch.argmin(x)

tensor(0)

In [56]:
x.argmin()

tensor(0)

In [57]:
#Finding the position of the max value
torch.argmax(x)

tensor(99)

In [58]:
x.argmax()

tensor(99)

In [59]:
# Create a tensor 
x = torch.arange(1,100,5)
x

tensor([ 1,  6, 11, 16, 21, 26, 31, 36, 41, 46, 51, 56, 61, 66, 71, 76, 81, 86,
        91, 96])

In [60]:
torch.argmax(x)

tensor(19)

In [61]:
torch.argmin(x)

tensor(0)

## Reshaping, Stacking, Squeezing and Unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
[Reshaping Documentation](https://docs.pytorch.org/docs/stable/generated/torch.reshape.html)
* View - Returns a view of an input tensor of a certain shape but keeps the same memory as the original tensor
* Stacking - combines multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - removes all dimensions of size `1` from a tensor
* Unsqueeze - adds a dimension of size `1` to a target tensor
* Permute - Returns a view of the input with dimensions permuted (swapped) in a certain way

In [62]:
x = torch.arange(1.,11.)
x, x.shape

(tensor([ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]), torch.Size([10]))

In [63]:
# Add an extra dimension
x_reshaped = x.reshape(1,10)
x_reshaped, x_reshaped.shape

(tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 torch.Size([1, 10]))

> torch.reshape() - Returns a tensor with the same data and number of elements as input

In [64]:
# Change the view
z = x.view(1,10)
z, z.shape

(tensor([[ 1.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 torch.Size([1, 10]))

In [65]:
#Changing z changes x ( view of a tensor shares the same memory as the original input)
z[:, 0] = 3
z, x

(tensor([[ 3.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 tensor([ 3.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]))

In [74]:
# Stack tensors on top of each other
x_stacked = torch.stack([x,x,x,x], dim=0)
x_stacked

tensor([[ 3.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 3.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 3.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.],
        [ 3.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]])

In [75]:
x_reshaped.shape

torch.Size([1, 10])

In [76]:
x_reshaped.ndim

2

In [72]:
# Squeeze
# NOTE: x_reshaped has shape (1, 10). squeeze(1) targets dim 1, which has size 10,
# so nothing is removed and the result keeps shape (1, 10).
# To drop the size-1 dimension, use squeeze(0) or squeeze() instead.
x_squeezed = x_reshaped.squeeze(1)
x_squeezed, x_squeezed.shape

(tensor([[ 3.,  2.,  3.,  4.,  5.,  6.,  7.,  8.,  9., 10.]]),
 torch.Size([1, 10]))

In [73]:
x_squeezed.ndim

2

In [77]:
%autocall 1

Automatic calling is: Smart


In [95]:
x = torch.tensor([[[1,2,3],[5,6,7]]])
x

tensor([[[1, 2, 3],
         [5, 6, 7]]])

In [96]:
x.ndim

3

In [97]:
x.squeeze()

tensor([[1, 2, 3],
        [5, 6, 7]])

In [98]:
x.ndim

3

In [100]:
# dim 1 of x has size 2, so squeezing it is a no-op (only size-1 dimensions are removed)
torch.squeeze(x,1)

tensor([[[1, 2, 3],
         [5, 6, 7]]])

In [101]:
torch.unsqueeze(x, dim=0)

tensor([[[[1, 2, 3],
          [5, 6, 7]]]])

In [102]:
torch.unsqueeze(x, dim=1)

tensor([[[[1, 2, 3],
          [5, 6, 7]]]])

In [114]:
# Permute
#torch.permute(input, (dims)) - returns a view of the original tensor input with its dimensions permuted

#Create a tensor
x = torch.rand(size=(224,224,3))

In [113]:
# Permute the original tensor: move the last axis (dim 2) to the front
x_permuted = torch.permute(x, (2,0,1))
x_permuted.size()

torch.Size([3, 224, 224])

In [115]:
print(f'Previous shape: {x.shape}')
print(f'New shape: {x_permuted.shape}')

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


In [119]:
# permute returns a view, so x_permuted[0,0,0] reads the same element as x[0,0,0]
# and the two values are equal when the cells are run in order.
# NOTE(review): the saved output shows different values, which suggests x was
# re-created after x_permuted was built (cells executed out of order) - re-run to confirm.
x[0,0,0], x_permuted[0,0,0]

(tensor(0.9345), tensor(0.6517))

## Indexing

In [120]:
#Create a tensor
x = torch.arange(1,10).reshape(1,3,3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [121]:
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [124]:
x[0,1,1]

tensor(5)

In [125]:
# Index the tensor with output 6
x[0,1,2]

tensor(6)

In [126]:
#Select all of target dimension
x[:,0]

tensor([[1, 2, 3]])

In [127]:
x[:,1]

tensor([[4, 5, 6]])

In [128]:
x[:,:,1]

tensor([[2, 5, 8]])

In [131]:
# Get all values of the 0th and 1st dimensions but only index 1 of the 2nd dimension
x[:,:,1]

tensor([[2, 5, 8]])

In [132]:
# Get all values of the 0th dimension but only index 1 of the 1st and 2nd dimensions
x[:,1,1]

tensor([5])

In [133]:
# Get the index 0 of 0th and 1st dimension and all values of 2nd dimension
x[0,0,:]

tensor([1, 2, 3])

## PyTorch Tensors and Numpy

* Data in NumPy, want in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

In [134]:
#Numpy array to Tensor
# torch.from_numpy keeps the NumPy dtype, so NumPy's default float64 array
# produces a float64 tensor (PyTorch's own default would be float32).
array = np.arange(1.0,10.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7., 8., 9.]),
 tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=torch.float64))

In [135]:
array.dtype, tensor.dtype

(dtype('float64'), torch.float64)

In [143]:
# Build the array as float32 so the tensor created from it is float32 as well.
array = np.arange(1., 10., dtype=np.float32)
tensor = torch.from_numpy(array)
# Only the last expression of a cell is displayed, so show just the dtypes
array.dtype, tensor.dtype

(dtype('float32'), torch.float32)

## Reproducibility

Neural Network learns:
start with random numbers -> tensor operations -> update random numbers to try and make them better representations of the data -> again -> again -> again...

To reduce the randomness in neural networks, PyTorch comes with the concept of a **random seed**.

In [146]:
torch.rand(3,3)

tensor([[0.0432, 0.9308, 0.0693],
        [0.2714, 0.8684, 0.5089],
        [0.8230, 0.4088, 0.2550]])

In [147]:
#Create two random tensors
random_tensor_A = torch.rand(3,4)
random_tensor_B = torch.rand(3,4)

# Without a seed the two draws differ, so the element-wise comparison is all False
print(random_tensor_A)
print(random_tensor_B)
print(random_tensor_A == random_tensor_B)

tensor([[0.2567, 0.3251, 0.0532, 0.7345],
        [0.5613, 0.0075, 0.9866, 0.3734],
        [0.4140, 0.6060, 0.9816, 0.9705]])
tensor([[0.1770, 0.2239, 0.0018, 0.1958],
        [0.8273, 0.6908, 0.9147, 0.5921],
        [0.7099, 0.4125, 0.9824, 0.6218]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [148]:
torch.rand(3,4) == torch.rand(3,4)

tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [153]:
# Reproducible random tensors: re-seed the global RNG before every draw
def _seeded_rand(seed_value, *shape):
    """Seed PyTorch's global RNG with seed_value, then draw a uniform tensor of the given shape."""
    torch.manual_seed(seed_value)
    return torch.rand(*shape)

seed = 42
random_tensor_A = _seeded_rand(seed, 3, 4)
random_tensor_B = _seeded_rand(seed, 3, 4)

# Same seed before each draw -> identical values
random_tensor_A == random_tensor_B

tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

## GPU Access

In [154]:
# Check for GPU access with PyTorch
torch.cuda.is_available()

True

In [155]:
# Setup device agnostic code
device = 'cuda' if torch.cuda.is_available() else 'cpu'
device

'cuda'

In [156]:
# Count number of devices
torch.cuda.device_count()

1

In [157]:
# Using tensors/models on the GPU
tensor = torch.tensor([1,2,3])

# Tensor not on GPU yet (it is created on the CPU by default)
print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [158]:
#Move tensor to GPU
tensor_gpu = tensor.to(device)
tensor_gpu

tensor([1, 2, 3], device='cuda:0')