# 00. PyTorch Fundamentals

In [1]:
import torch
import numpy as np
import pandas as pd


print(torch.__version__)
# torch.cuda.get_device_name() raises when no CUDA device is present, so only
# query it when CUDA is available; this keeps the first cell runnable on
# CPU-only machines (Restart Kernel -> Run All).
if torch.cuda.is_available():
    print(torch.cuda.get_device_name())
else:
    print("No CUDA device available - running on CPU")

1.13.0
NVIDIA GeForce RTX 3070 Laptop GPU


## Introduction to Tensors

### Creating Tensors

PyTorch tensors are created using [`torch.tensor()`](https://pytorch.org/docs/stable/tensors.html)

In [2]:
# Creating a scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [3]:
# Number of dimensions
print(scalar.ndim)

0


In [4]:
# Get tensor back as python int
scalar.item()

7

In [5]:
# Vector
vector = torch.tensor([10, 7])
print(vector)

# Get the dimensions of the vector
print(vector.ndim)

# Get the shape of the vector
print(vector.shape)

tensor([10,  7])
1
torch.Size([2])


In [6]:
# MATRIX

MATRIX = torch.tensor([[7, 8],
                       [9, 10]])

print(MATRIX)

# Get the dimensions of the matrix
print(MATRIX.ndim)


# access the first row of the matrix
print(MATRIX[0])

# get the shape of the matrix
print(MATRIX.shape)

tensor([[ 7,  8],
        [ 9, 10]])
2
tensor([7, 8])
torch.Size([2, 2])


In [7]:
# TENSOR 
TENSOR = torch.tensor([[[1, 2, 3],
                        [4, 5, 6],
                        [7, 8, 9]],
                        [[10, 11, 12],
                        [13, 15, 16],
                        [17, 18, 10]]])

print(TENSOR)

# Get the dimensions of the tensor
print(TENSOR.ndim)

# Get the shape of the tensor
print(TENSOR.shape)

tensor([[[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8,  9]],

        [[10, 11, 12],
         [13, 15, 16],
         [17, 18, 10]]])
3
torch.Size([2, 3, 3])


### Random tensors

Why random tensors?

Random tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.

`Start with random numbers -> Look at the data -> update random numbers -> look at the data`

[Torch random tensors](https://pytorch.org/docs/stable/generated/torch.rand.html)

In [8]:
# Create random tensor of size (1, 3, 4)
random_tensor = torch.rand(1, 3, 4)
random_tensor

tensor([[[0.4699, 0.9621, 0.6741, 0.8279],
         [0.6760, 0.3412, 0.4338, 0.1561],
         [0.2489, 0.0171, 0.7618, 0.8348]]])

In [9]:
# Get the dimensions of the random tensor
random_tensor.ndim

3

In [10]:
# Get the shape of the random tensor
random_tensor.shape

torch.Size([1, 3, 4])

In [11]:
# Create random tensor with similar shape to an image tensor
random_image_tensor = torch.rand(size=(224, 224, 3)) # height, width , colour channels

random_image_tensor.shape, random_image_tensor.ndim

(torch.Size([224, 224, 3]), 3)

### Zeros and ones

In [12]:
# Create a tensor of all zeros
zeros = torch.zeros(size=(3, 4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [13]:
zeros * random_tensor

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])

In [14]:
# Create a tensor of all ones
ones = torch.ones(size=(3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

### Creating a range and tensors-like


In [15]:
# Use torch.arange
# NOTE: despite the name, this range starts at 0 (values 0..10); `end` is exclusive.
one_to_ten = torch.arange(0, 11) # start, end (no step is passed here)
one_to_ten

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [16]:
# Creating tensor-like
ten_zeros = torch.zeros_like(input=one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor Datatypes

**Note:** Tensor datatypes are one of the 3 big errors you'll run into with PyTorch & deep learning:
1. Tensors not the right datatype
2. Tensors not the right shape
3. Tensors not on the right device

In [17]:
# Float 32 type tensor
float_32_tensor =  torch.tensor([3.0, 6.0, 9.0],
                                dtype=None, # what datatype the tensor is (e.g. float32, float64, float16)
                                device=None, # what device the tensor is on
                                requires_grad=False ) # whether or not to track gradients for operations on this tensor
float_32_tensor

tensor([3., 6., 9.])

In [18]:
# Float 16 tensor 
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [19]:
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

In [20]:
# int32 tensor
int_32_tensor = torch.tensor([3, 6, 9], 
                            dtype=torch.int32)
int_32_tensor

tensor([3, 6, 9], dtype=torch.int32)

In [21]:
int_32_tensor * float_32_tensor

tensor([ 9., 36., 81.])

### Getting information from the tensors (tensor attributes)

1. Tensors not the right datatype - to get the datatype of a tensor, use `tensor.dtype`
2. Tensors not the right shape - to get the shape of a tensor, use `tensor.shape`
3. Tensors not on the right device - to get the device of a tensor, use `tensor.device`

In [22]:
# Create a tensor
some_tensor = torch.rand(3,4)
some_tensor

tensor([[0.2538, 0.1446, 0.7415, 0.4370],
        [0.4720, 0.2544, 0.4547, 0.9002],
        [0.5082, 0.5922, 0.3021, 0.1562]])

In [23]:
# Find out details about some tensor
print(some_tensor)
print(f"Datatype of the tensor: {some_tensor.dtype}")
print(f"Shape of the tensor: {some_tensor.shape}")
print(f"Device tensor is on: {some_tensor.device}")

tensor([[0.2538, 0.1446, 0.7415, 0.4370],
        [0.4720, 0.2544, 0.4547, 0.9002],
        [0.5082, 0.5922, 0.3021, 0.1562]])
Datatype of the tensor: torch.float32
Shape of the tensor: torch.Size([3, 4])
Device tensor is on: cpu


In [24]:
# Create another tensor and get some information from it
some_tensor = torch.rand((10, 7),
                        device='cpu')
print(some_tensor)
print("Datatype of the tensor: ", some_tensor.dtype)
print("Shape of the tensor: ", some_tensor.shape)
print("Device the tensor is on: ", some_tensor.device)

tensor([[0.2125, 0.8948, 0.4261, 0.5214, 0.2426, 0.3199, 0.4411],
        [0.7027, 0.0527, 0.1050, 0.3207, 0.5803, 0.8879, 0.3182],
        [0.8401, 0.2370, 0.4830, 0.6242, 0.5594, 0.4550, 0.0556],
        [0.4243, 0.6137, 0.9849, 0.1319, 0.5327, 0.9259, 0.1806],
        [0.3497, 0.8135, 0.0220, 0.7252, 0.4221, 0.6558, 0.6345],
        [0.0358, 0.9184, 0.0048, 0.3173, 0.6671, 0.4517, 0.3803],
        [0.8265, 0.6834, 0.0873, 0.9524, 0.5834, 0.0318, 0.2617],
        [0.3393, 0.4753, 0.6690, 0.3471, 0.7613, 0.3544, 0.4508],
        [0.3523, 0.1190, 0.9961, 0.3075, 0.5861, 0.1710, 0.9059],
        [0.8348, 0.3848, 0.0689, 0.5781, 0.5385, 0.3313, 0.8745]])
Datatype of the tensor:  torch.float32
Shape of the tensor:  torch.Size([10, 7])
Device the tensor is on:  cpu


### Manipulating Tensors (tensor operations)

Tensor operations include:
* Addition
* Subtraction
* Multiplication(element-wise)
* Division
* Matrix multiplication  

In [25]:
# Create a tensor and add 10 to it
tensor = torch.tensor([1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [26]:
# Multiply tensor by 10 (returns a new tensor; `tensor` itself is not modified)
tensor * 10

tensor([10, 20, 30])

In [27]:
tensor

tensor([1, 2, 3])

In [28]:
# Subtract 10 from the tensor
tensor - 10

tensor([-9, -8, -7])

In [29]:
# Try out PyTorch inbuilt functions
torch.mul(tensor, 10),  torch.add(tensor, 10)

(tensor([10, 20, 30]), tensor([11, 12, 13]))

### Matrix multiplication
There are two main ways of performing multiplication in deep learning:
1. Element-wise multiplication
2. Matrix multiplication (dot product)

There are two main rules that performing matrix multiplication needs to satisfy:
1. The **inner dimensions** must match:
  * `(3, 2) @ (3, 2)` won't work
  * `(2, 3) @ (3, 2)` will work
  * `(3, 2) @ (2, 3)` will work

2. The resulting matrix has the shape of the **outer dimensions**:
* `(2, 3) @ (3, 2) ` -> `(2, 2)`
* `(3, 2) @ (2, 3)` -> `(3, 3)` 

In [30]:
# Element wise multiplication
print(tensor, "*", tensor)
print("Equals: ", (tensor*tensor))

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals:  tensor([1, 4, 9])


In [31]:
# Matrix multiplication
torch.matmul(tensor, tensor)

tensor(14)

In [32]:
# matrix multiplication using traditional loops
%time
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]
print(value)

CPU times: total: 0 ns
Wall time: 0 ns
tensor(14)


In [33]:
%time
torch.matmul(tensor, tensor)

CPU times: total: 0 ns
Wall time: 0 ns


tensor(14)

### Shape errors are one of the most common errors in deep learning

In [34]:
# Shapes for matrix multiplication
tensor_A = torch.tensor([[1, 2],
                         [3, 4], 
                         [5, 6]])

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]])

# Both tensors are (3, 2), so the inner dimensions (2 and 3) do not match and the
# call below would fail with a shape error; it is left commented out on purpose.
# torch.mm only handles 2-D matrices and does not broadcast; torch.matmul is the general version.
# torch.mm(tensor_A, tensor_B)

In [35]:
tensor_A.shape, tensor_B.shape

(torch.Size([3, 2]), torch.Size([3, 2]))

To fix our shape issues, we can manipulate the shape of one of our tensors using **transpose**

A **transpose** switches the axes or dimensions of a given tensor.

In [36]:
tensor_B.T, tensor_B.T.shape

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 torch.Size([2, 3]))

In [37]:
# Now we perform matrix multiplication: (3, 2) @ (2, 3) -> (3, 3)
torch.matmul(tensor_A, tensor_B.T)

tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

In [38]:
# Finding the min, max, mean, sum, etc. (tensor aggregation)
# Create a tensor
X = torch.arange(0, 100, 10)
X

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [39]:
# Find the min
torch.min(X), X.min()

(tensor(0), tensor(0))

In [40]:
# Find max 
torch.max(X), X.max()

(tensor(90), tensor(90))

In [41]:
# Find the mean. Note - X is an integer tensor, so it is cast to float32 first
# (torch.mean does not accept integer dtypes)
torch.mean(X.type(torch.float32)), X.type(torch.float32).mean()

(tensor(45.), tensor(45.))

In [42]:
# Find the sum
torch.sum(X), X.sum()

(tensor(450), tensor(450))

### Finding the positional min and max values

In [43]:
# Finding the positional max
torch.argmax(X), X.argmax()

(tensor(9), tensor(9))

In [44]:
# Finding the positional min
torch.argmin(X), X.argmin()

(tensor(0), tensor(0))

## Reshaping, stacking , squeezing and unsqueezing tensors

* Reshaping - reshapes an input tensor to a defined shape
* View - returns a view of an input tensor that keeps the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - removes all `1` dimensions from a tensor
* Unsqueeze - add a `1` dimension to a tensor
* Permute - Return a view of the input with dimensions permuted(swapped) in a certain way



In [45]:
# Let's create a tensor
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [46]:
# Add an extra dimension
x_reshaped = x.reshape(9, 1)
x_reshaped, x_reshaped.shape

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [47]:
# Change the view
z = x.view(1, 9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [48]:
# Changing z changes x (because a view of a tensor shares the same memory as the original tensor)
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [49]:
# Stack four copies of x along a new dimension
# (dim=1 lays the copies side by side as columns, giving shape (9, 4))
x_stacked  = torch.stack([x, x, x, x], dim=1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [50]:
# torch.squeeze() - removes all the one dimensions from a target tensor
print("Previous tensor: ", x_reshaped)
print("Previous shape: ", x_reshaped.shape)

# Removes the extra dimensions from the x_reshaped
x_squeezed = x_reshaped.squeeze()
print("\nNew Tensor: ", x_squeezed)
print("New shape: ", x_squeezed.shape)

Previous tensor:  tensor([[5.],
        [2.],
        [3.],
        [4.],
        [5.],
        [6.],
        [7.],
        [8.],
        [9.]])
Previous shape:  torch.Size([9, 1])

New Tensor:  tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
New shape:  torch.Size([9])


In [51]:
# torch.unsqueeze - adds a single dimension to a target tensor at a specific dim (dimension)
print("Previous target: ", x_squeezed)
print("Previous shape: ", x_squeezed.shape)

# Add an extra dimension with unsqueeze
x_unsqueezed = x_squeezed.unsqueeze(dim=0)
print('\n New tensor: ', x_unsqueezed)
print("New shape: ", x_unsqueezed.shape)

Previous target:  tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
Previous shape:  torch.Size([9])

 New tensor:  tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
New shape:  torch.Size([1, 9])


In [52]:
# torch.permute - rearranges the dimensions of the tensor to specified order
x_original = torch.rand(size=(224, 224, 3)) # height, width , color channels


# Permute the original tensor to rearrange the axis (or dim) order
x_permuted = x_original.permute(2, 0, 1) # shifts axis 0->1, 1->2, 2->0

print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permuted.shape}") # color channels, height, width

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


## Indexing (selecting data from tensors)
Indexing with PyTorch is similar to indexing with NumPy

In [53]:
# Create a tensor
x = torch.arange(1, 10).reshape(1, 3, 3)
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [54]:
# Let's index on our new tensor
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [55]:
# Let's index on the middle bracket (dim=1)
x[0][0]

tensor([1, 2, 3])

In [56]:
# Let's index on the most inner bracket
x[0][0][0]

tensor(1)

In [57]:
# You can also use ":" to select "all" of a target dimension
x[:, 0]

tensor([[1, 2, 3]])

In [58]:
# Get all values of 0th and 1st dimension but only index 1 of 2nd dimension
x[:, :, 1]

tensor([[2, 5, 8]])

In [59]:
# Get all values of 0 dimension but only the 1 index value of the 1st and 2nd dimension
x[:, 1, 1]

tensor([5])

In [60]:
# Get index 0 of 0th and 1st dimension and all values of 2nd dimension
x[0, 0, :]

tensor([1, 2, 3])

In [61]:
# Index on x to return 9
# (this value is not displayed: a cell only shows the value of its last expression)
x[0, 2, 2]

# Index on x to return 3, 6, 9
x[:, :, 2]

tensor([[3, 6, 9]])

## PyTorch tensors & NumPy

NumPy is a popular scientific Python numerical computing library

And because of this, PyTorch has functionality to interact with it.

* Data in NumPy , want in PyTorch tensor ->  `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

In [63]:
# Numpy array to tensor
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array) # warning - when converting numpy to pytorch, pytorch reflects numpy's default datatype i.e float64
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [64]:
array.dtype, tensor.dtype

(dtype('float64'), torch.float64)

In [65]:
# Change the value of array, what will this do to `tensor`?
# `array + 1` builds a NEW array and rebinds the name `array`, so `tensor`
# still refers to the original data and is unchanged below.
# NOTE: torch.from_numpy shares memory with its input, so an in-place update
# (e.g. `array += 1`) would show up in `tensor` as well.
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [66]:
# Tensor to numpy array
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [67]:
# Change the tensor, what happens to numpy tensor?
# `tensor + 1` builds a NEW tensor and rebinds the name `tensor`, so
# `numpy_tensor` still refers to the original data and is unchanged below.
# NOTE: Tensor.numpy() shares memory with a CPU tensor, so an in-place update
# (e.g. `tensor += 1`) would show up in `numpy_tensor` as well.
tensor = tensor + 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility (trying to take the random out of random)

In short how the neural network learns:

`start with random numbers -> tensor operations -> update random numbers and make them better representations of the data -> again -> again -> again`

To reduce the randomness in neural networks and PyTorch comes the concept of **random seed**

Essentially what the random seed does is "flavour" the randomness.

In [85]:
import torch

# Create random tensors
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(random_tensor_A)
print(random_tensor_B)
print(random_tensor_A == random_tensor_B)

tensor([[0.0878, 0.2581, 0.9000, 0.5993],
        [0.3151, 0.7136, 0.4509, 0.6292],
        [0.0757, 0.5871, 0.0907, 0.1619]])
tensor([[0.9588, 0.9965, 0.0879, 0.7730],
        [0.3766, 0.7892, 0.1305, 0.9632],
        [0.9690, 0.5998, 0.5547, 0.7703]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [87]:
# Reproducible "random" tensors: seeding the generator with the same value
# before each draw makes both draws produce identical numbers.
RANDOM_SEED = 42

# First draw, taken right after seeding.
torch.manual_seed(seed=RANDOM_SEED)
random_tensor_C = torch.rand(size=(3, 4))

# Reset the generator to the same seed so the second draw repeats the first.
torch.manual_seed(seed=RANDOM_SEED)
random_tensor_D = torch.rand(size=(3, 4))

# Show both tensors, then their element-wise comparison.
for seeded_tensor in (random_tensor_C, random_tensor_D):
    print(seeded_tensor)
print(random_tensor_C == random_tensor_D)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors and PyTorch objects on the GPUs (and making faster operations)

GPUs = faster computations on numbers, thanks to CUDA + NVIDIA hardware + PyTorch working behind the scenes to make it nice.

### 1. Getting a GPU

1. Easiest - Use Google colab for free GPU (options for upgrade as well) 
2. Use your own GPU - takes a little bit of setup and requires the investment of purchasing a GPU; there are a lot of options.
3. Use cloud computing - GCP, AWS, Azure; these services allow you to rent computers in the cloud and access them.

In [91]:
!nvidia-smi

Thu Nov 10 18:24:28 2022       
+-----------------------------------------------------------------------------+
| NVIDIA-SMI 526.47       Driver Version: 526.47       CUDA Version: 12.0     |
|-------------------------------+----------------------+----------------------+
| GPU  Name            TCC/WDDM | Bus-Id        Disp.A | Volatile Uncorr. ECC |
| Fan  Temp  Perf  Pwr:Usage/Cap|         Memory-Usage | GPU-Util  Compute M. |
|                               |                      |               MIG M. |
|   0  NVIDIA GeForce ... WDDM  | 00000000:01:00.0 Off |                  N/A |
| N/A   49C    P5    20W /  N/A |      0MiB /  8192MiB |      0%      Default |
|                               |                      |                  N/A |
+-------------------------------+----------------------+----------------------+
                                                                               
+-----------------------------------------------------------------------------+
| Proces

### 2. Check for GPU access with PyTorch

In [93]:
# Check for GPU access with PyTorch
import torch
torch.cuda.is_available()

True

For PyTorch since it's capable of running compute on the GPU or CPU, it's best practice to setup device agnostic code

In [96]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [95]:
# Count number of GPUS
torch.cuda.device_count()

1

### 3. Putting tensors (and models) on the GPU

The reason we want our tensors/models on the GPU is because using a GPU results in faster computations.

In [97]:
# Create a tensor (default is CPU)
tensor = torch.tensor([1, 2, 3], device='cpu')

# Tensor not on GPU
print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [98]:
# Move tensor to GPU  (if available)
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

### 4. Moving tensors back to the CPU

In [100]:
# if tensor is on GPU, can't transform it to NumPy
tensor_on_gpu.numpy()

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [101]:
# To fix the GPU tensor with NumPy issue, we can first move it back to the CPU
# (.cpu() returns a copy in host memory; `tensor_on_gpu` itself stays on the GPU)

tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3], dtype=int64)

In [103]:
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')