<a href="https://colab.research.google.com/github/andrii4k-kit/pytorch-learning/blob/main/00_fundamentals.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## 00. Fundamentals of DL in Pytorch


In [None]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.9.0+cpu


In [None]:
!nvidia-smi

/bin/bash: line 1: nvidia-smi: command not found


# Creating and operating Tensors

In [None]:
scalar = torch.tensor(7)
scalar

tensor(7)

In [None]:
scalar.ndim

0

In [None]:
scalar.item()

7

In [None]:
vector = torch.tensor([7, 7])
vector.ndim
vector.shape

torch.Size([2])

In [None]:
# MATRIX
MATRIX = torch.tensor([[7, 8 ],
                       [9, 10]])
MATRIX.shape

torch.Size([2, 2])

In [None]:
# TENSOR
TENSOR = torch.tensor([[[1,2,3],
                        [4,5,6],
                        [7,8,9]],
                       [[1,2,3],
                        [4,5,6],
                        [7,8,9]]])
TENSOR.ndim

3

### RANDOM Tensors
Random Tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.


In [None]:
# Create a random tensors of size (3, 3, 3)
random_tensor = torch.rand(3, 224, 224)
random_tensor

tensor([[[0.0040, 0.1256, 0.5703,  ..., 0.1898, 0.2511, 0.0823],
         [0.2462, 0.4005, 0.5985,  ..., 0.1587, 0.0304, 0.5283],
         [0.4915, 0.9212, 0.9837,  ..., 0.6829, 0.3587, 0.3114],
         ...,
         [0.6833, 0.0808, 0.7999,  ..., 0.2400, 0.4359, 0.5247],
         [0.5339, 0.7476, 0.1945,  ..., 0.3936, 0.8271, 0.1570],
         [0.4598, 0.5967, 0.8393,  ..., 0.4763, 0.7965, 0.8781]],

        [[0.5759, 0.8166, 0.4658,  ..., 0.3115, 0.9832, 0.1077],
         [0.7697, 0.1901, 0.8175,  ..., 0.2124, 0.1643, 0.0810],
         [0.8260, 0.9543, 0.1245,  ..., 0.2861, 0.1240, 0.3852],
         ...,
         [0.3063, 0.0145, 0.8951,  ..., 0.1458, 0.4864, 0.4333],
         [0.8209, 0.8920, 0.9635,  ..., 0.4947, 0.5776, 0.9541],
         [0.7630, 0.1195, 0.1553,  ..., 0.5180, 0.9720, 0.8923]],

        [[0.8483, 0.7056, 0.9460,  ..., 0.5414, 0.0516, 0.2976],
         [0.6413, 0.5492, 0.4813,  ..., 0.5272, 0.5387, 0.2306],
         [0.9952, 0.5202, 0.9997,  ..., 0.0567, 0.4470, 0.

In [None]:
random_tensor.ndim

3

In [None]:
random_image_size_tensor = torch.rand(size=(224, 224, 3))
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

### Zeros and ones


In [None]:
# create a tensor of all zeros
zeros = torch.zeros(size=(3, 4))
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [None]:
# create a tensor of all ones
ones = torch.ones(size=(3, 4))
ones
ones.dtype

torch.float32

In [None]:
# USe torch.range()
one_to_ten = torch.arange(start=0, end=10, step=1)
one_to_ten

tensor([0, 1, 2, 3, 4, 5, 6, 7, 8, 9])

In [None]:
# creating tensors like
ten_zeros = torch.ones_like(input=one_to_ten)
ten_zeros

tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1])

### Tensor datatypes

In [None]:
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # datazztype e.g. float32 or float 16
                               device=None, # what device is your tensor on
                               requires_grad=False) # whether or nor to track graddients with this tensor operations
float_32_tensor

tensor([3., 6., 9.])

In [None]:
float_16_tensor = torch.tensor([3.0, 6.0, 9.0], dtype=torch.float16)
new_tensor = float_32_tensor * float_16_tensor
new_tensor.dtype

torch.float32

### Getinng information from tensors(tensor attributes)

1. Tesnors not right datatype - can use tensor.datatype
2. Tensors not right shape - can use tensor.shape
3. Tensors not on the right device - can use tensor.device

In [None]:
some_tensor = torch.rand(3, 4)
some_tensor

tensor([[0.5192, 0.9339, 0.8465, 0.9291],
        [0.9003, 0.8482, 0.3627, 0.4938],
        [0.2916, 0.5576, 0.2550, 0.5212]])

In [None]:
print(some_tensor)
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Device tensor is on: {some_tensor.device}")

tensor([[0.5192, 0.9339, 0.8465, 0.9291],
        [0.9003, 0.8482, 0.3627, 0.4938],
        [0.2916, 0.5576, 0.2550, 0.5212]])
Datatype of tensor: torch.float32
Shape of tensor: torch.Size([3, 4])
Device tensor is on: cpu


### Manipuilating Tensors (tensor operations)
Tensor operations include:
* Addition
* Subtarction
* Multiplikation
* Division
* Matrix multiplication

In [None]:
ones_tensor = torch.tensor([[1,2,3],
                            [4,5,6],
                            [7,8,9]])


### Matrix multiplication

In [None]:
%%time
MATRIX_MUL = ones_tensor @ ones_tensor
MATRIX_MUL


CPU times: user 836 µs, sys: 0 ns, total: 836 µs
Wall time: 1.59 ms


tensor([[ 30,  36,  42],
        [ 66,  81,  96],
        [102, 126, 150]])

In [None]:
%%time
MATRIX = torch.matmul(ones_tensor, ones_tensor)
MATRIX

CPU times: user 286 µs, sys: 0 ns, total: 286 µs
Wall time: 242 µs


tensor([[ 30,  36,  42],
        [ 66,  81,  96],
        [102, 126, 150]])

### One of the most common error in DL is shape error

* The inner dimensions must match
`(3,2) @ (3, 2)` -> won't work
* The resulting matrix has the shape of the outer dimensions and further matrix multiplications rules...



In [None]:
# Shapes for matrix multiplication
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]])

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]])
torch.mm(tensor_A, tensor_B.T)

# With .T I can transpose the matrix


tensor([[ 27,  30,  33],
        [ 61,  68,  75],
        [ 95, 106, 117]])

### Finfing the min, max, mean, sum, etc. (tensor aggregation)

In [None]:
# Create a tensor
x = torch.arange(1, 100, 10, dtype=torch.float32)
x

tensor([ 1., 11., 21., 31., 41., 51., 61., 71., 81., 91.])

In [None]:
# Find the min
torch.min(x), x.min()

(tensor(1.), tensor(1.))

In [None]:
# Find the max
torch.max(x), x.max()

(tensor(91.), tensor(91.))

In [None]:
# Find the mean
torch.mean(x), x.mean()

(tensor(46.), tensor(46.))

In [None]:
# Find the sum
torch.sum(x), x.sum()

(tensor(460.), tensor(460.))

### Finidng the positional min and max

In [None]:
x.argmin(), x[0]

(tensor(0), tensor(1.))

## Reshaping, stacking, squeezing and unqueezing tensors
* Reshaping - reshapes an input tensor to a defined shape
* View - Return a view of an input tensor of certain shape but keep the same money as the the original tensor
* Stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - removes all `1` dimensions from a tensor
* Unsqueeze - add a 1 dimension to a target tensor
* Permute - Return a view of the input with dimensions permuted (swapped) in a certain way.

In [None]:
import torch
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [None]:
# Add an extra dimension
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
# Change the view
z = x.view(1, 9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
# Changing z changes x (because a view of a tensor shares the same memory as the original input)
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [None]:
# Stack tensors on top of each other
x_stacked = torch.stack([x, x, x, x], dim=1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [None]:
# torch.squeeze( - removes all single dimensions from a target tensor)
x_reshaped.shape
x_squeezed = x_reshaped.squeeze()
x_reshaped.shape, x_squeezed.squeeze().shape

(torch.Size([1, 9]), torch.Size([9]))

### Indexing

In [None]:
import torch
x = torch.arange(1, 10).reshape(1, 3, 3)
x = x.unsqueeze(dim=0)
x, x.shape

(tensor([[[[1, 2, 3],
           [4, 5, 6],
           [7, 8, 9]]]]),
 torch.Size([1, 1, 3, 3]))

In [None]:
x[1]

IndexError: index 1 is out of bounds for dimension 0 with size 1

## PyTorch tensors & NumPy

NumPy is a popular scientific Python numerical computing library.

And because of this, PyTorch has functionality to interact with it.

* Data in NumPy, want in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

In [None]:
import  torch
import numpy as np
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [None]:
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## trying to take random number out of random -> Reproducbility

to reduce randomness we use the concept if a random seed

In [None]:
import torch
# Create two randoms tensors

random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)


print(random_tensor_A)
print(random_tensor_B)
print(random_tensor_A == random_tensor_B)



tensor([[0.3689, 0.3532, 0.1337, 0.4163],
        [0.9168, 0.7150, 0.1486, 0.1316],
        [0.2964, 0.5421, 0.8326, 0.0500]])
tensor([[0.4341, 0.7875, 0.6899, 0.7248],
        [0.7896, 0.2367, 0.7715, 0.6546],
        [0.0656, 0.2311, 0.5113, 0.5935]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [None]:
import torch
# set the random seed
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(3, 4)

torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(3, 4)
print(random_tensor_C == random_tensor_D)


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Runnig tensors and PyTorch obejcts on the GPUs (and making faster computations)


In [None]:
!nvidia-smi

Tue Dec 30 16:28:23 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   49C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

## Check for GPU access with PyTorch

In [2]:
import torch
torch.cuda.is_available()

True

In [3]:
# Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

## 3. Putting tensors(and models) on the GPU

In [7]:
# Create a tensor (default on the CPU)
tensor = torch.tensor([1,2,3])

# Tensor not on GPU
print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [5]:
# Move tensor to GPU(if available)
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

### Moving Tensors back to CPU

In [8]:
# If tensor is on GPU, can't transform it to NumPy
tensor_on_gpu.numpy()

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [9]:
# To fix the GPU tensor with Numpy issue, we can first set it to the GPU
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy()
tensor_back_on_cpu

array([1, 2, 3])

# 00. PyTorch Fundamentals Exercises

### 1. Documentation reading

A big part of deep learning (and learning to code in general) is getting familiar with the documentation of a certain framework you're using. We'll be using the PyTorch documentation a lot throughout the rest of this course. So I'd recommend spending 10-minutes reading the following (it's okay if you don't get some things for now, the focus is not yet full understanding, it's awareness):
  * The documentation on [`torch.Tensor`](https://pytorch.org/docs/stable/tensors.html#torch-tensor).
  * The documentation on [`torch.cuda`](https://pytorch.org/docs/master/notes/cuda.html#cuda-semantics).



In [10]:
# No code solution (reading)

### 2. Create a random tensor with shape `(7, 7)`.


In [21]:
# Import torch
import torch

# Create random tensor
random_tensor_A = torch.rand(7, 7)
random_tensor_A

tensor([[0.4159, 0.6975, 0.0831, 0.5737, 0.4346, 0.2451, 0.1872],
        [0.2978, 0.7430, 0.2930, 0.1600, 0.1403, 0.0240, 0.1954],
        [0.3671, 0.5965, 0.8167, 0.3775, 0.3737, 0.7128, 0.4465],
        [0.2359, 0.0711, 0.0835, 0.3175, 0.2280, 0.2741, 0.9565],
        [0.8427, 0.8297, 0.7786, 0.7178, 0.2535, 0.3738, 0.6612],
        [0.3925, 0.4949, 0.3046, 0.4567, 0.4467, 0.4121, 0.7019],
        [0.0414, 0.1128, 0.7168, 0.8949, 0.1734, 0.4503, 0.2913]])

### 3. Perform a matrix multiplication on the tensor from 2 with another random tensor with shape `(1, 7)` (hint: you may have to transpose the second tensor).

In [22]:
# Create another random tensor
random_tensor_B = torch.rand(1, 7)
# Perform matrix multiplication
torch.mm(random_tensor_A, random_tensor_B.T)

tensor([[1.1759],
        [0.9048],
        [1.6464],
        [1.0219],
        [2.1093],
        [1.4516],
        [1.0796]])

### 4. Set the random seed to `0` and do 2 & 3 over again.

The output should be:
```
(tensor([[1.8542],
         [1.9611],
         [2.2884],
         [3.0481],
         [1.7067],
         [2.5290],
         [1.7989]]), torch.Size([7, 1]))
```

In [34]:
# Set manual seed
RANDOM_SEED = 0

# Create two random tensors
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(7, 7)
torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(1, 7)

# Matrix multiply tensors
torch.mm(random_tensor_C, random_tensor_D.T)

tensor([[1.5985],
        [1.1173],
        [1.2741],
        [1.6838],
        [0.8279],
        [1.0347],
        [1.2498]])

### 5. Speaking of random seeds, we saw how to set it with `torch.manual_seed()` but is there a GPU equivalent? (hint: you'll need to look into the documentation for `torch.cuda` for this one)
  * If there is, set the GPU random seed to `1234`.

In [35]:
# Set random seed on the GPU
RANDOM_GPU_SEED = torch.cuda.manual_seed(1234)
RANDOM_GPU_SEED


### 6. Create two random tensors of shape `(2, 3)` and send them both to the GPU (you'll need access to a GPU for this). Set `torch.manual_seed(1234)` when creating the tensors (this doesn't have to be the GPU random seed). The output should be something like:

```
Device: cuda
(tensor([[0.0290, 0.4019, 0.2598],
         [0.3666, 0.0583, 0.7006]], device='cuda:0'),
 tensor([[0.0518, 0.4681, 0.6738],
         [0.3315, 0.7837, 0.5631]], device='cuda:0'))
```

In [43]:
# Set random seed
RANDOM_GPU_SEED = 1234


# Check for access to GPU
if torch.cuda.is_available():
  device = "cuda"
else:
    print("No GPU")
    device = "cpu"


# Create two random tensors on GPU
torch.cuda.manual_seed(RANDOM_GPU_SEED)
random_tensor_1 = torch.rand(2, 3)
random_tensor_X = random_tensor_1.to(device)

torch.cuda.manual_seed(RANDOM_GPU_SEED)
random_tensor_2 = torch.rand(2, 3)
random_tensor_Y = random_tensor_2.to(device)



### 7. Perform a matrix multiplication on the tensors you created in 6 (again, you may have to adjust the shapes of one of the tensors).

The output should look like:
```
(tensor([[0.3647, 0.4709],
         [0.5184, 0.5617]], device='cuda:0'), torch.Size([2, 2]))
```

In [45]:
# Perform matmul on tensor_A and tensor_B
matmul = torch.mm(random_tensor_X, random_tensor_Y.T)
matmul, matmul.shape

(tensor([[0.3382, 0.4767],
         [0.6297, 0.6014]], device='cuda:0'),
 torch.Size([2, 2]))

### 8. Find the maximum and minimum values of the output of 7.

In [47]:
# Find max
max_v = torch.max(matmul)
max_v
# Find min
min_v = torch.min(matmul)
max_v, min_v

(tensor(0.6297, device='cuda:0'), tensor(0.3382, device='cuda:0'))

### 9. Find the maximum and minimum index values of the output of 7.

In [48]:
# Find arg max
max_v_arg = torch.argmax(matmul)

# Find arg min
min_v_arg = torch.argmin(matmul)
max_v_arg, min_v_arg

(tensor(2, device='cuda:0'), tensor(0, device='cuda:0'))


### 10. Make a random tensor with shape `(1, 1, 1, 10)` and then create a new tensor with all the `1` dimensions removed to be left with a tensor of shape `(10)`. Set the seed to `7` when you create it and print out the first tensor and it's shape as well as the second tensor and it's shape.

The output should look like:

```
tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
           0.3653, 0.8513]]]]) torch.Size([1, 1, 1, 10])
tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
        0.8513]) torch.Size([10])
```

In [49]:
# Set seed
RANDOM_SEED_1 = 42

# Create random tensor
torch.manual_seed(RANDOM_SEED_1)
rand_t = torch.rand(1, 1, 1, 10)

# Remove single dimensions
rand_t_squeezed = rand_t.squeeze()

# Print out tensors and their shapes
rand_t, rand_t.shape, rand_t_squeezed, rand_t_squeezed.shape

(tensor([[[[0.8823, 0.9150, 0.3829, 0.9593, 0.3904, 0.6009, 0.2566, 0.7936,
            0.9408, 0.1332]]]]),
 torch.Size([1, 1, 1, 10]),
 tensor([0.8823, 0.9150, 0.3829, 0.9593, 0.3904, 0.6009, 0.2566, 0.7936, 0.9408,
         0.1332]),
 torch.Size([10]))