In [2]:
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
print(torch.__version__)

2.6.0+cu126


### Tensors
A way to represent datas (array)

In [3]:
#scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [4]:
scalar.ndim

0

In [5]:
#Get tensor back as python int
scalar.item()

7

In [6]:
#Vector
vector = torch.tensor([7, 7, 7])
vector

tensor([7, 7, 7])

In [7]:
vector.ndim

1

In [8]:
vector.shape

torch.Size([3])

In [9]:
#Matrix
matrix = torch.tensor([[1, 1], [2, 2], [3, 3]])
matrix

tensor([[1, 1],
        [2, 2],
        [3, 3]])

In [10]:
matrix.ndim

2

In [11]:
matrix.shape

torch.Size([3, 2])

In [12]:
#Tensor
tensor = torch.tensor([[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[9, 8, 7], [6, 5, 4], [3, 2, 1]]])
tensor

tensor([[[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]],

        [[9, 8, 7],
         [6, 5, 4],
         [3, 2, 1]]])

In [13]:
tensor.ndim

3

In [14]:
tensor.shape

torch.Size([2, 3, 3])

In [15]:
#Random tensor
random = torch.rand(3, 5, 5)
random

tensor([[[0.4302, 0.4051, 0.1899, 0.8468, 0.2952],
         [0.8830, 0.8391, 0.7698, 0.4949, 0.1042],
         [0.7413, 0.2917, 0.7902, 0.3828, 0.9766],
         [0.1291, 0.4885, 0.1867, 0.1856, 0.3209],
         [0.0937, 0.0258, 0.0519, 0.0348, 0.5498]],

        [[0.5074, 0.2173, 0.5263, 0.4303, 0.2191],
         [0.4833, 0.6036, 0.3117, 0.9520, 0.6582],
         [0.2857, 0.0672, 0.4378, 0.1238, 0.1853],
         [0.2132, 0.3286, 0.2704, 0.7941, 0.7723],
         [0.8401, 0.8024, 0.1934, 0.9230, 0.5536]],

        [[0.3446, 0.0493, 0.6406, 0.9298, 0.4205],
         [0.6101, 0.5262, 0.0045, 0.0416, 0.4434],
         [0.1600, 0.7011, 0.8314, 0.6026, 0.4513],
         [0.0582, 0.9847, 0.5409, 0.0789, 0.1373],
         [0.9917, 0.1710, 0.5336, 0.5027, 0.8885]]])

In [16]:
random.ndim

3

In [17]:
random_image = torch.rand(3, 224, 224) #color channel (RGB), height, width
random_image.shape, random_image.ndim

(torch.Size([3, 224, 224]), 3)

## Unique tensors, zero and One

In [18]:
zeros = torch.zeros(3, 4)
ones = torch.ones(1, 2)
zeros, ones

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 tensor([[1., 1.]]))

In [19]:
ones.dtype #default type

torch.float32

## Range of Tensors and Tensors-like

In [20]:
ranged_num = torch.arange(0, 1000, 77) # [Start, End), +1 parameter -> step (spaces of each num)
ranged_num

tensor([  0,  77, 154, 231, 308, 385, 462, 539, 616, 693, 770, 847, 924])

In [21]:
range_zero = torch.zeros_like(ranged_num)
range_zero

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

## Data Types of Tensors

3 Big notes for this part :
1. Use correct tensor data type (tensor.dtype)
2. Use correct tensor size/shape (tensor.shape)
3. Use correct tensor device (tensor.device)

In [22]:
float_32_tensor = torch.tensor([3.0, 6.0, 9.0], 
                               dtype = None, #Data type of tensor, default is float32
                               device = None, #Normally "cpu" or "cuda"(gpu)
                               requires_grad = False) # Track gradients
float_32_tensor, float_32_tensor.dtype

(tensor([3., 6., 9.]), torch.float32)

In [23]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor, float_16_tensor.dtype

(tensor([3., 6., 9.], dtype=torch.float16), torch.float16)

In [24]:
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

## Tensor Operations
Including :
* Addition
* Substraction
* Multiplication (element-wise)
* Division
* Matrix Multiplication

In [25]:
#Creating a tensor and do some math operations
tensor = torch.tensor([1, 2, 3])
tensor + 10, tensor * 10

(tensor([11, 12, 13]), tensor([10, 20, 30]))

In [26]:
#Element-wise multiplication
print(tensor, "*", tensor)
print(f"Equals : {tensor * tensor}")

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals : tensor([1, 4, 9])


In [27]:
#Matrix multiplication
##torch.mm(a, b) torch.matmul(a, b)
torch.matmul(torch.rand(2, 3), torch.rand(3, 5)), tensor.shape

(tensor([[0.6821, 0.7965, 0.9769, 0.6432, 1.0576],
         [0.9706, 0.5444, 0.9029, 0.6027, 0.7971]]),
 torch.Size([3]))

In [28]:
#Matrix Transpose
tensorA = torch.rand(2, 3)
tensorB = torch.rand(2, 3)
print(f"Multiplying {tensorA.shape} with {tensorB.shape} will cause error, \n thus we can use transpose to change the matrix's shape")
print(f"Pre-transpose : {tensorB.shape}\nPost-transpose : {tensorB.T.shape}")
print(f"Thus multiplication of 2 matrix would be \n{torch.matmul(tensorA, tensorB.T)}")

Multiplying torch.Size([2, 3]) with torch.Size([2, 3]) will cause error, 
 thus we can use transpose to change the matrix's shape
Pre-transpose : torch.Size([2, 3])
Post-transpose : torch.Size([3, 2])
Thus multiplication of 2 matrix would be 
tensor([[0.2415, 0.1746],
        [0.4809, 0.3504]])


## Tensor's min, max, sum, mean, etc (aggregate)

In [29]:
#Create a tensor
tensor = torch.arange(0, 100, 10,)
tensor, tensor.dtype

(tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]), torch.int64)

In [30]:
#Min and Max
torch.min(tensor), tensor.min(), torch.max(tensor), tensor.max()

(tensor(0), tensor(0), tensor(90), tensor(90))

In [31]:
#Mean and Sum
#For mean, error with int64 type, so we change it to float32
torch.mean(tensor.type(torch.float32)), torch.sum(tensor)
#tensor.type(torch.float32).mean()      tensor.sum()

(tensor(45.), tensor(450))

In [32]:
#Positional min and max
x = tensor
x.argmin() #Find min and return the index
x.argmax() #Find max and return the index

tensor(9)

## Reshaping, stacking, squeezing and unsqueezing tensors
* Reshaping - reshapes an input tensor to a defined shape
* View - return a view of an input tensor of certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensor on top of each other (vstack / vertical stack) or side by side (hstack / horizontal stack)
* Squeeze - removes all `1` dimensions from a tensor
* Unsqueeze - add a `1` dimension to a target tensor
* Permute - return a view of the input with dimensions permuted (swapped) in a certain way

In [33]:
x = torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [34]:
# Reshape a tensor
x_reshaped = x.reshape(3, 3)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3.],
         [4., 5., 6.],
         [7., 8., 9.]]),
 torch.Size([3, 3]))

In [35]:
# Change the view
z = x.view(9, 1)
z, z.shape
# Note that in this case, z and x shares the same memory, so if x is changed, it will affect z

(tensor([[1.],
         [2.],
         [3.],
         [4.],
         [5.],
         [6.],
         [7.],
         [8.],
         [9.]]),
 torch.Size([9, 1]))

In [36]:
# Stack tensors
x_stack = torch.stack([x, x, x, x], dim = 1)
x_stack

tensor([[1., 1., 1., 1.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [37]:
x

tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [38]:
y = x.reshape(1, 9)
y

tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [42]:
#Squeeze removes all 1 dimensions from a tensor, one may say it removes the useless dimensions of a tensor
print(f"Before squeeze : {y}")
print(f"Shape : {y.shape}")
y_squeezed = y.squeeze()
print(f"After squeeze : {y_squeezed}")
print(f"Shape after squeeze : {y_squeezed.shape}")

Before squeeze : tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])
Shape : torch.Size([1, 9])
After squeeze : tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.])
Shape after squeeze : torch.Size([9])


In [43]:
#Unsqueeze adds a dimension to a target, in a specific location
print(f"Before unsqueeze : {y_squeezed}")
print(f"Shape : {y_squeezed.shape}")
y_unsqueezed = y_squeezed.unsqueeze(dim = 0)
print(f"After squeeze : {y_unsqueezed}")
print(f"Shape after squeeze : {y_unsqueezed.shape}")

Before unsqueeze : tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.])
Shape : torch.Size([9])
After squeeze : tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]])
Shape after squeeze : torch.Size([1, 9])


In [50]:
#Permute [torch.permute(tensor, (new_dim))] returns a view of a tensor, with its dimensions rearranged
z = torch.tensor([[[1, 1, 1]], [[1, 1, 1]]])
z.shape, torch.permute(z, (1, 0, 2)).shape

(torch.Size([2, 1, 3]), torch.Size([1, 2, 3]))

In [51]:
z, torch.permute(z, (1, 0, 2))

(tensor([[[1, 1, 1]],
 
         [[1, 1, 1]]]),
 tensor([[[1, 1, 1],
          [1, 1, 1]]]))

## PyTorch and NumPy
Sometimes, we need numpy values in pytorch and vice versa, so we can switch a np array -> tensor or tensor -> np array
* torch.from_numpy(np_array_name) (np_array -> tensor, the default datatype of this action is numpy's datatype)
* tensor_name.numpy() (tensor -> np_array, the default datatype is the tensor's datatype)

In [53]:
# NumPy to PyTorch
arr = np.arange(1, 10)
tensor_arr = torch.from_numpy(arr)
arr, tensor_arr

(array([1, 2, 3, 4, 5, 6, 7, 8, 9]),
 tensor([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=torch.int32))

In [54]:
# PyTorch to NumPy
tensorx = torch.arange(1, 10)
arr_np = tensorx.numpy()
tensorx, arr_np

(tensor([1, 2, 3, 4, 5, 6, 7, 8, 9]),
 array([1, 2, 3, 4, 5, 6, 7, 8, 9], dtype=int64))

## Reproducibility (trying to take the random out of random)
As you learn more about neural networks and machine learning, you'll start to discover how much randomness plays a part.

Well, pseudorandomness that is. Because after all, as they're designed, a computer is fundamentally deterministic (each step is predictable) so the randomness they create are simulated randomness (though there is debate on this too, but since I'm not a computer scientist, I'll let you find out more yourself).

How does this relate to neural networks and deep learning then?

We've discussed neural networks start with random numbers to describe patterns in data (these numbers are poor descriptions) and try to improve those random numbers using tensor operations (and a few other things we haven't discussed yet) to better describe patterns in data.

In short:

`start with random numbers -> tensor operations -> try to make better (again and again and again)`

Although randomness is nice and powerful, sometimes you'd like there to be a little less randomness.

Why?

So you can perform repeatable experiments.

For example, you create an algorithm capable of achieving X performance.

And then your friend tries it out to verify you're not crazy.

How could they do such a thing?

That's where reproducibility comes in.

In other words, can you get the same (or very similar) results on your computer running the same code as I get on mine?

Let's see a brief example of reproducibility in PyTorch.

We'll start by creating two random tensors, since they're random, you'd expect them to be different right?

In [1]:
import torch

# Create two random tensors
random_tensor_A = torch.rand(3, 4)
random_tensor_B = torch.rand(3, 4)

print(f"Tensor A:\n{random_tensor_A}\n")
print(f"Tensor B:\n{random_tensor_B}\n")
print(f"Does Tensor A equal Tensor B? (anywhere)")
random_tensor_A == random_tensor_B

Tensor A:
tensor([[0.9179, 0.8829, 0.3671, 0.2523],
        [0.4117, 0.3291, 0.8586, 0.7649],
        [0.8909, 0.6356, 0.1138, 0.9691]])

Tensor B:
tensor([[0.7175, 0.9745, 0.7322, 0.0500],
        [0.4113, 0.1008, 0.8430, 0.3450],
        [0.5774, 0.5568, 0.2783, 0.5962]])

Does Tensor A equal Tensor B? (anywhere)


tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])

In [2]:
import torch
import random

# # Set the random seed
RANDOM_SEED=42 # try changing this to different values and see what happens to the numbers below
torch.manual_seed(seed=RANDOM_SEED) 
random_tensor_C = torch.rand(3, 4)

# Have to reset the seed every time a new rand() is called 
# Without this, tensor_D would be different to tensor_C 
torch.random.manual_seed(seed=RANDOM_SEED) # try commenting this line out and seeing what happens
random_tensor_D = torch.rand(3, 4)

print(f"Tensor C:\n{random_tensor_C}\n")
print(f"Tensor D:\n{random_tensor_D}\n")
print(f"Does Tensor C equal Tensor D? (anywhere)")
random_tensor_C == random_tensor_D

Tensor C:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Tensor D:
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])

Does Tensor C equal Tensor D? (anywhere)


tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])

## PyTorch setup with GPU

In [3]:
import torch
torch.cuda.is_available()

True

In [4]:
#Setup device agnostic code
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [5]:
torch.cuda.device_count()

1

In [6]:
#Default tensor (on CPU)
x = torch.tensor([1, 2, 3])

print(x, x.device)

tensor([1, 2, 3]) cpu


In [7]:
#Move tensor to GPU (if available)
x_new = x.to(device)
x_new

tensor([1, 2, 3], device='cuda:0')

In [10]:
#NumPy does not work with GPU, so we need to move gpu tensors back to cpu
x_cpu = x_new.cpu().numpy()
x_cpu

array([1, 2, 3], dtype=int64)