In [3]:
import torch

In [4]:
# Smoke test: print a fixed string to confirm the kernel executes cells.
print("Hello World!")

Hello World!


In [5]:
# Shell escape: ask the NVIDIA driver which GPUs are visible, plus driver and CUDA versions.
!nvidia-smi

Thu Apr 24 22:21:45 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 570.86.15              Driver Version: 570.86.15      CUDA Version: 12.8     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  NVIDIA H200                    On  |   00000000:AF:00.0 Off |                   On |
| N/A   28C    P0             80W /  700W |                  N/A   |     N/A      Default |
|                                         |                        |              Enabled |
+-----------------------------------------+------------------------+----------------------+

+----------------------------------------------

In [6]:
# Installed PyTorch build; the suffix after "+" is the CUDA toolkit it was built against.
print(torch.__version__)

2.6.0+cu124


In [7]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

## TENSORS

In [8]:
# scalar: a single number wrapped in a zero-dimensional tensor
scalar = torch.tensor(7)
scalar

tensor(7)

In [9]:
scalar.dim()  # number of dimensions; a scalar has none

0

In [10]:
scalar.item()  # unwrap the single element into a plain Python number

7

In [11]:
# vector: a one-dimensional tensor
vector = torch.tensor([1, 2])
print(vector, vector.size(), vector.dim())

tensor([1, 2]) torch.Size([2]) 1


In [12]:
# matrix: a two-dimensional tensor (2 rows x 2 columns)
MATRIX = torch.tensor([[3, 4], [5, 6]])

In [13]:
MATRIX.size()  # (rows, columns)

torch.Size([2, 2])

In [14]:
MATRIX  # display the whole 2x2 matrix

tensor([[3, 4],
        [5, 6]])

In [15]:
MATRIX.dim()  # two dimensions: rows and columns

2

In [16]:
MATRIX[1]  # indexing the first dimension selects a row: here the second row

tensor([5, 6])

In [17]:
MATRIX[0]  # the first row

tensor([3, 4])

In [18]:
# Tensor: three dimensions -- one outer block that contains a 3x3 matrix
TENSOR = torch.tensor([
    [[1, 2, 3],
     [3, 6, 9],
     [4, 6, 8]],
])

In [19]:
TENSOR.size()  # (blocks, rows, columns)

torch.Size([1, 3, 3])

In [20]:
TENSOR.dim()  # one dimension per level of bracket nesting

3

In [21]:
TENSOR[0, 1, 0]  # block 0, row 1, column 0

tensor(3)

In [22]:
# TENSOR[1]
# invalid: dim 0 has size 1, so index 1 is out of bounds (raises IndexError)

### Random Tensors

In [23]:
# 3x4 tensor of values drawn uniformly from [0, 1)
random_tensor = torch.rand(size=(3, 4))
random_tensor

tensor([[0.9764, 0.7420, 0.5442, 0.1473],
        [0.4445, 0.2538, 0.4813, 0.5836],
        [0.7571, 0.7375, 0.6755, 0.5657]])

In [24]:
random_tensor.dim()  # rows and columns -> 2

2

In [25]:
# random tensor shaped like an image: [height, width, colour channels]
random_image_size_tensor = torch.rand(224, 224, 3)
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

### Zeros and Ones
- helpful for masks

In [26]:
# 3x4 tensor filled with zeros
zeros = torch.zeros(3, 4)
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [27]:
torch.mul(zeros, random_tensor)  # element-wise product: a zero mask blanks every entry

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [28]:
# 3x4 tensor filled with ones
ones = torch.ones(3, 4)
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [29]:
torch.eye(2)  # 2x2 identity matrix: ones on the diagonal, zeros elsewhere

tensor([[1., 0.],
        [0., 1.]])

In [30]:
ones.dtype  # dtype produced by torch.ones when no dtype argument is given

torch.float32

### creating a tensor from a range

In [31]:
# Use torch.arange(start, end, step): `end` is exclusive. (torch.range() is deprecated.)
# NOTE: with step=3 the result is 1, 4, 7, 10 -- not every integer from one to ten.
one_to_ten = torch.arange(start=1, end=11, step=3)
one_to_ten

tensor([ 1,  4,  7, 10])

In [32]:
# zeros with the same shape and dtype as one_to_ten
# NOTE(review): despite the name, the length follows one_to_ten rather than being ten
ten_zeros = torch.zeros_like(one_to_ten)
ten_zeros

tensor([0, 0, 0, 0])

### tensor datatypes
The 3 most common errors:

1. tensors not the right datatype
2. tensors not the right shape
3. tensors not on the right device

In [33]:
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,           # None -> inferred from the data
    device=None,          # None -> default device
    requires_grad=False,  # do not track gradients for this tensor
)
float_32_tensor

tensor([3., 6., 9.])

In [34]:
float_32_tensor.dtype  # Python floats with dtype=None are stored as float32

torch.float32

In [35]:
# converting the dtype returns a new tensor; float_32_tensor itself stays float32
float_16_tensor = float_32_tensor.to(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [36]:
# float16 copy of `ones`
# NOTE(review): the name `copy` would shadow the standard-library `copy` module if it is ever imported here
copy = ones.type(torch.float16)
copy

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=torch.float16)

In [37]:
copy * ones # works thanks to type promotion: float16 * float32 gives a float32 result

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [38]:
int_32_tensor = torch.tensor([[1, 2, 3],[3, 6, 9]], dtype=torch.int32)
# Slicing: [::1] is a step-1 slice, i.e. the whole row unchanged.
# Negative *indices* work (e.g. int_32_tensor[-1]), but a negative slice *step* ([::-1])
# raises ValueError in PyTorch; use torch.flip(...) to reverse a tensor instead.
int_32_tensor[1][::1]

tensor([3, 6, 9], dtype=torch.int32)

In [39]:
my_list = [*range(5)]
list(reversed(my_list))  # plain Python lists can be reversed (my_list[::-1] works too)

[4, 3, 2, 1, 0]

### matrix multiplication

In [40]:
# (3, 4) @ (4, 3) -> (3, 3); swapping in the float16 `copy` for `ones` fails with a dtype error
ones @ zeros.T

tensor([[0., 0., 0.],
        [0., 0., 0.],
        [0., 0., 0.]])

In [41]:
tensor = torch.tensor([1, 2, 3])
torch.matmul(tensor, tensor)  # same as `tensor @ tensor`: 1*1 + 2*2 + 3*3

tensor(14)

In [42]:
%%time
# Dot product written out by hand, timed for comparison with the built-in `@` operator.
value = 0
for i in range(len(tensor)):
    # accumulate the square of each element; `value` becomes a 0-dim tensor after the first add
    value += tensor[i] * tensor[i]
print(value)

tensor(14)
CPU times: user 0 ns, sys: 341 μs, total: 341 μs
Wall time: 350 μs


In [43]:
%%time
# The same dot product through the built-in matmul operator, timed for comparison.
tensor @ tensor

CPU times: user 42 μs, sys: 12 μs, total: 54 μs
Wall time: 56 μs


tensor(14)

In [44]:
# Two 3x2 float32 matrices for the matrix-multiplication examples.
tensor_A = torch.tensor([[1, 2], [3, 4], [5, 6]], dtype=torch.float32)
tensor_B = torch.tensor([[7, 10], [8, 11], [9, 12]], dtype=torch.float32)


In [45]:
tensor_A.size()  # 3 rows, 2 columns

torch.Size([3, 2])

In [46]:
# inner dimensions must match: (3, 2) @ (2, 3) -> (3, 3), hence the transpose of tensor_B
torch.matmul(tensor_A, tensor_B.T)

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

In [47]:
# Seed first so the layer's randomly initialised weights (and the output below) are reproducible.
torch.manual_seed(42)
linear = torch.nn.Linear(in_features=2, # in_features must equal the size of the input's last dimension
                         out_features=8) # out_features is the size of the output's last dimension
x = tensor_A
# apply the layer: (3, 2) input -> (3, 8) output
output = linear(x)
print(f"Input shape: {x.shape}\n")
print(f"Output:\n{output}\n\nOutput shape: {output.shape}")

Input shape: torch.Size([3, 2])

Output:
tensor([[ 2.2595,  1.2380, -0.1997,  0.6665, -0.7400,  0.7964,  0.4267,  0.6104],
        [ 4.5145,  2.2058, -0.2241,  0.8086, -0.5308,  2.2903,  1.6631,  1.0926],
        [ 6.7696,  3.1736, -0.2486,  0.9506, -0.3216,  3.7842,  2.8995,  1.5748]],
       grad_fn=<AddmmBackward0>)

Output shape: torch.Size([3, 8])


### tensor aggregation: min, max, mean, sum

In [48]:
tensor_C = torch.matmul(tensor_A, tensor_B.T)  # 3x3 product reused by the aggregation examples

In [49]:
torch.min(tensor_C), tensor_C.min()  # function form and method form give the same minimum

(tensor(27.), tensor(27.))

In [50]:
tensor_C.mean()  # average over all nine elements

tensor(68.)

In [51]:
# mean() works on the float16 tensor; the value is not shown because it is not the cell's last expression
copy.mean()
# cast to 64-bit integers (torch.long); mean() on an integer tensor raises a RuntimeError
copy_long = copy.type(torch.long)

In [52]:
copy_long.dtype  # torch.long is the 64-bit integer dtype

torch.int64

In [53]:
copy_long.to(torch.float32).mean()  # cast back to a floating-point dtype before averaging

tensor(1.)

In [54]:
torch.sum(tensor_C), tensor_C.sum()  # function form and method form give the same total

(tensor(612.), tensor(612.))

In [55]:
torch.argmin(tensor_C)  # position of the minimum, counted in the flattened tensor

tensor(0)

In [56]:
# Reversing a tensor flips it along dim 0 (row order), so the minimum moves to flat index 6.
flipped_C = tensor_C.flip(0)
flipped_C.argmin(), flipped_C

(tensor(6),
 tensor([[ 95., 106., 117.],
         [ 61.,  68.,  75.],
         [ 27.,  30.,  33.]]))

In [57]:
torch.argmax(tensor_C)  # position of the maximum, counted in the flattened tensor

tensor(8)

### reshaping, stacking, squeezing and unsqueezing.

* reshaping - reshapes an input tensor to a defined shape
* view - return a view of an input tensor of certain shape but keep the same memory as the original tensor
* stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)
* squeeze - removes all dimensions of size 1 from a tensor
* unsqueeze - adds a dimension of size 1 to a target tensor at a given position
* permute - return a view of the input with dimensions permuted (swapped) in a certain way

In [58]:
# nine floats, 1.0 through 9.0 (the end value is exclusive)
x = torch.arange(start=1.0, end=10.0)
x, x.size()

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [59]:
# reshape the 9 elements into a 3x3 matrix (adds a dimension; the element count must stay the same)
x.reshape(3, 3)

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

In [60]:
# change the view: same data seen as shape (1, 9); z shares memory with x
z = x.view(1,9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [61]:
# writing through the view also changes x, because z and x share the same memory
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [62]:
# stack four copies of x along a new dim 1 -> shape (9, 4)
x_stacked = torch.stack([x, x, x, x], dim=1)
x_stacked.shape, x.shape  # not displayed: only a cell's last expression is shown
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [63]:
# torch.squeeze() - removes every dimension of size 1 from a target tensor
x = torch.zeros(size=(2, 1, 2, 1, 2))
x.shape

torch.Size([2, 1, 2, 1, 2])

In [64]:
y = x.squeeze()  # no dim given: every size-1 dimension is dropped
y.shape, y.size()

(torch.Size([2, 2, 2]), torch.Size([2, 2, 2]))

In [65]:
# dim 0 has size 2, so squeezing it removes nothing and the shape is unchanged
y = torch.squeeze(x, 0)
y.size()

torch.Size([2, 1, 2, 1, 2])

In [66]:
# squeeze only dims 1 and 3 (both size 1); a tuple of dims needs a recent PyTorch (2.0+)
y = torch.squeeze(x, (1, 3))
y.size()

torch.Size([2, 2, 2])

In [67]:
# torch.unsqueeze() - inserts a dimension of size 1 at the given position
x = torch.tensor([1, 2, 3, 4])
x.unsqueeze(0)

tensor([[1, 2, 3, 4]])

In [68]:
torch.unsqueeze(x, 1) # new axis after the existing one -> a (4, 1) column, without spelling out the length of x

tensor([[1],
        [2],
        [3],
        [4]])

In [69]:
x.reshape(4, 1)  # same column via reshape, but here the length 4 has to be written out

tensor([[1],
        [2],
        [3],
        [4]])

In [70]:
# torch.permute - returns the tensor with its dimensions rearranged in the given order
x_original = torch.rand(224, 224, 3)  # [height, width, colour_channels]

# move colour_channels (dim 2) to the front: -> [colour_channels, height, width]
x_permuted = torch.permute(x_original, (2, 0, 1))
x_permuted.shape

torch.Size([3, 224, 224])

### indexing

In [71]:
# integers 1..9 arranged as one 3x3 block
x = torch.arange(start=1, end=10).reshape((1, 3, 3))
x, x.size()

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [72]:
x[0]  # first (and only) block: the 3x3 matrix

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [73]:
x[0][1]  # chained indexing: block 0, then row 1

tensor([4, 5, 6])

In [74]:
x[0, 1] # same row via comma (tuple) indexing; nested Python lists do not support this form

tensor([4, 5, 6])

In [93]:
# 5x5 nested Python list of random ints in [0, 5), for comparison with tensor indexing
array_2d = [[ np.random.randint(5) for i in range(5)] for i in range(5)]
#array_2d[0, 0] # left commented out: raises TypeError (list indices must be integers or slices, not tuple)

In [76]:
x[0, 0, 0].item()  # fully indexed element, unwrapped to a plain Python int

1

In [77]:
# todo: practise slicing tensors.
# In order: row 0 of every block, column 1 of every row,
# the centre element (still wrapped in a 1-element tensor), and row 0 of block 0.
x[:, 0], x[:, :, 1], x[:, 1, 1], x[0, 0, :]

(tensor([[1, 2, 3]]), tensor([[2, 5, 8]]), tensor([5]), tensor([1, 2, 3]))

In [78]:
x[0, 2, 2]  # last row, last column

tensor(9)

In [79]:
x[..., 2]  # last column of every row; the Ellipsis stands for all leading dimensions

tensor([[3, 6, 9]])

## PyTorch Tensors and NumPy
- NumPy's default floating-point dtype is `float64`
- PyTorch's default floating-point dtype is `float32`

In [80]:
# NumPy -> PyTorch: from_numpy keeps the array's float64 dtype and shares its memory
array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array)
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [81]:
array.dtype  # NumPy builds float arrays as float64

dtype('float64')

In [82]:
torch.arange(1.0, 8.0).dtype  # the same range built directly in PyTorch is float32

torch.float32

In [83]:
# convert and cast in one chain so the tensor uses PyTorch's usual float32
tensor = torch.from_numpy(array).to(torch.float32)

In [84]:
# `array + 1` builds a brand-new array and rebinds the name, so `tensor` is left untouched.
# (This `tensor` also went through a dtype cast to float32, which already copied the data.)
# A plain torch.from_numpy(array) tensor shares memory and WOULD follow an in-place `array += 1`.
array = array + 1
array, tensor

(array([2., 3., 4., 5., 6., 7., 8.]), tensor([1., 2., 3., 4., 5., 6., 7.]))

In [85]:
# PyTorch -> NumPy: the array keeps the tensor's float32 dtype (not NumPy's usual float64)
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [86]:
# `tensor + 1` builds a new tensor and rebinds the name, so numpy_tensor keeps its ones.
# numpy_tensor shares memory with the ORIGINAL tensor: an in-place tensor.add_(1) would change it too.
tensor = tensor + 1
tensor, numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [87]:
0.1 + 0.2  # binary floating-point rounding: the sum is not exactly 0.3

0.30000000000000004

## Randomness / Reproducibility

In [None]:
import random  # NOTE(review): not used in this cell

RANDOM_SEED = 55

# Seed PyTorch's global generator, then draw the first tensor.
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(size=(3, 4))

# The seed has to be set again before each draw that should repeat:
# every torch.rand call advances the generator state.
torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(size=(3, 4))

# check: element-wise comparison, True wherever the two draws match
random_tensor_C == random_tensor_D

## GPUs

In [95]:
torch.cuda.is_available()  # whether PyTorch can use CUDA on this machine

True

In [98]:
# device-agnostic setup: prefer the GPU, fall back to the CPU
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"

In [99]:
device  # the device string chosen above

'cuda'

In [100]:
torch.cuda.device_count()  # number of CUDA devices visible to PyTorch

1

### putting tensors on the gpu

In [101]:
# tensors are created on the CPU unless a device is given
tensor = torch.tensor([1, 2, 3])
print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [102]:
# move tensor to GPU (if available)
tensor_on_gpu = tensor.to(device)
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

In [107]:
tensor_back_on_cpu = tensor_on_gpu.cpu()  # copy the data back to host (CPU) memory

In [108]:
# NumPy conversion needs a CPU tensor; calling .numpy() directly on the CUDA tensor raises TypeError
tensor_back_on_cpu.numpy()

array([1, 2, 3])