## 00. PyTorch Fundamentals

In [None]:
print("Hello i'm excited to learn PyTorch!")

Hello i'm excited to learn PyTorch!


In [None]:
!nvidia-smi #only runs when connected to a gpu

Sun May 11 18:12:29 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   39C    P8             10W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

In [None]:
# Import PyTorch and the fundamental data science packages (nothing is installed in this cell)
import torch
print(torch.__version__)
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# The printed version is "<PyTorch version>+cu<CUDA version>", e.g. 2.6.0+cu124 -> PyTorch 2.6.0 built for CUDA 12.4

2.6.0+cu124


## Introduction to Tensors

### Creating tensors

PyTorch tensors are created using `torch.tensor()` - https://pytorch.org/docs/stable/tensors.html

In [None]:
#scalar
scalar = torch.tensor(7)
scalar

tensor(7)

In [None]:
scalar.ndim

0

In [None]:
#Get tensor back as python int
scalar.item()

7

In [None]:
# Vector, a vector usually has magnitude and direction
vector = torch.tensor([7, 7])
vector

tensor([7, 7])

In [None]:
vector.ndim

1

In [None]:
vector.shape

torch.Size([2])

In [None]:
# MATRIX
MATRIX = torch.tensor([[7, 8],
                       [9, 10]])

MATRIX

tensor([[ 7,  8],
        [ 9, 10]])

In [None]:
MATRIX.ndim

2

In [None]:
MATRIX[0]

tensor([7, 8])

In [None]:
MATRIX[1]

tensor([ 9, 10])

In [None]:
MATRIX.shape

torch.Size([2, 2])

In [None]:
# TENSOR
TENSOR = torch.tensor([[[1, 2, 3],
                        [3, 6, 5],
                        [2, 4, 5]]])

TENSOR

#most of the time we won't be crafting tensors by hand pytorch does this behind the scenes

tensor([[[1, 2, 3],
         [3, 6, 5],
         [2, 4, 5]]])

In [None]:
TENSOR.ndim #3 square brackets

3

In [None]:
TENSOR.shape #means one tensor, 3 rows, 3 cols

torch.Size([1, 3, 3])

In [None]:
TENSOR[0]

tensor([[1, 2, 3],
        [3, 6, 5],
        [2, 4, 5]])

In [None]:
#creating a tensor to practice

tensor = torch.tensor([[[1, 2, 9],
                        [9, 0, 1],
                        [23, 1, 1]],
                       [[1, 2, 3],
                        [4, 5, 6],
                        [7, 8 ,90]]])
tensor

tensor([[[ 1,  2,  9],
         [ 9,  0,  1],
         [23,  1,  1]],

        [[ 1,  2,  3],
         [ 4,  5,  6],
         [ 7,  8, 90]]])

In [None]:
tensor.ndim # still 3 dimensions even though there are 2 separate matrices stacked together;
# only the first entry of the shape changes (to 2)

3

In [None]:
tensor.shape

torch.Size([2, 3, 3])

In [None]:
tensor[0] #gets the first tensor

tensor([[ 1,  2,  9],
        [ 9,  0,  1],
        [23,  1,  1]])

In [None]:
tensor[1] #gets the second tensor

tensor([[ 1,  2,  3],
        [ 4,  5,  6],
        [ 7,  8, 90]])

In [None]:
tensor[1, 0] # second tensor first vector / row

tensor([1, 2, 3])

In [None]:
tensor[0, 0, 0] #picking a scalar out, the first tensor, first vector, first position

tensor(1)

### Random tensors

why random tensors?

Random tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.

'start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers'

torch random tensors - https://pytorch.org/docs/stable/generated/torch.rand.html

In [None]:
#create a random tensor of size (3, 4)
random_tensor = torch.rand(3, 4)
random_tensor

tensor([[0.9328, 0.9740, 0.6457, 0.8169],
        [0.2541, 0.0325, 0.2398, 0.9367],
        [0.1778, 0.3220, 0.1157, 0.1212]])

In [None]:
random_tensor.ndim

2

In [None]:
#create a random tensor with similar shape to an image tensor
random_image_size_tensor = torch.rand(size = (224, 224, 3)) #height, width, color channels
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

In [None]:
#pytorch allows to create tensors quite easily with rand function

#can use the size parameter or not
random_tensor_2 = torch.rand(size = (5, 10, 10))
random_tensor_2

#creates 5 seperate tensors with 10 by 10 shape

random_tensor_2.ndim, random_tensor_2.shape

(3, torch.Size([5, 10, 10]))

### Zeros and ones

In [None]:
# a (3, 4) tensor filled with zeros - in practice zeros are used more often than ones, and random tensors most of all
zeros = torch.zeros(3, 4)
zeros

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

In [None]:
#create a tensor of all ones
ones = torch.ones(size = (3, 4))
ones

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

In [None]:
ones.dtype

torch.float32

In [None]:
random_tensor.dtype

torch.float32

### Creating a range of tensors and tensors-like

In [None]:
# integers from 1 up to (but not including) 11, relying on torch.arange's default step of 1
one_to_ten = torch.arange(1, 11)
one_to_ten

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

In [None]:
#creating tensors like

#gets zeros in the same shape as one_to_ten
ten_zeros = torch.zeros_like(input = one_to_ten)
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

### Tensor datatypes

**Note:** Tensor datatypes are one of the 3 big errors you'll run into with PyTorch and deep learning:

1. Tensors not right datatype
2. Tensors not right shape
3. Tensors not on the right device (`device = None` by default, which uses the current default device - normally the CPU)

In [None]:
#Float 32 tensor
#some of the most important parameters when creating tensors
float_32_tensor = torch.tensor([3.0, 6.0, 9.0], dtype = torch.float32,
                               device = None, #what device is your tensor on?
                               requires_grad= False) #whether or not to track gradients with this tensors operations

float_32_tensor

tensor([3., 6., 9.])

In [None]:
#the dtype deals with precision in mathematics, detail in which the quantity is expressed

#float 32 means a number contains 32 bits in computer memory
#32 bit is single precision
#16 bit is half precision
#if we sacrifice some precision we can calculate faster on numbers that take less memory

In [None]:
float_16_tensor = float_32_tensor.type(torch.float16)
float_16_tensor

tensor([3., 6., 9.], dtype=torch.float16)

In [None]:
float_32_tensor.dtype

torch.float32

In [None]:
float_16_tensor * float_32_tensor #it works, sometimes if you think something may error it actually results in no error.

#some operations will run an error

tensor([ 9., 36., 81.])

In [None]:
int_32_tensor = torch.tensor([3, 6, 9], dtype = torch.int32)
int_32_tensor

tensor([3, 6, 9], dtype=torch.int32)

In [None]:
float_32_tensor * int_32_tensor #still works

tensor([ 9., 36., 81.])

### Getting information from our tensors (tensor attributes)

1. check the shape - tensor.shape

2. check the data type - tensor.dtype

3. check the device - tensor.device

In [None]:
#create a tensor

some_tensor = torch.rand(3, 4)
some_tensor

tensor([[0.2480, 0.3402, 0.6409, 0.1930],
        [0.8761, 0.5250, 0.0695, 0.6753],
        [0.1749, 0.1685, 0.5069, 0.6597]])

In [None]:
#find out details of some tensor
print(some_tensor)
print()
print(f"Datatype of tensor: {some_tensor.dtype}")
print()
print(f"Shape of tensor: {some_tensor.shape}")
print()
print(f"Device tensor is on: {some_tensor.device}")

tensor([[0.2480, 0.3402, 0.6409, 0.1930],
        [0.8761, 0.5250, 0.0695, 0.6753],
        [0.1749, 0.1685, 0.5069, 0.6597]])

Datatype of tensor: torch.float32

Shape of tensor: torch.Size([3, 4])

Device tensor is on: cpu


In [None]:
# Create a half-precision random tensor, placed on the GPU when one is available.
# Falling back to the CPU keeps this cell runnable on machines without CUDA
# (a hard-coded 'cuda' device raises an error there).
own_tensor = torch.rand(size = (4, 4),
                        dtype = torch.float16,
                        device = torch.device('cuda' if torch.cuda.is_available() else 'cpu'))

In [None]:
own_tensor

tensor([[0.9434, 0.9727, 0.7168, 0.9517],
        [0.7456, 0.1146, 0.7754, 0.5635],
        [0.0654, 0.3408, 0.9224, 0.6064],
        [0.1954, 0.5488, 0.4417, 0.4783]], device='cuda:0',
       dtype=torch.float16)

### Manipulating Tensors (tensor operations)

tensor operations include:

* addition
* subtraction
* multiplication (element wise)
* division
* matrix multiplication


In [None]:
#create a tensor
tensor = torch.tensor([1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [None]:
#multiply tensor by 10
tensor * 10

tensor([10, 20, 30])

In [None]:
tensor

tensor([1, 2, 3])

In [None]:
#subtract 10
tensor - 10

tensor([-9, -8, -7])

In [None]:
# try out PyTorch's built-in functions
torch.mul(tensor, 10) # same result as the regular * operator; the operators are generally more understandable

tensor([10, 20, 30])

In [None]:
torch.add(tensor, 10)

tensor([11, 12, 13])

In [None]:
torch.subtract(tensor, 10)

tensor([-9, -8, -7])

### Matrix Multiplication

1. Element-wise multiplication
2. Matrix multiplication (dot product)

There are two main rules that performing matrix multiplication needs to satisfy:

1. The **inner dimensions** must match:
* (3, 2) @ (3, 2) wont work
* (2, 3) @ (3, 2) will work
* (3, 2) @ (2, 3) will work

2. The resulting matrix has the shape of the **outer dimensions**:

* (2, 3) @ (3, 2) -> (2, 2)
* (3, 2) @ (2, 3) -> (3, 3)

In [None]:
torch.matmul(torch.rand(3, 10), torch.rand(10, 3)) #one of the most common errors in deep learning is if the inner dimensions dont match

tensor([[2.8787, 1.9943, 2.3935],
        [2.3817, 2.1590, 1.6894],
        [2.6790, 2.6102, 1.8306]])

In [None]:
#element-wise multiplication
print(tensor, "*", tensor)
print(f"Equals: {tensor * tensor}")

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


In [None]:
#matrix multiplication
torch.matmul(tensor, tensor)

tensor(14)

In [None]:
#matrix multiplication by hand
1*1 + 2*2 + 3*3

14

In [None]:
%%time
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]
print(value)

tensor(14)
CPU times: user 608 µs, sys: 134 µs, total: 742 µs
Wall time: 787 µs


In [None]:
%%time
torch.matmul(tensor, tensor) #much faster than using the for loop by hand for mathematical operations

CPU times: user 33 µs, sys: 0 ns, total: 33 µs
Wall time: 36.2 µs


tensor(14)

### One of the most common errors in deep learning is shape errors

In [None]:
# shapes for matrix multiplication
# tensor_A and tensor_B are both (3, 2), so their inner dimensions (2 and 3) do not match as they are
tensor_A = torch.tensor([[7, 10],
                        [8, 11],
                        [9, 12]])

tensor_B = torch.tensor([[7, 10],
                        [8, 11],
                        [9,12]])

# NOTE: reshape(2, 3) makes the shapes compatible, but it is NOT a transpose: it re-reads the six
# values in row-major order ([[7, 10, 8], [11, 9, 12]]), so this product differs from tensor_A @ tensor_B.T
torch.matmul(tensor_A, tensor_B.reshape(2, 3))

tensor([[159, 160, 176],
        [177, 179, 196],
        [195, 198, 216]])

In [None]:
tensor_A.shape, tensor_B.shape #need to adjust the shape for matrix multiplication

(torch.Size([3, 2]), torch.Size([3, 2]))

To fix our tensor shape issues we can manipulate the shape using a transpose.

A transpose switches the axes or dimensions of a given tensor

In [None]:
tensor_B.T, tensor_B.T.shape # .T transposes the tensor: rows become columns, so the shape goes from (3, 2) to (2, 3)

(tensor([[ 7,  8,  9],
         [10, 11, 12]]),
 torch.Size([2, 3]))

In [None]:
#the matrix multiplication operation works when tensor_B is transposed
print(f"original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}")
print(f"New shapes: tensor_A = {tensor_A.shape} (same shape as above), tensor_B.T = {tensor_B.T.shape}")
print(f"multiplying: {tensor_A.shape} @ {tensor_B.T.shape} <- inner dimensions must match")
print("output:\n")
output = torch.matmul(tensor_A, tensor_B.T)
print(output)
print(f"\nOutput shape: {output.shape}")


original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])
New shapes: tensor_A = torch.Size([3, 2]) (same shape as above), tensor_B.T = torch.Size([2, 3])
multiplying: torch.Size([3, 2]) @ torch.Size([2, 3]) <- inner dimensions must match
output:

tensor([[149, 166, 183],
        [166, 185, 204],
        [183, 204, 225]])

Output shape: torch.Size([3, 3])


### Finding the min, max, mean, sum, etc (tensor aggregation)


In [None]:
# ten evenly spaced integers: 0, 10, ..., 90
x = torch.arange(start = 0, end = 100, step = 10)
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [None]:
#find the max
torch.max(x), x.max()

(tensor(90), tensor(90))

In [None]:
torch.min(x), x.min()

(tensor(0), tensor(0))

In [None]:
torch.mean(x, dtype = torch.float32), x.type(torch.float32).mean() # mean() doesn't work on the integer ("Long") dtype of x, so convert to a float dtype first

(tensor(45.), tensor(45.))

In [None]:
#find the sum
torch.sum(x), x.sum() #best to pick a style and stick with that throughout code

(tensor(450), tensor(450))

### Finding the positional min and max

In [None]:
#find positional min and max
torch.argmax(x.type(torch.float32)), torch.argmin(x), x.argmax(), x.argmin()

(tensor(9), tensor(0), tensor(9), tensor(0))

In [None]:
#argmin find the position in tensor that has the minimum value, returns index position of target tensor where
#the min value occurs
torch.argmin(x)

tensor(0)

In [None]:
x[0]

tensor(0)

In [None]:
#find the position in tensor that has the maximum value with argmax
torch.argmax(x)

tensor(9)

In [None]:
x[9]

tensor(90)

## Reshaping, Stacking, Squeezing, and Unsqueezing

* Reshaping - reshapes an input tensor to a defined shape
* View - Return a view of an input tensor of certain shape but keep the same memory as the original tensor
* Stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)
* Squeeze - removes all '1' dimensions from a tensor
* Unsqueeze - add a '1' dimension to a target tensor
* Permute - Return a View of the input with dimensions permuted (swapped) in a certain way


In [None]:
#lets create a tensor
import torch
x= torch.arange(1., 10.)
x, x.shape

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [None]:
#Add an extra dimension
x_reshaped = x.reshape(1, 9)
x_reshaped, x_reshaped.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
#change the view
z =  x.view(1, 9)
z, z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [None]:
# changing z changes x, because a view of a tensor shares the same memory as the original input
z[:, 0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [None]:
# stack four copies of x along a new dimension; dim = 1 places them side by side as columns -> shape (9, 4)
# (dim = 0 would stack them on top of each other as rows instead)
x_stacked  = torch.stack([x, x, x, x], dim = 1)
x_stacked

tensor([[5., 5., 5., 5.],
        [2., 2., 2., 2.],
        [3., 3., 3., 3.],
        [4., 4., 4., 4.],
        [5., 5., 5., 5.],
        [6., 6., 6., 6.],
        [7., 7., 7., 7.],
        [8., 8., 8., 8.],
        [9., 9., 9., 9.]])

In [None]:
v_stack = torch.stack([x, x]) # same torch.stack function, left at its default dim = 0: the copies are stacked as rows -> shape (2, 9)
v_stack

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [None]:
#torch.squeeze() removes all single dimensions from a target tensor
print(f"Previous tensor: {x_reshaped}")
print(f"previous shape: {x_reshaped.shape}")

#remove extra dimensions from x_reshaped
x_squeezed = x_reshaped.squeeze()
print(f"\nNew tensor: {x_squeezed}")
print(f"New shape: {x_squeezed.shape}")

Previous tensor: tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
previous shape: torch.Size([1, 9])

New tensor: tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
New shape: torch.Size([9])


In [None]:
# torch.unsqueeze() - adds a single dimension to a target tensor at a specific dim (dimension)
print(f"previous target:{x_squeezed}")
print(f"previous Shape: {x_squeezed.shape}")

#add an extra dimension with unsqueeze
x_unsqueezed = x_squeezed.unsqueeze(dim = 0)
print(f"\nNew tensor:{x_unsqueezed}")
print(f"New shape: {x_unsqueezed.shape}")

previous target:tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.])
previous Shape: torch.Size([9])

New tensor:tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]])
New shape: torch.Size([1, 9])


In [None]:
# torch.permute returns a view of the input tensor with its dimensions rearranged
x_original = torch.rand(224, 224, 3) # [height, width, color_channels]

# move the color channel axis to the front: old axis 2 -> new axis 0, 0 -> 1, 1 -> 2
x_permute = torch.permute(x_original, (2, 0, 1))

print(f"Previous shape: {x_original.shape}")
print(f"New shape: {x_permute.shape}") # [color_channels, height, width]

Previous shape: torch.Size([224, 224, 3])
New shape: torch.Size([3, 224, 224])


In [None]:
#change a value in x_original and see if that same value is changed in x_permute
x_original[0, 0, 0] = 1 #changing the value here updates it in the permuted tensor because the permute function creates a view

In [None]:
x_permute[0, 0, 0]

tensor(1.)

## Indexing (selecting data from tensors)

indexing with PyTorch is similar to indexing with NumPy


In [None]:
# build a (1, 3, 3) tensor holding the integers 1 to 9
import torch
x = torch.reshape(torch.arange(start = 1, end = 10), (1, 3, 3))
x, x.shape

(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [None]:
#lets index on our new tensor
x[0]

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [None]:
#lets index on the middle bracket (dim = 1)
x[0, 0], x[0][0]

(tensor([1, 2, 3]), tensor([1, 2, 3]))

In [None]:
# let's index all the way down to a single element in the innermost bracket (last dimension)
x[0][2][2] # index 0 of dim 0, index 2 of dim 1 (last row), index 2 of dim 2 (last column) -> 9

tensor(9)

In [None]:
#you can also use ":" to select all of a target dimension
x[:, 0]

tensor([[1, 2, 3]])

In [None]:
#get all values of 0th and 1st dimensions but only index 1 of the second dimension
x[:, :, 1]

tensor([[2, 5, 8]])

In [None]:
#get all values of the 0 dimension but only the 1 index value of the 1st and 2nd dimension
x[:, 1, 1]

tensor([5])

In [None]:
#get index 0 of 0th and 1st dimension and all values of 2nd dimension
x[0, 0, :]

tensor([1, 2, 3])

In [None]:
#index on x to return 9
print(x[0, 2, 2])

#index on x to return 3, 6, 9
print(x[0, :, 2])

tensor(9)
tensor([3, 6, 9])


## PyTorch tensors and NumPy

NumPy is a popular scientific Python numerical computing library.

And because of this, PyTorch has functionality to interact with it.

* Data in NumPy, want in PyTorch tensor -> `torch.from_numpy(ndarray)`
* PyTorch tensor -> NumPy -> `torch.Tensor.numpy()`

In [None]:
# NumPy array to tensor
import torch
import numpy as np

array = np.arange(1.0, 8.0)
tensor = torch.from_numpy(array).type(torch.float32) # warning: torch.from_numpy() keeps NumPy's default dtype (float64) unless converted; .type(torch.float32) converts it, which produces a new float32 copy
array, tensor

(array([1., 2., 3., 4., 5., 6., 7.]), tensor([1., 2., 3., 4., 5., 6., 7.]))

In [None]:
torch.arange(1.0, 8.0).dtype

torch.float32

In [None]:
# change the value of array, what will this do to tensor?
array = array + 1 # creates a NEW array and rebinds the name `array`; the original buffer is not modified
array, tensor # tensor is unchanged: `array` now refers to a new array, and tensor was built through .type(torch.float32), which already made a separate copy

(array([2., 3., 4., 5., 6., 7., 8.]), tensor([1., 2., 3., 4., 5., 6., 7.]))

In [None]:
#tensor to numpy array
tensor = torch.ones(7)
numpy_tensor = tensor.numpy()
tensor, numpy_tensor #going between pytorch and numpy carries the dtype from where it was created

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [None]:
# change the tensor, what happens to numpy_tensor?
tensor = tensor + 1 # creates a NEW tensor and rebinds the name `tensor`; the original storage is untouched
tensor, numpy_tensor # numpy_tensor is unchanged only because of the rebinding above: tensor.numpy() DOES share memory with its CPU source tensor, so an in-place change (e.g. tensor.add_(1)) would show up in numpy_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility (trying to take the random out of random)

In short, how a neural network learns:
'start with random numbers -> tensor operations -> update random numbers to try and make them better representations of the data -> again -> again -> again'

To reduce the randomness in neural networks and PyTorch comes the concept of a **random seed**.

essentially what the random seed does is flavor the randomness.

In [1]:
import torch

# draw two independent (3, 4) tensors of random values without setting a seed
random_tensor_A = torch.rand(size = (3, 4))
random_tensor_B = torch.rand(size = (3, 4))

# show both tensors, then the element-wise equality mask
print(random_tensor_A, random_tensor_B, sep = "\n")
print(torch.eq(random_tensor_A, random_tensor_B))

tensor([[0.5617, 0.2935, 0.1386, 0.1573],
        [0.4978, 0.6539, 0.6828, 0.9548],
        [0.5362, 0.0363, 0.6205, 0.5972]])
tensor([[0.9810, 0.5440, 0.8840, 0.6542],
        [0.6898, 0.6882, 0.2512, 0.8835],
        [0.6171, 0.7463, 0.2449, 0.9826]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [2]:
# Let's make some random but reproducible tensors
import torch

# set the random seed
RANDOM_SEED = 42 # different seed values give different "flavors" of randomness
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(3, 4)

torch.manual_seed(RANDOM_SEED) # need to reset the seed before each torch.rand() call to get the same values again
random_tensor_D = torch.rand(3, 4)

print(random_tensor_C)
print(random_tensor_D)
print(random_tensor_C == random_tensor_D)

tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors and PyTorch objects on the GPUs (and making faster computations)

* GPUs = faster computation on numbers, thanks to CUDA + NVIDIA hardware + PyTorch working behind the scenes to make everything work.

### 1. Getting a GPU

1. Easiest: Using google colab for a free GPU (options to upgrade as well)

2. Use your own GPU - takes a little bit of set up and requires the investment of purchasing a GPU, lots of options

3. Use cloud computing - GCP, AWS, Azure, these services allow you to rent computers on the cloud and access them

For 2 and 3, PyTorch + GPU drivers (CUDA) take a little bit of setting up. To do this, refer to the PyTorch setup documentation.

In [3]:
!nvidia-smi

Sun May 11 19:54:36 2025       
+-----------------------------------------------------------------------------------------+
| NVIDIA-SMI 550.54.15              Driver Version: 550.54.15      CUDA Version: 12.4     |
|-----------------------------------------+------------------------+----------------------+
| GPU  Name                 Persistence-M | Bus-Id          Disp.A | Volatile Uncorr. ECC |
| Fan  Temp   Perf          Pwr:Usage/Cap |           Memory-Usage | GPU-Util  Compute M. |
|                                         |                        |               MIG M. |
|   0  Tesla T4                       Off |   00000000:00:04.0 Off |                    0 |
| N/A   55C    P8              9W /   70W |       0MiB /  15360MiB |      0%      Default |
|                                         |                        |                  N/A |
+-----------------------------------------+------------------------+----------------------+
                                                

### 2. Check for GPU access with PyTorch



In [4]:
#check for GPU access with PyTorch
import torch
torch.cuda.is_available()

True

Since PyTorch is capable of running compute on the CPU or the GPU, it's best practice to set up device-agnostic code.

In [5]:
# set up device-agnostic code: use the GPU when CUDA is available, otherwise fall back to the CPU
device = "cuda" if torch.cuda.is_available() else "cpu"
device

'cuda'

In [6]:
# count the number of devices
torch.cuda.device_count()

1

### 3. Putting tensors and models on the GPU

The reason we want our tensors and models on the GPU is using a GPU results in faster computations.

We can discover patterns in our data faster and do more experiments

In [7]:
#create a tensor (default on the CPU)
tensor = torch.tensor([1, 2, 3], device = "cpu")

#Tensor not on GPU
print(tensor, tensor.device)

tensor([1, 2, 3]) cpu


In [8]:
# Move tensor to GPU (if available)

# the .to() method moves tensors, and can also move models, across devices
tensor_on_gpu = tensor.to(device)
tensor_on_gpu # the trailing ":0" in device='cuda:0' is the index of the GPU the tensor is stored on

tensor([1, 2, 3], device='cuda:0')

### 4. Moving tensors back to the CPU


In [9]:
# A tensor on the GPU can't be converted to NumPy directly.
# Catch and display the expected TypeError so "Run All" continues past this demonstration
# (on a CPU-only machine tensor_on_gpu lives on the CPU and the conversion simply succeeds).
try:
  tensor_on_gpu.numpy()
except TypeError as error:
  print(f"TypeError: {error}")

TypeError: can't convert cuda:0 device type tensor to numpy. Use Tensor.cpu() to copy the tensor to host memory first.

In [11]:
# to fix the gpu tensor with numpy issue, first copy it to the cpu with .cpu(), then convert it
tensor_back_on_cpu = tensor_on_gpu.cpu().numpy() # note: despite the name, the result is a NumPy array, not a tensor
tensor_back_on_cpu, tensor_back_on_cpu.device # NOTE(review): ndarray.device appears to exist only in NumPy >= 2.0; older versions would raise AttributeError here - confirm the NumPy version

(array([1, 2, 3]), 'cpu')

In [12]:
tensor_on_gpu

tensor([1, 2, 3], device='cuda:0')

## Exercises


In [17]:
#create a random tensor with shape (7,7)
import torch
random_tensor = torch.rand(7, 7)
random_tensor

tensor([[0.5932, 0.1123, 0.1535, 0.2417, 0.7262, 0.7011, 0.2038],
        [0.6511, 0.7745, 0.4369, 0.5191, 0.6159, 0.8102, 0.9801],
        [0.1147, 0.3168, 0.6965, 0.9143, 0.9351, 0.9412, 0.5995],
        [0.0652, 0.5460, 0.1872, 0.0340, 0.9442, 0.8802, 0.0012],
        [0.5936, 0.4158, 0.4177, 0.2711, 0.6923, 0.2038, 0.6833],
        [0.7529, 0.8579, 0.6870, 0.0051, 0.1757, 0.7497, 0.6047],
        [0.1100, 0.2121, 0.9704, 0.8369, 0.2820, 0.3742, 0.0237]])

In [21]:
random_tensor_2 = torch.rand(1, 7)
torch.matmul(random_tensor, random_tensor_2.T)

tensor([[1.0697],
        [1.5364],
        [1.4769],
        [1.1345],
        [1.0047],
        [1.3555],
        [0.8287]])

In [23]:
# set the random seed to 0 and repeat steps 2 & 3 to create reproducible results
RANDOM_SEED = 0
torch.manual_seed(RANDOM_SEED)
random_tensor_3 = torch.rand(size = (7, 7))
random_tensor_4 = torch.rand(size = (1, 7))

# (7, 7) @ (7, 1) -> (7, 1): the second tensor is transposed so the inner dimensions match
random_tensor_3 @ random_tensor_4.T

tensor([[1.8542],
        [1.9611],
        [2.2884],
        [3.0481],
        [1.7067],
        [2.5290],
        [1.7989]])

In [26]:
torch.cuda.is_available()

True

In [36]:
# creating reproducibility on the GPU

torch.cuda.manual_seed(1234) # seeds the random number generator of the current GPU
gpu_tensor_A = torch.rand(size = (2, 3), device = "cuda")

torch.cuda.manual_seed(1234) # reset the seed so the second draw repeats the first
gpu_tensor_B = torch.rand(size = (2 ,3), device = 'cuda')

print(gpu_tensor_A)
print(gpu_tensor_B)

tensor([[0.1272, 0.8167, 0.5440],
        [0.6601, 0.2721, 0.9737]], device='cuda:0')
tensor([[0.1272, 0.8167, 0.5440],
        [0.6601, 0.2721, 0.9737]], device='cuda:0')


In [39]:
#run matrix multiplication on the tensors stored on the GPU
matmul_gpu_tensor = torch.matmul(gpu_tensor_A, gpu_tensor_B.T)
matmul_gpu_tensor

tensor([[0.9792, 0.8358],
        [0.8358, 1.4578]], device='cuda:0')

In [42]:
#finding the max of the tensor above
torch.max(matmul_gpu_tensor), matmul_gpu_tensor.max()

(tensor(1.4578, device='cuda:0'), tensor(1.4578, device='cuda:0'))

In [43]:
#finding the min of the tensor above
torch.min(matmul_gpu_tensor)

tensor(0.8358, device='cuda:0')

In [44]:
# find the index of the max value of the above tensor; without a dim argument argmax returns the
# index into the flattened tensor (3 here, i.e. row 1, column 1 of the 2x2 result)
torch.argmax(matmul_gpu_tensor)

tensor(3, device='cuda:0')

In [50]:
matmul_gpu_tensor.argmin()

tensor(1, device='cuda:0')

In [54]:
# question 10 - drop every size-1 dimension from a seeded (1, 1, 1, 10) tensor
torch.manual_seed(7)
random_tensor = torch.rand(1, 1, 1, 10, device = 'cpu')

print(f"The output of the first tensor is: {random_tensor}")
print(f"\nThe shape of the first tensor is: {random_tensor.shape}")
random_tensor_2 = torch.squeeze(random_tensor)
print(f"\nThe output of the second tensor is: {random_tensor_2}")
print(f"\nThe shape of the second tensor is {random_tensor_2.shape}")



The output of the first tensor is: tensor([[[[0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297,
           0.3653, 0.8513]]]])

The shape of the first tensor is: torch.Size([1, 1, 1, 10])

The output of the second tensor is: tensor([0.5349, 0.1988, 0.6592, 0.6569, 0.2328, 0.4251, 0.2071, 0.6297, 0.3653,
        0.8513])

The shape of the second tensor is torch.Size([10])
