# 00 PyTorch fundamentals

## Tensors

In [1]:
# Libraries used by this notebook; torch is the one being explored.
# NOTE(review): pandas, matplotlib and random are not referenced in the cells visible here - confirm before removing.
import torch
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random

# Print the installed PyTorch version so readers can match it to the recorded outputs.
print(torch.__version__)

2.1.0+cu121


In [2]:
# scalar: a zero-dimensional tensor holding a single value
scalar = torch.tensor(7)
# Show, one per line: the tensor, its number of dimensions,
# the plain Python value it wraps, and its (empty) shape.
print(scalar, scalar.ndim, scalar.item(), scalar.shape, sep='\n')

tensor(7)
0
7
torch.Size([])


In [3]:
# vector: a one-dimensional tensor
vector = torch.tensor([7, 7])
# ndim is the number of axes, which equals the depth of bracket nesting in the literal above
print(vector, vector.ndim, vector.shape, sep='\n')


tensor([7, 7])
1
torch.Size([2])


In [4]:
# MATRIX: a two-dimensional tensor (2 rows x 2 columns)
MATRIX = torch.tensor([[7, 8],
                       [9, 10]])
# Indexing the first axis returns a row, which is itself a 1-D tensor.
print(MATRIX.ndim, MATRIX[0], MATRIX[1], MATRIX[0].ndim, MATRIX.shape, sep='\n')


2
tensor([7, 8])
tensor([ 9, 10])
1
torch.Size([2, 2])


In [5]:
# TENSOR: three dimensions - one outer block containing a 3x3 matrix
TENSOR = torch.tensor([[[1, 2, 3], [3, 4, 5], [4, 5, 6]]])
print(TENSOR, TENSOR.ndim, TENSOR.shape, sep='\n')
# Indexing the first axis drops it and leaves the inner 3x3 matrix.
print('\nthis is the 0-th element', TENSOR[0], sep='\n')


tensor([[[1, 2, 3],
         [3, 4, 5],
         [4, 5, 6]]])
3
torch.Size([1, 3, 3])

this is the 0-th element
tensor([[1, 2, 3],
        [3, 4, 5],
        [4, 5, 6]])


## Random Tensors

why random tensors?

Random tensors are important because many neural networks learn by starting with tensors full of random numbers and then adjusting those random numbers to better represent the data.


In [6]:
# Random tensor with 3 rows and 4 columns
random_tensor = torch.rand(size=(3, 4))
print(random_tensor, random_tensor.ndim, sep='\n')


tensor([[0.1854, 0.2608, 0.2583, 0.7433],
        [0.7270, 0.3442, 0.0831, 0.3876],
        [0.3406, 0.7382, 0.2276, 0.6820]])
2


In [7]:
# Random tensor shaped like an image: height x width x colour channels (R, G, B)
random_image_size_tensor = torch.rand(224, 224, 3)
print(f'{random_image_size_tensor.shape} {random_image_size_tensor.ndim}')


torch.Size([224, 224, 3]) 3


## Zeros and Ones

In [8]:
# Constant-filled 3x4 tensors: every entry is 0 or 1
zeros = torch.zeros(3, 4)
ones = torch.ones(3, 4)

# Print each tensor under its own heading. The last product is all zeros
# because multiplying element-wise by zero wipes out every random entry.
print('\nthis is the all one tensor', ones, sep='\n')
print('\nthis is all zero', zeros, sep='\n')
print('\nthis is the random_tensor', random_tensor, sep='\n')
print('\nthis is the combo below', zeros * random_tensor, sep='\n')



this is the all one tensor
tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]])

this is all zero
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])

this is the random_tensor
tensor([[0.1854, 0.2608, 0.2583, 0.7433],
        [0.7270, 0.3442, 0.0831, 0.3876],
        [0.3406, 0.7382, 0.2276, 0.6820]])

this is the combo below
tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])


In [9]:
# None of these tensors was created with an explicit dtype; show what each one received.
print(random_tensor.dtype, zeros.dtype, ones.dtype)

torch.float32 torch.float32 torch.float32


## Creating a range of tensors and tensors-like

In [10]:
# Use torch.arange(): the end value is exclusive, so (1, 11) yields 1..10 with the default step of 1
one_to_ten = torch.arange(1, 11)

print(one_to_ten, one_to_ten.shape, one_to_ten.ndim)

tensor([ 1,  2,  3,  4,  5,  6,  7,  8,  9, 10]) torch.Size([10]) 1


In [11]:
# "tensor-like": build a tensor of zeros with the same shape as an existing tensor
ten_zeros = torch.zeros_like(one_to_ten)
print(ten_zeros)

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0])


## Tensor Datatypes

**Note:** the three most common errors in PyTorch are:
1. Tensors not the right datatype
2. Tensors not the right shape
3. Tensors not on the right device

In [12]:
# Float 32 tensor, spelling out the three most important constructor arguments
float_32_tensor = torch.tensor(
    [3.0, 6.0, 9.0],
    dtype=None,           # datatype (e.g. float32, float16); None lets PyTorch pick it from the data
    device=None,          # device the tensor lives on; None leaves the choice to PyTorch
    requires_grad=False,  # whether operations on this tensor should track gradients
)

print(float_32_tensor, float_32_tensor.dtype, sep='\n')


tensor([3., 6., 9.])
torch.float32


In [13]:
# Convert the float32 tensor to half precision (float16) and confirm the new dtype.
float_16_tensor = float_32_tensor.type(torch.float16)
print(float_16_tensor.dtype)

torch.float16


In [14]:
# Multiply a float16 tensor by a float32 tensor; this combination runs without a dtype error.
float_16_tensor * float_32_tensor

tensor([ 9., 36., 81.])

In [15]:
# Integer tensor with an explicit 32-bit dtype.
int_32_tensor = torch.tensor([3,6,9],dtype=torch.int32)
print(int_32_tensor)
# float32 * int32 also works; the displayed result is a floating-point tensor.
float_32_tensor * int_32_tensor

tensor([3, 6, 9], dtype=torch.int32)


tensor([ 9., 36., 81.])

## Getting information from tensors
1. Tensors not the right datatype - to get the datatype of a tensor, use `tensor.dtype`
2. Tensors not the right shape - to get the shape of a tensor, use `tensor.shape`
3. Tensors not on the right device - to get the device of a tensor, use `tensor.device`

In [16]:
# A sample tensor plus the three attributes worth checking when debugging:
# datatype, shape and device.
some_tensor = torch.rand(size=(3, 4))
(some_tensor, some_tensor.dtype, some_tensor.shape, some_tensor.device)

(tensor([[0.8346, 0.7018, 0.5618, 0.6832],
         [0.2789, 0.7724, 0.2413, 0.7176],
         [0.1251, 0.7252, 0.9960, 0.4107]]),
 torch.float32,
 torch.Size([3, 4]),
 device(type='cpu'))

## Manipulating tensors (Tensor operations)

Tensor operations include:

* Addition
* Subtraction
* Multiplication
* Division
* Matrix Multiplication

In [17]:
tensor = torch.tensor([1, 2, 3])
# Scalar arithmetic applies to every element; torch.add / torch.mul are the
# function equivalents of the + and * operators.
(
    tensor + 10,
    tensor * 10,
    tensor - 10,
    torch.add(tensor, 10),
    torch.mul(tensor, 10),
)

(tensor([11, 12, 13]),
 tensor([10, 20, 30]),
 tensor([-9, -8, -7]),
 tensor([11, 12, 13]),
 tensor([10, 20, 30]))

### Matrix Multiplication

There are two main ways to perform multiplication in neural networks and deep learning:

1. element-wise multiplication
2. Matrix multiplication (Dot Product)



In [18]:
# Element-wise multiplication: each entry is multiplied by the entry at the same position.
print(tensor, '*', tensor)
print(f'equlas: {tensor*tensor}')

tensor([1, 2, 3]) * tensor([1, 2, 3])
equlas: tensor([1, 4, 9])


In [19]:
%%time
# Matrix multiplication / dot product.
# The %%time cell magic sits on the first line of the cell, which is where IPython expects cell magics.
# For two 1-D tensors torch.matmul returns their dot product as a 0-D tensor.
torch.matmul(tensor,tensor)

CPU times: user 1.26 ms, sys: 47 µs, total: 1.3 ms
Wall time: 1.49 ms


tensor(14)

In [20]:
%%time
# The same dot product written as an explicit Python loop, timed for comparison with torch.matmul.
value = 0
for i in range(len(tensor)):
    # accumulate the product of the matching elements
    value += tensor[i] * tensor[i]
print(value)

tensor(14)
CPU times: user 1.45 ms, sys: 0 ns, total: 1.45 ms
Wall time: 1.54 ms


In [21]:
# Two random matrices, both of shape (3, 2)
tensor_A = torch.rand(3, 2)
tensor_B = torch.rand(3, 2)
# Print them with a blank line in between.
print(tensor_A, end='\n\n')
print(tensor_B)

tensor([[0.4957, 0.3300],
        [0.3792, 0.5378],
        [0.8323, 0.7018]])

tensor([[0.3777, 0.6860],
        [0.6949, 0.8201],
        [0.1383, 0.3663]])


In [22]:
# Transpose: .T swaps the two axes, so the (3, 2) matrix becomes (2, 3).
tensor_B,tensor_B.T

(tensor([[0.3777, 0.6860],
         [0.6949, 0.8201],
         [0.1383, 0.3663]]),
 tensor([[0.3777, 0.6949, 0.1383],
         [0.6860, 0.8201, 0.3663]]))

In [23]:
# (3, 2) @ (2, 3) -> (3, 3): the inner dimensions have to match, hence the transpose of tensor_B
tensor_A @ tensor_B.T

tensor([[0.4136, 0.6151, 0.1895],
        [0.5122, 0.7045, 0.2495],
        [0.7958, 1.1538, 0.3722]])

## Tensor Aggregation
Finding the min, max, mean, sum, etc.

In [24]:
# Multiples of ten from 0 up to (but not including) 100
x = torch.arange(start=0, end=100, step=10)
(x, x.dtype)

(tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90]), torch.int64)

In [25]:
# Aggregations through the torch.* functions.
# Note: mean needs a floating-point dtype, so the int64 tensor is cast to float32 first.
torch.min(x),torch.max(x),torch.mean(x.type(torch.float32)), torch.sum(x)

(tensor(0), tensor(90), tensor(45.), tensor(450))

In [26]:
# The same aggregations as tensor methods.
# Note: mean needs a floating-point dtype, so the int64 tensor is cast to float32 first.
x.max(),x.min(),x.type(torch.float32).mean(), x.sum()

(tensor(90), tensor(0), tensor(45.), tensor(450))

## Finding the positional min and max

In [27]:
# Display x again for reference before looking up positions in it.
x

tensor([ 0, 10, 20, 30, 40, 50, 60, 70, 80, 90])

In [28]:
# argmin() / argmax() return the index position at which the minimum / maximum value occurs,
# not the value itself.
x.argmin(), x.argmax()

(tensor(0), tensor(9))

In [29]:
# Index x at the positions reported by argmin()/argmax() above (0 and 9) to get the actual values.
x[0],x[9]

(tensor(0), tensor(90))

## Reshaping, stacking, squeezing and unsqueezing
* reshape - reshape an input tensor to a defined shape
* view - return a view of an input tensor with a certain shape, keeping the same memory as the original tensor
* stacking - combine multiple tensors on top of each other (vstack) or side by side (hstack)
* squeeze - remove all `1` dimensions from a tensor
* unsqueeze - add a `1` dimension to a tensor
* permute - return a view of the input with dimensions permuted (swapped) in a certain way


In [30]:
# Floats 1.0 .. 9.0 (the end value is exclusive)
x = torch.arange(start=1.0, end=10.0)
(x, x.shape)

(tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.]), torch.Size([9]))

In [31]:
# Add an extra leading dimension: (9,) -> (1, 9).
# The product of the new shape (1 * 9) must equal the total number of elements.
x_reshaped = x.reshape((1, 9))
(x_reshaped, x_reshaped.shape)

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [32]:
# x itself still has its original 1-D shape after the reshape above.
x

tensor([1., 2., 3., 4., 5., 6., 7., 8., 9.])

In [33]:
# Create a (1, 9) view of x: a differently-shaped window onto the same data.
z = x.view(1,9)
z,z.shape

(tensor([[1., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [34]:
# Changing z changes x as well, because the view shares memory with the original tensor.
# Note this assignment modifies x in place, so later cells see the updated first element.
z[:,0] = 5
z, x

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]))

In [35]:
# Stack four copies of x on top of each other.
# torch.stack joins the tensors along a NEW axis: dim=0 makes each copy a row -> shape (4, 9);
# dim=1 would instead make each copy a column -> shape (9, 4).
x_stacked = torch.stack([x] * 4, dim=0)
x_stacked

tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.],
        [5., 2., 3., 4., 5., 6., 7., 8., 9.]])

In [36]:
# torch.squeeze() removes every dimension of size 1 from a tensor: (1, 9) -> (9,)
x_squeezed = x_reshaped.squeeze()
# Show the tensor before and after squeezing, reusing the stored result
# rather than calling .squeeze() again for every displayed field.
x_reshaped, x_reshaped.shape, x_squeezed, x_squeezed.shape, x_squeezed.ndim

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]),
 torch.Size([1, 9]),
 tensor([5., 2., 3., 4., 5., 6., 7., 8., 9.]),
 torch.Size([9]),
 1)

In [37]:
# torch.unsqueeze() adds a single dimension of size 1 to a target tensor at a specific dim;
# inserting it at position 0 turns (9,) back into (1, 9).
x_unsqueezed = x_squeezed.unsqueeze(0)
(x_unsqueezed, x_unsqueezed.shape)

(tensor([[5., 2., 3., 4., 5., 6., 7., 8., 9.]]), torch.Size([1, 9]))

In [38]:
# permute rearranges the dimensions of a tensor into the given order:
# new axis i is old axis order[i], so (1, 2, 3, 4) with order (2, 0, 3, 1) becomes (3, 1, 4, 2).
x_original = torch.rand(1, 2, 3, 4)
x_permuted = x_original.permute((2, 0, 3, 1))

# The permuted tensor is a view: it shares memory with x_original.
(x_original.shape, x_permuted.shape, x_original)

(torch.Size([1, 2, 3, 4]),
 torch.Size([3, 1, 4, 2]),
 tensor([[[[0.9927, 0.4479, 0.6576, 0.0494],
           [0.9227, 0.6603, 0.7990, 0.3244],
           [0.7292, 0.4948, 0.8366, 0.8429]],
 
          [[0.9443, 0.1181, 0.9490, 0.0819],
           [0.4825, 0.0521, 0.1899, 0.6888],
           [0.9424, 0.4456, 0.7833, 0.4818]]]]))

## indexing
this is very similar to numpy

In [39]:
# Integers 1..9 arranged as a single 3x3 matrix inside one outer dimension
x = torch.arange(start=1, end=10).reshape((1, 3, 3))
(x, x.shape)


(tensor([[[1, 2, 3],
          [4, 5, 6],
          [7, 8, 9]]]),
 torch.Size([1, 3, 3]))

In [40]:
# Indexing walks the dimensions from the outside in:
#   x[0]              -> the inner 3x3 matrix
#   x[0][0] / x[0,0]  -> its first row (two equivalent spellings)
#   x[0,0,0]          -> a single element
#   x[:,1,1]          -> the ':' keeps the outer dimension, so the result is 1-D
#   x[0,1,1]          -> the same element as a 0-D tensor
x[0],x[0][0],x[0,0],x[0,0,0],x[:,1,1],x[0,1,1],

(tensor([[1, 2, 3],
         [4, 5, 6],
         [7, 8, 9]]),
 tensor([1, 2, 3]),
 tensor([1, 2, 3]),
 tensor(1),
 tensor([5]),
 tensor(5))

## PyTorch tensors and NumPy
We may have data in NumPy and want to convert it to a PyTorch tensor, or the other way around.

* torch.from_numpy(ndarray) - NumPy array -> PyTorch tensor.
* torch.Tensor.numpy() - PyTorch tensor -> NumPy array.

In [41]:
# NumPy array -> PyTorch tensor.
array = np.arange(1.0,8.0)
tensor = torch.from_numpy(array)
# Note the different default float types: the NumPy array is float64 and the converted tensor
# keeps that dtype, whereas a tensor created directly with torch.arange is float32.
array,tensor,torch.arange(1.0,8.0).dtype


(array([1., 2., 3., 4., 5., 6., 7.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64),
 torch.float32)

In [42]:
# `array + 1` builds a NEW array and rebinds the name `array` to it, so the tensor created
# earlier still shows the old values.
# Caution: torch.from_numpy shares memory with its source array, so an in-place update
# (e.g. `array += 1`) WOULD be visible in the tensor.
array = array + 1
array,tensor


(array([2., 3., 4., 5., 6., 7., 8.]),
 tensor([1., 2., 3., 4., 5., 6., 7.], dtype=torch.float64))

In [43]:
# Tensor to NumPy
tensor = torch.ones(7)
# The resulting array keeps the tensor's dtype (float32 here) rather than NumPy's default float64.
numpy_from_tensor = tensor.numpy()

tensor,numpy_from_tensor

(tensor([1., 1., 1., 1., 1., 1., 1.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

In [44]:
# `tensor + 1` creates a NEW tensor and rebinds the name, so numpy_from_tensor keeps the old values.
# BE CAREFUL with in-place operators such as `+=`: Tensor.numpy() shares memory with the tensor,
# so an in-place update would change numpy_from_tensor as well.
tensor = tensor + 1
tensor, numpy_from_tensor

(tensor([2., 2., 2., 2., 2., 2., 2.]),
 array([1., 1., 1., 1., 1., 1., 1.], dtype=float32))

## Reproducibility (trying to take the random out of random)

We use the **Random Seed**

In [45]:
# Two independent draws with no seed set: the values are expected to differ,
# so the element-wise comparison should come out False everywhere.
random_tensor_A = torch.rand(size=(3, 4))
random_tensor_B = torch.rand(size=(3, 4))

print(random_tensor_A, random_tensor_B, random_tensor_A == random_tensor_B, sep='\n')



tensor([[7.4405e-01, 1.6946e-04, 8.4240e-01, 7.5357e-02],
        [4.2946e-01, 5.7417e-01, 3.6194e-01, 6.4052e-01],
        [9.6056e-01, 9.4782e-01, 5.4095e-01, 9.8076e-01]])
tensor([[0.1029, 0.6220, 0.6592, 0.3806],
        [0.8190, 0.7773, 0.0723, 0.7257],
        [0.6087, 0.7965, 0.6380, 0.8070]])
tensor([[False, False, False, False],
        [False, False, False, False],
        [False, False, False, False]])


In [46]:
# Random but reproducible tensors via a fixed seed
RANDOM_SEED = 42

# Seeding resets the generator, so it has to be repeated before EACH draw that should match.
torch.manual_seed(RANDOM_SEED)
random_tensor_C = torch.rand(size=(3, 4))

torch.manual_seed(RANDOM_SEED)
random_tensor_D = torch.rand(size=(3, 4))

print(random_tensor_C, random_tensor_D, random_tensor_C == random_tensor_D, sep='\n')


tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[0.8823, 0.9150, 0.3829, 0.9593],
        [0.3904, 0.6009, 0.2566, 0.7936],
        [0.9408, 0.1332, 0.9346, 0.5936]])
tensor([[True, True, True, True],
        [True, True, True, True],
        [True, True, True, True]])


## Running tensors and PyTorch objects on GPUs (and making faster computations)

### 1. Getting a GPU

1. Use Google Colab
2. Use your own
3. use cloud computing

### 2. Check for GPU access with PyTorch

In [48]:
# Check for GPU access with PyTorch (True when a CUDA device can be used).
torch.cuda.is_available()

True

In [49]:
# Device-agnostic setup: prefer the GPU when one is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'
device

'cuda'

Since PyTorch is capable of computing on both the GPU and the CPU, it is best practice to set up device-agnostic code:

E.g. run on GPU if available else default to CPU

In [51]:
# Count the number of CUDA devices PyTorch can see.
torch.cuda.device_count()


1

### 3. Putting tensors (and models) on the GPU

The reason we want our tensors/models on the GPU is because using a GPU results in faster computations

In [54]:
# Create a tensor on the CPU (requested explicitly here via device='cpu').
tensor = torch.tensor([1,2,3], device='cpu')

# Tensor not on GPU: print it together with the device it lives on.
print(tensor,tensor.device)


tensor([1, 2, 3]) cpu


In [55]:
# Move the tensor to the selected device (the GPU when available, otherwise it stays on the CPU)
tensor_on_gpu = tensor.to(device=device)
tensor_on_gpu


tensor([1, 2, 3], device='cuda:0')

### 4. Moving tensors back to the CPU

In [58]:
# If a tensor is on the GPU it cannot be converted to NumPy directly:
#
# tensor_on_gpu.numpy() # This won't work due to the device mismatch

# To fix this, first copy the tensor back to the CPU, then convert.
tensor_back_on_cpu = tensor_on_gpu.cpu()
tensor_back_on_cpu,tensor_back_on_cpu.numpy()

(tensor([1, 2, 3]), array([1, 2, 3]))