# PyTorch Fundamentals

Resource Notebook: https://www.learnpytorch.io/00_pytorch_fundamentals/ 

In [43]:
import pandas as pd
import numpy as np
import torch
import sklearn
import matplotlib
import torchinfo, torchmetrics

# Check PyTorch access (drawing a random 3x3 tensor should print without error)
print(torch.randn(3, 3))

# Check for GPU (prints True only when a CUDA device is available; False on a CPU-only machine)
print(torch.cuda.is_available())

tensor([[-0.8494, -0.8227, -0.6107],
        [ 0.9762, -1.6806,  0.0796],
        [ 0.9494,  1.0556,  0.0749]])
True


## Introduction to Tensors 

### Creating Tensors
PyTorch tensors are created using `torch.tensor()`. See the [`torch.Tensor` documentation](https://docs.pytorch.org/docs/stable/tensors.html).

In [44]:
# Scalar tensor: a single number wrapped in a tensor
scalar = torch.tensor(7)
print(scalar)

tensor(7)


In [45]:
# .ndim reports how many dimensions a tensor has; a scalar has none
print(scalar.ndim)  # Output: 0

0


In [46]:
# .item() pulls the single value out of a one-element tensor as a plain Python number
print(scalar.item())  # Output: 7

7


In [47]:
# Vector:
# A vector is a one-dimensional tensor that can hold many numbers.
# A vector has a magnitude and direction.
vector = torch.tensor([7, 7])
print(vector)
print(vector.ndim)  # Output: 1
print(vector.shape)  # Output: torch.Size([2])

# You can tell how many dimensions a PyTorch tensor has by counting the square brackets on the outside ([);
# you only need to count one side. `vector` has one, which matches the .ndim printed above.
# Another important concept for tensors is their shape attribute, which tells you how the elements inside them are arranged.

tensor([7, 7])
1
torch.Size([2])


In [48]:
# Matrix: a two-dimensional tensor built from a list of rows
MATRIX = torch.tensor([
    [7, 8],
    [9, 10],
])
MATRIX


tensor([[ 7,  8],
        [ 9, 10]])

In [49]:
# Check number of dimensions (MATRIX is a list of lists, so two)
print(MATRIX.ndim)

2


In [50]:
# Check the shape of MATRIX (rows, columns)
MATRIX.shape

torch.Size([2, 2])

In [51]:
# We get the output torch.Size([2, 2]) because MATRIX is two elements deep and two elements wide.

In [52]:
# Let's create a three-dimensional tensor: one block containing a 3x3 grid
TENSOR = torch.tensor([
    [
        [1, 2, 3],
        [3, 6, 9],
        [2, 4, 5],
    ],
])
TENSOR

# Tensors can represent almost anything.
# The one we just created could be the sales numbers for a steak and almond butter store


tensor([[[1, 2, 3],
         [3, 6, 9],
         [2, 4, 5]]])

In [53]:

# Check number of dimensions for TENSOR (three levels of nested brackets)
TENSOR.ndim

3

In [54]:
# Check shape of TENSOR, then step inside it: the block at index 0, followed by each of its rows
print(TENSOR.shape)
print(TENSOR[0])
for row_index in range(3):
    print(TENSOR[0][row_index])
# That means there's 1 dimension of 3 by 3

torch.Size([1, 3, 3])
tensor([[1, 2, 3],
        [3, 6, 9],
        [2, 4, 5]])
tensor([1, 2, 3])
tensor([3, 6, 9])
tensor([2, 4, 5])


In [55]:
# Redefine TENSOR with a second 3x3 block, so the first dimension now has size 2
TENSOR = torch.tensor([
    [[1, 2, 3],
     [3, 6, 9],
     [2, 4, 5]],
    [[4, 5, 6],
     [7, 8, 9],
     [1, 2, 3]],
])

In [56]:
# Check shape of TENSOR, then look at the second block (index 1) and each of its rows
print(TENSOR.shape)
print(TENSOR[1])
for row_index in range(3):
    print(TENSOR[1][row_index])

torch.Size([2, 3, 3])
tensor([[4, 5, 6],
        [7, 8, 9],
        [1, 2, 3]])
tensor([4, 5, 6])
tensor([7, 8, 9])
tensor([1, 2, 3])


### Random Tensors 

Why random Tensors?

Random tensors are important because the way many neural networks learn is that they start with tensors full of random numbers and then adjust those random numbers to better represent the data.

`Start with random numbers -> look at data -> update random numbers -> look at data -> update random numbers...`

In [57]:
# Create a random tensor with PyTorch (3 rows, 4 columns), then show it alongside its dtype
random_tensor = torch.rand(size=(3, 4))
random_tensor, random_tensor.dtype

(tensor([[0.7678, 0.3502, 0.9142, 0.3372],
         [0.3015, 0.8417, 0.7773, 0.9543],
         [0.0314, 0.0236, 0.4368, 0.7975]]),
 torch.float32)

The flexibility of `torch.rand()` is that we can adjust the size to be whatever we want.

For example, say you wanted a random tensor in the common image shape of `[224, 224, 3] ([height, width, color_channels])`.

In [58]:
# Create a random tensor of size (224, 224, 3), i.e. (height, width, color_channels)
random_image_size_tensor = torch.rand(size=(224, 224, 3)) 
random_image_size_tensor.shape, random_image_size_tensor.ndim

(torch.Size([224, 224, 3]), 3)

In [59]:
# Print the tensor; PyTorch abbreviates large tensors with "..." rather than showing every value
print(random_image_size_tensor)

tensor([[[0.4095, 0.1798, 0.5829],
         [0.2611, 0.8119, 0.8452],
         [0.3881, 0.5583, 0.1888],
         ...,
         [0.6504, 0.1764, 0.7407],
         [0.0819, 0.7136, 0.5554],
         [0.6536, 0.2022, 0.0829]],

        [[0.9268, 0.4982, 0.8938],
         [0.2995, 0.3782, 0.3427],
         [0.5673, 0.2583, 0.5941],
         ...,
         [0.0545, 0.6132, 0.2157],
         [0.6785, 0.7469, 0.7410],
         [0.4631, 0.0079, 0.2257]],

        [[0.3690, 0.2505, 0.0276],
         [0.0657, 0.6243, 0.9725],
         [0.6991, 0.7354, 0.3525],
         ...,
         [0.9812, 0.1534, 0.1935],
         [0.3515, 0.8059, 0.1181],
         [0.0202, 0.1972, 0.4277]],

        ...,

        [[0.9189, 0.9009, 0.2838],
         [0.4253, 0.0529, 0.2944],
         [0.2383, 0.0530, 0.9549],
         ...,
         [0.7745, 0.4159, 0.9507],
         [0.9742, 0.6971, 0.2229],
         [0.5790, 0.3398, 0.3865]],

        [[0.6116, 0.3193, 0.0986],
         [0.4722, 0.5531, 0.0504],
         [0.

In [60]:
# A larger random tensor: 1000 x 1000 x 5 (5,000,000 values)
special_tensor = torch.rand(size=(1000, 1000, 5))
special_tensor.shape, special_tensor.ndim

(torch.Size([1000, 1000, 5]), 3)

In [61]:
# Print it; as the "..." in the printout shows, large tensors are summarised rather than shown in full
print(special_tensor)

tensor([[[0.1444, 0.1480, 0.3024, 0.8242, 0.4510],
         [0.8559, 0.2102, 0.4115, 0.2883, 0.2356],
         [0.7640, 0.0621, 0.2759, 0.2927, 0.7002],
         ...,
         [0.0221, 0.9111, 0.7872, 0.2912, 0.8392],
         [0.1114, 0.5727, 0.3387, 0.9907, 0.7441],
         [0.8977, 0.3996, 0.6004, 0.4508, 0.3760]],

        [[0.6832, 0.9299, 0.6423, 0.6426, 0.6391],
         [0.3856, 0.7977, 0.5495, 0.1194, 0.5019],
         [0.0998, 0.7185, 0.3531, 0.7862, 0.1268],
         ...,
         [0.3699, 0.5720, 0.1237, 0.5251, 0.0534],
         [0.9968, 0.5695, 0.8580, 0.1504, 0.4081],
         [0.3072, 0.8456, 0.6237, 0.3541, 0.6169]],

        [[0.8734, 0.8331, 0.1461, 0.8958, 0.4781],
         [0.1108, 0.9811, 0.4676, 0.5918, 0.3324],
         [0.6081, 0.4571, 0.7052, 0.0905, 0.7408],
         ...,
         [0.5009, 0.2168, 0.4987, 0.9670, 0.7554],
         [0.0137, 0.9717, 0.1432, 0.8085, 0.1045],
         [0.0283, 0.2957, 0.0690, 0.5918, 0.5280]],

        ...,

        [[0.9557, 0.

### Zeroes and Ones 

Sometimes we may just want to fill tensors with zeroes and ones. This typically happens a lot with masking (like masking some of the values in one tensor with zeroes to let a model know not to learn them).

In [62]:
# Create a tensor of all zeros (no dtype is given; the output shows it comes out as float32)
zeros = torch.zeros(size=(3, 4))
zeros, zeros.dtype

(tensor([[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]),
 torch.float32)

In [63]:
# Create a tensor of all ones (no dtype is given; the output shows it comes out as float32)
ones = torch.ones(size=(3, 4))
ones, ones.dtype

(tensor([[1., 1., 1., 1.],
         [1., 1., 1., 1.],
         [1., 1., 1., 1.]]),
 torch.float32)

### Creating a Range of Values and Tensors Like Other Tensors

Sometimes you might want a range of numbers, such as 1 to 10 or 0 to 100. 
You can use `torch.arange(start, end, step)` to do so. 

Where: 
* start = start of range (inclusive)
* end = end of range (exclusive, e.g. use 11 to include 10)
* step = the gap between consecutive values

In [64]:
# Create a range of values 0 to 10 (end is exclusive, so end=11 is needed to include 10)
zero_to_ten = torch.arange(start=0, end=11, step=1)
zero_to_ten

tensor([ 0,  1,  2,  3,  4,  5,  6,  7,  8,  9, 10])

Sometimes you might want a tensor of a certain type with the same shape as another tensor. For example, a tensor of all zeros with the same shape as a previous tensor. 

To do so you can use `torch.zeros_like(input)` or `torch.ones_like(input)`, which return a tensor filled with zeros or ones, respectively, in the same shape as the input.


In [65]:
# Can also create a tensor of zeros similar to another tensor.
# Note: despite the name, this holds 11 zeros because zero_to_ten has 11 elements (0 through 10).
ten_zeros = torch.zeros_like(input=zero_to_ten) # will have same shape
ten_zeros

tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [66]:
# Confirm that zeros_like kept the shape of the input tensor
print(zero_to_ten.shape)
print(ten_zeros.shape)

torch.Size([11])
torch.Size([11])


### Tensor Datatypes

**Note:** Tensor datatype issues are one of the 3 big errors you'll run into with PyTorch and deep learning:
1. Tensors not right datatype
2. Tensors not right shape
3. Tensors not on the right device

In [67]:
# Default datatype for tensors is float32
float_32_tensor = torch.tensor([3.0, 6.0, 9.0],
                               dtype=None, # datatype of the tensor (e.g. float32, float16); None lets PyTorch infer it from the data (float32 here)
                               device=None, # device the tensor is stored on (e.g. CPU, GPU); None uses the current default device
                               requires_grad=False) # if True, operations performed on the tensor are recorded so gradients can be tracked

float_32_tensor.shape, float_32_tensor.dtype, float_32_tensor.device

(torch.Size([3]), torch.float32, device(type='cpu'))

See the [PyTorch documentation for a list of all available tensor datatypes](https://pytorch.org/docs/stable/tensors.html#data-types)

### Getting information from Tensors

Once you've created tensors (or someone else or a PyTorch module has created them for you), you might want to get some information from them.

We've seen these before but three of the most common attributes you'll want to find out about tensors are:

* shape - what shape is the tensor? (some operations require specific shape rules)
* dtype - what datatype are the elements within the tensor stored in?
* device - what device is the tensor stored on? (usually GPU or CPU)

Let's create a random tensor and find out details about it.

In [68]:
# Build a random 3x4 tensor to inspect
some_tensor = torch.rand(size=(3, 4))

# Report the tensor itself, then the three attributes checked most often: shape, dtype and device
print(some_tensor)
print(f"Shape of tensor: {some_tensor.shape}")
print(f"Datatype of tensor: {some_tensor.dtype}")
print(f"Device tensor is stored on: {some_tensor.device}") # will default to CPU

tensor([[0.1311, 0.8406, 0.5795, 0.0614],
        [0.5261, 0.7681, 0.3020, 0.5147],
        [0.5856, 0.1236, 0.9616, 0.6575]])
Shape of tensor: torch.Size([3, 4])
Datatype of tensor: torch.float32
Device tensor is stored on: cpu


### Manipulating Tensors (Tensor Operations)

In deep learning, data (images, text, video, audio, protein structures, etc) gets represented as tensors.

A model learns by investigating those tensors and performing a series of operations (could be 1,000,000s+) on tensors to create a representation of the patterns in the input data.

These operations are often a wonderful dance between:

* Addition
* Subtraction
* Multiplication (element-wise)
* Division
* Matrix multiplication

And that's it. Sure there are a few more here and there but these are the basic building blocks of neural networks.

Stacking these building blocks in the right way, you can create the most sophisticated of neural networks (just like lego!).

#### Basic Operations


In [69]:
# Create a tensor of values and add a number to it (10 is added to every element; the result is only displayed)
tensor = torch.tensor([1, 2, 3])
tensor + 10

tensor([11, 12, 13])

In [70]:
# Multiply it by 10 (the result is displayed, but `tensor` itself is not modified)
tensor * 10

tensor([10, 20, 30])

Notice how the tensor values above didn't end up being tensor([110, 120, 130]), this is because the values inside the tensor don't change unless they're reassigned.

In [71]:
# Tensors don't change unless reassigned: `tensor` still holds its original values
tensor

tensor([1, 2, 3])

Let's subtract a number and this time we'll reassign the tensor variable.

In [72]:
# Subtract and reassign: this time the name `tensor` is rebound to the result
tensor = tensor - 10
tensor

tensor([-9, -8, -7])

In [73]:
# Add and reassign (adding 10 back restores the values [1, 2, 3])
tensor = tensor + 10
tensor

tensor([1, 2, 3])

PyTorch also has a bunch of built-in functions like torch.mul() (short for multiplication) and torch.add() to perform basic operations.

In [74]:
# Can also use torch functions (torch.multiply is an alias of torch.mul)
torch.multiply(tensor, 10)

tensor([10, 20, 30])

In [75]:
# Original tensor is still unchanged because the result above was not assigned back
tensor

tensor([1, 2, 3])

However, it's more common to use the operator symbols like * instead of torch.mul()

In [76]:
# Element-wise multiplication: each element is multiplied by the element at the same index (0->0, 1->1, 2->2)
print(tensor, "*", tensor)
print("Equals:", tensor * tensor)

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


#### Matrix Multiplication (is all you need)

Two main ways of performing multiplication in neural networks and deep learning:
1. Element-wise multiplication
2. Matrix Multiplication 

In [77]:
# Element-wise multiplication, shown again for contrast with matrix multiplication: [1*1, 2*2, 3*3]
print(tensor, '*', tensor)
print(f"Equals: {tensor * tensor}")

tensor([1, 2, 3]) * tensor([1, 2, 3])
Equals: tensor([1, 4, 9])


In [78]:
# Matrix multiplication: for two 1-D tensors torch.matmul returns the dot product (1*1 + 2*2 + 3*3 = 14)
torch.matmul(tensor, tensor)

tensor(14)

In [79]:
# The "@" operator also performs matrix multiplication, though the explicit torch.matmul() is recommended instead
tensor @ tensor

tensor(14)

You can do matrix multiplication by hand but it's not recommended.

The in-built `torch.matmul()` method is faster.

In [80]:
%%time
# Matrix multiplication by hand 
# (avoid doing operations with for loops at all cost, they are computationally expensive)
value = 0
for i in range(len(tensor)):
  value += tensor[i] * tensor[i]
value

CPU times: total: 0 ns
Wall time: 1.5 ms


tensor(14)

In [81]:
%%time
# Same result using the built-in torch.matmul, timed for comparison with the loop version
torch.matmul(tensor, tensor)

CPU times: total: 0 ns
Wall time: 0 ns


tensor(14)

### One of the most common errors in deep learning (shape errors)

Because much of deep learning is multiplying and performing operations on matrices and matrices have a strict rule about what shapes and sizes can be combined, one of the most common errors you'll run into in deep learning is shape mismatches.

In [82]:
# Two 3x2 matrices. Matrix multiplication needs the inner dimensions to match
# (columns of the first == rows of the second); here they are 2 and 3, so it fails.
tensor_A = torch.tensor([[1, 2],
                         [3, 4],
                         [5, 6]], dtype=torch.float32)

tensor_B = torch.tensor([[7, 10],
                         [8, 11],
                         [9, 12]], dtype=torch.float32)

# Demonstrate the shape error without stopping "Run All": attempt the multiplication,
# catch the RuntimeError PyTorch raises, and show its message.
try:
    torch.matmul(tensor_A, tensor_B)
except RuntimeError as shape_error:
    print(f"Shape error: {shape_error}")

We can make matrix multiplication work between `tensor_A` and `tensor_B` by making their inner dimensions match.

One of the ways to do this is with a transpose (switch the dimensions of a given tensor).

You can perform transposes in PyTorch using either:
* `torch.transpose(input, dim0, dim1)` - where input is the desired tensor to transpose and `dim0` and `dim1` are the dimensions to be swapped. 
* `tensor.T` - where tensor is the desired tensor to transpose

In [83]:
# View tensor_A and tensor_B (both are 3 rows by 2 columns)
print(tensor_A)
print(tensor_B)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7., 10.],
        [ 8., 11.],
        [ 9., 12.]])


In [84]:
# View tensor_A next to the transpose of tensor_B (.T turns the 3x2 tensor into a 2x3 one)
print(tensor_A)
print(tensor_B.T)

tensor([[1., 2.],
        [3., 4.],
        [5., 6.]])
tensor([[ 7.,  8.,  9.],
        [10., 11., 12.]])


In [85]:
# The operation works when tensor_B is transposed: (3, 2) @ (2, 3) -> (3, 3)
print(f"Original shapes: tensor_A = {tensor_A.shape}, tensor_B = {tensor_B.shape}\n")
print(f"New shapes: tensor_A = {tensor_A.shape} (same as above), tensor_B.T = {tensor_B.T.shape}\n")
print(f"Multiplying: {tensor_A.shape} * {tensor_B.T.shape} <- inner dimensions match\n")
print("Output:\n")
# The result takes its shape from the outer dimensions: rows of tensor_A by columns of tensor_B.T
output = torch.matmul(tensor_A, tensor_B.T)
print(output) 
print(f"\nOutput shape: {output.shape}")

Original shapes: tensor_A = torch.Size([3, 2]), tensor_B = torch.Size([3, 2])

New shapes: tensor_A = torch.Size([3, 2]) (same as above), tensor_B.T = torch.Size([2, 3])

Multiplying: torch.Size([3, 2]) * torch.Size([2, 3]) <- inner dimensions match

Output:

tensor([[ 27.,  30.,  33.],
        [ 61.,  68.,  75.],
        [ 95., 106., 117.]])

Output shape: torch.Size([3, 3])


### Finding the min, max, mean, sum, etc (aggregation)

There are ways to aggregate tensors (go from more values to fewer values).

In [86]:
# Values 0, 10, ..., 90 (end is exclusive); print the dtype to see that integer arguments give an integer tensor
x = torch.arange(start=0, end=100, step=10)
print(x.dtype)

torch.int64


In [87]:
# Perform aggregation
print(f"Minimum: {x.min()}")
print(f"Maximum: {x.max()}")
# Calling x.mean() directly on this int64 tensor raises an error, so convert to float32 first
print(f"Mean: {x.type(torch.float32).mean()}") # won't work without float datatype, mean() requires a float tensor dtype not int
print(f"Sum: {x.sum()}")

Minimum: 0
Maximum: 90
Mean: 45.0
Sum: 450


You can also do the same as above using torch methods

In [88]:
# Same aggregations via the torch.* functions; the mean still needs a float tensor
torch.max(x), torch.min(x), torch.mean(x.type(torch.float32)), torch.sum(x)

(tensor(90), tensor(0), tensor(45.), tensor(450))

### Positional min/max

You can also find the index of a tensor where the max or minimum occurs with torch.argmax() and torch.argmin() respectively.

This is helpful in case you just want the position where the highest (or lowest) value is and not the actual value itself.

In [None]:
# Tensor of 10, 20, ..., 90
tensor = torch.arange(start=10, end=100, step=10)
print(f"Tensor: {tensor}")

# argmax / argmin give the index (position) of the largest and smallest values, not the values themselves
print(f"Index where max value occurs: {tensor.argmax()}")
print(f"Index where min value occurs: {tensor.argmin()}")

Tensor: tensor([10, 20, 30, 40, 50, 60, 70, 80, 90])
Index where max value occurs: 8
Index where min value occurs: 0
