# 00.PyTorch Fundamentals

In [1]:
import torch
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
print(torch.__version__)

2.4.1


## a. Introduction to tensors

### i.creating tensors

created using **torch.tensor** - https://pytorch.org/docs/stable/tensors.html

#### + scalar

In [3]:
scalar = torch.tensor(7)
scalar

tensor(7)

In [5]:
# dimension of tensor
scalar.ndim

0

In [6]:
# to retrieve the scalar as an integer
scalar.item()

7

In [35]:
# check the datatype
scalar.dtype

torch.int64

#### + vector

In [7]:
vec = torch.tensor([5,2])
vec

tensor([5, 2])

In [8]:
vec.ndim

1

In [13]:
vec.shape

torch.Size([2])

#### + matrix

matrix and tensor is conventionally stored as uppercase variables

In [21]:
MAT = torch.tensor([[5,2],
                    [1,2]])
MAT

tensor([[5, 2],
        [1, 2]])

#### + tensor
It is a n-dimensional array of any shape starting from 0

In [23]:
TENS = torch.tensor([[[2,3,4],
                      [5,2,1],
                      [5,7,9]],
                    [[2,5,2],
                    [1,2,8],
                    [4,6,9]]])
TENS.shape, TENS.ndim

(torch.Size([2, 3, 3]), 3)

### ii. creating random tensors

they are important as parameters are initialised with random numbers

In [30]:
randn_tens = torch.rand(size=(2,1,3)) # create random number of uniform distribution between 0 and 1. eg: an image of HXWXchs
randn_tens

tensor([[[0.9445, 0.2271, 0.5071]],

        [[0.8499, 0.1605, 0.9990]]])

#### + impart reproduciblity
Note that we need to explicitely give the seed everytime before the tensor creation

In [141]:
RANDOM_SEED = 42
torch.manual_seed(RANDOM_SEED)

tens1 = torch.rand(size=(2,1,3)) 

torch.manual_seed(RANDOM_SEED)

tens2 = torch.rand(size=(2,1,3))
tens1==tens2

tensor([[[True, True, True]],

        [[True, True, True]]])

### iii. creating zero tensors

In [32]:
zeros = torch.zeros(size = (2,1,3))
zeros

tensor([[[0., 0., 0.]],

        [[0., 0., 0.]]])

### iv. Creating zeros-like
Used to create zero tensors of same size as another tensor. Mostly useful to,
* Initiate a parameter matrix with zero
* Nulltify the parameter matrix by multiplying with zero matrix of same size

In [13]:
zeros_like = torch.zeros_like(input = randn_tens)
zeros_like

tensor([[[0., 0., 0.]],

        [[0., 0., 0.]]])

### iv. creating ones tensors

In [33]:
ones = torch.ones(size = (2,1,3))
ones

tensor([[[1., 1., 1.]],

        [[1., 1., 1.]]])

### v. Creating range of tensors

In [12]:
torch.arange(10,0,-2)

tensor([10,  8,  6,  4,  2])

## b. Tensor datatypes
the default datatype is float32 if float values are given as input and will be int64, if inputs are integers

for floating points,
* 32bit - till $10^{-7}$ precision. eg: 5.0011525 x $10^8$. Single precision
* 64bit - till $10^{-15} - 10^{-17}$ precision. Double precision

for integers,
* uint (unsigned) - used if tensors only contain non-negative values. The range is doubled due to absence of negative values

In [15]:
float_32_tens = torch.tensor([3.,6,9], dtype=None)
float_32_tens.dtype

torch.float32

In [18]:
float_16_tens = torch.tensor([3.,6,9], dtype=torch.float16)
float_16_tens.dtype

torch.float16

In [22]:
# convert data type
float_32_tens = float_32_tens.type(torch.float16)
float_32_tens.dtype

torch.float16

In [23]:
float_32_tens.device

device(type='cpu')

In [28]:
# convert tensor datatype, device
float_16_tens.to(dtype=torch.float32, device="cuda:0")

tensor([3., 6., 9.], device='cuda:0')

## Tensor operations
The main issues with tensor operations,
* Tensors not having right datatype (rarely gives an error, therefore better to have of uniform type) `tensor.dtype`
* Tensors not in right shape - `tensor.shape`
* tensors not on the right device (operations between cuda tensor and cpu tensor wont work) - `tensor.device`

### i. matrix multiplication

In [2]:
mat1 = torch.rand(size=(2,3))
mat2 = torch.rand(size=(3,6))

In [46]:
# matrix multiplication
multi = torch.matmul(mat1, mat2)
multi

tensor([[0.9872, 1.0764, 0.7090, 0.4307, 0.5358, 0.9093],
        [0.5132, 0.7958, 0.8043, 0.1024, 0.3107, 0.8639]])

### ii. Tensor aggregation
#### + min, max, sum etc.

In [66]:
vec = torch.tensor([5,15,10, 12, 5])

In [63]:
torch.max(multi, axis=0), multi.max(axis=1) # if required along an axis. both the functions work fine

(torch.return_types.max(
 values=tensor([0.9872, 1.0764, 0.8043, 0.4307, 0.5358, 0.9093]),
 indices=tensor([0, 0, 1, 0, 0, 0])),
 torch.return_types.max(
 values=tensor([1.0764, 0.8639]),
 indices=tensor([1, 5])))

In [61]:
# note that the operation wont work with integer datatype
# torch.mean(vec)
torch.mean(vec.type(torch.float32))

tensor(10.)

#### + find the position (argmin and argmax)

used with sigmoid and softmax to find the index of the maximum probability

In [67]:
torch.argmin(vec) # gives the index value. Gives the first occuring minimum

tensor(0)

### iii. hermitian
transpose conjugate

In [3]:
mat1.H

tensor([[0.5574, 0.7834],
        [0.0200, 0.3291],
        [0.7185, 0.0979]])

## Tensor reshaping, stack, squeezing and unsqueezing
* reshaping or view - but view shares the same memory. Therefore, shows the same tensor from different perspective. so, changing a view's value will change the original tensor as well
* stack
* squeeze - removes `1` dimensions from a tensor
* unsqueeze - adds `1` dimensions wherever required
* permute - return a view of a tensor with dimensions permutted (swapped) in a certain way

In [123]:
x = torch.arange(1.,10.)

### i. Reshape

In [74]:
x.reshape(3,3) # always fills rowswise

tensor([[1., 2., 3.],
        [4., 5., 6.],
        [7., 8., 9.]])

### ii. view

In [94]:
z = x.view(3,3)
z += 1
z, x

(tensor([[ 5.,  6.,  7.],
         [ 8.,  9., 10.],
         [11., 12., 13.]]),
 tensor([ 5.,  6.,  7.,  8.,  9., 10., 11., 12., 13.]))

### iii. stack
takes a list of tensors and concatenates along the described dimension. Always will be able to concatenate along an extra dimension. That is, eg: 3X3, 3X3 concatenation along `dim=0`, will give 2X3X3, similarly, if `dim=2`, it gives 3X3X2

In [89]:
# along 0th dimension - default
z = torch.stack([x,x], dim=0) # stacked vertically
z

tensor([[ 4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.],
        [ 4.,  5.,  6.,  7.,  8.,  9., 10., 11., 12.]])

In [124]:
# along 1st dimension
torch.stack((x,x), dim=1)

tensor([[1., 1.],
        [2., 2.],
        [3., 3.],
        [4., 4.],
        [5., 5.],
        [6., 6.],
        [7., 7.],
        [8., 8.],
        [9., 9.]])

In [102]:
torch.stack([z,z], dim=2).shape

torch.Size([3, 3, 2])

### iv. squeeze

In [111]:
x = torch.ones(size=(1,2,3,1,2,1))
x.shape, torch.squeeze(x).shape

(torch.Size([1, 2, 3, 1, 2, 1]), torch.Size([2, 3, 2]))

### v. unsqueeze

In [116]:
y = torch.squeeze(x)
torch.unsqueeze(y, dim=-1).shape

torch.Size([9, 1])

### vi. permute
swap the dimension values. mostly used with images. Will also share the same memory

In [125]:
x = torch.rand(size=(4,2,3))
x

tensor([[[1.8782e-01, 9.6123e-01, 8.3677e-01],
         [5.0146e-01, 9.6511e-01, 5.6166e-01]],

        [[3.3834e-01, 6.7815e-01, 6.0816e-01],
         [8.6678e-01, 8.5294e-04, 9.5655e-01]],

        [[4.5870e-01, 5.3338e-01, 8.9574e-01],
         [2.8480e-01, 3.4833e-01, 9.5908e-02]],

        [[7.6449e-01, 4.9989e-01, 3.2407e-01],
         [8.7857e-01, 4.7287e-01, 9.1087e-01]]])

In [128]:
y = torch.permute(x, dims=(2,0,1)) # indices of dimensions
y

tensor([[[1.8782e-01, 5.0146e-01],
         [3.3834e-01, 8.6678e-01],
         [4.5870e-01, 2.8480e-01],
         [7.6449e-01, 8.7857e-01]],

        [[9.6123e-01, 9.6511e-01],
         [6.7815e-01, 8.5294e-04],
         [5.3338e-01, 3.4833e-01],
         [4.9989e-01, 4.7287e-01]],

        [[8.3677e-01, 5.6166e-01],
         [6.0816e-01, 9.5655e-01],
         [8.9574e-01, 9.5908e-02],
         [3.2407e-01, 9.1087e-01]]])

In [130]:
x[0,0,0] = 5 # since they share the same memory
y[0,0,0]

tensor(5.)

## PyTorch and NumPy

### i. numpy -> tensor
Note: default of numpy is double point precision. Therefore, change it always

In [137]:
arr = np.arange(8.)
tens = torch.from_numpy(arr).type(torch.float32)
tens

tensor([0., 1., 2., 3., 4., 5., 6., 7.])

### ii. tensor -> numpy

In [138]:
tens.numpy()

array([0., 1., 2., 3., 4., 5., 6., 7.], dtype=float32)

## Running tensors on GPUs

### i. check availbility

In [142]:
torch.cuda.is_available()

True

### ii. count number of gpus
required if different models are to be run on different gpus

In [145]:
torch.cuda.device_count()

1

### iii. setup gpu

In [6]:
device = "cuda:0" if torch.cuda.is_available() else "cpu" # :0 is the index of the gpu. since we only have 1, can be ignored

### iv. Putting tensors (models) on the GPU

In [4]:
tens = torch.arange(10., device ="cpu") # cpu is default
tens, tens.device

(tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.]), device(type='cpu'))

In [7]:
tens_on_gpu = tens.to(device = device)
tens_on_gpu

tensor([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], device='cuda:0')

### Moving tensors back to cpu
Used since numpy can work only on cpu

In [183]:
tens_on_gpu.cpu().numpy()

array([0., 1., 2., 3., 4., 5., 6., 7., 8., 9.], dtype=float32)

### check if a tensor is running on gpu

In [10]:
tens_on_gpu.is_cuda

True