<a href="https://colab.research.google.com/github/AbhishekKaushikCV/AbhishekKaushikCV/blob/main/Intro_PyTorch.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Tutorial 2: Introduction to PyTorch
- PyTorch is an open source machine learning framework that allows you to write your own neural networks and optimize them efficiently

In [None]:
## Standard libraries
import os
import math
import numpy as np 
import time

## Imports for plotting
import matplotlib.pyplot as plt
%matplotlib inline 
from IPython.display import set_matplotlib_formats
set_matplotlib_formats('svg', 'pdf') # For export
from matplotlib.colors import to_rgba
import seaborn as sns
sns.set()

## Progress bar
from tqdm.notebook import tqdm

### The basics of PyTorch

In [None]:
import torch

# Show which PyTorch build is installed so results can be tied to a version.
print("Using torch:", torch.__version__)

Using torch: 1.10.0+cu111


In [None]:
# Fix the seed of the global random number generator so that the random
# tensors created below are the same on every run.
torch.manual_seed(seed=42)

<torch._C.Generator at 0x7fa53284d650>

### Tensors: Different functions to create tensors
- `torch.Tensor()`: allocates memory without initializing it, so the tensor contains whatever values were already in that memory
- `torch.zeros()`: creates a tensor filled with zeros
- `torch.ones()`: creates a tensor filled with ones
- `torch.rand()`: creates a tensor with random values sampled uniformly between 0 and 1
- `torch.randn()`: creates a tensor with random values sampled from a normal distribution with mean 0 and variance 1
- `torch.arange(N, M)`: creates a tensor containing the values N, N+1, N+2, ..., M-1 (the end value M is excluded)


In [None]:
# torch.Tensor(): the memory is allocated but not initialized, so the printed
# values are arbitrary.
# Shape (2, 3, 4): 2 blocks, each with 3 rows and 4 columns.
x = torch.Tensor(2, 3, 4)
print(x)

tensor([[[8.8319e-02, 3.0728e-41, 7.0065e-44, 7.0065e-44],
         [6.3058e-44, 6.7262e-44, 7.0065e-44, 6.3058e-44],
         [6.8664e-44, 7.2868e-44, 1.1771e-43, 6.8664e-44]],

        [[7.9874e-44, 8.1275e-44, 7.0065e-44, 7.2868e-44],
         [8.1275e-44, 7.0065e-44, 7.7071e-44, 6.4460e-44],
         [7.1466e-44, 7.7071e-44, 7.7071e-44, 7.1466e-44]]])


In [None]:
# torch.zeros(): every element is 0.
x = torch.zeros((2, 3, 4))
print(x)

tensor([[[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]],

        [[0., 0., 0., 0.],
         [0., 0., 0., 0.],
         [0., 0., 0., 0.]]])


In [None]:
# torch.rand(): values sampled uniformly from [0, 1).
x = torch.rand((2, 3, 4))
print(x)

tensor([[[0.8823, 0.9150, 0.3829, 0.9593],
         [0.3904, 0.6009, 0.2566, 0.7936],
         [0.9408, 0.1332, 0.9346, 0.5936]],

        [[0.8694, 0.5677, 0.7411, 0.4294],
         [0.8854, 0.5739, 0.2666, 0.6274],
         [0.2696, 0.4414, 0.2969, 0.8317]]])


In [None]:
# torch.randn(): values sampled from a standard normal distribution.
x = torch.randn((2, 3, 4))
print(x)

tensor([[[ 1.4451,  0.8564,  2.2181,  0.5232],
         [ 0.3466, -0.1973, -1.0546,  1.2780],
         [ 0.7281, -0.7106, -0.6021,  0.9604]],

        [[ 0.4048, -1.3543, -0.4976,  0.4747],
         [-0.1976,  1.2683,  1.2243,  0.0981],
         [ 1.7423, -1.3527,  0.2191,  0.5526]]])


In [None]:
# torch.arange(): consecutive integers from 1 up to, but not including, 10.
x = torch.arange(start=1, end=10)
print(x)

tensor([1, 2, 3, 4, 5, 6, 7, 8, 9])


In [None]:
# Reinterpret the 9 elements of x as a 3x3 matrix.
y = x.view((3, 3))
print(y)

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])


In [None]:
# The shape of a tensor is reported as a torch.Size object.
shape = y.size()
print(shape)

torch.Size([3, 3])


In [None]:
# Shape and size of a tensor
# Build the 3-D example tensor inside this cell: when the notebook is run top
# to bottom, `x` is still the 1-D result of torch.arange(1, 10) here, which
# cannot be unpacked into three dimensions. torch.zeros is used (rather than a
# random tensor) so the global RNG stream is not advanced.
x = torch.zeros(2, 3, 4)

# Attribute form.
shape = x.shape
print("Shape:", x.shape)

# Method form; gives the same torch.Size value as `.shape`.
size = x.size()
print("Size:", size)

# torch.Size can be unpacked like a tuple, one value per dimension.
dim1, dim2, dim3 = x.size()
print("Size:", dim1, dim2, dim3)

Shape: torch.Size([2, 3, 4])
Size: torch.Size([2, 3, 4])
Size: 2 3 4


### Tensor to NumPy and vice-versa

In [None]:
# A 3x3 NumPy array used as the starting point for the conversions below.
np_arr = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
        [7, 8, 9],
    ]
)
np_arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

In [None]:
# NumPy array -> PyTorch tensor.
tensor = torch.from_numpy(np_arr)
tensor

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [None]:
# PyTorch tensor -> NumPy array.
arr = tensor.numpy()
arr

array([[1, 2, 3],
       [4, 5, 6],
       [7, 8, 9]])

- In case you have a tensor on GPU, you need to call `.cpu()` on the tensor beforehand. Hence, you get a line like `np_arr = tensor.cpu().numpy()`.

### Tensor Operations:


In [None]:
# Element-wise addition of two 2x4 tensors holding the values 2..9.
x1 = torch.arange(2, 10).view(2, 4)
x2 = torch.arange(2, 10).view(2, 4)
print(x1)
print(x2)

# `+` creates a new tensor; x1 and x2 are left unchanged.
y = x1 + x2
print(y)

tensor([[2, 3, 4, 5],
        [6, 7, 8, 9]])
tensor([[2, 3, 4, 5],
        [6, 7, 8, 9]])
tensor([[ 4,  6,  8, 10],
        [12, 14, 16, 18]])


In [None]:
# In-place addition
x1 = torch.rand(3, 3)
x2 = torch.rand(3, 3)

print(x1)
print('Before add:', x2)

# Methods with a trailing underscore modify the tensor they are called on,
# so x2 itself is overwritten with x2 + x1.
x2.add_(x1)
print('After add:', x2)

tensor([[0.4654, 0.1612, 0.1568],
        [0.2083, 0.3289, 0.1054],
        [0.9192, 0.4008, 0.9302]])
Before add: tensor([[0.6558, 0.0766, 0.8460],
        [0.3624, 0.3083, 0.0850],
        [0.0029, 0.6431, 0.3908]])
After add: tensor([[1.1211, 0.2378, 1.0028],
        [0.5707, 0.6372, 0.1903],
        [0.9222, 1.0438, 1.3210]])


In [None]:
# Start from a flat tensor 0..5; the following cells change its shape.
x = torch.arange(end=6)
print("X:", x)

X: tensor([0, 1, 2, 3, 4, 5])


In [None]:
# Reinterpret the 6 elements as 2 rows by 3 columns.
x = x.view((2, 3))
print("X", x)
print(x.shape)


X tensor([[0, 1, 2],
        [3, 4, 5]])
torch.Size([2, 3])


In [None]:
# Swap dimensions 0 and 1: the 2x3 tensor becomes 3x2.
x = x.permute(1, 0)
print("X:", x)
print("Swapped axes:", x.shape)

X: tensor([[0, 3],
        [1, 4],
        [2, 5]])
Swapped axes: torch.Size([3, 2])


In [None]:
# Operations can be chained: create the values 0..8, then view them as 3x3.
W = torch.arange(9).view((3, 3))
print("W", W)

W tensor([[0, 1, 2],
        [3, 4, 5],
        [6, 7, 8]])


In [None]:
# Matrix multiplication: (2x3) @ (3x2) -> (2x2)
x = torch.arange(6).view(2, 3)
print("X:", x)

y = torch.arange(6).view(3, 2)
print("Y:", y)

p = torch.matmul(x, y)
print("Product:", p)



X: tensor([[0, 1, 2],
        [3, 4, 5]])
Y: tensor([[0, 1],
        [2, 3],
        [4, 5]])
Product: tensor([[10, 13],
        [28, 40]])


## Indexing:

In [None]:
# 3x4 tensor holding 0..11, used for the indexing examples below.
x = torch.arange(12).view((3, 4))
print("X:", x)

X: tensor([[ 0,  1,  2,  3],
        [ 4,  5,  6,  7],
        [ 8,  9, 10, 11]])


In [None]:
x[:, 2]  # all rows, column index 2 (the third column)

tensor([ 2,  6, 10])

In [None]:
x[0]  # row index 0 (the first row)

tensor([0, 1, 2, 3])

In [None]:
print(x[:2, -1])  # rows 0 and 1, last column

tensor([3, 7])


In [None]:
print(x[:, 1:3])  # all rows, columns 1 and 2 (the middle two)

tensor([[ 1,  2],
        [ 5,  6],
        [ 9, 10]])


## Dynamic Computation Graph and Backpropagation:
-  PyTorch is a define-by-run framework; this means that we can just do our manipulations, and PyTorch will keep track of that graph for us. Thus, we create a dynamic computation graph along the way.

In [None]:
# A freshly created tensor does not track gradients: requires_grad is False.

x = torch.ones(3)
print(x)
print(x.requires_grad)

tensor([1., 1., 1.])
False


In [None]:
# Switch gradient tracking on for the existing tensor, in place
# (note the trailing underscore).

x.requires_grad_(requires_grad=True)
print(x.requires_grad)

True


## Computation Graph:

### Function: 


$$y = \frac{1}{|x|}\sum_i \left[(x_i + 2)^2 + 3\right]$$

-  $x$ are our parameters, and we want to optimize (either maximize or minimize) the output $y$. 
- For this, we want to obtain the gradients $\partial y / \partial \mathbf{x}$. For our example, we'll use $\mathbf{x}=[0,1,2]$ as our input.

In [None]:
# Input x = [0, 1, 2]. Only float tensors can have gradients, hence the
# explicit float32 dtype.
x = torch.arange(
    3,
    dtype=torch.float32,
    requires_grad=True,
)
print("X", x)

X tensor([0., 1., 2.], requires_grad=True)


In [None]:
# Build the computation graph for the equation above, one operation per line.

a = x + 2       # x_i + 2
b = a ** 2      # (x_i + 2)^2
c = b + 3       # (x_i + 2)^2 + 3
y = c.mean()    # average over all elements of x
print("Y", y)

Y tensor(12.6667, grad_fn=<MeanBackward0>)


## Backpropagation:

- We can perform backpropagation on the computation graph by calling the function `backward()` on the last output, which effectively calculates the gradients for each tensor that has the property `requires_grad=True`:

In [None]:
# Backpropagate from the scalar output y through the graph.
y.backward()

`x.grad` will now contain the gradient $\partial y/ \partial \mathbf{x}$, and this gradient indicates how a change in $\mathbf{x}$ will affect the output $y$ given the current input $\mathbf{x}=[0,1,2]$:

In [None]:
# Gradient of y with respect to each element of x.
print(x.grad)

tensor([1.3333, 2.0000, 2.6667])


## GPU_Support:


In [None]:
# Ask PyTorch whether a CUDA-capable GPU can be used in this session.
gpu_avail = torch.cuda.is_available()
print(f"Is the GPU available? {gpu_avail}")

Is the GPU available? True


-  By default, all tensors we create are stored on the CPU
- We can push a tensor to the GPU by using the function `.to(...)`, or `.cuda()`
-  Good practice to define a `device` object in your code which points to the GPU if you have one, and otherwise to the CPU

In [None]:
# Use the GPU when one is available, otherwise fall back to the CPU.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device", device)

Device cuda


In [None]:
# Example: create a tensor and move it to the selected device.

x = torch.zeros(2, 3).to(device)
print("X", x)

X tensor([[0., 0., 0.],
        [0., 0., 0.]], device='cuda:0')


In [None]:
# Compare how long one large matrix multiplication takes on the CPU and,
# when a GPU is present, on the GPU.
x = torch.randn(5000, 5000)

## CPU version
# perf_counter() is a monotonic, high-resolution clock intended for measuring
# intervals; time.time() can jump if the system clock is adjusted.
start_time = time.perf_counter()
_ = torch.matmul(x, x)
end_time = time.perf_counter()
print(f"CPU time: {(end_time - start_time):6.5f}s")

## GPU version
# The CUDA event API below needs a GPU, so this part is skipped on CPU-only
# machines instead of failing the whole cell.
if torch.cuda.is_available():
    x = x.to(device)
    # CUDA is asynchronous, so we need to use different timing functions
    start = torch.cuda.Event(enable_timing=True)
    end = torch.cuda.Event(enable_timing=True)
    start.record()
    _ = torch.matmul(x, x)
    end.record()
    torch.cuda.synchronize()  # Waits for everything to finish running on the GPU
    print(f"GPU time: {0.001 * start.elapsed_time(end):6.5f}s")  # Milliseconds to seconds
else:
    print("GPU time: skipped (no CUDA device available)")

CPU time: 3.20933s
GPU time: 0.14917s


In [None]:
# GPU operations have a separate seed we also want to set
if torch.cuda.is_available():
    torch.cuda.manual_seed(42)
    torch.cuda.manual_seed_all(42)

# Additionally, some operations on a GPU are implemented stochastic for efficiency
# We want to ensure that all operations are deterministic on GPU (if used) for reproducibility
# NOTE: the flag is spelled `deterministic`. Assigning to a misspelled name
# leaves the real flag at its default, so cuDNN would stay non-deterministic.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False