<a href="https://colab.research.google.com/github/dtuleva/DL_23_Lectures_and_Quests/blob/main/bonus_lecture_PyTorch_and__JAX.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [220]:
import numpy as np

import torch
import torch.nn as nn # beware of namespace; there is nn in tensorflow and jax too
import torch.nn.functional  as F

from torchvision.datasets import CIFAR10
from torchvision import transforms as T





# PyTorch and JAX
### bonus lecture

## Torch

In [None]:
torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]])

tensor([[1, 2, 3],
        [4, 5, 6],
        [7, 8, 9]])

In [None]:
torch.Tensor(2, 5) # 2, 5 is shape; makes a tensor that is not initialised; values are whatever happens to be in memory

tensor([[3.0221e+32, 8.9068e-15, 2.0704e-19, 2.7565e+12, 6.9785e+22],
        [7.5556e+31, 1.5768e-19, 4.2324e+21, 6.8888e+22, 4.4591e+30]])

In [None]:
torch.zeros(2, 5)

tensor([[0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0.]])

In [None]:
torch.ones(2, 5)

tensor([[1., 1., 1., 1., 1.],
        [1., 1., 1., 1., 1.]])

In [None]:
a = torch.rand((2000, 6)) # torch.random for other distributions
b = torch.rand((2000, 6))
w = torch.rand((6, 1)) # broadcasting - rank 1 tensor does not transpose properly (same as numpy)

In [None]:
a == b

tensor([[False, False, False, False, False, False],
        [False, False, False, False, False, False],
        [False, False, False, False, False, False],
        ...,
        [False, False, False, False, False, False],
        [False, False, False, False, False, False],
        [False, False, False, False, False, False]])

In [None]:
torch.all(a == b)

tensor(False)

In [None]:
torch.all((a + b) == torch.add(a, b))

tensor(True)

In [None]:
a @ b.T

tensor([[2.0515, 1.1019, 2.0896,  ..., 1.8781, 1.5323, 1.3784],
        [1.1844, 0.5961, 1.2851,  ..., 1.3839, 1.3026, 1.1775],
        [1.7176, 0.5805, 1.7451,  ..., 1.9654, 1.6873, 1.8770],
        ...,
        [1.6369, 0.8913, 1.8754,  ..., 1.8238, 2.0730, 1.6184],
        [1.4304, 0.7652, 1.6221,  ..., 1.4995, 1.3009, 1.2881],
        [0.6894, 0.5261, 1.0284,  ..., 0.6456, 1.2559, 0.7867]])

In [None]:
torch.matmul(b, a.T)

tensor([[2.0515, 1.1844, 1.7176,  ..., 1.6369, 1.4304, 0.6894],
        [1.1019, 0.5961, 0.5805,  ..., 0.8913, 0.7652, 0.5261],
        [2.0896, 1.2851, 1.7451,  ..., 1.8754, 1.6221, 1.0284],
        ...,
        [1.8781, 1.3839, 1.9654,  ..., 1.8238, 1.4995, 0.6456],
        [1.5323, 1.3026, 1.6873,  ..., 2.0730, 1.3009, 1.2559],
        [1.3784, 1.1775, 1.8770,  ..., 1.6184, 1.2881, 0.7867]])

In [None]:
(a @ w).shape

torch.Size([2000, 1])

In [None]:
z = a @ w + b

In [None]:
z.min()

tensor(0.2851)

In [None]:
torch.relu(z)

tensor([[2.2321, 2.7146, 1.8869, 2.1619, 2.6045, 2.0364],
        [1.6818, 1.8088, 1.8714, 1.9938, 2.3606, 1.6485],
        [2.3686, 2.6794, 2.3008, 2.6176, 2.7673, 2.2022],
        ...,
        [2.9925, 2.6922, 2.3296, 2.2344, 2.7687, 2.8540],
        [1.6953, 1.8640, 2.1309, 2.0331, 1.4892, 1.9086],
        [1.3251, 1.5032, 0.9667, 1.3226, 0.8722, 1.3571]])

In [None]:
x_np = np.linspace(-5, 5, 5000)
y_np = np.sin(x_np)

In [None]:
x_torch = torch.tensor(x_np)
type(x_torch)

torch.Tensor

In [None]:
torch.sin(x_np) # cannot feed np array directly to torch function

TypeError: ignored

In [None]:
y_torch = torch.sin(x_torch)
# or
x_torch.sin()

tensor([ 0.9589,  0.9595,  0.9601,  ..., -0.9601, -0.9595, -0.9589],
       dtype=torch.float64)

In [None]:
# can be chained for unary operations
torch.all(x_torch.sin().cos()) == torch.all(torch.cos(torch.sin(x_torch)))

tensor(True)

In [None]:
x_torch[:4]

tensor([-5.0000, -4.9980, -4.9960, -4.9940], dtype=torch.float64)

In [None]:
a

tensor([[0.3813, 0.7953, 0.6023, 0.3146, 0.9928, 0.1448],
        [0.0260, 0.3393, 0.2938, 0.4114, 0.4557, 0.8604],
        [0.5277, 0.7079, 0.0279, 0.7140, 0.2600, 0.9134],
        ...,
        [0.0020, 0.5616, 0.9106, 0.5456, 0.5573, 0.9432],
        [0.7844, 0.2344, 0.2062, 0.8122, 0.5131, 0.0908],
        [0.1464, 0.1212, 0.7608, 0.6751, 0.1295, 0.0342]])

In [None]:
a[:, 1]

tensor([0.7953, 0.3393, 0.7079,  ..., 0.5616, 0.2344, 0.1212])

Gradients

In [None]:
y_torch

tensor([ 0.9589,  0.9595,  0.9601,  ..., -0.9601, -0.9595, -0.9589],
       dtype=torch.float64)

In [None]:
y_torch.requires_grad

False

In [None]:
type(y_torch.grad)

NoneType

In [None]:
x_torch = torch.tensor(x_np, requires_grad = True) # state that gradients are needed by initialisation / reinitialisation of variable
# x_torch = torch.tensor(x_torch, requires_grad = True)


In [None]:
y_torch = torch.sin(x_torch)

In [None]:
y_torch.backward()

RuntimeError: ignored

In [None]:
y_torch.grad

  y_torch.grad


In [None]:
x_torch.grad

In [2]:
a = torch.tensor([2, 3, 5], dtype = float, requires_grad = True)
b = torch.tensor([8.0, 15, -2])


In [3]:
y = (2 * a ** 2 + 3 * b)** 3

In [5]:
y.backward(torch.tensor([1, 1, 1]))

In [10]:
a.grad

tensor([ 24576., 142884., 116160.], dtype=torch.float64)

In [14]:
b.grad is None

True

In [15]:
y.grad

  y.grad


This way grad is directly calculated from scratch each time, not optimal for NN; even the same operation on same inputs is calculated again - slow and computationally expensive. What we need is compiling, which in pytorch is not defined very well (unlike tensorflow and jax)

In [24]:
mock_data = torch.rand((20, 2)) # need float tensors everywhere

In [28]:
fully_connected_layer = torch.nn.Linear(2, 3) # no input layer syntax sugar, need to explicitly state the in and output shapes, no inpit dynamic

In [32]:
z = fully_connected_layer(mock_data)

# equiv to tf.keras.layers.Dense(30)(mock_data)

In [33]:
z.min()

tensor(-1.0466, grad_fn=<MinBackward1>)

In [34]:
activation = torch.nn.functional.relu(z)

In [35]:
activation.min()

tensor(0., grad_fn=<MinBackward1>)

In [None]:
torch.nn.MSELoss(activation, y_expected).backward()

In [172]:
mock_data = torch.rand((1, 28, 28))  # in pytorch images are (channels, height, width)
mock_labels = torch.rand(20, 10)

In [160]:
class CNN(nn.Module):
  def __init__(self): # only architecture, no functions (relu, ...) here
    super(CNN, self).__init__()
    self.conv1 = nn.Conv2d(in_channels = 1, out_channels = 10, kernel_size = 3, padding = "same") # or kernel (3, 3)
    self.pool = nn.MaxPool2d(kernel_size = 2) # needs initialisation, but can be used multiple times after different layers (unlike tf)
    self.conv2 = nn.Conv2d(in_channels = 10, out_channels = 20, kernel_size = 3)
    self.flatten = nn.Flatten()
    self.dense1 = nn.Linear(in_features = 6 * 6, out_features = 25) # need to calc in dimentions
    self.dense2 = nn.Linear(25, 10) # 10 as num classes

  def forward(self, x):
    x = F.relu(self.conv1(x))
    x = self.pool(x)
    x = F.relu(self.conv2(x))
    x = self.pool(x)
    x = self.flatten(x)
    x = F.relu(self.dense1(x))
    x = self.dense2(x) # don't need activation, here crossenthropy works properly with direct input

    return x

In [161]:
model = CNN()
model

CNN(
  (conv1): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), padding=same)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(10, 20, kernel_size=(3, 3), stride=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (dense1): Linear(in_features=36, out_features=25, bias=True)
  (dense2): Linear(in_features=25, out_features=10, bias=True)
)

In [162]:
model.forward(mock_data).shape

torch.Size([20, 10])

In [163]:
# model.forward(mock_data).view(-1, 1) # = reshape(whatever rows, 1 column)

In [164]:
# [param for param in model.parameters()] # trainable params

In [165]:
model

CNN(
  (conv1): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1), padding=same)
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(10, 20, kernel_size=(3, 3), stride=(1, 1))
  (flatten): Flatten(start_dim=1, end_dim=-1)
  (dense1): Linear(in_features=36, out_features=25, bias=True)
  (dense2): Linear(in_features=25, out_features=10, bias=True)
)

if we have fancy or scary forward functions we can writhe the backwards method too, if not autograde will take care of it. __Very important__ to remember to call ```zero_grad``` for every train step, so that same calculations are not done again

In [180]:
# __ train step start
model.zero_grad()

In [181]:
result = model.forward(mock_data)

In [182]:
result.backward(mock_labels) # 20 examples 10 classes
# train step end__

In [183]:
model.conv1.bias

Parameter containing:
tensor([-0.1114, -0.1415,  0.0221,  0.3326,  0.0802, -0.0162,  0.1328, -0.0536,
        -0.2341,  0.1089], requires_grad=True)

In [184]:
model.conv1.bias.grad

tensor([ 0.0000, -0.5599,  0.7457,  1.2413, -0.1051,  0.7896,  0.0923,  0.5590,
         0.0000,  0.8152])

We have the gradiens, now we need loss and optimizer.

No compiling, here it is done directly:

In [185]:
loss = nn.CrossEntropyLoss()

In [186]:
loss(result, mock_labels)

tensor(12.1921, grad_fn=<DivBackward1>)

In [192]:
torch.optim.Adam

torch.optim.adam.Adam

No ```.fit```; need to apply adam on forward + backward; then skip backward for inference

ready datasets:

unlike tf, torch makes difference between dataset and way to work with said dataset

In [210]:
train_data = CIFAR10(root = ".", download = True, transform = T.ToTensor()) # transform = [] preprocess and augmentation here; can add custom functions to the list
                                                                            # if we list function composition, dataset element cannot be called directly

Files already downloaded and verified


In [214]:
dataloader = torch.utils.data.DataLoader(train_data, batch_size = 4, num_workers = 2) # num_workers = how may batches to get at once

In [212]:
train_data[0]

(tensor([[[0.2314, 0.1686, 0.1961,  ..., 0.6196, 0.5961, 0.5804],
          [0.0627, 0.0000, 0.0706,  ..., 0.4824, 0.4667, 0.4784],
          [0.0980, 0.0627, 0.1922,  ..., 0.4627, 0.4706, 0.4275],
          ...,
          [0.8157, 0.7882, 0.7765,  ..., 0.6275, 0.2196, 0.2078],
          [0.7059, 0.6784, 0.7294,  ..., 0.7216, 0.3804, 0.3255],
          [0.6941, 0.6588, 0.7020,  ..., 0.8471, 0.5922, 0.4824]],
 
         [[0.2431, 0.1804, 0.1882,  ..., 0.5176, 0.4902, 0.4863],
          [0.0784, 0.0000, 0.0314,  ..., 0.3451, 0.3255, 0.3412],
          [0.0941, 0.0275, 0.1059,  ..., 0.3294, 0.3294, 0.2863],
          ...,
          [0.6667, 0.6000, 0.6314,  ..., 0.5216, 0.1216, 0.1333],
          [0.5451, 0.4824, 0.5647,  ..., 0.5804, 0.2431, 0.2078],
          [0.5647, 0.5059, 0.5569,  ..., 0.7216, 0.4627, 0.3608]],
 
         [[0.2471, 0.1765, 0.1686,  ..., 0.4235, 0.4000, 0.4039],
          [0.0784, 0.0000, 0.0000,  ..., 0.2157, 0.1961, 0.2235],
          [0.0824, 0.0000, 0.0314,  ...,

In [213]:
T.ToTensor # reordes to (channels, height, width) from (w, h, ch) and rescales to (0, 1) from (0, 255)

torchvision.transforms.transforms.ToTensor

In [219]:
dataloader.generator # no pre-fetch function :(

## JAX