In [219]:
import torch
import torch.utils.data
import numpy as np
import random

# List of resources

- PyTorch tutorials: https://pytorch.org/tutorials/
- PyTorch manual: https://pytorch.org/docs/stable/index.html
- Einstein summation in PyTorch: https://rockt.github.io/2018/04/30/einsum

# Installing PyTorch

- Use Anaconda
- https://pytorch.org/get-started/locally/
- At the moment we need Python 3.6 to support tensorboard
```
conda create --name torch python=3.6
# Activate first so that every install below goes into the new environment.
conda activate torch
conda install numpy scipy matplotlib jupyter
conda install pytorch torchvision -c pytorch
pip install tensorflow # for tensorboard
pip install tensorboardX # bridge from PyTorch
```

# PyTorch is like Numpy

- PyTorch works in immediate mode, which is different to the default TensorFlow model.
- PyTorch is very picky about datatypes, and defaults to single precision (numpy defaults to double).
- How to set datatype via `dtype=torch.float64` and `.double()`
- Annoying differences from numpy: `np.sum(x, axis=1)` versus `torch.sum(x, dim=1)`, or `np.clip()` versus `torch.clamp()`

# `torch.autograd`: Computing derivatives

- Computation graph, visualizing it
- Chain rule to compute derivatives
- Explain why backward() needs to take an argument (default=1) and why we get the derivatives out of the leaves
- Use of `requires_grad` and `with torch.no_grad():` to control what gets differentiated. Show impact on graph.
- Example with computation graph not being a tree (re-using a parameter in multiple places)
- Computing second derivatives with `backward(create_graph=True)`
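- Explain what `backward(retain_graph=True)` does: it keeps the graph's saved intermediate results after the backward pass, so `backward()` can be run through the same graph again.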
- Don't do in-place modifications to tensors. But it's fine to do `x = 4 * x`.

In [72]:
# Plain scalar tensor: requires_grad is not requested, so autograd will not
# store a gradient for x.
x = torch.tensor(3.0)

In [73]:
# Echo the tensor; its repr carries no requires_grad flag.
x

tensor(3.)

In [74]:
# requires_grad=True asks autograd to track operations on y and to
# accumulate its gradient in y.grad when backward() is called.
y = torch.tensor(4.0, requires_grad=True)

In [75]:
# z = x * y^2. Only y requires grad, so the derivative of interest is
# dz/dy = 2 * x * y.
z = x * y * y

In [76]:
# Backpropagate from z. retain_graph=True keeps the graph's saved values so
# that a later backward() through this same graph (after z is extended
# below) is still possible.
z.backward(retain_graph=True)

In [77]:
# x was created without requires_grad, so no gradient was stored for it
# (the cell produces no output).
x.grad

In [78]:
# dz/dy = 2 * x * y = 2 * 3 * 4 = 24
y.grad

tensor(24.)

In [58]:
# Rebind z to (x * y^2) * y = x * y^3, building on top of the existing graph.
# NOTE: this cell is not idempotent - re-running it multiplies by y again.
z = z * y

In [59]:
# Second backward pass, now from the extended z. Gradients are accumulated
# into y.grad, not overwritten.
z.backward()

In [60]:
# 24 from the first backward() plus 3 * x * y^2 = 144 from the second = 168.
y.grad

tensor(168.)

In [61]:
# The value of y itself is untouched by backward(); only y.grad changed.
y

tensor(4., requires_grad=True)

In [62]:
# z = x * y^3 = 3 * 4^3 = 192; as the result of a tracked operation it
# carries a grad_fn.
z

tensor(192., grad_fn=<ThMulBackward>)

In [None]:
# Derivation of the second backward()'s contribution to y.grad:
# d/dy (x * y^3) = 3 * x * y^2, which at x = 3, y = 4 is 3*3*4*4 = 9 * 16

In [64]:
# Evaluate 3 * x * y^2 at x = 3, y = 4, i.e. 9 * 16.
9*16

144

In [213]:
# Case 1: y = x*x stays in the graph, so z = x*y = x^3 and
# dz/dx = 3 * x^2 = 12 at x = 2.
x = torch.tensor(2.0, requires_grad=True)
y = x * x
z = x * y
z.backward()

# Report after the whole computation has run.
print(f'y.requires_grad = {y.requires_grad}')
print(f'dz/dx = {x.grad}')

y.requires_grad = True
dz/dx = 12.0


In [214]:
# Case 2: y is cut out of the graph, so autograd treats it as the constant 4
# and dz/dx = y = 4.
x = torch.tensor(2.0, requires_grad=True)
# detach() gives a tensor that no longer requires grad; simply assigning
# y.requires_grad = False is not allowed for a computed (non-leaf) tensor.
y = (x * x).detach()
z = x * y
z.backward()

print(f'y.requires_grad = {y.requires_grad}')
print(f'dz/dx = {x.grad}')

y.requires_grad = False
dz/dx = 4.0


In [215]:
# Case 3: same effect as detaching, but achieved by computing y while
# gradient tracking is switched off. y is then a constant (4) for autograd,
# so dz/dx = y = 4.
x = torch.tensor(2.0, requires_grad=True)
with torch.no_grad():
    y = x * x
z = x * y
z.backward()

print(f'y.requires_grad = {y.requires_grad}')
print(f'dz/dx = {x.grad}')

y.requires_grad = False
dz/dx = 4.0


# Using a GPU

# `torch.nn`: Easy neural-network computation

- Convention: the first index is the data-item index, so N images each of size 128x128 will be in a tensor like N x 128 x 128

# Visualizing run-time output with TensorBoard

# `torch.utils.data`: Managing train/test data

In [148]:
# Toy dataset: inputs 0..9 paired with targets 10..19 (integer tensors).
x = torch.arange(10)
y = torch.arange(10, 20)
# TensorDataset zips the tensors along their first dimension, so item i
# is the pair (x[i], y[i]).
d = torch.utils.data.TensorDataset(x, y)

In [216]:
# Indexing the dataset yields a tuple with one entry per wrapped tensor:
# (x[0], y[0]).
d[0]

(tensor(0), tensor(10))

In [217]:
# Second item of the dataset: (x[1], y[1]).
d[1]

(tensor(1), tensor(11))

In [223]:
# Draw 4 distinct positions at random and index the dataset with the whole
# list at once; each wrapped tensor is indexed with that list, giving a
# mini-batch of matching x/y entries.
sample_indices = random.sample(range(len(d)), 4)
x_sample, y_sample = d[sample_indices]
print(f'x_sample = {x_sample}')
print(f'y_sample = {y_sample}')

x_sample = tensor([2, 6, 7, 9])
y_sample = tensor([12, 16, 17, 19])


# Saving and restoring models