# Getting started with PyTorch

Content based on tutorial provided by PyTorch on this [page](https://pytorch.org/tutorials/beginner/blitz/tensor_tutorial.html#sphx-glr-beginner-blitz-tensor-tutorial-py)

In [1]:
# A __future__ import must be the first statement of a module; placing it
# after other imports is a SyntaxError when this code is run as a script.
from __future__ import print_function

import torch
import numpy as np

# First introduction to PyTorch
---
## Tensors
---
### Construction of tensors


Construct a 5x3 uninitialized ("empty") matrix: the values are whatever happened to be in memory

In [2]:
torch.empty(5, 3)

tensor([[ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  0.0000e+00],
        [-8.6107e+04, -7.5334e-42,  2.7909e+23],
        [ 0.0000e+00,  5.6052e-44,  0.0000e+00],
        [ 0.0000e+00,  0.0000e+00,  2.1019e-44]])

Construct a 5x3 random matrix

In [3]:
torch.rand(5, 3)

tensor([[0.8241, 0.5042, 0.5092],
        [0.8995, 0.0683, 0.6613],
        [0.5826, 0.2350, 0.7217],
        [0.0242, 0.3755, 0.4920],
        [0.1863, 0.4135, 0.3714]])

Construct a 5x3 matrix of zeros, with dtype torch.long

In [4]:
torch.zeros(5, 3, dtype=torch.long)

tensor([[0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0],
        [0, 0, 0]])

Construct a tensor from data (array)

In [5]:
x = torch.tensor([5.5, 3])

Construct a tensor based on an existing tensor -> will reuse properties such as dtype, unless new values are provided

In [6]:
x.dtype

torch.float32

In [7]:
x = x.new_ones(5, 3)
x.dtype

torch.float32

In [8]:
x

tensor([[1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.],
        [1., 1., 1.]])

In [9]:
x = x.new_ones(5, 3, dtype=torch.double)
x.dtype

torch.float64

Construct a tensor based on an existing tensor -> will reuse its shape (the dtype is overridden here)

In [10]:
x = torch.randn_like(x, dtype=torch.float)

In [11]:
x

tensor([[-0.3767,  1.0405, -0.4490],
        [-1.1901,  0.4863, -2.3297],
        [-1.1671,  1.2494,  1.9640],
        [ 1.8573, -1.2589, -0.2335],
        [ 1.5515,  0.4633,  0.6289]])

In [12]:
x.size()

torch.Size([5, 3])

In [13]:
x.shape

torch.Size([5, 3])

### Operations


In [14]:
x

tensor([[-0.3767,  1.0405, -0.4490],
        [-1.1901,  0.4863, -2.3297],
        [-1.1671,  1.2494,  1.9640],
        [ 1.8573, -1.2589, -0.2335],
        [ 1.5515,  0.4633,  0.6289]])

In [15]:
y = torch.rand(5,3)

In [16]:
y

tensor([[0.5436, 0.9277, 0.6549],
        [0.6883, 0.2586, 0.0727],
        [0.0550, 0.4823, 0.5226],
        [0.5174, 0.7187, 0.3993],
        [0.6537, 0.6708, 0.9096]])

Addition: syntax 1

In [17]:
x+y

tensor([[ 0.1669,  1.9682,  0.2060],
        [-0.5019,  0.7449, -2.2570],
        [-1.1121,  1.7317,  2.4866],
        [ 2.3747, -0.5402,  0.1658],
        [ 2.2052,  1.1341,  1.5385]])

Addition: syntax 2

In [18]:
torch.add(x, y)

tensor([[ 0.1669,  1.9682,  0.2060],
        [-0.5019,  0.7449, -2.2570],
        [-1.1121,  1.7317,  2.4866],
        [ 2.3747, -0.5402,  0.1658],
        [ 2.2052,  1.1341,  1.5385]])

Addition: providing an output tensor as argument 

In [19]:
result = torch.empty_like(x)
torch.add(x, y, out=result)
result

tensor([[ 0.1669,  1.9682,  0.2060],
        [-0.5019,  0.7449, -2.2570],
        [-1.1121,  1.7317,  2.4866],
        [ 2.3747, -0.5402,  0.1658],
        [ 2.2052,  1.1341,  1.5385]])

Addition: in-place

In [20]:
y.add_(x)
y

tensor([[ 0.1669,  1.9682,  0.2060],
        [-0.5019,  0.7449, -2.2570],
        [-1.1121,  1.7317,  2.4866],
        [ 2.3747, -0.5402,  0.1658],
        [ 2.2052,  1.1341,  1.5385]])

Any operation that mutates a tensor in-place is post-fixed with an `_`. 

For example: `x.copy_(y)`, `x.t_()`, will change `x`.

### Indexing


In [21]:
x

tensor([[-0.3767,  1.0405, -0.4490],
        [-1.1901,  0.4863, -2.3297],
        [-1.1671,  1.2494,  1.9640],
        [ 1.8573, -1.2589, -0.2335],
        [ 1.5515,  0.4633,  0.6289]])

In [22]:
# Get column 1
x[:,1]

tensor([ 1.0405,  0.4863,  1.2494, -1.2589,  0.4633])

In [24]:
# Get row 1
x[1,:]

tensor([-1.1901,  0.4863, -2.3297])

### Resizing


In [25]:
x = torch.randn(4, 4)
x

tensor([[-2.2350,  1.8292,  1.2618, -0.4176],
        [-0.2330, -0.7037, -0.3219,  1.0281],
        [ 0.9853, -0.8345, -1.0712,  0.8927],
        [ 0.0132,  0.2404,  0.4074, -0.6992]])

In [26]:
y = x.view(16)
y

tensor([-2.2350,  1.8292,  1.2618, -0.4176, -0.2330, -0.7037, -0.3219,  1.0281,
         0.9853, -0.8345, -1.0712,  0.8927,  0.0132,  0.2404,  0.4074, -0.6992])

In [27]:
z = x.view(-1, 8)
z

tensor([[-2.2350,  1.8292,  1.2618, -0.4176, -0.2330, -0.7037, -0.3219,  1.0281],
        [ 0.9853, -0.8345, -1.0712,  0.8927,  0.0132,  0.2404,  0.4074, -0.6992]])

In [28]:
print(x.size(), y.size(), z.size())

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [29]:
x = torch.randn(1)
x

tensor([-0.4076])

When a tensor has only one element, use `.item()` to get its value as a Python number

In [30]:
x.item()

-0.4075830280780792

## Numpy Bridge
---
### Converting a Torch tensor to a NumPy Array

In [31]:
a = torch.ones(5)
a

tensor([1., 1., 1., 1., 1.])

In [32]:
b = a.numpy()
b

array([1., 1., 1., 1., 1.], dtype=float32)

In [33]:
print(type(a), type(b))

<class 'torch.Tensor'> <class 'numpy.ndarray'>


In [34]:
a.add_(1)
a

tensor([2., 2., 2., 2., 2.])

In [35]:
b

array([2., 2., 2., 2., 2.], dtype=float32)

**Remark: `a` and `b` share the same memory, so an in-place change to `a` also changes `b`**

### Converting NumPy array to Torch Tensor

In [36]:
a = np.ones(5)
b = torch.from_numpy(a)
np.add(a, 1, out=a)

array([2., 2., 2., 2., 2.])

In [37]:
b

tensor([2., 2., 2., 2., 2.], dtype=torch.float64)

**Remark: the tensor `b` shares memory with the array `a`, so an in-place change to `a` changes `b` as well**

## CUDA Tensors
---

In [39]:
x = torch.rand(5, 3)
x

tensor([[0.4388, 0.0484, 0.9890],
        [0.8003, 0.0562, 0.2808],
        [0.7326, 0.0038, 0.2143],
        [0.9742, 0.3869, 0.2366],
        [0.3471, 0.2360, 0.7897]])

In [40]:
# Only runs when a CUDA device is available; otherwise this cell does nothing.
if torch.cuda.is_available():
    device = torch.device('cuda')  # a CUDA device object
    y = torch.ones_like(x, device=device)  # tensor of ones shaped like x, created directly on the GPU
    x = x.to(device)  # move x to the GPU so both operands live on the same device
    z = x + y
    print(z)
    print(z.to("cpu", dtype=torch.double))  # .to() moves back to the CPU and changes the dtype in one call

tensor([[1.4388, 1.0484, 1.9890],
        [1.8003, 1.0562, 1.2808],
        [1.7326, 1.0038, 1.2143],
        [1.9742, 1.3869, 1.2366],
        [1.3471, 1.2360, 1.7897]], device='cuda:0')
tensor([[1.4388, 1.0484, 1.9890],
        [1.8003, 1.0562, 1.2808],
        [1.7326, 1.0038, 1.2143],
        [1.9742, 1.3869, 1.2366],
        [1.3471, 1.2360, 1.7897]], dtype=torch.float64)


---

# Autograd

---

## Tensors

---

Setting the attribute `.requires_grad` to `True` makes autograd track all operations on the tensor.

Calling the method `.backward()` computes all the gradients automatically.

Gradients are accumulated into the `.grad` attribute.

---

To stop a tensor from tracking history, call its `.detach()` method to detach it from the computation history and prevent future computations from being tracked.

Another way is to wrap the code block in `with torch.no_grad():`

---
Another important class for the autograd implementation is `Function`.

`Tensor` and `Function` are interconnected and build up an acyclic graph that encodes a complete history of computation. Each tensor has a `.grad_fn` attribute that references the `Function` that created the `Tensor` (except for Tensors created by the user - their `grad_fn` is `None`).

---

If you want to compute the derivatives, you can call `.backward()` on a `Tensor`. If the `Tensor` is a scalar (i.e. it holds a single element), you don't need to specify any arguments to `backward()`; however, if it has more elements, you need to specify a `gradient` argument that is a tensor of matching shape.


In [79]:
x = torch.ones(2, 2, requires_grad=True)
x

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)

In [80]:
y = x + 2
y

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)

In [81]:
y.grad_fn

<AddBackward0 at 0x19151e92cd0>

In [82]:
z = y * y * 3
out = z.mean()
print(z)
print(out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>)
tensor(27., grad_fn=<MeanBackward0>)


`.requires_grad_( ... )` changes an existing Tensor's `requires_grad` flag in-place. A tensor's flag is `False` unless it was set at creation; calling `.requires_grad_()` without an argument sets it to `True`.

In [83]:
a = torch.randn(2, 2)
a = (a * 3) / (a - 1)
a.requires_grad

False

In [84]:
a.requires_grad_(True)
a.requires_grad

True

In [85]:
b = (a * a).sum()
print(b.requires_grad)
print(b.grad_fn)

True
<SumBackward0 object at 0x0000019151E83190>


## Gradients

---

In [90]:
x = torch.randn(3, requires_grad=True)
x

tensor([-0.3666,  0.1877, -0.3046], requires_grad=True)

In [91]:
y = x * 2
y.data

tensor([-0.7332,  0.3754, -0.6091])

In [100]:
y.data.norm()

tensor(1.0244)

In [104]:
# Keep doubling y until its norm reaches at least 1000. The norm for the loop
# test is taken on a detached tensor so the test itself is not tracked by
# autograd; .detach() is the supported replacement for the legacy .data.
while y.detach().norm() < 1000:
    y = y * 2

In [105]:
y

tensor([-750.7642,  384.4138, -623.7446], grad_fn=<MulBackward0>)

In [106]:
v = torch.tensor([.1, 1, .0001], dtype=torch.float)
v


tensor([1.0000e-01, 1.0000e+00, 1.0000e-04])

In [107]:
y.backward(v)

In [113]:
x.grad

tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])

In [114]:
print(x.requires_grad)
print((x ** 2).requires_grad)

with torch.no_grad():
    print((x ** 2).requires_grad)

True
True
False


In [115]:
# .detach() returns a tensor with the same values that is cut off from the
# autograd graph: x keeps requires_grad=True, while the detached y does not.
print(x.requires_grad)
y = x.detach()
print(y.requires_grad)  # inspect the detached tensor, not x again
print(x.eq(y).all())  # the values are unchanged by detaching

True
False
tensor(True)
