[View in Colaboratory](https://colab.research.google.com/github/abhinavrk/GoogleColabMusings/blob/master/torch_tutorial.ipynb)

# What is PyTorch?

In [0]:
# http://pytorch.org/
from os import path
from wheel.pep425tags import get_abbr_impl, get_impl_ver, get_abi_tag
platform = '{}{}-{}'.format(get_abbr_impl(), get_impl_ver(), get_abi_tag())

accelerator = 'cu80' if path.exists('/opt/bin/nvidia-smi') else 'cpu'

!pip install -q http://download.pytorch.org/whl/{accelerator}/torch-0.4.0-{platform}-linux_x86_64.whl torchvision

import torch

In [2]:
# torch.empty allocates a 5x3 tensor without filling it in, so the printed
# values are whatever happened to be in that memory.
x = torch.empty((5, 3))
print(x)

tensor(1.00000e-37 *
       [[ 2.7389,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000],
        [ 0.0000,  0.0000,  0.0000]])


In [4]:
# 5x3 tensor of random samples (uniform on [0, 1)).
x = torch.rand((5, 3))
print(x)


tensor([[ 0.8858,  0.7787,  0.2055],
        [ 0.7969,  0.9603,  0.5067],
        [ 0.1421,  0.0221,  0.6333],
        [ 0.8726,  0.3720,  0.0765],
        [ 0.1968,  0.1332,  0.4348]])


In [6]:
# 5x3 tensor of zeros stored as 64-bit integers (torch.long is torch.int64).
x = torch.zeros((5, 3), dtype=torch.int64)
print(x)

tensor([[ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0],
        [ 0,  0,  0]])


In [7]:
# Build a tensor directly from Python data; the result is a float tensor.
x = torch.tensor(data=[5.5, 3])
print(x)

tensor([ 5.5000,  3.0000])


In [8]:
# Create new tensors based on an existing one (x comes from the previous cell).

# new_* methods take in sizes; the dtype is given explicitly here
x = x.new_ones((5, 3), dtype=torch.float64)
print(x)

# *_like functions take their size from the argument; the dtype is overridden
x = torch.randn_like(x, dtype=torch.float32)
print(x)  # same 5x3 size as before, now float32

tensor([[ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.],
        [ 1.,  1.,  1.]], dtype=torch.float64)
tensor([[ 1.7475,  0.9172, -0.9811],
        [-0.8624, -0.6107, -2.1555],
        [ 0.1523, -0.9467,  0.0952],
        [ 0.2686, -1.2228, -0.4925],
        [ 0.1598,  0.8110,  0.7318]])


In [9]:
# The size is a torch.Size; ``x.shape`` is the attribute spelling of ``x.size()``.
print(x.shape)

torch.Size([5, 3])


In [10]:
# Addition, syntax 1: the ``+`` operator (x comes from the cells above).
y = torch.rand((5, 3))
print(x + y)

tensor([[ 2.0146,  0.9481,  0.0142],
        [-0.6639,  0.2515, -1.6063],
        [ 0.7273, -0.1162,  0.8218],
        [ 0.5404, -0.9030,  0.2436],
        [ 0.2292,  1.4945,  0.8081]])


In [11]:
# Addition, syntax 2: the functional form; prints the same values as ``x + y``.
print(torch.add(x, y))

tensor([[ 2.0146,  0.9481,  0.0142],
        [-0.6639,  0.2515, -1.6063],
        [ 0.7273, -0.1162,  0.8218],
        [ 0.5404, -0.9030,  0.2436],
        [ 0.2292,  1.4945,  0.8081]])


In [12]:
# Addition, syntax 3: write the sum into a pre-allocated output tensor.
result = torch.empty((5, 3))
torch.add(x, y, out=result)  # result is overwritten with x + y
print(result)

tensor([[ 2.0146,  0.9481,  0.0142],
        [-0.6639,  0.2515, -1.6063],
        [ 0.7273, -0.1162,  0.8218],
        [ 0.5404, -0.9030,  0.2436],
        [ 0.2292,  1.4945,  0.8081]])


In [13]:
# Addition, syntax 4 (in place): adds x to y, overwriting y itself.
# Note that this cell is not idempotent: running it again adds x a second time.
y.add_(x)
print(y)

tensor([[ 2.0146,  0.9481,  0.0142],
        [-0.6639,  0.2515, -1.6063],
        [ 0.7273, -0.1162,  0.8218],
        [ 0.5404, -0.9030,  0.2436],
        [ 0.2292,  1.4945,  0.8081]])


In [14]:
# NumPy-style indexing: every row, column index 1.
print(x[:, 1])

tensor([ 0.9172, -0.6107, -0.9467, -1.2228,  0.8110])


In [15]:
# Reshaping with view(): the total number of elements (16) must stay the same.
x = torch.randn((4, 4))
y = x.view(16)       # flattened to 16 elements
z = x.view(-1, 8)    # the size -1 is inferred from other dimensions (here: 2)
print(*(t.size() for t in (x, y, z)))

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [16]:
# Start from a torch tensor holding five ones.
a = torch.ones((5,))
print(a)

tensor([ 1.,  1.,  1.,  1.,  1.])


In [17]:
# Convert the tensor to a NumPy array. The next cell modifies `a` in place and
# shows that `b` changes with it.
b = a.numpy()
print(b)

[1. 1. 1. 1. 1.]


In [18]:
# In-place add on the tensor. The printed `b` changes too, i.e. the tensor and
# the NumPy array obtained from it share the same underlying data.
a.add_(1)
print(a)
print(b)

tensor([ 2.,  2.,  2.,  2.,  2.])
[2. 2. 2. 2. 2.]


In [19]:
import numpy as np

# The other direction: wrap a NumPy array as a tensor, then change the array
# in place and print both.
a = np.ones(5)
b = torch.from_numpy(a)
a += 1  # in-place on the ndarray, same effect as np.add(a, 1, out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([ 2.,  2.,  2.,  2.,  2.], dtype=torch.float64)


In [20]:
# The body below runs only when a CUDA device is present; otherwise the cell
# prints nothing. ``torch.device`` objects are used to move tensors on and off
# the GPU. `x` is the tensor left over from the cells above.
if torch.cuda.is_available():
    device = torch.device("cuda")              # handle for the CUDA device
    y = torch.ones_like(x, device=device)      # created directly on the GPU
    x = x.to(device)                           # a plain string works too: ``.to("cuda")``
    z = torch.add(x, y)
    print(z)
    print(z.to("cpu", torch.float64))          # ``.to`` can move and change dtype in one call

tensor([[ 0.8624,  1.3965,  1.1757,  1.3538],
        [ 0.8049,  0.8824,  0.5007,  1.6184],
        [ 1.1935,  1.6094,  1.3424,  1.3359],
        [-0.8174,  1.8935,  1.1532, -0.3462]], device='cuda:0')
tensor([[ 0.8624,  1.3965,  1.1757,  1.3538],
        [ 0.8049,  0.8824,  0.5007,  1.6184],
        [ 1.1935,  1.6094,  1.3424,  1.3359],
        [-0.8174,  1.8935,  1.1532, -0.3462]], dtype=torch.float64)


# Summary - PyTorch Numpy

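Essentially, PyTorch tensors behave like NumPy arrays: the API is very similar, and a CPU tensor and a NumPy array can share the same memory. In addition, tensors can be placed explicitly on the GPU (e.g. by creating a matrix directly on the GPU), in which case subsequent operations on them run on the GPU as well.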

# Autograd - Automatic Differentiation

In [0]:
import torch

In [22]:
# requires_grad=True asks autograd to record the operations applied to x.
x = torch.ones((2, 2), requires_grad=True)
print(x)

tensor([[ 1.,  1.],
        [ 1.,  1.]])


In [23]:
# y is the result of an operation on x (which has requires_grad=True).
y = x + 2
print(y)

tensor([[ 3.,  3.],
        [ 3.,  3.]])


In [24]:
# Because y was produced by an operation, it carries a grad_fn describing
# how it was computed (an add node here).
print(y.grad_fn)

<AddBackward0 object at 0x7f4a61ae3c88>


In [25]:
# Element-wise z = 3 * y^2, then reduce to a single scalar with the mean.
z = y * y * 3
out = z.mean()

print(z, out)

tensor([[ 27.,  27.],
        [ 27.,  27.]]) tensor(27.)


In [26]:
# requires_grad is False unless requested, and can be switched on in place.
a = torch.randn((2, 2))
a = a * 3 / (a - 1)
print(a.requires_grad)
a.requires_grad_()          # the flag argument defaults to True
print(a.requires_grad)
b = (a * a).sum()           # computed after the switch, so it has a grad_fn
print(b.grad_fn)

False
True
<SumBackward0 object at 0x7f4a61ae3cc0>


In [0]:
# Backpropagate from the scalar `out`; no gradient argument is passed here.
out.backward()

In [28]:
# d(out)/dx. With out = mean(3 * (x + 2)**2) over 4 elements, each entry is
# 1.5 * (x + 2), i.e. 4.5 at x = 1.
print(x.grad)

tensor([[ 4.5000,  4.5000],
        [ 4.5000,  4.5000]])


In [29]:
# Build a non-scalar result: keep doubling y until its norm reaches 1000.
x = torch.randn(3, requires_grad=True)

y = x * 2
# The loop condition only needs the values, not the graph. ``detach()`` is the
# recommended replacement for the legacy ``.data`` attribute: both give a tensor
# cut off from the graph, but changes made through ``.data`` are invisible to
# autograd's correctness checks.
while y.detach().norm() < 1000:
    y = y * 2

print(y)

tensor([ -458.1458, -1199.5879,   484.2182])


In [30]:
# y is not a scalar, so backward() is given a tensor with one weight per
# element of y. The printed x.grad is this vector multiplied by the factor
# by which y was scaled up from x.
gradients = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float32)
y.backward(gradients)

print(x.grad)

tensor([  102.4000,  1024.0000,     0.1024])


In [31]:
# Results of operations on x normally require grad as well ...
print(x.requires_grad)
print((x ** 2).requires_grad)

# ... but inside a torch.no_grad() block the result does not.
with torch.no_grad():
    print((x ** 2).requires_grad)

True
True
False


# Summary - Autograd

The autograd package automatically records the operations performed on tensors that have `requires_grad=True`, and uses that record to compute the derivatives automatically when `backward()` is called.

## Unsure

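What exactly is `grad_tensors` - why does `y.backward` take an input? Is this to account for the metric? Is this just the metric tensor? Does this mean you're not allowed to have variable metric tensors?

Note: according to the `torch.autograd.backward` documentation, this argument is the vector $v$ of a vector-Jacobian product. When `y` is not a scalar, `y.backward(v)` computes $v^T J$ with $J = \partial y / \partial x$. In the example above $J = 1024 \cdot I$, so `x.grad` is simply `gradients * 1024`. It is not a metric tensor.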