The following tutorial is adapted from:

https://brsoff.github.io/tutorials/beginner/deep_learning_60min_blitz.html

**WHAT IS PYTORCH?**

>It’s a Python-based scientific computing package targeted at two sets of audiences:
> 1. A replacement for NumPy to use the power of GPUs
> 2. A deep learning research platform that provides maximum flexibility and speed

**1.1 Tensors**

In [3]:
from __future__ import print_function
import torch
# import sklearn

In [10]:
# Task-01 Construct a 5x4 matrix, uninitialized:
# (torch.empty only allocates storage, so the printed values are not meaningful)

x = torch.empty(5, 4)
print(x)

tensor([[0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.],
        [0., 0., 0., 0.]])


In [11]:
# Task-02 Construct a randomly initialized matrix:
# torch.rand draws every entry uniformly from [0, 1).

x = torch.rand((5, 4))
print(x)

tensor([[0.3680, 0.9626, 0.8652, 0.7306],
        [0.8027, 0.0995, 0.7038, 0.0563],
        [0.3149, 0.4003, 0.2489, 1.0000],
        [0.4656, 0.5140, 0.1001, 0.0315],
        [0.0328, 0.9139, 0.8585, 0.6620]])


In [12]:
# Task-03 Construct a matrix filled with zeros and of dtype long:

x = torch.zeros((5, 4), dtype=torch.long)
print(x)

tensor([[0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0],
        [0, 0, 0, 0]])


In [17]:
# Task-04 Construct a tensor directly from data:
# The dtype is inferred from the values; the integer 3 is stored as a float.

x = torch.tensor((5.5, 3, 4.0, -1.0))
print(x)

tensor([ 5.5000,  3.0000,  4.0000, -1.0000])


In [21]:
# Task-05 Create tensors based on an existing tensor. These methods reuse
# properties of the input tensor (e.g. dtype) unless the caller overrides them.

x = x.new_ones((5, 4), dtype=torch.float64)   # new_* methods take a size
print(x)
print(type(x))

x = torch.randn_like(x, dtype=torch.float32)  # same shape as x, dtype overridden
print(x)
print(type(x))

tensor([[1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.],
        [1., 1., 1., 1.]], dtype=torch.float64)
<class 'torch.Tensor'>
tensor([[-0.9979, -1.5495,  1.1157, -0.2400],
        [-0.2616, -0.1638, -0.3428, -1.0579],
        [-0.3920,  1.0789, -0.4735,  0.7098],
        [-0.6866,  0.2733, -0.9771, -0.2519],
        [-2.4096, -0.1932,  1.4749, -0.5340]])
<class 'torch.Tensor'>


In [29]:
# Task-06 Get its size:
# (.shape and .size() return the same torch.Size object type)

print(x.shape)

torch.Size([5, 4])


**1.2 Operations**

In [30]:
# Task-07 Addition using +
# y has the same 5x4 shape as x, so the two tensors are added element-wise.

y = torch.rand(5, 4)
print(x + y)

tensor([[-0.8057, -1.0035,  1.2262,  0.2044],
        [-0.2284,  0.2516,  0.6283, -0.8004],
        [ 0.2728,  1.5704,  0.3198,  1.1272],
        [ 0.0509,  0.9239, -0.6427,  0.5087],
        [-1.5537,  0.0363,  1.8336, -0.0499]])


In [31]:
# Task-08 Addition using add() method
# torch.add(x, y) is the function form of x + y; the recorded output matches Task-07.

print(torch.add(x, y))

tensor([[-0.8057, -1.0035,  1.2262,  0.2044],
        [-0.2284,  0.2516,  0.6283, -0.8004],
        [ 0.2728,  1.5704,  0.3198,  1.1272],
        [ 0.0509,  0.9239, -0.6427,  0.5087],
        [-1.5537,  0.0363,  1.8336, -0.0499]])


In [33]:
# Task-09 Addition: providing an output tensor as argument

result = torch.empty(5, 4)  # pre-allocated buffer with the shape of x + y
torch.add(x, y, out=result)  # the sum is written into result
print(result)

tensor([[-0.8057, -1.0035,  1.2262,  0.2044],
        [-0.2284,  0.2516,  0.6283, -0.8004],
        [ 0.2728,  1.5704,  0.3198,  1.1272],
        [ 0.0509,  0.9239, -0.6427,  0.5087],
        [-1.5537,  0.0363,  1.8336, -0.0499]])


In [34]:
# Task-10 Addition: in-place
# Any operation that mutates a tensor in-place is post-fixed with an _.
# For example: x.copy_(y), x.t_(), will change x.

# adds x to y, overwriting y with the sum (x itself is left unchanged)
y.add_(x)
print(y)

tensor([[-0.8057, -1.0035,  1.2262,  0.2044],
        [-0.2284,  0.2516,  0.6283, -0.8004],
        [ 0.2728,  1.5704,  0.3198,  1.1272],
        [ 0.0509,  0.9239, -0.6427,  0.5087],
        [-1.5537,  0.0363,  1.8336, -0.0499]])


In [35]:
# Task-11 You can use standard NumPy-like indexing with all bells and whistles!

print(x[:, 1])  # every row, column index 1 (i.e. the second column)

tensor([-1.5495, -0.1638,  1.0789,  0.2733, -0.1932])


In [36]:
# Task-12 Resizing: to resize/reshape a tensor, use Tensor.view

x = torch.randn(4, 4)
y = x.view(16)
z = x.view(-1, 8)  # -1 asks PyTorch to infer this dimension (16 / 8 = 2)
print(*(t.size() for t in (x, y, z)))

torch.Size([4, 4]) torch.Size([16]) torch.Size([2, 8])


In [39]:
# Task-12 (continued) Get numerical value:
# If you have a one element tensor, use .item()
# to get the value as a Python number

x = torch.randn(1)
print(x)
print(x.item())
print(y[0].item())  # y is still the 16-element view created in the previous cell

tensor([-0.9528])
-0.9528326988220215
-1.0214507579803467


**1.3 Converting a Torch Tensor to a NumPy Array**

In [41]:
# Task-13 Converting a Torch Tensor to a NumPy Array
a = torch.ones(5)
b = a.numpy()  # b wraps the same memory as a (demonstrated in the next cell)
print(a)
print(b)

tensor([1., 1., 1., 1., 1.])
[1. 1. 1. 1. 1.]


In [43]:
# Task-14 See how the numpy array changed in value.
# a is modified in-place; b was created from a with .numpy() and reflects the change.

a.add_(1)
print(a)
print(b)

# They share the same memory
# NOTE(review): the recorded output shows 3s rather than 2s, which suggests this
# cell was executed twice (a starts as ones in Task-13) - confirm by re-running.

tensor([3., 3., 3., 3., 3.])
[3. 3. 3. 3. 3.]


In [44]:
# Task-15 Converting NumPy Array to Torch Tensor

import numpy as np

a = np.ones(5)
b = torch.from_numpy(a)  # b shares a's buffer instead of copying it
a += 1                   # in-place update, same effect as np.add(a, 1, out=a)
print(a)
print(b)

[2. 2. 2. 2. 2.]
tensor([2., 2., 2., 2., 2.], dtype=torch.float64)


In [45]:
# Task-16 Tensors can be moved onto any device using the .to method.

# let us run this cell only if CUDA is available
# We will use ``torch.device`` objects to move tensors in and out of GPU
# (no output is recorded for this cell, so presumably CUDA was not available
# when the notebook was run)
if torch.cuda.is_available():
    device = torch.device("cuda")          # a CUDA device object
    y = torch.ones_like(x, device=device)  # directly create a tensor on GPU
    x = x.to(device)                       # or just use strings ``.to("cuda")``
    z = x + y
    print(z)
    print(z.to("cpu", torch.double))       # ``.to`` can also change dtype together!

**1.4 Autograd: automatic differentiation**

Central to all neural networks in PyTorch is the *autograd* package. Let’s first briefly visit this, and we will then go to training our first neural network.

The autograd package provides automatic differentiation for all operations on Tensors. It is a define-by-run framework, which means that your backprop is defined by how your code is run, and that every single iteration can be different.

In [47]:
# Task-17 Create a tensor and set requires_grad=True to track computation with it

x = torch.ones((2, 2), requires_grad=True)
print(x)

tensor([[1., 1.],
        [1., 1.]], requires_grad=True)


In [48]:
# y is the result of an operation on x, so autograd attaches a grad_fn to it
y = x + 2
print(y)

tensor([[3., 3.],
        [3., 3.]], grad_fn=<AddBackward0>)


In [49]:
# grad_fn is the autograd node that produced y (an AddBackward0 for x + 2)
print(y.grad_fn)

<AddBackward0 object at 0x7f9a20e68520>


In [50]:
# Task-18 do operations on y
# z = 3 * y^2 element-wise; out averages the four entries of z into a scalar.

z = y * y * 3
out = z.mean()
print(z, out)

tensor([[27., 27.],
        [27., 27.]], grad_fn=<MulBackward0>) tensor(27., grad_fn=<MeanBackward0>)


In [51]:
# requires_grad defaults to False; .requires_grad_(...) changes the flag in-place.
a = torch.randn(2, 2)
a = (a * 3) / (a - 1)
print(a.requires_grad)  # False: nothing has asked for gradients yet
a.requires_grad_(True)
print(a.requires_grad)  # True after the in-place switch
b = torch.sum(a * a)
print(b.grad_fn)

False
True
<SumBackward0 object at 0x7f9a3468d480>


**1.5 Gradients**

Let’s backprop now. Because `out` contains a single scalar,
`out.backward()` is equivalent to `out.backward(torch.tensor(1.))`.

In [54]:
# Task-19 do the backprop
# NOTE: backward() frees the graph's saved intermediate values, so calling it a
# second time on the same graph raises the RuntimeError recorded below. Rebuild
# out (re-run the cells from Task-17 onward) before running this cell again.

out.backward()

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [55]:
# d(out)/dx: with out = mean(3 * (x + 2)^2), each entry is 6 * (x + 2) / 4 = 4.5 at x = 1
print(x.grad)

tensor([[4.5000, 4.5000],
        [4.5000, 4.5000]])


In [57]:
# Task-20 The autograd graph can be more complicated, e.g. built by a loop
# whose number of iterations depends on the data.

x = torch.randn(3, requires_grad=True)
y = x * 2
# Only the loop condition needs the norm, so compute it on a detached tensor:
# .detach() is the supported replacement for the legacy .data attribute.
while y.detach().norm() < 1000:
    y = y * 2
print(y)

tensor([-1281.4146,  1171.6089,   338.1314], grad_fn=<MulBackward0>)


In [58]:

# y is not a scalar, so backward() is given a vector with the same shape as y;
# x.grad then holds that vector multiplied element-wise by dy_i/dx_i.
gradients = torch.tensor([0.1, 1.0, 0.0001], dtype=torch.float)
y.backward(gradients)
print(x.grad)

tensor([2.0480e+02, 2.0480e+03, 2.0480e-01])


In [59]:
# Task-21 Use torch.no_grad()
# You can stop autograd from tracking history on tensors that have
# requires_grad=True by wrapping the code block in ``with torch.no_grad():``

print(x.requires_grad)
print((x ** 2).requires_grad)

with torch.no_grad():
    print((x ** 2).requires_grad)

True
True
False


In [60]:
# Task-22 create a neural network model

# A typical training procedure for a neural network is as follows:

# Define the neural network that has some learnable parameters (or weights)
# Iterate over a dataset of inputs
# Process input through the network
# Compute the loss (how far is the output from being correct)
# Propagate gradients back into the network’s parameters
# Update the weights of the network, typically using a simple update rule: 
# weight = weight - learning_rate * gradient

import torch
import torch.nn as nn
import torch.nn.functional as F


class Net(nn.Module):
    """Small convolutional network mapping a 1-channel image to 10 scores.

    The first linear layer expects 16 * 5 * 5 features, which is what the two
    conv + pool stages produce for an input of shape (N, 1, 32, 32):
    conv1 -> (N, 6, 28, 28), pool -> (N, 6, 14, 14),
    conv2 -> (N, 16, 10, 10), pool -> (N, 16, 5, 5), flattened -> (N, 400).
    """

    def __init__(self):
        super().__init__()
        # Two 5x5 convolutions: 1 input channel -> 6 feature maps -> 16 feature maps
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5)
        self.conv2 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5)
        # Three affine layers (y = Wx + b): 400 -> 120 -> 84 -> 10
        self.fc1 = nn.Linear(in_features=16 * 5 * 5, out_features=120)
        self.fc2 = nn.Linear(in_features=120, out_features=84)
        self.fc3 = nn.Linear(in_features=84, out_features=10)

    def forward(self, x):
        """Run the network on a batch ``x`` and return the raw 10-way scores."""
        # Stage 1: convolution, ReLU, then max pooling over a (2, 2) window
        features = F.max_pool2d(F.relu(self.conv1(x)), (2, 2))
        # Stage 2: same pattern; a square window may be given as a single number
        features = F.max_pool2d(F.relu(self.conv2(features)), 2)
        # Collapse everything except the batch dimension before the linear layers
        flat = features.view(-1, self.num_flat_features(features))
        hidden = F.relu(self.fc1(flat))
        hidden = F.relu(self.fc2(hidden))
        # No activation on the last layer: the caller receives unnormalized scores
        return self.fc3(hidden)

    def num_flat_features(self, x):
        """Return the number of elements per sample of ``x``.

        This is the product of all dimensions except the leading (batch) one;
        it is 1 when ``x`` has no dimensions beyond the first.
        """
        return x.size()[1:].numel()


# Instantiate the network; printing a Module lists its registered sub-layers
net = Net()
print(net)

Net(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (fc1): Linear(in_features=400, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=84, bias=True)
  (fc3): Linear(in_features=84, out_features=10, bias=True)
)


In [62]:
# Task-23 The learnable parameters of a model are returned by net.parameters()

params = list(net.parameters())
print(len(params))  # 10: a weight and a bias for each of the 5 layers
print(params[0].size())  # conv1's .weight

10
torch.Size([6, 1, 5, 5])


In [63]:
# Task-24 Run a forward pass on a random 32x32 single-channel input
# NOTE: ``input`` shadows the Python builtin of the same name; the name is kept
# because later cells reuse it.
input = torch.randn(1, 1, 32, 32)
out = net(input)
print(out)

tensor([[-0.1862,  0.0671,  0.0488,  0.1243,  0.0143,  0.1071,  0.0546,  0.0751,
         -0.0982, -0.0209]], grad_fn=<AddmmBackward0>)


In [65]:
# Task-25 Zero the gradient buffers of all parameters and backprop with random gradients:
# NOTE: backward() can run only once per forward pass unless retain_graph=True
# is passed; the RuntimeError recorded below presumably comes from running this
# cell again without recomputing out.

net.zero_grad()
out.backward(torch.randn(1, 10))

RuntimeError: Trying to backward through the graph a second time (or directly access saved tensors after they have already been freed). Saved intermediate values of the graph are freed when you call .backward() or autograd.grad(). Specify retain_graph=True if you need to backward through the graph a second time or if you need to access saved tensors after calling backward.

In [67]:
# Task-26 Loss function
# The criterion compares the network output with a target of the same shape
# and reduces the difference to a single scalar.

output = net(input)
# a dummy target, reshaped to (1, 10) so that it matches the shape of output
target = torch.randn(10).view(1, -1)
criterion = nn.MSELoss()

loss = criterion(output, target)
print(loss)


tensor(1.3458, grad_fn=<MseLossBackward0>)


In [69]:
print(loss.grad_fn)  # MSELoss
print(loss.grad_fn.next_functions[0][0])  # Linear (shown as AddmmBackward0)
# NOTE(review): in the recorded output this is an AccumulateGrad node (a leaf
# parameter, presumably fc3's bias), not ReLU as the original comment claimed.
print(loss.grad_fn.next_functions[0][0].next_functions[0][0])  # AccumulateGrad

<MseLossBackward0 object at 0x7f9a000b5b70>
<AddmmBackward0 object at 0x7f9a000a5de0>
<AccumulateGrad object at 0x7f9a20e7e680>


In [70]:
# Task-27 Backprop

net.zero_grad()     # zeroes the gradient buffers of all parameters

# In the recorded output the gradient is None at this point
print('conv1.bias.grad before backward')
print(net.conv1.bias.grad)

loss.backward()

# After backward() the bias holds one gradient value per conv1 output channel
print('conv1.bias.grad after backward')
print(net.conv1.bias.grad)

conv1.bias.grad before backward
None
conv1.bias.grad after backward
tensor([-0.0236,  0.0037,  0.0204,  0.0084, -0.0122, -0.0308])


In [71]:
# Update the weights with plain SGD: weight = weight - learning_rate * gradient

learning_rate = 0.01
# The update must not be recorded by autograd; torch.no_grad() is the supported
# way to do that (instead of mutating through the legacy .data attribute).
with torch.no_grad():
    for param in net.parameters():
        if param.grad is None:
            # No gradient was computed for this parameter; nothing to apply.
            continue
        param.sub_(param.grad * learning_rate)

In [72]:
import torch.optim as optim

# create your optimizer
# (SGD with lr=0.01 and no momentum applies weight = weight - lr * gradient,
# the same rule as the manual update in the previous cell)
optimizer = optim.SGD(net.parameters(), lr=0.01)

# in your training loop:
optimizer.zero_grad()   # zero the gradient buffers
output = net(input)
loss = criterion(output, target)
loss.backward()
optimizer.step()    # Does the update