<a href="https://colab.research.google.com/github/alelom/Notebooks/blob/master/Statistical%20Learning/Pytorch/Exercise%20Files/Autograd.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Autograd

In [2]:
# Seed PyTorch and NumPy so that repeated runs of this notebook give the same results.
# Note: identical results are still not guaranteed across PyTorch releases.
import numpy as np
import torch

SEED = 0  # single value shared by every random number generator seeded below

torch.manual_seed(SEED)
np.random.seed(SEED)

# Request deterministic cuDNN behaviour and switch off its benchmark mode.
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [3]:
import torch
from torchvision import datasets, transforms
import torch.nn.functional as F
from torch import nn

# Single-channel normalisation statistics; Normalize computes (x - mean) / std
mean = (0.5,)
std = (0.5,)

# Preprocessing applied to every image: convert to a tensor, then normalise
preprocessing_steps = [
    transforms.ToTensor(),
    transforms.Normalize(mean, std),
]
transform = transforms.Compose(preprocessing_steps)

# Training split: download FMNIST if necessary and serve it in shuffled batches of 64
trainset = datasets.FashionMNIST('~/.pytorch/FMNIST/', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, shuffle=True, batch_size=64)

# Test split: same preprocessing, batches of 64 served in a fixed order
testset = datasets.FashionMNIST('~/.pytorch/FMNIST/', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, shuffle=False, batch_size=64)

Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-images-idx3-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/.pytorch/FMNIST/FashionMNIST/raw/train-images-idx3-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/train-labels-idx1-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/.pytorch/FMNIST/FashionMNIST/raw/train-labels-idx1-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-images-idx3-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/.pytorch/FMNIST/FashionMNIST/raw/t10k-images-idx3-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw
Downloading http://fashion-mnist.s3-website.eu-central-1.amazonaws.com/t10k-labels-idx1-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz


HBox(children=(FloatProgress(value=1.0, bar_style='info', max=1.0), HTML(value='')))

Extracting /root/.pytorch/FMNIST/FashionMNIST/raw/t10k-labels-idx1-ubyte.gz to /root/.pytorch/FMNIST/FashionMNIST/raw
Processing...
Done!




In [4]:
class FMNIST(nn.Module):
  """Fully connected classifier with layer sizes 784 -> 128 -> 64 -> 10.

  ReLU is applied after the first two linear layers and log-softmax after
  the last one, so the network outputs log-probabilities over 10 classes.
  """

  def __init__(self):
    super().__init__()
    self.fc1 = nn.Linear(784, 128)
    self.fc2 = nn.Linear(128, 64)
    self.fc3 = nn.Linear(64, 10)

  def forward(self, x):
    """Return per-class log-probabilities for a batch of inputs.

    Args:
      x: tensor whose first dimension is the batch and whose remaining
        dimensions hold 784 values per sample (e.g. 1x28x28 images).

    Returns:
      Tensor of shape (batch, 10) containing log-probabilities.
    """
    # flatten() falls back to a copy when the memory layout does not allow a
    # view, so non-contiguous batches (e.g. transposed images) are accepted;
    # the previous x.view(...) raised a RuntimeError on such input.
    x = x.flatten(start_dim=1)

    x = F.relu(self.fc1(x))
    x = F.relu(self.fc2(x))
    x = self.fc3(x)
    x = F.log_softmax(x, dim=1)

    return x
    
#model = FMNIST()   






In [5]:
# The same architecture as the FMNIST class, expressed as an nn.Sequential pipeline.
# A Sequential model can be indexed, which makes it easy to inspect each layer's weights.
layers = [
    nn.Linear(784, 128),
    nn.ReLU(),
    nn.Linear(128, 64),
    nn.ReLU(),
    nn.Linear(64, 10),
    nn.LogSoftmax(dim=1),
]

model = nn.Sequential(*layers)

In [6]:
# A model defined with nn.Sequential can be indexed like a list.
model[0] # the first layer: Linear(784 -> 128)

Linear(in_features=784, out_features=128, bias=True)

In [7]:
model[1] # the activation function applied after the first layer (ReLU)
         # ... and so on for the remaining indices.

ReLU()

In [8]:
# Each linear layer exposes its weights through the .weight attribute:
model[0].weight # initial weights of the first linear layer

Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0150, -0.0104,  ..., -0.0203, -0.0060, -0.0299],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0295,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0233, -0.0220, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0066,  0.0125,  ...,  0.0286,  0.0350, -0.0105]],
       requires_grad=True)

In [9]:
model[2].weight # initial weights of the second linear layer (model[1] is the ReLU in between)

Parameter containing:
tensor([[-0.0825, -0.0339,  0.0713,  ..., -0.0854,  0.0097, -0.0645],
        [ 0.0488,  0.0832,  0.0728,  ..., -0.0440,  0.0541,  0.0612],
        [-0.0055, -0.0093, -0.0157,  ...,  0.0782,  0.0032, -0.0530],
        ...,
        [ 0.0009,  0.0023, -0.0776,  ...,  0.0575,  0.0759,  0.0016],
        [-0.0815, -0.0214, -0.0414,  ...,  0.0702, -0.0252,  0.0703],
        [-0.0815,  0.0329, -0.0134,  ...,  0.0773,  0.0774,  0.0468]],
       requires_grad=True)

In [10]:
# Gradients are stored on each parameter's .grad attribute:
print(model[0].weight.grad) # None until a backward pass has been run

None


# Training

In [13]:
# Draw a single mini-batch of images and their true labels from the training loader
batch_iterator = iter(trainloader)
images, labels_true = next(batch_iterator)

# Collapse every image into one flat feature vector per sample
n_samples = images.shape[0]
images = images.view(n_samples, -1)

In [12]:
# Loss function: negative log-likelihood, applied to the log-probabilities
# produced by the model's final LogSoftmax layer
criterion = nn.NLLLoss()

### Forward pass, calculate the loss, then do a backward pass

In [20]:
# forward pass: the model returns per-class log-probabilities for each image
# (the final layer is LogSoftmax), not hard class labels, despite the variable name
labels_predict = model(images)
labels_predict[:5]

tensor([[-2.1456, -2.3718, -2.2915, -2.2040, -2.2056, -2.2932, -2.4681, -2.4290,
         -2.3867, -2.2800],
        [-2.1586, -2.3401, -2.2397, -2.1257, -2.1700, -2.3202, -2.5069, -2.3997,
         -2.4614, -2.3823],
        [-2.2052, -2.3124, -2.2088, -2.2332, -2.2492, -2.3104, -2.3566, -2.5074,
         -2.4150, -2.2687],
        [-2.1303, -2.3323, -2.1528, -2.1503, -2.3084, -2.2915, -2.4367, -2.5488,
         -2.4333, -2.3268],
        [-2.1809, -2.3413, -2.2179, -2.1996, -2.2279, -2.2823, -2.4368, -2.4494,
         -2.4117, -2.3237]], grad_fn=<SliceBackward>)

In [22]:
 # calculate loss: compare the predicted log-probabilities with the true labels
loss = criterion(labels_predict, labels_true)
loss # a scalar tensor that carries a grad_fn, so it can be back-propagated

tensor(2.3144, grad_fn=<NllLossBackward>)

In [23]:
# backward pass: compute the gradient of the loss with respect to the model parameters
loss.backward()
loss        # the loss value itself is unchanged; the gradients are stored on the parameters' .grad

tensor(2.3144, grad_fn=<NllLossBackward>)

In [24]:
# No optimiser step has been taken, so the weights printed here are still the
# initial ones; only .grad has been filled in by loss.backward().
print('Initial weights : ',model[0].weight)
print('Initial weights gradient : ',model[0].weight.grad)

Initial weights :  Parameter containing:
tensor([[-0.0003,  0.0192, -0.0294,  ...,  0.0219,  0.0037,  0.0021],
        [-0.0198, -0.0150, -0.0104,  ..., -0.0203, -0.0060, -0.0299],
        [-0.0201,  0.0149, -0.0333,  ..., -0.0203,  0.0012,  0.0080],
        ...,
        [ 0.0018, -0.0295,  0.0085,  ..., -0.0037,  0.0036,  0.0300],
        [-0.0233, -0.0220, -0.0064,  ...,  0.0115, -0.0324, -0.0158],
        [ 0.0309,  0.0066,  0.0125,  ...,  0.0286,  0.0350, -0.0105]],
       requires_grad=True)
Initial weights gradient :  tensor([[-3.4056e-03, -3.4056e-03, -3.4056e-03,  ..., -3.5191e-03,
         -3.4047e-03, -3.4047e-03],
        [-1.7043e-03, -1.7043e-03, -1.7043e-03,  ..., -1.9390e-03,
         -1.7004e-03, -1.7004e-03],
        [-3.2319e-04, -3.2319e-04, -3.2319e-04,  ..., -3.2319e-04,
         -3.2319e-04, -3.2319e-04],
        ...,
        [ 8.4142e-05,  8.4142e-05,  8.4142e-05,  ...,  8.0253e-05,
          8.3021e-05,  8.3021e-05],
        [-1.6067e-03, -1.6067e-03, -1.6067e-0