# WMLMIA 2018
## Tutorial 1 : PyTorch for Deep Neural Networks

### Follow the installation instructions on the [PyTorch](https://pytorch.org/) website
### Tensor initialization and slicing

In [None]:
# Importing the library
import torch

In [None]:
# Defining tensors
# torch.empty allocates without initializing, so x holds whatever was in memory
x = torch.empty(2, 2)
# torch.rand draws every entry at random
y = torch.rand(2, 2)
print(x, y)

In [None]:
# Obtaining size of tensors (the shape attribute is the same torch.Size as size())
print(x.shape)

In [None]:
# Arithmetic operations
z = torch.rand(2, 2)
print(y, z, torch.add(y, z))  # element-wise sum of y and z
print(z, torch.mul(z, 2))  # every entry of z scaled by 2

In [None]:
# Indexing is similar to numpy indexing
print(y)
print(y[1][1])  # element at row 1, column 1
print(y.select(1, 0))  # column 0 of every row

In [None]:
# Converting to numpy
y_np = y.numpy()
# Show the tensor first, then its numpy counterpart
for view in (y, y_np):
    print(view)

In [None]:
import numpy as np
# Converting from numpy to tensor: start from a 3x3 array of ones
x_np = np.ones(shape=(3, 3))
x_py = torch.from_numpy(x_np)
print(x_np, x_py)

In [None]:
# Improving computational time with GPU acceleration
# Times the same element-wise product on the CPU and on the GPU.
import time
use_gpu = torch.cuda.is_available()
if use_gpu:
    # Allocate the benchmark tensor once, and only when it is going to be used.
    # It is kept large so that the GPU speed-up is actually visible.
    x = torch.randn(10000, 10000)
    cpuStart = time.perf_counter()
    y = x*x
    cpuEnd = time.perf_counter()-cpuStart
    # Move to the current CUDA device; a hard-coded device index breaks on
    # machines that have a single GPU.
    x = x.cuda()
    # Warm-up on a tiny slice so one-time kernel start-up cost is not timed.
    _ = x[:1]*x[:1]
    torch.cuda.synchronize()
    gpuStart = time.perf_counter()
    y = x*x
    # CUDA kernels are launched asynchronously: wait for completion before
    # reading the clock, otherwise only the launch is measured.
    torch.cuda.synchronize()
    gpuEnd = time.perf_counter()-gpuStart
    print('CPU computation completed in {:.6f}s, GPU computation completed in {:.6f}s'\
          .format(cpuEnd,gpuEnd))

### Autograd
#### This package provides automatic differentiation for all operations on Tensors

In [None]:
from torch.autograd import Variable  # deprecated wrapper; import kept so cells relying on this name still run
a = torch.ones(2,2)
# Tensors track gradients themselves, so no Variable wrapper is needed:
# detach() gives a new leaf tensor sharing a's storage, and requires_grad_()
# switches gradient tracking on for it (a itself stays untracked).
a_var = a.detach().requires_grad_()
print(a)
print(a_var)

In [None]:
# Add the scalar 2 to every entry of a_var
b = torch.add(a_var, 2)
print(b)

In [None]:
# Inspect the grad_fn attribute of b (b was computed from a_var in the previous cell)
print(b.grad_fn)

In [None]:
# c = 3 * b^2 element-wise, d = mean of all entries of c
c = torch.mul(b, b).mul(3)
d = torch.mean(c)
print(c)
print(d)

In [None]:
# Backpropagate from d; the next cell reads the resulting gradient from a_var.grad
d.backward()

In [None]:
# Gradients
# With a = 1, b = a + 2 and d = mean(3 * b * b) over 4 entries,
# dd/da = 6 * b / 4 = 4.5 for every entry after a single backward pass.
print(a_var.grad)

### Torchvision datasets

In [None]:
%matplotlib inline
import torch
from torchvision import datasets,transforms
import matplotlib.pyplot as plt

In [None]:
# Convert every image to a tensor when it is loaded
apply_transform = transforms.Compose([transforms.ToTensor()])
# Training split first, then the test split; both are downloaded into ./MNIST if missing
trainDset, testDset = (
    datasets.MNIST('./MNIST', train=is_train, download=True, transform=apply_transform)
    for is_train in (True, False)
)

In [None]:
# Number of samples
n_train, n_test = len(trainDset), len(testDset)
print(n_train, n_test)

In [None]:
# Displaying sample image from the dataset
first_sample = trainDset[0]  # (image, label) pair
# Reorder the axes with transpose(1,2,0), then drop the last axis to get a 2-D image
img = first_sample[0].numpy().transpose(1, 2, 0).squeeze(2)
plt.imshow(img, cmap='gray')
print('Label: ' + str(first_sample[1]))

In [None]:
# Creating dataloader for loading data in batches
# Both loaders share the same settings
loader_kwargs = dict(batch_size=10, shuffle=True, num_workers=1, pin_memory=False)
trainLoader = torch.utils.data.DataLoader(trainDset, **loader_kwargs)
testLoader = torch.utils.data.DataLoader(testDset, **loader_kwargs)

### Defining a multi-layer perceptron

In [None]:
import torch.nn.functional as F
import torch.nn as nn
class MLP(nn.Module):
    """Multi-layer perceptron with one sigmoid hidden layer and log-softmax output.

    Args:
        in_features: size of each flattened input sample (default 28*28).
        hidden_features: number of units in the hidden layer (default 100).
        num_classes: number of output classes (default 10).
    """

    def __init__(self, in_features=28*28, hidden_features=100, num_classes=10):
        super(MLP, self).__init__()
        self.fc1 = nn.Linear(in_features, hidden_features)
        self.fc2 = nn.Linear(hidden_features, num_classes)

    def forward(self, x):
        """Return log-probabilities for x.

        The softmax is taken over dim 1, so x is expected to be 2-D with the
        feature values along its last dimension.
        """
        hidden = torch.sigmoid(self.fc1(x))
        logits = self.fc2(hidden)
        return F.log_softmax(logits, dim=1)

In [None]:
# Instantiate the MLP defined above and print it
net = MLP()
print(net)

In [None]:
# Collect the learnable parameters; the first two belong to fc1
params = list(net.parameters())
fc1_weight, fc1_bias = params[:2]
print('No. of parameters :' + str(len(params)))
print('Dimensions of first parameter: ' + str(fc1_weight.size()))  # Weights of fc1
print('Dimensions of second parameter: ' + str(fc1_bias.size()))  # Biases of fc1

In [None]:
# First (image, label) pair of the dataset behind the training loader
inp, label = trainLoader.dataset[0]

In [None]:
from torch.autograd import Variable  # deprecated wrapper; import kept so cells relying on this name still run
# Feed-forward data through network
# A plain tensor can be passed to the module directly, no Variable wrapper needed.
out = net(inp.view(-1,28*28))  # view(-1, 28*28) flattens the image into rows of 784 values
print(inp.size())
print(out.size())

In [None]:
# Backpropagating gradients
net.zero_grad()
# out is not a scalar, so an upstream gradient is supplied explicitly (random here)
out.backward(gradient=torch.randn(1, 10))

In [None]:
out = net(inp.view(-1,28*28))  # plain tensors work directly; no Variable wrapper needed
# Defining loss function
criterion = nn.NLLLoss() # Negative log-likelihood loss
# Build a 1-element target. torch.as_tensor accepts both a plain int label and
# a tensor label, and view(1) keeps the shape stable if this cell is re-run
# (unsqueeze(0) would fail on an int and add a dimension on every re-run).
label = torch.as_tensor(label).view(1)
loss = criterion(out,label.long()) # NLLLoss() expects the labels to be of dtype 'long'
print(loss)

In [None]:
# Backpropagation
net.zero_grad()     # clears the gradient buffers of all parameters

print('Bias gradient of fc1 before backward')
# Depending on the PyTorch version, zero_grad() either fills .grad with zeros
# or resets it to None, so guard before slicing.
bias_grad = net.fc1.bias.grad
print(bias_grad if bias_grad is None else bias_grad[:10])

loss.backward()

print('Bias gradient of fc1 after backward')
print(net.fc1.bias.grad[:10])

In [None]:
import copy
# Updating weights of the network
learning_rate = 1
init_params = copy.deepcopy(net.fc2.weight.detach()) # Copying initial parameters

# One plain gradient-descent step: p <- p - learning_rate * grad.
# torch.no_grad() keeps the in-place update out of the autograd graph,
# replacing direct manipulation of .data.
with torch.no_grad():
    for f in net.parameters():
        if f.grad is None:  # no gradient was computed for this parameter
            continue
        f.sub_(f.grad * learning_rate)
updated_params = net.fc2.weight.detach()
print(init_params[0,:5])
print(updated_params[0,:5])