# Artificial Neural Networks (ANN) in PyTorch

In [None]:
!pip3 install torch
!pip3 install torchvision
!pip3 install tqdm

In [None]:
import torch

def sigmoid_activation(x):
    """ Sigmoid activation function: 1 / (1 + exp(-x)), applied element-wise.

        Delegates to torch.sigmoid instead of evaluating the formula literally.
        With the literal formula exp(-x) overflows to inf for very negative
        inputs; the forward value is still 0, but autograd then multiplies
        0 by inf and the gradient becomes NaN.

        Arguments
        ---------
        x: torch.Tensor

        Returns
        -------
        torch.Tensor of the same shape as x
    """
    return torch.sigmoid(x)

In [None]:
### Reproducible toy data for a single neuron (fixed random seed)
torch.manual_seed(7)

# One sample made of 5 standard-normal input features
x = torch.randn(1, 5)

# "True" weights: standard-normal values with the same shape as x
weights = torch.randn_like(x)

# "True" bias: a single standard-normal value
bias = torch.randn(1, 1)

print(
    f"Input vector for neuron: {x}",
    f"Weights of input: {weights}",
    f"Bias : {bias}",
    sep="\n",
)

In [None]:
# Neuron output: sigmoid of the weighted sum of the inputs plus the bias
y = sigmoid_activation((x * weights).sum() + bias)
y

#### Matrix multiplication in Pytorch

In [None]:
print(f"Shape of weight matrix: {weights.shape}")
print(f"Shape of input vector: {x.shape}")

# Reshape the weights into a (5, 1) column so the product with x is a valid
# matrix multiplication, then add the bias and apply the activation.
y = sigmoid_activation(torch.matmul(x, weights.view(5, 1)) + bias)
y

In [None]:
### Reproducible toy data for a small two-layer network
torch.manual_seed(7)

# A single sample with 3 standard-normal features
features = torch.randn(1, 3)

# Layer sizes: the input width is read from the data (it must match the number
# of features); the hidden and output widths are chosen by hand.
n_input, n_hidden, n_output = features.shape[1], 2, 1

# Weight matrices: input -> hidden, then hidden -> output
W1 = torch.randn((n_input, n_hidden))
W2 = torch.randn((n_hidden, n_output))

# One bias row vector per layer (hidden, then output)
B1 = torch.randn(1, n_hidden)
B2 = torch.randn(1, n_output)

In [None]:
# Forward pass: hidden activations first, then the network output
h = sigmoid_activation(features @ W1 + B1)
output = sigmoid_activation(h @ W2 + B2)
print(output)

### Pytorch Autograd

In [None]:
# Scalar leaf tensor whose gradient autograd should track
x = torch.tensor(5.0).requires_grad_()

Let's consider a function of x:  $$f(x) = x^2 + 2x + 1$$

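The following code computes the gradient of $f$ w.r.t. $x$ and **accumulates** it in `x.grad`: every call to `backward()` adds to the stored gradient instead of overwriting it, so re-running the cell keeps increasing `x.grad`.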

In [None]:
# Evaluate f(x) = x^2 + 2x + 1 at the current value of x
z = x.pow(2) + 2 * x + 1
print(z, z.requires_grad)

print(f"Gradient on tensor before backward(): {x.grad}")
# Backpropagate from z so that df/dx ends up in x.grad
z.backward()
print(f"Gradient on tensor after backward(): {x.grad}")

In [None]:
# Discard the gradient currently stored on x
x.grad = None

In [None]:
# Inside torch.no_grad() operations are not recorded for differentiation
with torch.no_grad():
    x = torch.tensor(5.0, requires_grad=True)
    print(f"x.requires_grad {x.requires_grad}")

    # Same polynomial as before, evaluated while gradient tracking is disabled
    z_no_grad = x.pow(2) + 2 * x + 1

    # x itself still requires a gradient, but the result computed in this block does not
    print(f"Value of z: {z_no_grad}, Requires grad?: {z_no_grad.requires_grad}")
    # z_no_grad.backward() would raise an error here, because no graph was recorded for it

### Tensor to numpy array and vice-versa

In [None]:
import numpy as np
# 4x3 array of uniform random floats in [0, 1)
a = np.random.random_sample((4, 3))
a

In [None]:
# Turn the NumPy array `a` into a torch tensor and display it
b = torch.from_numpy(a)
b

In [None]:
# And back again: view the tensor `b` as a NumPy array
b.numpy()


## Model Design in Pytorch


In [None]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Mini-batch sizes used for training and for evaluation
batch_size = 32
test_batch_size = 100

# Preprocessing applied to every image: convert it to a tensor, then normalize
# it with mean 0.1307 and standard deviation 0.3081.
data_transformations = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.1307,), std=(0.3081,)),
])

# MNIST training split (requested with download=True) and test split,
# both stored under ../data and both using the same preprocessing.
mnist_train = datasets.MNIST(
    root='../data',
    train=True,
    download=True,
    transform=data_transformations,
)
mnist_test = datasets.MNIST(
    root='../data',
    train=False,
    transform=data_transformations,
)

# Shuffled, batched loaders over the two splits
train_loader = DataLoader(dataset=mnist_train, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=mnist_test, batch_size=test_batch_size, shuffle=True)

In [None]:
import matplotlib.pyplot as plt
# Pull a single batch out of the training loader
train_batches = iter(train_loader)
images, labels = next(train_batches)

print(f"Label={labels[0]}")
# Show the first image of the batch as a 28x28 grayscale picture
plt.imshow(X=images[0].reshape(28, 28), cmap="gray")

### Model building

In [None]:
import torch.nn as nn
from torch import sigmoid

# Run on the GPU when CUDA is available, otherwise on the CPU
use_cuda = torch.cuda.is_available()
if use_cuda:
    device = torch.device("cuda")
else:
    device = torch.device("cpu")


# A minimal fully connected network with a single hidden layer
class ToyNet(nn.Module):
    """Toy network: 3 inputs -> 2 hidden units -> 1 output, with a sigmoid after each layer."""

    def __init__(self):
        # Run the nn.Module constructor first
        super().__init__()
        # nn.Linear(a, b) is a feedforward layer whose weight matrix maps
        # a input features to b output features; a bias is included by default.
        self.hidden = nn.Linear(in_features=3, out_features=2)
        self.output = nn.Linear(in_features=2, out_features=1)

    def forward(self, x):
        """Return the sigmoid-activated output of the network for input x."""
        hidden_activation = sigmoid(self.hidden(x))
        return sigmoid(self.output(hidden_activation))


model = ToyNet().to(device)

print(f"ToyNet model architecture:\n {model}")
print("\n")

# The inputs must live on the same device as the model; without the .to(device)
# the forward pass fails with a device mismatch whenever CUDA is available.
toy_x = torch.randn((1,3)).to(device)
print(f"Prediction of {toy_x} : {model(toy_x)}")
# Calling the model and calling .forward directly run the same computation here
print(f"Prediction of {toy_x} by .forward : {model.forward(toy_x)}")
# A batch of 4 samples is processed in a single call
toy_xs = torch.randn((4,3)).to(device)
print(f"Prediction on batch: {model(toy_xs)}")

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class Net(nn.Module):
    """Fully connected classifier: 28*28 -> 256 -> 100 -> 10 with ReLU hidden activations."""

    def __init__(self):
        super().__init__()
        # First hidden layer: flattened 28*28 input -> 256 units
        self.hidden1 = nn.Linear(in_features=28 * 28, out_features=256)
        # Second hidden layer: 256 -> 100 units
        self.hidden2 = nn.Linear(in_features=256, out_features=100)
        # Output layer: 100 -> 10 units
        self.output = nn.Linear(in_features=100, out_features=10)

    def forward(self, x):
        """Return log-probabilities over the 10 outputs for a batch x.

        x is reshaped to (-1, 28*28) first, so any input whose elements can be
        grouped into rows of 28*28 values is accepted.
        """
        # Flatten each image into a 1D array of 28*28 values
        flat = x.view(-1, 28 * 28)
        # Two ReLU-activated hidden layers followed by the linear output layer
        first = F.relu(self.hidden1(flat))
        second = F.relu(self.hidden2(first))
        logits = self.output(second)
        return F.log_softmax(logits, dim=1)

# Build the classifier, move it to the selected device and show its layers
model = Net()
model = model.to(device)

print(model)

### Training loop

In [None]:
# Number of iterations over the whole data set
epochs = 5
# Learning rate for Stochastic Gradient Descent
lr = 0.01
# SGD momentum coefficient, used to accelerate the optimization
momentum = 0.5
# Loss function - multiclass cross entropy. Net.forward already returns
# log-probabilities (log_softmax), so the matching criterion is the negative
# log-likelihood loss; nn.CrossEntropyLoss would apply log_softmax a second
# time. This also agrees with the F.nll_loss used during evaluation.
criterion = nn.NLLLoss()

In [None]:
from tqdm import tqdm

def train(model, device, train_loader, optimizer, epoch):
    """Train `model` for one pass over `train_loader`.

    The loss is computed with the notebook-level `criterion`. A progress bar
    labelled with the epoch number shows the running average loss.

    Arguments
    ---------
    model: network to optimize
    device: device every batch is moved to before the forward pass
    train_loader: iterable yielding (data, target) batches
    optimizer: optimizer holding the parameters of `model`
    epoch: epoch number, used as the label of the progress bar
    """
    model.train()
    # A wrapper over the data loader that shows a progress bar for this epoch
    bar = tqdm(train_loader, desc=f"Epoch {epoch}")
    overall_loss = 0
    for iteration, (data, target) in enumerate(bar, start=1):
        data, target = data.to(device), target.to(device)
        # Clear the gradients of the previous step so they do not accumulate
        optimizer.zero_grad()
        # Prediction
        output = model(data)
        # Error between prediction and ground truth
        loss = criterion(output, target)
        # Compute gradient
        loss.backward()
        # Update params of model
        optimizer.step()

        # Running average of the loss over the batches seen so far
        overall_loss += loss.item()
        bar.set_postfix({"Loss": format(overall_loss/iteration, '.6f')})

In [None]:
def test( model, device, test_loader):
    model.eval()
    test_loss = 0
    correct = 0
    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            output = model(data)
            # sum up batch loss
            test_loss += F.nll_loss(output, target, reduction='sum').item()
            pred = output.argmax(dim=1, keepdim=True)  # get the index of the max log-probability
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)

    test_loss /= len(test_loader.dataset)
    print(f"Test set: Average loss: {test_loss}, Accuracy: {100. * correct / len(test_loader.dataset)} ")

In [None]:
import torch.optim as optim

# SGD with momentum over all parameters of the model
optimizer = optim.SGD(params=model.parameters(), lr=lr, momentum=momentum)


# One training pass followed by one evaluation pass per epoch (epochs are numbered from 1)
for epoch in range(1, epochs + 1):
    train(model=model, device=device, train_loader=train_loader, optimizer=optimizer, epoch=epoch)
    test(model=model, device=device, test_loader=test_loader)
