# Neural Networks with PyTorch: introduction

In [1]:
import os
import torch
import numpy as np
from torch import nn
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from torchvision.transforms import ToTensor
import matplotlib.pyplot as plt

## Brief introduction to PyTorch: tensors

[PyTorch documentation](https://pytorch.org/docs/stable/index.html)


We can think of tensors as the PyTorch counterpart of NumPy arrays. Moreover, they offer several advantages:
- They can run on GPUs or other hardware accelerators.
- They are optimized for automatic differentiation (backpropagation).
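- Tensors stored on the CPU and NumPy arrays can share the same underlying memory (via `torch.from_numpy` and `Tensor.numpy()`), so converting between them requires no copy.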

In [None]:
# Build a tensor directly from a nested Python list (one inner list per row)
x = [
    [1, 2],
    [3, 4],
]
x_tensor = torch.tensor(x)
x_tensor

In [None]:
# from numpy array to tensor
# (torch.from_numpy does not copy: the resulting tensor and x_array share memory)
x_array = np.array(x)
x_tensor = torch.from_numpy(x_array)
x_tensor

In [None]:
# tensor of ones with the same shape (and, by default, the same dtype) as x_tensor
x_ones = torch.ones_like(x_tensor)
x_ones

In [None]:
# random tensor with the same shape as x_tensor; x_tensor holds integers,
# so the dtype is explicitly overridden with a floating-point one
x_rand = torch.rand_like(x_tensor, dtype=torch.float)
x_rand

In [None]:
# Factory functions that take the desired shape as a tuple
shape = (2, 3)
rand_tensor, ones_tensor, zeros_tensor = (
    factory(shape) for factory in (torch.rand, torch.ones, torch.zeros)
)

print(f"Random Tensor: \n {rand_tensor} \n")
print(f"Ones Tensor: \n {ones_tensor} \n")
print(f"Zeros Tensor: \n {zeros_tensor}")

In [None]:
# Every tensor exposes its shape, its element type and the device it lives on
tensor = torch.rand(3, 4)

for description, value in (
    ("Shape", tensor.shape),
    ("Datatype", tensor.dtype),
    ("Device tensor is stored on", tensor.device),
):
    print(f"{description}: {value}")

In [None]:
# Indexing and slicing follow the NumPy conventions
tensor = torch.arange(12).reshape(3, 4)
print(f"Tensor: {tensor}")
print(f"First row: {tensor[0, :]}")
print(f"First column: {tensor[..., 0]}")
print(f"Last column: {tensor[..., -1]}")
# Assigning through a slice modifies the tensor in place: zero the second column
tensor[:, 1] = 0
print(tensor)

In [None]:
# Join three copies of `tensor` side by side (dim=1 is the column dimension)
conc_tensor = torch.cat([tensor] * 3, dim=1)
conc_tensor

In [None]:
# matrix product of `tensor` with its transpose, using the @ operator
t1 = tensor @ tensor.T
# the same matrix product, written as a method call
t2 = tensor.matmul(tensor.T)
print(t1,t2) 

# element-wise product, using the * operator
e1 = tensor * tensor
# the same element-wise product, written as a method call
e2 = tensor.mul(tensor)
print(e1,e2)

### Interoperation with NumPy 

In [None]:
# from tensor to numpy array
t = torch.ones(5)
print(f"t: {t}")
# .numpy() returns an array backed by the same memory as t (no copy is made)
n = t.numpy()
print(f"n: {n}")

In [None]:
# add_ (trailing underscore) modifies t in place; since n shares memory with t,
# changing the tensor will also change its numpy counterpart
t.add_(1)
print(f"t: {t}")
print(f"n: {n}")

## Classification on the FashionMNIST dataset

In this section, we are going to see how to train a classifier on a famous dataset, called Fashion-MNIST. It consists of Zalando’s article images, represented by $28 \times 28$ grayscale images.

The following parameters are important for the loading functions:
- `root`: a string specifying the path where the data are stored;
- `train`: a boolean specifying whether to load the training set (`True`) or the test set (`False`);
- `download`: a boolean specifying whether to download the data if they are not already available under `root`;
- `transform`: the transformation applied to the features (here, the images).

In [None]:
# Settings shared by both splits: storage folder, download-if-missing flag and
# the conversion of each image to a tensor
_common_kwargs = dict(root="data", download=True, transform=ToTensor())

# The two splits differ only in the `train` flag
training_data = datasets.FashionMNIST(train=True, **_common_kwargs)
test_data = datasets.FashionMNIST(train=False, **_common_kwargs)

In [None]:
# The 10 classes, keyed by their integer label (0-9)
labels_map = dict(
    enumerate(
        [
            "T-Shirt",
            "Trouser",
            "Pullover",
            "Dress",
            "Coat",
            "Sandal",
            "Shirt",
            "Sneaker",
            "Bag",
            "Ankle Boot",
        ]
    )
)

# Show a 3x3 grid of randomly drawn training samples with their class names
figure = plt.figure(figsize=(8, 8))
cols, rows = 3, 3
for i in range(1, cols * rows + 1):
    sample_idx = torch.randint(len(training_data), size=(1,)).item()
    img, label = training_data[sample_idx]
    ax = figure.add_subplot(rows, cols, i)
    ax.set_title(labels_map[label])
    ax.axis("off")
    # squeeze() drops the singleton dimensions so imshow receives a 2-D image
    ax.imshow(img.squeeze(), cmap="gray")
plt.show()

### Data Loaders

For effective training, it is important to split the datasets into mini-batches and to
shuffle them at each epoch. We can use the `DataLoader` class of PyTorch for this
purpose and for iterating over batches during training.

In [16]:
# Training batches are reshuffled at every epoch, which is what mini-batch SGD wants.
train_dataloader = DataLoader(training_data, batch_size=64, shuffle=True)
# Evaluation gains nothing from shuffling. A fixed order keeps the composition of
# every batch (including a smaller last one) identical across runs, so averages of
# per-batch losses are reproducible for a given model.
test_dataloader = DataLoader(test_data, batch_size=64, shuffle=False)

### Building a Neural Network

We can define the architecture of the NN with a class derived from `nn.Module`.

In [18]:
class NeuralNetwork(nn.Module):
    """Fully connected classifier: flatten -> (Linear + ReLU) x 2 -> Linear.

    The default arguments reproduce the architecture used for 28x28 images
    and 10 classes (784 -> 512 -> 512 -> 10).

    Args:
        in_features: Number of values per sample once the input is flattened.
        hidden_features: Width of each of the two hidden layers.
        num_classes: Number of output scores (one logit per class).
    """

    def __init__(self, in_features=28 * 28, hidden_features=512, num_classes=10):
        super().__init__()
        # Collapse every dimension except the batch one,
        # e.g. (N, 28, 28) -> (N, 784)
        self.flatten = nn.Flatten()
        # Sequential is a container of modules: data flows from one module to
        # the next in the order they are listed
        self.linear_relu_stack = nn.Sequential(
            # affine map (weights + bias) from the flattened input to the
            # first hidden layer
            nn.Linear(in_features, hidden_features),
            # ReLU activation
            nn.ReLU(),
            nn.Linear(hidden_features, hidden_features),
            nn.ReLU(),
            # output layer: raw scores, no softmax applied here
            nn.Linear(hidden_features, num_classes),
        )

    def forward(self, x):
        """Return the unnormalized class scores (logits) for a batch ``x``."""
        x = self.flatten(x)
        logits = self.linear_relu_stack(x)
        return logits

In [None]:
# initialize model (with random weights)
model = NeuralNetwork()
# print model structure (the submodules registered on the network)
print(model)

In [None]:
# A single random "image", with a leading batch dimension of size 1
X = torch.rand(1, 28, 28)
# Calling the model runs its forward pass and returns the logits
logits = model(X)

# Softmax over the class dimension turns the logits into probabilities
softmax = nn.Softmax(dim=1)
pred_probab = softmax(logits)
# The prediction is the class with the highest probability
y_pred = pred_probab.argmax(dim=1)
print(f"Predicted class: {y_pred}")

**Notice**: the cross-entropy loss in PyTorch (see
[docs](https://pytorch.org/docs/stable/generated/torch.nn.CrossEntropyLoss.html#crossentropyloss))
expects the raw, unnormalized outputs of the network (the logits) and applies the
(log-)Softmax function to them internally before computing the loss, so we
don't need to add a softmax operation to the final layer of the NN.

### Network training

In [24]:
def train_step(dataloader, model, loss_fn, optimizer):
    """Run one training epoch over ``dataloader``, updating ``model`` in place.

    Every 100th batch, the batch loss is printed together with the number of
    samples processed so far.

    Args:
        dataloader: Iterable of ``(X, y)`` batches exposing a sized
            ``dataset`` attribute (used only for the progress message).
        model: Module mapping a batch ``X`` to predictions.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer built over the parameters of ``model``.
    """
    size = len(dataloader.dataset)
    # Put layers such as dropout / batch-norm in training mode, even if an
    # earlier evaluation pass left the model in eval mode.
    model.train()
    for batch, (X, y) in enumerate(dataloader):
        # Compute prediction and loss
        pred = model(X)
        loss = loss_fn(pred, y)

        # Reset gradients *before* backpropagating, so that gradients left
        # over from before this call cannot leak into the first update
        optimizer.zero_grad()
        # Backpropagation
        loss.backward()
        # Updating model parameters using gradient information
        optimizer.step()

        if batch % 100 == 0:
            # Use a separate name so `loss` keeps referring to the tensor
            loss_value, current = loss.item(), (batch + 1) * len(X)
            print(f"loss: {loss_value:>7f}  [{current:>5d}/{size:>5d}]")

In [25]:
def test_step(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    test_loss, correct = 0, 0

    # Evaluating the model with torch.no_grad() ensures that no gradients 
    # are computed during test mode and also serves to reduce unnecessary 
    # gradient computations and memory usage for tensors with requires_grad = True
    with torch.no_grad():
        for X, y in dataloader:
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()

    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [None]:
# Fresh model and hyperparameters for the training run
model = NeuralNetwork()
learning_rate = 5e-3
# NOTE(review): not read anywhere below; the mini-batch size actually used is
# the one the DataLoaders were built with. Keep the two in sync.
batch_size = 64

loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.SGD(model.parameters(), lr=learning_rate)

epochs = 20

# Main training loop: one pass over the training set, then one evaluation on
# the test set. The loop variable is called `epoch` rather than `t` so that it
# does not overwrite the tensor `t` from the NumPy interoperation cells
# (re-running those cells after training would otherwise fail).
for epoch in range(1, epochs + 1):
    print(f"Epoch {epoch}\n-------------------------------")
    train_step(train_dataloader, model, loss_fn, optimizer)
    test_step(test_dataloader, model, loss_fn)
print("Done!")

#### Bonus exercise:
Solve the classification problem for the MNIST dataset. Use the
`torchvision.datasets.MNIST` class to load the data. Plot the _average_ losses on the
training and the test sets vs epoch number. Check the predictions of the trained model
on some samples of the test dataset. (No solution to this exercise is provided, as the
implementation is very close to the one above :-)).