# Basics on how to build a simple Neural Network

## 0. Imports

In [22]:
import torch
import torch.nn as nn # Network Modules
import torch.optim as optim # Gradient Descent, SGD, Adam, ...
import torch.nn.functional as F # Activation functions

# The DataLoader makes dataset handling easier:
# it takes care of mini-batching, shuffling and iterating over the data
from torch.utils.data import DataLoader

# Datasets from torchvision: https://pytorch.org/vision/stable/datasets.html
import torchvision.datasets as datasets

# Transformations to perform on our data set (for data augmentation, for example)
import torchvision.transforms as transforms

## 1. Create a Fully Connected Network

Model of the neural network:

In [23]:
class NN(nn.Module):
    """Fully connected classifier with a single hidden layer.

    Architecture: Linear(input_size -> hidden_size) -> ReLU -> Linear(hidden_size -> num_classes).
    The network returns raw class scores; no softmax is applied in ``forward``.

    Args:
        input_size: Number of features of each (already flattened) sample.
        num_classes: Number of scores produced per sample.
        hidden_size: Width of the hidden layer. Defaults to 50, the value
            that was previously hard-coded.
    """

    def __init__(self, input_size, num_classes, hidden_size=50):
        # Initialize nn.Module first so the layers assigned below are
        # registered as sub-modules of this network.
        super().__init__()

        # Modules used by the forward pass.
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the class scores for ``x``, whose last dimension is ``input_size``."""
        x = F.relu(self.fc1(x))
        return self.fc2(x)

## 2. Set Device

In [24]:
# Prefer the GPU when CUDA is available; otherwise fall back to the CPU.
if torch.cuda.is_available():
    DEVICE = torch.device('cuda')
else:
    DEVICE = torch.device('cpu')

## 3. Hyperparameters

In [25]:
INPUT_SIZE = 28 * 28   # 784 features: each 1x28x28 image is flattened into one vector
NUM_CLASSES = 10       # number of scores the network outputs per sample
LEARNING_RATE = 0.001
BATCH_SIZE = 64
NUM_EPOCHS = 1

# Seed PyTorch's random number generator so that weight initialization and
# batch shuffling start from the same state on every "Restart & Run All".
SEED = 42
torch.manual_seed(SEED);  # trailing ';' hides the Generator repr in the cell output

## 4. Load Data

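- `root`: Directory where the dataset is stored (and downloaded to, if needed).
- `train`: If `True`, load the training split; if `False`, load the test split.
- `transform`: Transformation applied to every sample. Here, `transforms.ToTensor()` converts each image into a PyTorch tensor.
- `download`: If `True`, download the dataset into `root` when it is not already there.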

In [26]:
# Build the training split first, then the test split; both are stored under
# 'data' and every sample is converted with ToTensor().
train_ds, test_ds = (
    datasets.MNIST(root='data', train=is_train, transform=transforms.ToTensor(), download=True)
    for is_train in (True, False)
)

⚠️ Be careful not to shuffle the data if it has to follow a specific order, as in some NLP tasks.

In [27]:
# Shuffle the training samples so every epoch sees the mini-batches in a
# different order.
train_loader = DataLoader(dataset=train_ds, batch_size=BATCH_SIZE, shuffle=True)

# Accuracy does not depend on the order of the samples, so the test set is
# iterated in its original order; this keeps evaluation runs deterministic.
test_loader = DataLoader(dataset=test_ds, batch_size=BATCH_SIZE, shuffle=False)

## 5. Initialize network

In [28]:
# Build the network, then move its parameters to the selected device.
model = NN(input_size=INPUT_SIZE, num_classes=NUM_CLASSES)
model = model.to(DEVICE)

## 6. Loss & Optimizer

In [29]:
# Adam updates every parameter of the model using the configured learning rate.
optimizer = optim.Adam(params=model.parameters(), lr=LEARNING_RATE)

# Cross-entropy compares the raw class scores of the network with the target labels.
criterion = nn.CrossEntropyLoss()

## 7. Train Network

In [30]:
for epoch in range(NUM_EPOCHS):
    for batch_idx, (data, targets) in enumerate(train_loader):
        # Move the mini-batch to the compute device (CUDA if available).
        targets = targets.to(device=DEVICE)
        data = data.to(device=DEVICE)

        # Flatten every image into a single feature vector per sample:
        # [64, 1, 28, 28] -> [64, 784]. The Linear layer expects one flat
        # vector of input values per sample, not a multi-dimensional image.
        batch_size = data.shape[0]
        data = data.reshape(batch_size, -1)  # -1 merges all remaining dimensions

        # Gradients accumulate across backward() calls, so reset them to 0
        # before computing the gradients of this batch.
        optimizer.zero_grad()

        ### Forward ###
        scores = model(data)
        loss = criterion(scores, targets)

        ### Backward ###
        loss.backward()

        # Update the parameters using the gradients that were just computed.
        optimizer.step()

## 8. Accuracy & Test

In [38]:
def check_accuracy(loader, model, device=None):
    """Compute the classification accuracy of ``model`` over all batches of ``loader``.

    Every batch is flattened to ``[batch, features]``, passed through the model,
    and the index of the highest score is compared against the target label.
    A short report is printed before and after the evaluation.

    Args:
        loader: Iterable yielding ``(inputs, targets)`` batches. If it exposes a
            ``dataset`` with a boolean ``train`` attribute, that flag is used to
            label the printed report as "training" or "test".
        model: The ``nn.Module`` to evaluate.
        device: Device the batches are moved to. When ``None`` (default), the
            device of the model's first parameter is used, falling back to the
            CPU for a model without parameters.

    Returns:
        The accuracy as a float percentage in ``[0, 100]``; ``0.0`` when the
        loader yields no samples.
    """
    # Not every dataset has a `train` flag, so look it up defensively instead
    # of failing with an AttributeError.
    dataset = getattr(loader, "dataset", None)
    is_train = getattr(dataset, "train", None)
    if is_train is None:
        dataset_type = "provided"
    else:
        dataset_type = "training" if is_train else 'test'
    print(f"Checking accuracy on {dataset_type} data")

    # The inputs must live on the same device as the model's parameters.
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = 0
    num_samples = 0

    # Remember the current mode so it can be restored afterwards, then switch
    # to evaluation mode (this affects layers such as dropout).
    was_training = model.training
    model.eval()
    try:
        # No gradients are needed for evaluation, so skip tracking them.
        with torch.no_grad():
            for x, y in loader:
                x = x.to(device=device)
                y = y.to(device=device)

                x = x.reshape(x.shape[0], -1)

                # One score per class for every sample; the prediction is the
                # index of the highest score.
                scores = model(x)
                predictions = scores.argmax(dim=1)

                # (predictions == y) is a boolean tensor with one entry per
                # sample; summing it counts the correct predictions.
                num_correct += (predictions == y).sum()
                num_samples += predictions.size(0)
    finally:
        # Put the model back into the mode it was in before the evaluation,
        # even if a batch raised an error.
        model.train(was_training)

    # Convert the accumulated tensor into a plain Python int once, at the end.
    num_correct = int(num_correct)

    # Guard against an empty loader, which would otherwise divide by zero.
    acc = (num_correct / num_samples) * 100 if num_samples else 0.0
    print(f"Got {num_correct} / {num_samples} with accuracy {acc:.2f}")

    return acc

In [39]:
# Report the accuracy on both splits. Each call prints its own summary; the
# return value of the last call is what the notebook shows as the cell output.
check_accuracy(train_loader, model)
check_accuracy(test_loader, model)

Checking accuracy on training data
Got 55819 / 60000 with accuracy 93.03
Checking accuracy on test data
Got 9313 / 10000 with accuracy 93.13


93.13