<a href="https://colab.research.google.com/github/fernanda-palacios/ai-code-notebooks/blob/main/e_neural_networks.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# **Neural Networks for Digit Classification**

A small fully connected neural network that classifies MNIST handwritten digits, built with ReLU activations, dropout, and batch normalization.

In [None]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from tqdm import tqdm

# Seed PyTorch's global random number generator so that runs are repeatable.
torch.manual_seed(1)

# MNIST train and test splits, stored under ./data (downloaded if missing).
# A single ToTensor transform is shared by both splits.
to_tensor = transforms.ToTensor()
mnist_train = datasets.MNIST(root='data', train=True, download=True, transform=to_tensor)
mnist_test = datasets.MNIST(root='data', train=False, download=True, transform=to_tensor)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 28704439.60it/s]


Extracting data/MNIST/raw/train-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 109131255.70it/s]


Extracting data/MNIST/raw/train-labels-idx1-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 37157639.82it/s]


Extracting data/MNIST/raw/t10k-images-idx3-ubyte.gz to data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 20797520.49it/s]


Extracting data/MNIST/raw/t10k-labels-idx1-ubyte.gz to data/MNIST/raw



In [None]:
def train(model, train, valid, batch_size, train_iters, lr, device):
    """Train ``model`` with SGD and print its accuracy on ``valid`` after every epoch.

    Every image batch is flattened to shape ``(batch, features)`` before it is
    passed to the model.

    Args:
        model: ``nn.Module`` that maps a flattened image batch to per-class logits.
        train: dataset of ``(image, label)`` pairs used for optimisation.
            (Inside this function the name shadows the function itself.)
        valid: dataset of ``(image, label)`` pairs used for evaluation.
        batch_size: number of samples per batch for both data loaders.
        train_iters: number of full passes (epochs) over ``train``.
        lr: learning rate for ``optim.SGD``.
        device: device the model and the batches are moved to (e.g. ``"cuda"``).

    Returns:
        None. The model is updated in place, moved to ``device`` and, when at
        least one epoch ran, left in evaluation mode. Progress is shown with a
        tqdm bar and the accuracy is printed after each epoch.
    """
    train_loader = DataLoader(train, batch_size=batch_size, shuffle=True)
    # Accuracy does not depend on sample order, so the evaluation data is not shuffled.
    valid_loader = DataLoader(valid, batch_size=batch_size, shuffle=False)
    crit = nn.CrossEntropyLoss()

    model = model.to(device)  # place model on the device specified by the user
    opt = optim.SGD(model.parameters(), lr=lr)

    for i in range(train_iters):
        train_loop = tqdm(train_loader, total=len(train_loader), position=0, leave=True)  # loading bar for training loop
        train_loop.set_description(f"Training iteration [{i+1}/{train_iters}]")
        # Switch back to training mode: the evaluation pass below puts the model in eval mode.
        model.train()

        for img_batch, labels in train_loop:
            # input data and labels must live on the same device as the model
            img_batch, labels = img_batch.to(device), labels.to(device)
            img_batch = img_batch.view(img_batch.shape[0], -1)  # flatten each image to a vector

            opt.zero_grad()
            predictions = model(img_batch)
            loss = crit(predictions, labels)
            loss.backward()
            opt.step()

            train_loop.set_postfix(loss=loss.item())  # gives the loss to the loading bar for display

        # Evaluation mode disables dropout and makes batch norm use its running
        # statistics; without it the reported accuracy is measured on a randomly
        # perturbed network and the test data leaks into the batch-norm statistics.
        model.eval()
        num_correct = 0
        with torch.no_grad():  # no gradients are needed because no weights are updated here
            for img_batch, labels in valid_loader:
                img_batch, labels = img_batch.to(device), labels.to(device)
                img_batch = img_batch.view(img_batch.shape[0], -1)
                logits = model(img_batch)
                # softmax is monotonic, so the arg-max of the logits is already
                # the index of the most probable class
                predicted_labels = logits.argmax(dim=-1)
                num_correct += (predicted_labels == labels).sum().item()
        print(f"Testing accuracy: {num_correct/len(valid)}")

In [None]:
class MNISTClassifierWithDropoutAndBatchNorm(nn.Module):
    """Fully connected classifier mapping 28x28 images to 10 logits (784 -> 50 -> 20 -> 10).

    Dropout with p=0.4 is applied to the input of every linear layer, and each
    of the two hidden linear layers is followed by batch normalization and ReLU.
    """

    def __init__(self):
        super().__init__()
        pixels = 28 * 28

        # linear layers
        self.layer1 = nn.Linear(pixels, 50)
        self.layer2 = nn.Linear(50, 20)
        self.layer3 = nn.Linear(20, 10)

        # one dropout module in front of each linear layer
        self.dropout1 = nn.Dropout(p=0.4)
        self.dropout2 = nn.Dropout(p=0.4)
        self.dropout3 = nn.Dropout(p=0.4)

        # batch normalization over the outputs of the two hidden layers
        self.batch_norm1 = nn.BatchNorm1d(num_features=50)
        self.batch_norm2 = nn.BatchNorm1d(num_features=20)

    def forward(self, img):
        """Return the 10 class logits for every image in ``img``."""
        # collapse everything but the batch dimension into 784 features
        hidden = img.view(-1, self.layer1.in_features)

        # first hidden layer: dropout -> linear -> batch norm -> ReLU
        hidden = self.layer1(self.dropout1(hidden))
        hidden = F.relu(self.batch_norm1(hidden))

        # second hidden layer: dropout -> linear -> batch norm -> ReLU
        hidden = self.layer2(self.dropout2(hidden))
        hidden = F.relu(self.batch_norm2(hidden))

        # output layer: dropout -> linear (raw logits, no activation)
        return self.layer3(self.dropout3(hidden))

In [None]:
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on CPU-only runtimes instead of failing on .to("cuda").
device = "cuda" if torch.cuda.is_available() else "cpu"
model = MNISTClassifierWithDropoutAndBatchNorm().to(device)

def num_trainable_params(model):
    """Return the total element count of all parameters of ``model`` that require gradients."""
    total = 0
    for param in model.parameters():
        if param.requires_grad:
            total += param.numel()
    return total

# Report how many parameters of the model will be updated during training.
trainable_param_count = num_trainable_params(model)
print("Number of trainable parameters: ", trainable_param_count)

Number of trainable parameters:  40620


In [None]:
# Train for 9 epochs with SGD, evaluating on the MNIST test split after each epoch.
# The device is chosen at run time (GPU if available, otherwise CPU) so the cell
# does not fail on CPU-only runtimes; train() moves the model to this device.
train(
    model=model,
    train=mnist_train,
    valid=mnist_test,
    batch_size=128,
    train_iters=9,
    lr=0.01,
    device="cuda" if torch.cuda.is_available() else "cpu",
)

Training iteration [1/9]: 100%|██████████| 469/469 [00:09<00:00, 47.44it/s, loss=1.42]


Testing accuracy: 0.5473


Training iteration [2/9]: 100%|██████████| 469/469 [00:09<00:00, 49.38it/s, loss=1.12]


Testing accuracy: 0.6295


Training iteration [3/9]: 100%|██████████| 469/469 [00:09<00:00, 48.15it/s, loss=1.11]


Testing accuracy: 0.6718


Training iteration [4/9]: 100%|██████████| 469/469 [00:08<00:00, 52.14it/s, loss=1.03]


Testing accuracy: 0.6978


Training iteration [5/9]: 100%|██████████| 469/469 [00:08<00:00, 53.77it/s, loss=0.847]


Testing accuracy: 0.712


Training iteration [6/9]: 100%|██████████| 469/469 [00:09<00:00, 51.72it/s, loss=0.779]


Testing accuracy: 0.7282


Training iteration [7/9]: 100%|██████████| 469/469 [00:09<00:00, 50.53it/s, loss=0.979]


Testing accuracy: 0.731


Training iteration [8/9]: 100%|██████████| 469/469 [00:09<00:00, 50.21it/s, loss=0.683]


Testing accuracy: 0.7476


Training iteration [9/9]: 100%|██████████| 469/469 [00:09<00:00, 51.37it/s, loss=0.7]


Testing accuracy: 0.7541
