In [19]:
%matplotlib inline

import torch
import torchvision
import torchvision.transforms as transforms
import matplotlib.pyplot as plt
import numpy as np
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

In [20]:
BATCH_SIZE = 32
TEST_EPOCHS = 1
FINAL_EPOCHS = 10
DROPOUT_PROBABILITIES = [float(0.25 + (i*0.05)) for i in range(0, 10)]

In [21]:
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5), (0.5))])

trainset = torchvision.datasets.MNIST(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=BATCH_SIZE,
                                          shuffle=True, num_workers=2)

testset = torchvision.datasets.MNIST(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=BATCH_SIZE,
                                         shuffle=False, num_workers=2)

classes = ('0', '1', '2', '3', '4', '5', '6', '7', '8', '9')

In [22]:
class Net(nn.Module):
    def __init__(self, dropout_probability):
        super().__init__()
        
        self.convolutional_layer = nn.Sequential(
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.BatchNorm2d(6),
            nn.Conv2d(in_channels=6, out_channels=14, kernel_size=5, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
            nn.Conv2d(in_channels=14, out_channels=120, kernel_size=5, padding=1),
            nn.ReLU()
        )
        
        self.linear_layer = nn.Sequential(
            nn.Linear(in_features=1080, out_features=256),
            nn.ReLU(),
            nn.Dropout(dropout_probability),
            nn.Linear(in_features=256, out_features=84),
            nn.ReLU(),
            nn.BatchNorm1d(84),
            nn.Linear(in_features=84, out_features=10)
        )

    def forward(self, x):
        x = self.convolutional_layer(x)
        x = torch.flatten(x, 1) # flatten all dimensions except batch
        x = self.linear_layer(x)
        x = F.softmax(x, dim=1)
        return x

In [23]:
def RunNetwork(dropout, epochs):
    print(f'Running model for {epochs} epoch(s); dropout probability: {dropout:.2f}')
    net = Net(dropout)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

    for epoch in range(epochs):  # loop over the dataset multiple times

        running_loss = 0.0
        for i, data in enumerate(trainloader, 0):
            # get the inputs; data is a list of [inputs, labels]
            inputs, labels = data

            # zero the parameter gradients
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = net(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # print statistics
            running_loss += loss.item()
            if i % 200 == 199:    # print every 200 mini-batches
                print(f'[dropout probability: {dropout:.2f}, epoch: {epoch + 1:3d}, mini-batch: {i + 1:5d}] loss: {running_loss / 200:.3f}')
                running_loss = 0.0

    correct = 0
    total = 0
    # since we're not training, we don't need to calculate the gradients for our outputs
    with torch.no_grad():
        for data in testloader:
            images, labels = data
            # calculate outputs by running images through the network
            outputs = net(images)
            # the class with the highest energy is what we choose as prediction
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = correct / total
    print(f'Accuracy of the network on the 10000 test images: {accuracy}\n')
    return accuracy

In [24]:
print('Testing models with different dropout probabilities for one epoch each.\n')
accuracies = []

for dropout_probability in DROPOUT_PROBABILITIES:
    accuracies.append(RunNetwork(dropout_probability, TEST_EPOCHS))
    
best_dropout = DROPOUT_PROBABILITIES[accuracies.index(max(accuracies))]
RunNetwork(best_dropout, FINAL_EPOCHS)

Testing models with different dropout probabilities for one epoch each.

Running model for 1 epoch(s); dropout probability: 0.25
[dropout probability: 0.25, epoch:   1, mini-batch:   200] loss: 2.136
[dropout probability: 0.25, epoch:   1, mini-batch:   400] loss: 1.781
[dropout probability: 0.25, epoch:   1, mini-batch:   600] loss: 1.651
[dropout probability: 0.25, epoch:   1, mini-batch:   800] loss: 1.601
[dropout probability: 0.25, epoch:   1, mini-batch:  1000] loss: 1.569
[dropout probability: 0.25, epoch:   1, mini-batch:  1200] loss: 1.550
[dropout probability: 0.25, epoch:   1, mini-batch:  1400] loss: 1.538
[dropout probability: 0.25, epoch:   1, mini-batch:  1600] loss: 1.530
[dropout probability: 0.25, epoch:   1, mini-batch:  1800] loss: 1.524
Accuracy of the network on the 10000 test images: 0.9721

Running model for 1 epoch(s); dropout probability: 0.30
[dropout probability: 0.30, epoch:   1, mini-batch:   200] loss: 2.116
[dropout probability: 0.30, epoch:   1, mini-ba

[dropout probability: 0.25, epoch:   1, mini-batch:  1800] loss: 1.525
[dropout probability: 0.25, epoch:   2, mini-batch:   200] loss: 1.515
[dropout probability: 0.25, epoch:   2, mini-batch:   400] loss: 1.515
[dropout probability: 0.25, epoch:   2, mini-batch:   600] loss: 1.512
[dropout probability: 0.25, epoch:   2, mini-batch:   800] loss: 1.507
[dropout probability: 0.25, epoch:   2, mini-batch:  1000] loss: 1.505
[dropout probability: 0.25, epoch:   2, mini-batch:  1200] loss: 1.502
[dropout probability: 0.25, epoch:   2, mini-batch:  1400] loss: 1.502
[dropout probability: 0.25, epoch:   2, mini-batch:  1600] loss: 1.499
[dropout probability: 0.25, epoch:   2, mini-batch:  1800] loss: 1.503
[dropout probability: 0.25, epoch:   3, mini-batch:   200] loss: 1.496
[dropout probability: 0.25, epoch:   3, mini-batch:   400] loss: 1.491
[dropout probability: 0.25, epoch:   3, mini-batch:   600] loss: 1.494
[dropout probability: 0.25, epoch:   3, mini-batch:   800] loss: 1.494
[dropo

0.9898