Build our first neural network

1. MNIST Dataset
2. DataLoader and Transformations
3. Multi-layer Neural Network
4. Loss and Optimizer
5. Training loop (batch optimizer)
6. Model Evaluation
7. GPU support


In [None]:
import torch
import torch.nn as nn
import torchvision
import torchvision.transforms as transform
import matplotlib.pyplot as plt

In [None]:
# Device selection and hyperparameter configuration

# Run on the GPU when CUDA is available, otherwise fall back to the CPU
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')

# fixed parameters (determined by the dataset)
input_size = 28 * 28    # each 28x28 image is flattened into a 1-d tensor of 784 values
number_of_classes = 10  # 10 different digits to classify: 0-9

# tunable hyperparameters (changing any of these affects the training accuracy)
hidden_size = 100        # width of the hidden layer
number_of_epochs = 500   # number of passes over the training data
batch_size = 100         # number of samples per batch
learning_rate = 0.001    # learning rate passed to the optimizer

print(device)

### Step 1 & 2. MNIST Dataset

In [None]:
# Load the training and test datasets (download=True fetches MNIST into ./data if needed)
train_dataset = torchvision.datasets.MNIST(root='./data', train=True,
                                          transform=transform.ToTensor(), download=True)
test_dataset = torchvision.datasets.MNIST(root='./data', train=False,
                                          transform=transform.ToTensor(), download=False)

# Define the DataLoaders; only the training data is shuffled
train_loader = torch.utils.data.DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = torch.utils.data.DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

# Get the first batch as tensors and unpack it.
# Use the builtin next(): iterators have no .next() method in Python 3.
examples = iter(train_loader)
features, labels = next(examples)

# samples=100 in our batch; 1 is the channel (not RGB); 28 x 28 is our image size
print(features.shape, labels.shape)

Let's examine what our data looks like

In [None]:
%matplotlib inline

In [None]:
# Preview the first six images of the batch in a 2x3 grid
for slot in range(1, 7):
    plt.subplot(2, 3, slot)
    # index 0 picks the first (only) channel of the image
    plt.imshow(features[slot - 1][0], cmap='gray')
plt.show()

### Step 3. Multi-Layer Neural Network

In [None]:
class NeuralNet(nn.Module):
    """Fully connected network with one hidden layer: Linear -> ReLU -> Linear.

    Args:
        input_size: number of features in each flattened input sample.
        hidden_size: number of units in the hidden layer.
        num_classes: number of output scores (one per class).
    """

    def __init__(self, input_size, hidden_size, num_classes):
        super().__init__()

        # Create our layers
        self.l1 = nn.Linear(input_size, hidden_size)
        self.relu = nn.ReLU()
        # last layer has the number of classes we want to classify
        self.l2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw class scores for a batch `x` of shape (N, input_size)."""
        # Each layer must consume the previous layer's output, not the raw input
        out = self.l1(x)
        out = self.relu(out)
        out = self.l2(out)

        # Don't apply an activation here: nn.CrossEntropyLoss applies the softmax for us
        return out

In [None]:
# Build the network and move its parameters to the selected device, so they
# live on the same device as the batches pushed there during training/evaluation
model = NeuralNet(input_size, hidden_size, number_of_classes).to(device)
print(model)

### Step 4. Loss and Optimizer

In [None]:
criterion = nn.CrossEntropyLoss()  # This will apply the softmax for us, hence we don't do it in the forward pass
# Named `optimizer` (not `optimzer`) so that it matches the name used by the training loop
optimizer = torch.optim.Adam(model.parameters(), lr=learning_rate)

### Step 5 & 6. Train in batches and Evaluate

In [None]:
n_total_steps = len(train_loader)  # number of batches per epoch
# Iterate over the epochs
for epoch in range(number_of_epochs):

    # Iterate over each batch
    for i, (images, labels) in enumerate(train_loader):
        # origin shape: [100, 1, 28, 28]
        # resized: [100, 784]
        images = images.reshape(-1, input_size).to(device)  # push to appropriate device
        labels = labels.to(device)

        # Forward pass
        outputs = model(images)
        loss = criterion(outputs, labels)

        # Backward pass
        optimizer.zero_grad()  # zero out the gradients from the previous pass
        loss.backward()        # compute the gradients
        optimizer.step()       # update the parameters using those gradients

        # Print out some progress report every 100 batches
        if (i + 1) % 100 == 0:
            print(f'Epoch [{epoch+1}/{number_of_epochs}], Step [{i+1}/{n_total_steps}], Loss: {loss.item():.4f}')
            
# Test the model. No gradients are needed for evaluation, so the whole
# pass over the test set runs inside torch.no_grad().
n_correct, n_samples = 0, 0
with torch.no_grad():
    for images, labels in test_loader:
        flat_images = images.reshape(-1, 28*28).to(device)
        targets = labels.to(device)
        logits = model(flat_images)
        # torch.max over dim 1 yields (values, indices); the index is the predicted class
        predicted = torch.max(logits, dim=1).indices
        n_samples += targets.size(0)
        n_correct += (predicted == targets).sum().item()

# Share of correctly classified samples, as a percentage
acc = 100.0 * n_correct / n_samples
print(f'Accuracy of the network on the 10000 test images: {acc} %')