In [104]:
import torch
import torch.nn as nn  # Import neural networks (nn)
import torch.nn.functional as F  # Import nn functionality
import torch.optim as optim  # Import Optimizer
from torch.utils.data import DataLoader
from torchvision import datasets
from torchvision.transforms import ToTensor

In [105]:
# Use torchvision to download training data
train_data = datasets.MNIST(
    root = 'data',
    train = True, # Use ToTensor to define the transformation method
    transform = ToTensor(),
    download = True
)
test_data = datasets.MNIST(
    root = 'data',
    train = False,
    transform = ToTensor(),
    download = True
)

In [106]:
test_data

Dataset MNIST
    Number of datapoints: 10000
    Root location: data
    Split: Test
    StandardTransform
Transform: ToTensor()

In [107]:
train_data.data.shape

torch.Size([60000, 28, 28])

In [108]:
test_data.data.shape

torch.Size([10000, 28, 28])

In [109]:
# Define pytorch loaders
loaders = {
    'train': DataLoader(train_data, batch_size = 100, shuffle=True, num_workers=1),
    'test': DataLoader(test_data, batch_size = 100, shuffle=True, num_workers=1)
}

In [110]:
loaders

{'train': <torch.utils.data.dataloader.DataLoader at 0x220f0b2ad20>,
 'test': <torch.utils.data.dataloader.DataLoader at 0x220ef03f4d0>}

In [111]:
# Define neural network architecture
class CNN(nn.Module):  # define nn as convolutional neural network, inherit from nn.Module
    # Define initialization
    def __init__(self):
        super(CNN, self).__init__() # Calls constructor of parent class

        '''
        Defines convulational layers for feature extraction
        '''
        # Create NN layers
        # defines first layer with 1 input channel, 10 output chanels, and kernal size of 5x5    
        self.conv1 = nn.Conv2d(1, 10, kernel_size = 5)

        # Defines second layer with 10 input channels from first layer and 20 output channels
        self.conv2 = nn.Conv2d(10, 20, kernel_size = 5)

        # Defines dropout layer which regularizes by randomly zeroing elements to prevent overfitting
        self.conv2_drop = nn.Dropout2d() # regularization layer

        # Deffines first fullyconnected/dense layer 
        self.fc1 = nn.Linear(320, 50) # 320 calculated from output of conv2

        # defines second dense layer with 50 inpput and 10 output corresponding to digits 0-9
        self.fc2 = nn.Linear(50, 10) # 10 outputs for digit class

    def forward(self, x): # defines activation function
        '''
        applies rectified linear unit function (relu) as the activation function (introduce non-linearity
        to learn more complex patterns and relationships in data) to max pooling (operation that caluclates 
        maximum value for patches of feature map) to reduce the dimensionality. 
        '''
        # torch.nn.functional.relu(input, inplace=False) → Tensor[SOURCE]
        # applies the recefied lienar unit function element-wise to max pooling of input planes
        x = F.relu(F.max_pool2d(self.conv1(x), 2))

        # applies second convolution to result and applies dropout, max pooling, and ReLU
        x = F.relu(F.max_pool2d(self.conv2_drop(self.conv2(x)), 2))

        # shapes x into 1-dimensional tensor with 320 inputs needed for deep layers
        x = x.view(-1, 320) # 20 * 4 * 4 = 320 Should flatten x to have size 320

        # applies relu to first output layer
        x = F.relu(self.fc1(x))

        # applies droput regularization to first output
        x = F.dropout(x, training=self.training)

        # computes the scores for each of the 10 classes using last layer
        x = self.fc2(x)

        # returns softmax (rescales so that elements line in range)
        # normalizes logits into probabilities for confidence. Takes highest as answer
        return F.softmax(x, dim=1)

In [112]:
# Configure device to detect if NVIDIA cuda enabled gpu is avaliable
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')

# Assign device to NN
model = CNN().to(device)

# Configure optimize for model learning (load parameters and learning rate)
optimizer = optim.Adam(model.parameters(), lr=0.001)

# Define loss function
loss_fn = nn.CrossEntropyLoss()

In [113]:
# Define training process
def train(epoch):
    model.train() # Put model in training mode
    for batch_idx, (data, target) in enumerate(loaders['train']):
        data, target = data.to(device), target.to(device)
        optimizer.zero_grad() # Zero out all gradients for each batch before back prop
        output = model(data)

        # Calculate loss, backward propogate, and optimize
        loss = loss_fn(output, target) # Calculate error from desired error
        loss.backward() # Do backward propogation for improvement
        optimizer.step()  # Do optimizer step

        if batch_idx % 25 == 0:# Every 25
            # Fancy print statement
            print(f'Train Epoch: {epoch} [{batch_idx * len(data)} / {len(loaders["train"].dataset)} ({100. * batch_idx / len(loaders["train"]):.0f}%)]\t{loss.item():.6f}')

In [114]:
def test():
    model.eval() # put model into eval mode

    test_loss = 0
    correct = 0

    with torch.no_grad(): # Disable gradient function and no back prop for test
        for data, target in loaders['test']:
            data, target = data.to(device), target.to(device)
            output = model(data)
            test_loss += loss_fn(output, target).item()
            pred = output.argmax(dim=1, keepdim=True)
            correct += pred.eq(target.view_as(pred)).sum().item()

    test_loss /= len(loaders['test'].dataset)
    print(f'\nTest set: Average loss: {test_loss:.4f}, Accuracy {correct}/{len(loaders["test"].dataset)} ({100. * correct / len(loaders["test"].dataset):.0f}%\n)')

In [116]:
if __name__ == '__main__':   
    for epoch in range(1,11):
        train(epoch)
        test()


Test set: Average loss: 0.0149, Accuracy 9761/10000 (98%
)

Test set: Average loss: 0.0149, Accuracy 9761/10000 (98%
)

Test set: Average loss: 0.0148, Accuracy 9773/10000 (98%
)

Test set: Average loss: 0.0148, Accuracy 9770/10000 (98%
)

Test set: Average loss: 0.0148, Accuracy 9784/10000 (98%
)

Test set: Average loss: 0.0148, Accuracy 9797/10000 (98%
)

Test set: Average loss: 0.0148, Accuracy 9801/10000 (98%
)

Test set: Average loss: 0.0148, Accuracy 9795/10000 (98%
)

Test set: Average loss: 0.0148, Accuracy 9819/10000 (98%
)

Test set: Average loss: 0.0148, Accuracy 9804/10000 (98%
)
