In [1]:
# importing the necessary libraries
import torch
import torch.nn as nn    # All neural network modules resides in this nn.linear, nn.Conv2D, BatchNorm, Loss functions
import torch.optim as optim    # For all optimization algorithms SGD, Adam etc
import torch.nn.functional as F    # All functions that don't have any parameters
from torch.utils.data import DataLoader    # Gives easier data management and creates mini batches
import torchvision.datasets as datasets    # Has standard datasets that we can import in a nice and easy way
import torchvision.transforms as transforms    # Transformations we can perform on our datasets

In [4]:
# Mini-batch size used when wrapping the datasets in DataLoaders
batch_size = 64

# MNIST training split: stored under dataset/, fetched on first use, and each
# image converted to a tensor by the ToTensor transform.
train_dataset = datasets.MNIST(
    root="dataset/",
    train=True,
    transform=transforms.ToTensor(),
    download=True,
)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 503: Service Unavailable

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to dataset/MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting dataset/MNIST\raw\train-images-idx3-ubyte.gz to dataset/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to dataset/MNIST\raw\train-labels-idx1-ubyte.gz


102.8%


Extracting dataset/MNIST\raw\train-labels-idx1-ubyte.gz to dataset/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to dataset/MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting dataset/MNIST\raw\t10k-images-idx3-ubyte.gz to dataset/MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to dataset/MNIST\raw\t10k-labels-idx1-ubyte.gz


112.7%

Extracting dataset/MNIST\raw\t10k-labels-idx1-ubyte.gz to dataset/MNIST\raw




  return torch.from_numpy(parsed.astype(m[2], copy=False)).view(*s)


In [5]:
# Serve the training set in shuffled mini-batches of `batch_size` samples
train_loader = DataLoader(
    train_dataset,
    batch_size=batch_size,
    shuffle=True,
)

In [6]:
# Download (if not already present) and load the MNIST test split
test_dataset = datasets.MNIST(root='dataset/', train=False, transform=transforms.ToTensor(), download=True)

# Accuracy does not depend on the order in which samples are visited, so the
# evaluation loader iterates in a fixed order instead of reshuffling on every pass.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [8]:
# Building the model
class NN(nn.Module):
    """Fully connected classifier with one ReLU hidden layer.

    Args:
        input_size: Number of features in each (flattened) input sample.
        num_classes: Number of scores produced per sample.
        hidden_size: Width of the hidden layer. Defaults to 50, the value
            that used to be hard-coded, so existing callers are unaffected.
    """

    def __init__(self, input_size, num_classes, hidden_size=50):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Return the raw class scores for ``x``.

        ``x`` must have ``input_size`` features in its last dimension. No
        softmax is applied here; the scores are returned as produced by the
        final linear layer.
        """
        hidden = F.relu(self.fc1(x))
        return self.fc2(hidden)

In [10]:
# Setting up the training run
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
input_size = 784    # each 28x28 image is flattened to a 784-element vector before the first layer
num_classes = 10
learning_rate = 0.001
num_epochs = 3
seed = 42

# Seed PyTorch's random number generator before the model is built so that the
# random weight initialisation and the DataLoader shuffling order are repeatable
# from one run to the next. (Bit-exact results on a GPU are still not guaranteed.)
torch.manual_seed(seed)

# When the model is instantiated, PyTorch initialises its weights and biases to
# small random values. A custom initialisation scheme, if wanted, can be added
# to the NN class.

# CrossEntropyLoss is the standard loss for classification in PyTorch. It applies
# log-softmax followed by negative log-likelihood to the model outputs and the
# labels, which is why the network itself does not apply softmax: doing so would
# apply it twice.

model = NN(input_size=input_size, num_classes=num_classes).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)


In [11]:
# Train the model for `num_epochs` full passes over the training loader
for epoch in range(num_epochs):

    print(f"Epoch: {epoch}")

    for batch_idx, (data, targets) in enumerate(train_loader):

        # Move the batch to the target device and flatten every image
        # into a single feature vector (28x28 -> 784)
        data = data.to(device).reshape(len(data), -1)
        targets = targets.to(device)

        # PyTorch accumulates gradients across backward() calls, so clear
        # whatever the previous step left behind before computing new ones
        optimizer.zero_grad()

        # Forward pass: class scores and the loss against the labels
        scores = model(data)
        loss = criterion(scores, targets)

        # Backward pass, then one optimizer update of the parameters
        loss.backward()
        optimizer.step()

Epoch: 0
Epoch: 1
Epoch: 2


In [18]:
def check_accuracy(loader, model, device=None):
    """Print how many samples in ``loader`` the model classifies correctly.

    Args:
        loader: Iterable yielding ``(inputs, labels)`` batches. Each input
            batch is reshaped to ``(batch, -1)`` before being fed to the model.
        model: ``nn.Module`` whose output has one score per class along
            dimension 1; the highest score is taken as the prediction.
        device: Device on which to evaluate. When omitted, the device of the
            model's first parameter is used (CPU if the model has none).

    Returns:
        None. The result is printed as
        ``Got <correct> / <total> with accuracy <percent>``.

    Notes:
        The model is switched to eval mode for the duration of the check and
        is put back into whichever mode it was in beforehand, even if
        evaluation raises. An empty loader is reported as 0 / 0 with an
        accuracy of 0.00 instead of raising ``ZeroDivisionError``.
    """
    if device is None:
        first_param = next(model.parameters(), None)
        device = first_param.device if first_param is not None else torch.device("cpu")

    num_correct = 0
    num_samples = 0

    # Remember the caller's mode so evaluation does not silently flip it.
    was_training = model.training
    model.eval()

    try:
        with torch.no_grad():

            for x, y in loader:
                x = x.to(device=device)
                y = y.to(device=device)
                x = x.reshape(x.shape[0], -1)

                scores = model(x)
                predictions = scores.argmax(dim=1)
                # .item() keeps the running total a plain Python int
                num_correct += (predictions == y).sum().item()
                num_samples += predictions.size(0)
    finally:
        model.train(was_training)

    # Guard against an empty loader, which would otherwise divide by zero.
    if num_samples:
        accuracy = float(num_correct) / float(num_samples) * 100
    else:
        accuracy = 0.0

    print(
        f"Got {num_correct} / {num_samples} with accuracy"
        f"{accuracy: .2f}"
    )

In [19]:
# Report accuracy on the data the model was trained on
check_accuracy(loader=train_loader, model=model)

Got 57596 / 60000 with accuracy 95.99


In [20]:
# Report accuracy on the held-out test split
check_accuracy(loader=test_loader, model=model)

Got 9544 / 10000 with accuracy 95.44
