In [1]:
from mnist_loader import load_mnist_data
from tqdm import tqdm
import matplotlib.pyplot as plt
import torch
import torch.nn as nn
import numpy as np
import warnings

In [2]:
# Silence every UserWarning for the rest of the session.
# NOTE: this is a blanket filter, so UserWarnings from any library are hidden too.
warnings.simplefilter("ignore", category=UserWarning)

In [3]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

In [4]:
# Fetch the four MNIST splits (all in numpy ndarray type)
DOWNLOAD_MNIST = False  # Set to True if data is not downloaded.
X_train, Y_train, X_test, Y_test = load_mnist_data(download=DOWNLOAD_MNIST)

In [5]:
# Transform to torch tensors.
# torch.as_tensor accepts both ndarrays and tensors, so this cell can be re-run
# safely; torch.from_numpy raises TypeError once the names already hold tensors.
X_train = torch.as_tensor(X_train)
Y_train = torch.as_tensor(Y_train)
X_test = torch.as_tensor(X_test)
Y_test = torch.as_tensor(Y_test)

In [6]:
class MyNeuralNet(nn.Module):
    """Fully connected feed-forward classifier built from bias-free linear layers.

    Every layer except the last is followed by ReLU; the last layer is followed
    by log-softmax over dimension 1, so ``forward`` returns log-probabilities.

    Args:
        sizes: Number of neurons in each layer, input layer first and output
            layer last (e.g. ``[784, 10, 10]``). Needs at least two entries.

    Raises:
        ValueError: If ``sizes`` has fewer than two entries, because no weight
            matrix could be built.
    """

    def __init__(self, sizes):  # sizes = list of number of neurons in each layer
        super().__init__()
        if len(sizes) < 2:
            raise ValueError(
                f"sizes must list at least an input and an output layer, got {len(sizes)} entries"
            )
        self.sizes = sizes
        self.num_layers = len(sizes)
        self.relu = nn.ReLU()
        self.sm = nn.LogSoftmax(dim=1)

        # One weight matrix (no bias term) per pair of consecutive layers
        self.nn_layers = nn.ModuleList(
            [nn.Linear(n_in, n_out, bias=False) for n_in, n_out in zip(sizes[:-1], sizes[1:])]
        )

    def forward(self, a1):
        """Return the log-softmax output for the input batch ``a1``.

        ``a1`` must be 2-D with ``sizes[0]`` columns, since the log-softmax is
        taken over dimension 1.
        """
        a = a1

        # Hidden layers: linear map followed by ReLU. Each layer runs exactly
        # once; the extra throw-away pass through layer 0 has been removed.
        for layer in self.nn_layers[:-1]:
            a = self.relu(layer(a))

        # Output layer: linear map followed by log-softmax
        return self.sm(self.nn_layers[-1](a))

In [7]:
# Train model
def train_model(model=None, optimizer=None, criterion=None, batch_size=100, num_epochs=10, log=False):
    """Train ``model`` on random mini-batches drawn from the global training set.

    Each of the ``num_epochs`` iterations draws ``batch_size`` example indices
    at random (with replacement) from ``X_train``/``Y_train`` and performs one
    optimizer step, so an "epoch" here is a single mini-batch update rather
    than a full pass over the data.

    Reads the notebook-level globals ``X_train``, ``Y_train`` and ``device``.

    Args:
        model: Network to train; it is updated in place.
        optimizer: Optimizer bound to ``model``'s parameters.
        criterion: Loss function called as ``criterion(outputs, labels)``.
        batch_size: Number of examples sampled per iteration.
        num_epochs: Number of mini-batch iterations to run.
        log: If True, print the loss after every iteration.

    Returns:
        The same ``model`` object that was passed in.

    Raises:
        ValueError: If ``model``, ``optimizer`` or ``criterion`` is None.
    """
    if model is None or optimizer is None or criterion is None:
        raise ValueError("train_model requires model, optimizer and criterion")

    total_training_examples = X_train.shape[0]
    loss = None  # stays None when num_epochs == 0

    # The context manager closes the progress bar even if an iteration raises.
    with tqdm(total=num_epochs) as pbar:
        for epoch in range(num_epochs):
            # Pick random number of training examples = batch_size
            rand_samp = np.random.randint(0, total_training_examples, size=batch_size)
            # Indexing already yields new data; as_tensor avoids a second copy
            # (and the UserWarning that torch.tensor(tensor) emits).
            batch_images = torch.as_tensor(X_train[rand_samp]).reshape(-1, 28 * 28).float().to(device=device)
            batch_labels = torch.as_tensor(Y_train[rand_samp]).long().to(device=device)

            # forward prop
            outputs = model(batch_images)
            loss = criterion(outputs, batch_labels)

            # backward prop -- uses the optimizer argument, not a global
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            pbar.update(1)
            if log:
                print(f'epoch {epoch+1}/{num_epochs}, loss = {loss.item():.4f}')

    if loss is not None:
        print(f'FINAL_LOSS = {loss.item():.4f}')
    return model

In [8]:
# Test model
def test_model(model):
    with torch.no_grad():
        n_correct = 0
        n_sample = Y_test.shape[0]

    pbar = tqdm(total=len(X_test))
    for image, label in zip(X_test, Y_test):
        image = image.reshape(-1,28*28).float().to(device)
        label = label.long().to(device)
        output = model(image)
        _, prediction = torch.max(output,1)
        n_correct += 1 if (prediction[0] == label) else 0
        pbar.update(1)

    pbar.close()
    print(f'Num correct predictions: {n_correct}, total predictions: {n_sample}')
    acc = 100 * n_correct / n_sample
    print(f'Accuracy: {acc}')

In [23]:
# Parameters
SEED = 0
batch_size = 500        # examples sampled per training iteration
num_epochs = 10000      # number of training iterations (one random mini-batch each)
learning_rate = 0.0005

# Seed the RNGs used for weight initialisation (torch) and mini-batch sampling
# (numpy) so that a fresh Restart & Run All starts from the same state.
np.random.seed(SEED)
torch.manual_seed(SEED)

# Neural Network inits
sizes = [784, 10, 10]  # 28*28 input pixels -> 10 hidden units -> 10 outputs
model = MyNeuralNet(sizes).to(device=device)
criterion = nn.CrossEntropyLoss()
optim = torch.optim.Adam(model.parameters(), lr=learning_rate)

In [24]:
# Run the training loop; train_model returns the model it was given
model = train_model(
    model=model,
    optimizer=optim,
    criterion=criterion,
    batch_size=batch_size,
    num_epochs=num_epochs,
    log=False,
)

100%|██████████| 10000/10000 [00:34<00:00, 292.37it/s]

FINAL_LOSS = 0.2571





In [25]:
# Report accuracy of the trained network on the held-out test split
test_model(model=model)

100%|██████████| 10000/10000 [00:04<00:00, 2169.79it/s]

Num correct predictions: 9201, total predictions: 10000
Accuracy: 92.01



