In [78]:
# MNIST in PyTorch with an optimizer

import torch
import torch.nn
import torch.nn.functional as F
import requests, gzip, os, hashlib, tempfile
import numpy as np
import random

In [79]:
def fetch(url):
    """Download a gzip-compressed file, cache it, and return its decompressed bytes.

    The raw (still compressed) payload is cached in the system temporary
    directory under a file named after the MD5 hex digest of ``url``, so
    repeated calls do not hit the network again.

    Args:
        url: Address of the gzip-compressed resource.

    Returns:
        A writable one-dimensional ``np.uint8`` array holding the decompressed
        bytes.

    Raises:
        requests.RequestException: If the download fails, times out, or the
            server answers with an HTTP error status.
    """
    temp_dir = tempfile.gettempdir()
    file_path = os.path.join(temp_dir, hashlib.md5(url.encode('utf-8')).hexdigest())

    # A zero-length file is the residue of an interrupted download, never a
    # valid gzip payload, so treat it as a cache miss.
    if os.path.isfile(file_path) and os.path.getsize(file_path) > 0:
        with open(file_path, "rb") as file:
            data = file.read()
    else:
        # Download BEFORE touching the cache so a failed request cannot leave
        # an empty or error-page file behind.
        response = requests.get(url, timeout=30)
        response.raise_for_status()
        data = response.content

        # Write to a sibling file and rename it into place: readers either see
        # the complete payload or no cache entry at all.
        partial_path = f"{file_path}.{os.getpid()}.part"
        try:
            with open(partial_path, "wb") as file:
                file.write(data)
            os.replace(partial_path, file_path)
        finally:
            # Only still present if the write or rename failed.
            if os.path.exists(partial_path):
                os.remove(partial_path)

    # frombuffer yields a read-only view over the bytes; copy() makes it writable.
    return np.frombuffer(gzip.decompress(data), dtype=np.uint8).copy()

In [80]:
class mnist_model(torch.nn.Module):
    """Two-layer perceptron that classifies flattened 28x28 digit images.

    A 784 -> 128 linear layer followed by ReLU feeds a 128 -> 10 linear layer;
    the ten outputs are converted to log-probabilities along dimension 1.
    """

    def __init__(self):
        super().__init__()
        # Hidden layer, output layer, then the log-softmax over the class axis.
        self.l1 = torch.nn.Linear(in_features=784, out_features=128, bias=True)
        self.l2 = torch.nn.Linear(in_features=128, out_features=10, bias=True)
        self.sm = torch.nn.LogSoftmax(dim=1)

    def forward(self, x):
        """Map a (batch, 784) input to (batch, 10) class log-probabilities."""
        hidden = torch.relu(self.l1(x))
        logits = self.l2(hidden)
        return self.sm(logits)

In [81]:
class MNIST_MLP():
    """Trains and evaluates an ``mnist_model`` classifier with plain SGD.

    Inputs handed to ``train`` and ``sample`` are converted to the dtypes the
    network and loss expect and moved to the device the model lives on.
    """

    def __init__(self, device):
        """Build the network on ``device`` together with a mean-reduced NLL loss."""
        self.model = mnist_model().to(device)
        self.nllLoss = torch.nn.NLLLoss(reduction="mean")

    def _prepare_inputs(self, x, y):
        """Return ``(x, y)`` as float32 / int64 tensors on the model's device."""
        device = next(self.model.parameters()).device
        # as_tensor is a no-op for tensors that already match dtype and device.
        x = torch.as_tensor(x, dtype=torch.float32, device=device)
        y = torch.as_tensor(y, dtype=torch.long, device=device)

        # Heuristic: any value above 1.0 means raw 0-255 pixel intensities.
        if x.max() > 1.0:
            x = x / 255.0
        return x, y

    def train(self, x, y, batch_size, iterations, learning_rate):
        """Run SGD on randomly positioned contiguous mini-batches.

        Args:
            x: Input samples, one row per sample.
            y: Integer class label for each row of ``x``.
            batch_size: Number of consecutive rows used per step.
            iterations: Number of optimisation steps.
            learning_rate: SGD step size.

        Raises:
            ValueError: If ``batch_size`` is not between 1 and ``len(x)``.

        Prints the loss of the final step; prints nothing when
        ``iterations`` is zero.
        """
        x, y = self._prepare_inputs(x, y)

        if not 1 <= batch_size <= len(x):
            raise ValueError(
                f"batch_size must be between 1 and {len(x)}, got {batch_size}"
            )

        optim = torch.optim.SGD(self.model.parameters(), lr=learning_rate, momentum=0)

        loss = None
        last_start = len(x) - batch_size
        for _ in range(iterations):
            # A batch is a contiguous window starting at a random offset.
            sample_start = random.randint(0, last_start)
            batch = slice(sample_start, sample_start + batch_size)

            optim.zero_grad()
            loss = self.nllLoss(self.model(x[batch]), y[batch])
            loss.backward()
            optim.step()

        # With zero iterations there is no loss to report.
        if loss is not None:
            print(f"training loss: {loss.item()}")

    def sample(self, x, y):
        """Evaluate the model on ``(x, y)`` and print accuracy and mean NLL loss.

        Args:
            x: Input samples, one row per sample.
            y: Integer class label for each row of ``x``.
        """
        x, y = self._prepare_inputs(x, y)

        # Evaluation needs no gradients; skip building the autograd graph.
        with torch.no_grad():
            log_probs = self.model(x)
            loss = self.nllLoss(log_probs, y)

        # Count matches in one vectorised comparison instead of a Python loop.
        correct = (log_probs.argmax(dim=1) == y).sum().item()
        accuracy = correct / len(y)
        print(f"accuracy on test set: {accuracy}")
        print(f"sample loss: {loss.item()}")

In [87]:
import time

def run_mnist(use_cuda, batch_size=60, iterations=200000, learning_rate=0.01):
    """Download MNIST, train an ``MNIST_MLP`` and report test metrics and run time.

    Args:
        use_cuda: Train on the GPU when True and CUDA is available; otherwise
            (or when CUDA is unavailable) the CPU is used.
        batch_size: Rows per SGD step.
        iterations: Number of SGD steps.
        learning_rate: SGD step size.

    Prints the chosen device, the metrics emitted by ``MNIST_MLP.train`` and
    ``MNIST_MLP.sample``, and the wall-clock time spent in those two calls.
    """
    base_url = "https://raw.githubusercontent.com/fgnt/mnist/master/"

    def load_images(name):
        # Skip the 16-byte header; each 28x28 image becomes one 784-value row,
        # whatever the number of images in the file.
        pixels = fetch(base_url + name)[16:]
        return torch.tensor(pixels, dtype=torch.float32).reshape(-1, 28 * 28)

    def load_labels(name):
        # Skip the 8-byte header; the loss needs integer class indices.
        return torch.tensor(fetch(base_url + name)[8:], dtype=torch.long)

    x_train = load_images("train-images-idx3-ubyte.gz")
    y_train = load_labels("train-labels-idx1-ubyte.gz")
    x_test = load_images("t10k-images-idx3-ubyte.gz")
    y_test = load_labels("t10k-labels-idx1-ubyte.gz")

    # Fall back to the CPU when CUDA was requested but is not available.
    device = torch.device("cuda" if use_cuda and torch.cuda.is_available() else "cpu")
    print("Using CUDA" if device.type == "cuda" else "Using CPU")

    x_train, y_train = x_train.to(device), y_train.to(device)
    x_test, y_test = x_test.to(device), y_test.to(device)

    mnist_mlp = MNIST_MLP(device)

    # perf_counter is monotonic, so the measurement is immune to clock changes.
    start = time.perf_counter()
    mnist_mlp.train(x_train, y_train, batch_size, iterations, learning_rate)
    mnist_mlp.sample(x_test, y_test)
    print("--- %s seconds ---" % (time.perf_counter() - start))

In [88]:
# Baseline run on the CPU.
run_mnist(use_cuda=False)

Using CPU
training loss: 0.010470333509147167
accuracy on test set: 0.9798
sample loss: 0.06776325404644012
--- 91.73779916763306 seconds ---


In [89]:
# Same run, requesting the GPU.
run_mnist(use_cuda=True)

Using CUDA
training loss: 0.013014286756515503
accuracy on test set: 0.98
sample loss: 0.07056288421154022
--- 137.29116415977478 seconds ---


In [None]:
# Observation: for a model this small, the CUDA run (~137 s) was slower than the CPU run (~92 s).