In [1]:
from pathlib import Path
import requests

In [2]:
# Local cache location of the gzip-compressed MNIST pickle. Note that PATH is
# the file itself, not a directory.
PATH = Path("Data/MNIST")
if not PATH.exists():
    URL = "http://deeplearning.net/data/mnist/"
    FILENAME = "mnist.pkl.gz"
    # A timeout keeps the cell from hanging forever on an unresponsive host.
    response = requests.get(URL + FILENAME, timeout=30)
    # Fail loudly on HTTP errors; otherwise an error page would be cached at
    # PATH and every later run would skip the download and fail while loading.
    response.raise_for_status()
    content = response.content
    # The parent directory ("Data") may not exist on a fresh checkout.
    PATH.parent.mkdir(parents=True, exist_ok=True)
    # write_bytes opens, writes and closes the file, so no handle is leaked.
    PATH.write_bytes(content)
    

In [3]:
import pickle
import gzip

# Load the gzip-compressed pickle stored at PATH and keep the first two of its
# three top-level entries (training and validation pairs); the third is ignored.
# NOTE(review): pickle.load can execute arbitrary code from the file; only use
# this on a download you trust.
with gzip.open(PATH.as_posix(), mode="rb") as archive:
    train_split, valid_split, _ = pickle.load(archive, encoding="latin-1")
x_train, y_train = train_split
x_valid, y_valid = valid_split

In [4]:
# Show the first training sample followed by the shape of the whole training array.
print(x_train[0], x_train.shape, sep="\n")

[0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         0.
 0.         0.         0.         0.         0.         

In [7]:
import torch

In [8]:
# Convert every data split to a torch tensor, rebinding the original names.
x_train, y_train, x_valid, y_valid = (
    torch.tensor(split) for split in (x_train, y_train, x_valid, y_valid)
)
# x_train is two-dimensional: n is its first dimension, c its second.
n, c = x_train.shape
# Inspect the first label, its shape, and the range of label values.
print(y_train[0], y_train[0].shape, sep="\n")
print(y_train.min(), y_train.max())

tensor(5)
torch.Size([])
tensor(0) tensor(9)


In [9]:
import math

# Parameters of a single linear layer mapping 784 inputs to 10 outputs.
n_inputs, n_classes = 784, 10
# Random normal weights scaled by 1/sqrt(n_inputs); gradient tracking is
# switched on only after the scaling so that weights stays a leaf tensor.
weights = (torch.randn(n_inputs, n_classes) / math.sqrt(n_inputs)).requires_grad_()
# Bias starts at zero and is tracked for gradients from creation.
bias = torch.zeros(n_classes, requires_grad=True)

In [10]:
def log_softmax(x):
    return x - x.exp().sum(-1).log().unsqueeze(-1)

def model(xb):
    """Apply the linear layer to a batch and return log-softmax scores.

    Args:
        xb: Batch of inputs; it is matrix-multiplied with the global
            ``weights`` and the global ``bias`` is added to the result.

    Returns:
        The output of ``log_softmax`` applied to the affine scores.
    """
    # `@` is the matrix-multiplication operator.
    logits = xb @ weights + bias
    return log_softmax(logits)

In [11]:
# Run one forward pass on the first mini-batch (before any training).
batch_size = 64
xb = x_train[:batch_size]
preds = model(xb)
# Show the scores for the first sample and the shape of the whole batch output.
print(preds[0], preds.shape)

tensor([-2.4635, -2.4052, -2.2210, -2.0868, -2.2571, -2.1150, -2.5393, -3.0197,
        -1.7465, -2.7391], grad_fn=<SelectBackward>) torch.Size([64, 10])


In [12]:
def negative_log_likelyhood(input, target):
    """Return the mean negative log-likelihood of the target classes.

    Args:
        input: 2-D tensor indexed as ``[row, class]``; presumably the
            log-probabilities produced by the model.
        target: 1-D integer tensor giving, for each row, the column to pick.

    Returns:
        Scalar tensor: the negated mean of ``input[i, target[i]]`` over all rows.
    """
    row_ids = range(target.shape[0])
    # Pair each row index with its target class to pick one entry per row.
    picked = input[row_ids, target]
    return -picked.mean()


# Loss used by the rest of the notebook.
loss_function = negative_log_likelyhood

In [15]:
# Labels of the first mini-batch, and the loss of the earlier predictions on them.
yb = y_train[:batch_size]
print(loss_function(input=preds, target=yb))

tensor(2.3092, grad_fn=<NegBackward>)


In [16]:
def accuracy(out, yb):
    """Return the fraction of rows whose highest-scoring class equals the label.

    Args:
        out: 2-D tensor of per-class scores, one row per sample.
        yb: Tensor of integer class labels, one per row of ``out``.

    Returns:
        Scalar float tensor with the mean of the per-row matches.
    """
    predicted_classes = out.argmax(dim=1)
    hits = predicted_classes.eq(yb)
    # Cast the boolean matches to float so they can be averaged.
    return hits.float().mean()

In [17]:
# Accuracy of the earlier first-batch predictions against that batch's labels.
print(accuracy(out=preds, yb=yb))

tensor(0.0781)


In [None]:
from IPython.core.debugger import set_trace

learning_rate = 0.5
epochs = 2

# Plain mini-batch gradient descent, updating the parameters by hand.
for epoch in range(epochs):
    # Walk the training set in consecutive slices of batch_size rows; the last
    # slice is shorter when n is not a multiple of batch_size.
    for start_i in range(0, n, batch_size):
        # set_trace()
        # set_trace() can be used to inspect variable values at each step.
        # It's the standard Python debugger.
        end_i = start_i + batch_size
        xb = x_train[start_i:end_i]
        yb = y_train[start_i:end_i]
        pred = model(xb)
        loss = loss_function(pred, yb)

        loss.backward()
        # The parameter updates must not be recorded by autograd.
        with torch.no_grad():
            # Use learning_rate, the name defined at the top of this cell.
            weights -= weights.grad * learning_rate
            bias -= bias.grad * learning_rate
            # backward() accumulates into .grad, so clear it before the next batch.
            weights.grad.zero_()
            bias.grad.zero_()

In [None]:
# Loss and accuracy on whatever xb / yb currently hold; after the training
# cell has run, that is the last mini-batch it processed.
final_pred = model(xb)
print(loss_function(final_pred, yb), accuracy(final_pred, yb))