In [20]:
import numpy as np
import torch
from torch.autograd import Variable
from torch import optim
from data_util import load_mnist

In [21]:
def build_model(input_dim, output_dim):
    """Build a multinomial logistic-regression model.

    The model is a single linear layer followed by a log-softmax, so its
    output is a vector of per-class log-probabilities suitable for
    ``torch.nn.NLLLoss``.

    Args:
        input_dim: Number of input features per example.
        output_dim: Number of classes.

    Returns:
        A ``torch.nn.Sequential`` with two named children, ``"linear"`` and
        ``"softmax"``.
    """
    model = torch.nn.Sequential()
    # Per-class affine score: w_c^T x + b_c
    model.add_module("linear",
                     torch.nn.Linear(input_dim, output_dim))
    # Normalise over the class axis. Linear always puts the classes on the
    # last axis, so dim=-1 is correct for any input rank; leaving `dim`
    # unset is deprecated and makes PyTorch guess the axis from the rank.
    model.add_module("softmax", torch.nn.LogSoftmax(dim=-1))
    return model

In [22]:
def train(model, loss, optimizer, x_val, y_val):
    """Run one optimisation step on a single mini-batch.

    Args:
        model: Module mapping ``x_val`` to the scores expected by ``loss``.
        loss: Criterion called as ``loss(model_output, y_val)``; it must
            return a scalar tensor because ``backward()`` is called on it
            without arguments.
        optimizer: Optimizer holding the parameters of ``model``.
        x_val: Input tensor for this batch.
        y_val: Target tensor for this batch.

    Returns:
        The batch loss as a Python ``float``.
    """
    # Gradients accumulate across backward() calls, so clear the ones left
    # over from the previous step.
    optimizer.zero_grad()

    # Forward pass. Calling the modules (rather than .forward) also runs any
    # registered hooks. Tensors are passed as-is: the Variable wrapper has
    # been a no-op since tensors and Variables were merged.
    fx = model(x_val)
    output = loss(fx, y_val)

    # Back-propagate and apply the parameter update.
    output.backward()
    optimizer.step()

    # The loss is a 0-dim tensor; indexing it with [0] raises IndexError on
    # current PyTorch, so extract the scalar with .item().
    return output.item()

In [23]:
def predict(model, x_val):
    """Return the predicted class index for every row of ``x_val``.

    Args:
        model: Module producing one score per class along axis 1.
        x_val: Input tensor; axis 0 indexes examples.

    Returns:
        A NumPy integer array with the arg-max class index of each example.
    """
    # Inference needs no gradients; no_grad() stops autograd from building a
    # graph over the whole input that would be thrown away immediately.
    with torch.no_grad():
        scores = model(x_val)
    # .cpu() is a no-op for CPU tensors and keeps .numpy() valid otherwise.
    return scores.cpu().numpy().argmax(axis=1)

In [24]:
def main(n_epochs=100, batch_size=100, lr=0.01):
    """Train a softmax classifier on MNIST and report progress per epoch.

    After every epoch, prints the mean training cost over the mini-batches
    and the accuracy on the test split.

    Args:
        n_epochs: Number of passes over the training set.
        batch_size: Number of examples per SGD step.
        lr: Learning rate passed to ``optim.SGD``.
    """
    torch.manual_seed(42)
    trX, teX, trY, teY = load_mnist(onehot=False)
    trX = torch.from_numpy(trX).float()
    teX = torch.from_numpy(teX).float()
    trY = torch.from_numpy(trY).long()
    # teY stays a NumPy array: it is only compared against NumPy predictions.

    # trX is unpacked as 2-D: (examples, features).
    n_examples, n_features = trX.size()
    n_classes = 10
    model = build_model(n_features, n_classes)

    # Negative log-likelihood averaged over the batch. `reduction="mean"`
    # is the supported spelling of the deprecated `size_average=True`.
    loss = torch.nn.NLLLoss(reduction="mean")

    # Plain stochastic gradient descent.
    optimizer = optim.SGD(model.parameters(), lr=lr)

    # Stepping by batch_size also covers a trailing partial batch, and gives
    # one batch (not zero) when the dataset is smaller than batch_size.
    batch_starts = range(0, n_examples, batch_size)
    num_batches = len(batch_starts)

    for epoch in range(n_epochs):
        cost = 0.
        for start in batch_starts:
            # Slicing past the end is clamped, so the last batch may be short.
            end = start + batch_size
            cost += train(model, loss, optimizer,
                          trX[start:end], trY[start:end])
        predY = predict(model, teX)
        # `cost / num_batches` is the mean of the per-batch mean losses.
        print("Epoch %d, cost = %f, acc = %.2f%%"
              % (epoch + 1, cost / num_batches, 100. * np.mean(predY == teY)))


In [25]:
# Train the classifier; prints cost and test accuracy after every epoch.
main()

Epoch 1, cost = 1.149908, acc = 85.17%
Epoch 2, cost = 0.643424, acc = 87.44%
Epoch 3, cost = 0.537060, acc = 88.17%
Epoch 4, cost = 0.485690, acc = 88.71%
Epoch 5, cost = 0.454144, acc = 89.14%
Epoch 6, cost = 0.432298, acc = 89.52%
Epoch 7, cost = 0.416023, acc = 89.80%
Epoch 8, cost = 0.403290, acc = 89.98%
Epoch 9, cost = 0.392972, acc = 90.16%
Epoch 10, cost = 0.384385, acc = 90.32%
Epoch 11, cost = 0.377089, acc = 90.45%
Epoch 12, cost = 0.370788, acc = 90.58%
Epoch 13, cost = 0.365271, acc = 90.66%
Epoch 14, cost = 0.360386, acc = 90.76%
Epoch 15, cost = 0.356020, acc = 90.83%
Epoch 16, cost = 0.352085, acc = 90.88%
Epoch 17, cost = 0.348513, acc = 90.98%
Epoch 18, cost = 0.345251, acc = 91.08%
Epoch 19, cost = 0.342256, acc = 91.16%
Epoch 20, cost = 0.339492, acc = 91.24%
Epoch 21, cost = 0.336932, acc = 91.26%
Epoch 22, cost = 0.334550, acc = 91.35%
Epoch 23, cost = 0.332327, acc = 91.38%
Epoch 24, cost = 0.330245, acc = 91.42%
Epoch 25, cost = 0.328290, acc = 91.46%
Epoch 26,