In [1]:
import torch
import torch.optim as optim
from torchvision import datasets, transforms
from torch.autograd import Variable

ImportError: No module named torch

In [219]:
batch_size = 1
train_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=True, download=True, transform=transforms.ToTensor()),
    batch_size=batch_size, shuffle=False)

In [220]:
test_loader = torch.utils.data.DataLoader(
    datasets.MNIST('data', train=False, transform=transforms.ToTensor()),
    batch_size=1)

In [226]:
# D_in is input dimension; H is hidden dimension; D_out is output dimension.
D_in, H, D_out, epochs, learning_rate = 784, 200, 10, 5, .1

# Use the nn package to define our model as a sequence of layers. nn.Sequential
# is a Module which contains other Modules, and applies them in sequence to
# produce its output. Each Linear Module computes output from input using a
# linear function, and holds internal Variables for its weight and bias.
model = torch.nn.Sequential(
    torch.nn.Linear(D_in, H, bias=False),
    torch.nn.Sigmoid(),
    torch.nn.Linear(H, D_out, bias=False),
    torch.nn.Sigmoid()
)

# Set stochastic gradient descent
model.optimiser = torch.optim.SGD(model.parameters(), learning_rate)

# The nn package also contains definitions of popular loss functions; in this
# case we will use Mean Squared Error (MSE) as our loss function.
loss_fn = torch.nn.MSELoss(size_average=False)

# for evaluation
train_loss = []
train_accu = []

for e in range(epochs):
    i = 0
    for data, target in train_loader:
        # create one hot encoding with offsets
        label = torch.zeros(1,10) +.01
        label[0][target]= .99
        
        # create Variables for backprop
        # transform the data into the proper dimensions with .view, 1 row + 784 & 10 columns
        data = Variable(data).view(1, D_in)
        target = Variable(label.view(1, D_out), requires_grad=False)
        
        # Forward pass: 
        pred = model(data)

        # Compute loss:
        loss = loss_fn(pred, target)

        # Zero the gradients before running the backward pass:
        model.zero_grad()

        # Backward pass:
        loss.backward()

#         # Update the weights using gradient descent. Each parameter is a Variable, so
#         # we can access its data and gradients like we did before.
#         for param in model.parameters():
#             param.data -= learning_rate * param.grad.data
        
        # Or use the built in optimizer to update the weights
        model.optimiser.step()   # update gradients
    
        # Evaluation: 
        accuracy = pred.data.max(1)[0] # get the maximum percentage in the array of 10 probabilities
        train_accu.append(accuracy)
        train_loss.append(loss.data[0])
        if i % 100 == 0:
            currentAccuracy = (sum(train_accu)/len(train_accu))*100
            currentLoss = (sum(train_loss)/len(train_accu))
            # overwrite the print line during epoch
            print('Epoch: {} \tTrain Step: {}\tLoss: {:.3f}\tAccuracy: {:.2f}%'.format(e, i, currentLoss, currentAccuracy.numpy()[0]), end='\r')
        i += 1
        
    # print a new line at the end of each epoch
    print('Epoch: {} \tTrain Step: {}\tLoss: {:.3f}\tAccuracy: {:.2f}%'.format(e, i, currentLoss, currentAccuracy.numpy()[0]))

Epoch: 0 	Train Step: 60000	Loss: 0.147	Accuracy: 86.41%
Epoch: 1 	Train Step: 60000	Loss: 0.107	Accuracy: 89.89%
Epoch: 2 	Train Step: 60000	Loss: 0.088	Accuracy: 91.57%
Epoch: 3 	Train Step: 60000	Loss: 0.075	Accuracy: 92.64%
Epoch: 4 	Train Step: 60000	Loss: 0.067	Accuracy: 93.41%


In [227]:
# score network
scorecard = []

for data, target in test_loader:
    test = Variable(data, requires_grad=False).view(1, D_in) 
    output = model(test) # run the forward pass
    pred = output.data.max(1)[1].numpy()[0] # get the array index of the maximum percentage
    if (pred == target.numpy()[0]):
        scorecard.append(1)
    else:
        scorecard.append(0)

print ("Performance = ", sum(scorecard) / len(scorecard)*100)
    

Performance =  97.52


In [228]:
import random

In [230]:
# test it
randomDigit = random.randint(0,len(test_loader.dataset))
data = test_loader.dataset[randomDigit][0]
test = Variable(data, requires_grad=False).view(1, D_in)
output = model(test)
pred = output.data.max(1)[1].numpy()[0]
score = output.data.max(1)[0].numpy()[0]
print("Label:", test_loader.dataset[randomDigit][1])
print("Prediction:", pred)
print("Score: {:.2f}%".format(score*100))
if (pred == test_loader.dataset[randomDigit][1]):
    print("Correct!")
else:
    print("Wrong!")

Label: 5
Prediction: 5
Score: 99.87%
Correct!
