In [16]:
import numpy as np 
import torch
from torch import nn
from torch import optim
import torch.nn.functional as F

import matplotlib.pyplot as plt
from torchvision import datasets, transforms
from collections import OrderedDict

In [35]:
# Define a transform to normalize the data
transform = transforms.Compose([transforms.ToTensor(),
                              transforms.Normalize((0.5,), (0.5,)),
                             ])
# Download and load the training data
trainset = datasets.MNIST('data/MNIST_data/', download=True, train=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True)

# Download and load the test data
testset = datasets.MNIST('data/MNIST_data/', download=True, train=False, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=True)

In [36]:
# Hyperparameters for our network
input_size = 784
hidden_sizes = [128, 64]
output_size = 10

# Build a feed-forward network
model = nn.Sequential(OrderedDict([
                      ('fc1', nn.Linear(input_size, hidden_sizes[0])),
                      ('relu1', nn.ReLU()),
                      ('fc2', nn.Linear(hidden_sizes[0], hidden_sizes[1])),
                      ('relu2', nn.ReLU()),
                      ('logits', nn.Linear(hidden_sizes[1], output_size))]))

In [39]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.003)

In [40]:
epochs = 3
print_every = 40
steps = 0
for e in range(epochs):
    running_loss = 0
    for images, labels in iter(trainloader):
        steps += 1
        # Flatten MNIST images into a 784 long vector
        images.resize_(images.size()[0], 784)
        
        optimizer.zero_grad()
        
        # Forward and backward passes
        output = model.forward(images)
        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
        
        if steps % print_every == 0:
            print("Epoch: {}/{}... ".format(e+1, epochs),
                  "Loss: {:.4f}".format(running_loss/print_every))
            
            running_loss = 0

Epoch: 1/3...  Loss: 2.2867
Epoch: 1/3...  Loss: 2.2697
Epoch: 1/3...  Loss: 2.2481
Epoch: 1/3...  Loss: 2.2276
Epoch: 1/3...  Loss: 2.2081
Epoch: 1/3...  Loss: 2.1821
Epoch: 1/3...  Loss: 2.1545
Epoch: 1/3...  Loss: 2.1294
Epoch: 1/3...  Loss: 2.0921
Epoch: 1/3...  Loss: 2.0590
Epoch: 1/3...  Loss: 2.0249
Epoch: 1/3...  Loss: 1.9781
Epoch: 1/3...  Loss: 1.9230
Epoch: 1/3...  Loss: 1.8799
Epoch: 1/3...  Loss: 1.8125
Epoch: 1/3...  Loss: 1.7528
Epoch: 1/3...  Loss: 1.6956
Epoch: 1/3...  Loss: 1.6270
Epoch: 1/3...  Loss: 1.5477
Epoch: 1/3...  Loss: 1.4974
Epoch: 1/3...  Loss: 1.3966
Epoch: 1/3...  Loss: 1.3474
Epoch: 1/3...  Loss: 1.2821
Epoch: 2/3...  Loss: 0.6762
Epoch: 2/3...  Loss: 1.1801
Epoch: 2/3...  Loss: 1.1149
Epoch: 2/3...  Loss: 1.0791
Epoch: 2/3...  Loss: 1.0362
Epoch: 2/3...  Loss: 0.9853
Epoch: 2/3...  Loss: 0.9820
Epoch: 2/3...  Loss: 0.9454
Epoch: 2/3...  Loss: 0.9040
Epoch: 2/3...  Loss: 0.8613
Epoch: 2/3...  Loss: 0.8155
Epoch: 2/3...  Loss: 0.8206
Epoch: 2/3...  Loss: