In [1]:
import torch
from torch import nn, optim
import torch.nn.functional as F
import matplotlib.pyplot as plt
from torch.utils.data.sampler import SubsetRandomSampler
from torchvision import datasets, transforms
import numpy as np

In [2]:
transform = transforms.Compose([transforms.ToTensor(),
                               transforms.Normalize((0.5,0.5,0.5),(0.5,0.5,0.5))])

trainset = datasets.MNIST('~/.pytorch/MNIST_data/', train=True, transform=transform)
testset = datasets.MNIST('~/.pytorch/MNIST_data/', train=False, transform=transform)

In [3]:
## Subsetting a validation set

valid_pct = 0.2
train_length = len(trainset)
valid_length = int(np.floor(train_length*valid_pct))

idx = list(range(train_length))
np.random.shuffle(idx)

train_idx, valid_idx = idx[valid_length:], idx[:valid_length] # First valid_length indexes for validation, rest for training

# Create samplers
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

batch_size = 32
train_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, sampler=train_sampler)
valid_loader = torch.utils.data.DataLoader(trainset, batch_size=batch_size, sampler=valid_sampler)
test_loader = torch.utils.data.DataLoader(testset, batch_size=batch_size, shuffle=True)

In [4]:
# Define NN architecture

class Arc(nn.Module):
    def __init__(self, input_size, output_size, hidden_sizes, p=0.5):
        super().__init__()
        
        self.hidden = nn.ModuleList([nn.Linear(input_size,hidden_sizes[0])])
        hidden_layers = zip(hidden_sizes[:-1],hidden_sizes[1:])
        self.hidden.extend(nn.Linear(a,b) for a,b in hidden_layers)
        
        self.output = nn.Linear(hidden_sizes[-1], output_size)
        self.dropout = nn.Dropout(p=p)
    
    def forward(self, x):
        x = x.view(x.shape[0],-1)
        for each in self.hidden:
            x = self.dropout(F.relu(each(x)))
        x = self.output(x)
        x = F.log_softmax(x, dim=1)
        return x

In [5]:
model = Arc(784,10,[256,64])
criterion = nn.NLLLoss()
optimizer = optim.Adam(model.parameters(), lr=0.01)
model

Arc(
  (hidden): ModuleList(
    (0): Linear(in_features=784, out_features=256, bias=True)
    (1): Linear(in_features=256, out_features=64, bias=True)
  )
  (output): Linear(in_features=64, out_features=10, bias=True)
  (dropout): Dropout(p=0.5)
)

In [None]:
epochs = 30
min_valid_loss = np.Inf

for e in range(epochs):
    train_loss = 0
    valid_loss = 0
    correct = 0
    # Training loop
    for images, labels in train_loader:
        optimizer.zero_grad()
        log_ps = model(images)
        loss = criterion(log_ps, labels)
        loss.backward()
        optimizer.step()
        train_loss += loss.item()*images.size(0)
    # Validation loop
    with torch.no_grad():
        model.eval()
        for images, labels in valid_loader:
            log_ps = model(images)
            ps = torch.exp(log_ps)
            loss = criterion(log_ps, labels)
            valid_loss += loss.item()*images.size(0)
        model.train()
    
    avg_valid_loss = valid_loss/len(valid_loader.dataset)
    avg_train_loss = train_loss/len(train_loader.dataset)
    print('Epoch: {}/{}'.format(e+1, epochs),
         'Train_loss: {:.4f}'.format(avg_train_loss),
         'Valid_loss: {:.4f}'.format(avg_valid_loss))
    
    if avg_valid_loss <= min_valid_loss:
        torch.save(model.state_dict(), 'Best_model.pth')
        print('Validation loss decreased from {:.4f} to {:.4f}. Saving model'.format(min_valid_loss, avg_valid_loss))
        min_valid_loss = avg_valid_loss

Epoch: 1/30 Train_loss: 1.2503 Valid_loss: 0.2058
Validation loss decreased from inf to 0.2058. Saving model
Epoch: 2/30 Train_loss: 1.2229 Valid_loss: 0.1764
Validation loss decreased from 0.2058 to 0.1764. Saving model
Epoch: 3/30 Train_loss: 1.2610 Valid_loss: 0.2238
Epoch: 4/30 Train_loss: 1.2746 Valid_loss: 0.2309
Epoch: 5/30 Train_loss: 1.3611 Valid_loss: 0.2589
Epoch: 6/30 Train_loss: 1.3908 Valid_loss: 0.4305
Epoch: 7/30 Train_loss: 1.4073 Valid_loss: 0.2625
Epoch: 8/30 Train_loss: 1.4174 Valid_loss: 0.2539
Epoch: 9/30 Train_loss: 1.3824 Valid_loss: 0.2466
Epoch: 10/30 Train_loss: 1.4014 Valid_loss: 0.2537
Epoch: 11/30 Train_loss: 1.4149 Valid_loss: 0.2798
Epoch: 12/30 Train_loss: 1.3936 Valid_loss: 0.2584
Epoch: 13/30 Train_loss: 1.3928 Valid_loss: 0.2849
Epoch: 14/30 Train_loss: 1.4203 Valid_loss: 0.2837
Epoch: 15/30 Train_loss: 1.4031 Valid_loss: 0.2871
Epoch: 16/30 Train_loss: 1.3814 Valid_loss: 0.2794
Epoch: 17/30 Train_loss: 1.4332 Valid_loss: 0.2869
Epoch: 18/30 Train_lo

In [None]:
# Load the best model
model = model.load_state_dict('Best_model.pth')

In [None]:
# Checking accuracy on test set
test_loss = 0
class_correct = list(0. for i in range(10))
class_total = list(0. for i in range(10))

model.eval()
for images, labels in test_loader:
    log_ps = model(images)
    ps = torch.exp(log_ps)
    loss = criterion(log_ps, labels)
    test_loss += loss.item()*labels.size(0)
    _, pred = torch.max(log_ps, 1)
    correct = np.squeeze(pred.eq(labels.data.view_as(pred)))
    
    for i in range(batch_size):
        label = labels.data[i]
        class_correct[label] += correct[i].item()
        class_total[label] += 1

print('Test_loss: {}'.format(test_loss/len(test_loader.dataset)))

for i in range(10):
    if class_total[i] > 0:
        print('Test Accuracy of %5s: %2d%% (%2d/%2d)' % (
            str(i), 100 * class_correct[i] / class_total[i],
            np.sum(class_correct[i]), np.sum(class_total[i])))
    else:
        print('Test Accuracy of %5s: N/A (no training examples)' % (classes[i]))

print('\nTest Accuracy (Overall): %2d%% (%2d/%2d)' % (
    100. * np.sum(class_correct) / np.sum(class_total),
    np.sum(class_correct), np.sum(class_total)))