In [17]:
# import libraries
import torch
import numpy as np

from torchvision import datasets
import torchvision.transforms as transforms

import torch.nn as nn
import torch.nn.functional as F

from torch.utils.data.sampler import SubsetRandomSampler

import matplotlib.pyplot as plt
from IPython.display import display, clear_output
%matplotlib notebook


## Prepare Data

In [18]:
batch_size = 64
num_workers = 0
valid_size = 0.2

#data augmentation and data conversion to torch tensor
transform = transforms.Compose([
    transforms.RandomHorizontalFlip(), # randomly flip and rotate
    transforms.RandomRotation(10),
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])      
# load the training nad test datasets
train_data = datasets.MNIST(root='data', train=True,
                                   download=True, transform=transform)
test_data = datasets.MNIST(root='data', train=False,
                                  download=True, transform=transform)

# # prepare data loaders
# train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, num_workers=0)
# test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, num_workers=0)
# obtain training indices that will be used for validation
num_train = len(train_data)
indices = list(range(num_train))
np.random.shuffle(indices)
split = int(np.floor(valid_size * num_train))
train_idx, valid_idx = indices[split:], indices[:split]

# define samplers for obtaining training and validation batches
train_sampler = SubsetRandomSampler(train_idx)
valid_sampler = SubsetRandomSampler(valid_idx)

# prepare data loaders (combine dataset and sampler)
train_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size,
    sampler=train_sampler, num_workers=num_workers)
valid_loader = torch.utils.data.DataLoader(train_data, batch_size=batch_size, 
    sampler=valid_sampler, num_workers=num_workers)
test_loader = torch.utils.data.DataLoader(test_data, batch_size=batch_size, 
    num_workers=num_workers)

In [19]:
# Visualize the data
    
# obtain one batch of training images
dataiter = iter(train_loader)
images, labels = dataiter.next()
images = images.numpy()


plt.imshow(np.squeeze(images[22]), cmap='gray')
plt.title(str(labels[22].item()))
print(images[22].shape)

<IPython.core.display.Javascript object>

(1, 28, 28)


In [20]:
# define te architecture
class Cnn(nn.Module):
    def __init__(self):
        super(Cnn, self).__init__()
        # convolutional later (sees 28x28x1 image tensor)
        self.conv1 = nn.Conv2d(1, 24, 5, padding=2, stride=1)
        # convolutional later (sees 14x14x24 image tensor)
        self.conv2 = nn.Conv2d(24, 48, 5, padding=2, stride=1)
        # convolutional later (sees 7x7x48 image tensor)
        self.conv3 = nn.Conv2d(48, 64, 2, padding=1, stride=1)
        self.pool = nn.MaxPool2d(2, 2)
        self.fc1 = nn.Linear(1024, 256)
        self.fc2 = nn.Linear(256, 10)
    
    def forward(self, x):
        # add sqquence of convolution and max pooling layers
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = self.pool(F.relu(self.conv3(x)))
        # flatten image input
        x = x.view(-1, 64 * 4 * 4)
        
        # add hidden layers with relu
        x = F.relu(self.fc1(x))
        
        x = self.fc2(x)
        return x
    
# create a model
model = Cnn()
print(model)

Cnn(
  (conv1): Conv2d(1, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv2): Conv2d(24, 48, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv3): Conv2d(48, 64, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1024, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)


In [21]:
# move tensors to gpu
model.cuda()

Cnn(
  (conv1): Conv2d(1, 24, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv2): Conv2d(24, 48, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
  (conv3): Conv2d(48, 64, kernel_size=(2, 2), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=1024, out_features=256, bias=True)
  (fc2): Linear(in_features=256, out_features=10, bias=True)
)

In [22]:
import torch.optim as optim

# specify loss function (categorical cross-entropy)
criterion = nn.CrossEntropyLoss()

# specify optimizer
optimizer = optim.SGD(model.parameters(), lr=0.01)

In [49]:
class LivePlot():
    """Class to live plotting accuracy and loss during model training"""
    def __init__(self, fig, n_epochs):
        #initialize matplotlib figure
        self.n_epochs = n_epochs
        self.fig = fig
        self.ax_loss = fig.add_subplot(2, 1, 1)
        self.ax_acc = fig.add_subplot(2, 1, 2)
       
        plt.ion()
        self.fig.show()
        self.fig.canvas.draw()
        
    def plot_model_loss(self, train_loss, valid_loss):
         #Plot Loss    
        self.ax_loss.clear()
        self.ax_loss.plot(train_loss, label='training loss')
        self.ax_loss.plot(valid_loss, label='validation loss')
        self.ax_loss.set_xlabel('epochs')
        self.ax_loss.set_ylabel('loss')
        self.ax_loss.set_title('Model Loss')
        self.ax_loss.legend(loc="upper right")
        self.ax_loss.set_xlim(0, self.n_epochs+1)
        self.ax_loss.set_ylim(0, 0.25)
        fig.canvas.draw()
    
    def plot_model_accuracy(self, train_accuracy, valid_accuracy):
        #Plot acc    
        self.ax_acc.clear()
        self.ax_acc.plot(train_accuracy, label='training accuracy')
        self.ax_acc.plot(valid_accuracy, label='validation accuracy')
        self.ax_acc.set_xlabel('epochs')
        self.ax_acc.set_ylabel('accuracy')
        self.ax_acc.set_title('Model Accuracy')
        self.ax_acc.legend(loc="upper right")
        self.ax_acc.set_xlim(0, self.n_epochs+1)
        plt.tight_layout()
        self.fig.canvas.draw()

        

In [54]:
# number of epochs to train the model
n_epochs = 50

valid_loss_min = np.Inf # track change in validation loss
training_loss_history = []
validation_loss_history = []
training_acc_history = []
validation_acc_history = []

fig = plt.figure(figsize=(8, 8))
live_plots = LivePlot(fig, n_epochs)


for epoch in range(1, n_epochs+1):

    # keep track of training and validation loss
    train_loss = 0.0
    valid_loss = 0.0
    
    train_accuracy = 0.0
    
    train_batches_acc = []
    print(len(train_loader.sampler))
    # train the model #
    model.train()
    for batch_idx, (data, target) in enumerate(train_loader):
        # move tensors to GPU if CUDA is available
       
        data, target = data.cuda(), target.cuda()
        # clear the gradients of all optimized variables
        optimizer.zero_grad()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        # calculate the batch loss
        loss = criterion(output, target)
        # backward pass: compute gradient of the loss with respect to model parameters
        loss.backward()
        # perform a single optimization step (parameter update)
        optimizer.step()
        # update training loss
        train_loss += loss.item()*data.size(0)
        #calculate batch accuracy
        _, pred = torch.max(output, 1) 
        correct_tensor = pred.eq(target.data.view_as(pred))
        correct = np.squeeze(correct_tensor.cpu().numpy())
        train_batches_acc.append(100*correct.sum()/data.size(0))
        train_accuracy += correct.sum()/data.size(0)
        
    
    validation_batches_acc = []
    # validate the model 
    model.eval()
    for batch_idx, (data, target) in enumerate(valid_loader):
        # move tensors to GPU if CUDA is available
        
        data, target = data.cuda(), target.cuda()
        # forward pass: compute predicted outputs by passing inputs to the model
        output = model(data)
        
        # calculate the batch loss
        loss = criterion(output, target)
        # update average validation loss 
        valid_loss += loss.item()*data.size(0)
        
        #calculate batch accuracy
        _, pred = torch.max(output, 1) 
        correct_tensor = pred.eq(target.data.view_as(pred))
        correct = np.squeeze(correct_tensor.cpu().numpy())
        validation_batches_acc.append(100*correct.sum()/data.size(0))
    
        
        
    
    # calculate average losses
    train_loss = train_loss/len(train_loader.sampler)
    valid_loss = valid_loss/len(valid_loader.sampler)
    
    # calculate average accuracy
    valid_acc = np.mean(np.array(validation_batches_acc))
    train_acc = np.mean(np.array(train_batches_acc))
    
    print(train_accuracy, train_acc)
    training_loss_history.append(train_loss)
    validation_loss_history.append(valid_loss)
    training_acc_history.append(train_acc)
    validation_acc_history.append(valid_acc)
    
    live_plots.plot_model_loss(training_loss_history, validation_loss_history)
    live_plots.plot_model_accuracy(training_acc_history, validation_acc_history)
    
   
        
    # print training/validation statistics 
    print('Epoch: {} \tTraining Loss: {:.6f} \tValidation Loss: {:.6f} \n\t\tTraining Acc: {:.6f} \tValidation Acc: {:.6f}'.format(
        epoch, train_loss, valid_loss, train_acc, valid_acc))
    
    # save model if validation loss has decreased
    if valid_loss <= valid_loss_min:
        print('Validation loss decreased ({:.6f} --> {:.6f}).  Saving model ...'.format(
        valid_loss_min,
        valid_loss))
        torch.save(model.state_dict(), 'digit_recognizer_cnn_model.pt')
        valid_loss_min = valid_loss

<IPython.core.display.Javascript object>

48000
1.0
1.984375
2.96875
3.96875
4.9375
5.921875
6.921875
7.921875
8.875
9.875
10.875
11.875
12.875
13.875
14.875
15.875
16.875
17.859375
18.84375
19.8125
20.78125
21.75
22.75
23.75
24.734375
25.71875
26.703125
27.671875
28.671875
29.671875
30.671875
31.671875
32.640625
33.640625
34.625
35.59375
36.59375
37.59375
38.59375
39.59375
40.578125
41.578125
42.5625
43.546875
44.546875
45.546875
46.53125
47.515625
48.5
49.5
50.484375
51.484375
52.46875
53.453125
54.453125
55.4375
56.4375
57.421875
58.40625
59.40625
60.40625
61.40625
62.40625
63.390625
64.375
65.375
66.375
67.375
68.296875
69.265625
70.265625
71.265625
72.265625
73.265625
74.265625
75.25
76.25
77.234375
78.234375
79.234375
80.203125
81.203125
82.203125
83.203125
84.171875
85.171875
86.171875
87.171875
88.171875
89.171875
90.15625
91.15625
92.15625
93.15625
94.15625
95.15625
96.125
97.125
98.09375
99.09375
100.0625
101.0625
102.03125
103.0
103.984375
104.984375
105.96875
106.9375
107.9375
108.9375
109.9375
110.9375
111.9375
11

KeyboardInterrupt: 

In [7]:
model.load_state_dict(torch.load('model_augmented.pt'))

IncompatibleKeys(missing_keys=[], unexpected_keys=[])