<a href="https://colab.research.google.com/github/ccarpenterg/LearningMXNet/blob/master/03_introduction_to_convnets_with_mxnet.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Introduction to Convolutional Neural Networks with MXNet

We previously trained an artificial neural network on the MNIST dataset; now we'll introduce convolutional neural networks (CNNs, or ConvNets for short). CNNs are a family of deep learning models that are particularly well suited to image data.

In [0]:
# Print the CUDA compiler version available in this runtime; the MXNet wheel
# installed in the next cell (mxnet-cu100) is presumably the CUDA 10.0 build,
# so the two should agree.
!nvcc --version

In [0]:
!pip install mxnet-cu100

In [3]:
from __future__ import print_function

import mxnet as mx
from mxnet import nd, gluon, autograd
from mxnet.gluon import nn

from mxnet.gluon.data.vision import transforms

import statistics


# Report the MXNet version actually imported by this kernel.
print(mx.__version__)

1.5.1


### MNIST Dataset

In [0]:
# ToTensor reference:
# http://beta.mxnet.io/api/gluon/_autogen/mxnet.gluon.data.vision.transforms.ToTensor.html
# It maps an (H x W x C) image with values in [0, 255] to a (C x H x W) tensor in [0, 1).
MNIST = gluon.data.vision.MNIST
transform = transforms.Compose([transforms.ToTensor()])

# transform_first applies the transform to the first element of each sample
# (the image); the training split is built first, then the held-out split.
train_data, valid_data = [
    MNIST(train=is_train).transform_first(transform) for is_train in (True, False)
]

In [8]:
# Mini-batches of 64 samples; only the training split is shuffled.
train_loader = gluon.data.DataLoader(train_data, batch_size=64, shuffle=True)
valid_loader = gluon.data.DataLoader(valid_data, batch_size=64, shuffle=False)

# Pull a single mini-batch to sanity-check the tensor layout.
dataiter = iter(train_loader)
batch, labels = next(dataiter)

for tensor in (batch, labels):
    print(tensor.shape)

(64, 1, 28, 28)
(64,)


### Convolutional Neural Network

In [0]:
convnet = nn.Sequential()

# Convolutional feature extractor. The shape notes assume the 1 x 28 x 28 MNIST
# input used in this notebook (3x3 convolutions, 2x2 max-pooling).
convnet.add(nn.Conv2D(channels=32, kernel_size=3, activation='relu'))  # -> 32 x 26 x 26
convnet.add(nn.MaxPool2D(pool_size=2))                                 # -> 32 x 13 x 13
convnet.add(nn.Conv2D(channels=64, kernel_size=3, activation='relu'))  # -> 64 x 11 x 11
convnet.add(nn.MaxPool2D(pool_size=2))                                 # -> 64 x 5 x 5
convnet.add(nn.Conv2D(channels=64, kernel_size=3, activation='relu'))  # -> 64 x 3 x 3

# Classifier head. No explicit flatten layer is used: Gluon's Dense flattens
# its input by default (flatten=True), so it can consume the 4-D conv output.
convnet.add(nn.Dense(64, activation='relu'))
convnet.add(nn.Dense(10))  # no activation: one raw score per class

convnet

Sequential(
  (0): Conv2D(None -> 32, kernel_size=(3, 3), stride=(1, 1), Activation(relu))
  (1): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
  (2): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1), Activation(relu))
  (3): MaxPool2D(size=(2, 2), stride=(2, 2), padding=(0, 0), ceil_mode=False, global_pool=False, pool_type=max, layout=NCHW)
  (4): Conv2D(None -> 64, kernel_size=(3, 3), stride=(1, 1), Activation(relu))
  (5): Dense(None -> 64, Activation(relu))
  (6): Dense(None -> 10, linear)
)

In [0]:
# Run on the first GPU when MXNet can see one; otherwise stay on the CPU.
if mx.context.num_gpus() > 0:
    ctx = mx.gpu(0)
else:
    ctx = mx.cpu(0)

convnet.initialize(init=mx.init.Xavier(), ctx=ctx)

# Feed one all-zero 1 x 28 x 28 image through the network so summary() can
# report each layer's output shape and parameter count.
dummy_batch = nd.zeros((1, 1, 28, 28), ctx=ctx)
convnet.summary(dummy_batch)

--------------------------------------------------------------------------------
        Layer (type)                                Output Shape         Param #
               Input                              (1, 1, 28, 28)               0
        Activation-1                     <Symbol conv0_relu_fwd>               0
        Activation-2                             (1, 32, 26, 26)               0
            Conv2D-3                             (1, 32, 26, 26)             320
         MaxPool2D-4                             (1, 32, 13, 13)               0
        Activation-5                     <Symbol conv1_relu_fwd>               0
        Activation-6                             (1, 64, 11, 11)               0
            Conv2D-7                             (1, 64, 11, 11)           18496
         MaxPool2D-8                               (1, 64, 5, 5)               0
        Activation-9                     <Symbol conv2_relu_fwd>               0
       Activation-10        

### Trainer: Stochastic Gradient Descent

In [0]:
# Plain SGD over every parameter of the network.
sgd_params = {'learning_rate': 0.04}
trainer = gluon.Trainer(convnet.collect_params(), 'sgd', sgd_params)

**Train function**

In [0]:
def train(model, loss_function, optimizer):
    """Run one pass over ``train_loader``, updating ``model`` after every batch.

    Args:
        model: callable network mapping an input batch to class scores.
        loss_function: callable ``(output, labels) -> loss``; the result is
            back-propagated and summed per batch (presumably one value per
            sample, as Gluon losses return).
        optimizer: object exposing ``step(batch_size=...)`` (the caller in this
            notebook passes a ``gluon.Trainer``).

    Returns:
        float: mean over all batches of the *summed* batch loss. This is not a
        per-sample average.
    """
    batch_loss_totals = []

    for inputs, targets in train_loader:
        # Move the mini-batch to the device the model lives on.
        inputs = inputs.as_in_context(ctx)
        targets = targets.as_in_context(ctx)

        # Only the forward pass and the loss need to be recorded for autograd.
        with autograd.record():
            batch_loss = loss_function(model(inputs), targets)

        batch_loss.backward()

        # step() is told the number of samples that contributed to the gradients.
        optimizer.step(batch_size=inputs.shape[0])

        batch_loss_totals.append(float(batch_loss.sum().asscalar()))

    return statistics.mean(batch_loss_totals)

**Validation function**

In [0]:
def validate(model, loss_function, optimizer=None):
    """Compute the loss of ``model`` over ``valid_loader`` without training.

    Nothing is recorded for autograd and no parameters are updated.

    Args:
        model: callable network mapping an input batch to class scores.
        loss_function: callable ``(output, labels) -> loss``; the result is
            summed per batch.
        optimizer: unused. Accepted (and optional) only so the call signature
            mirrors ``train``.

    Returns:
        float: mean over all validation batches of the *summed* batch loss,
        i.e. the same quantity that ``train`` reports.

    Raises:
        statistics.StatisticsError: if ``valid_loader`` yields no batches.
    """
    validation_batch_losses = []

    for batch, labels in valid_loader:
        batch = batch.as_in_context(ctx)
        labels = labels.as_in_context(ctx)

        output = model(batch)
        loss = loss_function(output, labels)

        validation_batch_losses.append(float(nd.sum(loss).asscalar()))

    # Average once, after the loop. Recomputing the mean on every batch was
    # quadratic in the number of batches and left the result undefined for an
    # empty loader.
    return statistics.mean(validation_batch_losses)

**Accuracy function**

In [0]:
def accuracy(model, loader):
    """Return the fraction of samples in ``loader`` that ``model`` classifies correctly.

    Args:
        model: callable network mapping an input batch to per-class scores
            laid out along axis 1.
        loader: iterable yielding ``(batch, labels)`` pairs.

    Returns:
        The accuracy value reported by ``mx.metric.Accuracy`` (a fraction; the
        caller multiplies it by 100 to print a percentage).
    """
    metric = mx.metric.Accuracy()

    for batch, labels in loader:
        batch = batch.as_in_context(ctx)
        labels = labels.as_in_context(ctx)

        # Take the argmax of the raw scores directly. Softmax is strictly
        # increasing along the class axis, so normalising to probabilities
        # first cannot change which class wins and only adds work.
        predictions = nd.argmax(model(batch), axis=1)

        metric.update(labels, predictions)

    _, accuracy_metric = metric.get()

    return accuracy_metric

### Training the Convolutional Neural Network

In [0]:
loss_function = gluon.loss.SoftmaxCrossEntropyLoss()
epochs = 10

for epoch in range(1, epochs + 1):
    print('Epoch {}/{}'.format(epoch, epochs))

    # One optimisation pass over the training split, then re-score that split
    # with the weights as they stand at the end of the epoch.
    train_loss = train(convnet, loss_function, trainer)
    train_accuracy = accuracy(convnet, train_loader)
    print('Training loss: {}'.format(train_loss))
    print('Training accuracy: {}%'.format(100 * train_accuracy))

    # Score the held-out split; the trainer is passed along but validate()
    # never calls it.
    valid_loss = validate(convnet, loss_function, trainer)
    valid_accuracy = accuracy(convnet, valid_loader)
    print('Validation loss: {}'.format(valid_loss))
    print('Validation accuracy: {}%'.format(100 * valid_accuracy))

Epoch 1/10
Training loss: 4.350311624946625
Training accuracy: 98.22666666666666%
Validation loss: 3.3219563202208775
Validation accuracy: 98.37%
Epoch 2/10
Training loss: 3.41099478887406
Training accuracy: 97.87833333333333%
Validation loss: 4.370452936964145
Validation accuracy: 97.72999999999999%
Epoch 3/10
Training loss: 2.9088058139302775
Training accuracy: 98.38%
Validation loss: 3.621938658913799
Validation accuracy: 98.16%
Epoch 4/10
Training loss: 2.4454577407126488
Training accuracy: 98.97166666666666%
Validation loss: 2.503656580534046
Validation accuracy: 98.79%
Epoch 5/10
Training loss: 2.0874024605803463
Training accuracy: 99.05666666666667%
Validation loss: 2.5217852653293416
Validation accuracy: 98.79%
Epoch 6/10
Training loss: 1.7922539590482613
Training accuracy: 99.21333333333334%
Validation loss: 2.3535762271416747
Validation accuracy: 98.72%
Epoch 7/10
Training loss: 1.638567416424325
Training accuracy: 99.45%
Validation loss: 2.2032792256154403
Validation accurac