In [1]:
import numpy as np
import mxnet as mx
from mxnet import gluon, nd, autograd
from mxnet.gluon.data.vision.datasets import ImageFolderDataset
from mxnet.gluon.data import DataLoader
import mxnet.contrib.onnx as onnx_mxnet
%matplotlib inline
import matplotlib.pyplot as plt
import tarfile, os
import json
import multiprocessing
import logging
logging.basicConfig(level=logging.INFO)

In [2]:
image_folder = "images"
utils_file = "utils.py" # contain utils function to plot nice visualization
images = ['wrench.jpg', 'dolphin.jpg', 'lotus.jpg']
base_url = "https://raw.githubusercontent.com/dmlc/web-data/master/mxnet/doc/tutorials/onnx/{}?raw=true"


for image in images:
    mx.test_utils.download(base_url.format("{}/{}".format(image_folder, image)), fname=image,dirname=image_folder)
mx.test_utils.download(base_url.format(utils_file), fname=utils_file)

from utils import *

INFO:root:images/wrench.jpg exists, skipping download
INFO:root:images/dolphin.jpg exists, skipping download
INFO:root:images/lotus.jpg exists, skipping download
INFO:root:utils.py exists, skipping download


## Downloading a model from the ONNX model zoo

We download a pre-trained model, in our case the [GoogleNet](https://arxiv.org/abs/1409.4842) model, trained on [ImageNet](http://www.image-net.org/) from the [ONNX model zoo](https://github.com/onnx/models). The model comes packaged in an archive `tar.gz` file containing an `model.onnx` model file.

In [3]:
base_url = "https://s3.amazonaws.com/download.onnx/models/opset_3/"
current_model = "bvlc_googlenet"
model_folder = "/home/ubuntu/pneumothorax/model"
archive_file = "{}.tar.gz".format(current_model)
archive_path = os.path.join(model_folder, archive_file)
url = "{}{}".format(base_url, archive_file)
onnx_path = os.path.join(model_folder, current_model, 'model.onnx')

'''
# Download the zipped model
mx.test_utils.download(url, dirname = model_folder)
'''

# Extract the model
if not os.path.isdir(os.path.join(model_folder, current_model)):
    print('Extracting {} in {}...'.format(archive_path, model_folder))
    tar = tarfile.open(archive_path, "r:gz")
    tar.extractall(model_folder)
    tar.close()
    print('Model extracted.')

In [4]:
data_folder = "/home/ubuntu/pneumothorax/mmode"
dataset_name = "train"

In [5]:
training_path = os.path.join(data_folder, dataset_name)
testing_path = os.path.join(data_folder, 'val')

### Load the data using an ImageFolderDataset and a DataLoader

We need to transform the images to a format accepted by the network

In [6]:
EDGE = 224
SIZE = (EDGE, EDGE)
BATCH_SIZE = 32
NUM_WORKERS = multiprocessing.cpu_count()

In [7]:
def transform(image, label):
    resized = mx.image.resize_short(image, EDGE)
    cropped, crop_info = mx.image.center_crop(resized, SIZE)
    transposed = nd.transpose(cropped, (2,0,1))
    return transposed, label

In [8]:
dataset_train = ImageFolderDataset(root=training_path, transform=transform)
dataset_test = ImageFolderDataset(root=testing_path, transform=transform)

In [9]:
dataloader_train = DataLoader(dataset_train, batch_size=BATCH_SIZE, last_batch='discard',
                              shuffle=True, num_workers=NUM_WORKERS)
dataloader_test = DataLoader(dataset_test, batch_size=BATCH_SIZE, last_batch='discard',
                             shuffle=True, num_workers=NUM_WORKERS)
print("Train dataset: {} images, Test dataset: {} images".format(len(dataset_train), len(dataset_test)))

Train dataset: 1829 images, Test dataset: 457 images


In [10]:
categories = dataset_train.synsets
NUM_CLASSES = len(categories)
BATCH_SIZE = 32

## Fine-Tuning the ONNX model

### Getting the last layer

Load the ONNX model

In [11]:
sym, arg_params, aux_params = onnx_mxnet.import_model(onnx_path)

This function get the output of a given layer

In [12]:
def get_layer_output(symbol, arg_params, aux_params, layer_name):
    all_layers = symbol.get_internals()
    net = all_layers[layer_name+'_output']
    net = mx.symbol.Flatten(data=net)
    new_args = dict({k:arg_params[k] for k in arg_params if k in net.list_arguments()})
    new_aux = dict({k:aux_params[k] for k in aux_params if k in net.list_arguments()})
    return (net, new_args, new_aux)

In [13]:
sym.get_internals()

<Symbol group [data_0, pad0, conv1/7x7_s2_w_0, conv1/7x7_s2_b_0, convolution0, relu0, pad1, pooling0, lrn0, pad2, conv2/3x3_reduce_w_0, conv2/3x3_reduce_b_0, convolution1, relu1, pad3, conv2/3x3_w_0, conv2/3x3_b_0, convolution2, relu2, lrn1, pad4, pooling1, pad5, inception_3a/1x1_w_0, inception_3a/1x1_b_0, convolution3, relu3, pad6, inception_3a/3x3_reduce_w_0, inception_3a/3x3_reduce_b_0, convolution4, relu4, pad7, inception_3a/3x3_w_0, inception_3a/3x3_b_0, convolution5, relu5, pad8, inception_3a/5x5_reduce_w_0, inception_3a/5x5_reduce_b_0, convolution6, relu6, pad9, inception_3a/5x5_w_0, inception_3a/5x5_b_0, convolution7, relu7, pad10, pooling2, pad11, inception_3a/pool_proj_w_0, inception_3a/pool_proj_b_0, convolution8, relu8, concat0, pad12, inception_3b/1x1_w_0, inception_3b/1x1_b_0, convolution9, relu9, pad13, inception_3b/3x3_reduce_w_0, inception_3b/3x3_reduce_b_0, convolution10, relu10, pad14, inception_3b/3x3_w_0, inception_3b/3x3_b_0, convolution11, relu11, pad15, inceptio

We get the network until the output of the `flatten0` layer

In [14]:
new_sym, new_arg_params, new_aux_params = get_layer_output(sym, arg_params, aux_params, 'flatten0')

### Fine-tuning in gluon


We can now take advantage of the features and pattern detection knowledge that our network learnt training on ImageNet, and apply that to the new Caltech101 dataset.


We pick a context, fine-tuning on CPU will be **WAY** slower.

In [15]:
ctx = mx.gpu() if mx.test_utils.list_gpus() else mx.cpu()

We create a symbol block that is going to hold all our pre-trained layers, and assign the weights of the different pre-trained layers to the newly created SymbolBlock

In [16]:
pre_trained = gluon.nn.SymbolBlock(outputs=new_sym, inputs=mx.sym.var('data_0'))
net_params = pre_trained.collect_params()
for param in new_arg_params:
    if param in net_params:
        net_params[param]._load_init(new_arg_params[param], ctx=ctx)
for param in new_aux_params:
    if param in net_params:
        net_params[param]._load_init(new_aux_params[param], ctx=ctx)

In [17]:
dense_layer = gluon.nn.Dense(NUM_CLASSES, activation='relu')
dense_layer.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

In [18]:
bnorm = gluon.nn.BatchNorm()
bnorm.initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)

We add the SymbolBlock and the new dense layer to a HybridSequential network

In [19]:
net = gluon.nn.HybridSequential()
net.add(pre_trained)
net.add(dense_layer)
net.add(bnorm)



### Loss
Softmax cross entropy for multi-class classification

In [20]:
softmax_cross_entropy = gluon.loss.SoftmaxCrossEntropyLoss()

### Trainer
Initialize trainer with common training parameters

In [21]:
LEARNING_RATE = 0.0001
WDECAY = 0.00001
MOMENTUM = 0.9

The trainer will retrain and fine-tune the entire network. If we use `dense_layer` instead of `net` in the cell below, the gradient updates would only be applied to the new last dense layer. Essentially we would be using the pre-trained network as a featurizer.

In [22]:
trainer = gluon.Trainer(net.collect_params(), 'rmsprop', 
                        {'learning_rate': LEARNING_RATE,
                         'wd':WDECAY})

### Evaluation loop

We measure the accuracy in a non-blocking way, using `nd.array` to take care of the parallelisation that MXNet and Gluon offers.

In [23]:
 def evaluate_accuracy_gluon(data_iterator, net):
    num_instance = nd.zeros(1, ctx=ctx)
    sum_metric = nd.zeros(1,ctx=ctx, dtype=np.int32)
    for i, (data, label) in enumerate(data_iterator):
        data = data.astype(np.float32).as_in_context(ctx)
        label = label.astype(np.int32).as_in_context(ctx)
        output = net(data)
        prediction = nd.argmax(output, axis=1).astype(np.int32)
        num_instance += len(prediction)
        sum_metric += (prediction==label).sum()
    accuracy = (sum_metric.astype(np.float32)/num_instance.astype(np.float32))
    return accuracy.asscalar()

In [24]:
%%time
print("Untrained network Test Accuracy: {0:.4f}".format(evaluate_accuracy_gluon(dataloader_test, net)))

Untrained network Test Accuracy: 0.4353
CPU times: user 1.88 s, sys: 4.07 s, total: 5.96 s
Wall time: 8.15 s


### Training loop

In [25]:
max_val_accuracy = 0
count = 0
n_count_stop = 5
for epoch in range(100):
    for i, (data, label) in enumerate(dataloader_train):
        data = data.astype(np.float32).as_in_context(ctx)
        label = label.as_in_context(ctx)

        if i%100==0 and i >0:
            print('Batch [{0}] loss: {1:.4f}'.format(i, loss.mean().asscalar()))

        with autograd.record():
            output = net(data)
            loss = softmax_cross_entropy(output, label)
        loss.backward()
        trainer.step(data.shape[0])

    nd.waitall() # wait at the end of the epoch    
    new_val_accuracy = evaluate_accuracy_gluon(dataloader_test, net)    
    print("Epoch [{0}] Test Accuracy {1:.4f} ".format(epoch, new_val_accuracy))

    # We perform early-stopping regularization, to prevent the model from overfitting
    if max_val_accuracy >= new_val_accuracy:
        count +=1
        if count == n_count_stop:
            print('Validation accuracy is not improving, stopping training')
            break
    else:
        count = 0
        max_val_accuracy = new_val_accuracy              

Epoch [0] Test Accuracy 0.9308 
Epoch [1] Test Accuracy 0.9152 
Epoch [2] Test Accuracy 0.9018 
Epoch [3] Test Accuracy 0.9643 
Epoch [4] Test Accuracy 0.9442 
Epoch [5] Test Accuracy 0.9754 
Epoch [6] Test Accuracy 0.9777 
Epoch [7] Test Accuracy 0.9844 
Epoch [8] Test Accuracy 0.9777 
Epoch [9] Test Accuracy 0.9821 
Epoch [10] Test Accuracy 0.9821 
Epoch [11] Test Accuracy 0.9799 
Epoch [12] Test Accuracy 0.9799 
Validation accuracy is not improving, stopping training
