# Creating an MXNet model using SSD

We first install GluonCV and the MXNet library.

In [0]:
# Load IPython's autoreload extension and switch it to mode 2, so imported modules
# are reloaded automatically before code is executed.
%load_ext autoreload
%autoreload 2

In [0]:
# Install GluonCV and MXNet from PyPI.
# NOTE(review): both the plain `mxnet` wheel and `mxnet-cu100` are installed; presumably
# only the CUDA build is needed for the GPU training below -- confirm, and pin versions.
!pip install gluoncv
!pip install mxnet
!pip install mxnet-cu100

In [0]:
# Download a replacement detection.py and move it into the installed gluoncv pascal_voc package.
# NOTE(review): the destination hard-codes a Python 3.6 dist-packages path -- confirm it
# matches the interpreter used by this runtime.
!wget https://www.dropbox.com/s/2gr35m1alsghnzi/detecion.py?dl=1 -O detection.py
!mv detection.py /usr/local/lib/python3.6/dist-packages/gluoncv/data/pascal_voc/

In [0]:
import os
# Send signal 9 to this kernel's own process, which terminates it.
# Presumably done so the runtime restarts and picks up the packages installed and
# patched in the previous cells -- confirm.
os.kill(os.getpid(), 9)

We download the dataset. If you are working with Google Colab, you have several options to download the dataset in this notebook, see the available options in the [LabelDetection documentation](https://github.com/ancasag/LabelDetection).

In [0]:
# Extract datasets.zip (must already be in the working directory) in place
!unzip datasets.zip

We import the necessary libraries

In [0]:
import time
from matplotlib import pyplot as plt
import numpy as np
import mxnet as mx
from mxnet import autograd, gluon
import gluoncv as gcv
from gluoncv.utils import download, viz
from gluoncv.data import VOCDetection
import argparse
import importlib

# Dataset name used as the first element of each VOCLike split
datasetName = "dataset"
# Number of passes over the training data
nepochs = 25


In [0]:
# Object categories of the dataset; used as VOCLike.CLASSES and passed to get_model
classes = ['apple','banana','orange']

In [0]:
import os

# Disable MXNet GPU peer-to-peer communication. MXNet reads this setting from the
# process environment, so it must be exported there; a plain Python variable of the
# same name has no effect on MXNet.
os.environ['MXNET_ENABLE_GPU_P2P'] = '0'
# Kept so any later reference to the module-level name still resolves.
MXNET_ENABLE_GPU_P2P = 0


class VOCLike(VOCDetection):
    """VOCDetection subclass whose category list is this notebook's `classes`."""

    # Category names exposed on the dataset class
    CLASSES = classes

    def __init__(self, root, splits, transform=None, index_map=None, preload_label=True):
        """Forward every argument, unchanged and in order, to VOCDetection.__init__."""
        super().__init__(root, splits, transform, index_map, preload_label)

# Training and test splits, both read from the 'datasets' root directory
dataset = VOCLike(
    root='datasets',
    splits=((datasetName, 'train'),),
)
test_dataset = VOCLike(
    root='datasets',
    splits=((datasetName, 'test'),),
)

# SSD-512 / ResNet-50 detector built for the custom class list, requested with transfer='voc'
net = gcv.model_zoo.get_model(
    'ssd_512_resnet50_v1_custom',
    classes=classes,
    pretrained_base=False,
    transfer='voc',
)


from gluoncv.data.batchify import Tuple, Stack, Pad
from gluoncv.data.transforms.presets.ssd import SSDDefaultTrainTransform,SSDDefaultValTransform

def get_dataloader(net, train_dataset, val_dataset, data_shape, batch_size, num_workers):
    """Build the training and validation data loaders for an SSD network.

    Args:
        net: network called once on a dummy image to obtain its anchors.
        train_dataset: dataset wrapped with SSDDefaultTrainTransform.
        val_dataset: dataset wrapped with SSDDefaultValTransform.
        data_shape: side length used for both the width and the height of the input.
        batch_size: number of samples per batch, for both loaders.
        num_workers: worker count forwarded to both DataLoaders.

    Returns:
        A (train_loader, val_loader) tuple.
    """
    side = data_shape
    # One forward pass on an all-zero image, in training mode, yields the fixed anchors
    # that the training transform needs for target generation.
    with autograd.train_mode():
        dummy_image = mx.nd.zeros((1, 3, side, side))
        _cls_out, _box_out, anchors = net(dummy_image)
    anchors = anchors.as_in_context(mx.cpu())

    # Training samples are (image, cls_targets, box_targets); each part is stacked.
    train_batchify = Tuple(Stack(), Stack(), Stack())
    train_samples = train_dataset.transform(SSDDefaultTrainTransform(side, side, anchors))
    train_loader = gluon.data.DataLoader(
        train_samples,
        batch_size=batch_size,
        shuffle=True,
        batchify_fn=train_batchify,
        last_batch='rollover',
        num_workers=num_workers,
    )

    # Validation labels are padded with -1 instead of being stacked.
    eval_batchify = Tuple(Stack(), Pad(pad_val=-1))
    eval_samples = val_dataset.transform(SSDDefaultValTransform(side, side))
    val_loader = gluon.data.DataLoader(
        eval_samples,
        batch_size=batch_size,
        shuffle=False,
        batchify_fn=eval_batchify,
        last_batch='keep',
        num_workers=num_workers,
    )
    return train_loader, val_loader

# 512-pixel square inputs, batches of 8 images, num_workers=0
train_data, test_data = get_dataloader(
    net, dataset, test_dataset, data_shape=512, batch_size=8, num_workers=0)


# Start training (fine-tuning)
n_gpu = mx.context.num_gpus()
# Train on the first GPU when MXNet reports one; otherwise fall back to the CPU
# instead of failing on a machine without a usable GPU.
ctx = [mx.gpu(0)] if n_gpu > 0 else [mx.cpu()]
net.collect_params().reset_ctx(ctx)
trainer = gluon.Trainer(
    net.collect_params(), 'sgd',
    {'learning_rate': 0.001, 'wd': 0.0005, 'momentum': 0.9})

mbox_loss = gcv.loss.SSDMultiBoxLoss()
# Running averages of the two loss terms, reported while training
ce_metric = mx.metric.Loss('CrossEntropy')
smoothl1_metric = mx.metric.Loss('SmoothL1')

# Hybridizing does not depend on the epoch, so it is done once up front
# rather than repeated at the start of every epoch.
net.hybridize(static_alloc=True, static_shape=True)

for epoch in range(0, nepochs):
    # The reported loss averages restart at every epoch
    ce_metric.reset()
    smoothl1_metric.reset()
    btic = time.time()
    for i, batch in enumerate(train_data):
        batch_size = batch[0].shape[0]
        # batch = (images, cls_targets, box_targets); each part is split across ctx
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        cls_targets = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        box_targets = gluon.utils.split_and_load(batch[2], ctx_list=ctx, batch_axis=0)
        with autograd.record():
            cls_preds = []
            box_preds = []
            for x in data:
                cls_pred, box_pred, _ = net(x)
                cls_preds.append(cls_pred)
                box_preds.append(box_pred)
            sum_loss, cls_loss, box_loss = mbox_loss(
                cls_preds, box_preds, cls_targets, box_targets)
            autograd.backward(sum_loss)
        # since we have already normalized the loss, we don't want to normalize
        # by batch-size anymore
        trainer.step(1)
        ce_metric.update(0, [l * batch_size for l in cls_loss])
        smoothl1_metric.update(0, [l * batch_size for l in box_loss])
        if i % 5 == 0:
            # Only query the metrics when they are actually reported
            name1, loss1 = ce_metric.get()
            name2, loss2 = smoothl1_metric.get()
            print('[Epoch {}][Batch {}], Speed: {:.3f} samples/sec, {}={:.3f}, {}={:.3f}'.format(
                epoch, i, batch_size/(time.time()-btic), name1, loss1, name2, loss2))
        # Restart the per-batch timer used for the speed figure
        btic = time.time()

#############################################################################################
# Save finetuned weights to disk
net.save_parameters('ssd_512_resnet50_final.params')

We evaluate the model.

In [0]:
from gluoncv.utils.metrics.voc_detection import VOC07MApMetric

def validate(net, val_data, ctx, eval_metric):
    """Run the detector over a validation loader and return the metric result.

    Args:
        net: detector; its NMS settings are set and it is hybridized here.
        val_data: iterable of batches where batch[0] holds images and batch[1] labels.
        ctx: list of contexts each batch is split across.
        eval_metric: metric object that is reset, updated once per batch and queried at the end.

    Returns:
        The value of eval_metric.get() after every batch has been processed.
    """
    eval_metric.reset()
    # NMS threshold and top-k limit applied to the detections
    net.set_nms(nms_thresh=0.45, nms_topk=400)
    net.hybridize(static_alloc=True, static_shape=True)
    for batch in val_data:
        images = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        labels = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        pred_ids, pred_scores, pred_boxes = [], [], []
        true_boxes, true_ids, true_difficults = [], [], []
        # Upper bound used to clip predicted boxes to the image size
        # (assumes batch[0] is laid out as (N, C, H, W) -- TODO confirm)
        clip_max = batch[0].shape[2]
        for x, y in zip(images, labels):
            # Predictions for this slice of the batch
            ids, scores, bboxes = net(x)
            pred_ids.append(ids)
            pred_scores.append(scores)
            pred_boxes.append(bboxes.clip(0, clip_max))
            # Ground truth: columns 0-3 are the box, column 4 the class id and,
            # when present, column 5 the "difficult" flag
            true_ids.append(y.slice_axis(axis=-1, begin=4, end=5))
            true_boxes.append(y.slice_axis(axis=-1, begin=0, end=4))
            if y.shape[-1] > 5:
                difficult = y.slice_axis(axis=-1, begin=5, end=6)
            else:
                difficult = None
            true_difficults.append(difficult)

        # Feed this batch's predictions and ground truth to the metric
        eval_metric.update(pred_boxes, pred_ids, pred_scores, true_boxes, true_ids, true_difficults)
    return eval_metric.get()


# Evaluate on the test loader with the VOC07 mAP metric at IoU 0.5
val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=test_dataset.classes)
map_name, mean_ap = validate(net, test_data, ctx, val_metric)
# One "name=value" line per entry returned by the metric
report_lines = []
for metric_label, metric_value in zip(map_name, mean_ap):
    report_lines.append('{}={}'.format(metric_label, metric_value))
val_msg = '\n'.join(report_lines)
print(val_msg)

At the end you will have a file called `ssd_512_resnet50_final.params`. Together with the class names file, it can be included in the application and used on new images.

----------------------


## Data distillation

After training a model with the annotated images, it is possible to apply a data distillation procedure to create a model using the unlabelled images. You can only apply this technique if there were unlabelled images in your dataset.

We first install the library for ensemble methods.

In [0]:
# Clone the ensembleObjectDetection repository into the current directory
!git clone https://github.com/ancasag/ensembleObjectDetection.git

In [0]:
# Move into the cloned repository's TestTimeAugmentation folder so its modules can be imported.
# The explicit %cd magic is used because a bare `cd` depends on IPython's automagic.
%cd ensembleObjectDetection/TestTimeAugmentation

We install additional libraries that are required.

In [0]:
# Extra packages installed before importing the cloned repository's modules.
# NOTE(review): versions are not pinned -- confirm which versions the repository expects.
!pip install clodsa
!pip install mrcnn
!pip install keras-retinanet

The following cells apply data distillation.

In [0]:
import testTimeAugmentation
import function
import os
import shutil
import argparse
import ensembleOptions
from mainTTA import tta
from imutils import paths

In [0]:
# Directory of unlabelled images; passed to tta() as the image folder
pathImg = '/content/datasets/unlabelled/'

In [0]:
# Predictor from the cloned repository, built from the weights file saved by the first
# training run and the class-names file of the dataset
ssdResnet = testTimeAugmentation.MXnetSSD512Pred('/content/ssd_512_resnet50_final.params', '/content/datasets/classes.names')

In [0]:
# Technique names passed to tta(); presumably the augmentations applied at test time
# -- confirm the accepted values against the ensembleObjectDetection repository
myTechniques = [ "histo","hflip","none"]

In [0]:
# Option string passed to tta(); presumably selects how detections are combined
# -- confirm the accepted values against the ensembleObjectDetection repository
option = "affirmative"

In [0]:
# Run the repository's tta() on the unlabelled image folder with the predictor,
# technique list and option defined in the previous cells.
# Presumably this writes .xml annotation files next to the images -- confirm.
tta(ssdResnet,myTechniques,pathImg,option)

We remove the files without annotations. 


In [0]:
import os
from imutils import paths

# Annotation files smaller than this many bytes are treated as holding no detections
# (threshold kept from the original cell -- TODO confirm it suits the generated XML).
MIN_ANNOTATION_BYTES = 400

# Materialise the listing first so nothing is deleted from a directory that is
# still being walked.
files = list(paths.list_files("/content/datasets/unlabelled/", validExts='.xml'))
for fullpath in files:
    if os.path.getsize(fullpath) < MIN_ANNOTATION_BYTES:
        # Build the image path from the annotation's own location, so files that
        # list_files finds in sub-folders are matched with the right image.
        image_path = os.path.splitext(fullpath)[0] + ".jpg"
        os.remove(fullpath)
        # The image may be absent or use another extension; only delete it when present
        if os.path.exists(image_path):
            os.remove(image_path)


Now, we reorganize the dataset.

In [0]:
# Append the full paths of the remaining unlabelled-folder images to the training split list
!ls /content/datasets/unlabelled/*.jpg >> /content/datasets/VOCdataset/ImageSets/Main/train.txt

In [0]:
# Strip the directory prefix from the entries of the training split list, in place
!sed -i 's#/content/datasets/unlabelled/##g' /content/datasets/VOCdataset/ImageSets/Main/train.txt

In [0]:
# Strip the .jpg extension so every entry is a bare image id. The dot is escaped and the
# match is anchored to the end of the line, so only a real trailing ".jpg" is removed
# (an unescaped "." would match any character followed by "jpg").
!sed -i 's#\.jpg$##' /content/datasets/VOCdataset/ImageSets/Main/train.txt

In [0]:
# Move the images from the unlabelled folder into the dataset's JPEGImages folder
!mv /content/datasets/unlabelled/*.jpg /content/datasets/VOCdataset/JPEGImages/

In [0]:
# Move the .xml annotation files from the unlabelled folder into the dataset's Annotations folder
!mv /content/datasets/unlabelled/*.xml /content/datasets/VOCdataset/Annotations/

In [0]:
# Return to /content, where the relative 'datasets' root and the .params files are resolved.
# The explicit %cd magic is used because a bare `cd` depends on IPython's automagic.
%cd /content

Finally, we retrain the model and evaluate its performance.

In [0]:
import time
from matplotlib import pyplot as plt
import numpy as np
import mxnet as mx
from mxnet import autograd, gluon
import gluoncv as gcv
from gluoncv.utils import download, viz
from gluoncv.data import VOCDetection
import argparse
import importlib
from gluoncv.utils.metrics.voc_detection import VOC07MApMetric

# Dataset name used as the first element of each VOCLike split
datasetName = "dataset"
# Number of passes over the training data
nepochs = 25

In [0]:
# Object categories of the dataset; used as VOCLike.CLASSES and passed to get_model
classes = ['apple','banana','orange']

In [0]:

import os

# Disable MXNet GPU peer-to-peer communication. MXNet reads this setting from the
# process environment, so it must be exported there; a plain Python variable of the
# same name has no effect on MXNet.
os.environ['MXNET_ENABLE_GPU_P2P'] = '0'
# Kept so any later reference to the module-level name still resolves.
MXNET_ENABLE_GPU_P2P = 0


class VOCLike(VOCDetection):
    """VOCDetection subclass whose category list is this notebook's `classes`."""

    # Category names exposed on the dataset class
    CLASSES = classes

    def __init__(self, root, splits, transform=None, index_map=None, preload_label=True):
        """Forward every argument, unchanged and in order, to VOCDetection.__init__."""
        super().__init__(root, splits, transform, index_map, preload_label)

# Training and test splits, both read from the 'datasets' root directory.
# The training split now also lists the images added by the distillation step.
dataset = VOCLike(root='datasets', splits=((datasetName, 'train'),))
test_dataset = VOCLike(root='datasets', splits=((datasetName, 'test'),))

net = gcv.model_zoo.get_model('ssd_512_resnet50_v1_custom', classes=classes,
    pretrained_base=False, transfer='voc')
# Start from the weights written by the first training run. load_parameters is the
# counterpart of save_parameters; load_params is deprecated.
net.load_parameters('ssd_512_resnet50_final.params')

from gluoncv.data.batchify import Tuple, Stack, Pad
from gluoncv.data.transforms.presets.ssd import SSDDefaultTrainTransform,SSDDefaultValTransform

def get_dataloader(net, train_dataset, val_dataset, data_shape, batch_size, num_workers):
    """Build the training and validation data loaders for an SSD network.

    Args:
        net: network called once on a dummy image to obtain its anchors.
        train_dataset: dataset wrapped with SSDDefaultTrainTransform.
        val_dataset: dataset wrapped with SSDDefaultValTransform.
        data_shape: side length used for both the width and the height of the input.
        batch_size: number of samples per batch, for both loaders.
        num_workers: worker count forwarded to both DataLoaders.

    Returns:
        A (train_loader, val_loader) tuple.
    """
    side = data_shape
    # One forward pass on an all-zero image, in training mode, yields the fixed anchors
    # that the training transform needs for target generation.
    with autograd.train_mode():
        dummy_image = mx.nd.zeros((1, 3, side, side))
        _cls_out, _box_out, anchors = net(dummy_image)
    anchors = anchors.as_in_context(mx.cpu())

    # Training samples are (image, cls_targets, box_targets); each part is stacked.
    train_batchify = Tuple(Stack(), Stack(), Stack())
    train_samples = train_dataset.transform(SSDDefaultTrainTransform(side, side, anchors))
    train_loader = gluon.data.DataLoader(
        train_samples,
        batch_size=batch_size,
        shuffle=True,
        batchify_fn=train_batchify,
        last_batch='rollover',
        num_workers=num_workers,
    )

    # Validation labels are padded with -1 instead of being stacked.
    eval_batchify = Tuple(Stack(), Pad(pad_val=-1))
    eval_samples = val_dataset.transform(SSDDefaultValTransform(side, side))
    val_loader = gluon.data.DataLoader(
        eval_samples,
        batch_size=batch_size,
        shuffle=False,
        batchify_fn=eval_batchify,
        last_batch='keep',
        num_workers=num_workers,
    )
    return train_loader, val_loader

# 512-pixel square inputs, batches of 8 images, num_workers=0
train_data, test_data = get_dataloader(
    net, dataset, test_dataset, data_shape=512, batch_size=8, num_workers=0)


# Start training (fine-tuning)
n_gpu = mx.context.num_gpus()
# Train on the first GPU when MXNet reports one; otherwise fall back to the CPU
# instead of failing on a machine without a usable GPU.
ctx = [mx.gpu(0)] if n_gpu > 0 else [mx.cpu()]
net.collect_params().reset_ctx(ctx)
trainer = gluon.Trainer(
    net.collect_params(), 'sgd',
    {'learning_rate': 0.001, 'wd': 0.0005, 'momentum': 0.9})

mbox_loss = gcv.loss.SSDMultiBoxLoss()
# Running averages of the two loss terms, reported while training
ce_metric = mx.metric.Loss('CrossEntropy')
smoothl1_metric = mx.metric.Loss('SmoothL1')

# Hybridizing does not depend on the epoch, so it is done once up front
# rather than repeated at the start of every epoch.
net.hybridize(static_alloc=True, static_shape=True)

for epoch in range(0, nepochs):
    # The reported loss averages restart at every epoch
    ce_metric.reset()
    smoothl1_metric.reset()
    btic = time.time()
    for i, batch in enumerate(train_data):
        batch_size = batch[0].shape[0]
        # batch = (images, cls_targets, box_targets); each part is split across ctx
        data = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0)
        cls_targets = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0)
        box_targets = gluon.utils.split_and_load(batch[2], ctx_list=ctx, batch_axis=0)
        with autograd.record():
            cls_preds = []
            box_preds = []
            for x in data:
                cls_pred, box_pred, _ = net(x)
                cls_preds.append(cls_pred)
                box_preds.append(box_pred)
            sum_loss, cls_loss, box_loss = mbox_loss(
                cls_preds, box_preds, cls_targets, box_targets)
            autograd.backward(sum_loss)
        # since we have already normalized the loss, we don't want to normalize
        # by batch-size anymore
        trainer.step(1)
        ce_metric.update(0, [l * batch_size for l in cls_loss])
        smoothl1_metric.update(0, [l * batch_size for l in box_loss])
        if i % 5 == 0:
            # Only query the metrics when they are actually reported
            name1, loss1 = ce_metric.get()
            name2, loss2 = smoothl1_metric.get()
            print('[Epoch {}][Batch {}], Speed: {:.3f} samples/sec, {}={:.3f}, {}={:.3f}'.format(
                epoch, i, batch_size/(time.time()-btic), name1, loss1, name2, loss2))
        # Restart the per-batch timer used for the speed figure
        btic = time.time()

#############################################################################################
# Save finetuned weights to disk
net.save_parameters('ssd_512_resnet50_finalDD.params')

In [0]:


def validate(net, val_data, ctx, eval_metric):
    """Run the detector over a validation loader and return the metric result.

    Args:
        net: detector; its NMS settings are set and it is hybridized here.
        val_data: iterable of batches where batch[0] holds images and batch[1] labels.
        ctx: list of contexts each batch is split across.
        eval_metric: metric object that is reset, updated once per batch and queried at the end.

    Returns:
        The value of eval_metric.get() after every batch has been processed.
    """
    eval_metric.reset()
    # NMS threshold and top-k limit applied to the detections
    net.set_nms(nms_thresh=0.45, nms_topk=400)
    net.hybridize(static_alloc=True, static_shape=True)
    for batch in val_data:
        images = gluon.utils.split_and_load(batch[0], ctx_list=ctx, batch_axis=0, even_split=False)
        labels = gluon.utils.split_and_load(batch[1], ctx_list=ctx, batch_axis=0, even_split=False)
        pred_ids, pred_scores, pred_boxes = [], [], []
        true_boxes, true_ids, true_difficults = [], [], []
        # Upper bound used to clip predicted boxes to the image size
        # (assumes batch[0] is laid out as (N, C, H, W) -- TODO confirm)
        clip_max = batch[0].shape[2]
        for x, y in zip(images, labels):
            # Predictions for this slice of the batch
            ids, scores, bboxes = net(x)
            pred_ids.append(ids)
            pred_scores.append(scores)
            pred_boxes.append(bboxes.clip(0, clip_max))
            # Ground truth: columns 0-3 are the box, column 4 the class id and,
            # when present, column 5 the "difficult" flag
            true_ids.append(y.slice_axis(axis=-1, begin=4, end=5))
            true_boxes.append(y.slice_axis(axis=-1, begin=0, end=4))
            if y.shape[-1] > 5:
                difficult = y.slice_axis(axis=-1, begin=5, end=6)
            else:
                difficult = None
            true_difficults.append(difficult)

        # Feed this batch's predictions and ground truth to the metric
        eval_metric.update(pred_boxes, pred_ids, pred_scores, true_boxes, true_ids, true_difficults)
    return eval_metric.get()


# Evaluate on the test loader with the VOC07 mAP metric at IoU 0.5
val_metric = VOC07MApMetric(iou_thresh=0.5, class_names=test_dataset.classes)
map_name, mean_ap = validate(net, test_data, ctx, val_metric)
# One "name=value" line per entry returned by the metric
report_lines = []
for metric_label, metric_value in zip(map_name, mean_ap):
    report_lines.append('{}={}'.format(metric_label, metric_value))
val_msg = '\n'.join(report_lines)
print(val_msg)

-------------------------------

# Using the model in LabelDetection

If you want to use the trained model with LabelDetection, you must download the following files:
- ssd_512_resnet50_finalDD.params
- datasets/classes.names