# Image Classification with GluonCV


## Configuration

Install GluonCV through `pip`

In [None]:
# IPython shell escape: install the gluoncv package with pip.
!pip install gluoncv

Import necessary modules

In [1]:
import os, time, shutil, zipfile
import matplotlib.pyplot as plt
import numpy as np

import mxnet as mx
from mxnet import gluon, image, init, nd
from mxnet import autograd as ag
from mxnet.gluon import nn
from mxnet.gluon.data.vision import transforms

import gluoncv
from gluoncv.utils import makedirs, download
from gluoncv.model_zoo import get_model

## Predict with Pre-trained Models

Let's load an image first.

In [None]:
# Fetch a sample picture, read it back as an NDArray, and display it.
cat_url = 'https://www.catster.com/wp-content/uploads/2017/07/gray-and-white-cat-asleep-with-whiskers-out.jpg'
filename = 'cat1.jpg'
download(cat_url, path='./' + filename)
img = image.imread(filename)
# imshow needs a NumPy array, so convert the NDArray first.
plt.imshow(img.asnumpy())

Choose a good model, load pre-trained weights with `pretrained=True`.

In [None]:
# Select the architecture by name; pretrained=True loads its pre-trained weights.
model_name = 'resnet152_v2'
net = get_model(model_name, pretrained=True)


Preprocess the image with the preset transformations.

In [None]:
# Apply GluonCV's preset ImageNet evaluation transform to the loaded image.
# The result is indexed like a batch further down (transformed_img[0]).
transformed_img = gluoncv.data.transforms.presets.imagenet.transform_eval(img)

What does the transformed image look like?

In [None]:
# `np` is already imported at the top of the notebook, so no re-import is needed here.
# Take the first item of the batch and move its leading axis to the end so that
# imshow receives the layout it expects (presumably channels-first -> channels-last).
plt.imshow(np.transpose(transformed_img[0].asnumpy(), (1, 2, 0)))

The transformation does the following two things:

1. Crops the center square
2. Normalizes the input image

With this, we can predict with one line of code:

In [None]:
# Forward pass: raw class scores for the (single-image) batch.
pred = net(transformed_img)
# Softmax turns the scores into probabilities; keep the first sample as NumPy.
prob = nd.softmax(pred)[0].asnumpy()
# Sanity check: display the total probability mass.
prob.sum()

In [None]:
# Compute the indices of the five highest-scoring classes before displaying
# them, so this cell also works when the notebook is run top to bottom.
ind = mx.nd.topk(pred, k=5)[0].astype('int').asnumpy().tolist()
ind

Check the top-5 predicted classes.

In [None]:
# Probabilities of the first sample and the indices of its five best classes.
prob = nd.softmax(pred)[0].asnumpy()
ind = nd.topk(pred, k=5)[0].astype('int').asnumpy().tolist()
print('The input picture is classified to be')
# `ind` holds exactly the k=5 requested indices, so iterate over it directly.
for class_idx in ind:
    print('- [%s], with probability %.3f.' % (net.classes[class_idx], prob[class_idx]))

We see the model gives the correct result. How about a smaller one?

You only need to change the name of the model from the above code.

In [None]:
# Same prediction pipeline as before, only with a smaller architecture.
model_name = 'MobileNet0.25'
net = get_model(model_name, pretrained=True)

# Forward pass, probabilities of the first sample, and its top-5 class indices.
pred = net(transformed_img)
prob = nd.softmax(pred)[0].asnumpy()
ind = nd.topk(pred, k=5)[0].astype('int').asnumpy().tolist()

print('The input picture is classified to be')
for class_idx in ind:
    print('- [%s], with probability %.3f.' % (net.classes[class_idx], prob[class_idx]))

The smaller model is not that confident, but still gives a good enough prediction.

## Transfer Learning with your own data

Now we are going to demonstrate how to transfer the knowledge from a pre-trained model to your own domain.

We use a sampled smaller dataset in this tutorial. Introduction to the entire dataset can be found at [this page](https://gluon-cv.mxnet.io/build/examples_classification/transfer_learning_minc.html#data-preparation).

In [2]:
# Download the sampled dataset archive into the working directory and unpack it there.
file_url = 'https://raw.githubusercontent.com/dmlc/web-data/master/gluoncv/classification/minc-2500-tiny.zip'
zip_file = download(file_url, path='./')
extract_dir = os.path.expanduser('./')
with zipfile.ZipFile(zip_file) as archive:  # read mode is the default
    archive.extractall(path=extract_dir)

Next we prepare the hyperparameters.

In [3]:
# Number of target classes; used to size the replacement output layer.
classes = 23

# Optimization settings consumed by the trainer and the training loop.
epochs = 5
lr = 0.001
per_device_batch_size = 1
momentum = 0.9
wd = 0.0001

# Learning-rate schedule settings; not referenced by the cells shown in this notebook.
lr_factor = 0.75
lr_steps = [10, 20, 30, np.inf]

num_gpus = 1
# Multi-device variant (not used in this notebook):
#num_workers = 8
#ctx = [mx.gpu(i) for i in range(num_gpus)] if num_gpus > 0 else [mx.cpu()]
#batch_size = per_device_batch_size * max(num_gpus, 1)

# Use the GPU when one is available and fall back to the CPU otherwise, so the
# notebook also runs on machines without a GPU.
ctx = mx.gpu() if mx.context.num_gpus() > 0 else mx.cpu()
batch_size = 5

In [4]:
# Display the batch size that the data loaders below will use.
batch_size

5

Next, we define the transformations, i.e. the preprocessing.
We implement the same preprocessing as is used for ImageNet.

Note: keeping the transformation consistent with the one used to train the original model is important in transfer learning.

In [5]:
# Strength of the random color and lighting augmentation applied while training.
jitter_param = 0.4
lighting_param = 0.1

# Per-channel normalization statistics shared by the training and evaluation pipelines.
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Training pipeline: random crop/flip/color augmentation, then tensor conversion
# and normalization.
transform_train = transforms.Compose([
    transforms.RandomResizedCrop(size=224),
    transforms.RandomFlipLeftRight(),
    transforms.RandomColorJitter(
        brightness=jitter_param,
        contrast=jitter_param,
        saturation=jitter_param,
    ),
    transforms.RandomLighting(alpha=lighting_param),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

# Evaluation pipeline: deterministic resize and center crop, then the same
# tensor conversion and normalization as in training.
transform_test = transforms.Compose([
    transforms.Resize(size=256, keep_ratio=True),
    transforms.CenterCrop(size=224),
    transforms.ToTensor(),
    transforms.Normalize(mean=channel_mean, std=channel_std),
])

Now we can load the data into train, validation, and test.

For validation and test, we use the same transformation.

In [6]:
# Root of the extracted dataset and its three split sub-folders.
path = './minc-2500-tiny'
train_path, val_path, test_path = (
    os.path.join(path, split) for split in ('train', 'val', 'test'))


def _folder_loader(folder, transform, shuffle):
    """Build a DataLoader over the images under `folder`.

    `transform` is applied to the first element (the image) of each sample;
    batches hold `batch_size` samples and are shuffled only when `shuffle` is true.
    """
    dataset = gluon.data.vision.ImageFolderDataset(folder)
    return gluon.data.DataLoader(
        dataset.transform_first(transform),
        batch_size=batch_size, shuffle=shuffle)


# Only the training split is augmented and shuffled; validation and test share
# the deterministic evaluation transform.
train_data = _folder_loader(train_path, transform_train, shuffle=True)
val_data = _folder_loader(val_path, transform_test, shuffle=False)
test_data = _folder_loader(test_path, transform_test, shuffle=False)

Now the data is prepared. We define the model, then replace its output layer with one sized for our classes and set up the trainer, metric, and loss.

In [7]:
# Start from a pre-trained network and display its current output layer.
model_name = 'MobileNet1.0'
finetune_net = gluoncv.model_zoo.get_model(model_name, pretrained=True)
finetune_net.output

Dense(1024 -> 1000, linear)

In [8]:
# Swap the output layer for a new dense layer with one unit per target class.
# The layer is created inside the network's name scope, as in the original cell.
with finetune_net.name_scope():
    new_head = nn.Dense(units=classes)
    finetune_net.output = new_head
finetune_net.output

Dense(None -> 23, linear)

In [9]:
# Initialize the freshly created output layer on the target device, then move
# every parameter of the network to that device.
finetune_net.output.initialize(init=init.Xavier(), ctx=ctx, force_reinit=True)
net_params = finetune_net.collect_params()
net_params.reset_ctx(ctx)

# Trainer, metric, loss
sgd_options = dict(learning_rate=lr, momentum=momentum, wd=wd)
trainer = gluon.Trainer(net_params, optimizer='sgd', optimizer_params=sgd_options)
metric = mx.metric.Accuracy()
L = gluon.loss.SoftmaxCrossEntropyLoss()

Next, a function to measure the performance on the validation and test datasets.

In [10]:
def test(net, val_data, ctx):
    """Measure the accuracy of `net` over every batch in `val_data`.

    Each batch is indexed as (data, label); both are moved to `ctx` before the
    forward pass. Returns whatever `mx.metric.Accuracy.get()` yields, which the
    callers unpack as a two-item (name, value) result.
    """
    accuracy = mx.metric.Accuracy()
    for batch in val_data:
        inputs = batch[0].as_in_context(ctx)
        targets = batch[1].as_in_context(ctx)
        accuracy.update(targets, net(inputs))

    return accuracy.get()

Finally we can start our training! 

In [15]:

# Fine-tune for `epochs` passes over the training data, reporting training and
# validation accuracy after each pass and test accuracy at the very end.
for epoch in range(epochs):
    # Restart the running training accuracy for this epoch.
    metric.reset()

    for batch in train_data:
        data = batch[0].as_in_context(ctx)
        label = batch[1].as_in_context(ctx)
        # Record the forward pass so that gradients can be computed from the loss.
        with ag.record():
            outputs = finetune_net(data)
            loss = L(outputs, label)
        loss.backward()

        # Normalize the gradient by the number of samples actually present in
        # this batch: the final batch of an epoch may hold fewer than
        # `batch_size` samples, and dividing by the nominal size would
        # under-weight its update.
        trainer.step(data.shape[0])

        metric.update(label, outputs)

    _, train_acc = metric.get()

    _, val_acc = test(finetune_net, val_data, ctx)

    print('[Epoch %d] Train-acc: %.3f,  | Val-acc: %.3f |' %
             (epoch, train_acc, val_acc))

# Final evaluation on the held-out test split.
_, test_acc = test(finetune_net, test_data, ctx)
print('[Finished] Test-acc: %.3f' % (test_acc))

[Epoch 0] Train-acc: 0.348,  | Val-acc: 0.217 |
[Epoch 1] Train-acc: 0.330,  | Val-acc: 0.174 |
[Epoch 2] Train-acc: 0.409,  | Val-acc: 0.152 |
[Epoch 3] Train-acc: 0.452,  | Val-acc: 0.174 |
[Epoch 4] Train-acc: 0.409,  | Val-acc: 0.239 |
[Finished] Test-acc: 0.217


In [16]:
# Save the fine-tuned network's parameters to the file 'tinynet'.
finetune_net.save_parameters('tinynet')

Although this is a small example, it is basically how we train a model on a much larger dataset.

## Further resources

On the GluonCV Classification Model Zoo page, we provide:

- Training scripts for ImageNet and CIFAR10.
- Training hyperparameters to reproduce.
- Training Logs to compare speed and accuracy.