In [1]:
import sys
sys.path.insert(0, '../../../python')
import os
import tarfile
import multiprocessing
import time
import numpy as np
import mxnet as mx
from mxnet import nd, autograd, gluon
from mxnet.gluon.model_zoo import vision as models
from mxnet import nd, autograd, gluon
from mxnet.metric import Accuracy
from mxnet.gluon.data.vision.datasets import ImageFolderDataset


In [2]:
# Location of the dataset archive and the local layout this notebook reads from.
url = "https://s3.us-east-2.amazonaws.com/mxnet-public/101_ObjectCategories.tar.gz"
dataset_name = "101_ObjectCategories"
data_folder = "data"
training_path = os.path.join(data_folder, dataset_name)
testing_path = os.path.join(data_folder, "{}_test".format(dataset_name))

# Download and unpack whenever one of the two split folders is missing, so a
# fresh kernel (or a previously interrupted run) always ends up with usable data.
# NOTE(review): assumes the archive unpacks into both `training_path` and
# `testing_path` -- confirm against the tarball contents.
if not (os.path.isdir(training_path) and os.path.isdir(testing_path)):
    if not os.path.isdir(data_folder):
        os.makedirs(data_folder)
    tar_path = mx.gluon.utils.download(url, path=data_folder)
    # The archive comes from a fixed URL; `extractall` should not be pointed at
    # archives from untrusted sources.
    with tarfile.open(tar_path, "r:gz") as tar:
        tar.extractall(data_folder)
    print('Data extracted')

In [3]:
# --- Tunable settings for the input pipeline and numeric precision ---
DTYPE = 'float16'                          # dtype the input batches are cast to before the forward pass
BATCH_SIZE = 16                            # samples per mini-batch handed to the DataLoaders
NUM_WORKERS = multiprocessing.cpu_count()  # DataLoader worker processes: one per CPU reported by the OS
EDGE = 224                                 # target length (pixels) of the short side / crop edge
SIZE = (EDGE,) * 2                         # square crop size passed to mx.image.center_crop

def transform(image, label):
    """Turn one dataset sample into a fixed-size, channel-first image.

    The image's short side is resized to ``EDGE``, a centred ``SIZE`` crop is
    taken, and the last axis is moved to the front with ``(2, 0, 1)``
    (presumably HWC -> CHW for images as loaded by ImageFolderDataset).

    Parameters
    ----------
    image : NDArray
        Image as produced by the dataset.
    label
        Class label; passed through untouched.

    Returns
    -------
    tuple
        ``(transposed_image, label)``.
    """
    short_side_fixed = mx.image.resize_short(image, EDGE)
    # center_crop also returns the crop rectangle, which is not needed here.
    square, _ = mx.image.center_crop(short_side_fixed, SIZE)
    return nd.transpose(square, (2, 0, 1)), label

# One folder-backed dataset per split; every sample goes through `transform`.
dataset_train, dataset_test = (
    ImageFolderDataset(root=split_root, transform=transform)
    for split_root in (training_path, testing_path)
)

# Batched, multi-process loaders. Only the training split is shuffled.
train_data = gluon.data.DataLoader(
    dataset_train, batch_size=BATCH_SIZE, shuffle=True, num_workers=NUM_WORKERS)
test_data = gluon.data.DataLoader(
    dataset_test, batch_size=BATCH_SIZE, shuffle=False, num_workers=NUM_WORKERS)


In [4]:
# Run on a GPU when MXNet reports at least one, otherwise fall back to the CPU.
if mx.test_utils.list_gpus():
    ctx = mx.gpu()
else:
    ctx = mx.cpu()

# Accuracy tracker shared by the training loop and test(), and the training objective.
metric = Accuracy()
loss = gluon.loss.SoftmaxCrossEntropyLoss()

def test(ctx, val_data):
    metric.reset()
    val_data.reset()
    for batch in val_data:
        data = data.as_in_context(ctx).astype(DTYPE)
        label = label.as_in_context(ctx).astype(DTYPE)
        outputs = []
        for x in data:
            outputs.append(net(x))
        metric.update(label, outputs)
    return metric.get()

In [5]:
# The float32 and reduced-precision setups differ only in the float16 cast and
# the optimizer's multi_precision flag, so the network is built once.
full_precision = (DTYPE == 'float32')

net = models.get_model(name='resnet50_v2', ctx=ctx, pretrained=False, classes=256)
net.collect_params().initialize(mx.init.Xavier(magnitude=2.24), ctx=ctx)
if not full_precision:
    # Cast the parameters before hybridizing, as in the original ordering.
    net.cast(np.float16)
net.hybridize()

# None leaves the optimizer at its defaults (same as omitting the argument).
trainer = gluon.Trainer(
    net.collect_params(),
    'sgd',
    None if full_precision else {'multi_precision': True},
)
    

In [7]:
# Train for `num_epochs` passes over `train_data`, reporting throughput and
# running accuracy every 50 batches and validation accuracy after each epoch.
num_epochs = 2
for epoch in range(num_epochs):
    tic = time.time()    # start of the epoch
    metric.reset()
    btic = time.time()   # start of the current batch
    for i, (data, label) in enumerate(train_data):
        # Move to the device and cast the inputs before recording; the cast is
        # not part of the graph that needs gradients.
        data = data.as_in_context(ctx).astype(DTYPE)
        label = label.as_in_context(ctx)
        with autograd.record():
            z = net(data)
            L = loss(z, label)
        L.backward()
        # Pass the real batch size to step(); the last batch may be short.
        trainer.step(data.shape[0])
        metric.update(label, z)
        if i and not i%50:
            name, acc = metric.get()
            # Throughput of this batch, based on the samples actually processed.
            print('Epoch[%d] Batch [%d]\tSpeed: %f samples/sec\t%s=%f'%(
                           epoch, i, data.shape[0]/(time.time()-btic), name, acc))
        btic = time.time()

    # Read the training accuracy before test(), which resets the shared metric.
    name, acc = metric.get()
    print('[Epoch %d] training: %s=%f'%(epoch, name, acc))
    print('[Epoch %d] time cost: %f'%(epoch, time.time()-tic))
    name, val_acc = test(ctx, test_data)
    print('[Epoch %d] validation: %s=%f'%(epoch, name, val_acc))

Epoch[0] Batch [50]	Speed: 363.499228 samples/sec	accuracy=0.165441
Epoch[0] Batch [100]	Speed: 367.810496 samples/sec	accuracy=0.161510
Epoch[0] Batch [150]	Speed: 367.443968 samples/sec	accuracy=0.170530


Process Process-24:
Process Process-5:
Process Process-27:
Process Process-10:
Process Process-23:
Process Process-28:
Process Process-11:
Process Process-26:
Process Process-18:
Process Process-21:
Process Process-22:
Process Process-20:
Process Process-9:
Process Process-19:
Process Process-4:
Process Process-30:
Process Process-32:
Process Process-7:
Process Process-12:
Process Process-6:
Process Process-8:
Process Process-3:
Process Process-25:
Process Process-31:
Process Process-1:
Process Process-29:
Process Process-17:
Process Process-2:
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call last):
Traceback (most recent call l

  File "../../../python/mxnet/gluon/data/dataloader.py", line 116, in worker_loop
    idx, samples = key_queue.get()
  File "/home/ubuntu/anaconda3/envs/mxnet_p36/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "../../../python/mxnet/gluon/data/dataloader.py", line 116, in worker_loop
    idx, samples = key_queue.get()
  File "/home/ubuntu/anaconda3/envs/mxnet_p36/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "../../../python/mxnet/gluon/data/dataloader.py", line 116, in worker_loop
    idx, samples = key_queue.get()
  File "../../../python/mxnet/gluon/data/dataloader.py", line 116, in worker_loop
    idx, samples = key_queue.get()
  File "../../../python/mxnet/gluon/data/dataloader.py", line 116, in worker_loop
    idx, samples = key_queue.get()
  File "../../../python/mxnet/gluon/data/dataloader.py", line 116, in worker_loop
    idx, samples = key_queue.get()
  File "../../../python/mxnet/gluon/data/d

Epoch[0] Batch [200]	Speed: 367.142434 samples/sec	accuracy=0.173197


  File "/home/ubuntu/anaconda3/envs/mxnet_p36/lib/python3.6/multiprocessing/queues.py", line 94, in get
    res = self._recv_bytes()
  File "/home/ubuntu/anaconda3/envs/mxnet_p36/lib/python3.6/multiprocessing/queues.py", line 93, in get
    with self._rlock:
  File "/home/ubuntu/anaconda3/envs/mxnet_p36/lib/python3.6/multiprocessing/queues.py", line 93, in get
    with self._rlock:
  File "/home/ubuntu/anaconda3/envs/mxnet_p36/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "../../../python/mxnet/gluon/data/dataloader.py", line 116, in worker_loop
    idx, samples = key_queue.get()
  File "../../../python/mxnet/gluon/data/dataloader.py", line 116, in worker_loop
    idx, samples = key_queue.get()
  File "/home/ubuntu/anaconda3/envs/mxnet_p36/lib/python3.6/multiprocessing/synchronize.py", line 96, in __enter__
    return self._semlock.__enter__()
  File "../../../python/mxnet/gluon/data/dataloader.py", line 116, in worker_

  File "/home/ubuntu/anaconda3/envs/mxnet_p36/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/ubuntu/anaconda3/envs/mxnet_p36/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "../../../python/mxnet/gluon/data/dataloader.py", line 119, in <listcomp>
    batch = batchify_fn([dataset[i] for i in samples])
  File "../../../python/mxnet/gluon/data/dataloader.py", line 119, in <listcomp>
    batch = batchify_fn([dataset[i] for i in samples])
  File "/home/ubuntu/anaconda3/envs/mxnet_p36/lib/python3.6/multiprocessing/process.py", line 258, in _bootstrap
    self.run()
  File "/home/ubuntu/anaconda3/envs/mxnet_p36/lib/python3.6/multiprocessing/process.py", line 93, in run
    self._target(*self._args, **self._kwargs)
  File "../../../python/mxnet/gluon/data/dataloader.py", line 116, in worker_loop
    idx, samples = key_queue.get()
  File "../../../python/mxnet/gluon/data/vision/datas

KeyboardInterrupt: 