Skip to content
This repository was archived by the owner on Nov 17, 2023. It is now read-only.
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
18 changes: 15 additions & 3 deletions example/rcnn/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -45,11 +45,12 @@ https://github.com/rbgirshick/fast-rcnn/tree/master/data/demo
* Start training by running `python train.py`. Variable args can be found by running
`python train.py --help`.
* Training can be done on the CPU; modify `train.py` accordingly.
* Training can be done on multiple GPUs.
```
usage: train.py [-h] [--image_set IMAGE_SET] [--year YEAR]
[--root_path ROOT_PATH] [--devkit_path DEVKIT_PATH]
[--pretrained PRETRAINED] [--epoch EPOCH] [--prefix PREFIX]
[--gpu GPU_ID] [--begin_epoch BEGIN_EPOCH]
[--gpus GPU_ID] [--begin_epoch BEGIN_EPOCH]
[--end_epoch END_EPOCH] [--frequent FREQUENT]

Train a Fast R-CNN network
Expand All @@ -67,13 +68,24 @@ optional arguments:
pretrained model prefix
--epoch EPOCH epoch of pretrained model
--prefix PREFIX new model prefix
--gpu GPU_ID GPU device to train with
--gpus GPU_ID GPU devices to train with
--begin_epoch BEGIN_EPOCH
begin epoch of training
--end_epoch END_EPOCH
end epoch of training
--frequent FREQUENT frequency of logging
--kv_store KV_STORE kv_store type used in multi-device training
--work_load_list WORK_LOAD_LIST
list of work load for different devices
```
- Performance in terms of training speed

| GPUs | batch size | samples per second |
| --- | --- | --- |
| 1 | 2 | 3.02 |
| 2 | 4 | 3.80 |
| 4 | 8 | 5.96 |


## Testing
* Start testing by running `python test.py`. Variable args can be found by running
Expand Down Expand Up @@ -129,4 +141,4 @@ This repository used code from [MXNet](https://github.com/dmlc/mxnet),
[caffe](https://github.com/BVLC/caffe). Training data are from
[Pascal VOC](http://host.robots.ox.ac.uk/pascal/VOC/),
[ImageNet](http://image-net.org/). Model comes from
[VGG16](http://www.robots.ox.ac.uk/~vgg/research/very_deep/).
[VGG16](http://www.robots.ox.ac.uk/~vgg/research/very_deep/).
2 changes: 1 addition & 1 deletion example/rcnn/helper/processing/image_processing.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,7 +12,7 @@ def resize(im, target_size, max_size):
"""
im_shape = im.shape
im_size_min = np.min(im_shape[0:2])
im_size_max = np.min(im_shape[0:2])
im_size_max = np.max(im_shape[0:2])
im_scale = float(target_size) / float(im_size_min)
# prevent bigger axis from being more than max_size:
if np.round(im_scale * im_size_max) > max_size:
Expand Down
4 changes: 1 addition & 3 deletions example/rcnn/rcnn/callback.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,10 +22,8 @@ def __call__(self, param):
speed = self.frequent * self.batch_size / (time.time() - self.tic)
if param.eval_metric is not None:
name, value = param.eval_metric.get()
cls, cls_value = param.cls_metric.get()
bbox, bbox_value = param.bbox_metric.get()
logging.info("Epoch[%d] Batch [%d]\tSpeed: %.2f samples/sec\tTrain-%s=%f,\t%s=%f,\t%s=%f",
param.epoch, count, speed, name, value, cls, cls_value, bbox, bbox_value)
param.epoch, count, speed, name[0], value[0], name[1], value[1], name[2], value[2])
else:
logging.info("Iter[%d] Batch [%d]\tSpeed: %.2f samples/sec",
param.epoch, count, speed)
Expand Down
46 changes: 29 additions & 17 deletions example/rcnn/rcnn/data_iter.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@


class ROIIter(mx.io.DataIter):
def __init__(self, roidb, batch_size=2, shuffle=False, mode='train'):
def __init__(self, roidb, ctx, batch_size=2, shuffle=False, mode='train', work_load_list=None):
"""
This Iter will provide roi data to Fast R-CNN network
:param roidb: must be preprocessed
Expand All @@ -15,9 +15,11 @@ def __init__(self, roidb, batch_size=2, shuffle=False, mode='train'):
super(ROIIter, self).__init__()

self.roidb = roidb
self.ctx = ctx
self.batch_size = batch_size
self.shuffle = shuffle
self.mode = mode
self.work_load_list = work_load_list
if self.mode != 'train':
assert self.batch_size == 1

Expand All @@ -30,16 +32,17 @@ def __init__(self, roidb, batch_size=2, shuffle=False, mode='train'):
self.data = None
self.label = None
self.get_batch()
self.data_name = self.data.keys()
self.label_name = self.label.keys()

@property
def provide_data(self):
return [(k, v.shape) for k, v in self.data.items()]
return [('data', self.data[0].shape), ('rois', self.data[1].shape)]

@property
def provide_label(self):
return [(k, v.shape) for k, v in self.label.items()]
return [('cls_prob_label', self.label[0].shape),
('bbox_loss_target', self.label[1].shape),
('bbox_loss_inside_weight', self.label[2].shape),
('bbox_loss_outside_weight', self.label[3].shape)]

def reset(self):
self.cur = 0
Expand All @@ -53,26 +56,31 @@ def next(self):
if self.iter_next():
self.get_batch()
self.cur += self.batch_size
return mx.io.DataBatch(data=self.data, label=self.label,
pad=self.getpad(), index=self.getindex())
if self.mode == 'train':
return mx.io.DataBatch(data=self.data, label=self.label,
pad=self.getpad(), index=self.getindex(),
provide_data=self.provide_data, provide_label=self.provide_label)
else:
return mx.io.DataBatch(data=self.data, label=self.label,
pad=self.getpad(), index=self.getindex())
else:
raise StopIteration

def getindex(self):
return self.cur / self.batch_size

def getpad(self):
return self.batch_size - self.size % self.batch_size
if self.cur + self.batch_size > self.size:
return self.cur + self.batch_size - self.size
else:
return 0

def get_batch(self):
if self.mode == 'train':
self.batch = self._get_train_batch()
self.data = {'data': self.batch['data'],
'rois': self.batch['rois']}
self.label = {'cls_prob_label': self.batch['labels'],
'bbox_loss_target': self.batch['bbox_targets'],
'bbox_loss_inside_weight': self.batch['bbox_inside_weights'],
'bbox_loss_outside_weight': self.batch['bbox_outside_weights']}
self.data = [mx.nd.array(self.batch['data']), mx.nd.array(self.batch['rois'])]
self.label = [mx.nd.array(self.batch['labels']), mx.nd.array(self.batch['bbox_targets']),
mx.nd.array(self.batch['bbox_inside_weights']), mx.nd.array(self.batch['bbox_outside_weights'])]
else:
self.batch = self._get_test_batch()
self.data = {'data': self.batch['data'],
Expand All @@ -85,9 +93,13 @@ def _get_train_batch(self):
:return: training batch (e.g. 128 samples)
"""
cur_from = self.cur
cur_to = min(cur_from + self.batch_size, self.size)
roidb = [self.roidb[i] for i in range(cur_from, cur_to)]
batch = minibatch.get_minibatch(roidb, self.num_classes)
cur_to = cur_from + self.batch_size
if cur_to <= self.size:
roidb = [self.roidb[i] for i in range(cur_from, cur_to)]
else:
pad = cur_to - self.size
roidb = self.roidb[cur_from:] + self.roidb[:pad]
batch = minibatch.get_minibatch(roidb, self.num_classes, self.ctx, self.work_load_list)
return batch

def _get_test_batch(self):
Expand Down
16 changes: 14 additions & 2 deletions example/rcnn/rcnn/metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,20 @@ def __init__(self):
super(SmoothL1LossMetric, self).__init__('SmoothL1Loss')

def update(self, labels, preds):
bbox_loss = preds[0].asnumpy()
label = labels[0].asnumpy()
bbox_loss = preds[1].asnumpy()
label = labels[1].asnumpy()
bbox_loss = np.sum(bbox_loss)
self.sum_metric += bbox_loss
self.num_inst += label.shape[0]


class Accuracy(mx.metric.EvalMetric):
def __init__(self):
super(Accuracy, self).__init__('accuracy')

def update(self, labels, preds):
pred_label = mx.ndarray.argmax_channel(preds[0]).asnumpy().astype('int32')
label = labels[0].asnumpy().astype('int32')

self.sum_metric += (pred_label.flat == label.flat).sum()
self.num_inst += len(pred_label.flat)
20 changes: 15 additions & 5 deletions example/rcnn/rcnn/minibatch.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
from helper.processing import image_processing
from helper.processing.bbox_regression import expand_bbox_regression_targets
from rcnn.config import config
from mxnet.executor_manager import _split_input_slice


def get_minibatch(roidb, num_classes):
def get_minibatch(roidb, num_classes, ctx, work_load_list=None):
"""
return minibatch of images in roidb
:param roidb: subset of main database
Expand All @@ -40,20 +40,30 @@ def get_minibatch(roidb, num_classes):

# im_array: [num_images, c, h, w]
im_array, im_scales = get_image_array(roidb, config.TRAIN.SCALES, random_scale_indexes)

rois_array = list()
labels_array = list()
bbox_targets_array = list()
bbox_inside_array = list()

for im_i in range(num_images):
if work_load_list is None:
work_load_list = [1] * len(ctx)
assert isinstance(work_load_list, list) and len(work_load_list) == len(ctx), \
"Invalid settings for work load. "
slices = _split_input_slice(num_images, work_load_list)

idx_in_slice = []
for islice in slices:
num_im = islice.stop - islice.start
for i in range(num_im):
idx_in_slice.append(i)
for im_i, idx in enumerate(idx_in_slice):
im_rois, labels, bbox_targets, bbox_inside_weights, overlaps = \
sample_rois(roidb[im_i], fg_rois_per_image, rois_per_image, num_classes)

# project im_rois
# do not round roi
rois = im_rois * im_scales[im_i]
batch_index = im_i * np.ones((rois.shape[0], 1))
batch_index = idx * np.ones((rois.shape[0], 1))
rois_array_this_image = np.hstack((batch_index, rois))
rois_array.append(rois_array_this_image)

Expand Down
88 changes: 34 additions & 54 deletions example/rcnn/rcnn/solver.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,102 +6,82 @@
from callback import Speedometer
from config import config


class Solver(object):
def __init__(self, prefix,
symbol, ctx=None,
begin_epoch=0, num_epoch=None,
kv_store='local',
arg_params=None, aux_params=None,
optimizer='sgd', **kwargs):
optimizer='sgd',
max_data_shape=None, **kwargs):
self.prefix = prefix
self.symbol = symbol
self.ctx = ctx
if self.ctx is None:
self.ctx = mx.cpu()
self.begin_epoch = begin_epoch
self.num_epoch = num_epoch
self.kv_store = kv_store
self.arg_params = arg_params
self.aux_params = aux_params
self.grad_params = None
self.executor = None
self.optimizer = optimizer
self.updater = None
self.max_data_shape = max_data_shape
self.kwargs = kwargs.copy()

self.arg_names = None
self.param_names = None
self.aux_names = None

def get_params(self, grad_req):
arg_names = self.symbol.list_arguments()
self.arg_names = arg_names
arg_shapes, out_shapes, aux_shapes = self.symbol.infer_shape(data=(1, 3, 224, 224), rois=(1, 5))
if grad_req != 'null':
self.grad_params = {}
param_names = []
for name, shape in zip(arg_names, arg_shapes):
if not (name.endswith('data') or name.endswith('rois') or
name.endswith('inside_weight') or name.endswith('outside_weight') or
name.endswith('label') or name.endswith('target') or
name.startswith('conv1') or name.startswith('conv2')):
self.grad_params[name] = mx.nd.zeros(shape, self.ctx)
param_names.append(name)
self.param_names = list(param_names)
aux_names = self.symbol.list_auxiliary_states()
self.aux_names = aux_names
self.aux_params = {k: mx.nd.zeros(s, self.ctx) for k, s in zip(aux_names, aux_shapes)}

def fit(self, train_data,
grad_req='write',
frequent=20,
logger=None):
(kvstore, update_on_kvstore) = mx.model._create_kvstore(
self.kv_store, len(self.ctx), self.arg_params)
if logger is None:
logger = logging
logger.info('Start training with %s', str(self.ctx))
speedometer_param = namedtuple('BatchEndParams',
['epoch', 'nbatch', 'eval_metric', 'cls_metric', 'bbox_metric'])

batch_end_callback = Speedometer(train_data.batch_size, frequent=frequent)
epoch_end_callback = mx.callback.do_checkpoint(self.prefix)

self.get_params(grad_req)
self.optimizer = mx.optimizer.create(self.optimizer, rescale_grad=(1.0 / config.TRAIN.BATCH_SIZE), **self.kwargs)
self.updater = mx.optimizer.get_updater(self.optimizer)

eval_metric = mx.metric.create("accuracy")
eval_metric = metric.Accuracy()
cls_metric = metric.LogLossMetric()
bbox_metric = metric.SmoothL1LossMetric()
eval_metrics = mx.metric.CompositeEvalMetric()
for child_metric in [eval_metric, cls_metric, bbox_metric]:
eval_metrics.add(child_metric)
max_data_shape = self.max_data_shape

# begin training
for epoch in range(self.begin_epoch, self.num_epoch):
nbatch = 0
train_data.reset()
eval_metric.reset()
cls_metric.reset()
bbox_metric.reset()
for databatch in train_data:
nbatch += 1
for k, v in databatch.data.items():
self.arg_params[k] = mx.nd.array(v, self.ctx)
for k, v in databatch.label.items():
self.arg_params[k] = mx.nd.array(v, self.ctx)
self.executor = self.symbol.bind(self.ctx, self.arg_params, args_grad=self.grad_params,
grad_req=grad_req, aux_states=self.aux_params)
assert len(self.symbol.list_arguments()) == len(self.executor.grad_arrays)
update_dict = {name: nd for name, nd
in zip(self.symbol.list_arguments(), self.executor.grad_arrays) if nd}
output_dict = {name: nd for name, nd
in zip(self.symbol.list_outputs(), self.executor.outputs)}
self.executor.forward(is_train=True)
self.executor.backward()

for key, arr in update_dict.items():
self.updater(key, arr, self.arg_params[key])

label = self.arg_params['cls_prob_label']
pred = output_dict['cls_prob_output']
bb_target = self.arg_params['bbox_loss_target']
bb_loss = output_dict['bbox_loss_output']
eval_metric.update([label], [pred])
cls_metric.update([label], [pred])
bbox_metric.update([bb_target], [bb_loss])

# print speed and accuracy metric
batch_end_params = speedometer_param(epoch=epoch, nbatch=nbatch, eval_metric=eval_metric,
cls_metric=cls_metric, bbox_metric=bbox_metric)
batch_end_callback(batch_end_params)

if epoch_end_callback:
epoch_end_callback(epoch, self.symbol, self.arg_params, self.aux_params)
name, value = eval_metric.get()
logger.info(" --->Epoch[%d] Train-%s=%f", epoch, name, value)
self.optimizer = mx.optimizer.create(self.optimizer, rescale_grad=(1.0 / config.TRAIN.BATCH_SIZE), **self.kwargs)
mx.model._train_multi_device(self.symbol, self.ctx, self.arg_names, self.param_names,
self.aux_names, self.arg_params, self.aux_params,
begin_epoch=self.begin_epoch, end_epoch=self.num_epoch,
epoch_size=None, optimizer=self.optimizer,
train_data=train_data, eval_data=None,
eval_metric=eval_metrics,
epoch_end_callback=epoch_end_callback,
batch_end_callback=batch_end_callback,
kvstore=kvstore, update_on_kvstore=update_on_kvstore,
logger=logger, work_load_list=None, monitor=None,
mutable_data_shape=True, max_data_shape=self.max_data_shape)
2 changes: 1 addition & 1 deletion example/rcnn/tools/test_net.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ def test_net(imageset, year, root_path, devkit_path, prefix, epoch, ctx):

# load testing data
voc, roidb = load_test_roidb(imageset, year, root_path, devkit_path)
test_data = ROIIter(roidb, batch_size=1, shuffle=False, mode='test')
test_data = ROIIter(roidb, ctx=ctx, batch_size=1, shuffle=False, mode='test')

# load model
args, auxs = load_param(prefix, epoch, convert=True, ctx=ctx)
Expand Down
Loading