# Training SSD with ChainerCV on the coin dataset

ChainerCV を使ったSSDの学習

In [1]:
import argparse
import copy
import numpy as np

import chainer
from chainer.datasets import ConcatenatedDataset
from chainer.datasets import TransformDataset
from chainer.optimizer_hooks import WeightDecay
from chainer import serializers
from chainer import training
from chainer.training import extensions
from chainer.training import triggers

#from chainercv.datasets import voc_bbox_label_names
#from chainercv.datasets import VOCBboxDataset
from lib.voc_bbox_dataset import VOCBboxDataset

from chainercv.extensions import DetectionVOCEvaluator
from chainercv.links.model.ssd import GradientScaling
from chainercv.links.model.ssd import multibox_loss
from chainercv.links import SSD300
from chainercv.links import SSD512
from chainercv import transforms

from chainercv.links.model.ssd import random_crop_with_bbox_constraints
from chainercv.links.model.ssd import random_distort
from chainercv.links.model.ssd import resize_with_random_interpolation

from datetime import datetime
import matplotlib.pyplot as plt

import os

In [2]:
def show_img(img):
    """Display ``img`` with matplotlib, hiding the tick labels on both axes.

    Args:
        img: Image data in any form accepted by ``plt.imshow``.
    """
    plt.imshow(img, interpolation="none")
    # tick_params takes booleans for the label switches. The string 'off'
    # is truthy, so on current matplotlib it would leave the labels visible.
    plt.tick_params(labelbottom=False, labelleft=False)
    plt.show()

In [3]:
class MultiboxTrainChain(chainer.Chain):
    """Training wrapper that turns a detector's outputs into a scalar loss.

    The wrapped ``model`` is called on a batch of images and its two outputs
    are passed, together with the ground-truth targets, to ``multibox_loss``.

    Args:
        model: Callable link returning ``(mb_locs, mb_confs)`` for a batch.
        alpha: Weight applied to the localization loss before it is added to
            the confidence loss.
        k: Forwarded to ``multibox_loss`` as its last positional argument
            (presumably the hard-negative mining ratio; confirm against the
            ChainerCV documentation).
    """

    def __init__(self, model, alpha=1, k=3):
        super(MultiboxTrainChain, self).__init__()
        self.alpha = alpha
        self.k = k
        # Register the detector as a child link of this chain.
        with self.init_scope():
            self.model = model

    def __call__(self, imgs, gt_mb_locs, gt_mb_labels):
        """Compute, report and return the combined multibox loss.

        Args:
            imgs: Batch of input images passed to the wrapped model.
            gt_mb_locs: Ground-truth localization targets.
            gt_mb_labels: Ground-truth label targets.

        Returns:
            ``loc_loss * alpha + conf_loss``.
        """
        predicted_locs, predicted_confs = self.model(imgs)
        loc_loss, conf_loss = multibox_loss(
            predicted_locs, predicted_confs,
            gt_mb_locs, gt_mb_labels,
            self.k)
        total_loss = loc_loss * self.alpha + conf_loss

        # Publish the total and both components under this link's namespace.
        observation = {
            'loss': total_loss,
            'loss/loc': loc_loss,
            'loss/conf': conf_loss,
        }
        chainer.reporter.report(observation, self)

        return total_loss

In [4]:
class Transform(object):
    """Training-time augmentation plus SSD target encoding for one sample.

    Args:
        coder: Object providing ``to_cpu()`` and ``encode(bbox, label)``.
            A copy is taken and that copy is moved to the CPU.
        size: Side length of the square output image.
        mean: Value used as the expansion fill and subtracted from the
            final image.
    """

    def __init__(self, coder, size, mean):
        # Copy first, then move the copy to the CPU.
        cpu_coder = copy.copy(coder)
        cpu_coder.to_cpu()
        self.coder = cpu_coder

        self.size = size
        self.mean = mean

    def __call__(self, in_data):
        """Augment one ``(img, bbox, label)`` sample and encode its targets.

        The steps run in a fixed order: color distortion, optional random
        expansion, constrained random crop, resize to ``size`` x ``size``,
        random horizontal flip, mean subtraction, and target encoding.

        Returns:
            A tuple ``(img, mb_loc, mb_label)``.
        """
        img, bbox, label = in_data
        out_shape = (self.size, self.size)

        # Step 1: color augmentation.
        img = random_distort(img)

        # Step 2: expand the canvas when the random draw is nonzero, and
        # shift the boxes by the same offset.
        if np.random.randint(2):
            img, expand_info = transforms.random_expand(
                img, fill=self.mean, return_param=True)
            bbox = transforms.translate_bbox(
                bbox,
                y_offset=expand_info['y_offset'],
                x_offset=expand_info['x_offset'])

        # Step 3: random crop; keep only the labels of the boxes that
        # crop_bbox reports as retained.
        img, crop_info = random_crop_with_bbox_constraints(
            img, bbox, return_param=True)
        bbox, kept = transforms.crop_bbox(
            bbox,
            y_slice=crop_info['y_slice'],
            x_slice=crop_info['x_slice'],
            allow_outside_center=False,
            return_param=True)
        label = label[kept['index']]

        # Step 4: resize the image and rescale the boxes from the cropped
        # size to the output size.
        _, src_height, src_width = img.shape
        img = resize_with_random_interpolation(img, out_shape)
        bbox = transforms.resize_bbox(bbox, (src_height, src_width), out_shape)

        # Step 5: random horizontal flip, mirrored onto the boxes.
        img, flip_info = transforms.random_flip(
            img, x_random=True, return_param=True)
        bbox = transforms.flip_bbox(
            bbox, out_shape, x_flip=flip_info['x_flip'])

        # Final preparation for the SSD network: subtract the mean in place
        # and encode the boxes and labels.
        img -= self.mean
        mb_loc, mb_label = self.coder.encode(bbox, label)

        return img, mb_loc, mb_label

In [5]:
# Command-line options, declared as (flag, add_argument keyword options).
# The bare '-f' option is presumably there so that the `-f <connection file>`
# argument passed to a Jupyter kernel does not make parse_args() fail;
# confirm before removing it.
parser = argparse.ArgumentParser()
for flag, options in (
        ('--model', dict(choices=('ssd300', 'ssd512'), default='ssd300')),
        ('--batchsize', dict(type=int, default=32)),
        ('--gpu', dict(type=int, default=0)),
        ('--out', dict(default='result')),
        ('--resume', dict()),
        ('-f', dict()),
):
    parser.add_argument(flag, **options)
args = parser.parse_args()

In [6]:
# Write each run into its own timestamped directory under "result".
# NOTE: this replaces whatever value was parsed for --out.
run_stamp = datetime.now().strftime("%Y%m%d-%H%M%S")
args.out = os.path.join("result", run_stamp)
args.out

'result\\20180826-205956'

In [7]:
# Force a fresh run: an empty string is falsy, so the later
# `if args.resume:` check skips loading a snapshot.
args.resume = ""

In [8]:
# Select which dataset to train on. Use the commented line below instead
# to switch to the banknote ("OSATU") dataset.
DATASET_KIND = "COIN"
#DATASET_KIND = "OSATU"

if DATASET_KIND == "OSATU":
    # Banknote dataset.
    voc_bbox_label_names = VOCBboxDataset.OSATU_LABEL_NAME
    dataset_dir = os.path.join(".", "osatu")
    print("お札データセット")
elif DATASET_KIND == "COIN":
    # Coin dataset.
    voc_bbox_label_names = VOCBboxDataset.COIN_LABEL_NAME
    dataset_dir = os.path.join(".", "coin")
    print("硬貨データセット")
else:
    # Fail fast. Printing and continuing would end in an unrelated NameError
    # below, or silently reuse labels left over from an earlier notebook run.
    raise ValueError("データセットフォルダを正しく指定して下さい")
print(voc_bbox_label_names)

硬貨データセット
('1', '5', '10', '50', '100', '500')


In [9]:
# Both SSD variants are built with the same keyword arguments: one
# foreground class per dataset label, and the 'imagenet' pretrained weights.
ssd_kwargs = dict(
    n_fg_class=len(voc_bbox_label_names),
    pretrained_model='imagenet')
if args.model == 'ssd300':
    model = SSD300(**ssd_kwargs)
elif args.model == 'ssd512':
    model = SSD512(**ssd_kwargs)
print("use model: " + args.model)

use model: ssd300


In [10]:
# Apply the 'evaluate' preset to the detector and wrap it in the loss chain.
model.use_preset('evaluate')
train_chain = MultiboxTrainChain(model)

# A non-negative --gpu value selects that CUDA device and moves the model to it.
use_gpu = args.gpu >= 0
if use_gpu:
    device = chainer.cuda.get_device_from_id(args.gpu)
    device.use()
    model.to_gpu()

In [11]:
# Training data: the 'trainval' split, with the augmentation/encoding
# Transform applied to every sample that is fetched.
train_source = VOCBboxDataset(
    data_dir=dataset_dir, split='trainval', kind=DATASET_KIND)
augment = Transform(model.coder, model.insize, model.mean)
train = TransformDataset(train_source, augment)
train_iter = chainer.iterators.SerialIterator(train, args.batchsize)

In [12]:
# Evaluation data: the 'test' split, with the difficult flags kept and
# returned. The iterator makes a single pass in fixed order.
test_options = dict(
    data_dir=dataset_dir,
    split='test',
    use_difficult=True,
    return_difficult=True,
    kind=DATASET_KIND)
test = VOCBboxDataset(**test_options)
test_iter = chainer.iterators.SerialIterator(
    test, args.batchsize, repeat=False, shuffle=False)

In [13]:
# Notebook display: number of samples in the training and test datasets.
len(train), len(test)

(12, 8)

In [14]:
# No learning rate is passed to the optimizer here; the ExponentialShift
# extension registered below initialises it to 1e-3.
optimizer = chainer.optimizers.MomentumSGD()
optimizer.setup(train_chain)
# Parameters named 'b' get gradient scaling; all others get weight decay.
for param in train_chain.params():
    hook = GradientScaling(2) if param.name == 'b' else WeightDecay(0.0005)
    param.update_rule.add_hook(hook)

updater = training.updaters.StandardUpdater(
    train_iter, optimizer, device=args.gpu)
stop_trigger = (12000, 'iteration')
trainer = training.Trainer(updater, stop_trigger, args.out)

# Multiply the learning rate by 0.1 at iterations 8000 and 10000.
lr_shift = extensions.ExponentialShift('lr', 0.1, init=1e-3)
lr_schedule = triggers.ManualScheduleTrigger([8000, 10000], 'iteration')
trainer.extend(lr_shift, trigger=lr_schedule)

# Evaluate on the test iterator every 1000 iterations.
evaluator = DetectionVOCEvaluator(
    test_iter, model,
    use_07_metric=True,
    label_names=voc_bbox_label_names)
trainer.extend(evaluator, trigger=(1000, 'iteration'))

# Logging interval; the commented line is the per-100-iterations alternative.
#log_interval = 100, 'iteration'
log_interval = 1, 'epoch'
report_keys = [
    'epoch', 'iteration', 'lr',
    'main/loss', 'main/loss/loc', 'main/loss/conf',
    'validation/main/map',
]
trainer.extend(extensions.LogReport(trigger=log_interval))
trainer.extend(extensions.observe_lr(), trigger=log_interval)
trainer.extend(extensions.PrintReport(report_keys), trigger=log_interval)
# Optional progress bar, currently disabled.
#trainer.extend(extensions.ProgressBar(update_interval=10))

# Full trainer snapshot every 2000 iterations, plus a snapshot of the model
# alone at iteration 12000.
trainer.extend(extensions.snapshot(), trigger=(2000, 'iteration'))
model_snapshot = extensions.snapshot_object(
    model, 'model_iter_{.updater.iteration}')
trainer.extend(model_snapshot, trigger=(12000, 'iteration'))

# Restore trainer state only when a snapshot path was given.
if args.resume:
    serializers.load_npz(args.resume, trainer)

In [None]:
# Run the trainer built from the updater, stop trigger and extensions above.
trainer.run()