#### * Reference: https://github.com/jwyang/faster-rcnn.pytorch

# Training code of faster-rcnn
 - Goal: understand overall training flow & check whether decreases


In [2]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import _init_paths
import os
import sys
import numpy as np
import pprint
import time

import torch
import torch.nn as nn
import torch.optim as optim

import torchvision.transforms as transforms
from torch.utils.data.sampler import Sampler

from roi_data_layer.roidb import combined_roidb
from roi_data_layer.roibatchLoader import roibatchLoader
from model.utils.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
from model.utils.net_utils import weights_normal_init, save_net, load_net, \
      adjust_learning_rate, save_checkpoint, clip_gradient

from model.faster_rcnn.vgg16 import vgg16
from model.faster_rcnn.resnet import resnet

## Arguments & Configuration

In [30]:
# Arguments
dataset_name = 'pascal_voc'
net = 'vgg16'
start_epoch = 1
max_epochs = 20
disp_interval = 100
checkpoint_interval = 10000
save_dir = 'models'
num_workers = 0
cuda = True
large_scale = False               
mGPUs = False
batch_size = 2
class_agnostic = False

# config optimization
optimizer = "sgd"
lr = 0.001
lr_decay_step = 5
lr_decay_gamma = 0.1

# set training session
session = 1

# resume trained model
resume = False
checksession = 1
checkepoch = 1
checkpoint = 0

# log and display
use_tfboard = False

imdb_name = "voc_2007_trainval"
imdbval_name = "voc_2007_test"
set_cfgs = ['ANCHOR_SCALES', '[8, 16, 32]', 'ANCHOR_RATIOS', '[0.5,1,2]', 'MAX_NUM_GT_BOXES', '20']
cfg_file = "cfgs/{}_ls.yml".format(net) if large_scale else "cfgs/{}.yml".format(net)

if cfg_file is not None:
    cfg_from_file(cfg_file)
if set_cfgs is not None:
    cfg_from_list(set_cfgs)

cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = cuda    
cfg.CUDA = True

print('Using config:')
pprint.pprint(cfg)
np.random.seed(cfg.RNG_SEED)


Using config:
{'ANCHOR_RATIOS': [0.5, 1, 2],
 'ANCHOR_SCALES': [8, 16, 32],
 'CROP_RESIZE_WITH_MAX_POOL': False,
 'CUDA': True,
 'DATA_DIR': '/home/cvlab/junha/git/faster-rcnn.pytorch/data',
 'DEDUP_BOXES': 0.0625,
 'EPS': 1e-14,
 'EXP_DIR': 'vgg16',
 'FEAT_STRIDE': [16],
 'GPU_ID': 0,
 'MATLAB': 'matlab',
 'MAX_NUM_GT_BOXES': 20,
 'MOBILENET': {'DEPTH_MULTIPLIER': 1.0,
               'FIXED_LAYERS': 5,
               'REGU_DEPTH': False,
               'WEIGHT_DECAY': 4e-05},
 'PIXEL_MEANS': array([[[102.9801, 115.9465, 122.7717]]]),
 'POOLING_MODE': 'align',
 'POOLING_SIZE': 7,
 'RESNET': {'FIXED_BLOCKS': 1, 'MAX_POOL': False},
 'RNG_SEED': 3,
 'ROOT_DIR': '/home/cvlab/junha/git/faster-rcnn.pytorch',
 'TEST': {'BBOX_REG': True,
          'HAS_RPN': True,
          'MAX_SIZE': 1000,
          'MODE': 'nms',
          'NMS': 0.3,
          'PROPOSAL_METHOD': 'gt',
          'RPN_MIN_SIZE': 16,
          'RPN_NMS_THRESH': 0.7,
          'RPN_POST_NMS_TOP_N': 300,
          'RPN_PRE_NMS_

# Custom Sampler class
 - argument of 'Dataloader' (sampler='')
 - define how to generate indices
 - if given, argument 'shuffle' should be None

In [23]:
class sampler(Sampler):
    def __init__(self, train_size, batch_size):
        self.num_data = train_size
        self.num_per_batch = int(train_size / batch_size)
        self.batch_size = batch_size
        self.range = torch.arange(0,batch_size).view(1, batch_size).long()
        self.leftover_flag = False
        if train_size % batch_size:
            self.leftover = torch.arange(self.num_per_batch*batch_size, train_size).long()
            self.leftover_flag = True

    def __iter__(self):
        rand_num = torch.randperm(self.num_per_batch).view(-1,1) * self.batch_size
        self.rand_num = rand_num.expand(self.num_per_batch, self.batch_size) + self.range

        self.rand_num_view = self.rand_num.view(-1)

        if self.leftover_flag:
            self.rand_num_view = torch.cat((self.rand_num_view, self.leftover),0)

        return iter(self.rand_num_view)

    def __len__(self):
        return self.num_data

# Get train dataloader object
<br>
 (1) Load data from files

In [24]:
# train set
# -- Note: Use validation set and disable the flipped to enable faster loading.
cfg.TRAIN.USE_FLIPPED = True
cfg.USE_GPU_NMS = cuda
imdb, roidb, ratio_list, ratio_index = combined_roidb(imdb_name)
train_size = len(roidb)

print('{:d} roidb entries'.format(len(roidb)))

Loaded dataset `voc_2007_trainval` for training
Set proposal method: gt
Appending horizontally-flipped training examples...
voc_2007_trainval gt roidb loaded from /home/cvlab/junha/git/faster-rcnn.pytorch/data/cache/voc_2007_trainval_gt_roidb.pkl
done
Preparing training data...
Image sizes loaded from /home/cvlab/junha/git/faster-rcnn.pytorch/data/cache/voc_2007_trainval_sizes.pkl
done
before filtering, there are 10022 images...
after filtering, there are 10022 images...
10022 roidb entries


(2) initialize dataset & sampler object <br>
(3) get dataloader object

In [25]:
sampler_batch = sampler(train_size, batch_size)

dataset = roibatchLoader(roidb, ratio_list, ratio_index, batch_size, \
                       imdb.num_classes, training=True)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=batch_size,
                        sampler=sampler_batch, num_workers=num_workers)

In [26]:
# initilize the input tensor holder here.
im_data = torch.FloatTensor(1)
im_info = torch.FloatTensor(1)
num_boxes = torch.LongTensor(1)
gt_boxes = torch.FloatTensor(1)

print(num_boxes)

# ship to cuda
if cuda:
    im_data = im_data.cuda()
    im_info = im_info.cuda()
    num_boxes = num_boxes.cuda()
    gt_boxes = gt_boxes.cuda()


tensor([ 1.3977e+14])


# Initialize object detection network

In [27]:
# initilize the network here.
if net == 'vgg16':
    fasterRCNN = vgg16(imdb.classes, pretrained=True, class_agnostic=class_agnostic)
fasterRCNN.create_architecture()

if cuda:
    fasterRCNN.cuda()

Loading pretrained weights from data/pretrained_model/vgg16_caffe.pth


# Get parameters of model
 - we can set different value of learning rate, weight_decay params, etc.
 - we can filter params whoose requires_grad is False (freezing network)

In [28]:
params = []
for key, value in dict(fasterRCNN.named_parameters()).items():
    if value.requires_grad:
        if 'bias' in key:
            params += [{'params':[value],'lr':lr*(cfg.TRAIN.DOUBLE_BIAS + 1), \
                        'weight_decay': cfg.TRAIN.BIAS_DECAY and cfg.TRAIN.WEIGHT_DECAY or 0}]
        else:
            params += [{'params':[value],'lr':lr, 'weight_decay': cfg.TRAIN.WEIGHT_DECAY}]
optimizer = torch.optim.SGD(params, momentum=cfg.TRAIN.MOMENTUM)

# Main training iteration
 - get input data
 - forward
 - get loss
 - backward
 - save checkpoint (model weights)

In [29]:
output_dir = save_dir + "/" + net + "/" + dataset_name
if not os.path.exists(output_dir):
    os.makedirs(output_dir)

iters_per_epoch = int(train_size / batch_size)

for epoch in range(start_epoch, max_epochs + 1):
    # setting to train mode
    fasterRCNN.train()
    loss_temp = 0
    start = time.time()

    if epoch % (lr_decay_step + 1) == 0:
        adjust_learning_rate(optimizer, lr_decay_gamma)
        lr *= lr_decay_gamma

    for step, data in enumerate(dataloader):
        im_data.data.resize_(data[0].size()).copy_(data[0])
        im_info.data.resize_(data[1].size()).copy_(data[1])
        gt_boxes.data.resize_(data[2].size()).copy_(data[2])
        num_boxes.data.resize_(data[3].size()).copy_(data[3])

        '''
        Your code below
        
        1. forward path
        arguemnt & return values of forward() function(defined in 'lib/model/faster_rcnn/faster_rcnn.py')
        argument: im_data, im_info, gt_boxes, num_boxes
        return values: rois, cls_prob, bbox_pred, rpn_loss_cls, rpn_loss_bbox, RCNN_loss_cls, RCNN_loss_bbox, rois_label
        
        2. define total loss term

        3. backward path
        
        '''
        
        loss_temp += loss.item()
        if step % disp_interval == 0:
            end = time.time()
            if step > 0:
                loss_temp /= (disp_interval + 1)

            loss_rpn_cls = rpn_loss_cls.item()
            loss_rpn_box = rpn_loss_box.item()
            loss_rcnn_cls = RCNN_loss_cls.item()
            loss_rcnn_box = RCNN_loss_bbox.item()
            fg_cnt = torch.sum(rois_label.data.ne(0))
            bg_cnt = rois_label.data.numel() - fg_cnt

            print("[session %d][epoch %2d][iter %4d/%4d] loss: %.4f, lr: %.2e" \
                                    % (session, epoch, step, iters_per_epoch, loss_temp, lr))
            print("\t\t\tfg/bg=(%d/%d), time cost: %f" % (fg_cnt, bg_cnt, end-start))
            print("\t\t\trpn_cls: %.4f, rpn_box: %.4f, rcnn_cls: %.4f, rcnn_box %.4f" \
                          % (loss_rpn_cls, loss_rpn_box, loss_rcnn_cls, loss_rcnn_box))

            loss_temp = 0
            start = time.time()

    save_name = os.path.join(output_dir, 'faster_rcnn_{}_{}_{}.pth'.format(session, epoch, step))
    save_checkpoint({
        'session': session,
        'epoch': epoch + 1,
        'model': fasterRCNN.state_dict(),
        'optimizer': optimizer.state_dict(),
        'pooling_mode': cfg.POOLING_MODE,
        'class_agnostic': class_agnostic,
    }, save_name)
    print('save model: {}'.format(save_name))


[session 1][epoch  1][iter    0/2505] loss: 4.3540, lr: 1.00e-03
			fg/bg=(110/914), time cost: 0.554538
			rpn_cls: 0.6332, rpn_box: 0.1427, rcnn_cls: 3.2903, rcnn_box 0.2877
[session 1][epoch  1][iter  100/2505] loss: 1.3776, lr: 1.00e-03
			fg/bg=(67/957), time cost: 55.616909
			rpn_cls: 0.1685, rpn_box: 0.0559, rcnn_cls: 0.3553, rcnn_box 0.1442


KeyboardInterrupt: 

# After checking loss decreasing, 
 (0) Before running demo, please make sure whether images/ directory is empty. (remove '.ipynb_checkpoints/' or any other files/dirs)<br>
 (1) put your own images which contains at least one person into images/ directory <br>
 (2) type 'python demo.py --net vgg16 --checksession 1 --checkepoch $EPOCH --checkpoint 1669 --cuda --load_dir models' <br>
 on root dir of project <br>
 (3) check the result of object detection at images/ directory
 
 