In [1]:
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import _init_paths
import os
import sys
import numpy as np
import argparse
import pprint
import pdb
import time

import torch
from torch.autograd import Variable
import torch.nn as nn
import torch.optim as optim

import torchvision.transforms as transforms
from torch.utils.data.sampler import Sampler

from roi_data_layer.roidb import combined_roidb
from roi_data_layer.roibatchLoader import roibatchLoader
from model.utils.config import cfg, cfg_from_file, cfg_from_list, get_output_dir
from model.utils.net_utils import weights_normal_init, save_net, load_net, \
      adjust_learning_rate, save_checkpoint, clip_gradient
from model.utils.net_utils import _smooth_l1_loss
from model.faster_rcnn.vgg16 import vgg16
from model.faster_rcnn.resnet import resnet

# Pin GPU selection for this notebook: order devices by PCI bus ID and make
# only device index 2 visible to CUDA.
os.environ.update({
    "CUDA_DEVICE_ORDER": "PCI_BUS_ID",
    "CUDA_VISIBLE_DEVICES": "2",
})

class sampler(Sampler):
  """Index sampler that shuffles whole batches rather than single items.

  Indices ``0 .. train_size - 1`` are cut into consecutive runs of
  ``batch_size``. Each iteration emits those runs in a freshly shuffled
  order while keeping the indices inside a run contiguous. Indices that do
  not fill a complete run are always emitted last, in ascending order.
  """

  def __init__(self, train_size, batch_size):
    """Precompute the quantities that stay fixed across iterations.

    Args:
      train_size: total number of indices to sample from.
      batch_size: number of consecutive indices kept together.
    """
    self.num_data = train_size
    self.num_per_batch = int(train_size / batch_size)
    self.batch_size = batch_size
    # Row vector [0, 1, ..., batch_size - 1]; added to every batch start offset.
    self.range = torch.arange(0, batch_size).long().view(1, batch_size)
    self.leftover_flag = (train_size % batch_size) != 0
    if self.leftover_flag:
      # Trailing indices that do not fill a complete batch.
      first_leftover = self.num_per_batch * batch_size
      self.leftover = torch.arange(first_leftover, train_size).long()

  def __iter__(self):
    """Return an iterator over one newly shuffled ordering of the indices."""
    # Random order of batch start offsets, as a column vector.
    batch_starts = torch.randperm(self.num_per_batch).view(-1, 1) * self.batch_size
    # One row per batch: start offset + [0 .. batch_size - 1].
    self.rand_num = batch_starts.expand(self.num_per_batch, self.batch_size) + self.range

    flat_indices = self.rand_num.view(-1)
    if self.leftover_flag:
      flat_indices = torch.cat((flat_indices, self.leftover), 0)
    self.rand_num_view = flat_indices

    return iter(self.rand_num_view)

  def __len__(self):
    """Return the total number of indices (``train_size``)."""
    return self.num_data


# Images per mini-batch. The sampler, the dataset and the DataLoader must all
# agree on this value, so it is defined once instead of repeating the literal.
BATCH_SIZE = 8

# Load the image database and ROI database for the VOC 2007 trainval split.
imdb, roidb, ratio_list, ratio_index = combined_roidb("voc_2007_trainval")
train_size = len(roidb)

# Sampler that shuffles whole batches of BATCH_SIZE consecutive indices.
sampler_batch = sampler(train_size, BATCH_SIZE)

dataset = roibatchLoader(roidb, ratio_list, ratio_index, BATCH_SIZE,
                         imdb.num_classes, training=True)

dataloader = torch.utils.data.DataLoader(dataset, batch_size=BATCH_SIZE,
                                         sampler=sampler_batch, num_workers=5)

# Initialize the tensor holders: one-element placeholders that are allocated,
# moved to the GPU and wrapped as Variables in a single step. They are resized
# and filled from a real batch further down in this cell.
im_data = Variable(torch.FloatTensor(1).cuda())
im_info = Variable(torch.FloatTensor(1).cuda())
num_boxes = Variable(torch.LongTensor(1).cuda())
gt_boxes = Variable(torch.FloatTensor(1).cuda())

# Build the ResNet-based Faster R-CNN (depth argument 101, pretrained weights,
# class-agnostic box regression) and move it to the GPU.
fasterRCNN = resnet(imdb.classes, 101, pretrained=True, class_agnostic=True)
fasterRCNN.create_architecture()
fasterRCNN.cuda()

# Fetch a single mini-batch and copy its first four fields into the GPU
# holders, in the order the batch provides them:
# data[0] -> im_data, data[1] -> im_info, data[2] -> gt_boxes, data[3] -> num_boxes.
data = next(iter(dataloader))
holders = (im_data, im_info, gt_boxes, num_boxes)
with torch.no_grad():
    for slot, holder in enumerate(holders):
        holder.resize_(data[slot].size()).copy_(data[slot])

# fasterRCNN.zero_grad()
# rois, cls_prob, bbox_pred, \
# rpn_loss_cls, rpn_loss_box, \
# RCNN_loss_cls, RCNN_loss_bbox, \
# rois_label = fasterRCNN(im_data, im_info, gt_boxes, num_boxes)

# loss = rpn_loss_cls.mean() + rpn_loss_box.mean() \
#    + RCNN_loss_cls.mean() + RCNN_loss_bbox.mean()

Loaded dataset `voc_2007_trainval` for training
Set proposal method: gt
Appending horizontally-flipped training examples...
voc_2007_trainval gt roidb loaded from /home/kuchoco97/work/detection_network/faster-rcnn.pytorch/data/cache/voc_2007_trainval_gt_roidb.pkl
done
Preparing training data...
done
before filtering, there are 10022 images...
after filtering, there are 10022 images...
Loading pretrained weights from data/pretrained_model/resnet101_caffe.pth


In [2]:
# Feed the image batch to the base model to obtain the base feature map.
base_feat = fasterRCNN.RCNN_base(im_data)

# Feed the base feature map to the RPN to obtain RoIs and the RPN losses.
rois, rpn_loss_cls, rpn_loss_bbox = fasterRCNN.RCNN_rpn(base_feat, im_info, gt_boxes, num_boxes)

# Training phase: use the ground-truth boxes to refine the proposals and to
# produce per-RoI labels, regression targets and loss weights.
roi_data = fasterRCNN.RCNN_proposal_target(rois, gt_boxes, num_boxes)
rois, rois_label, rois_target, rois_inside_ws, rois_outside_ws = roi_data

# Flatten the labels to a 1-D long tensor.
rois_label = rois_label.view(-1).long()

# Collapse the leading dimensions of each remaining tensor, keeping dim 2.
rois_target, rois_inside_ws, rois_outside_ws = (
    t.view(-1, t.size(2))
    for t in (rois_target, rois_inside_ws, rois_outside_ws)
)

	nonzero()
Consider using one of the following signatures instead:
	nonzero(*, bool as_tuple) (Triggered internally at  ../torch/csrc/utils/python_arg_parser.cpp:962.)
  rpn_keep = Variable(rpn_label.view(-1).ne(-1).nonzero().view(-1))


In [12]:
# Inspect the base feature map's shape (the recorded run shows [8, 1024, 51, 38]).
base_feat.shape

torch.Size([8, 1024, 51, 38])

In [3]:
# Do RoI pooling based on the predicted RoIs.
# Supported modes: 'align' (RCNN_roi_align) or 'pool' (RCNN_roi_pool).
POOLING_MODE = 'align'

# Each RoI is flattened to a 5-column row before pooling.
if POOLING_MODE == 'align':
    roi_feat = fasterRCNN.RCNN_roi_align(base_feat, rois.view(-1, 5))
elif POOLING_MODE == 'pool':
    roi_feat = fasterRCNN.RCNN_roi_pool(base_feat, rois.view(-1, 5))
else:
    # Fail loudly: without this branch an unknown mode would leave the pooled
    # features undefined on a fresh kernel, or silently reuse a stale value
    # from an earlier run of this cell.
    raise ValueError(
        "Unknown POOLING_MODE {!r}; expected 'align' or 'pool'".format(POOLING_MODE)
    )

# Feed the pooled features to the top model. A separate intermediate name is
# used so that re-running this cell never feeds head output back into the head.
pooled_feat = fasterRCNN._head_to_tail(roi_feat)

In [6]:
# Compute the bounding-box regression offsets for every sampled RoI.
bbox_pred = fasterRCNN.RCNN_bbox_pred(pooled_feat)

# A per-class regression head emits 4 offsets per class, so the group of 4
# columns matching each RoI's label has to be selected. The model in the first
# cell is built with class_agnostic=True; presumably its head then emits a
# single set of 4 offsets (TODO confirm against the model definition). In that
# case there is nothing to select, and gathering with class labels > 0 would
# index past the only available box set, so the selection is skipped.
if bbox_pred.size(1) != 4:
    num_box_sets = bbox_pred.size(1) // 4
    # View as (num_rois, num_box_sets, 4) so dim 1 can be indexed by label.
    bbox_pred_view = bbox_pred.view(bbox_pred.size(0), num_box_sets, 4)
    # Repeat each RoI's label across its 4 offsets to form the gather index.
    gather_index = rois_label.view(rois_label.size(0), 1, 1).expand(rois_label.size(0), 1, 4)
    bbox_pred_select = torch.gather(bbox_pred_view, 1, gather_index)
    bbox_pred = bbox_pred_select.squeeze(1)