In [1]:
%load_ext autoreload
%autoreload 2
import pdb

In [2]:
import os

import matplotlib
from tqdm import tqdm

from utils.config import opt
from data.dataset import Dataset, TestDataset, inverse_normalize
from model import FasterRCNNVGG16
from torch.autograd import Variable
from torch.utils import data as data_
from trainer import FasterRCNNTrainer
from utils import array_tool as at
from utils.vis_tool import visdom_bbox
from utils.eval_tool import eval_detection_voc

# fix for ulimit: raise the soft limit on open file descriptors to 20480.
# https://github.com/pytorch/pytorch/issues/973#issuecomment-346405667
import resource

rlimit = resource.getrlimit(resource.RLIMIT_NOFILE)
# setrlimit() raises ValueError when the requested soft limit exceeds the hard
# limit, so clamp the target to the hard limit unless that limit is unlimited.
nofile_soft = 20480
if rlimit[1] != resource.RLIM_INFINITY:
    nofile_soft = min(nofile_soft, rlimit[1])
resource.setrlimit(resource.RLIMIT_NOFILE, (nofile_soft, rlimit[1]))

# Select the non-interactive 'agg' matplotlib backend.
# NOTE(review): utils.vis_tool is imported before this call; if it imports
# pyplot, the backend may already be chosen by then — confirm.
matplotlib.use('agg')
import numpy as np
import cupy as cp

from model.utils.bbox_tools import bbox2loc, bbox_iou, loc2bbox
from model.utils.nms import non_maximum_suppression

from collections import namedtuple
import time
from torch.nn import functional as F
from model.utils.creator_tool import AnchorTargetCreator, ProposalTargetCreator

from torch import nn
import torch as t
from torch.autograd import Variable
from utils import array_tool as at
from utils.vis_tool import Visualizer

from utils.config import opt
# from torchnet.meter import ConfusionMeter, AverageValueMeter

    the python code for non_maximum_suppression is about 2x slow
    It is strongly recommended to build cython code: 
    `cd model/utils/nms/; python3 build.py build_ext --inplace
  `cd model/utils/nms/; python3 build.py build_ext --inplace''')


In [3]:
# Dataset and loader built from the project config `opt`; one sample per
# batch, shuffled order.
dataset = Dataset(opt)

print('load data')
dataloader = data_.DataLoader(
    dataset,
    batch_size=1,
    shuffle=True,
    pin_memory=True,
    num_workers=opt.num_workers,
)

# Test dataset and loader: one sample per batch, fixed order.
testset = TestDataset(opt)
test_dataloader = data_.DataLoader(
    testset,
    batch_size=1,
    shuffle=False,
    pin_memory=True,
    num_workers=opt.test_num_workers,
)

load data


In [4]:
# Build the detector with its default constructor arguments.
faster_rcnn = FasterRCNNVGG16()
print('model construct completed')

model construct completed


In [5]:
# Wrap the model in the project's trainer object and call .cuda() on it.
trainer = FasterRCNNTrainer(faster_rcnn).cuda()

In [6]:
# Bookkeeping values; neither is read again in the cells visible here.
best_map = 0
lr_ = opt.lr  # learning rate taken from the project config

In [7]:
# Pull a single batch from the training loader to step through by hand.
img, bbox_, label_, scale = next(iter(dataloader))

In [8]:
img.shape,bbox_.shape, label_.shape, scale.shape

(torch.Size([1, 3, 600, 749]),
 torch.Size([1, 2, 4]),
 torch.Size([1, 2]),
 torch.Size([1]))

In [9]:
# Move the sample to the GPU; the image is additionally cast to float.
# NOTE(review): the `bbox`/`label` bound here are overwritten in a later cell
# before being read; the following cells keep using `bbox_`/`label_`.
img, bbox, label = img.cuda().float(), bbox_.cuda(), label_.cuda()

In [None]:
# Call the trainer directly on the sample (this cell has no recorded
# execution). The cells below appear to retrace this call step by step —
# TODO confirm against FasterRCNNTrainer.
trainer(img, bbox_, label_, scale)

In [10]:
 # Alias the sample under the plural names used by the cells below.
 # `scale` already carries the name those cells use, so it is not re-assigned.
 imgs, bboxes, labels = img, bbox_, label_

In [11]:
# Guard: the walkthrough only handles a leading (batch) dimension of 1.
n = bboxes.shape[0]
if not n == 1:
    raise ValueError('Currently only batch size 1 is supported.')

# imgs.shape unpacks into four values; the last two are kept as the image size.
_, _, H, W = imgs.shape
img_size = H, W

In [12]:
# Run the model's `extractor` sub-module on the image batch.
features = trainer.faster_rcnn.extractor(imgs)

In [13]:
features.shape

torch.Size([1, 512, 37, 46])

In [14]:
trainer.faster_rcnn.rpn

RegionProposalNetwork(
  (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (score): Conv2d(512, 18, kernel_size=(1, 1), stride=(1, 1))
  (loc): Conv2d(512, 36, kernel_size=(1, 1), stride=(1, 1))
)

In [15]:
# Run the RPN on the feature map; it returns five values, unpacked here.
(rpn_locs, rpn_scores, rois,
 roi_indices, anchor) = trainer.faster_rcnn.rpn(features, img_size, scale)

In [16]:
rpn_locs.shape, rpn_scores.shape, rois.shape, roi_indices.shape, anchor.shape

(torch.Size([1, 15318, 4]),
 torch.Size([1, 15318, 2]),
 (2000, 4),
 (2000,),
 (15318, 4))

In [17]:
# Take element 0 along the leading axis of the batched tensors.
bbox, label = bboxes[0], labels[0]
rpn_score, rpn_loc = rpn_scores[0], rpn_locs[0]
# rois has no leading batch axis in the recorded shapes, so it is used as is.
roi = rois

In [18]:
# Feed the proposals and ground truth to the trainer's proposal target creator,
# which returns RoIs plus loc and label targets (128 rows each in the recorded run).
# `at` is utils.array_tool; tonumpy converts the tensors to numpy arrays.
# Original author's note: this conversion is no longer needed in PyTorch 0.4.
sample_roi, gt_roi_loc, gt_roi_label = trainer.proposal_target_creator(
    roi,
    at.tonumpy(bbox),
    at.tonumpy(label),
    trainer.loc_normalize_mean,
    trainer.loc_normalize_std,
)

In [19]:
sample_roi.shape,gt_roi_loc.shape, gt_roi_label.shape

((128, 4), (128, 4), (128,))

In [20]:
# One zero per sampled RoI. Presumably the batch index of each RoI (batch
# size is 1) — TODO confirm against the head's signature.
sample_roi_index = t.zeros(len(sample_roi))

In [21]:
# Run the model's `head` on the feature map and the sampled RoIs; it returns
# two tensors (shapes (128, 84) and (128, 21) in the recorded run).
roi_cls_loc, roi_score = trainer.faster_rcnn.head(features, sample_roi, sample_roi_index)

In [22]:
roi_cls_loc.shape, roi_score.shape

(torch.Size([128, 84]), torch.Size([128, 21]))

In [23]:
# Build per-anchor loc and label targets from the ground-truth boxes (as numpy),
# the anchors returned by the RPN, and the image size.
gt_rpn_loc, gt_rpn_label = trainer.anchor_target_creator(at.tonumpy(bbox), anchor, img_size)

In [24]:
gt_rpn_loc.shape, gt_rpn_label.shape

((15318, 4), (15318,))

In [26]:
from trainer import _fast_rcnn_loc_loss

In [28]:
# Convert the anchor targets with at.tovariable; labels are additionally
# cast with .long().
# NOTE(review): the names are rebound in place, so re-running this cell feeds
# at.tovariable its own output — confirm it accepts that.
gt_rpn_label = at.tovariable(gt_rpn_label).long()
gt_rpn_loc = at.tovariable(gt_rpn_loc)

In [29]:
# RPN localisation loss from the predicted and target locs, the anchor labels
# (passed as .data), and the trainer's rpn_sigma.
rpn_loc_loss = _fast_rcnn_loc_loss(
    rpn_loc,
    gt_rpn_loc,
    gt_rpn_label.data,
    trainer.rpn_sigma,
)

In [30]:
rpn_loc_loss

tensor(1.00000e-02 *
       1.9013, device='cuda:0')

In [33]:
rpn_score.shape,gt_rpn_label.shape

(torch.Size([15318, 2]), torch.Size([15318]))

In [None]:
# RPN classification loss; targets equal to -1 are skipped via ignore_index.
# (This cell has no recorded execution.)
rpn_cls_loss = F.cross_entropy(rpn_score, gt_rpn_label.cuda(), ignore_index=-1)

In [34]:
# Keep only the entries whose label is greater than -1 (256 in the recorded
# run), for both the labels and the matching score rows (as numpy).
_gt_rpn_label = gt_rpn_label[gt_rpn_label > -1]
_rpn_score = at.tonumpy(rpn_score)[at.tonumpy(gt_rpn_label) > -1]
# Disabled confusion-meter update, left as in the original:
# self.rpn_cm.add(at.totensor(_rpn_score, False), _gt_rpn_label.data.long())

In [36]:
_gt_rpn_label.shape,_rpn_score.shape

(torch.Size([256]), (256, 2))

In [37]:
# ------------------ ROI losses (fast rcnn loss) -------------------#
# Number of rows in the head's loc output (one per sampled RoI).
n_sample = roi_cls_loc.shape[0]

In [38]:
n_sample

128

In [39]:
roi_cls_loc.shape

torch.Size([128, 84])

In [40]:
# Regroup the last axis into 4-vectors: (128, 84) -> (128, 21, 4) in the
# recorded run. Presumably one 4-vector per class (roi_score has 21 columns)
# — TODO confirm.
roi_cls_loc = roi_cls_loc.view(n_sample, -1, 4)

In [41]:
roi_cls_loc.shape

torch.Size([128, 21, 4])

In [42]:
gt_roi_label

array([12, 18, 18, 18, 12, 12, 18, 12, 18, 12, 12, 12, 18, 12, 18, 18, 18,
       12, 18, 18, 12, 12, 12, 18, 12,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,  0,
        0,  0,  0,  0,  0,  0,  0,  0,  0], dtype=int32)

In [43]:
# For each row i, pick the 4-vector at index gt_roi_label[i] along axis 1.
roi_loc = roi_cls_loc[
    t.arange(0, n_sample).long().cuda(),
    at.totensor(gt_roi_label).long()
]

In [45]:
roi_loc.shape

torch.Size([128, 4])

In [46]:
# Convert the RoI targets with at.tovariable; labels are additionally cast
# with .long().
# NOTE(review): the names are rebound in place, so re-running this cell feeds
# at.tovariable its own output — confirm it accepts that.
gt_roi_label = at.tovariable(gt_roi_label).long()
gt_roi_loc = at.tovariable(gt_roi_loc)

In [None]:
# RoI localisation loss on the per-RoI 4-vectors selected by ground-truth label.
# sigma is read from `trainer` (there is no `self` at notebook top level),
# the same way the RPN loss cell reads `trainer.rpn_sigma`.
# NOTE(review): assumes the trainer exposes `roi_sigma` — confirm.
roi_loc_loss = _fast_rcnn_loc_loss(
    roi_loc.contiguous(),
    gt_roi_loc,
    gt_roi_label.data,
    trainer.roi_sigma)

# RoI classification loss over the head's score output.
roi_cls_loss = nn.CrossEntropyLoss()(roi_score, gt_roi_label.cuda())

# Disabled confusion-meter update, left as in the original:
# self.roi_cm.add(at.totensor(roi_score, False), gt_roi_label.data.long())

# Collect the four partial losses and append their sum as a fifth entry.
# `rpn_cls_loss` comes from the RPN classification-loss cell, which must be run first.
losses = [rpn_loc_loss, rpn_cls_loss, roi_loc_loss, roi_cls_loss]
losses = losses + [sum(losses)]