In [1]:
# The hole fasterRCNN network using vgg16

In [2]:
import torch
import torch.nn as nn

In [3]:
from torchvision.models import vgg16
from config import opt
def decom_vgg16():
    # the 30th layer of features is relu of conv5_3
    if opt.caffe_pretrain:
        model = vgg16(pretrained=False)
        if not opt.load_path:
            model.load_state_dict(t.load(opt.caffe_pretrain_path))
    else:
        model = vgg16(not opt.load_path)

    features = list(model.features)[:30]
    classifier = model.classifier

    classifier = list(classifier)
    del classifier[6]
    if not opt.use_drop:
        del classifier[5]
        del classifier[2]
    classifier = nn.Sequential(*classifier)

    # freeze top4 conv
    for layer in features[:10]:
        for p in layer.parameters():
            p.requires_grad = False

    return nn.Sequential(*features)

In [4]:
ip = torch.Tensor(1,3,600,800);ip.shape

torch.Size([1, 3, 600, 800])

In [5]:
extractor = decom_vgg16()

In [6]:
out = extractor(ip);out.shape

torch.Size([1, 512, 37, 50])

In [7]:
import numpy as np
import six
def generate_anchor_base(base_size=16, ratios=[0.5, 1, 2],
                         anchor_scales=[8, 16, 32]):
    py = base_size / 2.
    px = base_size / 2.

    anchor_base = np.zeros((len(ratios) * len(anchor_scales), 4),
                           dtype=np.float32)
    for i in six.moves.range(len(ratios)):
        for j in six.moves.range(len(anchor_scales)):
            h = base_size * anchor_scales[j] * np.sqrt(ratios[i])
            w = base_size * anchor_scales[j] * np.sqrt(1. / ratios[i])

            index = i * len(anchor_scales) + j
            anchor_base[index, 0] = py - h / 2.
            anchor_base[index, 1] = px - w / 2.
            anchor_base[index, 2] = py + h / 2.
            anchor_base[index, 3] = px + w / 2.
    return anchor_base
anchor_base = generate_anchor_base();anchor_base

array([[ -37.254833,  -82.50967 ,   53.254833,   98.50967 ],
       [ -82.50967 , -173.01933 ,   98.50967 ,  189.01933 ],
       [-173.01933 , -354.03867 ,  189.01933 ,  370.03867 ],
       [ -56.      ,  -56.      ,   72.      ,   72.      ],
       [-120.      , -120.      ,  136.      ,  136.      ],
       [-248.      , -248.      ,  264.      ,  264.      ],
       [ -82.50967 ,  -37.254833,   98.50967 ,   53.254833],
       [-173.01933 ,  -82.50967 ,  189.01933 ,   98.50967 ],
       [-354.03867 , -173.01933 ,  370.03867 ,  189.01933 ]],
      dtype=float32)

In [8]:
def enumerate_shifted_anchor(anchor_base, feat_stride=16, height=50, width=50):
    # Enumerate all shifted anchors:
    #
    # add A anchors (1, A, 4) to
    # cell K shifts (K, 1, 4) to get
    # shift anchors (K, A, 4)
    # reshape to (K*A, 4) shifted anchors
    # return (K*A, 4)

    # !TODO: add support for torch.CudaTensor
    # xp = cuda.get_array_module(anchor_base)
    # it seems that it can't be boosed using GPU
    import numpy as xp
    shift_y = xp.arange(0, height * feat_stride, feat_stride)
    shift_x = xp.arange(0, width * feat_stride, feat_stride)
    shift_x, shift_y = xp.meshgrid(shift_x, shift_y)
    shift = xp.stack((shift_y.ravel(), shift_x.ravel(),
                      shift_y.ravel(), shift_x.ravel()), axis=1)

    A = anchor_base.shape[0]
    K = shift.shape[0]
    anchor = anchor_base.reshape((1, A, 4)) + \
             shift.reshape((1, K, 4)).transpose((1, 0, 2))
    anchor = anchor.reshape((K * A, 4)).astype(np.float32)
    return anchor
anchor = enumerate_shifted_anchor(anchor_base);anchor.shape

(22500, 4)

In [66]:
import numpy as np
def loc2bbox(src_bbox, loc):
    if src_bbox.shape[0] == 0:
        return xp.zeros((0, 4), dtype=loc.dtype)

    src_bbox = src_bbox.astype(src_bbox.dtype, copy=False)

    src_height = src_bbox[:, 2] - src_bbox[:, 0]
    src_width = src_bbox[:, 3] - src_bbox[:, 1]
    src_ctr_y = src_bbox[:, 0] + 0.5 * src_height
    src_ctr_x = src_bbox[:, 1] + 0.5 * src_width

    dy = loc[:, 0::4]
    dx = loc[:, 1::4]
    dh = loc[:, 2::4]
    dw = loc[:, 3::4]

    ctr_y = dy * src_height[:, np.newaxis] + src_ctr_y[:, np.newaxis]
    ctr_x = dx * src_width[:, np.newaxis] + src_ctr_x[:, np.newaxis]
    h = np.exp(dh) * src_height[:, np.newaxis]
    w = np.exp(dw) * src_width[:, np.newaxis]

    dst_bbox = np.zeros(loc.shape, dtype=loc.dtype)
    dst_bbox[:, 0::4] = ctr_y - 0.5 * h
    dst_bbox[:, 1::4] = ctr_x - 0.5 * w
    dst_bbox[:, 2::4] = ctr_y + 0.5 * h
    dst_bbox[:, 3::4] = ctr_x + 0.5 * w

    return dst_bbox


In [10]:
class ProposalCreator:
    def __init__(self,
                 parent_model,
                 nms_thresh=0.7,
                 n_train_pre_nms=12000,
                 n_train_post_nms=2000,
                 n_test_pre_nms=6000,
                 n_test_post_nms=300,
                 min_size=16
                 ):
        self.parent_model = parent_model
        self.nms_thresh = nms_thresh
        self.n_train_pre_nms = n_train_pre_nms
        self.n_train_post_nms = n_train_post_nms
        self.n_test_pre_nms = n_test_pre_nms
        self.n_test_post_nms = n_test_post_nms
        self.min_size = min_size

    def __call__(self, loc, score,
                 anchor, img_size, scale=1.):
        # NOTE: when test, remember
        # faster_rcnn.eval()
        # to set self.traing = False
        if self.parent_model.training:
            n_pre_nms = self.n_train_pre_nms
            n_post_nms = self.n_train_post_nms
        else:
            n_pre_nms = self.n_test_pre_nms
            n_post_nms = self.n_test_post_nms

        # Convert anchors into proposal via bbox transformations.
        # roi = loc2bbox(anchor, loc)
        roi = loc2bbox(anchor, loc)

        # Clip predicted boxes to image.
        roi[:, slice(0, 4, 2)] = np.clip(
            roi[:, slice(0, 4, 2)], 0, img_size[0])
        roi[:, slice(1, 4, 2)] = np.clip(
            roi[:, slice(1, 4, 2)], 0, img_size[1])

        # Remove predicted boxes with either height or width < threshold.
        min_size = self.min_size * scale
        hs = roi[:, 2] - roi[:, 0]
        ws = roi[:, 3] - roi[:, 1]
        keep = np.where((hs >= min_size) & (ws >= min_size))[0]
        roi = roi[keep, :]
        score = score[keep]

        # Sort all (proposal, score) pairs by score from highest to lowest.
        # Take top pre_nms_topN (e.g. 6000).
        order = score.ravel().argsort()[::-1]
        if n_pre_nms > 0:
            order = order[:n_pre_nms]
        roi = roi[order, :]

        # Apply nms (e.g. threshold = 0.7).
        # Take after_nms_topN (e.g. 300).

        # unNOTE: somthing is wrong here!
        # TODO: remove cuda.to_gpu
        keep = non_maximum_suppression(
            cp.ascontiguousarray(cp.asarray(roi)),
            thresh=self.nms_thresh)
        if n_post_nms > 0:
            keep = keep[:n_post_nms]
        roi = roi[keep]
        return roi

In [11]:
from torch.nn import functional as F
class RegionProposalNetwork(nn.Module):
    def __init__(
                self, in_channels=512, mid_channels=512, ratios=[0.5, 1, 2],
                anchor_scales=[8, 16, 32], feat_stride=16,
                proposal_creator_params=dict()):
        super(RegionProposalNetwork, self).__init__()
        
        self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
        self.feat_stride = feat_stride
        self.proposal_layer = ProposalCreator(self, **proposal_creator_params)
        n_anchor = self.anchor_base.shape[0]
        
        self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
        self.score = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0)
        self.loc = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0)

    def forward(self, x, img_size, scale=1.):
        n, _, hh, ww = x.shape
        anchor = enumerate_shifted_anchor(np.array(self.anchor_base), self.feat_stride, hh, ww) # anchor:(22500, 4)

        n_anchor = anchor.shape[0] // (hh * ww)
        
        h = F.relu(self.conv1(x)) # torch.Size([1, 512, 50, 50]) for base feature 50*50 from 800*800
        rpn_locs = self.loc(h) # torch.Size([1, 36, 50, 50])
        rpn_locs = rpn_locs.permute(0, 2, 3, 1).contiguous().view(n, -1, 4) # torch.Size([1, 22500, 4])
        rpn_scores = self.score(h) # torch.Size([1, 18, 50, 50])
        rpn_scores = rpn_scores.permute(0, 2, 3, 1).contiguous() # torch.Size([1, 50, 50, 18])
        rpn_softmax_scores = F.softmax(rpn_scores.view(n, hh, ww, n_anchor, 2), dim=4) # torch.Size([1, 50, 50, 9, 2])
        rpn_fg_scores = rpn_softmax_scores[:, :, :, :, 1].contiguous() # torch.Size([1, 50, 50, 9])
        rpn_fg_scores = rpn_fg_scores.view(n, -1) # torch.Size([1, 22500])
        rpn_scores = rpn_scores.view(n, -1, 2) # torch.Size([1, 22500, 2])

        rois = list()
        roi_indices = list()
        for i in range(n):
            roi = self.proposal_layer(rpn_locs[i].cpu().data.numpy(),        # (22500, 4)
                                        rpn_fg_scores[i].cpu().data.numpy(), # (22500,)
                                        anchor, img_size,
                                        scale=scale)
            batch_index = i * np.ones((len(roi),), dtype=np.int32)
            rois.append(roi)
            roi_indices.append(batch_index)

        rois = np.concatenate(rois, axis=0)
        roi_indices = np.concatenate(roi_indices, axis=0)
        
        return rpn_locs, rpn_scores, rois, roi_indices, anchor

In [67]:
from torch.nn import functional as F
class RegionProposalNetwork1(nn.Module):
    def __init__(
                self, in_channels=512, mid_channels=512, ratios=[0.5, 1, 2],
                anchor_scales=[8, 16, 32], feat_stride=16,
                proposal_creator_params=dict()):
        super(RegionProposalNetwork1, self).__init__()
        
        self.anchor_base = generate_anchor_base(anchor_scales=anchor_scales, ratios=ratios)
        self.feat_stride = feat_stride
        self.proposal_layer = ProposalCreator(self, **proposal_creator_params)
        n_anchor = self.anchor_base.shape[0]
        
        self.conv1 = nn.Conv2d(in_channels, mid_channels, 3, 1, 1)
        self.score = nn.Conv2d(mid_channels, n_anchor * 2, 1, 1, 0)
        self.loc = nn.Conv2d(mid_channels, n_anchor * 4, 1, 1, 0)

    def forward(self, x, img_size, scale=1.):
        n, _, hh, ww = x.shape
        anchor = enumerate_shifted_anchor(np.array(self.anchor_base), self.feat_stride, hh, ww)

        n_anchor = anchor.shape[0] // (hh * ww)
        
        h = F.relu(self.conv1(x))
        rpn_locs = self.loc(h)
        rpn_locs = rpn_locs.permute(0, 2, 3, 1).contiguous().view(n, -1, 4)
        rpn_scores = self.score(h)
        rpn_scores = rpn_scores.permute(0, 2, 3, 1).contiguous()
        rpn_softmax_scores = F.softmax(rpn_scores.view(n, hh, ww, n_anchor, 2), dim=4)
        rpn_fg_scores = rpn_softmax_scores[:, :, :, :, 1].contiguous()
        rpn_fg_scores = rpn_fg_scores.view(n, -1)
        rpn_scores = rpn_scores.view(n, -1, 2)
        a = rpn_locs[0].cpu().data.numpy()
        b = rpn_fg_scores[0].cpu().data.numpy()
        roi = self.proposal_layer(a, b, anchor, img_size, scale=scale)
        print('roi:{}'.format(roi.shape))

        
        return rpn_locs, rpn_scores, rpn_softmax_scores, rpn_fg_scores, rpn_scores, anchor

In [68]:
tnet = RegionProposalNetwork1(512,512,ratios=[0.5, 1, 2], anchor_scales=[8, 16, 32],feat_stride=16);tnet

RegionProposalNetwork1(
  (conv1): Conv2d(512, 512, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (score): Conv2d(512, 18, kernel_size=(1, 1), stride=(1, 1))
  (loc): Conv2d(512, 36, kernel_size=(1, 1), stride=(1, 1))
)

In [69]:
rpn_locs, rpn_scores, rpn_softmax_scores, rpn_fg_scores, rpn_scores, anchor = tnet(torch.Tensor(1,512,50,50), (800,800))

NameError: name 'non_maximum_suppression' is not defined

In [12]:
class fasterRCNN(nn.Module):
    def __init__(self, numClass=20, ratios=[0.5, 1, 2], anchor_scales=[8, 16, 32]):
        super().__init__()
        self.base_net = decom_vgg16()
        self.rpn = RegionProposalNetwork(512, 512,
                                        ratios=ratios,
                                        anchor_scales=anchor_scales,
                                        feat_stride=16)
        
        
    def forward(self, x, scale):
        _, _, H, W = x.shape
        img_size = (H, W)
        x = self.base_net(x)
        x = self.rpn(x, img_size, scale)

        return out

In [13]:
net = fasterRCNN()
out = net(ip,1.6);out.shape

NameError: name 'xp' is not defined