Model

In [2]:
from torchvision.models.detection import fasterrcnn_resnet50_fpn

# Build the detector used throughout this notebook: Faster R-CNN with a
# ResNet-50 + FPN backbone (per the factory name), configured with num_classes=10.
model = fasterrcnn_resnet50_fpn(num_classes=10)

Backbone

In [None]:
# Show the layer structure of the backbone's `body` submodule
# (presumably the ResNet-50 feature extractor — confirm from the printed output).
print(model.backbone.body)

In [None]:
# Show the layer structure of the backbone's `fpn` submodule
# (the feature-pyramid part of the backbone, going by its name).
print(model.backbone.fpn)

RPN

In [None]:
# Inspect the region proposal network and the configuration of its anchor generator.
print(model.rpn)
# Anchor sizes and aspect ratios the generator was configured with.
print(model.rpn.anchor_generator.sizes)
print(model.rpn.anchor_generator.aspect_ratios)
# The generated base anchors; the next cell iterates them as (x1, y1, x2, y2) rows.
print(model.rpn.anchor_generator.cell_anchors)

In [None]:
import numpy as np
import cv2
import matplotlib.pyplot as plt

# Draw every base anchor of the RPN around one shared centre point.
# The canvas is uint8 so the 0-255 colour values are valid for both
# cv2 drawing and plt.imshow (a float canvas holding 255s gets clipped).
background = np.full((1000, 1000, 3), 255, dtype=np.uint8)
center = (500, 500)
colormap = [(255, 255, 0), (255, 0, 255), (255, 0, 0), (0, 255, 0), (0, 0, 255)]
anchors = model.rpn.anchor_generator.cell_anchors

# One colour per anchor group. Wrap on the palette length so that any number
# of groups stays in range (a fixed modulus larger than the palette would not).
for i, anchor in enumerate(anchors):
    color = colormap[i % len(colormap)]
    for x1, y1, x2, y2 in anchor:
        # Shift each corner by the canvas centre before drawing.
        px1 = int(center[0] + x1)
        py1 = int(center[1] + y1)
        px2 = int(center[0] + x2)
        py2 = int(center[1] + y2)
        cv2.rectangle(background, (px1, py1), (px2, py2), color, 5)

plt.axis(False)
plt.imshow(background)

In [None]:
# Excerpt of the decode_single function from torchvision/models/detection/_utils.py

import torch
from torch import Tensor

# Placeholder labels for decode_single's two inputs (Korean for "anchors" and
# "RPN output"); they only describe the arguments and are not usable tensors.
boxes = '앵커'
rel_codes = 'RPN 출력'

def decode_single(self, rel_codes: Tensor, boxes: Tensor) -> Tensor:
    """
    Turn encoded box offsets back into corner-format boxes.

    Each reference box is converted to centre/size form, moved and rescaled
    by the weight-normalised offsets, and converted back to corners.

    Args:
        rel_codes (Tensor): encoded offsets. Columns are read in groups of
            four as (dx, dy, dw, dh), so one row may carry several boxes.
        boxes (Tensor): reference boxes, one per row, read as
            (x1, y1, x2, y2).

    Returns:
        Tensor: decoded boxes, four columns (x1, y1, x2, y2) per offset group,
        in the same row/group order as ``rel_codes``.
    """
    # Do all arithmetic in the dtype of the offsets.
    boxes = boxes.to(rel_codes.dtype)

    # Reference boxes: corners -> size and centre.
    ref_w = boxes[:, 2] - boxes[:, 0]
    ref_h = boxes[:, 3] - boxes[:, 1]
    ref_cx = boxes[:, 0] + 0.5 * ref_w
    ref_cy = boxes[:, 1] + 0.5 * ref_h

    # Undo the per-coordinate weighting applied to the encoded offsets.
    weight_x, weight_y, weight_w, weight_h = self.weights
    delta_x = rel_codes[:, 0::4] / weight_x
    delta_y = rel_codes[:, 1::4] / weight_y
    delta_w = rel_codes[:, 2::4] / weight_w
    delta_h = rel_codes[:, 3::4] / weight_h

    # Cap the size terms so torch.exp() below is never fed huge values.
    log_scale_cap = self.bbox_xform_clip
    delta_w = torch.clamp(delta_w, max=log_scale_cap)
    delta_h = torch.clamp(delta_h, max=log_scale_cap)

    # Column views of the reference sizes so they broadcast over offset groups.
    ref_w_col = ref_w[:, None]
    ref_h_col = ref_h[:, None]

    center_x = delta_x * ref_w_col + ref_cx[:, None]
    center_y = delta_y * ref_h_col + ref_cy[:, None]
    width = torch.exp(delta_w) * ref_w_col
    height = torch.exp(delta_h) * ref_h_col

    # Half extents: distance from the predicted centre to each box edge.
    half_h = torch.tensor(0.5, dtype=center_y.dtype, device=height.device) * height
    half_w = torch.tensor(0.5, dtype=center_x.dtype, device=width.device) * width

    corners = (
        center_x - half_w,
        center_y - half_h,
        center_x + half_w,
        center_y + half_h,
    )
    # Interleave the four coordinates per group, then flatten back to 2-D.
    return torch.stack(corners, dim=2).flatten(1)

RoI pooling

In [None]:
# Show the layer structure of the detector's `roi_heads` submodule.
print(model.roi_heads)

In [None]:
import torch
import torchvision

# Stand-alone demo of multi-scale RoI Align on random feature maps.
# The op gets its own name so the detector stored in `model` is not
# overwritten (cells that inspect `model.backbone` / `model.rpn` /
# `model.roi_heads` would otherwise break when re-run after this one).
roi_pooler = torchvision.ops.MultiScaleRoIAlign(featmap_names=['0', '1'],
                                                output_size=(7, 7),
                                                sampling_ratio=2)

# Two fake feature levels, keyed by the names listed in `featmap_names`.
features = {
    '0': torch.rand(1, 256, 64, 64),
    '1': torch.rand(1, 256, 32, 32),
}

# Three random boxes: draw (x1, y1, w, h) in [0, 256), then turn the last two
# columns into (x2, y2) by adding the top-left corner, so x2 >= x1 and y2 >= y1.
boxes = torch.rand(3, 4) * 256
boxes[:, 2:] += boxes[:, :2]
image_sizes = [(512, 512)]
output = roi_pooler(features, [boxes], image_sizes)

print(boxes)
print(output.shape)
# Same result with everything after the first dimension flattened per box.
print(output.reshape([output.shape[0], -1]).shape)

Loss Function

In [4]:
import os,sys
# Make the parent directory importable; presumably that is where the
# `dataset` and `coco` modules imported below live — confirm against the repo layout.
sys.path.append(os.path.abspath('./../'))

import torch
from dataset import PennFudanDataset
import coco.transforms as T


data_path = './data/PennFudanPed'
# Preprocessing pipeline: PIL image -> tensor, then convert to float dtype.
transform = T.Compose([T.PILToTensor(), 
                       T.ConvertImageDtype(torch.float)])
trainset = PennFudanDataset(data_path,transform)
sample, target = trainset[0]
# Form a batch of one: add a leading batch dimension to the image and
# wrap the single target in a list.
sample = sample.unsqueeze(0)
target = [target]
# Fresh detector, called with targets and never switched to eval();
# the result is used below as a dict of named losses.
model = fasterrcnn_resnet50_fpn(num_classes=10)
losses = model(sample, target)

print(list(losses.keys()))

['loss_classifier', 'loss_box_reg', 'loss_objectness', 'loss_rpn_box_reg']


Augmentation

In [5]:
# p=1 makes the horizontal flip fire on every call, so the effect is always visible.
h_flip = T.RandomHorizontalFlip(p=1)

data_path = './data/PennFudanPed'
trainset = PennFudanDataset(data_path,transform)
sample, target = trainset[0]
# Boxes before the flip.
print(target['boxes'])

# The transform takes and returns the (image, target) pair together.
new_sample, new_target = h_flip(sample, target)
# Boxes after the flip, for comparison with the values printed above.
print(new_target['boxes'])

tensor([[159., 181., 301., 430.],
        [419., 170., 534., 485.]])
tensor([[258., 181., 400., 430.],
        [ 25., 170., 140., 485.]])


In [None]:
import cv2
import matplotlib.pyplot as plt

%matplotlib inline 

# Draw the ground-truth boxes of the first training sample on top of the image.
fig, ax = plt.subplots(1, 1, figsize=(16, 8))

data_path = './data/PennFudanPed'
# `transform` is the T.Compose pipeline built earlier in the notebook. It is
# passed as-is, like in the other dataset cells; it is not a module with a
# ToTensor attribute.
trainset = PennFudanDataset(data_path, transform)
sample, target = trainset[0]
print(target['boxes'])

# Assumes `sample` is a channel-first float image in [0, 1] (what the transform
# pipeline above is expected to produce): move channels last and rescale to 0-255.
# uint8 is used because cv2 drawing does not accept 64-bit integer images.
sample = sample.permute(1, 2, 0).numpy()
sample = (sample * 255).astype('uint8')

# Draw on a copy so the converted image itself stays untouched.
vis_sample = sample.copy()

for box in target['boxes'].int():
    x1, y1, x2, y2 = (int(v) for v in box)
    cv2.rectangle(vis_sample, (x1, y1), (x2, y2), (0, 0, 255), 3)

ax.set_axis_off()
ax.imshow(vis_sample)

In [None]:
# Draw the boxes of the first training sample after a forced horizontal flip.
fig, ax = plt.subplots(1, 1, figsize=(16, 8))

# p=1 makes the flip fire on every call.
h_flip = T.RandomHorizontalFlip(p=1)

data_path = './data/PennFudanPed'
# `transform` is the T.Compose pipeline built earlier in the notebook. It is
# passed as-is, like in the other dataset cells; it is not a module with a
# ToTensor attribute.
trainset = PennFudanDataset(data_path, transform)
sample, target = trainset[0]
new_sample, new_target = h_flip(sample, target)

print(new_target['boxes'])

# Assumes `new_sample` is a channel-first float image in [0, 1]: move channels
# last and rescale to 0-255. uint8 is used because cv2 drawing does not accept
# 64-bit integer images.
new_sample = new_sample.permute(1, 2, 0).numpy()
new_sample = (new_sample * 255).astype('uint8')

# Draw on a copy so the converted image itself stays untouched.
vis_sample = new_sample.copy()

for box in new_target['boxes'].int():
    x1, y1, x2, y2 = (int(v) for v in box)
    cv2.rectangle(vis_sample, (x1, y1), (x2, y2), (0, 0, 255), 3)

ax.set_axis_off()
ax.imshow(vis_sample)
