In [1]:
# Render matplotlib figures inline in the notebook output.
%matplotlib inline
# Load IPython's autoreload extension; mode 2 reloads all modules before each
# cell executes, so edits to imported modules are picked up without a restart.
%load_ext autoreload
%autoreload 2

In [2]:
from src.utils.datasets.ggimages import OpenImage
from src.utils.datasets.transform import RandomHorizontalFlip, Resize, Compose, XyToCenter
import torchvision.transforms as transforms
from src.utils.display.images import imshow, result_show
from torch.utils.data import DataLoader
from src.utils.datasets.adapter import convert_data
import numpy as np
import torch
from src.network.yolo import Yolo
from src.config import VOC_ANCHORS
from src.utils.cython_lib.process_boxes import preprocess_true_boxes

# Preprocessing pipeline handed to the dataset as `general_transform`:
# resize to 608x608, then apply XyToCenter.
# NOTE(review): XyToCenter presumably converts box coordinates to a
# centre-based form -- confirm in src.utils.datasets.transform.
general_transform = Compose([
    Resize((608, 608)),
    XyToCenter()
])


# Image-side pipeline handed to the dataset as `transform`: one random colour
# augmentation, conversion to a tensor, then per-channel normalisation.
transform = transforms.Compose([
    # RandomChoice applies exactly one of the listed transforms per call.
    transforms.RandomChoice([
        transforms.ColorJitter(saturation=0.1, hue=0.1),
        transforms.RandomGrayscale(p=0.7),
    ]),
    transforms.ToTensor(),
    # Mean / std are the commonly used ImageNet channel statistics.
    transforms.Normalize(
        mean=[0.485, 0.456, 0.406],
        std=[0.229, 0.224, 0.225],
    ),
])

In [3]:
# One sample per batch.
batch_size = 1

# Dataset built with both transform pipelines defined earlier in the notebook.
# NOTE(review): '/data/data' is a hard-coded absolute path; consider moving it
# to a configurable constant so the notebook runs on other machines.
ds = OpenImage(
    '/data/data',
    'OpenImage',
    general_transform=general_transform,
    transform=transform,
)

# Shuffled loader running in the main process (num_workers=0); batches are
# assembled by the project's `convert_data` collate function.
train_data_loader = DataLoader(
    dataset=ds,
    batch_size=batch_size,
    shuffle=True,
    num_workers=0,
    collate_fn=convert_data,
)


In [4]:
# NOTE(review): DarkNet, DarknetBody and YoloBody are not referenced in any of
# the cells visible here; this mid-notebook import may be a leftover and, if
# still needed, belongs with the other imports at the top.
from src.network.base import DarkNet, DarknetBody, YoloBody

# Build the YOLO network from the VOC anchor set and the dataset's `classes`
# attribute.
model = Yolo(VOC_ANCHORS, ds.classes)

In [5]:
# Print the module tree to inspect the layer layout of the network.
print(model)

Yolo(
  (yolo_body): YoloBody(
    (body_bottom): DarknetBodyBottom(
      (first_layer): Conv2d(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0, affine=True, track_running_stats=True)
        (relu): LeakyReLU(negative_slope=0.1)
      )
      (second_layer): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (third_layer): Conv2d(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0, affine=True, track_running_stats=True)
        (relu): LeakyReLU(negative_slope=0.1)
      )
      (forth_layer): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (fifth_layer): BottleneckBlock(
        (first_layer): Conv2d(
          (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn): BatchNorm2d(128, ep

In [11]:
# Pull a single batch from the training loader and unpack the 5-tuple it
# yields.
# NOTE(review): the loader shuffles and no seed is set in the visible cells,
# so a different sample is drawn on every run.
data = next(iter(train_data_loader))
batch_tensor, im_info, batch_boxes, batch_boxes_index, img_names = data


# Derive the detector mask and matched ground-truth boxes from the batch's
# boxes and the VOC anchors.
# NOTE(review): (608, 608) is presumably the network input size and must agree
# with the Resize step in `general_transform` -- confirm.
detectors_mask, matching_true_boxes = preprocess_true_boxes(batch_boxes, VOC_ANCHORS, (608, 608))

In [12]:
# Forward pass of the fetched batch through the network.
output = model(batch_tensor)

In [13]:
# Give each NumPy array a new leading axis and wrap it as a torch tensor.
# Only the boxes are cast to float32; the mask and matched boxes keep the
# dtype they had as NumPy arrays.
batch_box_tensor = torch.from_numpy(np.expand_dims(batch_boxes, axis=0)).float()

detectors_mask_tensor = torch.from_numpy(np.expand_dims(detectors_mask, axis=0))
matching_true_boxes_tensor = torch.from_numpy(np.expand_dims(matching_true_boxes, axis=0))

In [15]:
# Evaluate the model's loss on the network output and the prepared targets.
# NOTE(review): the torch.Size lines shown after this cell look like debug
# prints from inside `Yolo.loss` -- confirm and consider removing them.
model.loss(output, batch_box_tensor, detectors_mask_tensor, matching_true_boxes_tensor)

torch.Size([1, 5, 1, 19, 19])
torch.Size([1, 5, 1, 19, 19])
torch.Size([1, 5, 1, 19, 19])


In [46]:
# Scratch tensor of shape (4, 2, 4): standard-normal samples with negative
# values clamped to zero.
# NOTE(review): no seed is set, so the values differ from run to run.
a = torch.clamp(torch.randn(4, 2, 4), min=0)

In [47]:
# Display the clamped random tensor.
a

tensor([[[0.0000, 0.7092, 0.0000, 0.1829],
         [0.0000, 0.0000, 0.7876, 0.3617]],

        [[0.6430, 0.0000, 0.7255, 0.0000],
         [0.0000, 0.0000, 0.8688, 0.4494]],

        [[0.2596, 0.4518, 0.0000, 0.0000],
         [0.0000, 0.0000, 1.7309, 0.0000]],

        [[0.0000, 0.0000, 0.1497, 0.0000],
         [0.9055, 1.3137, 0.2129, 0.4665]]])

In [49]:
# Reduce over dim 1 while keeping that axis: `best_iou` holds the maxima and
# `best_iou_index` the positions they were found at.
best_iou, best_iou_index = torch.max(a, dim=1, keepdim=True)

In [53]:
# Float mask: 1.0 where the maximum exceeds 0.5, 0.0 elsewhere.
best_iou.gt(0.5).float()

tensor([[[0., 1., 1., 0.]],

        [[1., 0., 1., 0.]],

        [[0., 0., 1., 0.]],

        [[1., 1., 0., 0.]]])