In [1]:
# Render matplotlib figures inline in the cell output.
%matplotlib inline
# Load the autoreload extension and enable mode 2, so edits to imported modules
# (e.g. the src.* package used by later cells) are picked up without a kernel restart.
%load_ext autoreload
%autoreload 2

In [2]:
import torch
from src.utils.datasets.ggimages import OpenImage
from src.utils.datasets.transform import RandomHorizontalFlip, Resize, Compose, XyToCenter
import torchvision.transforms as transforms
from src.utils.display.images import imshow, result_show
from torch.utils.data import DataLoader
from src.utils.datasets.adapter import convert_data
import numpy as np
from src.network.yolo import Yolo
from src.config import VOC_ANCHORS
from src.utils.process_boxes import preprocess_true_boxes

# Project-level transforms: resize to 608x608 (the same size later cells pass to
# preprocess_true_boxes and model.eval), then apply XyToCenter.
# NOTE(review): presumably these act on the image together with its boxes — confirm
# in src.utils.datasets.transform.
general_transform = Compose([Resize((608, 608)), XyToCenter()])

# Photometric augmentation: RandomChoice applies a single randomly picked entry
# from this list on each call.
colour_augmentation = transforms.RandomChoice([
    transforms.ColorJitter(hue=.1, saturation=.1),
    transforms.RandomGrayscale(p=0.7),
])

# Per-channel statistics handed to Normalize as (mean, std).
channel_mean = [0.485, 0.456, 0.406]
channel_std = [0.229, 0.224, 0.225]

# Image-only pipeline: augment, convert to a tensor, then normalise.
transform = transforms.Compose([
    colour_augmentation,
    transforms.ToTensor(),
    transforms.Normalize(channel_mean, channel_std),
])

In [3]:
# One sample per batch.
batch_size = 1

# NOTE(review): '/data/data' is an absolute, machine-specific location — consider
# reading it from a config value so the notebook runs elsewhere.
ds = OpenImage(
    '/data/data',
    'OpenImage',
    general_transform=general_transform,
    transform=transform,
)

# Shuffled loading in the main process (num_workers=0); convert_data is the
# collate function that assembles individual samples into a batch.
loader_options = dict(
    batch_size=batch_size,
    shuffle=True,
    collate_fn=convert_data,
    num_workers=0,
)
train_data_loader = DataLoader(ds, **loader_options)


In [4]:
# NOTE(review): DarkNet, DarknetBody and YoloBody are not referenced in the visible
# cells — confirm whether this import is still needed; if it is, it belongs in the
# imports cell at the top of the notebook.
from src.network.base import DarkNet, DarknetBody, YoloBody

# Instantiate the detector from the VOC_ANCHORS config value and the dataset's classes.
model = Yolo(VOC_ANCHORS, ds.classes)

In [5]:
# Dump the module tree to inspect the layer layout (the printed output is long).
print(model)

Yolo(
  (yolo_body): YoloBody(
    (body_bottom): DarknetBodyBottom(
      (first_layer): Conv2d(
        (conv): Conv2d(3, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(32, eps=0.001, momentum=0, affine=True, track_running_stats=True)
        (relu): LeakyReLU(negative_slope=0.1)
      )
      (second_layer): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (third_layer): Conv2d(
        (conv): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
        (bn): BatchNorm2d(64, eps=0.001, momentum=0, affine=True, track_running_stats=True)
        (relu): LeakyReLU(negative_slope=0.1)
      )
      (forth_layer): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
      (fifth_layer): BottleneckBlock(
        (first_layer): Conv2d(
          (conv): Conv2d(64, 128, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
          (bn): BatchNorm2d(128, ep

In [6]:
# Draw a single batch; the loader is shuffled, so the sample changes between runs.
batch_iterator = iter(train_data_loader)
data = next(batch_iterator)
(batch_tensor, im_info, batch_boxes, batch_boxes_index, img_names) = data

# Build the training targets for this batch from its ground-truth boxes.
# The image size matches the Resize((608, 608)) applied by general_transform.
input_size = (608, 608)
num_classes = len(ds.classes)
detectors_mask, matching_true_boxes = preprocess_true_boxes(
    batch_boxes, VOC_ANCHORS, input_size, num_classes
)

In [7]:
# Sanity-check the shape of the mask returned by preprocess_true_boxes for the sampled batch.
print(detectors_mask.shape)

(5, 1, 19, 19)


In [8]:
# Forward pass on the sampled batch.
# NOTE(review): this call is not wrapped in torch.no_grad(), so autograd presumably
# records the graph here. That is needed if a loss/backward step follows; for pure
# inference it could be wrapped — confirm intent.
output = model(batch_tensor)

In [24]:
# Post-process the raw network output without gradient tracking.
# model.eval is called with arguments here, so it is Yolo's own method rather than
# the argument-less nn.Module.eval() mode switch.
# NOTE(review): iou_threshold=0.001 looks like a debugging value — confirm.
decode_options = dict(score_threshold=0.2, iou_threshold=0.001)
with torch.no_grad():
    result = model.eval(output, (608, 608), **decode_options)

torch.Size([748, 4])
tensor([  0,   1,   2,   3,   4,   5,   6,   7,   8,  10,  11,  12,  13,  14,
         15,  16,  17,  18,  19,  20,  21,  22,  24,  25,  26,  27,  28,  29,
         30,  31,  32,  33,  34,  37,  38,  39,  40,  41,  42,  43,  45,  46,
         47,  48,  49,  50,  51,  52,  53,  54,  55,  56,  57,  58,  59,  60,
         61,  62,  63,  64,  65,  66,  67,  68,  70,  71,  72,  73,  74,  75,
         76,  78,  79,  80,  81,  82,  83,  84,  85,  86,  87,  88,  89,  90,
         91,  92,  93,  94,  95,  96,  97,  98,  99, 100, 101, 102, 103, 104,
        105, 106, 107, 108, 109, 110, 112, 113, 114, 116, 117, 118, 119, 120,
        121, 123, 124, 125, 126, 127, 128, 129, 130, 131, 132, 133, 134, 135,
        137, 138, 139, 140, 141, 142, 143, 144, 145, 146, 147, 148, 149, 150,
        151, 152, 153, 154, 156, 157, 159, 160, 161, 162, 163, 164, 165, 166,
        167, 168, 169, 170, 171, 172, 173, 174, 175, 176, 177, 178, 179, 180,
        181, 182, 183, 184, 185, 186, 187, 

In [10]:
# Scratch check of tensor construction. torch.tensor() is the documented factory for
# building a tensor from existing data (torch.Tensor(...) is the legacy constructor).
# Float literals give the default floating dtype, so the displayed value is
# unchanged: tensor([ 1.,  2., 34.]).
torch.tensor([1.0, 2.0, 34.0])

tensor([ 1.,  2., 34.])