In [1]:
import os
import pdb
import time

import cv2
import numpy as np
import torch
from torch import optim, nn
from torch.utils.data import DataLoader
import torch.nn.functional as F

from ssdmultibox.datasets import TrainPascalDataset, SIZE, NUM_CLASSES, device, Bboxer
from ssdmultibox.utils import open_image
from ssdmultibox.plotting import *
from ssdmultibox.criterion import SSDLoss
from ssdmultibox.models import SSDModel, vgg16_bn

import matplotlib.pyplot as plt

In [2]:
dataset = TrainPascalDataset()
dataloader = DataLoader(dataset, batch_size=4)
image_ids, ims, gt_bbs, gt_cats = next(iter(dataloader))

In [3]:
model = SSDModel()

ims, gt_bbs, gt_cats = dataset.to_device(ims, gt_bbs, gt_cats)

preds = model(ims)

In [4]:
len(preds)

6

In [5]:
len(preds[0])

6

In [78]:
count = 0
for i in range(len(preds)):
    count += (preds[i][0][0].shape[1]/4)*6
    print(preds[i][0][0].shape, preds[i][0][1].shape)
    
count

torch.Size([4, 5776]) torch.Size([4, 30324])
torch.Size([4, 1444]) torch.Size([4, 7581])
torch.Size([4, 400]) torch.Size([4, 2100])
torch.Size([4, 100]) torch.Size([4, 525])
torch.Size([4, 36]) torch.Size([4, 189])
torch.Size([4, 4]) torch.Size([4, 21])


11640.0

In [7]:
preds[-2][0][0].shape

torch.Size([4, 36])

In [8]:
gt_bbs[-2][0].shape

torch.Size([4, 36])

In [9]:
gt_cats[-2][0].shape

torch.Size([4, 9])

In [10]:
gt_cats[-2][0]

tensor([[20, 20, 20, 20,  6, 20, 20, 20, 20],
        [20, 20, 20, 20, 12, 20, 20, 20, 20],
        [14, 20, 14, 20, 14, 20,  1,  1,  1],
        [20, 20, 20, 20,  6, 20, 20, 20, 20]], dtype=torch.uint8)

In [11]:
preds[-2][0][1].reshape(4, 9, -1)[:,:,:-1].shape

torch.Size([4, 9, 20])

In [12]:
preds[-2][0][1].reshape(4, 9, -1)[:,:,:-1][0].shape # 1st item

torch.Size([9, 20])

In [13]:
# max prediction for the first item w/o the background class
pred_cats = preds[-2][0][1].reshape(4, 9, -1)[:,:,:-1][0]
max_conf, max_cls = pred_cats.max(1)
max_conf, max_cls

(tensor([0.0158, 0.0171, 0.0239, 0.0141, 0.0210, 0.0212, 0.0154, 0.0200, 0.0193],
        grad_fn=<MaxBackward0>), tensor([ 4, 16,  2, 16, 16, 16,  5, 16, 16]))

In [67]:
max_conf.sort(0, descending=True)

(tensor([0.1853, 0.1852, 0.1754,  ..., 0.0144, 0.0141, 0.0118],
        grad_fn=<SortBackward>),
 tensor([ 896, 1084, 1082,  ..., 1501, 1933,  651]))

In [74]:
mode_idx, _ = torch.mode(max_cls)
gt_idx = max_cls == max_cls[mode_idx]
gt_idx

tensor([0, 0, 1,  ..., 0, 0, 0], dtype=torch.uint8)

In [25]:
# only look at class 11
max_conf[gt_idx]

tensor([0.0171, 0.0141, 0.0210, 0.0212, 0.0200, 0.0193], grad_fn=<TakeBackward>)

In [26]:
sorted_max_conf, sorted_max_cls = max_conf[gt_idx].sort(0, descending=True)
sorted_max_conf, sorted_max_cls

(tensor([0.0212, 0.0210, 0.0200, 0.0193, 0.0171, 0.0141], grad_fn=<SortBackward>),
 tensor([3, 2, 4, 5, 0, 1]))

In [27]:
preds[-2][0][0].reshape(4, -1, 4).shape

torch.Size([4, 9, 4])

In [28]:
preds[-2][0][0].reshape(4, -1, 4)[0]

tensor([[ 0.0007,  0.0075,  0.0044,  0.0089],
        [-0.0018,  0.0062,  0.0030,  0.0087],
        [ 0.0039,  0.0128,  0.0055,  0.0097],
        [-0.0032,  0.0015,  0.0101,  0.0016],
        [-0.0044, -0.0002,  0.0090,  0.0010],
        [ 0.0043,  0.0096,  0.0097,  0.0047],
        [-0.0051,  0.0058,  0.0123, -0.0007],
        [-0.0045,  0.0046,  0.0072,  0.0005],
        [ 0.0053,  0.0089,  0.0077,  0.0050]], grad_fn=<SelectBackward>)

In [29]:
preds[-2][0][0].reshape(4, -1, 4)[0][gt_idx]

tensor([[-0.0018,  0.0062,  0.0030,  0.0087],
        [-0.0032,  0.0015,  0.0101,  0.0016],
        [-0.0044, -0.0002,  0.0090,  0.0010],
        [ 0.0043,  0.0096,  0.0097,  0.0047],
        [-0.0045,  0.0046,  0.0072,  0.0005],
        [ 0.0053,  0.0089,  0.0077,  0.0050]], grad_fn=<TakeBackward>)

In [30]:
sorted_max_cls

tensor([3, 2, 4, 5, 0, 1])

In [31]:
max_idx = sorted_max_cls[0]
# gt_idx - filters by classes == 11
# max_idx - is the max confidence prediction of that class
bbs_pred = preds[-2][0][0].reshape(4, -1, 4)[0][gt_idx][max_idx]
bbs_pred

tensor([0.0043, 0.0096, 0.0097, 0.0047], grad_fn=<SelectBackward>)

In [32]:
image_id, ann = next(iter(dataset.get_annotations().items()))
image_id, ann

(12,
 {'image_path': '/Users/aaron/data/VOC2007/trainval/VOCdevkit/VOC2007/JPEGImages/000012.jpg',
  'bbs': [[155, 96, 196, 174]],
  'cats': [6]})

In [33]:
im = open_image(ann['image_path'])
im.shape

(333, 500, 3)

In [34]:
SIZE * bbs_pred

tensor([1.2878, 2.8777, 2.9160, 1.4069], grad_fn=<MulBackward>)

In [35]:
bbs_pred_pascal = dataset.bboxer.fastai_bb_to_pascal_bb(SIZE*bbs_pred.detach().numpy())
bbs_pred_pascal

array([ 2.87771893,  1.28784156, -0.47085249,  2.62816334])

In [36]:
dataset.categories()[11]

'dog'

In [37]:
# resized_im = cv2.resize(im, (SIZE, SIZE))
# ax = show_img(im)
# draw_rect(ax, bbs_pred_pascal)

### multi feature maps

In [38]:
for i in range(len(preds)):
    print(preds[i][0][0].shape, preds[i][0][1].shape)

torch.Size([4, 5776]) torch.Size([4, 30324])
torch.Size([4, 1444]) torch.Size([4, 7581])
torch.Size([4, 400]) torch.Size([4, 2100])
torch.Size([4, 100]) torch.Size([4, 525])
torch.Size([4, 36]) torch.Size([4, 189])
torch.Size([4, 4]) torch.Size([4, 21])


In [39]:
preds[0][0][0].reshape(4, -1, 4).shape

torch.Size([4, 1444, 4])

In [40]:
preds[0][0][0].reshape(4, -1, 4)[0].shape

torch.Size([1444, 4])

In [41]:
preds[5][0][0].reshape(4, -1, 4)[0].shape

torch.Size([1, 4])

In [42]:
preds[4][0][0].reshape(4, -1, 4)[0].shape

torch.Size([9, 4])

In [43]:
torch.cat((
    preds[4][0][0].reshape(4, -1, 4)[0],
    preds[5][0][0].reshape(4, -1, 4)[0]
), 0).shape

torch.Size([10, 4])

In [79]:
# concat all feature_map bbs (but not aspect_ratio bbs) for the 1st training example
all_bbs = torch.cat([
    preds[i][0][0].reshape(4, -1, 4)[0] for i in range(6)
], 0)

all_bbs.shape

torch.Size([1940, 4])

### concat all bbs and cats for the 1st item

In [83]:
all_fm_ar_bbs = torch.cat([
    preds[i][j][0].reshape(4, -1, 4)[0] for j in range(6) for i in range(6)
], 0)

all_fm_ar_bbs.shape

torch.Size([11640, 4])

In [87]:
all_fm_ar_cats = torch.cat([
    preds[i][j][1].reshape(bs, -1, num_classes)[:,:,:-1][0] for j in range(6) for i in range(6)
], 0)

all_fm_ar_cats.shape

torch.Size([11640, 20])

### get the max prediction for "car", which we know is the label

In [85]:
ann

{'image_path': '/Users/aaron/data/VOC2007/trainval/VOCdevkit/VOC2007/JPEGImages/000012.jpg',
 'bbs': [[155, 96, 196, 174]],
 'cats': [6]}

In [88]:
dataset.categories()[6]

'car'

In [89]:
max_conf, max_cls = all_fm_ar_cats.max(1)
max_conf, max_cls

(tensor([0.0742, 0.0908, 0.0462,  ..., 0.0251, 0.0215, 0.0199],
        grad_fn=<MaxBackward0>), tensor([ 1, 14,  3,  ...,  8,  8, 19]))

In [90]:
CAR_ID = 6
gt_idx = max_cls == CAR_ID
gt_idx.shape, gt_idx

(torch.Size([11640]), tensor([0, 0, 0,  ..., 0, 0, 0], dtype=torch.uint8))

In [91]:
gt_idx.sum()

tensor(506)

In [107]:
sorted_max_conf, sorted_max_idx = max_conf[gt_idx].sort(dim=0)
sorted_max_conf.shape, sorted_max_idx.shape

(torch.Size([506]), torch.Size([506]))

In [110]:
# bbs filtered
max_bbs = all_fm_ar_bbs[gt_idx]
max_bbs.shape

torch.Size([506, 4])

In [112]:
max_bbs.requires_grad

True

In [137]:
sorted_max_conf.requires_grad

True

In [138]:
# apply mask before `nms()` fund

In [136]:
conf_thresh = 0.5
conf_thres_mask = sorted_max_conf.gt(conf_thresh)
sorted_max_conf[conf_thres_mask].sum().item() == 0

True

In [139]:
# uses `nms()` func from below

In [117]:
nms_keep, nms_count = nms(max_bbs.detach(), sorted_max_conf.detach())
nms_keep

tensor([505, 504, 503, 502, 501, 500, 499, 498, 497, 496, 495, 494, 493, 492,
        491, 490, 489, 488, 487, 486, 485, 484, 483, 482, 481, 480, 479, 478,
        477, 476, 475, 474, 473, 472, 471, 470, 469, 468, 467, 466, 465, 464,
        463, 462, 461, 460, 459, 458, 457, 456, 455, 454, 453, 452, 451, 450,
        449, 448, 447, 446, 445, 444, 443, 442, 441, 440, 439, 438, 437, 436,
        435, 434, 433, 432, 431, 430, 429, 428, 427, 426, 425, 424, 423, 422,
        421, 420, 419, 418, 417, 416, 415, 414, 413, 412, 411, 410, 409, 408,
        407, 406, 405, 404, 403, 402, 401, 400, 399, 398, 397, 396, 395, 394,
        393, 392, 391, 390, 389, 388, 387, 386, 385, 384, 383, 382, 381, 380,
        378, 377, 376, 375, 374, 373, 372, 371, 370, 369, 368, 367, 366, 365,
        364, 363, 362, 361, 360, 359, 358, 357, 356, 355, 354, 353, 352, 351,
        350, 349, 348, 347, 346, 345, 344, 343, 342, 341, 340, 339, 338, 337,
        336, 335, 334, 333, 332, 331, 330, 329, 328, 327, 326, 3

In [119]:
(nms_keep != 0).sum()

tensor(198)

In [120]:
sorted_max_conf[505]

tensor(0.1643, grad_fn=<SelectBackward>)

In [106]:
def nms(boxes, scores, overlap=0.5, top_k=200):
    """Apply non-maximum suppression at test time to avoid detecting too many
    overlapping bounding boxes for a given object.
    Args:
        boxes: (tensor) The location preds for the img, Shape: [num_priors,4].
        scores: (tensor) The class predscores for the img, Shape:[num_priors].
        overlap: (float) The overlap thresh for suppressing unnecessary boxes.
        top_k: (int) The Maximum number of box preds to consider.
    Return:
        The indices of the kept boxes with respect to num_priors.
    """

    keep = scores.new(scores.size(0)).zero_().long()
    if boxes.numel() == 0:
        return keep
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]
    area = torch.mul(x2 - x1, y2 - y1)
    v, idx = scores.sort(0)  # sort in ascending order
    # I = I[v >= 0.01]
    idx = idx[-top_k:]  # indices of the top-k largest vals
    xx1 = boxes.new()
    yy1 = boxes.new()
    xx2 = boxes.new()
    yy2 = boxes.new()
    w = boxes.new()
    h = boxes.new()

    # keep = torch.Tensor()
    count = 0
    while idx.numel() > 0:
        i = idx[-1]  # index of current largest val
        # keep.append(i)
        keep[count] = i
        count += 1
        if idx.size(0) == 1:
            break
        idx = idx[:-1]  # remove kept element from view
        # load bboxes of next highest vals
        torch.index_select(x1, 0, idx, out=xx1)
        torch.index_select(y1, 0, idx, out=yy1)
        torch.index_select(x2, 0, idx, out=xx2)
        torch.index_select(y2, 0, idx, out=yy2)
        # store element-wise max with next highest score
        xx1 = torch.clamp(xx1, min=x1[i])
        yy1 = torch.clamp(yy1, min=y1[i])
        xx2 = torch.clamp(xx2, max=x2[i])
        yy2 = torch.clamp(yy2, max=y2[i])
        w.resize_as_(xx2)
        h.resize_as_(yy2)
        w = xx2 - xx1
        h = yy2 - yy1
        # check sizes of xx1 and xx2.. after each iteration
        w = torch.clamp(w, min=0.0)
        h = torch.clamp(h, min=0.0)
        inter = w*h
        # IoU = i / (area(a) + area(b) - i)
        rem_areas = torch.index_select(area, 0, idx)  # load remaining areas)
        union = (rem_areas - inter) + area[i]
        IoU = inter/union  # store result in iou
        # keep only elements with an IoU <= overlap
        idx = idx[IoU.le(overlap)]
    return keep, count

In [45]:
bs = 4
num_classes = 21
preds[4][0][1].reshape(bs, -1, num_classes)[:,:,:-1].shape

torch.Size([4, 9, 20])

In [46]:
# concat all cats one-hot for the 1st training example
all_cats = torch.cat([
    preds[i][0][1].reshape(bs, -1, num_classes)[:,:,:-1][0]
    for i in range(6)
], 0)

all_cats.shape

torch.Size([1940, 20])

In [47]:
max_conf, max_cls = all_cats.max(1)
max_conf, max_cls

(tensor([0.0742, 0.0908, 0.0462,  ..., 0.0200, 0.0193, 0.0218],
        grad_fn=<MaxBackward0>), tensor([ 1, 14,  3,  ..., 16, 16,  4]))

In [48]:
max_conf.shape, max_conf.shape

(torch.Size([1940]), torch.Size([1940]))

In [49]:
sorted_max_conf, sorted_max_cls_idxs = max_conf.sort(0, descending=True)
sorted_max_conf, sorted_max_cls_idxs

(tensor([0.1853, 0.1852, 0.1754,  ..., 0.0144, 0.0141, 0.0118],
        grad_fn=<SortBackward>),
 tensor([ 896, 1084, 1082,  ..., 1501, 1933,  651]))

In [50]:
max_idx = sorted_max_cls_idxs[0]
max_idx, max_conf[max_idx], max_cls[max_idx]

(tensor(896), tensor(0.1853, grad_fn=<SelectBackward>), tensor(17))

In [51]:
# surprisingly after 1 step, it predicted the right Category! of 6, which is a cat

In [52]:
all_bbs[max_idx]

tensor([ 0.0561,  0.0402,  0.0091, -0.0465], grad_fn=<SelectBackward>)

In [53]:
ann['bbs']

[[155, 96, 196, 174]]

In [54]:
im = open_image(ann['image_path'])
im.shape

(333, 500, 3)

In [55]:
gt_bb = Bboxer().scaled_fastai_bbs(ann['bbs'], im).squeeze()
gt_bb

array([0.28828829, 0.31      , 0.80747748, 0.69866667])

In [56]:
max_bb = all_bbs[max_idx].detach().numpy()
max_bb

array([ 0.05610389,  0.04020291,  0.00913793, -0.04651232], dtype=float32)

In [57]:
gt_bb.shape, max_bb.shape

((4,), (4,))

In [58]:
np.maximum(gt_bb[:2], max_bb[:2])

array([0.28828829, 0.31      ])

In [59]:
np.minimum(gt_bb[2:], max_bb[2:])

array([ 0.00913793, -0.04651232])

In [60]:
def single_bb_intersect(gt_bb, max_bb):
    wh = np.minimum(
        np.maximum(gt_bb[:2], max_bb[:2]) - np.minimum(gt_bb[2:], max_bb[2:]), 0)
    return wh[0] * wh[1]
    
single_bb_intersect(gt_bb, max_bb)

0.0

In [61]:
gt_bb

array([0.28828829, 0.31      , 0.80747748, 0.69866667])

In [62]:
def bb_area(bbs):
    return np.abs(bbs[0]-bbs[2])*np.abs(bbs[1]-bbs[3])

bb_area(gt_bb), bb_area(max_bb)

(0.20179153153153148, 0.004072664)

In [63]:
def single_bb_iou(gt_bb, max_bb):
    i = single_bb_intersect(gt_bb, max_bb)
    # don't forget to remove their overlapping area from the union calc!
    u = bb_area(gt_bb) + bb_area(max_bb) - i
    return i/u

single_bb_iou(gt_bb, max_bb)

0.0

In [64]:
a = np.array([0., 0., 10., 10.])
b = np.array([0., 0., 25., 25.])
single_bb_iou(a, b)

0.16

In [65]:
bb_area(a)

100.0

In [66]:
bb_area(b)

625.0

0.16

In [26]:
len(ims)

4

In [31]:
ims[0].shape

torch.Size([3, 300, 300])

In [32]:
ims.shape

torch.Size([4, 3, 300, 300])

In [29]:
im = ims[0].permute(1,2,0)
im.shape

torch.Size([300, 300, 3])

In [37]:
im2 = open_image(ann['image_path'])
im2.shape

(333, 500, 3)

In [33]:
bboxer = Bboxer()
bboxer.scaled_fastai_bbs(ann['bbs'], im)

array([[0.32      , 0.51666667, 0.89666667, 1.16666667]])

In [34]:
im.shape

torch.Size([300, 300, 3])

In [35]:
SIZE

300

In [40]:
def scaled_fastai_bbs(bbs, im):
    im_w = im.shape[1]
    im_h = im.shape[0]
    bbs = np.divide(bbs, [im_w, im_h, im_w, im_h])
    return np.array([
        bbs[:,1],
        bbs[:,0],
        bbs[:,3]+bbs[:,1]-(1/SIZE),
        bbs[:,2]+bbs[:,0]-(1/SIZE)]).T

fastai_bbs = scaled_fastai_bbs(np.array(ann['bbs']), im2)

# array([[[0.28828829, 0.31      , 0.80747748, 0.69866667],
fastai_bbs

array([[0.28828829, 0.31      , 0.80747748, 0.69866667]])

In [48]:
resized_im = cv2.resize(im2, (SIZE, SIZE))
resized_im.shape

(300, 300, 3)

In [62]:
# ax = show_img(resized_im)
# b = Bboxer.fastai_bb_to_pascal_bb((fastai_bbs*SIZE).squeeze())
# draw_rect(ax, b)

In [63]:
# fastai bbs
b = (fastai_bbs*SIZE).squeeze()
b

array([ 86.48648649,  93.        , 242.24324324, 209.6       ])

In [86]:
preds[-2][0][0].shape

torch.Size([4, 36])

In [91]:
pred_bbs = preds[-2][1][0]
pred_bbs.shape

torch.Size([4, 36])

In [92]:
pred_bbs.reshape(4, -1, 4).shape

torch.Size([4, 9, 4])

In [93]:
pred_bbs = pred_bbs.reshape(4, -1, 4)
pred_bbs.shape

torch.Size([4, 9, 4])

In [98]:
fastai_bbs, fastai_bbs.shape

(array([[0.28828829, 0.31      , 0.80747748, 0.69866667]]), (1, 4))

In [99]:
bboxer.anchor_corners(4).shape

(16, 4)

In [101]:
bbs = fastai_bbs
bbs_count = 9
bbs16 = np.reshape(np.tile(bbs, bbs_count), (-1,bbs_count,4))
bbs16.shape

(1, 9, 4)

In [107]:
pred_bbs.shape

torch.Size([4, 9, 4])

In [109]:
pred_bbs[0].unsqueeze(0).shape

torch.Size([1, 9, 4])

In [111]:
pred_bbs[0].unsqueeze(0).detach().numpy().shape

(1, 9, 4)

In [116]:
bbs16.shape

(1, 9, 4)

In [119]:
np.moveaxis(anchor_corners, (0,1,2), (0,2,1))[:,:2].shape

(1, 2, 9)

In [120]:
pred_bbs[0].shape

torch.Size([9, 4])

In [123]:
anchor_corners = pred_bbs[0].unsqueeze(0)
anchor_corners.shape

torch.Size([1, 9, 4])

In [125]:
pred_bbs[0].unsqueeze(0).permute(0,2,1)[:,:2].shape

torch.Size([1, 2, 9])

In [132]:
bbs16[:,:,:2].shape

(1, 9, 2)

In [136]:
pred_bbs[0].unsqueeze(0).detach().numpy()[:,:,:2].shape

(1, 9, 2)

In [139]:
anchor_corners = pred_bbs[0].unsqueeze(0).detach().numpy()
anchor_corners.shape

(1, 9, 4)

In [140]:
np.maximum(
    anchor_corners[:,:,:2],
    bbs16[:,:,:2]
)

array([[[0.28828829, 0.31      ],
        [0.28828829, 0.31      ],
        [0.28828829, 0.31      ],
        [0.28828829, 0.31      ],
        [0.28828829, 0.31      ],
        [0.28828829, 0.31      ],
        [0.28828829, 0.31      ],
        [0.28828829, 0.31      ],
        [0.28828829, 0.31      ]]])

In [146]:
np.minimum(
    anchor_corners[:,:,2:],
    bbs16[:,:,2:]
)

array([[[ 0.00215022,  0.01950801],
        [ 0.00425911,  0.01716721],
        [ 0.00564119,  0.01605638],
        [-0.00200997,  0.01785468],
        [-0.00373605,  0.02233196],
        [ 0.00184789,  0.01996584],
        [ 0.00416726,  0.01177759],
        [-0.00194897,  0.0158163 ],
        [-0.00054314,  0.01564306]]])

In [148]:
max_min = np.maximum(anchor_corners[:,:,:2], bbs16[:,:,:2]) - \
np.minimum(anchor_corners[:,:,2:], bbs16[:,:,2:])
max_min

array([[[0.28613807, 0.29049199],
        [0.28402918, 0.29283279],
        [0.2826471 , 0.29394362],
        [0.29029825, 0.29214532],
        [0.29202434, 0.28766804],
        [0.2864404 , 0.29003416],
        [0.28412102, 0.29822241],
        [0.29023726, 0.2941837 ],
        [0.28883143, 0.29435694]]])

In [155]:
fastai_bbs.shape

(1, 4)

In [156]:
bbs16 = np.reshape(np.tile(fastai_bbs, bbs_count), (-1,bbs_count,4))
bbs16.shape

(1, 9, 4)

In [158]:
preds[-2][0][0].reshape(4, -1, 4)[0].shape

torch.Size([9, 4])

In [160]:
# bbs = self.scaled_fastai_bbs(bbs, im)
bbs_count = 9 #grid_size*grid_size
bbs16 = np.reshape(np.tile(fastai_bbs, bbs_count), (-1,bbs_count,4)) # np.reshape(np.tile(bbs, bbs_count), (-1,bbs_count,4))
anchor_corners = preds[-2][0][0].reshape(4, -1, 4)[0].detach().numpy() # self.anchor_corners(grid_size, aspect_ratio)
intersect = np.minimum(
    np.maximum(anchor_corners[:,:2], bbs16[:,:,:2]) - \
    np.minimum(anchor_corners[:,2:], bbs16[:,:,2:]), 0)
intersect[:,:,0] * intersect[:,:,1]

array([[0., 0., 0., 0., 0., 0., 0., 0., 0.]])

In [161]:
bbs16

array([[[0.28828829, 0.31      , 0.80747748, 0.69866667],
        [0.28828829, 0.31      , 0.80747748, 0.69866667],
        [0.28828829, 0.31      , 0.80747748, 0.69866667],
        [0.28828829, 0.31      , 0.80747748, 0.69866667],
        [0.28828829, 0.31      , 0.80747748, 0.69866667],
        [0.28828829, 0.31      , 0.80747748, 0.69866667],
        [0.28828829, 0.31      , 0.80747748, 0.69866667],
        [0.28828829, 0.31      , 0.80747748, 0.69866667],
        [0.28828829, 0.31      , 0.80747748, 0.69866667]]])

In [162]:
anchor_corners = preds[-2][0][0].reshape(4, -1, 4)[0].detach().numpy()
anchor_corners

array([[0.        , 0.01953707, 0.        , 0.00457765],
       [0.        , 0.01807468, 0.        , 0.00290771],
       [0.00529758, 0.02018872, 0.        , 0.00574783],
       [0.        , 0.02480964, 0.        , 0.00409513],
       [0.        , 0.02163003, 0.        , 0.00946532],
       [0.00095571, 0.01893313, 0.        , 0.01274693],
       [0.        , 0.02924913, 0.        , 0.0018008 ],
       [0.        , 0.02630012, 0.        , 0.0105252 ],
       [0.00297953, 0.02177737, 0.        , 0.01682711]], dtype=float32)

# NEXT check other feature map sizes

In [154]:
def get_intersection(self, bbs, im, grid_size=4, aspect_ratio=(1.,1.)):
    # returns the i part of IoU scaled [0,1]
    bbs = self.scaled_fastai_bbs(bbs, im)
    bbs_count = grid_size*grid_size
    bbs16 = np.reshape(np.tile(bbs, bbs_count), (-1,bbs_count,4))
    anchor_corners = self.anchor_corners(grid_size, aspect_ratio)
    intersect = np.minimum(
        np.maximum(anchor_corners[:,:2], bbs16[:,:,:2]) - \
        np.minimum(anchor_corners[:,2:], bbs16[:,:,2:]), 0)
    pdb.set_trace()
    return intersect[:,:,0] * intersect[:,:,1]

Bboxer.get_intersection = get_intersection

Bboxer().get_intersection(ann['bbs'], im2).shape

> <ipython-input-154-cf8d8de7ebe8>(11)get_intersection()
-> return intersect[:,:,0] * intersect[:,:,1]
(Pdb) bbs.shape
(1, 4)
(Pdb) bbs16.shape
(1, 16, 4)
(Pdb) anchor_corners.shape
(16, 4)
(Pdb) c


(1, 16)

In [110]:
anchor_corners = pred_bbs[0].unsqueeze(0).detach().numpy()
intersect = np.minimum(
    np.maximum(anchor_corners[:,:2], bbs16[:,:,:2]) - \
    np.minimum(anchor_corners[:,2:], bbs16[:,:,2:]), 0)
intersect[:,:,0] * intersect[:,:,1]

ValueError: operands could not be broadcast together with shapes (1,2,4) (1,9,2) 

In [105]:
anchor_corners.shape

(4, 9, 4)

In [12]:
cel = nn.CrossEntropyLoss()

In [56]:
inputs = torch.tensor([[0., 10.], [5., 0.]], dtype=torch.float)
targets = torch.tensor([1, 0], dtype=torch.long)
cel(inputs, targets)

tensor(0.0034)

In [57]:
inputs.shape

torch.Size([2, 2])

In [58]:
targets.shape

torch.Size([2])

In [90]:
gt_cats[4][0].shape

torch.Size([4, 9])

In [91]:
gt_cats[4][0]

tensor([[20, 20, 20, 20,  6, 20, 20, 20, 20],
        [20, 20, 20, 20, 12, 20, 20, 20, 20],
        [14, 20, 14, 20, 14, 20,  1,  1,  1],
        [20, 20, 20, 20,  6, 20, 20, 20, 20]], dtype=torch.uint8)

In [92]:
preds = model(ims)

In [93]:
preds[4][0][1].shape

torch.Size([4, 189])

In [94]:
from ssdmultibox.datasets import Bboxer, NUM_CLASSES

feature_map_idx = 0
y = gt_cats[4][feature_map_idx]
yhat = preds[4][feature_map_idx][1]

batch_size = y.shape[0]
cats_label = Bboxer.one_hot_encoding(y)[:,:,:-1]
cats_preds = yhat.reshape(batch_size, -1, NUM_CLASSES)[:,:,:-1]

In [85]:
gt_idxs = y != 20

In [86]:
cats_label[gt_idxs].argmax(1)

tensor([11, 18, 18,  8,  7,  6,  8])

In [87]:
y[y != 20]

tensor([11, 18, 18,  8,  7,  6,  8], dtype=torch.uint8)

In [88]:
cats_preds[gt_idxs].max(1)

(tensor([0.0357, 0.0276, 0.0304, 0.0250, 0.0358, 0.0306, 0.0304],
        grad_fn=<MaxBackward0>), tensor([18, 18, 18,  2, 18, 18, 18]))

In [51]:
# gt shape
cats_preds[gt_idxs].argmax(1).shape, cats_label[gt_idxs].argmax(1).shape

(torch.Size([12]), torch.Size([12]))

In [52]:
cats_preds[gt_idxs].shape, cats_label[gt_idxs].shape

(torch.Size([12, 20]), torch.Size([12, 20]))

In [97]:
import torch.nn.functional as F

F.binary_cross_entropy_with_logits(cats_preds[gt_idxs], cats_label[gt_idxs])

tensor(0.6943, grad_fn=<MeanBackward1>)

In [8]:
from ssdmultibox.datasets import Bboxer, NUM_CLASSES

batch_size = 4
for fm_idx in range(len(gt_cats)):
    for ar_idx in range(len(gt_cats[fm_idx])):
        y = gt_cats[fm_idx][ar_idx]
        yhat = preds[fm_idx][ar_idx][1]
        cats_label = Bboxer.one_hot_encoding(y)[:,:,:-1]
        cats_preds = yhat.reshape(batch_size, -1, NUM_CLASSES)[:,:,:-1]
        print(cats_preds.argmax())
#         loss = F.binary_cross_entropy_with_logits(
#             cats_preds[fm_idx][ar_idx],
#             cats_label[fm_idx][ar_idx]
#         )
#         print(loss.item())

tensor(29712)
tensor(29707)
tensor(102291)
tensor(58764)
tensor(28918)
tensor(103671)
tensor(14941)
tensor(18761)
tensor(20179)
tensor(14504)
tensor(15228)
tensor(15165)
tensor(4173)
tensor(4165)
tensor(1078)
tensor(4031)
tensor(5738)
tensor(5295)
tensor(521)
tensor(1224)
tensor(596)
tensor(1428)
tensor(1086)
tensor(1284)
tensor(453)
tensor(95)
tensor(432)
tensor(272)
tensor(196)
tensor(551)
tensor(29)
tensor(70)
tensor(26)
tensor(16)
tensor(42)
tensor(9)


In [119]:
preds[-1][-1][1]

tensor([[-0.0110,  0.0171,  0.0216,  0.0110,  0.0149, -0.0023, -0.0193,  0.0132,
         -0.0006,  0.0052, -0.0102, -0.0137, -0.0036, -0.0005, -0.0087, -0.0193,
         -0.0056,  0.0157,  0.0210, -0.0135,  0.0081],
        [-0.0110,  0.0171,  0.0216,  0.0110,  0.0149, -0.0023, -0.0193,  0.0132,
         -0.0006,  0.0052, -0.0102, -0.0137, -0.0036, -0.0005, -0.0087, -0.0193,
         -0.0056,  0.0157,  0.0210, -0.0135,  0.0081],
        [-0.0110,  0.0171,  0.0216,  0.0110,  0.0149, -0.0023, -0.0193,  0.0132,
         -0.0006,  0.0052, -0.0102, -0.0137, -0.0036, -0.0005, -0.0087, -0.0193,
         -0.0056,  0.0157,  0.0210, -0.0135,  0.0081],
        [-0.0110,  0.0171,  0.0216,  0.0110,  0.0149, -0.0023, -0.0193,  0.0132,
         -0.0006,  0.0052, -0.0102, -0.0137, -0.0036, -0.0005, -0.0087, -0.0193,
         -0.0056,  0.0157,  0.0210, -0.0135,  0.0081]],
       grad_fn=<ViewBackward>)

In [10]:
(preds[0][0][1] == preds[0][2][1]).all()

tensor(0, dtype=torch.uint8)

In [71]:
import matplotlib

In [21]:
d = {'foo': 1, 'bar': 2}

In [22]:
d[0]

KeyError: 0

In [127]:
len(preds[0])

6

In [102]:
from ssdmultibox.criterion import CatsBCELoss, BbsL1Loss, SSDLoss

In [101]:
cats_criterion = CatsBCELoss()
bbs_criterion = BbsL1Loss()

cats_loss = cats_criterion(gt_cats, preds)
cats_loss

tensor(26.0574, grad_fn=<ThAddBackward>)

In [103]:
ssd_criterion = SSDLoss()

In [104]:
ssd_criterion._matched_gt_cats(gt_cats, preds)

tensor(296.)

In [105]:
n = 0

for fm_idx in range(len(gt_cats)):
    for ar_idx in range(len(gt_cats[fm_idx])):
        gt_idxs = gt_cats[fm_idx][ar_idx] != 20
        n += gt_idxs.sum()
n

tensor(296)

In [106]:
bbs_criterion(gt_bbs, gt_cats, preds)

tensor(6.4059, grad_fn=<ThAddBackward>)