In [1]:
import numpy as np
import torch 
import matplotlib.pyplot as plt
%matplotlib auto

Using matplotlib backend: Qt5Agg


In [2]:
def multibox_prior(data,sizes,ratios):
    in_height,in_weight=data.shape[-2:]
    num_sizes,num_ratios=len(sizes),len(ratios)
    box_per_pixel=(num_sizes+num_ratios-1)
    size_tensor=torch.tensor(sizes)
    ratio_tensor=torch.tensor(ratios)
    
    offset_h,offset_w=0.5,0.5
    steps_h=1.0/in_height
    steps_w=1.0/in_weight
    
    center_h=(torch.arange(in_height)+offset_h)*steps_h
    center_w=(torch.arange(in_weight)+offset_w)*steps_w
    shift_y,shift_x=torch.meshgrid(center_h,center_w)
    shift_y,shift_x=shift_y.reshape(-1),shift_x.reshape(-1)
    
    w=torch.cat((size_tensor*torch.sqrt(ratio_tensor[0]),size_tensor[0]*torch.sqrt(ratio_tensor[1:]))) * in_height/in_weight
    h=torch.cat((size_tensor/torch.sqrt(ratio_tensor[0]),size_tensor[1]/torch.sqrt(ratio_tensor[1:])))
    
    anchor_manipulations=torch.stack((-w,-h,w,h)).T.repeat(in_height*in_weight,1)/2
    
    out_grid=torch.stack([shift_x,shift_y,shift_x,shift_y],dim=1).repeat_interleave(box_per_pixel,dim=0)
    output=out_grid+anchor_manipulations
    return output.unsqueeze(0)
    

In [3]:
img=plt.imread(r'F:\study\ml\LM\image\13\catdog.jpg')
h,w=img.shape[:2]
print(h,w)

561 728


In [4]:
X=torch.rand(size=(1,3,h,w))
Y=multibox_prior(X,sizes=[0.75,0.5,0.25],ratios=[1,2,0.5])
Y.shape

  return _VF.meshgrid(tensors, **kwargs)  # type: ignore[attr-defined]


torch.Size([1, 2042040, 4])

In [5]:
def bbox_to_rect(bbox,color):
    return plt.Rectangle(xy=(bbox[0],bbox[1]),width=bbox[2]-bbox[0],height=bbox[3]-bbox[1],fill=False,edgecolor=color,linewidth=2)

In [6]:
def show_bboxes(axes,bboxes,labels=None,colors=None):
    def _make_list(obj,default_values=None):
        if obj is None:
            obj=default_values
        elif not isinstance(obj,(list,tuple)):
            obj=[obj]
        return obj
    
    labels=_make_list(labels)
    colors=_make_list(colors,['b','g','r','m','c'])
    plt.imshow(axes)
    for i ,bbox in enumerate(bboxes):
        color=colors[i % len(colors)]
        rect=bbox_to_rect(bbox.detach().numpy(),color)
        plt.gca().add_patch(rect)
        if labels and len(labels)>i:
            text_color='k' if color=='w' else 'w'
            plt.text(rect.xy[0],rect.xy[1],labels[i],va='center',ha='center',fontsize=9,color=text_color,bbox=dict(facecolor=color,lw=0))
        
    

In [7]:
boxes = Y.reshape(h, w, 5, 4)
bbox_scale = torch.tensor((w, h, w, h))
# box1=(boxes[250, 250, 2, :]*bbox_scale).detach().numpy()
box1=(boxes[250, 250, :, :]*bbox_scale).detach().numpy()

In [8]:
h,w=img.shape[:2]

bbox_scale = torch.tensor((w, h, w, h))
show_bboxes(img, boxes[250, 250, :, :] * bbox_scale,['s=0.75, r=1', 's=0.5, r=1', 's=0.25, r=1', 's=0.75, r=2','s=0.75, r=0.5'])

In [9]:
def box_iou(boxes1,boxes2):
    box_area=lambda boxes:((boxes[:,2]-boxes[:,0])*(boxes[:,3]-boxes[:,1]))
    areas1=box_area(boxes1)
    areas2=box_area(boxes2)
    
    inter_upperlefts=torch.max(boxes1[:,None,:2],boxes2[:,:2])
    inter_lowerright=torch.min(boxes1[:,None,2:],boxes2[:,2:])
    
    inters=(inter_lowerright-inter_upperlefts).clamp(min=0)
    inter_areas=inters[:,:,0]*inters[:,:,1]
    union_areas=areas1[:,None]+areas2-inter_areas
    return inter_areas/union_areas
    

In [46]:
ground_truth = torch.tensor([[0, 0.1, 0.08, 0.52, 0.92],[1, 0.55, 0.2, 0.9, 0.88]])
anchors = torch.tensor([[0, 0.1, 0.2, 0.3], [0.15, 0.2, 0.4, 0.4],[0.63, 0.05, 0.88, 0.98], [0.66, 0.45, 0.8, 0.8],[0.57, 0.3, 0.92, 0.9]])

In [11]:
box_iou(anchors,ground_truth[:,1:])

tensor([[0.0536, 0.0000],
        [0.1417, 0.0000],
        [0.0000, 0.5657],
        [0.0000, 0.2059],
        [0.0000, 0.7459]])

In [12]:
torch.max(box_iou(anchors,ground_truth[:,1:]),dim=1)

torch.return_types.max(
values=tensor([0.0536, 0.1417, 0.5657, 0.2059, 0.7459]),
indices=tensor([0, 0, 1, 1, 1]))

In [13]:
torch.argmax(box_iou(anchors,ground_truth[:,1:]))

tensor(9)

In [14]:
max_ious, indices = torch.max(box_iou(anchors,ground_truth[:,1:]),dim=1)

In [15]:
max_ious

tensor([0.0536, 0.1417, 0.5657, 0.2059, 0.7459])

In [16]:
indices

tensor([0, 0, 1, 1, 1])

In [17]:
max_ious

tensor([0.0536, 0.1417, 0.5657, 0.2059, 0.7459])

In [18]:
max_ious >= 0.5

tensor([False, False,  True, False,  True])

In [19]:
torch.Tensor([False, False,  True, False,  True])

tensor([0., 0., 1., 0., 1.])

In [20]:
torch.nonzero(max_ious >= 0.5)

tensor([[2],
        [4]])

In [21]:
indices[max_ious >= 0.5]

tensor([1, 1])

In [22]:
9%2

1

In [23]:
9/2

4.5

In [24]:
def assign_anchor_to_bbox(groud_truth,anchors,device,iou_threshold=0.5):
    num_anchors,num_gt_boxes=anchors.shape[0],groud_truth.shape[0]
    jaccard=box_iou(anchors,groud_truth)
    anchors_bbox_map=torch.full((num_anchors,),-1,dtype=torch.long,device=device)
    max_ious,indices=torch.max(jaccard,dim=1)
    anc_i=torch.nonzero(max_ious>0.5).reshape(-1)
    box_j=indices[max_ious>=0.5]
    anchors_bbox_map[anc_i]=box_j
    col_discard=torch.full((num_anchors,),-1)
    row_discard=torch.full((num_gt_boxes,),-1)
    print('jaccard:\n',anchors_bbox_map)
    print('jaccard:\n',jaccard)
    for _ in range(num_gt_boxes):
        max_idx=torch.argmax(jaccard)
        box_idx=(max_idx % num_gt_boxes).long()
        anc_idx=(max_idx / num_gt_boxes).long()
        anchors_bbox_map[anc_idx]=box_idx
        jaccard[:,box_idx]=col_discard
        jaccard[anc_idx,:]=row_discard
        print('__________________:',_)
        print('max_idx:',max_idx)
        print('box_idx:',box_idx)
        print('anc_idx:',anc_idx)
        print('jaccard:\n',jaccard)
        print('jaccard:\n',anchors_bbox_map)
        
    return anchors_bbox_map


In [25]:
assign_anchor_to_bbox(ground_truth[:,1:],anchors,'cpu')

jaccard:
 tensor([-1, -1,  1, -1,  1])
jaccard:
 tensor([[0.0536, 0.0000],
        [0.1417, 0.0000],
        [0.0000, 0.5657],
        [0.0000, 0.2059],
        [0.0000, 0.7459]])
__________________: 0
max_idx: tensor(9)
box_idx: tensor(1)
anc_idx: tensor(4)
jaccard:
 tensor([[ 0.0536, -1.0000],
        [ 0.1417, -1.0000],
        [ 0.0000, -1.0000],
        [ 0.0000, -1.0000],
        [-1.0000, -1.0000]])
jaccard:
 tensor([-1, -1,  1, -1,  1])
__________________: 1
max_idx: tensor(2)
box_idx: tensor(0)
anc_idx: tensor(1)
jaccard:
 tensor([[-1., -1.],
        [-1., -1.],
        [-1., -1.],
        [-1., -1.],
        [-1., -1.]])
jaccard:
 tensor([-1,  0,  1, -1,  1])


tensor([-1,  0,  1, -1,  1])

In [26]:
[1,2,3,4,5]>2

TypeError: '>' not supported between instances of 'list' and 'int'

In [27]:
torch.Tensor([1,2,3,4,5])>2

tensor([False, False,  True,  True,  True])

In [28]:
(torch.Tensor([1,2,3,4,5])>2).float()

tensor([0., 0., 1., 1., 1.])

In [29]:
(torch.Tensor([1,2,3,4,5])>2).float().unsqueeze(0)

tensor([[0., 0., 1., 1., 1.]])

In [30]:
(torch.Tensor([1,2,3,4,5])>2).float().unsqueeze(-1)

tensor([[0.],
        [0.],
        [1.],
        [1.],
        [1.]])

In [31]:
def box_corner_to_center(boxes):
    x1,y1,x2,y2=boxes[:,0],boxes[:,1],boxes[:,2],boxes[:,3]
    cx=(x1+x2)/2
    cy=(y1+y2)/2
    w=x2-x1
    h=y2-y1
    boxes=torch.stack((cx,cy,w,h),axis=-1)
    return boxes

In [32]:
def offset_boxes(anchors,assigned_bb,eps=1e-6):
    c_anc=box_corner_to_center(anchors)
    c_assigned_bb=box_corner_to_center(assigned_bb)
    offset_xy=10*(c_assigned_bb[:,:2]-c_anc[:,:2])/c_anc[:,2:]
    offset_wh=5*torch.log(eps+c_assigned_bb[:,2:]/c_anc[:,2:])
    offset=torch.cat([offset_xy,offset_wh],axis=1)
    return offset

In [33]:
def multibox_target(anchors,labels):
    batch_size,anchors=labels.shape[0],anchors.squeeze(0)
    batch_offset,batch_mask,batch_class_labels=[],[],[]
    device,num_anchors=anchors.device,anchors.shape[0]
    for i in range(batch_size):
        label=labels[i,:,:]
        anchors_bbox_map=assign_anchor_to_bbox(label[:,1:],anchors,device)
        bbox_mask=((anchors_bbox_map>=0).float().unsqueeze(-1).repeat(1,4))
        class_labels=torch.zeros(num_anchors,dtype=torch.long,device=device)
        assigned_bb=torch.zeros((num_anchors,4),dtype=torch.float32,device=device)
        indices_true=torch.nonzero(anchors_bbox_map>=0)
        print('anchors_bbox_map ',anchors_bbox_map)
        print('indices_true ',indices_true)
        bb_idx=anchors_bbox_map[indices_true]
        print('label:',label)
        print('bb_idx:',bb_idx)
        class_labels[indices_true]=label[bb_idx,0].long()+1
        print('class_labels:',class_labels)
        assigned_bb[indices_true]=label[bb_idx,1:]
        offset=offset_boxes(anchors,assigned_bb)*bbox_mask
        batch_offset.append(offset.reshape(-1))
        batch_mask.append(bbox_mask.reshape(-1))
        batch_class_labels.append(class_labels)
    bbox_offset=torch.stack(batch_offset)
    bbox_mask=torch.stack(batch_mask)
    class_labesl=torch.stack(batch_class_labels)
    return (bbox_offset,bbox_mask,class_labels)

In [36]:
show_bboxes(img, ground_truth[:, 1:] * bbox_scale, ['dog', 'cat'], 'k')
show_bboxes(img, anchors * bbox_scale, ['0', '1', '2', '3', '4'])

In [37]:
ground_truth.unsqueeze(dim=0)

tensor([[[0.0000, 0.1000, 0.0800, 0.5200, 0.9200],
         [1.0000, 0.5500, 0.2000, 0.9000, 0.8800]]])

In [38]:
ground_truth.unsqueeze(dim=0).shape

torch.Size([1, 2, 5])

In [39]:
labels = multibox_target(anchors.unsqueeze(dim=0),ground_truth.unsqueeze(dim=0))

jaccard:
 tensor([-1, -1,  1, -1,  1])
jaccard:
 tensor([[0.0536, 0.0000],
        [0.1417, 0.0000],
        [0.0000, 0.5657],
        [0.0000, 0.2059],
        [0.0000, 0.7459]])
__________________: 0
max_idx: tensor(9)
box_idx: tensor(1)
anc_idx: tensor(4)
jaccard:
 tensor([[ 0.0536, -1.0000],
        [ 0.1417, -1.0000],
        [ 0.0000, -1.0000],
        [ 0.0000, -1.0000],
        [-1.0000, -1.0000]])
jaccard:
 tensor([-1, -1,  1, -1,  1])
__________________: 1
max_idx: tensor(2)
box_idx: tensor(0)
anc_idx: tensor(1)
jaccard:
 tensor([[-1., -1.],
        [-1., -1.],
        [-1., -1.],
        [-1., -1.],
        [-1., -1.]])
jaccard:
 tensor([-1,  0,  1, -1,  1])
anchors_bbox_map  tensor([-1,  0,  1, -1,  1])
indices_true  tensor([[1],
        [2],
        [4]])
label: tensor([[0.0000, 0.1000, 0.0800, 0.5200, 0.9200],
        [1.0000, 0.5500, 0.2000, 0.9000, 0.8800]])
bb_idx: tensor([[0],
        [1],
        [1]])
class_labels: tensor([0, 1, 2, 0, 2])


In [40]:
labels

(tensor([[-0.0000e+00, -0.0000e+00, -0.0000e+00, -0.0000e+00,  1.4000e+00,
           1.0000e+01,  2.5940e+00,  7.1754e+00, -1.2000e+00,  2.6882e-01,
           1.6824e+00, -1.5655e+00, -0.0000e+00, -0.0000e+00, -0.0000e+00,
          -0.0000e+00, -5.7143e-01, -1.0000e+00,  4.1723e-06,  6.2582e-01]]),
 tensor([[0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1.,
          1., 1.]]),
 tensor([0, 1, 2, 0, 2]))

In [41]:
labels[2]

tensor([0, 1, 2, 0, 2])

In [42]:
labels[1]

tensor([[0., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1.,
         1., 1.]])

In [43]:
labels[0]

tensor([[-0.0000e+00, -0.0000e+00, -0.0000e+00, -0.0000e+00,  1.4000e+00,
          1.0000e+01,  2.5940e+00,  7.1754e+00, -1.2000e+00,  2.6882e-01,
          1.6824e+00, -1.5655e+00, -0.0000e+00, -0.0000e+00, -0.0000e+00,
         -0.0000e+00, -5.7143e-01, -1.0000e+00,  4.1723e-06,  6.2582e-01]])

In [47]:
ground_truth.unsqueeze(dim=0).shape

torch.Size([1, 2, 5])

In [48]:
ground_truth

tensor([[0.0000, 0.1000, 0.0800, 0.5200, 0.9200],
        [1.0000, 0.5500, 0.2000, 0.9000, 0.8800]])

In [50]:
torch.stack([torch.Tensor([1,2,3]),torch.Tensor([4,5,6])])

tensor([[1., 2., 3.],
        [4., 5., 6.]])

In [51]:
def box_center_to_corner(boxes):
    cx,cy,w,h=boxes[:,0],boxes[:,1],boxes[:,2],boxes[:,3]
    x1=cx-0.5*w
    y1=cy-0.5*h
    x2=cx+0.5*w
    y2=cy+0.5*h
    boxes=torch.stack((x1,y1,x2,y2),axis=-1)
    return boxes

In [74]:
def offset_inverse(anchors,offset_preds):
    anc=box_corner_to_center(anchors)
    pred_bbox_xy=(offset_preds[:,:2]*anc[:,2:]/10)+anc[:,:2]
    pred_bbox_wh=torch.exp(offset_preds[:,2:]/5)*anc[:,2:]
    pred_bbox=torch.cat((pred_bbox_xy,pred_bbox_wh),axis=1)
    predicted_bbox=box_center_to_corner(pred_bbox)
    return predicted_bbox

In [90]:
def nms(boxes,scores,iou_threshold):
    B=torch.argsort(scores,dim=-1,descending=True)
    keep=[]
    while B.numel()>0:
        i=B[0]
        keep.append(i)
        if B.numel()==1:
            break
        iou=box_iou(boxes[i,:].reshape(-1,4),boxes[B[1:],:].reshape(-1,4)).reshape(-1)
        inds=torch.nonzero(iou<=iou_threshold).reshape(-1)
        print('iou : ',iou)
        print(inds)
        B=B[inds+1]
    print(keep)
    return torch.tensor(keep,device=boxes.device)

In [63]:
cls_probs = torch.tensor([[0] * 4, [0.9, 0.8, 0.7, 0.1], [0.1, 0.2, 0.3, 0.9]])
torch.argsort(cls_probs,dim=-1,descending=True)

tensor([[0, 1, 2, 3],
        [0, 1, 2, 3],
        [3, 2, 1, 0]])

In [57]:
anchors.shape

torch.Size([5, 4])

In [58]:
anchors.reshape(-1,4)

tensor([[0.0000, 0.1000, 0.2000, 0.3000],
        [0.1500, 0.2000, 0.4000, 0.4000],
        [0.6300, 0.0500, 0.8800, 0.9800],
        [0.6600, 0.4500, 0.8000, 0.8000],
        [0.5700, 0.3000, 0.9200, 0.9000]])

In [64]:
nms(anchors,cls_probs,0.5)

iou :  tensor([1.0000, 0.0588, 0.0000, 0.0000, 0.0000, 0.0000, 0.0588, 1.0000, 0.0588,
        1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 1.0000, 0.0588, 0.0000, 0.0000,
        1.0000, 0.2108, 0.2108, 1.0000, 0.0000, 0.0000, 0.0000, 0.0000, 0.2108,
        1.0000, 1.0000, 0.2108, 0.0000, 0.0000])
tensor([ 1,  2,  3,  4,  5,  6,  8, 10, 11, 12, 13, 15, 16, 17, 19, 20, 22, 23,
        24, 25, 26, 29, 30, 31])


IndexError: index 3 is out of bounds for dimension 0 with size 3

In [100]:
def multibox_detection(cls_probs,offset_preds,anchors,nms_threshold=0.5,pos_threshold=0.009999999):
    device,batch_size=cls_probs.device,cls_probs.shape[0]
    anchors=anchors.squeeze(0)
    num_classes,num_anchors=cls_probs.shape[1],cls_probs.shape[2]
    out=[]
    for i in range(batch_size):
        cls_prob,offset_pred=cls_probs[i],offset_preds[i].reshape(-1,4)
        conf,class_id=torch.max(cls_prob[1:],0)
        predicted_bb=offset_inverse(anchors,offset_pred)
#         print('predicted_bb :',predicted_bb)
#         print('conf : ',conf)
#         return (predicted_bb,conf)
        keep=nms(predicted_bb,conf,nms_threshold)
        all_idx=torch.arange(num_anchors,dtype=torch.long,device=device)
        combined=torch.cat((keep,all_idx))
        uniques,counts=combined.unique(return_counts=True)
        non_keep=uniques[counts==1]
        all_id_sorted=torch.cat((keep,non_keep))
        class_id[non_keep]=-1
        class_id=class_id[all_id_sorted]
        conf,predicted_bb=conf[all_id_sorted],predicted_bb[all_id_sorted]
        below_min_idx=(conf<pos_threshold)
        class_id[below_min_idx]=-1
        conf[below_min_idx]=1-conf[below_min_idx]
        pred_info=torch.cat((class_id.unsqueeze(1),conf.unsqueeze(1),predicted_bb),dim=1)
        out.append(pred_info)
    return torch.stack(out)

In [80]:
cls_probs = torch.tensor([[0] * 4, [0.9, 0.8, 0.7, 0.1], [0.1, 0.2, 0.3, 0.9]])
anchors = torch.tensor([[0.1, 0.08, 0.52, 0.92], [0.08, 0.2, 0.56, 0.95],[0.15, 0.3, 0.62, 0.91], [0.55, 0.2, 0.9, 0.88]])
offset_preds = torch.tensor([0] * anchors.numel())

In [83]:
# (predicted_bb,conf)=multibox_detection(cls_probs.unsqueeze(dim=0),offset_preds.unsqueeze(dim=0),anchors.unsqueeze(dim=0),nms_threshold=0.5)

predicted_bb : tensor([[0.1000, 0.0800, 0.5200, 0.9200],
        [0.0800, 0.2000, 0.5600, 0.9500],
        [0.1500, 0.3000, 0.6200, 0.9100],
        [0.5500, 0.2000, 0.9000, 0.8800]])
conf :  tensor([0.9000, 0.8000, 0.7000, 0.9000])


In [84]:
predicted_bb

tensor([[0.1000, 0.0800, 0.5200, 0.9200],
        [0.0800, 0.2000, 0.5600, 0.9500],
        [0.1500, 0.3000, 0.6200, 0.9100],
        [0.5500, 0.2000, 0.9000, 0.8800]])

In [85]:
conf

tensor([0.9000, 0.8000, 0.7000, 0.9000])

In [86]:
[0]*4

[0, 0, 0, 0]

In [88]:
torch.max(cls_probs[1:],0)

torch.return_types.max(
values=tensor([0.9000, 0.8000, 0.7000, 0.9000]),
indices=tensor([0, 0, 0, 1]))

In [98]:
def nms(boxes,scores,iou_threshold):
    B=torch.argsort(scores,dim=-1,descending=True)
    print('BB:',B)
    keep=[]
    while B.numel()>0:
        i=B[0]
        keep.append(i)
        if B.numel()==1:
            break
        iou=box_iou(boxes[i,:].reshape(-1,4),boxes[B[1:],:].reshape(-1,4)).reshape(-1)
        inds=torch.nonzero(iou<=iou_threshold).reshape(-1)
        print('iou : ',iou)
        print('inds: ',inds)
        B=B[inds+1]
        print('B : ',B)
    print(keep)
    return torch.tensor(keep,device=boxes.device)

In [108]:
predicted_bb=torch.tensor([[0.1000, 0.0800, 0.5200, 0.9200],
        [0.0800, 0.2000, 0.5600, 0.9500],
        [0.1500, 0.3000, 0.6200, 0.9100],
        [0.5500, 0.2000, 0.9000, 0.8800]])
conf=torch.tensor([0.9000, 0.8000, 0.7000, 0.9000])

In [109]:
nms(predicted_bb,conf,0.5)

BB: tensor([0, 3, 1, 2])
iou :  tensor([0.0000, 0.7368, 0.5454])
inds:  tensor([0])
B :  tensor([3])
[tensor(0), tensor(3)]


tensor([0, 3])

In [103]:
show_bboxes(img, anchors * bbox_scale,['dog=0.9', 'dog=0.8', 'dog=0.7', 'cat=0.9'])

In [104]:
output = multibox_detection(cls_probs.unsqueeze(dim=0),offset_preds.unsqueeze(dim=0),anchors.unsqueeze(dim=0),nms_threshold=0.5)

BB: tensor([0, 3, 1, 2])
iou :  tensor([0.0000, 0.7368, 0.5454])
inds:  tensor([0])
B :  tensor([3])
[tensor(0), tensor(3)]


In [105]:
output

tensor([[[ 0.0000,  0.9000,  0.1000,  0.0800,  0.5200,  0.9200],
         [ 1.0000,  0.9000,  0.5500,  0.2000,  0.9000,  0.8800],
         [-1.0000,  0.8000,  0.0800,  0.2000,  0.5600,  0.9500],
         [-1.0000,  0.7000,  0.1500,  0.3000,  0.6200,  0.9100]]])

In [107]:
for i in output[0].detach().numpy():
    if i[0]==-1:
        continue
    label=('dog=','cat=')[int(i[0])]+str(i[1])
    show_bboxes(img,[torch.tensor(i[2:])*bbox_scale],label)