# Debug the loss function

In [1]:
%reload_ext autoreload
%autoreload 2
%matplotlib inline

In [2]:
from fastai.vision import *
from fastai.core import *
import json
from collections import defaultdict
import pdb
import numpy as np

## Data

In [3]:
coco = untar_data(URLs.COCO_SAMPLE)

img_dir = coco/'train_sample'
annotations = coco/'annotations'/'train_sample.json'

#image size
sz = 224

with open(annotations) as f:
    train_json = json.load(f)
ncat = len(train_json['categories'])

images, lbl_bbox = get_annotations(annotations)

img2bbox = dict(zip(images, lbl_bbox))
get_y_func = lambda o:img2bbox[o.name]

data = (ObjectItemList.from_folder(coco)
        #Where are the images? -> in coco and its subfolders
        .split_by_rand_pct(valid_pct=0.1, seed=0)                          
        #How to split in train/valid? -> randomly with the default 20% in valid
        .label_from_func(get_y_func)
        #How to find the labels? -> use get_y_func on the file name of the data
        .transform(get_transforms(), size=sz, tfm_y=True)
        #Data augmentation? -> Standard transforms; also transform the label images
        .databunch(bs=8, collate_fn=bb_pad_collate))   
        #Finally we convert to a DataBunch, use a batch size of 16,
        # and we use bb_pad_collate to collate the data into a mini-batch

## Model

In [4]:
class StdConv(nn.Module):
    def __init__(self, nin, nout, stride=2, drop=0.1):
        super().__init__()
        self.conv = nn.Conv2d(nin, nout, 3, stride=stride, padding=1)
        self.bn = nn.BatchNorm2d(nout)
        self.drop = nn.Dropout(drop)
        
    def forward(self, x): return self.drop(self.bn(F.relu(self.conv(x))))

def flatten_conv(x,k):
    bs,nf,gx,gy = x.size()
    x = x.permute(0,2,3,1).contiguous()
    return x.view(bs,-1,nf//k)

class OutConv(nn.Module):
    def __init__(self, k, nin, bias):
        super().__init__()
        self.k = k
        self.oconv1 = nn.Conv2d(nin, 4*k, 3, padding=1) # first bboxes
        self.oconv2 = nn.Conv2d(nin, (ncat+1)*k, 3, padding=1) # than class labels
        self.oconv2.bias.data.zero_().add_(bias)
        
    def forward(self, x):
        return [flatten_conv(self.oconv1(x), self.k),
                flatten_conv(self.oconv2(x), self.k)]

class SSD_Head(nn.Module):
    def __init__(self, k, bias):
        super().__init__()
        self.drop = nn.Dropout(0.25)
        self.sconv0 = StdConv(512,256, stride=1)
#         self.sconv1 = StdConv(256,256)
        self.sconv2 = StdConv(256,256)
        self.out = OutConv(k, 256, bias)
        
    def forward(self, x):
        x = self.drop(F.relu(x))
        x = self.sconv0(x)
#         x = self.sconv1(x)
        x = self.sconv2(x)
        return self.out(x)

## Loss

In [5]:
# centre+height/width -> corners
def hw2corners(ctr, hw): return torch.cat([ctr-hw/2, ctr+hw/2], dim=1)

def create_anchors(ncells=4, k=1):
    # create a tensor with anchor boxes - middles + sizes
    # coordinates of bboxes are scaled to -1,1, so anchor boxes must be too
    # ncells - ncells in a grid dimension
    # k - num boxes per cell
    first_ctr = -1 + 2/(2*ncells)
    last_ctr  =  1 - 2/(2*ncells)
    a_x = np.repeat(np.linspace(first_ctr, last_ctr, ncells), ncells)
    a_y = np.tile(np.linspace(first_ctr, last_ctr, ncells), ncells)
    a_sz = np.array([2/ncells for _ in a_x])
    anchors = torch.tensor(np.stack([a_x, a_y, a_sz, a_sz], axis=1)).type(torch.FloatTensor).cuda()
    anchor_cnr = hw2corners(anchors[:,:2], anchors[:,2:]) # anchor boxes corners
    grid_sizes = torch.tensor(np.array([2/ncells]), requires_grad=False).type(torch.FloatTensor).unsqueeze(1).cuda()
    return anchors, anchor_cnr, grid_sizes



In [6]:
anchors, anchor_cnr, grid_sizes = create_anchors()

In [7]:
def intersect(box_a, box_b):
    max_xy = torch.min(box_a[:, None, 2:], box_b[None, :, 2:])
    min_xy = torch.max(box_a[:, None, :2], box_b[None, :, :2])
    inter = torch.clamp((max_xy - min_xy), min=0)
    return inter[:, :, 0] * inter[:, :, 1]

def box_sz(b): return ((b[:, 2]-b[:, 0]) * (b[:, 3]-b[:, 1]))

def jaccard(box_a, box_b):
    inter = intersect(box_a, box_b)
    union = box_sz(box_a).unsqueeze(1) + box_sz(box_b).unsqueeze(0) - inter
    return inter / union

def map_to_ground_truth(overlaps, print_it=False):
    prior_overlap, prior_idx = overlaps.max(1)
    if print_it: print(prior_overlap)
#     pdb.set_trace()
    gt_overlap, gt_idx = overlaps.max(0)
    gt_overlap[prior_idx] = 1.99
    for i,o in enumerate(prior_idx): gt_idx[o] = i
    return gt_overlap,gt_idx

def actn_to_bb(actn, anchors):
    actn_bbs = torch.tanh(actn)
    actn_centers = (actn_bbs[:,:2]/2 * grid_sizes) + anchors[:,:2]
    actn_hw = (actn_bbs[:,2:]/2+1) * anchors[:,2:]
    return hw2corners(actn_centers, actn_hw)

In [8]:
def one_hot_embedding(labels, num_classes):
    return torch.eye(num_classes)[labels.data.cpu()]

class BCE_Loss(nn.Module):
    def __init__(self, num_classes):
        super().__init__()
        self.num_classes = num_classes

    def forward(self, pred, targ):
        t = one_hot_embedding(targ, self.num_classes+1)
#         t = t[:,:-1].contiguous().cuda()
#         x = pred[:,:-1]
        # in fastai v1, the background is encoded as the first class, rather than the last one
        t = t[:,1:].contiguous().cuda()
        x = pred[:,1:]
        w = self.get_weight(x,t)
        return F.binary_cross_entropy_with_logits(x, t, w, size_average=False)/self.num_classes
    
    def get_weight(self,x,t): return None

loss_f = BCE_Loss(ncat)

In [30]:
# remove padding - images in a minibatch will have a different number of objects
# those with fewer objects will have their bbox groundtruth tensor padded with 0s
def get_y(bbox,clas):
    bbox = bbox.view(-1,4)
    bb_keep = ((bbox[:,2]-bbox[:,0])>0).nonzero()[:,0]
    return bbox[bb_keep],clas[bb_keep]

def ssd_1_loss(pred_bbox,pred_clas,bbox,clas):
    bbox,clas = get_y(bbox,clas)
    if len(bbox) == 0 and len(clas) == 0:
        return 0.0, 0.0
    a_ic = actn_to_bb(pred_bbox, anchors)
    overlaps = jaccard(bbox.data, anchor_cnr.data)
    gt_overlap,gt_idx = map_to_ground_truth(overlaps,False)
    gt_clas = clas[gt_idx]
    pos = gt_overlap > 0.4
    pos_idx = torch.nonzero(pos)[:,0]
    gt_clas[~pos] = 0 # background coded as 0
    gt_bbox = bbox[gt_idx]
    print(a_ic[pos_idx] - gt_bbox[pos_idx])
    loc_loss = ((a_ic[pos_idx] - gt_bbox[pos_idx]).abs()).mean()
    clas_loss  = loss_f(pred_clas, gt_clas)
    print(f"loc_loss: {loc_loss}, clas_loss: {clas_loss}")
    return loc_loss, clas_loss

def ssd_loss(pred,targ_bb,targ_c):
    lcs,lls = 0.,0.
#    i = 1
#    for b_c,b_bb,bbox,clas in zip(*pred,targ_bb,targ_c):
    for pr_bb, pr_c, ta_bb, ta_c in zip(*pred, targ_bb, targ_c):
#         print(f"item {i}")
#         if i == 5:
#             pdb.set_trace()
#         i = i+1
#        loc_loss,clas_loss = ssd_1_loss(b_c,b_bb,bbox,clas)
        loc_loss, clas_loss = ssd_1_loss(pr_bb, pr_c, ta_bb, ta_c)
        lls += loc_loss
        lcs += clas_loss
#    if print_it: print(f'loc: {lls.data[0]}, clas: {lcs.data[0]}')
    return 30*lls+lcs

## Learner

In [31]:
head_reg4 = SSD_Head(k=1, bias=-3.)
learn = cnn_learner(data, models.resnet34, loss_func=ssd_loss, custom_head=head_reg4)

In [32]:
learn.load('better-model-loc-lossx30');

## Loss on validation set

In [33]:
data = (ObjectItemList.from_folder(coco)
        .split_by_rand_pct(valid_pct=0.1, seed=0)                          
        .label_from_func(get_y_func)
        .transform(get_transforms(), size=sz, tfm_y=True)
        .databunch(bs=8, collate_fn=bb_pad_collate))   
x,y = data.one_batch(ds_type=DatasetType.Valid)
x = x.cuda()
y_bb, y_c = y
y_bb = y_bb.cuda()
y_c = y_c.cuda()

In [34]:
y_pred = learn.model(x)
y_pred_bb, y_pred_c = y_pred

In [35]:
i = 1
y_bb_i = y_bb[i].unsqueeze(0)
y_c_i  = y_c[i].unsqueeze(0)
y_pred_i = (y_pred_bb[i].unsqueeze(0), y_pred_c[i].unsqueeze(0))

In [36]:
#%%debug
ssd_loss(y_pred_i, y_bb_i, y_c_i)

tensor([[-0.1034, -0.0976,  0.5972,  0.4994],
        [ 0.2693,  0.3305,  0.5311,  0.4556],
        [ 0.3235, -0.4616,  0.4421, -0.1691]], device='cuda:0',
       grad_fn=<SubBackward0>)
loc_loss: 0.3567039370536804, clas_loss: 4.574733257293701




tensor(15.2759, device='cuda:0', grad_fn=<AddBackward0>)

In [37]:
ssd_loss(y_pred, y_bb, y_c)

tensor([[ 0.5708, -0.2397,  0.4113, -0.0435],
        [ 0.2182,  0.5587, -0.0846,  0.4222],
        [ 0.0717,  0.5275, -0.1957,  0.3994]], device='cuda:0',
       grad_fn=<SubBackward0>)
loc_loss: 0.3119214177131653, clas_loss: 2.773035764694214
tensor([[-0.1034, -0.0976,  0.5972,  0.4994],
        [ 0.2693,  0.3305,  0.5311,  0.4556],
        [ 0.3235, -0.4616,  0.4421, -0.1691]], device='cuda:0',
       grad_fn=<SubBackward0>)
loc_loss: 0.3567039370536804, clas_loss: 4.574733257293701
tensor([[ 0.7013,  0.1199, -0.6790, -1.6278]], device='cuda:0',
       grad_fn=<SubBackward0>)
loc_loss: 0.7819737792015076, clas_loss: 2.022991180419922
tensor([[-0.3065, -0.5251, -0.1677, -0.3485],
        [-0.0179, -0.7334,  0.1514, -0.0897]], device='cuda:0',
       grad_fn=<SubBackward0>)
loc_loss: 0.29253679513931274, clas_loss: 3.372897148132324
tensor([[-0.0219,  0.3872,  0.1059,  0.5014],
        [-0.0199,  0.1155, -0.0303,  0.2087]], device='cuda:0',
       grad_fn=<SubBackward0>)
loc_loss: 0.

tensor(92.3826, device='cuda:0', grad_fn=<AddBackward0>)