In [None]:
def yolo_loss(args, anchors):
    """Split model outputs from targets and extract per-layer target tensors.

    NOTE(review): the visible body only performs setup (shape bookkeeping and
    slicing the object mask / class targets for each layer). No loss value is
    accumulated or returned here, so the function currently returns ``None``.

    Args:
        args: flat sequence ``[*model_body.output, *y_true]``. The first
            ``num_layers`` entries are treated as predictions, the remaining
            entries as ground truth.
        anchors: sequence of anchor boxes; ``len(anchors) // 3`` determines
            the number of output layers.
    """
    # Three anchors per output layer (three layers in the full model).
    num_layers = len(anchors)//3
    #---------------------------------------------------------------------------------------------------#
    #   Separate predictions from ground truth; args is [*model_body.output, *y_true].
    #   Per the original notes, y_true and yolo_outputs are each a list of three
    #   feature layers with shapes (m,13,13,3,85), (m,26,26,3,85), (m,52,52,3,85).
    #---------------------------------------------------------------------------------------------------#
    y_true = args[num_layers:]
    yolo_outputs = args[:num_layers]
    #-----------------------------------------------------------#
    #   Anchors for the 13x13 layer: [142, 110], [192, 243], [459, 401]
    #   Anchors for the 26x26 layer: [36, 75], [76, 55], [72, 146]
    #   Anchors for the 52x52 layer: [12, 16], [19, 36], [40, 28]
    #-----------------------------------------------------------#
    anchor_mask = [[6,7,8], [3,4,5], [0,1,2]] if num_layers==3 else [[3,4,5], [1,2,3]]
    # Input shape is the first feature map's spatial size times its stride of 32
    # (416x416 for a 13x13 map).
    input_shape = K.cast(K.shape(yolo_outputs[0])[1:3] * 32, K.dtype(y_true[0]))

    loss = 0
    num_pos = 0

    #-----------------------------------------------------------#
    #   m is the batch size, taken from the first output layer.
    #-----------------------------------------------------------#
    m = K.shape(yolo_outputs[0])[0]
    mf = K.cast(m, K.dtype(yolo_outputs[0]))

    # The loop variable must match the index used in the body; the original
    # iterated with `i` but indexed with an undefined `l`.
    for layer in range(num_layers):
        #-----------------------------------------------------------#
        #   Taking the first layer (m,13,13,3,85) as an example:
        #   channel 4 marks the cells that contain an object -> (m,13,13,3,1)
        #-----------------------------------------------------------#
        object_mask = y_true[layer][..., 4:5]
        #-----------------------------------------------------------#
        #   Channels 5 onward hold the class targets -> (m,13,13,3,80)
        #-----------------------------------------------------------#
        true_class_probs = y_true[layer][..., 5:]

### 1、DIOU_Loss（Distance_IOU_Loss）

<img src="https://pic1.zhimg.com/80/v2-029f094658e87f441bf30c80cb8d07d0_1440w.jpg" alt="img" style="zoom:45%;" />

### 2、CIOU_loss
CIOU_Loss和DIOU_Loss前面的公式都是一样的，不过在此基础上还增加了一个影响因子，将预测框和目标框的长宽比都考虑了进去。

![img](https://pic2.zhimg.com/80/v2-a24dd2e0d0acef20f6ead6a13b5c33d1_1440w.jpg)

其中v是衡量长宽比一致性的参数，我们也可以定义为：

![img](https://pic2.zhimg.com/80/v2-5abd8f82d7e30bdf21d2fd5851cb53a1_1440w.jpg)

这样，CIOU_Loss就把目标框回归函数应该考虑的三个重要几何因素——重叠面积、中心点距离、长宽比——全都考虑进去了。

In [None]:
class IouLoss(object):
    """IoU-based box regression loss with an optional DIoU + CIoU penalty.

    Calling an instance decodes the raw predictions and the encoded targets
    into corner-format boxes, computes their IoU (minus the distance and
    aspect-ratio penalties when ``ciou_term`` is enabled) and returns
    ``1 - iou`` or ``1 - iou**2``.

    NOTE(review): ``_loss_weight``, ``_MAX_H`` and ``_MAX_W`` are stored but
    are not read anywhere in this class; callers that expect the weight to be
    applied must multiply the returned loss themselves.

    Args:
        loss_weight: weight associated with this loss term (stored only).
        max_height: stored as ``_MAX_H`` (unused in this class).
        max_width: stored as ``_MAX_W`` (unused in this class).
        ciou_term: when true, subtract the DIoU + CIoU penalty from the IoU.
        loss_squre: when true, the loss is ``1 - iou**2`` instead of
            ``1 - iou``. The misspelled name is kept for compatibility with
            existing keyword callers.
    """

    def __init__(self,
                 loss_weight=2.5,
                 max_height=608,
                 max_width=608,
                 ciou_term=False,
                 loss_squre=True):
        self._loss_weight = loss_weight
        self._MAX_H = max_height
        self._MAX_W = max_width
        self.ciou_term = ciou_term
        self.loss_squre = loss_squre

    @property
    def loss_square(self):
        """Correctly spelled read-only alias for the ``loss_squre`` flag."""
        return self.loss_squre

    def __call__(self, x, y, w, h,
                 tx, ty, tw, th,
                 anchors,
                 downsample_ratio,
                 batch_size,
                 scale_x_y=1.,
                 ioup=None,
                 eps=1.e-10):
        '''Compute the IoU loss between predicted and target boxes.

        Args:
            x  | y | w | h  ([Variables]): the output of yolov for encoded x|y|w|h
            tx |ty |tw |th  ([Variables]): the target of yolov for encoded x|y|w|h
            anchors ([float]): flat list of anchors (w0, h0, w1, h1, ...) for
                the current output layer
            downsample_ratio (float): the downsample ratio for current output layer
            batch_size (int): training batch size
            scale_x_y (float): scale applied to the sigmoid-activated centre
                offsets of the predictions
            ioup: forwarded to ``_iou``, which does not use it
            eps (float): the decimal to prevent the denominator equal zero

        Returns:
            ``1 - iou**2`` when ``loss_squre`` is set, otherwise ``1 - iou``.
        '''
        pred = self._bbox_transform(x, y, w, h, anchors, downsample_ratio,
                                    batch_size, False, scale_x_y, eps)
        gt = self._bbox_transform(tx, ty, tw, th, anchors, downsample_ratio,
                                  batch_size, True, scale_x_y, eps)
        iouk = self._iou(pred, gt, ioup, eps)
        # Read the attribute that __init__ actually sets; the original read
        # `self.loss_square`, which did not exist.
        if self.loss_squre:
            return 1. - iouk * iouk
        return 1. - iouk

    def _iou(self, pred, gt, ioup=None, eps=1.e-10):
        """Return the IoU of corner-format boxes, minus the CIoU penalty if enabled.

        Args:
            pred: tuple ``(x1, y1, x2, y2)`` of predicted box corners.
            gt: tuple ``(x1, y1, x2, y2)`` of target box corners.
            ioup: unused.
            eps: added to the union so the division never hits zero.
        """
        x1, y1, x2, y2 = pred
        x1g, y1g, x2g, y2g = gt

        # Corners of the overlap rectangle.
        xkis1 = tf.maximum(x1, x1g)
        ykis1 = tf.maximum(y1, y1g)
        xkis2 = tf.minimum(x2, x2g)
        ykis2 = tf.minimum(y2, y2g)

        # Clamp at zero so disjoint boxes contribute no intersection.
        inter_w = tf.maximum((xkis2 - xkis1), 0.0)
        inter_h = tf.maximum((ykis2 - ykis1), 0.0)
        # Intersection area.
        intsctk = inter_w * inter_h

        # Union area.
        unionk = (x2 - x1) * (y2 - y1) + (x2g - x1g) * (y2g - y1g) - intsctk + eps

        # Intersection over union.
        iouk = intsctk / unionk

        # Optionally subtract the distance / aspect-ratio penalty.
        if self.ciou_term:
            ciou = self.get_ciou_term(pred, gt, iouk, eps)
            iouk = iouk - ciou

        return iouk

    def get_ciou_term(self, pred, gt, iouk, eps):
        """Return the sum of the DIoU distance term and the CIoU aspect-ratio term.

        Args:
            pred: tuple ``(x1, y1, x2, y2)`` of predicted box corners.
            gt: tuple ``(x1, y1, x2, y2)`` of target box corners.
            iouk: plain IoU of the two boxes.
            eps: small constant guarding the divisions.
        """
        x1, y1, x2, y2 = pred
        x1g, y1g, x2g, y2g = gt

        # Centres and sizes; the predicted size gets a tiny offset so the
        # aspect ratio w / h below is never a division by zero.
        cx = (x1 + x2) / 2
        cy = (y1 + y2) / 2
        w = (x2 - x1) + 1e-9
        h = (y2 - y1) + 1e-9

        cxg = (x1g + x2g) / 2
        cyg = (y1g + y2g) / 2
        wg = x2g - x1g
        hg = y2g - y1g

        # Smallest box enclosing both boxes.
        xc1 = tf.minimum(x1, x1g)
        yc1 = tf.minimum(y1, y1g)
        xc2 = tf.maximum(x2, x2g)
        yc2 = tf.maximum(y2, y2g)
        # Squared diagonal of the enclosing box.
        dist_union = (xc2 - xc1) ** 2 + (yc2 - yc1) ** 2
        # Squared distance between the two centres.
        dist_intersection = (cx - cxg) ** 2 + (cy - cyg) ** 2
        # DIoU term: centre distance relative to the enclosing diagonal.
        diou_term = (dist_intersection + eps) / (dist_union + eps)

        # v measures how much the two aspect ratios disagree.
        arctan = tf.atan(wg / hg) - tf.atan(w / h)
        v = 4. / (np.pi ** 2) * (arctan ** 2)
        # CIoU term: alpha * v with alpha = v / (1 - iou + v + eps).
        ciou_term = v**2 / (1 - iouk + v + eps)

        return diou_term + ciou_term

    def _bbox_transform(self, dcx, dcy, dw, dh, anchors, downsample_ratio,
                        batch_size, is_gt, scale_x_y, eps):
        '''Decode encoded box offsets into normalised corner coordinates.

        The inputs are indexed as (batch, anchors_per_scale, size, size): the
        anchor count is read from axis 1 and the (square) grid size from
        axis 2 of ``dcx``.

        Args:
            dcx, dcy: encoded centre offsets inside each grid cell.
            dw, dh: encoded log-scale width / height relative to the anchors.
            anchors: flat list of anchors (w0, h0, w1, h1, ...).
            downsample_ratio: stride of this output layer.
            batch_size: batch size used to tile the grid and anchors.
            is_gt: when true the centre offsets are used as-is; otherwise they
                are passed through a sigmoid (and rescaled by ``scale_x_y``).
            scale_x_y: scale for the sigmoid-activated offsets.
            eps: tolerance used to decide whether ``scale_x_y`` differs from 1.

        Returns:
            Tuple ``(x1, y1, x2, y2)`` of box corners, normalised by the grid
            size (centres) and by grid size times stride (widths / heights).
        '''
        shape_fmp = tf.shape(dcx)
        # batch_size = shape_fmp[0]
        anchor_per_scale = shape_fmp[1]
        output_size = shape_fmp[2]
        output_size_f = tf.cast(output_size, tf.float32)

        # Grid of cell indices: `rows` varies along the last axis and is added
        # to the x offsets, `cols` varies along axis 2 and is added to y.
        rows = tf.range(output_size_f, dtype=tf.float32)
        cols = tf.range(output_size_f, dtype=tf.float32)
        rows = tf.tile(rows[tf.newaxis, tf.newaxis, tf.newaxis, :],
                       [batch_size, anchor_per_scale, output_size, 1])
        cols = tf.tile(cols[tf.newaxis, tf.newaxis, :, tf.newaxis],
                       [batch_size, anchor_per_scale, 1, output_size])

        if is_gt:
            cx = (dcx + rows) / output_size_f
            cy = (dcy + cols) / output_size_f
        else:
            dcx_sig = tf.sigmoid(dcx)
            dcy_sig = tf.sigmoid(dcy)
            # Only rescale when scale_x_y is meaningfully different from 1.
            if abs(scale_x_y - 1.0) > eps:
                dcx_sig = scale_x_y * dcx_sig - 0.5 * (scale_x_y - 1)
                dcy_sig = scale_x_y * dcy_sig - 0.5 * (scale_x_y - 1)
            cx = (dcx_sig + rows) / output_size_f
            cy = (dcy_sig + cols) / output_size_f

        # Even positions of the flat anchor list are widths, odd are heights.
        anchor_w_ = tf.constant(np.asarray(anchors[::2], dtype=np.float32))
        anchor_h_ = tf.constant(np.asarray(anchors[1::2], dtype=np.float32))
        tile_shape = [batch_size, 1, output_size, output_size]
        anchor_w = tf.tile(anchor_w_[tf.newaxis, :, tf.newaxis, tf.newaxis], tile_shape)
        anchor_h = tf.tile(anchor_h_[tf.newaxis, :, tf.newaxis, tf.newaxis], tile_shape)

        # e^tw, e^th scaled by the anchors, then normalised by the input size
        # (grid size * stride).
        exp_dw = tf.exp(dw)
        exp_dh = tf.exp(dh)
        pw = (exp_dw * anchor_w) / (output_size_f * downsample_ratio)
        ph = (exp_dh * anchor_h) / (output_size_f * downsample_ratio)

        # Centre/size -> corner format.
        x1 = cx - 0.5 * pw
        y1 = cy - 0.5 * ph
        x2 = cx + 0.5 * pw
        y2 = cy + 0.5 * ph

        return x1, y1, x2, y2

In [None]:
def calc_obj_loss(output, obj, tobj, gt_box, batch_size, anchors,
                 num_classes, downsample, ignore_thresh, scale_x_y):
    """Decode the layer output into boxes as the first step of the objectness loss.

    A prediction whose box overlaps any ground-truth box by more than
    ``ignore_thresh`` is meant to have its objectness loss ignored.

    NOTE(review): only the box-decoding step is present; ``obj``, ``tobj``,
    ``gt_box`` and ``ignore_thresh`` are not used yet and nothing is returned.

    Args:
        output: raw output of the current layer, passed to ``paddle_yolo_box``.
        obj, tobj, gt_box: currently unused.
        batch_size: number of images; sizes the per-image size tensor.
        anchors: flat anchor list, reshaped here to ``(-1, 2)`` float32 pairs.
        num_classes: number of classes, passed to ``paddle_yolo_box``.
        downsample: stride of the current layer, passed to ``paddle_yolo_box``.
        ignore_thresh: currently unused.
        scale_x_y: passed to ``paddle_yolo_box``.
    """
    # Reshape first, then cast: `.astype` belongs on the array, not on the
    # shape tuple as in the original.
    _anchors = np.reshape(np.array(anchors), (-1, 2)).astype(np.float32)

    # All-ones image size; bound to the name that is actually passed below
    # (the original bound `image_size` but passed the undefined `im_size`).
    im_size = tf.ones((batch_size, 2), dtype=tf.float32)
    bbox, prob = paddle_yolo_box(output, _anchors, downsample,
                                 num_classes, scale_x_y, im_size, clip_bbox=False,
                                 conf_thresh=0.0)
    
    

In [None]:
def