In [None]:
def yolo_handle_predictions(predictions, image_shape, max_boxes=100, confidence=0.1, iou_threshold=0.4, use_cluster_nms=False, use_wbf=False):
    """Filter raw YOLO predictions by score and suppress overlapping boxes.

    Args:
        predictions: array indexed as [batch, num_boxes, channels]. The last
            axis is assumed to be laid out as 4 box coordinates, 1 objectness
            confidence, then one probability per class (e.g. 4 + 1 + 80 = 85)
            -- TODO confirm against the decoder that produces it.
        image_shape: forwarded unchanged to ``weighted_boxes_fusion`` when
            ``use_wbf`` is set; unused otherwise.
        max_boxes: forwarded to ``filter_boxes`` to cap the result size.
        confidence: minimum (objectness * class probability) for a box to be
            kept; also forwarded to the NMS helpers.
        iou_threshold: overlap threshold forwarded to the suppression helper.
        use_cluster_nms: use ``fast_cluster_nms_boxes`` for suppression.
        use_wbf: use ``weighted_boxes_fusion`` (only if ``use_cluster_nms``
            is false).

    Returns:
        ``(boxes, classes, scores)`` as returned by ``filter_boxes``, or three
        empty lists when nothing survives.
    """
    # Example layout: [16, 16*16*3, 85] = [16, 768, 85]
    boxes = predictions[:, :, :4]  # [16, 768, 4]
    box_confidences = np.expand_dims(predictions[:, :, 4], -1)  # [16, 768, 1]
    box_class_probs = predictions[:, :, 5:]  # [16, 768, 80]

    # Filter boxes with the confidence threshold: the score of a box is its
    # objectness times its best class probability.
    box_scores = box_confidences * box_class_probs  # [16, 768, 80]
    box_classes = np.argmax(box_scores, axis=-1)  # [16, 768]
    box_class_scores = np.max(box_scores, axis=-1)  # [16, 768]
    pos = np.where(box_class_scores >= confidence)  # tuple of 2 index arrays

    # Use the positions to pick the boxes, classes and scores that passed.
    boxes = boxes[pos]  # [number, 4]
    classes = box_classes[pos]  # [number, ]
    scores = box_class_scores[pos]  # [number, ]

    if len(scores) == 0:
        # Nothing passed the threshold; skip suppression entirely and report
        # "no detections" the same way the final else-branch does.
        return [], [], []

    if use_cluster_nms:
        # use Fast/Cluster NMS for boxes postprocess
        n_boxes, n_classes, n_scores = fast_cluster_nms_boxes(boxes, classes, scores, iou_threshold, confidence=confidence)
    elif use_wbf:
        # use Weighted-Boxes-Fusion for boxes postprocess
        n_boxes, n_classes, n_scores = weighted_boxes_fusion([boxes], [classes], [scores], image_shape, weights=None, iou_thr=iou_threshold)
    else:
        # Boxes, Classes and Scores returned from NMS
        n_boxes, n_classes, n_scores = nms_boxes(boxes, classes, scores, iou_threshold, confidence=confidence)

    if n_boxes:
        boxes = np.concatenate(n_boxes)
        classes = np.concatenate(n_classes).astype('int32')
        scores = np.concatenate(n_scores)
        boxes, classes, scores = filter_boxes(boxes, classes, scores, max_boxes)

        return boxes, classes, scores

    else:
        return [], [], []

In [None]:
def nms_boxes(boxes, classes, scores, iou_threshold, confidence=0.1, use_diou=True, is_soft=False, use_exp=False, sigma=0.5):
    """Run per-class (Soft-)NMS over a flat set of candidate boxes.

    Args:
        boxes: array of shape [number, 4].
        classes: array of shape [number, ] with the class id of each box.
        scores: array of shape [number, ] with the score of each box.
        iou_threshold: boxes overlapping the current best box by more than
            this are dropped (hard NMS) or have their score reduced (linear
            Soft-NMS).
        confidence: in Soft-NMS, boxes whose refreshed score falls below this
            value are dropped.
        use_diou: measure overlap with ``box_diou`` instead of ``box_iou``.
        is_soft: use Soft-NMS instead of hard NMS.
        use_exp: in Soft-NMS, use the exponential score decay
            ``score * exp(-(iou^2) / sigma)`` instead of the linear one.
        sigma: divisor of the exponential decay.

    Returns:
        ``(nboxes, nclasses, nscores)``: three one-element lists, each holding
        one array with the kept boxes / classes / scores of all classes.
    """
    # boxes: [number, 4], classes: [number, ], scores: [number, ]
    nboxes, nclasses, nscores = [], [], []

    # np.unique gives a deterministic (sorted) class order.
    for cls in np.unique(classes):
        # Handle all the data belonging to one class.
        inds = np.where(classes == cls)
        # Selecting with index arrays yields fresh arrays, so the in-place
        # swaps below never touch the caller's data.
        b_nms = boxes[inds]  # [len(inds), 4]
        c_nms = classes[inds]  # [len(inds), ]
        s_nms = scores[inds]  # [len(inds), ]

        while len(s_nms) > 0:
            # Pick the highest-scoring box of this class and record it.
            i = np.argmax(s_nms, axis=-1)
            # b_nms[i] is a view of the row, which is overwritten by the swap
            # right below, so it has to be copied before being stored.
            nboxes.append(b_nms[i].copy())
            nclasses.append(c_nms[i])
            nscores.append(s_nms[i])

            # Swap the best row with the first row to simplify the NMS step.
            b_nms[[i, 0], :] = b_nms[[0, i], :]
            c_nms[[i, 0]] = c_nms[[0, i]]
            s_nms[[i, 0]] = s_nms[[0, i]]

            # Choose how overlap is measured. NOTE(review): both helpers are
            # used as if they return one value per remaining row (first row
            # vs. every other row) -- confirm against their definitions.
            if use_diou:
                iou = box_diou(b_nms)
            else:
                iou = box_iou(b_nms)

            # drop the 1st line since it has been record
            b_nms = b_nms[1:]
            c_nms = c_nms[1:]
            s_nms = s_nms[1:]

            # Choose between soft NMS and hard NMS.
            if is_soft:
                # Soft-NMS
                if use_exp:
                    # score refresh formula:
                    # score = score * exp(-(iou^2)/sigma)
                    s_nms = s_nms * np.exp(-(iou * iou) / sigma)
                else:
                    # score refresh formula:
                    # score = score * (1 - iou) if iou > threshold
                    depress_mask = np.where(iou > iou_threshold)[0]
                    s_nms[depress_mask] = s_nms[depress_mask] * (1 - iou[depress_mask])
                keep_mask = np.where(s_nms >= confidence)[0]
            else:
                # normal Hard-NMS
                keep_mask = np.where(iou <= iou_threshold)[0]

            # Repeat the steps above on whatever is left.
            b_nms = b_nms[keep_mask]
            c_nms = c_nms[keep_mask]
            s_nms = s_nms[keep_mask]

    # reformat result for output
    nboxes = [np.array(nboxes)]
    nclasses = [np.array(nclasses)]
    nscores = [np.array(nscores)]

    return nboxes, nclasses, nscores
                
            

In [None]:
def yolo_nms(yolo_feats, yolo_max_boxes, yolo_iou_threshold, yolo_score_threshold):
    """Merge the outputs of all YOLO stages and run combined NMS on them.

    Args:
        yolo_feats: iterable with one entry per stage; each entry is indexed
            as ``(bbox, objectness, class_probs)``. Axes 1, 2 and 3 of the
            bbox tensor are multiplied together to get the number of boxes
            of the stage.
        yolo_max_boxes: used both as the per-class and as the total cap on
            the number of detections.
        yolo_iou_threshold: IoU threshold passed to the suppression op.
        yolo_score_threshold: score threshold passed to the suppression op.

    Returns:
        ``[boxes, scores, classes, valid_detections]`` as produced by
        ``tf.image.combined_non_max_suppression``.
    """
    bbox_per_stage, objectness_per_stage, class_probs_per_stage = [], [], []

    def _flatten(feat, num_boxes):
        # Collapse a stage tensor to [batch, num_boxes, last_dim]. The batch
        # size is read with tf.shape, the last dimension from the static shape.
        return tf.reshape(feat, (tf.shape(feat)[0], num_boxes, feat.shape[-1]))

    for stage_feats in yolo_feats:
        # total number of boxes = grid_x * grid_y * num_anchors
        num_boxes = (stage_feats[0].shape[1] * stage_feats[0].shape[2] * stage_feats[0].shape[3])

        bbox_per_stage.append(_flatten(stage_feats[0], num_boxes))  # [None,num_boxes,4]
        objectness_per_stage.append(_flatten(stage_feats[1], num_boxes))  # [None,num_boxes,1]
        class_probs_per_stage.append(_flatten(stage_feats[2], num_boxes))  # [None,num_boxes,num_classes]

    # Stack the boxes of every stage along the box axis.
    bbox = tf.concat(bbox_per_stage, axis=1)
    objectness = tf.concat(objectness_per_stage, axis=1)
    class_probs = tf.concat(class_probs_per_stage, axis=1)

    boxes, scores, classes, valid_detections = tf.image.combined_non_max_suppression(
        # The extra axis of size 1 means the same box is shared by all classes.
        boxes=tf.expand_dims(bbox, axis=2),
        scores=objectness * class_probs,
        max_output_size_per_class=yolo_max_boxes,
        max_total_size=yolo_max_boxes,
        iou_threshold=yolo_iou_threshold,
        score_threshold=yolo_score_threshold,
    )

    return [boxes, scores, classes, valid_detections]
        