tf_extended/seglink.py

import cv2
import numpy as np
import tensorflow as tf

import config
import util

############################################################################################################
#                       seg_gt calculation                                                                 #
############################################################################################################

def anchor_rect_height_ratio(anchor, rect):
    """Return the symmetric height ratio between an anchor and a rect.

    The rect height is taken as the shorter of ``rect[2]`` and ``rect[3]``,
    and the anchor height is read from ``anchor[2]``. With
    ``r = anchor_height / rect_height`` the result is ``max(r, 1 / r)``, so it
    does not matter which of the two is the taller one.
    """
    shorter_side = min(rect[2], rect[3])
    r = (anchor[2] * 1.0) / shorter_side
    inverse = 1.0 / r
    return max(r, inverse)
    
def is_anchor_center_in_rect(anchor, xs, ys, bbox_idx):
    """Tell whether the center of `anchor` lies inside bbox number `bbox_idx`.

    Args:
        anchor: four values unpacked as (cx, cy, w, h); only the center is used.
        xs, ys: arrays indexed as ``xs[bbox_idx, :]`` / ``ys[bbox_idx, :]``,
            holding the corner coordinates of every bbox.
        bbox_idx: the row of `xs`/`ys` to test against.
    Return:
        the result of ``util.img.is_in_contour`` for the anchor center and the
        contour built from the selected bbox corners.
    """
    # Materialise the corner pairs: on Python 3 `zip` returns a lazy iterator,
    # which is not a point list. On Python 2 this is just a list copy.
    bbox_points = list(zip(xs[bbox_idx, :], ys[bbox_idx, :]))
    cnt = util.img.points_to_contour(bbox_points)
    acx, acy, _, _ = anchor
    return util.img.is_in_contour((acx, acy), cnt)
    
def min_area_rect(xs, ys):
    """Compute the minimum-area rotated rect of every oriented bbox.

    Args:
        xs: numpy ndarray with shape=(N,4). N is the number of oriented bboxes. 4 contains [x1, x2, x3, x4]
        ys: numpy ndarray with shape=(N,4), [y1, y2, y3, y4]
            Note that [(x1, y1), (x2, y2), (x3, y3), (x4, y4)] can represent an oriented bbox.
    Return:
        a float32 ndarray with shape=(N, 5): the oriented rects surrounding the boxes,
        in the format [cx, cy, w, h, theta] exactly as returned by cv2.minAreaRect.
    """
    xs = np.asarray(xs, dtype=np.float32)
    ys = np.asarray(ys, dtype=np.float32)

    num_rects = xs.shape[0]
    # allocate directly in the output dtype instead of casting afterwards
    box = np.empty((num_rects, 5), dtype=xs.dtype)  # cx, cy, w, h, theta
    # `range` (not `xrange`) so that the loop also runs on Python 3
    for idx in range(num_rects):
        # list(...) because `zip` is a lazy iterator on Python 3
        points = list(zip(xs[idx, :], ys[idx, :]))
        cnt = util.img.points_to_contour(points)
        (cx, cy), (w, h), theta = cv2.minAreaRect(cnt)
        box[idx, :] = [cx, cy, w, h, theta]

    return box

def tf_min_area_rect(xs, ys):
    """Wrap `min_area_rect` as a graph op through `tf.py_func`.

    The output tensor is declared with the same dtype as `xs`.
    """
    out_dtype = xs.dtype
    rects = tf.py_func(min_area_rect, [xs, ys], out_dtype)
    return rects

def transform_cv_rect(rects):
    """Transform the rects from opencv method minAreaRect to our rects.
    Step 1 of Figure 5 in seglink paper

    In cv2.minAreaRect, the w, h and theta values in the returned rect are not convenient to use, so
    the Oriented (or rotated) Rectangle object in seglink algorithm is defined differently from cv2.

    Rect definition in Seglink:
        1. The angle value between a side and x-axis is:
            positive: if it rotates clockwise, with y-axis increasing downwards.
            negative: if it rotates counter-clockwise.
            This is opposite to cv2, and it is only a personal preference.

        2. The width is the length of the side taking a smaller absolute angle with the x-axis.
        3. The theta value of a rect is the signed angle value between width-side and x-axis
        4. To rotate a rect to horizontal direction, just rotate its width-side horizontally,
             i.e., rotate it by an angle of theta using cv2 method.
             (see the method rotate_oriented_bbox_to_horizontal for rotation detail)

    Concretely: whenever |theta| > 45, or |theta| == 45 and w < h, the values of w and h are
    swapped and theta is replaced by 90 + theta. All other rects are returned unchanged.

    Args:
        rects: ndarray with shape = (5, ) or (N, 5), each rect being [cx, cy, w, h, theta].
    Return:
        transformed rects as a new float32 ndarray with the same shape as the input.
        The input is never modified.
    """
    only_one = False
    if len(np.shape(rects)) == 1:
        rects = np.expand_dims(rects, axis=0)
        only_one = True
    assert np.shape(rects)[1] == 5, 'The shape of rects must be (N, 5), but meet %s'%(str(np.shape(rects)))

    # np.array always copies, so the caller's data is left untouched
    rects = np.array(rects, dtype=np.float32)

    # Vectorised form of the per-rect rule; it avoids the Python-2-only `xrange` loop.
    widths = rects[:, 2].copy()
    heights = rects[:, 3].copy()
    abs_theta = np.abs(rects[:, 4])
    swap = (abs_theta > 45) | ((abs_theta == 45) & (widths < heights))
    rects[swap, 2] = heights[swap]
    rects[swap, 3] = widths[swap]
    rects[swap, 4] += 90

    if only_one:
        return rects[0, ...]
    return rects
    

def rotate_oriented_bbox_to_horizontal(center, bbox):
    """
    Step 2 of Figure 5 in seglink paper

    Rotate the center point of `bbox` around `center` by the bbox's own theta,
    using the matrix from cv2.getRotationMatrix2D. Only the first two entries
    (cx, cy) of the result differ from the input; w, h and theta are copied through.
    Args:
        center: the center of rotation, a vector of length 2
        bbox: [cx, cy, w, h, theta]; it must provide a `.copy()` method.
            NOTE(review): the assertion below also admits length 4, but the
            five-value unpacking that follows requires length 5.
    Return:
        a float32 copy of `bbox` with the rotated center.
    """
    assert np.shape(center) == (2, ), "center must be a vector of length 2"
    assert np.shape(bbox) == (5, ) or np.shape(bbox) == (4, ), "bbox must be a vector of length 4 or 5"
    rotated = np.asarray(bbox.copy(), dtype = np.float32)

    old_cx, old_cy, _, _, theta = rotated
    rotation = cv2.getRotationMatrix2D(center, theta, scale = 1) # 2x3 affine matrix

    # apply the affine matrix to the center written in homogeneous coordinates
    homogeneous_center = np.transpose([old_cx, old_cy, 1])
    new_cx, new_cy = np.dot(rotation, homogeneous_center)

    rotated[0:2] = [new_cx, new_cy]
    return rotated

def crop_horizontal_bbox_using_anchor(bbox, anchor):
    """Step 3 in Figure 5 in seglink paper
    Clip the x-extent of `bbox` to the x-extent of `anchor`; cy and h are left as they are.
    Args:
        bbox: a horizontal bbox with shape = (5, ) or (4, ), starting with [cx, cy, w, h].
              It must provide a `.copy()` method.
        anchor: a vector of length 4, [cx, cy, w, h].
    Return:
        a copy of `bbox` whose cx and w describe the clipped x-range.
    """
    assert np.shape(anchor) == (4, ), "anchor must be a vector of length 4"
    assert np.shape(bbox) == (5, ) or np.shape(bbox) == (4, ), "bbox must be a vector of length 4 or 5"

    # horizontal extent of the anchor
    anchor_cx, _, anchor_w, _ = anchor
    anchor_left = anchor_cx - anchor_w / 2.0
    anchor_right = anchor_cx + anchor_w / 2.0

    # horizontal extent of the bbox, clipped to the anchor's
    cx, cy, w, h = bbox[0:4]
    left = max(cx - w / 2.0, anchor_left)
    right = min(cx + w / 2.0, anchor_right)

    # back from (left, right) to (cx, w)
    cropped = bbox.copy()
    cropped[0:4] = [(left + right) / 2.0, cy, right - left, h]
    return cropped

def rotate_horizontal_bbox_to_oriented(center, bbox):
    """
    Step 4 of Figure 5 in seglink paper:
        Rotate the cropped horizontal bbox back to its original direction,
        i.e. rotate its center point around `center` by -theta.
    Args:
        center: the center of rotation, a vector of length 2
        bbox: [cx, cy, w, h, theta]; it must provide a `.copy()` method.
    Return: the oriented bbox, a float32 copy of `bbox` in which only cx and cy changed.
    """
    assert np.shape(center) == (2, ), "center must be a vector of length 2"
    # only length 5 is accepted here, and the message now says so
    assert np.shape(bbox) == (5, ), "bbox must be a vector of length 5"
    bbox = np.asarray(bbox.copy(), dtype = np.float32)

    cx, cy, _, _, theta = bbox
    # the opposite angle undoes the rotation applied in step 2
    M = cv2.getRotationMatrix2D(center, -theta, scale = 1) # 2x3
    cx, cy = np.dot(M, np.transpose([cx, cy, 1]))
    bbox[0:2] = [cx, cy]
    return bbox


def cal_seg_loc_for_single_anchor(anchor, rect):
    """
    Step 2 to 4: compute the segment of `rect` that belongs to `anchor`.

    The rect is rotated to horizontal around the anchor center, cropped to the
    anchor's x-range, and rotated back to its original direction.
    """
    pivot = (anchor[0], anchor[1])
    horizontal = rotate_oriented_bbox_to_horizontal(pivot, rect)
    cropped = crop_horizontal_bbox_using_anchor(horizontal, anchor)
    return rotate_horizontal_bbox_to_oriented(pivot, cropped)
    

@util.dec.print_calling_in_short_for_tf
def match_anchor_to_text_boxes(anchors, xs, ys):
    """Match anchors to text boxes by testing every anchor against every bbox.

       An anchor matches a bbox when its center lies inside the bbox contour and the
       height ratio between the anchor and the bbox's min-area rect does not exceed
       `config.max_height_ratio`. If several bboxes qualify, the one with the largest
       index is kept, because later matches overwrite earlier ones.
       Args:
           anchors: shape = (num_anchors, 4), each row being (cx, cy, w, h).
           xs, ys: shape = (num_bboxes, 4), the corner coordinates of the bboxes.
       Return:
           seg_labels: shape = (N,), the seg_labels of segments. each value is the index of matched box if >=0.
           seg_locations: shape = (N, 5), the absolute location of segments. Only the matched segments are correctly calculated.

    """
    import time

    assert len(np.shape(anchors)) == 2 and np.shape(anchors)[1] == 4, "the anchors must be a tensor with shape = (num_anchors, 4)"
    assert len(np.shape(xs)) == 2 and np.shape(xs) == np.shape(ys) and np.shape(ys)[1] == 4, "the xs, ys must be a tensor with shape = (num_bboxes, 4)"
    anchors = np.asarray(anchors, dtype = np.float32)
    xs = np.asarray(xs, dtype = np.float32)
    ys = np.asarray(ys, dtype = np.float32)

    num_anchors = anchors.shape[0]
    seg_labels = np.ones((num_anchors, ), dtype = np.int32) * -1
    seg_locations = np.zeros((num_anchors, 5), dtype = np.float32)

    # to avoid ln(0) in the encoding process later,
    #     because the height and width will be encoded using ln(w_seg / w_anchor)
    seg_locations[:, 2] = anchors[:, 2]
    seg_locations[:, 3] = anchors[:, 3]

    num_bboxes = xs.shape[0]

    # represent bboxes using min area rects
    rects = min_area_rect(xs, ys) # shape = (num_bboxes, 5)
    rects = transform_cv_rect(rects)
    assert rects.shape == (num_bboxes, 5)

    # represent bboxes using contours.
    # `range` and `list(zip(...))` keep this working on Python 3, where `xrange`
    # does not exist and `zip` is a lazy iterator.
    cnts = []
    for bbox_idx in range(num_bboxes):
        bbox_points = list(zip(xs[bbox_idx, :], ys[bbox_idx, :]))
        cnts.append(util.img.points_to_contour(bbox_points))

    start_time = time.time()
    # match anchor to bbox
    for anchor_idx in range(num_anchors):
        anchor = anchors[anchor_idx, :]
        acx, acy, _, _ = anchor
        for bbox_idx in range(num_bboxes):
            # center point check
            if not util.img.is_in_contour((acx, acy), cnts[bbox_idx]):
                continue

            # height ratio check
            rect = rects[bbox_idx, :]
            height_ratio = anchor_rect_height_ratio(anchor, rect)
            if height_ratio <= config.max_height_ratio:
                # no early exit: a later qualifying bbox overwrites this match
                seg_labels[anchor_idx] = bbox_idx
                seg_locations[anchor_idx, :] = cal_seg_loc_for_single_anchor(anchor, rect)

    end_time = time.time()
    tf.logging.info('Time in For Loop: %f'%(end_time - start_time))
    return seg_labels, seg_locations

# @util.dec.print_calling_in_short_for_tf
def match_anchor_to_text_boxes_fast(anchors, xs, ys):
    """Match anchors to text boxes using a rasterised bbox mask.

       Every bbox is drawn, filled with its own index, onto a mask of shape
       `config.image_shape`. Only the mask points that are also members of
       `config.default_anchor_center_set` are visited, and the anchors registered for
       such a point in `config.default_anchor_map` are matched to the bbox index stored
       in the mask, provided the height ratio does not exceed `config.max_height_ratio`.
       Args:
           anchors: shape = (num_anchors, 4), each row being (cx, cy, w, h).
           xs, ys: shape = (num_bboxes, 4), the corner coordinates of the bboxes.
       Return:
           seg_labels: shape = (N,), the seg_labels of segments. each value is the index of matched box if >=0.
           seg_locations: shape = (N, 5), the absolute location of segments. Only the matched segments are correctly calculated.

    """

    assert len(np.shape(anchors)) == 2 and np.shape(anchors)[1] == 4, "the anchors must be a tensor with shape = (num_anchors, 4)"
    assert len(np.shape(xs)) == 2 and np.shape(xs) == np.shape(ys) and np.shape(ys)[1] == 4, "the xs, ys must be a tensor with shape = (num_bboxes, 4)"
    anchors = np.asarray(anchors, dtype = np.float32)
    xs = np.asarray(xs, dtype = np.float32)
    ys = np.asarray(ys, dtype = np.float32)

    num_anchors = anchors.shape[0]
    seg_labels = np.ones((num_anchors, ), dtype = np.int32) * -1
    seg_locations = np.zeros((num_anchors, 5), dtype = np.float32)

    # to avoid ln(0) in the encoding process later,
    #     because the height and width will be encoded using ln(w_seg / w_anchor)
    seg_locations[:, 2] = anchors[:, 2]
    seg_locations[:, 3] = anchors[:, 3]

    num_bboxes = xs.shape[0]

    # represent bboxes using min area rects
    rects = min_area_rect(xs, ys) # shape = (num_bboxes, 5)
    rects = transform_cv_rect(rects)
    assert rects.shape == (num_bboxes, 5)

    # construct a bbox point map: every position covered by a bbox holds that bbox's
    #    index, every other position holds -1. Where bboxes overlap, the one drawn
    #    last overwrites the earlier ones.
    bbox_mask = np.ones(config.image_shape, dtype = np.int32) * (-1)
    # `range` and `list(zip(...))` keep this working on Python 3, where `xrange`
    # does not exist and `zip` is a lazy iterator.
    for bbox_idx in range(num_bboxes):
        bbox_points = list(zip(xs[bbox_idx, :], ys[bbox_idx, :]))
        bbox_cnts = util.img.points_to_contours(bbox_points)
        util.img.draw_contours(bbox_mask, bbox_cnts, -1, color = bbox_idx, border_width = - 1)

    # keep only the covered mask positions that are default anchor centers
    points_in_bbox_mask = np.where(bbox_mask >= 0)
    points_in_bbox_mask = set(zip(*points_in_bbox_mask))
    points_in_bbox_mask = points_in_bbox_mask.intersection(config.default_anchor_center_set)

    for point in points_in_bbox_mask:
        # the bbox and its rect depend on the point only, not on the anchor
        bbox_idx = bbox_mask[point]
        rect = rects[bbox_idx, :]
        for anchor_idx in config.default_anchor_map[point]:
            anchor = anchors[anchor_idx, :]

            # height ratio check
            height_ratio = anchor_rect_height_ratio(anchor, rect)
            if height_ratio <= config.max_height_ratio:
                seg_labels[anchor_idx] = bbox_idx
                seg_locations[anchor_idx, :] = cal_seg_loc_for_single_anchor(anchor, rect)
    return seg_labels, seg_locations


############################################################################################################
#                       link_gt calculation                                                                #
############################################################################################################
def reshape_link_gt_by_layer(link_gt):
    """Split a flat link vector into per-layer arrays.

    The expected layout of `link_gt` is: for every layer in `config.feat_layers`, a
    chunk of lh * lw * 8 values; then, for every layer except the first, a chunk of
    lh * lw * 4 values, where (lh, lw) = `config.feat_shapes[layer_name]`.
    Args:
        link_gt: a flat sequence supporting slicing and `len`.
    Return:
        (inter_layer_link_gts, cross_layer_link_gts): two dicts keyed by layer name,
        holding arrays of shape (lh, lw, 8) and (lh, lw, 4) respectively. The first
        layer has no entry in the second dict.
    """
    inter_layer_link_gts = {}
    cross_layer_link_gts = {}

    idx = 0
    for layer_name in config.feat_layers:
        lh, lw = config.feat_shapes[layer_name]
        length = lh * lw * 8
        inter_layer_link_gts[layer_name] = np.reshape(link_gt[idx: idx + length], (lh, lw, 8))
        idx = idx + length

    # slicing replaces the Python-2-only `xrange(1, len(...))` index loop
    for layer_name in config.feat_layers[1:]:
        lh, lw = config.feat_shapes[layer_name]
        length = lh * lw * 4
        cross_layer_link_gts[layer_name] = np.reshape(link_gt[idx: idx + length], (lh, lw, 4))
        idx = idx + length

    # every value of the input must have been consumed
    assert idx == len(link_gt)
    return inter_layer_link_gts, cross_layer_link_gts
        
def reshape_labels_by_layer(labels):
    """Split a flat per-anchor vector into one array per feature layer.

    The layers are consumed in the order of `config.feat_layers`; each takes
    prod(`config.feat_shapes[layer_name]`) consecutive values of `labels` and is
    reshaped to that layer shape.
    Return:
        a dict mapping layer name to the reshaped chunk.
    """
    per_layer = {}
    offset = 0
    for layer_name in config.feat_layers:
        layer_shape = config.feat_shapes[layer_name]
        stop = offset + np.prod(layer_shape)
        per_layer[layer_name] = np.reshape(labels[offset: stop], layer_shape)
        offset = stop
    # every value of the input must have been consumed
    assert offset == len(labels)
    return per_layer

def get_inter_layer_neighbours(x, y):
    """Return the 8 neighbours of (x, y), row by row from (x - 1, y - 1) to (x + 1, y + 1).

    The point (x, y) itself is not included.
    """
    offsets = (-1, 0, 1)
    return [(x + dx, y + dy)
            for dy in offsets
            for dx in offsets
            if not (dx == 0 and dy == 0)]
    
def get_cross_layer_neighbours(x, y):
    """Return the 2 x 2 block of coordinates starting at (2 * x, 2 * y), row by row."""
    base_x = 2 * x
    base_y = 2 * y
    return [(base_x + dx, base_y + dy) for dy in (0, 1) for dx in (0, 1)]
    
def is_valid_cord(x, y, w, h):
    """
    Tell whether the 2D coordinate (x, y) is valid or not.
    It is valid when it falls on an h x w image, i.e. 0 <= x < w and 0 <= y < h.
    """
    return 0 <= x < w and 0 <= y < h

def cal_link_labels(labels):
    """Compute the link ground truth from the segment matching result.

    A link is positive when both of its ends are matched to the same bbox. Its value is
    then that bbox index; every other link is -1.
    Args:
        labels: flat segment labels (bbox index if >= 0, negative if unmatched), laid out
            layer by layer as expected by `reshape_labels_by_layer`.
    Return:
        a flat int32 vector: the flattened (h, w, 8) inter-layer links of every layer,
        followed by the flattened (h, w, 4) cross-layer links of every layer but the first.
    """
    layer_labels = reshape_labels_by_layer(labels)
    inter_layer_link_gts = []
    cross_layer_link_gts = []
    for layer_idx, layer_name in enumerate(config.feat_layers):
        layer_match_result = layer_labels[layer_name]
        h, w = config.feat_shapes[layer_name]

        # initialize link groundtruth for the current layer
        inter_layer_link_gt = np.ones((h, w, 8), dtype = np.int32) * (-1)

        has_previous_layer = layer_idx > 0 # no cross-layer link for the first layer.
        if has_previous_layer:
            cross_layer_link_gt = np.ones((h, w, 4), dtype = np.int32) * (-1)
            previous_layer_name = config.feat_layers[layer_idx - 1]
            ph, pw = config.feat_shapes[previous_layer_name]
            previous_layer_match_result = layer_labels[previous_layer_name]

        # `range` (not `xrange`) so that the loops also run on Python 3
        for x in range(w):
            for y in range(h):
                # the value in layer_match_result stands for the bbox idx a segment matches;
                # if less than 0, not matched.
                # only matched segments are considered in link_gt calculation
                matched_idx = layer_match_result[y, x]
                if matched_idx < 0:
                    continue

                # inter-layer link_gt: compare with the 8 neighbours on the same layer
                for nidx, (nx, ny) in enumerate(get_inter_layer_neighbours(x, y)): # n is short for neighbour
                    if is_valid_cord(nx, ny, w, h):
                        n_matched_idx = layer_match_result[ny, nx]
                        # if the current default box has matched the same bbox as this neighbour,
                        # the linkage connecting them is labeled as positive.
                        if matched_idx == n_matched_idx:
                            inter_layer_link_gt[y, x, nidx] = n_matched_idx

                # cross-layer link_gt: compare with the 4 neighbours on the previous layer
                if has_previous_layer:
                    for nidx, (nx, ny) in enumerate(get_cross_layer_neighbours(x, y)):
                        if is_valid_cord(nx, ny, pw, ph):
                            n_matched_idx = previous_layer_match_result[ny, nx]
                            if matched_idx == n_matched_idx:
                                cross_layer_link_gt[y, x, nidx] = n_matched_idx

        inter_layer_link_gts.append(inter_layer_link_gt)

        if has_previous_layer:
            cross_layer_link_gts.append(cross_layer_link_gt)

    # construct the final link_gt from layer-wise data.
    # note that this reshape and concat order is the same as that of predicted linkages, which
    #     has been done in the construction of SegLinkNet.
    # A single hstack over all pieces gives the same result as stacking the two groups
    # separately, and still works when there is only one layer, i.e. no cross-layer piece.
    flat_link_gts = [np.reshape(t, -1) for t in inter_layer_link_gts + cross_layer_link_gts]
    link_gt = np.hstack(flat_link_gts)
    return link_gt

# @util.dec.print_calling_in_short_for_tf
def encode_seg_offsets(seg_locs):
    """
    Encode absolute segment locations as offsets from `config.default_anchors`.
    Args:
        seg_locs: a ndarray with shape = (N, 5). It contains the absolute values of segment locations
    Return:
        seg_offsets, i.e., the offsets from default boxes. It is used as the final segment location ground truth.
    """
    anchors = config.default_anchors
    anchor_cx, anchor_cy, anchor_w, anchor_h = [anchors[:, col] for col in range(4)]
    seg_cx, seg_cy, seg_w, seg_h = [seg_locs[:, col] for col in range(4)]

    # the inverse of the decoding in decode_seg_offsets_pred:
    #    seg_cx = anchor_cx + anchor_w * offset_cx
    #    seg_cy = anchor_cy + anchor_h * offset_cy
    #    seg_w = anchor_w * e^(offset_w)
    #    seg_h = anchor_h * e^(offset_h)
    #    theta is passed through as it is
    raw_offsets = (
        (seg_cx - anchor_cx) * 1.0 / anchor_w,
        (seg_cy - anchor_cy) * 1.0 / anchor_h,
        np.log(seg_w * 1.0 / anchor_w),
        np.log(seg_h * 1.0 / anchor_h),
        seg_locs[:, 4],
    )

    # prior scaling can be used to adjust the loss weight of loss on offset x, y, w, h, theta
    seg_offsets = np.zeros_like(seg_locs)
    for col, raw in enumerate(raw_offsets):
        seg_offsets[:, col] = raw / config.prior_scaling[col]
    return seg_offsets

def decode_seg_offsets_pred(seg_offsets_pred):
    """Decode predicted offsets back to absolute segment locations.

    Args:
        seg_offsets_pred: indexed as ``seg_offsets_pred[:, k]`` for k in 0..4, the
            scaled offsets (cx, cy, w, h, theta) relative to `config.default_anchors`.
    Return:
        an array with one row per anchor: [cx, cy, w, h, theta].
    """
    anchors = config.default_anchors
    anchor_cx, anchor_cy, anchor_w, anchor_h = [anchors[:, col] for col in range(4)]

    # undo the prior scaling applied at encoding time
    d_cx, d_cy, d_w, d_h, d_theta = [
        seg_offsets_pred[:, col] * config.prior_scaling[col] for col in range(5)
    ]

    decoded = [
        anchor_cx + anchor_w * d_cx,
        anchor_cy + anchor_h * d_cy,
        anchor_w * np.exp(d_w),
        anchor_h * np.exp(d_h),
        d_theta,
    ]
    # stack as rows, then flip so that each row describes one segment
    return np.transpose(np.vstack(decoded))

# @util.dec.print_calling_in_short_for_tf
def get_all_seglink_gt(xs, ys, ignored):
    """Compute all SegLink ground truths for one image.

    Args:
        xs, ys: shape = (N, 4), the corner coordinates of the N ground truth bboxes.
        ignored: length N; bboxes whose entry equals 1 are treated as ignored.
    Return:
        seg_labels: int32; 1 for segments matched to a bbox, 0 for segments matched to an
            ignored bbox, -1 for unmatched segments.
        seg_offsets: float32, the encoded segment locations.
        link_labels: int32, using the same 1 / 0 / -1 convention for links.
    """
    assert len(np.shape(xs)) == 2 and \
            np.shape(xs)[-1] == 4 and \
            np.shape(ys) == np.shape(xs), \
        'the shape of xs and ys must be (N, 4), but got %s and %s'%(np.shape(xs), np.shape(ys))

    assert len(xs) == len(ignored), 'the length of xs and `ignored` must be the same, \
            but got %s and %s'%(len(xs), len(ignored))

    # at this stage the label values are the indexes of the matched bboxes
    anchors = config.default_anchors
    seg_labels, seg_locations = match_anchor_to_text_boxes_fast(anchors, xs, ys)
    link_labels = cal_link_labels(seg_labels)
    seg_offsets = encode_seg_offsets(seg_locations)

    # -2 temporarily marks everything matched to an ignored bbox, so that it survives
    # the "positive -> 1" step below
    ignored_marker = -2
    for ignored_bbox_idx in np.where(ignored == 1)[0]:
        link_labels[np.where(link_labels == ignored_bbox_idx)] = ignored_marker
        seg_labels[np.where(seg_labels == ignored_bbox_idx)] = ignored_marker

    # every remaining bbox index becomes the positive label 1
    for labels in (seg_labels, link_labels):
        labels[np.where(labels >= 0)] = 1

    # finally the ignored ones get label 0
    for labels in (seg_labels, link_labels):
        labels[np.where(labels == ignored_marker)] = 0

    # set dtypes
    seg_labels = np.asarray(seg_labels, dtype = np.int32)
    seg_offsets = np.asarray(seg_offsets, dtype = np.float32)
    link_labels = np.asarray(link_labels, dtype = np.int32)

    return seg_labels, seg_offsets, link_labels
    

def tf_get_all_seglink_gt(xs, ys, ignored):
    """
    Graph wrapper around `get_all_seglink_gt`, run through `tf.py_func`.

    xs, ys: tensors representing ground truth bbox, both with shape=(N, 4), values in 0~1.
        They are scaled by the width / height of `config.image_shape` before the call.
    Return: seg_labels, seg_offsets, link_labels tensors with static shapes
        [config.num_anchors], [config.num_anchors, 5] and [config.num_links].
    """
    image_h, image_w = config.image_shape

    # from normalized coordinates to pixel coordinates
    pixel_xs = xs * image_w
    pixel_ys = ys * image_h

    outputs = tf.py_func(get_all_seglink_gt, [pixel_xs, pixel_ys, ignored], [tf.int32, tf.float32, tf.int32])
    seg_labels, seg_offsets, link_labels = outputs

    # py_func outputs carry no static shape, so declare them explicitly
    seg_labels.set_shape([config.num_anchors])
    seg_offsets.set_shape([config.num_anchors, 5])
    link_labels.set_shape([config.num_links])
    return seg_labels, seg_offsets, link_labels

############################################################################################################
#                       linking segments together                                                          #
############################################################################################################
def group_segs(seg_scores, link_scores, seg_conf_threshold, link_conf_threshold):
    """
    Group segments based on their scores and links.

    Segments scoring at least `seg_conf_threshold` are the nodes; two of them are joined
    when they are neighbours (on the same layer, or across adjacent layers) and the link
    between them scores at least `link_conf_threshold`. The connected components are found
    with a union-find structure.
    Args:
        seg_scores: 1-D array, one score per segment, laid out layer by layer.
        link_scores: 1-D array laid out as expected by `reshape_link_gt_by_layer`.
        seg_conf_threshold, link_conf_threshold: minimum scores for segments and links.
    Return: segment groups as a list, consisting of lists of segment indexes, each representing
        a group of segments belonging to the same bbox.
    """

    assert len(np.shape(seg_scores)) == 1
    assert len(np.shape(link_scores)) == 1

    valid_segs = np.where(seg_scores >= seg_conf_threshold)[0] # `np.where` returns a tuple
    assert valid_segs.ndim == 1

    # union-find forest over the valid segments: mask[idx] is the parent of idx, -1 for a root
    mask = {}
    for s in valid_segs:
        mask[s] = -1

    def get_root(idx):
        parent = mask[idx]
        while parent != -1:
            idx = parent
            parent = mask[parent]
        return idx

    def union(idx1, idx2):
        root1 = get_root(idx1)
        root2 = get_root(idx2)

        if root1 != root2:
            mask[root1] = root2

    def to_list():
        result = {}
        for idx in mask:
            root = get_root(idx)
            if root not in result:
                result[root] = []

            result[root].append(idx)

        return [result[root] for root in result]

    seg_indexes = np.arange(len(seg_scores))
    layer_seg_indexes = reshape_labels_by_layer(seg_indexes)

    layer_inter_link_scores, layer_cross_link_scores = reshape_link_gt_by_layer(link_scores)

    for layer_index, layer_name in enumerate(config.feat_layers):
        layer_shape = config.feat_shapes[layer_name]
        lh, lw = layer_shape
        layer_seg_index = layer_seg_indexes[layer_name]
        layer_inter_link_score = layer_inter_link_scores[layer_name]
        if layer_index > 0:
            previous_layer_name = config.feat_layers[layer_index - 1]
            previous_layer_seg_index = layer_seg_indexes[previous_layer_name]
            previous_layer_shape = config.feat_shapes[previous_layer_name]
            plh, plw = previous_layer_shape
            layer_cross_link_score = layer_cross_link_scores[layer_name]

        # `range` (not `xrange`) so that the loops also run on Python 3
        for y in range(lh):
            for x in range(lw):
                seg_index = layer_seg_index[y, x]
                if seg_scores[seg_index] < seg_conf_threshold:
                    continue

                # find inter layer linked neighbours
                for nidx, (nx, ny) in enumerate(get_inter_layer_neighbours(x, y)):
                    # the condition of connecting neighbour segment: valid coordinate,
                    # valid segment confidence and valid link confidence.
                    if is_valid_cord(nx, ny, lw, lh) and \
                        seg_scores[layer_seg_index[ny, nx]] >= seg_conf_threshold and \
                        layer_inter_link_score[y, x, nidx] >= link_conf_threshold:
                        n_seg_index = layer_seg_index[ny, nx]
                        union(seg_index, n_seg_index)

                # find cross layer linked neighbours
                if layer_index > 0:
                    for nidx, (nx, ny) in enumerate(get_cross_layer_neighbours(x, y)):
                        if is_valid_cord(nx, ny, plw, plh) and \
                           seg_scores[previous_layer_seg_index[ny, nx]] >= seg_conf_threshold and \
                           layer_cross_link_score[y, x, nidx] >= link_conf_threshold:

                            n_seg_index = previous_layer_seg_index[ny, nx]
                            union(seg_index, n_seg_index)

    return to_list()
        
        
############################################################################################################
#                       combining segments to bboxes                                                       #
############################################################################################################
def tf_seglink_to_bbox(seg_cls_pred, link_cls_pred, seg_offsets_pred, image_shape, 
                       seg_conf_threshold = None, link_conf_threshold = None):
    """Graph wrapper around `seglink_to_bbox`, run through `tf.py_func`.

    Args:
        seg_cls_pred, link_cls_pred: class predictions whose last dimension is 2; index 1
            of that dimension is used as the positive score.
        seg_offsets_pred: predicted offsets whose last dimension is 5.
        image_shape: the shape of the image the bboxes are scaled to.
            If the predictions are 3-D, only a leading batch dimension of size 1 is
            supported, and it is stripped from all four inputs.
        seg_conf_threshold, link_conf_threshold: score thresholds. `None` means
            `config.seg_conf_threshold` / `config.link_conf_threshold`.
    Return:
        a float32 tensor holding the bboxes computed by `seglink_to_bbox`.
    """
    # `tf.py_func` converts every input to a tensor and cannot convert `None`, so the
    # defaults have to be resolved here rather than inside `seglink_to_bbox`.
    if seg_conf_threshold is None:
        seg_conf_threshold = config.seg_conf_threshold
    if link_conf_threshold is None:
        link_conf_threshold = config.link_conf_threshold

    if len(seg_cls_pred.shape) == 3:
        assert seg_cls_pred.shape[0] == 1 # only batch_size == 1 supported now TODO
        seg_cls_pred = seg_cls_pred[0, ...]
        link_cls_pred = link_cls_pred[0, ...]
        seg_offsets_pred = seg_offsets_pred[0, ...]
        image_shape = image_shape[0, :]

    assert seg_cls_pred.shape[-1] == 2
    assert link_cls_pred.shape[-1] == 2
    assert seg_offsets_pred.shape[-1] == 5

    seg_scores = seg_cls_pred[:, 1]
    link_scores = link_cls_pred[:, 1]
    image_bboxes = tf.py_func(seglink_to_bbox, 
          [seg_scores, link_scores, seg_offsets_pred, image_shape, seg_conf_threshold, link_conf_threshold], 
          tf.float32)
    return image_bboxes
    
    
def seglink_to_bbox(seg_scores, link_scores, seg_offsets_pred, 
                    image_shape = None, seg_conf_threshold = None, link_conf_threshold = None):
    """
    Turn segment / link predictions into bboxes.
    Args:
        seg_scores: the scores of segments being positive
        link_scores: the scores of linkage being positive
        seg_offsets_pred: the predicted segment offsets, decoded with `decode_seg_offsets_pred`
        image_shape: shape of the target image; `config.image_shape` when None
        seg_conf_threshold, link_conf_threshold: score thresholds. Any falsy value
            (None, but also 0) is replaced by the corresponding `config` value.
    Return:
        a float32 array built from the output of `bboxes_to_xys` for the combined bboxes,
        which were rescaled from `config.image_shape` to `image_shape`.
    """
    seg_conf_threshold = seg_conf_threshold or config.seg_conf_threshold
    link_conf_threshold = link_conf_threshold or config.link_conf_threshold
    if image_shape is None:
        image_shape = config.image_shape

    seg_groups = group_segs(seg_scores, link_scores, seg_conf_threshold, link_conf_threshold)
    seg_locs = decode_seg_offsets_pred(seg_offsets_pred)

    bboxes = []
    ref_h, ref_w = config.image_shape
    for member_indexes in seg_groups:
        member_locs = [seg_locs[member, :] for member in member_indexes]
        combined = combine_segs(member_locs)

        # rescale cx, w by the width ratio and cy, h by the height ratio; theta is kept
        image_h, image_w = image_shape[0:2]
        x_scale = image_w * 1.0 / ref_w
        y_scale = image_h * 1.0 / ref_h
        bboxes.append(np.asarray(combined) * [x_scale, y_scale, x_scale, y_scale, 1])

    xys = bboxes_to_xys(bboxes, image_shape)
    return np.asarray(xys, dtype = np.float32)

def sin(theta):
    """Sine of `theta`, with `theta` given in degrees."""
    radians = theta / 180.0 * np.pi
    return np.sin(radians)
def cos(theta):
    """Cosine of `theta`, with `theta` given in degrees."""
    radians = theta / 180.0 * np.pi
    return np.cos(radians)
def tan(theta):
    """Tangent of `theta`, with `theta` given in degrees."""
    radians = theta / 180.0 * np.pi
    return np.tan(radians)
    
def combine_segs(segs, return_bias = False):
    """Combine a group of segments into one oriented bbox.

    Args:
        segs: array-like with shape = (N, 5), each row being [cx, cy, w, h, theta],
            theta in degrees.
        return_bias: if True, the bias b of the fitted line y = kx + b is appended to
            the result (useful for debugging).
    Return:
        For a single segment without `return_bias`: that segment as an ndarray row.
        Otherwise the tuple (bcx, bcy, bw, bh, bar_theta), with b appended when
        `return_bias` is True. bar_theta is the mean theta, bh the mean height, and the
        center and width come from the two segments whose projected centers are farthest apart.
    """
    segs = np.asarray(segs)
    assert segs.ndim == 2
    assert segs.shape[-1] == 5

    if len(segs) == 1:
        if return_bias:
            # honour `return_bias` here too: the line through the single center
            # with slope tan(theta) has bias cy - k * cx
            cx, cy, w, h, theta = segs[0, :]
            return cx, cy, w, h, theta, cy - tan(theta) * cx
        return segs[0, :]

    # find the best straight line fitting all center points: y = kx + b
    cxs = segs[:, 0]
    cys = segs[:, 1]

    ## the slope
    bar_theta = np.mean(segs[:, 4]) # average theta
    k = tan(bar_theta)

    ## the bias: minimize sum (k*x_i + b - y_i)^2
    ### let c_i = k*x_i - y_i
    ### sum (k*x_i + b - y_i)^2 = sum(c_i + b)^2
    ###                           = sum(c_i^2 + b^2 + 2 * c_i * b)
    ###                           = n * b^2 + 2* sum(c_i) * b + sum(c_i^2)
    ### the target b = - sum(c_i) / n = - mean(c_i) = mean(y_i - k * x_i)
    b = np.mean(cys - k * cxs)

    # find the projections of all centers on the straight line
    ## firstly, shift both the line and the centers vertically by b, so that the line passes through (0, 0): y = kx
    ## represent the line as a vector (1, k), and the projection of vector(x, y) on (1, k) is: proj = (x + k * y)  / sqrt(1 + k^2)
    ## the projection point of (x, y) on (1, k) is (proj * cos(theta), proj * sin(theta))
    t_cys = cys - b
    projs = (cxs + k * t_cys) / np.sqrt(1 + k**2)
    proj_points = np.transpose([projs * cos(bar_theta), projs * sin(bar_theta)])

    # find the pair of projected points with the max distance
    max_dist = -1
    idx1 = -1
    idx2 = -1

    num_points = len(proj_points)
    # `range` (not `xrange`) so that the loops also run on Python 3
    for i in range(num_points):
        point1 = proj_points[i, :]
        for j in range(i + 1, num_points):
            point2 = proj_points[j, :]
            dist = np.sqrt(np.sum((point1 - point2) ** 2))
            if dist > max_dist:
                idx1 = i
                idx2 = j
                max_dist = dist
    assert idx1 >= 0 and idx2 >= 0

    # the bbox: bcx, bcy, bw, bh, average_theta
    seg1 = segs[idx1, :]
    seg2 = segs[idx2, :]
    bcx, bcy = (seg1[:2] + seg2[:2]) / 2.0
    bh = np.mean(segs[:, 3])
    # the two end segments each contribute half of their width beyond their centers
    bw = max_dist + (seg1[2] + seg2[2]) / 2.0

    if return_bias:
        return bcx, bcy, bw, bh, bar_theta, b # bias is useful for debugging.
    else:
        return bcx, bcy, bw, bh, bar_theta
            
def bboxes_to_xys(bboxes, image_shape):
    """Convert Seglink bboxes to xys, i.e., eight values per bbox: the four corner points.
    The `image_shape` is used to make sure all points returned are valid, i.e., within image area
    Args:
        bboxes: sequence with shape = (N, 5), each bbox being [cx, cy, w, h, theta].
        image_shape: its first two entries are read as (height, width).
    Return:
        an empty list if `bboxes` is empty; otherwise an ndarray with shape = (N, 8) holding
        the integer corner coordinates [x1, y1, ..., x4, y4], with x clamped to [0, w - 1]
        and y clamped to [0, h - 1].
    """
    if len(bboxes) == 0:
        return []

    assert np.ndim(bboxes) == 2 and np.shape(bboxes)[-1] == 5, 'invalid `bboxes` param with shape =  ' + str(np.shape(bboxes))

    h, w = image_shape[0:2]

    # `cv2.cv.BoxPoints` only exists in OpenCV 2.x; OpenCV >= 3 provides `cv2.boxPoints`.
    box_points = getattr(cv2, 'boxPoints', None)
    if box_points is None:
        box_points = cv2.cv.BoxPoints

    def clamp(values, upper):
        # same rule as before: negatives become 0, values >= upper become upper - 1
        return np.where(values < 0, 0, np.where(values >= upper, upper - 1, values))

    xys = np.zeros((len(bboxes), 8))
    for bbox_idx, bbox in enumerate(bboxes):
        # plain Python floats in the ((cx, cy), (w, h), theta) layout expected by cv2
        rotated_rect = ((float(bbox[0]), float(bbox[1])),
                        (float(bbox[2]), float(bbox[3])),
                        float(bbox[4]))
        # `np.int0` was an alias of `np.intp` and was removed in NumPy 2.0;
        # astype truncates towards zero exactly as it did.
        points = np.asarray(box_points(rotated_rect)).astype(np.intp)
        points[:, 0] = clamp(points[:, 0], w)
        points[:, 1] = clamp(points[:, 1], h)
        xys[bbox_idx, :] = np.reshape(points, -1)
    return xys