In [10]:
import cv2
import os
import argparse
import numpy as np
import xml.etree.ElementTree as ET

def selective_search(img, strategy):
    """
    @brief Selective search with different strategies
    @param img The input image
    @param strategy The strategy selected ['color', 'all']
    @retval bboxes Bounding boxes
    """
    ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    gs = cv2.ximgproc.segmentation.createGraphSegmentation()
    ##################################################
    # TODO: For this part, please set the K as 200,  #
    #       sigma as 0.8 for the graph segmentation. #
    #       Use gs as the graph segmentation for ss  #
    #       to process after strategies are set.     #
    ##################################################
    
    gs.setK(400)
    gs.setSigma(0.8)
    
    ss.addStrategy(strategy)
    ss.addGraphSegmentation(gs)
    ss.addImage(img)
    
    
    ##################################################
    # End of TODO                                    #
    ##################################################
    bboxes = ss.process()
    xyxy_bboxes = []

    for box in bboxes:
        x, y, w, h = box
        xyxy_bboxes.append([x, y, x+w, y + h])

    return xyxy_bboxes

def parse_annotation(anno_path):
    """
    @brief Parse annotation files for ground truth bounding boxes
    @param anno_path Path to the file
    """
    tree = ET.parse(anno_path)
    root = tree.getroot()
    gt_bboxes = []
    for child in root:
        if child.tag == 'object':
            for grandchild in child:
                if grandchild.tag == "bndbox":
                    x0 = int(grandchild.find('xmin').text)
                    x1 = int(grandchild.find('xmax').text)
                    y0 = int(grandchild.find('ymin').text)
                    y1 = int(grandchild.find('ymax').text)
                    gt_bboxes.append([x0, y0, x1, y1])
    return gt_bboxes

def bb_intersection_over_union(boxA, boxB):
    """
    @brief compute the intersaction over union (IoU) of two given bounding boxes
    @param boxA numpy array (x_min, y_min, x_max, y_max)
    @param boxB numpy array (x_min, y_min, x_max, y_max)
    """
    ##################################################
    # TODO: Implement the IoU function               #
    ##################################################
    
    # Assuming box coordinates for both the boxes are with respect to a common reference frame
    
    # Also assuming box coordinates are sane and individual box areas are non-zero (non-trivial)
    
    x_min_1,y_min_1,x_max_1,y_max_1 = boxA
    x_min_2,y_min_2,x_max_2,y_max_2 = boxB
    
    if x_min_2 >= x_max_1 or y_min_2 >= y_max_1 or x_max_2 <= x_min_1 or y_max_2 <= y_min_1:
        iou = 0
    else:
        x_1 = max(x_min_1,x_min_2)
        x_2 = min(x_max_1,x_max_2)
        
        y_1 = max(y_min_1,y_min_2)
        y_2 = min(y_max_1,y_max_2)
        
        # NOTE : Here, since the case of iou = 0 has already been checked, we will always have
        # y_2 > y_1 and x_2 > x_1 (also since boxes are assumed non-trivial)
        
        common_area = (y_2-y_1)*(x_2-x_1)
        area_A = (y_max_1-y_min_1)*(x_max_1-x_min_1)
        area_B = (y_max_2-y_min_2)*(x_max_2-x_min_2)
        union_area = area_A + area_B - common_area
        
        iou = common_area/union_area
        
        
    ##################################################
    # End of TODO                                    #
    ##################################################
    return iou

def visualize(img, boxes, color):
    """
    @breif Visualize boxes
    @param img The target image
    @param boxes The box list
    @param color The color
    """
    for box in boxes:
        ##################################################
        # TODO: plot the rectangles with given color in  #
        #       the img for each box.                    #
        ##################################################
        
        img = cv2.rectangle(img,(box[0],box[1]),(box[2],box[3]),color,2)

        ##################################################
        # End of TODO                                    #
        ##################################################
    return img


def main(strategy_info):
    
    """
    
    parser = argparse.ArgumentParser()
    parser.add_argument('--strategy', type=str, default='color')
    args = parser.parse_args()
    
    """
    
    img_dir = 'HW2_Data/JPEGImages'
    anno_dir = 'HW2_Data/Annotations'
    thres = .5

    img_list = os.listdir(img_dir)
    
    num_hit = 0
    num_gt = 0
    
    
    for img_path in img_list:
        
        """
        Load the image file here through cv2.imread
        """
        img_id = img_path[:-4]
        img_name = os.path.join(img_dir, img_path)
        ##################################################
        # TODO: Load the image with OpenCV               #
        ##################################################
        
        

        img = cv2.imread(img_name)



        ##################################################
        # End of TODO                                    #
        ##################################################
        
        if strategy_info == 'all':
            strategy = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategyMultiple()
            color_strat = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategyColor()
            fill_strat = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategyFill()
            size_strat = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategySize()
            texture_strat = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategyTexture()
            strategy.addStrategy(color_strat,0.25)
            strategy.addStrategy(fill_strat,0.25)
            strategy.addStrategy(size_strat,0.25)
            strategy.addStrategy(texture_strat,0.25)
        elif strategy_info == 'color':
            strategy = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategyColor()
        elif strategy_info == 'fill':
            strategy = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategyFill()
        elif strategy_info == 'size':
            strategy = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategySize()
        elif strategy_info == 'texture':
            strategy = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategyTexture()
        else: # default, color
            strategy = cv2.ximgproc.segmentation.createSelectiveSearchSegmentationStrategyColor()
            
        
        # NOTE that my code is not run on a command line
        # Thus, argparse won't be used
        # Rather, main can be executed by calling it with the strategy one wants
        # This is done by strategy_info
        # If strategy_info = 'all', 'color', 'fill', 'size', 'texture', then the corresponding strategy will be used
        # If strategy_info is anything else, then by default, the color based strategy will be implemented
        
        
        proposals = selective_search(img, strategy)
        gt_bboxes = parse_annotation(os.path.join(anno_dir, img_id + ".xml"))
        iou_bboxes = []  # proposals with IoU greater than 0.5

        ##################################################
        # TODO: For all the gt_bboxes in each image,     #
        #       please calculate the recall of the       #
        #       gt_bboxes according to the document.     #
        #       Store the bboxes with IoU >= 0.5         #
        #       If there is more than one proposal has   #
        #       IoU >= 0.5 with a same groundtruth bbox, #
        #       store the one with biggest IoU.          #
        ##################################################
        

        recall_count = 0
        for ground_truth in gt_bboxes:
            loc_selection = []
            prev_loc_iou = 0
            for prop_box in proposals:
                loc_iou = bb_intersection_over_union(np.array(ground_truth),np.array(prop_box))
                if loc_iou >= 0.5:
                    if loc_iou > prev_loc_iou:
                        loc_selection = prop_box
                        prev_loc_iou = loc_iou
            if loc_selection != []:
                iou_bboxes.append(loc_selection)
                recall_count = recall_count + 1
        recall = recall_count/len(gt_bboxes)
            
        ##################################################
        # End of TODO                                    #
        ##################################################
        
        vis_img = img.copy()
        vis_img = visualize(vis_img, gt_bboxes, (255, 0, 0))
        vis_img = visualize(vis_img, iou_bboxes, (0, 0, 255))

        proposals_img = img.copy()
        proposals_img = visualize(proposals_img, gt_bboxes, (255, 0, 0))
        proposals_img = visualize(proposals_img, proposals, (0, 0, 255))

        ##################################################
        # TODO: (optional) You may use cv2 to visualize  #
        #       or save the image for report.            #
        ##################################################
        
        print('Strategy is : ',strategy_info)
        
        cv2.imshow('GT + Best Proposals for current image',vis_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        
        cv2.imshow('GT + All Proposals for current image',proposals_img)
        cv2.waitKey(0)
        cv2.destroyAllWindows()
        
        print('Image Recall : ',recall)
        
        
        ##################################################
        # End of TODO                                    #
        ##################################################

In [11]:
main('color')

Strategy is :  color
Image Recall :  0.6
Strategy is :  color
Image Recall :  0.2857142857142857
Strategy is :  color
Image Recall :  0.6666666666666666


In [12]:
main('all')

Strategy is :  all
Image Recall :  0.8
Strategy is :  all
Image Recall :  0.2857142857142857
Strategy is :  all
Image Recall :  0.6666666666666666


In [13]:
main('texture')

Strategy is :  texture
Image Recall :  0.4
Strategy is :  texture
Image Recall :  0.2857142857142857
Strategy is :  texture
Image Recall :  0.6666666666666666


In [14]:
main('size')

Strategy is :  size
Image Recall :  1.0
Strategy is :  size
Image Recall :  0.42857142857142855
Strategy is :  size
Image Recall :  0.6666666666666666


In [15]:
main('fill')

Strategy is :  fill
Image Recall :  0.8
Strategy is :  fill
Image Recall :  0.2857142857142857
Strategy is :  fill
Image Recall :  0.6666666666666666
