In [1]:
import cv2
import time
import numpy as np
import random
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
%matplotlib inline

MIN_SIMILARITY_WITH_GT_REGION = 0.5
MIN_SIMILARITY_WITH_NEIGHBOR_BBOX = 0.5
NeedFastSeSe = False

In [2]:
def getCameraParams ( isColor: bool ) :
    # TODO : consider distortion

    # RGB Intrinsic Parameters
    fx_rgb = 5.1885790117450188e+02
    fy_rgb = 5.1946961112127485e+02
    cx_rgb = 3.2558244941119034e+02
    cy_rgb = 2.5373616633400465e+02
    # # RGB Distortion Parameters
    # k1_rgb =  2.0796615318809061e-01
    # k2_rgb = -5.8613825163911781e-01
    # k3_rgb = 4.9856986684705107e-01
    # p1_rgb = 7.2231363135888329e-04
    # p2_rgb = 1.0479627195765181e-03

    # Depth Intrinsic Parameters
    fx_d = 5.8262448167737955e+02
    fy_d = 5.8269103270988637e+02
    cx_d = 3.1304475870804731e+02
    cy_d = 2.3844389626620386e+02
    # # RGB Distortion Parameters
    # k1_d = -9.9897236553084481e-02
    # k2_d = 3.9065324602765344e-01
    # k3_d = -5.1031725053400578e-01
    # p1_d = 1.9290592870229277e-03
    # p2_d = -1.9422022475975055e-03

    if isColor :
        return cx_rgb, cy_rgb, fx_rgb, fy_rgb
    return cx_d, cy_d, fx_d, fy_d

In [3]:
def getImages ( img_name: str, root_dir: str = "../nyudv2", print_info: bool = False ) :
    start = time.time()

    # image = cv2.imread(f"{root_dir}/rgb_{img_name}.png")
    # image_depth = np.mean(cv2.imread(f"{root_dir}/depth_{img_name}.png"), axis=2)
    # image_labelmaps = cv2.imread(f"{root_dir}/label_maps_{img_name}.png")
    image = np.load(f"{root_dir}/rgb/{img_name}.npy")
    image_depth = np.load(f"{root_dir}/depth/{img_name}.npy")
    image_labelmaps = np.load(f"{root_dir}/label/{img_name}.npy")
    image_instmaps = np.load(f"{root_dir}/instance/{img_name}.npy")

    image_hha = cv2.imread(f"{root_dir}/hha/{img_name}.png")

    image_xyz = np.zeros((image_depth.shape[0], image_depth.shape[1], 3))
    height, width = image_depth.shape
    CX_DEPTH, CY_DEPTH, FX_DEPTH, FY_DEPTH = getCameraParams(isColor=False)
    for i in range(height):
        for j in range(width):
            z = image_depth[i,j]
            x = (j - CX_DEPTH) * z / FX_DEPTH
            y = (i - CY_DEPTH) * z / FY_DEPTH
            image_xyz[i,j] = [x, y, z]

    image_labinsts = np.concatenate((image_labelmaps[:, :, np.newaxis], image_instmaps[:, :, np.newaxis]), axis=-1)
    fg_linsts = np.array(list({ (l,i) for l, i in image_labinsts.reshape(-1, 2) if l != 0 and i != 0 })) # 0 is background (boundaries, etc)

    allmasks, am_label, am_instance, am_area = list(), list(), list(), list()
    for label, inst in fg_linsts :
        mask = (image_labinsts[:, :, 0] == label) & (image_labinsts[:, :, 1] == inst)
        allmasks.append(mask)
        am_label.append(label)
        am_instance.append(inst)
        am_area.append(np.sum(mask))
    allmasks = np.array(allmasks)
    am_label = np.array(am_label)
    am_instance = np.array(am_instance)
    am_area = np.array(am_area)

    if print_info :
        unilm = np.unique(image_labelmaps)
        print(f"[INFO] unique label classes: {unilm}")
        print(f"[INFO] number of unique label classes: {len(unilm)}")
        print(f"[INFO] all images loaded in {time.time() - start:.4f}s")

    # ensure all of these are np arrays
    return image, image_depth, image_hha, image_xyz, image_labinsts, allmasks, am_label, am_instance, am_area

In [4]:
def plot_images ( image, image_depth, image_hha ) :
    _, subplts = plt.subplots(1, 3, figsize=(10, 10))
    subplts[0].imshow(image)
    subplts[0].axis('off')
    subplts[1].imshow(image_depth)
    subplts[1].axis('off')
    subplts[2].imshow(image_hha)
    subplts[2].axis('off')
    plt.show()

In [5]:
def plot_hha_components ( image_hha ) :
    _, subplts = plt.subplots(1, 3, figsize=(10, 10))
    subplts[0].imshow(image_hha[:, :, 0])
    subplts[0].axis('off')
    subplts[1].imshow(image_hha[:, :, 1])
    subplts[1].axis('off')
    subplts[2].imshow(image_hha[:, :, 2])
    subplts[2].axis('off')
    plt.show()

In [6]:
def plot_angle_directions ( image_hha ) :
    down = image_hha[:, :, 0] < 255/3
    horiz = ( 255/3 <= image_hha[:, :, 0] ) & ( image_hha[:, :, 0] < 2*255/3 )
    up = 2*255/3 <= image_hha[:, :, 0]

    _, subplts = plt.subplots(1, 3, figsize=(10, 10))
    subplts[0].imshow(down)
    subplts[0].axis('off')
    subplts[1].imshow(horiz)
    subplts[1].axis('off')
    subplts[2].imshow(up)
    subplts[2].axis('off')
    plt.show()

In [7]:
def plot_masks ( allmasks, am_label, am_instance ) :
    masks_per_row = 5
    _, subplts = plt.subplots(int(np.ceil(len(allmasks)/masks_per_row)), masks_per_row, figsize=(20, 10))
    for subplt in subplts.flatten() :
        subplt.axis('off')
    for idx, (l, i, imask) in enumerate(zip(am_label, am_instance, allmasks)) :
        subplts[idx//masks_per_row][idx%masks_per_row].imshow(imask)
        subplts[idx//masks_per_row][idx%masks_per_row].axis('off')
        subplts[idx//masks_per_row][idx%masks_per_row].set_title(f"{l}, {i}")
    plt.show()

In [8]:
def performSeSe ( image, image_hha, print_info=False ) :
	start = time.time()
	ss = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
	ss.setBaseImage(image)
	ss.addImage(image_hha) # TODO : test reverse order
	if NeedFastSeSe :
		if print_info :
			print("[INFO] using *fast* selective search")
		ss.switchToSelectiveSearchFast()
	else:
		if print_info :
			print("[INFO] using *quality* selective search")
		ss.switchToSelectiveSearchQuality()
	bboxes = ss.process()
	if print_info :
		print(f"[INFO] total number of region proposals: {len(bboxes)}")
		print(f"[INFO] basic region proposal took {time.time()-start:.4f} seconds")
	return bboxes

In [9]:
def showSeSeBBoxes ( image, rects ) :
	bocses_per = 30
	for i in range(0, len(rects), bocses_per):
		output = image.copy()
		for (x, y, w, h) in rects[i:i + bocses_per]:
			color = [random.randint(0, 255) for _ in range(0, 3)]
			cv2.rectangle(output, (x, y), (x + w, y + h), color, 2)
		cv2.imshow("Output", output)
		key = cv2.waitKey(0) & 0xFF
		if key == ord("q"):
			break
	cv2.destroyWindow("Output")

In [10]:
def ground_truth_region_label ( bbox_labinsts, bbox_masks, am_label, am_instance, am_area ) :
    best_class, best_score = 0, MIN_SIMILARITY_WITH_GT_REGION
    labels_in_question = np.unique(bbox_labinsts[:, :, 0])
    insts_in_question = np.unique(bbox_labinsts[:, :, 1])
    w, h = bbox_masks.shape[1:]

    for label, inst, area, mask_region in zip(am_label, am_instance, am_area, bbox_masks) :
        if label not in labels_in_question or inst not in insts_in_question :
            continue
        intersection_area = np.sum(mask_region) # TODO : can be made faster by saving the tight bounding box of the mask and only checking that area
        # TODO : check if it could be better to calculate the intersection between bounding box and tight bounding box of mask
        sim = intersection_area / ( area + w*h - intersection_area ) # using jaccard similarity as per the paper
        if sim > best_score :
            best_score, best_class = sim, label
    return best_class, best_score

In [11]:
def bb_jaccardsimilarity ( bb1, bb2 ) :
    x1,y1,w1,h1 = bb1
    x2,y2,w2,h2 = bb2
    x_intersection = max(x1, x2)
    y_intersection = max(y1, y2)
    w_intersection = min(x1+w1, x2+w2) - x_intersection
    h_intersection = min(y1+h1, y2+h2) - y_intersection

    if w_intersection <= 0 or h_intersection <= 0 :
        return 0.0
    intersection_area = w_intersection * h_intersection
    iou = ( intersection_area ) / ( w1*h1 + w2*h2 - intersection_area )
    assert iou >= 0.0
    assert iou <= 1.0
    return iou

In [12]:
def supress_non_maximal_boxes ( rects, image_labinsts, allmasks, am_label, am_instance, am_area, print_info=False ) :
    start = time.time()
    data_unsupressed = list()
    for (x, y, w, h) in rects :
        gt_label, gt_score = ground_truth_region_label(image_labinsts[y:y+h, x:x+w], allmasks[:, y:y+h, x:x+w], am_label, am_instance, am_area)
        data_unsupressed.append((x, y, w, h, gt_label, gt_score))
    data_unsupressed = np.asanyarray(data_unsupressed)
    sorting_order = data_unsupressed[:, -1].argsort()[::-1]
    data_unsupressed = data_unsupressed[sorting_order]
    # NOTE : now the data_unsupressed is sorted by gt_score (descending order)

    if print_info :
        print(f"[INFO] label and score calculation took {time.time()-start:.4f}s")
        start = time.time()

    # supress non maximal boxes
    regions_supressed, labels_supressed, scores_supressed = list(), list(), list()
    for data in data_unsupressed :
        bb = data[:4]
        for bb_ in regions_supressed :
            if bb_jaccardsimilarity(bb, bb_) > MIN_SIMILARITY_WITH_NEIGHBOR_BBOX :
                break
        else :
            # NOTE : this is not a typo, it is a python feature
            # NOTE : code reaches here only if the above for loop is not exited by break
            regions_supressed.append(bb)
            labels_supressed.append(data[4])
            scores_supressed.append(data[5])
    regions_supressed = np.asanyarray(regions_supressed).astype(int)
    labels_supressed = np.asanyarray(labels_supressed).astype(int)
    scores_supressed = np.asanyarray(scores_supressed)

    if print_info :
        print(f"[INFO] number of boxes after supression: {len(labels_supressed)}")
        print(f"[INFO] non-maximum supression took {time.time()-start:.4f}s")
    return regions_supressed, labels_supressed, scores_supressed

In [13]:
def extract_features ( image, image_depth, image_hha, image_xyz, regions_supressed, labels_supressed, scores_supressed, print_info: bool = False ) :
    start = time.time()
    X_supressed, Y_supressed = list(), list()
    for (x, y, w, h), gt_label, gt_score in zip(regions_supressed, labels_supressed, scores_supressed) :
        proposed_box_rgb = image[y:y+h, x:x+w]
        proposed_box_depth = image_depth[y:y+h, x:x+w]
        proposed_box_angle = image_hha[y:y+h, x:x+w][:, :, 0]
        proposed_box_height = image_hha[y:y+h, x:x+w][:, :, 1]
        proposed_box_disparity = image_hha[y:y+h, x:x+w][:, :, 2]
        proposed_box_xyz = image_xyz[y:y+h, x:x+w].reshape(-1, 3)

        depth_mean_sd = np.mean(proposed_box_depth), np.std(proposed_box_depth)
        height_mean_sd = np.mean(proposed_box_height), np.std(proposed_box_height)
        angle_mean_sd = np.mean(proposed_box_angle), np.std(proposed_box_angle)
        disparity_mean_sd = np.mean(proposed_box_disparity), np.std(proposed_box_disparity)
        x_mean_sd = np.mean(proposed_box_xyz[:, 0]), np.std(proposed_box_xyz[:, 0])
        y_mean_sd = np.mean(proposed_box_xyz[:, 1]), np.std(proposed_box_xyz[:, 1])
        z_mean_sd = np.mean(proposed_box_xyz[:, 2]), np.std(proposed_box_xyz[:, 2])
        extent_x = np.max(proposed_box_xyz[:, 0]) - np.min(proposed_box_xyz[:, 0])
        extent_y = np.max(proposed_box_xyz[:, 1]) - np.min(proposed_box_xyz[:, 1])
        extent_z = np.max(proposed_box_xyz[:, 2]) - np.min(proposed_box_xyz[:, 2])
        min_height = np.min(proposed_box_height)
        max_height = np.max(proposed_box_height)
        frac_facing_down = np.sum(proposed_box_angle < 255/3) / (w*h)
        frac_facing_horiz = np.sum(( 255/3 <= proposed_box_angle ) & ( proposed_box_angle < 2*255/3 )) / (w*h)
        frac_facing_up = np.sum(2*255/3 <= proposed_box_angle) / (w*h)

        area = w*h
        perimeter = 2*(w+h)
        location = x / image.shape[1], y / image.shape[0]
        aspect_ratio = w / h
        # perimeter (and sum of contour strength) divided by the squared root of the area
        # area of the region divided by that of the bounding box.
        # Sum of contour strength at the boundaries
        # mean contour strength at the boundaries
        # minimum and maximum UCM threshold of appearance and disappearance of the regions forming the candidate.

        r_mean_sd = np.mean(proposed_box_rgb[:, :, 0]), np.std(proposed_box_rgb[:, :, 0])
        g_mean_sd = np.mean(proposed_box_rgb[:, :, 1]), np.std(proposed_box_rgb[:, :, 1])
        b_mean_sd = np.mean(proposed_box_rgb[:, :, 2]), np.std(proposed_box_rgb[:, :, 2])
        d_mean_sd = np.mean(proposed_box_depth), np.std(proposed_box_depth)

        features = [ gt_score,x,y,w,h, *depth_mean_sd, *height_mean_sd, *angle_mean_sd, *disparity_mean_sd, *x_mean_sd, *y_mean_sd, *z_mean_sd, extent_x, extent_y, extent_z, min_height, max_height, frac_facing_down, frac_facing_horiz, frac_facing_up, area, perimeter, *location, aspect_ratio, *r_mean_sd, *g_mean_sd, *b_mean_sd, *d_mean_sd ]
        # NOTE : remove gt_score,x,y,w,h from features before training
        X_supressed.append(features)
        Y_supressed.append(gt_label)
    X_supressed = np.asanyarray(X_supressed)
    Y_supressed = np.asanyarray(Y_supressed)
    
    if print_info :
        print("[INFO] classes and their counts:")
        print(np.asanyarray(np.unique(labels_supressed, return_counts=True)).T)
        print("[INFO] number of classes excluding background : ", len(np.unique(labels_supressed[labels_supressed!=0])))
        print(f"[INFO] feature extraction took {time.time()-start:.4f} seconds")
    return X_supressed, Y_supressed

In [14]:
def plot_selected_regions ( image, regions_supressed, Y_supressed ) :
    pics_per_row = 4
    _, subplts = plt.subplots(int(np.ceil(np.sum(Y_supressed!=0)/pics_per_row)), pics_per_row, figsize=(20, 20))
    for subplt in subplts.flatten() :
        subplt.axis('off')
    plt.tight_layout()

    for idx, ids in enumerate(np.argwhere(Y_supressed != 0).flatten()) :
        x, y, w, h = regions_supressed[ids]
        output = np.zeros_like(image)
        output[y:y+h, x:x+w] = image[y:y+h, x:x+w]
        # print(f"pred class = {Y_supressed[ids]}")
        subplts[idx//pics_per_row][idx%pics_per_row].imshow(output)
        subplts[idx//pics_per_row][idx%pics_per_row].set_title(Y_supressed[ids])
    plt.show()

In [15]:
def get_features_from_filename ( imgnumber, print_plots=False, print_info=False ) :
    tic = time.time()
    image, image_depth, image_hha, image_xyz, image_labinsts, allmasks, am_label, am_instance, am_area = getImages(imgnumber, print_info=print_info)
    if print_plots :
        plot_masks(allmasks, am_label, am_instance)
    allbboxes = performSeSe(image, image_hha, print_info=print_info)
    bboxes, labels, scores = supress_non_maximal_boxes(allbboxes, image_labinsts, allmasks, am_label, am_instance, am_area, print_info=print_info)
    X, Y = extract_features(image, image_depth, image_hha, image_xyz, bboxes, labels, scores, print_info=print_info)
    if print_plots :
        plot_selected_regions(image, bboxes, Y)
    if print_info :
        print(f"[INFO] X-> {X.shape} \t Y-> {Y.shape}")
        print(f"[INFO] total time taken {time.time()-tic:.4f} seconds")
    return X, Y

In [16]:
X__, Y__ = get_features_from_filename("3", print_plots=False, print_info=True)

[INFO] unique label classes: [ 0  2  3  5 15 21 34 35 36 37]
[INFO] number of unique label classes: 10
[INFO] all images loaded in 2.0767s
[INFO] using *quality* selective search
[INFO] total number of region proposals: 6594
[INFO] basic region proposal took 10.3028 seconds
[INFO] label and score calculation took 6.6721s
[INFO] number of boxes after supression: 925
[INFO] non-maximum supression took 2.4808s
[INFO] classes and their counts:
[[  0 906]
 [  2   4]
 [  5   1]
 [ 15   5]
 [ 21   3]
 [ 34   1]
 [ 35   1]
 [ 36   1]
 [ 37   3]]
[INFO] number of classes excluding background :  8
[INFO] feature extraction took 0.4521 seconds
[INFO] X-> (925, 40) 	 Y-> (925,)
[INFO] total time taken 21.9854 seconds


In [17]:
X__, Y__ = get_features_from_filename("69", print_plots=False, print_info=True)

[INFO] unique label classes: [  0  11  21  42  55  59  64  66  74  78  81 119 141 157 158]
[INFO] number of unique label classes: 15
[INFO] all images loaded in 1.9625s
[INFO] using *quality* selective search
[INFO] total number of region proposals: 7492
[INFO] basic region proposal took 8.3346 seconds
[INFO] label and score calculation took 6.1502s
[INFO] number of boxes after supression: 1042
[INFO] non-maximum supression took 3.0868s
[INFO] classes and their counts:
[[   0 1016]
 [  11    1]
 [  21    3]
 [  42    1]
 [  55    2]
 [  59    2]
 [  64    4]
 [  66    2]
 [  74    1]
 [  78    1]
 [  81    3]
 [ 119    1]
 [ 141    2]
 [ 157    1]
 [ 158    2]]
[INFO] number of classes excluding background :  14
[INFO] feature extraction took 0.5105 seconds
[INFO] X-> (1042, 40) 	 Y-> (1042,)
[INFO] total time taken 20.0448 seconds


In [18]:
X__, Y__ = get_features_from_filename("72", print_plots=False, print_info=True)

[INFO] unique label classes: [  0   2   4  21  64  74  85 119 144 157 158]
[INFO] number of unique label classes: 11
[INFO] all images loaded in 1.9989s
[INFO] using *quality* selective search
[INFO] total number of region proposals: 3184
[INFO] basic region proposal took 8.9336 seconds
[INFO] label and score calculation took 2.2131s
[INFO] number of boxes after supression: 529
[INFO] non-maximum supression took 0.7549s
[INFO] classes and their counts:
[[  0 508]
 [  2   1]
 [  4   1]
 [ 21   2]
 [ 64   6]
 [ 74   2]
 [ 85   1]
 [119   3]
 [144   2]
 [157   2]
 [158   1]]
[INFO] number of classes excluding background :  10
[INFO] feature extraction took 0.2870 seconds
[INFO] X-> (529, 40) 	 Y-> (529,)
[INFO] total time taken 14.1878 seconds
