<a href="https://colab.research.google.com/github/gayoung-k/object-detection-learning-notes/blob/main/selective_search_iou.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### Deriving region proposal areas for object detection using selective search




#### Download selectivesearch and load the image
https://github.com/AlpacaTechJP/selectivesearch

In [None]:
!pip install selectivesearch

In [5]:
!mkdir -p /content/data
!wget -O /content/data/audrey01.jpg https://raw.githubusercontent.com/gayoung-k/object-detection-learning-notes/main/images/audrey01.jpg


--2025-09-03 20:30:15--  https://raw.githubusercontent.com/gayoung-k/object-detection-learning-notes/main/images/audrey01.jpg
Resolving raw.githubusercontent.com (raw.githubusercontent.com)... 185.199.111.133, 185.199.108.133, 185.199.110.133, ...
Connecting to raw.githubusercontent.com (raw.githubusercontent.com)|185.199.111.133|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 30102 (29K) [image/jpeg]
Saving to: ‘/content/data/audrey01.jpg’


2025-09-03 20:30:15 (21.3 MB/s) - ‘/content/data/audrey01.jpg’ saved [30102/30102]



In [None]:
import selectivesearch
import cv2
import matplotlib.pyplot as plt
import os
%matplotlib inline

# cv2.imread() does not raise when the file is missing or unreadable; it returns
# None, which would otherwise only surface as an obscure error inside cvtColor().
img = cv2.imread('./data/audrey01.jpg')
if img is None:
    raise FileNotFoundError("Could not read './data/audrey01.jpg'; run the download cell first.")

# The image is converted from BGR to RGB channel order before it is shown with matplotlib.
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
print('img shape:', img.shape)

plt.figure(figsize=(8, 8))
plt.imshow(img_rgb)
plt.show()

In [None]:
import selectivesearch

# Run selective search on the RGB image. The call returns a 2-tuple: the first
# element is discarded here, the second (`regions`) holds the region proposals.
# NOTE(review): scale and min_size are tuning parameters of the library call -
# confirm their exact meaning against the selectivesearch documentation.
_, regions = selectivesearch.selective_search(img_rgb, scale=100, min_size=2000)

# Show the container type and how many proposals were returned.
print(type(regions), len(regions))


#### Viewing the information of the returned Region Proposals

The returned `regions` variable is of type list, and each element inside is a dictionary.

Meaning of each dictionary key:

* **rect**: the bounding box of a detected object candidate, stored as `(x, y, w, h)` — the top-left x, y coordinates followed by the width and height.
* **size**: the size of the object segment selected by selective search.
* **labels**: unique IDs of the objects located inside the bounding box defined by `rect`.
* **Note:** entries further down the list tend to have bounding boxes with larger width and height, so it becomes more likely that a single bounding box contains multiple objects.


In [None]:
# Collect only the 'rect' entry, an (x, y, w, h) tuple, of every region proposal.
cand_rects = [cand['rect'] for cand in regions]

print(cand_rects)


In [None]:
# Overlay every candidate box on a copy of the RGB image.
# cv2.rectangle() receives the image, the top-left corner, the bottom-right
# corner, the box color and the line thickness.

green_rgb = (125, 255, 51)
img_rgb_copy = img_rgb.copy()
for x, y, w, h in cand_rects:
    # Each rect is (x, y, width, height); the bottom-right corner is obtained
    # by adding the width and height to the top-left corner.
    top_left = (x, y)
    bottom_right = (x + w, y + h)
    img_rgb_copy = cv2.rectangle(img_rgb_copy, top_left, bottom_right, color=green_rgb, thickness=2)

plt.figure(figsize=(8, 8))
plt.imshow(img_rgb_copy)
plt.show()


#### Extract only candidates with a large segment size (`size`)


In [None]:
# Keep only the proposals whose segment size exceeds 10000.
cand_rects = [
    region['rect']
    for region in regions
    if region['size'] > 10000
]

green_rgb = (125, 255, 51)
img_rgb_copy = img_rgb.copy()
for left, top, width, height in cand_rects:
    # The rect stores width and height, so the bottom-right corner is the
    # top-left corner shifted by (width, height).
    right, bottom = left + width, top + height
    img_rgb_copy = cv2.rectangle(img_rgb_copy, (left, top), (right, bottom), color=green_rgb, thickness=2)

plt.figure(figsize=(8, 8))
plt.imshow(img_rgb_copy)
plt.show()


### Calculate IOU

#### Create a function that takes candidate boxes and ground truth boxes as input arguments and calculates the IOU


In [None]:
import numpy as np

def compute_iou(cand_box, gt_box):
    """Compute the Intersection over Union (IoU) of two boxes.

    Both boxes use corner format ``(x_min, y_min, x_max, y_max)``.

    Args:
        cand_box: candidate box as a 4-element sequence.
        gt_box: ground-truth box in the same format.

    Returns:
        The intersection area divided by the union area. ``0.0`` is returned
        where the union has no positive area (for example when both boxes are
        degenerate) instead of dividing by zero and producing NaN.
    """
    # Corners of the overlapping rectangle (if any).
    x1 = np.maximum(cand_box[0], gt_box[0])
    y1 = np.maximum(cand_box[1], gt_box[1])
    x2 = np.minimum(cand_box[2], gt_box[2])
    y2 = np.minimum(cand_box[3], gt_box[3])

    # Clamp each side length at zero so disjoint boxes give a zero intersection
    # rather than a product of negative lengths.
    intersection = np.maximum(x2 - x1, 0) * np.maximum(y2 - y1, 0)

    cand_box_area = (cand_box[2] - cand_box[0]) * (cand_box[3] - cand_box[1])
    gt_box_area = (gt_box[2] - gt_box[0]) * (gt_box[3] - gt_box[1])
    union = np.asarray(cand_box_area + gt_box_area - intersection, dtype=float)

    # Divide only where the union is positive; elsewhere keep the 0.0 prefilled in `out`.
    iou = np.divide(intersection, union, out=np.zeros_like(union), where=union > 0)
    # Indexing with () turns a 0-d result back into a NumPy scalar and leaves
    # array-shaped results unchanged.
    return iou[()]

In [None]:
import cv2
import matplotlib.pyplot as plt
%matplotlib inline

# Hypothetical ground-truth box, given as [x_min, y_min, x_max, y_max].
gt_box = [60, 15, 320, 420]

img = cv2.imread('./data/audrey01.jpg')
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

# Outline the ground-truth box in red on the RGB image.
red = (255, 0, 0)
gt_top_left = (gt_box[0], gt_box[1])
gt_bottom_right = (gt_box[2], gt_box[3])
img_rgb = cv2.rectangle(img_rgb, gt_top_left, gt_bottom_right, color=red, thickness=2)

plt.figure(figsize=(8, 8))
plt.imshow(img_rgb)
plt.show()

In [None]:
import selectivesearch

# Reload the image from disk into a separate variable (img_rgb2). img_rgb had the
# ground-truth box drawn onto it in the previous cell, so presumably a clean
# image is wanted as the input for selective search.
img = cv2.imread('./data/audrey01.jpg')
img_rgb2 = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
# selective_search() returns a 2-tuple; only the second element, the region
# proposal information, is kept.
_, regions = selectivesearch.selective_search(img_rgb2, scale=100, min_size=2000)

# Show the container type and how many proposals were returned.
print(type(regions), len(regions))


In [None]:
# Display the 'rect' entry of every region proposal as the cell output.
[cand['rect'] for cand in regions]

In [None]:
# Print the IoU between each proposal and the ground-truth box.
cand_rects = [cand['rect'] for cand in regions]
for index, (x, y, w, h) in enumerate(cand_rects):
    # Convert (x, y, w, h) into the corner format expected by compute_iou().
    cand_box = [x, y, x + w, y + h]

    iou = compute_iou(cand_box, gt_box)
    print('index:', index, "iou:", iou)

In [None]:
# Rects of the proposals whose segment size exceeds 5000, sorted in ascending order.
cand_rects = sorted(cand['rect'] for cand in regions if cand['size'] > 5000)
cand_rects

In [None]:
# Draw the ground-truth box together with every sufficiently large proposal
# whose IoU with it exceeds the threshold, labelling each with its index and IoU.
img = cv2.imread('./data/audrey01.jpg')
img_rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
print('img shape:', img.shape)

# Colors are defined in this cell, like its other inputs, so that it does not
# depend on a value left behind by an earlier cell.
green_rgb = (125, 255, 51)
red = (255, 0, 0)

# Tunable thresholds for this visualization.
min_region_size = 3000
iou_threshold = 0.6

cand_rects = [cand['rect'] for cand in regions if cand['size'] > min_region_size]
gt_box = [60, 15, 320, 420]
img_rgb = cv2.rectangle(img_rgb, (gt_box[0], gt_box[1]), (gt_box[2], gt_box[3]), color=red, thickness=2)

for index, (x, y, w, h) in enumerate(cand_rects):
    # Convert (x, y, w, h) into the corner format expected by compute_iou().
    cand_box = [x, y, x + w, y + h]

    iou = compute_iou(cand_box, gt_box)

    if iou > iou_threshold:
        print('index:', index, "iou:", iou, 'rectangle:', tuple(cand_box))
        cv2.rectangle(img_rgb, (cand_box[0], cand_box[1]), (cand_box[2], cand_box[3]), color=green_rgb, thickness=1)
        text = "{}: {:.2f}".format(index, iou)
        # The label is offset from the box's top-left corner.
        cv2.putText(img_rgb, text, (cand_box[0] + 100, cand_box[1] + 10), cv2.FONT_HERSHEY_SIMPLEX, 0.4, color=green_rgb, thickness=1)

plt.figure(figsize=(12, 12))
plt.imshow(img_rgb)
plt.show()
