In [5]:
import cv2
import tensorflow as tf
import sys
import numpy as np
import time

In [6]:
# Report the TensorFlow version, the Python interpreter version, and the GPU
# devices TensorFlow can see, so the captured outputs below can be reproduced.
print(tf.__version__)
print(sys.version)
print(tf.config.list_physical_devices("GPU"))

2.10.0
3.10.0 | packaged by conda-forge | (default, Nov 10 2021, 13:20:59) [MSC v.1916 64 bit (AMD64)]
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


## PyImageSearch blog: https://pyimagesearch.com/2020/07/06/region-proposal-object-detection-with-opencv-keras-and-tensorflow/?_ga=2.81635607.1422367737.1701858621-1743169504.1688156425

In [7]:
def print_detection_information(detections):
    """Print a per-class prediction count followed by the overall total.

    Args:
        detections: mapping of class label (string) to a sized collection of
            predictions for that class.

    Returns:
        None. The summary is written to standard output only.
    """
    grand_total = 0
    for class_label, class_predictions in detections.items():
        count = len(class_predictions)
        grand_total += count
        print(f"{class_label.upper()} Class Contains {count} predictions")
    print(f"Total Number of Detections are {grand_total}")

In [8]:
def show_detection_on_image(img,positive_detections):
    """Show the detections of each class in an OpenCV window, one class at a time.

    For every label a fresh copy of ``img`` is annotated with that label's
    boxes and displayed. Any key press moves on to the next label; pressing
    ``q`` stops the slideshow early.

    Args:
        img: image array to annotate. It is copied per label and never
            modified in place.
        positive_detections: mapping ``label -> list of (confidence, box)``
            where ``box`` unpacks to ``(x1, y1, x2, y2)``.

    Returns:
        None.
    """
    for label, detections in positive_detections.items():
        out = img.copy()
        for conf, box in detections:
            # Boxes arrive as NumPy rows; hand OpenCV plain Python ints.
            x1, y1, x2, y2 = (int(v) for v in box)
            cv2.rectangle(out, (x1, y1), (x2, y2), (0, 255, 0), 2)
            # Put the caption above the box unless that would leave the image.
            y3 = y1 - 10 if y1 - 10 > 10 else y1 + 10
            # conf is a 0..1 score, so scale it before appending the '%' sign
            # (the old label printed e.g. "0.987%").
            caption = f"{label}:-{float(conf) * 100:.1f}%"
            cv2.putText(out, caption, (x1, y3), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 0), 2)
        cv2.imshow("Out", out)
        key = cv2.waitKey(0) & 0xFF
        # The window is closed on every path, so do it once before deciding
        # whether to continue.
        cv2.destroyAllWindows()
        if key == ord('q'):
            break

In [9]:
def perform_RGP(image,method="fast"):
    """Generate region proposals for ``image`` with OpenCV Selective Search.

    Args:
        image: image handed to ``setBaseImage``.
        method: ``"fast"`` selects the fast mode; any other value selects the
            quality mode.

    Returns:
        The result of ``process()``. Callers in this notebook unpack each
        entry as ``(x, y, w, h)``.
    """
    searcher = cv2.ximgproc.segmentation.createSelectiveSearchSegmentation()
    searcher.setBaseImage(image)
    if method == "fast":
        print("[INFO] Applying Fast Selective Search")
        searcher.switchToSelectiveSearchFast()
    else:
        print("[INFO] Applying Quality Selective Search")
        searcher.switchToSelectiveSearchQuality()

    # perf_counter is monotonic and high-resolution, so the reported duration
    # cannot be skewed by system clock adjustments the way time.time() can.
    start = time.perf_counter()
    rects = searcher.process()
    elapsed = time.perf_counter() - start
    print(f"[INFO] Total {len(rects)} ROI's were Generated and Total Time Taken is {elapsed}")
    return rects

## Applying Object Detection with Class-Wise Non-Maxima Suppression

In [10]:
def Detect(image_location,selective_search_method="fast",minimun_confidence=0.90):
    """Detect objects via Selective Search + ResNet50 with class-wise NMS.

    Pipeline: read the image, generate region proposals, classify every
    proposal with ResNet50 (top-1 label only), keep predictions whose score is
    at least ``minimun_confidence``, then run non-maxima suppression
    separately for each predicted class. Summaries are printed and the
    detections are displayed in OpenCV windows both before and after
    suppression.

    Args:
        image_location: path of the image to read with ``cv2.imread``.
        selective_search_method: forwarded to ``perform_RGP`` (``"fast"`` or
            anything else for quality mode).
        minimun_confidence: minimum top-1 score for a proposal to be kept.

    Returns:
        None. Results are only printed and displayed.

    Raises:
        OSError: if the image cannot be read.
    """
    img = cv2.imread(image_location)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise OSError(f"Could not read image (missing file or unsupported format): {image_location}")

    rects = perform_RGP(img,selective_search_method)
    proposals = []
    boxes = []
    for (x,y,w,h) in rects:
        ROI = img[y:y+h,x:x+w]
        ROI = cv2.cvtColor(ROI,cv2.COLOR_BGR2RGB)
        ROI = cv2.resize(ROI,(224,224))
        ROI = tf.keras.applications.resnet50.preprocess_input(ROI)
        proposals.append(ROI)
        boxes.append((x,y,x+w,y+h))

    if not proposals:
        # An empty batch cannot be fed to model.predict.
        print("[INFO] No region proposals were generated; nothing to classify")
        return

    proposals = np.array(proposals)
    boxes = np.array(boxes)

    # NOTE: the network is rebuilt on every call.
    model = tf.keras.applications.ResNet50(input_shape=(224,224,3))

    preds = model.predict(proposals)
    # With top=1 each entry is a one-element list holding (id, name, score).
    # Iterate it directly: np.array(...).squeeze() turned the scores into
    # strings and collapsed to 1-D when there was exactly one proposal.
    decoded = tf.keras.applications.resnet50.decode_predictions(preds,top=1)

    positive_detections = dict()
    for top1,box in zip(decoded,boxes):
        (_,class_name,conf) = top1[0]
        if float(conf) >= minimun_confidence:
            positive_detections.setdefault(class_name,[]).append((float(conf),box))

    print("\n\n[INFO] Before Applying Non-Maxima Supression we have:-") 

    print_detection_information(positive_detections)
    show_detection_on_image(img,positive_detections)

    suprresed_detections = dict()

    for label,detections in positive_detections.items():
        probs = np.array([d[0] for d in detections],dtype=np.float32)
        # Separate name so the full `boxes` array above is not shadowed.
        label_boxes = np.array([d[1] for d in detections])
        idxs = non_max_suppression(label_boxes,probs)
        suprresed_detections[label] = list(zip(probs[idxs],label_boxes[idxs]))

    print("\n\n[INFO] After Applying Non-Maxima Supression we have:-")

    print_detection_information(suprresed_detections)
    show_detection_on_image(img,suprresed_detections)

In [87]:
# NOTE(review): Detect has no return statement, so `fg` is bound to None here;
# the call is only useful for its printed summaries and displayed windows.
fg = Detect(r"E:/PyImage_ComputerVision/ObjectDetection/WorkingData/InputData/beagle.png")

[INFO] Applying Fast Selective Search
[INFO] Total 922 ROI's were Generated and Total Time Taken is 0.9293477535247803


[INFO] Before Applying Non-Maxima Supression we have:-
BEAGLE Class Contains 47 predictions
QUILL Class Contains 2 predictions
PAINTBRUSH Class Contains 2 predictions
WINDSOR_TIE Class Contains 1 predictions
THEATER_CURTAIN Class Contains 2 predictions
PATAS Class Contains 1 predictions
CLOG Class Contains 1 predictions
PANPIPE Class Contains 1 predictions
PAPER_TOWEL Class Contains 1 predictions
Total Number of Detections are 58


[INFO] After Applying Non-Maxima Supression we have:-
BEAGLE Class Contains 1 predictions
QUILL Class Contains 1 predictions
PAINTBRUSH Class Contains 1 predictions
WINDSOR_TIE Class Contains 1 predictions
THEATER_CURTAIN Class Contains 1 predictions
PATAS Class Contains 1 predictions
CLOG Class Contains 1 predictions
PANPIPE Class Contains 1 predictions
PAPER_TOWEL Class Contains 1 predictions
Total Number of Detections are 9


In [None]:
Detect(r"E:/PyImage_ComputerVision/ObjectDetection/WorkingData/InputData/beagle2.webp")

[INFO] Applying Fast Selective Search
[INFO] Total 2564 ROI's were Generated and Total Time Taken is 2.8348679542541504


[INFO] Before Applying Non-Maxima Supression we have:-
BRAMBLING Class Contains 3 predictions
BEAGLE Class Contains 25 predictions
REDBONE Class Contains 2 predictions
BRABANCON_GRIFFON Class Contains 1 predictions
SLOTH_BEAR Class Contains 1 predictions
WEASEL Class Contains 1 predictions
SAFETY_PIN Class Contains 1 predictions
SPEEDBOAT Class Contains 2 predictions
PLATE_RACK Class Contains 1 predictions
KEESHOND Class Contains 2 predictions
HATCHET Class Contains 1 predictions
ENGLISH_FOXHOUND Class Contains 2 predictions
IBIZAN_HOUND Class Contains 4 predictions
BRITTANY_SPANIEL Class Contains 1 predictions
Total Number of Detections are 47


[INFO] After Applying Non-Maxima Supression we have:-
BRAMBLING Class Contains 2 predictions
BEAGLE Class Contains 3 predictions
REDBONE Class Contains 1 predictions
BRABANCON_GRIFFON Class Contains 1 predictions
SLOTH_BEAR

In [None]:
Detect(r"E:/PyImage_ComputerVision/ObjectDetection/WorkingData/InputData/example_04.jpg")

[INFO] Applying Fast Selective Search
[INFO] Total 1651 ROI's were Generated and Total Time Taken is 0.753544807434082


[INFO] Before Applying Non-Maxima Supression we have:-
FACE_POWDER Class Contains 1 predictions
PANPIPE Class Contains 6 predictions
BEER_BOTTLE Class Contains 81 predictions
BARREL Class Contains 2 predictions
CLEAVER Class Contains 1 predictions
PACKET Class Contains 1 predictions
BEARSKIN Class Contains 1 predictions
UPRIGHT Class Contains 2 predictions
WINDOW_SCREEN Class Contains 3 predictions
Total Number of Detections are 98


## As we can see, the results are not accurate, and applying NMS this way does not work properly

# --------------------------------------------------------------------------------------------

## Applying Object Detection with Small Boxes Excluded and Class-Wise Non-Maxima Suppression

In [15]:
def Detect1(image_location,selective_search_method="fast",minimun_confidence=0.90):
    """Detect objects with small proposals filtered out and class-wise NMS.

    Same pipeline as the plain class-wise variant, except that a region
    proposal is only classified when its width exceeds 20% of the image width
    or its height exceeds 20% of the image height. Summaries are printed and
    detections are displayed in OpenCV windows before and after suppression.

    Args:
        image_location: path of the image to read with ``cv2.imread``.
        selective_search_method: forwarded to ``perform_RGP`` (``"fast"`` or
            anything else for quality mode).
        minimun_confidence: minimum top-1 score for a proposal to be kept.

    Returns:
        None. Results are only printed and displayed.

    Raises:
        OSError: if the image cannot be read.
    """
    img = cv2.imread(image_location)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # without this check the failure surfaced as an AttributeError on
        # img.shape.
        raise OSError(f"Could not read image (missing file or unsupported format): {image_location}")

    hieght,width,_ = img.shape
    rects = perform_RGP(img,selective_search_method)
    proposals = []
    boxes = []
    for (x,y,w,h) in rects:
        # Keep the proposal if either dimension covers more than 20% of the image.
        if w/float(width) > 0.2 or h/float(hieght) > 0.2:
            ROI = img[y:y+h,x:x+w]
            ROI = cv2.cvtColor(ROI,cv2.COLOR_BGR2RGB)
            ROI = cv2.resize(ROI,(224,224))
            ROI = tf.keras.applications.resnet50.preprocess_input(ROI)
            proposals.append(ROI)
            boxes.append((x,y,x+w,y+h))

    print(f"[INFO] {len(proposals)} ROI's are left after Filtering Small ROI's")
    if not proposals:
        # An empty batch cannot be fed to model.predict.
        print("[INFO] No region proposals survived filtering; nothing to classify")
        return

    proposals = np.array(proposals)
    boxes = np.array(boxes)

    # NOTE: the network is rebuilt on every call.
    model = tf.keras.applications.ResNet50(input_shape=(224,224,3))

    preds = model.predict(proposals)
    # With top=1 each entry is a one-element list holding (id, name, score).
    # Iterate it directly: np.array(...).squeeze() turned the scores into
    # strings and collapsed to 1-D when exactly one proposal survived.
    decoded = tf.keras.applications.resnet50.decode_predictions(preds,top=1)

    positive_detections = dict()
    for top1,box in zip(decoded,boxes):
        (_,class_name,conf) = top1[0]
        if float(conf) >= minimun_confidence:
            positive_detections.setdefault(class_name,[]).append((float(conf),box))

    print("\n\n[INFO] Before Applying Non-Maxima Supression we have:-") 

    print_detection_information(positive_detections)
    show_detection_on_image(img,positive_detections)

    suprresed_detections = dict()

    for label,detections in positive_detections.items():
        probs = np.array([d[0] for d in detections],dtype=np.float32)
        # Separate name so the full `boxes` array above is not shadowed.
        label_boxes = np.array([d[1] for d in detections])
        idxs = non_max_suppression(label_boxes,probs)
        suprresed_detections[label] = list(zip(probs[idxs],label_boxes[idxs]))

    print("\n\n[INFO] After Applying Non-Maxima Supression we have:-")

    print_detection_information(suprresed_detections)
    show_detection_on_image(img,suprresed_detections)

In [16]:
Detect1(r"E:/PyImage_ComputerVision/ObjectDetection/WorkingData/InputData/example_04.jpg")

[INFO] Applying Fast Selective Search
[INFO] Total 1651 ROI's were Generated and Total Time Taken is 0.7676844596862793
[INFO] 960 ROI's are left after Filtering Small ROI's


[INFO] Before Applying Non-Maxima Supression we have:-
BEER_BOTTLE Class Contains 81 predictions
WINDOW_SCREEN Class Contains 3 predictions
UPRIGHT Class Contains 1 predictions
Total Number of Detections are 85


[INFO] After Applying Non-Maxima Supression we have:-
BEER_BOTTLE Class Contains 4 predictions
WINDOW_SCREEN Class Contains 1 predictions
UPRIGHT Class Contains 1 predictions
Total Number of Detections are 6


In [18]:
Detect1(r"E:/PyImage_ComputerVision/ObjectDetection/WorkingData/InputData/beagle.png")

[INFO] Applying Fast Selective Search
[INFO] Total 922 ROI's were Generated and Total Time Taken is 0.9196615219116211
[INFO] 525 ROI's are left after Filtering Small ROI's


[INFO] Before Applying Non-Maxima Supression we have:-
PAINTBRUSH Class Contains 1 predictions
BEAGLE Class Contains 47 predictions
QUILL Class Contains 2 predictions
PAPER_TOWEL Class Contains 1 predictions
CLOG Class Contains 1 predictions
Total Number of Detections are 52


[INFO] After Applying Non-Maxima Supression we have:-
PAINTBRUSH Class Contains 1 predictions
BEAGLE Class Contains 1 predictions
QUILL Class Contains 1 predictions
PAPER_TOWEL Class Contains 1 predictions
CLOG Class Contains 1 predictions
Total Number of Detections are 5


In [19]:
Detect1(r"E:/PyImage_ComputerVision/ObjectDetection/WorkingData/InputData/beagle2.webp")

[INFO] Applying Fast Selective Search
[INFO] Total 2564 ROI's were Generated and Total Time Taken is 2.8555333614349365
[INFO] 1032 ROI's are left after Filtering Small ROI's


[INFO] Before Applying Non-Maxima Supression we have:-
SPEEDBOAT Class Contains 2 predictions
BEAGLE Class Contains 25 predictions
BRITTANY_SPANIEL Class Contains 1 predictions
IBIZAN_HOUND Class Contains 4 predictions
Total Number of Detections are 32


[INFO] After Applying Non-Maxima Supression we have:-
SPEEDBOAT Class Contains 1 predictions
BEAGLE Class Contains 3 predictions
BRITTANY_SPANIEL Class Contains 1 predictions
IBIZAN_HOUND Class Contains 1 predictions
Total Number of Detections are 6


### These detections are better than the first approach because small, unnecessary detections were removed

# --------------------------------------------------------------------------------------------

## Applying Object Detection with Small Detections Removed and NMS Applied to All Detections Together

In [52]:
def Detect2(image_location,selective_search_method="fast",minimun_confidence=0.90):
    """Detect objects with small proposals filtered out and a single global NMS.

    A region proposal is only classified when its width exceeds 20% of the
    image width or its height exceeds 20% of the image height. Predictions
    whose top-1 score is at least ``minimun_confidence`` are then suppressed
    together, regardless of class, and each surviving detection is shown in
    its own OpenCV window (any key press advances).

    Args:
        image_location: path of the image to read with ``cv2.imread``.
        selective_search_method: forwarded to ``perform_RGP`` (``"fast"`` or
            anything else for quality mode).
        minimun_confidence: minimum top-1 score for a proposal to be kept.

    Returns:
        None. Results are only printed and displayed.

    Raises:
        OSError: if the image cannot be read.
    """
    img = cv2.imread(image_location)
    if img is None:
        # cv2.imread reports failure by returning None rather than raising;
        # without this check the failure surfaced as an AttributeError on
        # img.shape.
        raise OSError(f"Could not read image (missing file or unsupported format): {image_location}")

    hieght,width,_ = img.shape
    rects = perform_RGP(img,selective_search_method)
    proposals = []
    boxes = []
    for (x,y,w,h) in rects:
        # Keep the proposal if either dimension covers more than 20% of the image.
        if w/float(width) > 0.2 or h/float(hieght) > 0.2:
            ROI = img[y:y+h,x:x+w]
            ROI = cv2.cvtColor(ROI,cv2.COLOR_BGR2RGB)
            ROI = cv2.resize(ROI,(224,224))
            ROI = tf.keras.applications.resnet50.preprocess_input(ROI)
            proposals.append(ROI)
            boxes.append((x,y,x+w,y+h))

    print(f"[INFO] {len(proposals)} ROI's are left after Filtering Small ROI's")
    if not proposals:
        # An empty batch cannot be fed to model.predict.
        print("[INFO] No region proposals survived filtering; nothing to classify")
        return

    proposals = np.array(proposals)
    boxes = np.array(boxes)

    # NOTE: the network is rebuilt on every call.
    model = tf.keras.applications.ResNet50(input_shape=(224,224,3))

    preds = model.predict(proposals)
    # With top=1 each entry is a one-element list holding (id, name, score).
    # Keep the tuples as they are: np.array(...).squeeze() turned the scores
    # into strings and collapsed to 1-D when exactly one proposal survived.
    decoded = tf.keras.applications.resnet50.decode_predictions(preds,top=1)

    positive_indexs = [
        i for i,top1 in enumerate(decoded)
        if float(top1[0][2]) >= minimun_confidence
    ]

    postive_preds = [decoded[i][0] for i in positive_indexs]
    postive_boxes = boxes[positive_indexs]
    print(f"\n\n[INFO] Before Applying Non-Maxima Supression we have {len(postive_preds)} Detections") 

    postive_probs = np.array([conf for (_,_,conf) in postive_preds],dtype=np.float32)
    indxs = non_max_suppression(postive_boxes,postive_probs)

    print(f"\n\n[INFO] After Applying Non-Maxima Supression we have {len(indxs)} Detections")

    for i in indxs:
        out = img.copy()
        (_,class_name,conf) = postive_preds[i]
        # Boxes are NumPy rows; hand OpenCV plain Python ints.
        (x1,y1,x2,y2) = (int(v) for v in postive_boxes[i])
        cv2.rectangle(out,(x1,y1),(x2,y2),(0,255,0),2)
        # Put the caption above the box unless that would leave the image.
        y3 = y1 - 10 if y1 - 10 > 10 else y1 + 10
        # conf is a 0..1 score, so scale it before appending the '%' sign.
        caption = f"{class_name.upper()}:-{float(conf) * 100:.1f}%"
        cv2.putText(out,caption,(x1,y3),cv2.FONT_HERSHEY_SIMPLEX,0.45,(0,0,0),2)
        cv2.imshow("OUT",out)
        cv2.waitKey(0)
        cv2.destroyAllWindows()

In [53]:
Detect2(r"E:/PyImage_ComputerVision/ObjectDetection/WorkingData/InputData/example_04.jpg")

[INFO] Applying Fast Selective Search
[INFO] Total 1651 ROI's were Generated and Total Time Taken is 0.7609660625457764
[INFO] 960 ROI's are left after Filtering Small ROI's


[INFO] Before Applying Non-Maxima Supression we have 85 Detections


[INFO] After Applying Non-Maxima Supression we have 4 Detections


In [55]:
Detect2(r"E:/PyImage_ComputerVision/ObjectDetection/WorkingData/InputData/beagle.png")

[INFO] Applying Fast Selective Search
[INFO] Total 922 ROI's were Generated and Total Time Taken is 0.928673505783081
[INFO] 525 ROI's are left after Filtering Small ROI's


[INFO] Before Applying Non-Maxima Supression we have 52 Detections


[INFO] After Applying Non-Maxima Supression we have 3 Detections


In [56]:
Detect2(r"E:/PyImage_ComputerVision/ObjectDetection/WorkingData/InputData/beagle2.webp")

[INFO] Applying Fast Selective Search
[INFO] Total 2564 ROI's were Generated and Total Time Taken is 2.797518491744995
[INFO] 1032 ROI's are left after Filtering Small ROI's


[INFO] Before Applying Non-Maxima Supression we have 32 Detections


[INFO] After Applying Non-Maxima Supression we have 3 Detections


### We can see that the above method does not work well when there are multiple objects of the same class.

# --------------------------------------------------------------------------------------------

## Non-Maxima Suppression

### PyImageSearch blog post: https://pyimagesearch.com/2014/11/17/non-maximum-suppression-object-detection-python/

### NMS implementation by PyImageSearch in the imutils package: https://github.com/PyImageSearch/imutils/blob/master/imutils/object_detection.py

### Medium post on NMS: https://medium.com/analytics-vidhya/non-max-suppression-nms-6623e6572536

In [13]:
def non_max_suppression(boxes, probs=None, overlapThresh=0.3):
    """Greedy non-maxima suppression over axis-aligned boxes.

    Boxes are visited from highest to lowest rank (by ``probs`` when given,
    otherwise by their ``y2`` coordinate). Each visited box is kept, and every
    remaining box whose intersection with it covers more than
    ``overlapThresh`` of that remaining box's own area is discarded.

    Args:
        boxes: array-like of shape (N, 4) with rows ``(x1, y1, x2, y2)``.
            Lists and integer arrays (signed or unsigned) are accepted.
        probs: optional array-like of N scores used for ranking.
        overlapThresh: overlap ratio above which a box is suppressed.

    Returns:
        List of indexes into ``boxes`` of the boxes to keep, in the order they
        were picked (highest rank first). Empty list when ``boxes`` is empty.
    """
    # if there are no boxes, return an empty list
    if len(boxes) == 0:
        return []

    # accept plain lists as well as arrays (a list has no .dtype)
    boxes = np.asarray(boxes)

    # convert integer boxes to floats: divisions follow, and with unsigned
    # dtypes `xx2 - xx1 + 1` would wrap around for disjoint boxes instead of
    # going negative, which wrongly suppresses them
    if boxes.dtype.kind in "iu":
        boxes = boxes.astype("float")

    # initialize the list of picked indexes
    pick = []

    # grab the coordinates of the bounding boxes
    x1 = boxes[:, 0]
    y1 = boxes[:, 1]
    x2 = boxes[:, 2]
    y2 = boxes[:, 3]

    # compute the area of the bounding boxes (the +1 treats coordinates as
    # inclusive pixel indexes)
    area = (x2 - x1 + 1) * (y2 - y1 + 1)

    # rank on the scores when provided, otherwise on the bottom-right
    # y-coordinate (y2)
    ranking = probs if probs is not None else y2
    idxs = np.argsort(ranking)

    # keep looping while some indexes still remain in the indexes list
    while len(idxs) > 0:
        # the best remaining box sits at the end of the ascending sort
        last = len(idxs) - 1
        i = idxs[last]
        pick.append(i)
        rest = idxs[:last]

        # intersection rectangle between the picked box and every other
        # remaining box
        xx1 = np.maximum(x1[i], x1[rest])
        yy1 = np.maximum(y1[i], y1[rest])
        xx2 = np.minimum(x2[i], x2[rest])
        yy2 = np.minimum(y2[i], y2[rest])

        # width and height of the intersection, clamped at 0 for disjoint boxes
        w = np.maximum(0, xx2 - xx1 + 1)
        h = np.maximum(0, yy2 - yy1 + 1)

        # overlap is intersection / area of the *other* box (not IoU)
        overlap = (w * h) / area[rest]

        # drop the picked box and everything it overlaps too much
        idxs = np.delete(idxs, np.concatenate(([last],
            np.where(overlap > overlapThresh)[0])))

    # return the indexes of only the bounding boxes to keep
    return pick

In [144]:
# NOTE(review): `fg1` is not assigned in any cell visible in this notebook --
# presumably left over from a deleted cell; confirm before Restart & Run All.
fg1

[[('n02906734', 'broom', 0.4722762),
  ('n03876231', 'paintbrush', 0.15754955),
  ('n04067472', 'reel', 0.13112687),
  ('n03314780', 'face_powder', 0.024718378),
  ('n03207941', 'dishwasher', 0.011802571)],
 [('n03485407', 'hand-held_computer', 0.21492009),
  ('n04612504', 'yawl', 0.17486916),
  ('n03792972', 'mountain_tent', 0.09301227),
  ('n03584829', 'iron', 0.078669064),
  ('n03796401', 'moving_van', 0.06772048)],
 [('n02865351', 'bolo_tie', 0.76130325),
  ('n04311004', 'steel_arch_bridge', 0.06899695),
  ('n04127249', 'safety_pin', 0.05575896),
  ('n03544143', 'hourglass', 0.0299827),
  ('n04372370', 'switch', 0.012237245)],
 [('n03888257', 'parachute', 0.06882348),
  ('n02692877', 'airship', 0.06323644),
  ('n01608432', 'kite', 0.05736748),
  ('n03355925', 'flagpole', 0.03373232),
  ('n04552348', 'warplane', 0.02989382)],
 [('n03769881', 'minibus', 0.9757304),
  ('n03895866', 'passenger_car', 0.021687467),
  ('n03770679', 'minivan', 0.0005451776),
  ('n03796401', 'moving_van', 0

In [146]:
# NOTE(review): `fg2` is not assigned in any cell visible in this notebook --
# presumably left over from a deleted cell; confirm before Restart & Run All.
type(fg2)

list

In [150]:
# Type of the third field of the first tuple in the first entry of `fg2`.
# NOTE(review): `fg2` is not assigned in any visible cell -- confirm its origin.
type(fg2[0][0][2])

numpy.float32