## Object Detection with OpenCV-Python Using a Haar-Cascade Classifier


### 1. Package import

In [None]:
import tensorflow as tf
import cv2
import numpy as np

### 2. CascadeClassifier from cv2, which can be applied in three ways
- static face capture from an image
- object detection from a video
- real-time face detection

Part of the following code is adapted from this [blog post](https://stackabuse.com/object-detection-with-opencv-python-using-a-haar-cascade-classifier/).

Load the `vgg_model` used to classify the detected objects.

In [None]:
# Colour tuples used when drawing annotations; OpenCV expects (B, G, R) order.
GREEN = (0, 255, 0)
RED = (0, 0, 255)

# Class labels, indexed by the argmax of the model's output vector.
class_names = ['cats', 'dogs']

# Keras model saved on disk; it is loaded once and reused by every cell below.
model_name = './models/vgg_model.h5'
vgg_model = tf.keras.models.load_model(model_name)

### Some functions used in object capture

- The `imread()` method loads the image


#### 2.1 Static face capture from an image

In [None]:
def image_format(image):
    """Turn a single image into a normalised one-element batch.

    The image is resized to 150x150 with ``tf.image.resize``, a leading
    batch axis is added, and the values are divided by 255.

    Parameters:
        image: Image array accepted by ``tf.image.resize``.
    Returns:
        Array with a leading batch axis of size 1 and spatial size 150x150.
    """
    resized = tf.image.resize(image, [150, 150])
    batch = np.expand_dims(resized, axis=0)
    return batch / 255.

In [None]:
# Detect eyes/faces in one still image, classify the image with the model
# and display the annotated result.
image_path = "./data/test/cats/cat.12301.jpg"
window_name = f"Detected Objects in {image_path}"
# One title shared by namedWindow/imshow/resizeWindow so they all address the
# same window (imshow with a different title would open a second window and
# the resize would be applied to the empty one).
window_title = window_name + '(press ESC to exit)'

original_image = cv2.imread(image_path)
if original_image is None:
    # cv2.imread returns None instead of raising when the file is unreadable.
    raise FileNotFoundError(f"Could not read image: {image_path}")

# Convert the image to grayscale for easier computation.
# imread yields BGR data, so the BGR conversion code is the matching one.
image_grey = cv2.cvtColor(original_image, cv2.COLOR_BGR2GRAY)

eye_classifier = cv2.CascadeClassifier(
    f"{cv2.data.haarcascades}haarcascade_eye.xml")
face_classifier = cv2.CascadeClassifier(
    f"{cv2.data.haarcascades}haarcascade_frontalface_alt.xml")

detected_eyes = eye_classifier.detectMultiScale(image_grey, minSize=(50, 50))
detected_face = face_classifier.detectMultiScale(image_grey, minSize=(50, 50))

# Classify the whole image (before anything is drawn on it).
# NOTE(review): the image is passed in OpenCV's BGR channel order - confirm
# this matches the channel order the model was trained with.
prediction = vgg_model.predict(image_format(original_image))
scores = np.array(prediction[0])
y_pred = class_names[scores.argmax(axis=0)]
confidence = scores.max(axis=0)

color = GREEN if y_pred == 'cats' else RED

print(y_pred, confidence, color)

# Draw rectangles on eyes (iterating an empty result is a no-op).
for (x, y, width, height) in detected_eyes:
    cv2.rectangle(original_image, (x, y),
                  (x + width, y + height),  # far corner: +width on x, +height on y
                  (0, 255, 0), 5)

# Draw rectangles on faces and label each with the predicted class.
for (x, y, width, height) in detected_face:
    cv2.rectangle(original_image, (x, y),
                  (x + width, y + height),
                  (255, 0, 0), 5)

    cv2.putText(original_image,  # image to draw on (first positional argument)
                "{:6} - {:.2f}%".format(y_pred, confidence*100),
                (x, y),
                cv2.FONT_HERSHEY_PLAIN,  # font
                2,  # fontScale
                color,
                2)  # thickness in px

cv2.namedWindow(window_title, cv2.WINDOW_KEEPRATIO)
cv2.imshow(window_title, original_image)
cv2.resizeWindow(window_title, 400, 400)
cv2.waitKey(0)  # block until a key is pressed
cv2.destroyAllWindows()
print("Streaming ended")

#### 2.2 Detection from a video

In [None]:
def get_extended_image(img, x, y, w, h, k=0.1):
    '''
    Crop a region around a detection box, enlarged by a margin, and turn it
    into a normalised one-element batch.

    Parameters:
        img (array-like): The original image, indexed as img[row, column]
        x (int): x coordinate of the upper-left corner of the box
        y (int): y coordinate of the upper-left corner of the box
        w (int): Width of the box
        h (int): Height of the box
        k (float): Fraction of w / h added as margin on each side
    Returns:
        Array with a leading batch axis of size 1 and spatial size 150x150,
        with values divided by 255
    '''
    margin_x = k*w
    margin_y = k*h

    # Extend towards the top-left only when the extended coordinate stays
    # positive; otherwise keep the original corner coordinate.
    left = int(x - margin_x) if x - margin_x > 0 else x
    top = int(y - margin_y) if y - margin_y > 0 else y

    # The bottom-right corner is always extended; it is not clamped here.
    right = int(x + (1 + k)*w)
    bottom = int(y + (1 + k)*h)

    crop = img[top:bottom, left:right]
    crop = tf.image.resize(crop, [150, 150])
    return np.expand_dims(crop, axis=0) / 255.0

In [None]:
# Run the cat-face cascade on every frame of a video file, classify each
# detection with the model, annotate the frame and save annotated frames.
video_path = "./video/cat-dog.MP4"
window_name = f"Detected Objects in {video_path}"
video = cv2.VideoCapture(video_path)

# The classifier and the window do not change between frames, so they are
# created once instead of on every loop iteration.
cascade_classifier = cv2.CascadeClassifier(
    f"{cv2.data.haarcascades}haarcascade_frontalcatface.xml")
cv2.namedWindow(window_name, cv2.WINDOW_NORMAL)

obj = 0  # running counter used to number the saved frames
try:
    while True:
        # read() returns a boolean alongside the image data if it was successful
        ret, frame = video.read()
        # Quit if no image can be read from the video. This must be checked
        # before touching `frame`, which is not a valid image when ret is False.
        if not ret:
            break
        frame = cv2.flip(frame, 0)  # flipCode 0

        # Greyscale image for classification
        image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        # Detect objects
        detected_objects = cascade_classifier.detectMultiScale(
            image, minSize=(50, 50))

        # Draw rectangles; each detection is (x, y, width, height).
        for (x, y, w, h) in detected_objects:
            face_image = get_extended_image(frame, x, y, w, h, 0.5)
            result = vgg_model.predict(face_image)
            scores = np.array(result[0])
            prediction = class_names[scores.argmax(axis=0)]
            confidence = scores.max(axis=0)
            color = (0, 255, 0) if prediction == 'cats' else (0, 0, 255)

            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 5)

            cv2.putText(frame,
                        # text to put
                        "{:6} - {:.2f}%".format(prediction, confidence*100),
                        (x, y),
                        cv2.FONT_HERSHEY_PLAIN,  # font
                        5,  # fontScale
                        color,
                        5)  # thickness in px

            obj += 1
            filePath = "./example_imgs/video{}.jpg".format(obj)
            # NOTE(review): the return value of imwrite is not checked -
            # confirm that ./example_imgs exists before running.
            cv2.imwrite(filePath, frame)

        # Show image
        cv2.imshow(window_name, frame)

        if cv2.waitKey(1) == 27:  # ESC
            break
finally:
    # Release the capture and close windows even if the loop raised.
    video.release()
    cv2.destroyAllWindows()

#### 2.3 Real-time detection

In [None]:
# Real-time detection from the webcam: detect faces in each frame, classify
# each one with the model and draw a coloured, labelled box around it.
video_capture = cv2.VideoCapture(0)  # webcamera

# Cascade used for detection in the loop below; without this definition the
# loop would fail with a NameError on `face_cascade`.
face_cascade = cv2.CascadeClassifier(
    f"{cv2.data.haarcascades}haarcascade_frontalface_default.xml")

if not video_capture.isOpened():
    print("Unable to access the camera")
else:
    print("Access to the camera was successfully obtained")

print("Streaming started - to quit press ESC")
try:
    while True:

        # Capture frame-by-frame
        ret, frame = video_capture.read()

        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break

        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        faces = face_cascade.detectMultiScale(
            gray,
            scaleFactor=1.3,
            minNeighbors=5,
            minSize=(100, 100),
            flags=cv2.CASCADE_SCALE_IMAGE
        )

        for (x, y, w, h) in faces:
            # for each face on the image detected by OpenCV
            # get extended image of this face
            face_image = get_extended_image(frame, x, y, w, h, 0.5)

            # classify face and draw a rectangle around the face
            # green for 'cats' and red for any other class
            result = vgg_model.predict(face_image)
            scores = np.array(result[0])
            prediction = class_names[scores.argmax(axis=0)]  # predicted class
            confidence = scores.max(axis=0)  # degree of confidence

            color = GREEN if prediction == 'cats' else RED

            # draw a rectangle around the face
            cv2.rectangle(frame,
                          (x, y),  # start_point
                          (x+w, y+h),  # end_point
                          color,
                          2)  # thickness in px
            cv2.putText(frame,
                        # text to put
                        "{:6} - {:.2f}%".format(prediction, confidence*100),
                        (x, y),
                        cv2.FONT_HERSHEY_PLAIN,  # font
                        2,  # fontScale
                        color,
                        2)  # thickness in px

        # display the resulting frame
        cv2.imshow("Face detector - to quit press ESC", frame)

        # Exit with ESC
        key = cv2.waitKey(1)
        if key % 256 == 27:  # ESC code
            break
finally:
    # when everything is done (or the loop raised), release the capture
    video_capture.release()
    cv2.destroyAllWindows()
print("Streaming ended")

In [None]:

# Webcam variant of the video cell: detect faces in each frame, classify each
# detection with the model, annotate the frame and save annotated frames.
cascade_classifier = cv2.CascadeClassifier(
        f"{cv2.data.haarcascades}haarcascade_frontalface_default.xml")

window_name = "Detected Objects in webcam"
video = cv2.VideoCapture(0)
obj = 0  # running counter used to number the saved frames
try:
    while video.isOpened():
        ret, frame = video.read()

        if not ret:
            break

        image = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)

        detected_objects = cascade_classifier.detectMultiScale(
            image,
            scaleFactor=1.3,
            minSize=(50, 50),
            )

        # Each detection is (x, y, width, height); iterating an empty result
        # is a no-op, so no separate emptiness check is needed.
        for x, y, w, h in detected_objects:
            face_image = get_extended_image(frame, x, y, w, h, 0.5)
            result = vgg_model.predict(face_image)
            scores = np.array(result[0])
            prediction = class_names[scores.argmax(axis=0)]
            confidence = scores.max(axis=0)
            color = (0, 255, 0) if prediction == 'cats' else (0, 0, 255)

            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 5)

            cv2.putText(frame,
                        # text to put
                        "{:6} - {:.2f}%".format(prediction, confidence*100),
                        (x, y),
                        cv2.FONT_HERSHEY_PLAIN,  # font
                        5,  # fontScale
                        color,
                        5)  # thickness in px

            obj += 1
            filePath = "./example_imgs/real_time{}.jpg".format(obj)
            # NOTE(review): the return value of imwrite is not checked -
            # confirm that ./example_imgs exists before running.
            cv2.imwrite(filePath, frame)

        cv2.imshow(window_name, frame)

        if cv2.waitKey(1) == 27:  # ESC
            break
finally:
    # Release the camera and close windows even if the loop raised.
    video.release()
    cv2.destroyAllWindows()
print("Streaming ended")