In [1]:
import cv2
import numpy
import sys

In [2]:
# Identifiers for the selectable display modes: plain preview, box blur,
# corner-feature overlay, and Canny edge detection.
PREVIEW, BLUR, FEATURES, CANNY = range(4)

# Keyword arguments forwarded to cv2.goodFeaturesToTrack when the FEATURES
# mode is active: the cap on returned corners, the quality level, the minimum
# distance between corners, and the block size.
feature_params = {
    "maxCorners": 500,
    "qualityLevel": 0.2,
    "minDistance": 15,
    "blockSize": 9,
}


In [3]:
# Load the Caffe network from its architecture description (.prototxt) and
# its trained weights (.caffemodel). Both files are looked up relative to the
# current working directory.
prototxt_path = 'deploy.prototxt'
weights_path = 'res10_300x300_ssd_iter_140000_fp16.caffemodel'
net = cv2.dnn.readNetFromCaffe(prototxt_path, weights_path)

# Preprocessing settings used when building the input blob, plus the minimum
# confidence a detection needs before it is drawn.
# These values must match the ones that were used to train the model.
in_width = in_height = 300      # spatial size the frame is resized to
mean = [104, 117, 123]          # per-channel mean passed to blobFromImage
conf_threshold = 0.7            # detections at or below this score are ignored

In [4]:
# Interactive camera demo.
#
# Each captured frame is mirrored, run through the detection network `net`,
# annotated with a box and confidence label for every detection above
# `conf_threshold`, and then shown through the currently selected filter.
#
# Keys:  P = preview, B = blur, F = corner features, C = Canny edges,
#        Q or Esc = quit.

# s is the video source (0 = default camera), image_filter the active display
# mode, alive the loop flag, and result the image that is shown in the window.
s = 0
image_filter = PREVIEW
alive = True
result = None

# Create a resizable window that the processed frames are drawn into.
win_name = 'Camera Filters'
cv2.namedWindow(win_name, cv2.WINDOW_NORMAL)

# Open the capture device selected by s.
source = cv2.VideoCapture(s)

# try/finally guarantees the camera and the window are released even when an
# exception is raised mid-loop (otherwise the device stays locked until the
# kernel is restarted).
try:
    while alive:
        # Read exactly one frame per iteration and stop as soon as a read
        # fails, so a missing frame never reaches the processing code below.
        has_frame, frame = source.read()
        if not has_frame:
            break

        # Mirror the frame horizontally so the display behaves like a mirror.
        # This is done once, BEFORE any text is drawn, so labels stay readable.
        frame = cv2.flip(frame, 1)
        frame_height, frame_width = frame.shape[:2]

        # Build a 4D blob from the frame and run a forward pass.
        blob = cv2.dnn.blobFromImage(frame, 1.0, (in_width, in_height), mean,
                                     swapRB=False, crop=False)
        net.setInput(blob)
        detections = net.forward()

        # Each detection row holds the confidence at index 2 followed by the
        # normalized box corners (x1, y1, x2, y2) at indices 3..6. The x
        # values are scaled by the frame width and the y values by its height.
        for i in range(detections.shape[2]):
            confidence = detections[0, 0, i, 2]
            if confidence > conf_threshold:
                x_left_bottom = int(detections[0, 0, i, 3] * frame_width)
                y_left_bottom = int(detections[0, 0, i, 4] * frame_height)
                x_right_top = int(detections[0, 0, i, 5] * frame_width)
                y_right_top = int(detections[0, 0, i, 6] * frame_height)

                # Bounding box around the detection.
                cv2.rectangle(frame, (x_left_bottom, y_left_bottom),
                              (x_right_top, y_right_top), (0, 255, 0))

                # Confidence label on a filled white background so it stays
                # legible regardless of what is behind it.
                label = "Confidence: %.4f" % confidence
                label_size, base_line = cv2.getTextSize(
                    label, cv2.FONT_HERSHEY_SIMPLEX, 0.5, 1)
                cv2.rectangle(frame,
                              (x_left_bottom, y_left_bottom - label_size[1]),
                              (x_left_bottom + label_size[0], y_left_bottom + base_line),
                              (255, 255, 255), cv2.FILLED)
                cv2.putText(frame, label, (x_left_bottom, y_left_bottom),
                            cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 0))

        # Overlay the time spent in the last forward pass, converted from
        # ticks to milliseconds.
        t, _ = net.getPerfProfile()
        label = "Inference time: %.2f ms" % (t * 1000.0 / cv2.getTickFrequency())
        cv2.putText(frame, label, (0, 15), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0))

        # Apply the currently selected filter to the annotated frame.
        if image_filter == PREVIEW:
            result = frame
        elif image_filter == CANNY:
            result = cv2.Canny(frame, 145, 150)
        elif image_filter == BLUR:
            result = cv2.blur(frame, (13, 13))
        elif image_filter == FEATURES:
            result = frame
            frame_gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
            corners = cv2.goodFeaturesToTrack(frame_gray, **feature_params)
            if corners is not None:
                for x, y in numpy.float32(corners).reshape(-1, 2):
                    cv2.circle(result, (int(x), int(y)), 10, (0, 255, 0), 1)

        # Show the result and poll the keyboard. waitKey returns -1 when no
        # key was pressed; masking to the low byte keeps the comparison with
        # ord(...) reliable if the backend sets higher bits.
        cv2.imshow(win_name, result)
        key = cv2.waitKey(1) & 0xFF
        if key in (ord('Q'), ord('q'), 27):  # 27 is the Escape key
            alive = False
        elif key in (ord('C'), ord('c')):
            image_filter = CANNY
        elif key in (ord('B'), ord('b')):
            image_filter = BLUR
        elif key in (ord('F'), ord('f')):
            image_filter = FEATURES
        elif key in (ord('P'), ord('p')):
            image_filter = PREVIEW
finally:
    # Always release the video source and destroy the window on exit.
    source.release()
    cv2.destroyWindow(win_name)