Design and implement algorithms that recognize hand shapes (such as making a fist, thumbs up, thumbs down, pointing with an index finger etc.) or gestures (such as waving with one or both hands, swinging, drawing something in the air etc.) and create a graphical display that responds to the recognition of the hand shapes or gestures. For your system, you are encouraged to try out some of the following computer vision techniques that were discussed in class and use at least a couple of techniques (in particular, binary object shape analysis):

- horizontal and vertical projections to find bounding boxes of "movement blobs" or "skin-color blobs"
- size, position, and orientation of object of interest
- circularity of object of interest
- template matching (e.g., create templates of a closed hand and an open hand)
- background differencing: D(x,y,t) = |I(x,y,t)-I(x,y,0)|
- frame-to-frame differencing: D’(x,y,t) = |I(x,y,t)-I(x,y,t-1)|
- motion energy templates (union of binary difference images over a window of time)
- skin-color detection (e.g., thresholding red and green pixel values)
- tracking the position and orientation of moving objects

In [None]:
# Read video stream from the webcam and display it in a window
import cv2
import numpy as np

In [None]:
def find_frame_difference(current, previous):
    """
    Compute the frame-to-frame difference image of two BGR frames.

    Both frames are converted to grayscale first, so the returned image is
    the single-channel absolute difference |current - previous|.
    """
    gray_current, gray_previous = (
        cv2.cvtColor(image, cv2.COLOR_BGR2GRAY) for image in (current, previous)
    )
    return cv2.absdiff(gray_current, gray_previous)

def find_contours(frame):
    """
    Find object contours in the frame difference

    The input is binarised with a fixed threshold of 127 and the contours of
    the resulting binary image are returned.

    Args:
        frame: either a 3-channel BGR image or an image that is already
            single-channel (such as a grayscale difference image).

    Returns:
        The contours found by cv2.findContours (RETR_TREE,
        CHAIN_APPROX_SIMPLE).
    """
    # Only 3-channel input needs converting; a grayscale difference image
    # would make cvtColor(BGR2GRAY) fail, so it is passed through as-is.
    if frame.ndim == 3 and frame.shape[2] == 3:
        gray = cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY)
    else:
        gray = frame

    # cv2.threshold takes (src, thresh, maxval, type); the threshold type is
    # the fourth argument, the fifth positional slot is the output array.
    _, thresh = cv2.threshold(gray, 127, 255, cv2.THRESH_BINARY)

    # findContours returns (contours, hierarchy) in OpenCV 2/4 but
    # (image, contours, hierarchy) in OpenCV 3; contours is always second
    # from the end.
    contours = cv2.findContours(thresh, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2]
    return contours

def find_bounding_box(largest_contour):
    """
    Find the bounding box of the largest contour using horizontal and vertical projection of the histogram

    The contour points are projected onto the x-axis and the y-axis; the
    extent of each projection gives one side of the box.

    Args:
        largest_contour: array-like of contour points, e.g. the (N, 1, 2)
            arrays produced by cv2.findContours. Any layout that reshapes
            to (N, 2) with columns (x, y) is accepted.

    Returns:
        (x, y, w, h) as Python ints, where (x, y) is the top-left corner and
        w/h count pixels inclusively, or None when there are no points.
    """
    if largest_contour is None:
        return None

    points = np.asarray(largest_contour).reshape(-1, 2)
    if points.shape[0] == 0:
        return None

    # Projection onto the x-axis (columns) and the y-axis (rows).
    xs = points[:, 0]
    ys = points[:, 1]
    x_min, x_max = int(xs.min()), int(xs.max())
    y_min, y_max = int(ys.min()), int(ys.max())

    # +1 because both extreme pixels belong to the box.
    return x_min, y_min, x_max - x_min + 1, y_max - y_min + 1
    

def convert_to_direction(orientation):
    """
    Map an orientation angle (in radians) to a compass-style label.

    The angle is wrapped into [0, 360) degrees and snapped to the nearest
    multiple of 45 degrees. Labels start at "East" for 0 and continue
    through "Southeast", "South", ... as the angle increases.
    """
    compass = (
        "East",
        "Southeast",
        "South",
        "Southwest",
        "West",
        "Northwest",
        "North",
        "Northeast",
    )
    wrapped_degrees = np.degrees(orientation) % 360
    # Each label covers a 45-degree sector; the modulo folds 360 back to 0.
    sector = round(wrapped_degrees / 45) % len(compass)
    return compass[sector]


def find_skin_color_blobs(frame):
    """
    Find skin color blobs in the frame
    Source: https://stackoverflow.com/questions/8753833/exact-skin-color-hsv-range

    The frame is thresholded in HSV space, the external contour with the
    largest area is selected, and its bounding box is drawn onto `frame`
    in place (pass a copy if the original pixels must be kept).

    Args:
        frame: BGR image.

    Returns:
        (frame, largest_contour, x, y) where (x, y) is the top-left corner of
        the bounding box. When the mask contains no contour at all, the frame
        is returned unmodified together with (None, 0, 0).
    """
    # Convert BGR to HSV
    hsv = cv2.cvtColor(frame, cv2.COLOR_BGR2HSV)

    # Define the range of skin color in HSV
    lower_skin = np.array([0, 48, 80], dtype=np.uint8)
    upper_skin = np.array([20, 255, 255], dtype=np.uint8)

    # Threshold the HSV image to get the mask
    mask = cv2.inRange(hsv, lower_skin, upper_skin)

    # Find contours in the mask. Indexing with [-2] works for both the
    # 2-value (OpenCV 2/4) and 3-value (OpenCV 3) return forms.
    contours = cv2.findContours(mask, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)[-2]

    # max() raises ValueError on an empty sequence, which happens whenever
    # no skin-coloured pixel is visible; report "no blob" instead.
    if not contours:
        return frame, None, 0, 0

    largest_contour = max(contours, key=cv2.contourArea)

    # Draw the bounding box of the largest contour
    x, y, w, h = cv2.boundingRect(largest_contour)
    cv2.rectangle(frame, (x, y), (x + w, y + h), (0, 255, 0), 2)
    return frame, largest_contour, x, y


def get_shape_position_orientation(shape_frame, contour, x, y):
    """
    Get the position and orientation of the shape

    Fits an ellipse to the contour and draws, onto `shape_frame` in place,
    the ellipse, a line from the contour centroid along the ellipse angle,
    and a text label produced by convert_to_direction.

    Args:
        shape_frame: image to draw on.
        contour: contour points of the shape, or None when nothing was found.
        x, y: anchor for the text label (the label is placed 10 px above).

    Returns:
        `shape_frame`; it is returned without any drawing when the contour
        is missing, has fewer than five points, or encloses zero area.
    """
    # cv2.fitEllipse requires at least five points.
    if contour is None or len(contour) < 5:
        return shape_frame

    # A zero-area contour has m00 == 0, so the centroid is undefined.
    moment = cv2.moments(contour)
    if moment["m00"] == 0:
        return shape_frame

    centroid_x = int(moment["m10"] / moment["m00"])
    centroid_y = int(moment["m01"] / moment["m00"])
    centroid = (centroid_x, centroid_y)

    # Fit an ellipse to the contour
    shape = cv2.fitEllipse(contour)  # TODO: Could also use polygon

    # Rotation angle of the fitted ellipse, converted to radians.
    # NOTE(review): confirm whether this angle refers to the major or the
    # minor axis of the ellipse before relying on the direction label.
    orientation = np.deg2rad(shape[2])

    length = 100  # TODO: Adjust length
    x2 = int(centroid_x + length * np.cos(orientation))
    y2 = int(centroid_y + length * np.sin(orientation))
    orientation_vector = (x2, y2)
    orientation_text = convert_to_direction(orientation)  # TODO: needs more work

    cv2.line(shape_frame, centroid, orientation_vector, (0, 0, 255), 2)
    cv2.ellipse(shape_frame, shape, (0, 0, 255), 2)
    cv2.putText(shape_frame, f"Orientation: {orientation_text}", (x, y - 10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
    return shape_frame

In [None]:
# Capture frames from the default webcam and show three windows:
#   'frame' - the raw camera image
#   'diff'  - frame-to-frame difference against the previous raw frame
#   'skin'  - the frame annotated with the skin blob, ellipse and direction
cap = cv2.VideoCapture(0)
try:
    ret, prev_frame = cap.read()
    if not ret:
        print("Can't receive frame (stream end?). Exiting ...")
        exit(0)

    cv2.namedWindow('frame', cv2.WINDOW_NORMAL)
    cv2.imshow('frame', prev_frame)

    cv2.namedWindow('diff', cv2.WINDOW_NORMAL)
    cv2.namedWindow('skin', cv2.WINDOW_NORMAL)

    while True:
        ret, frame = cap.read()
        if not ret:
            print("Can't receive frame (stream end?). Exiting ...")
            break
        cv2.imshow('frame', frame)

        diff = find_frame_difference(frame, prev_frame)
        cv2.imshow('diff', diff)

        # The skin/shape helpers draw in place, so annotate a copy; otherwise
        # the overlay ends up in prev_frame and shows up in the next diff.
        annotated = frame.copy()
        try:
            annotated, largest_contour, bbx_x, bbx_y = find_skin_color_blobs(annotated)
            if largest_contour is not None:
                annotated = get_shape_position_orientation(
                    annotated, largest_contour, bbx_x, bbx_y)
        except (ValueError, ZeroDivisionError, cv2.error):
            # No usable skin blob in this frame (empty mask, too few contour
            # points, zero area): keep running and show the frame as it is.
            pass
        cv2.imshow('skin', annotated)

        prev_frame = frame

        # Mask to the low byte: some platforms set higher bits in the key code.
        if cv2.waitKey(30) & 0xFF == 27:
            print("esc key is pressed by user")
            break
finally:
    # Always free the camera and close the windows, even after an error.
    cap.release()
    cv2.destroyAllWindows()
    cv2.waitKey(1)