In [163]:
'''
CS585_Lab3
CS585 Image and Video Computing
Lab 3
--------------
This program introduces the following concepts:
	a) Reading a stream of images from a webcam, and displaying the video
	b) Skin color detection
	c) Background differencing
	d) Visualizing motion history
--------------
'''
import cv2
import sys
import numpy as np

In [164]:
# globals
# Maximum pixel value; clean_gesture passes it to cv2.threshold as the value
# assigned to pixels that exceed the threshold.
max_thresh = 255

In [165]:
def my_skin_detect(src):
    '''
    Function that labels each pixel as skin / non-skin using fixed per-channel thresholds.
    A pixel counts as skin when channel 2 > 94, channel 1 > 40 and channel 0 > 20.
    For frames read through OpenCV (BGR channel order) these are the red, green and
    blue channels respectively.
    Args:
        src The source color image, indexed as [row, column, channel]
    Returns:
        dst A uint8 grayscale image with the same height and width as src, where skin
            pixels are 255 (white) and all other pixels are 0 (black)
    Surveys of skin color modeling and detection techniques:
    Vezhnevets, Vladimir, Vassili Sazonov, and Alla Andreeva. "A survey on pixel-based skin color detection techniques." Proc. Graphicon. Vol. 3. 2003.
    Kakumanu, Praveen, Sokratis Makrogiannis, and Nikolaos Bourbakis. "A survey of skin-color modeling and detection methods." Pattern recognition 40.3 (2007): 1106-1122.
    '''
    # Name the channel planes once instead of slicing inside the comparison.
    blue, green, red = src[:, :, 0], src[:, :, 1], src[:, :, 2]

    # All three thresholds must hold simultaneously for a pixel to be skin.
    skin_mask = (red > 94) & (green > 40) & (blue > 20)

    # Start from an all-black image and paint the skin pixels white.
    dst = np.zeros(src.shape[:-1], dtype=np.uint8)
    dst[skin_mask] = 255
    return dst

def my_frame_differencing(prev, curr):
    '''
    Function that does frame differencing between the current frame and the previous frame
    Args:
        prev The previous color image
        curr The current color image
    Returns:
        dst A uint8 grayscale image where a pixel is 255 (white) if the grayscale value of
            the absolute difference between prev and curr is greater than 50, and 0 otherwise
    '''
    # Per-channel absolute difference, collapsed to a single intensity channel.
    abs_delta = cv2.absdiff(prev, curr)
    gray_delta = cv2.cvtColor(abs_delta, cv2.COLOR_BGR2GRAY)

    # Binarise: only differences above 50 are treated as real change.
    changed = gray_delta > 50
    return np.where(changed, np.uint8(255), np.uint8(0))

def my_motion_energy(mh):
    '''
    Function that accumulates the frame differences for a certain number of pairs of frames
    Every image in mh contributes: a pixel is white in the result if it is white (255)
    in at least one of the frame difference images. The number of images is not fixed;
    with exactly three images the result is the union of those three.
    Args:
        mh Sequence of equally sized frame difference images (at least one)
    Returns:
        dst The destination uint8 grayscale image (same shape as mh[0]) storing the
            accumulation of the frame difference images: 255 where motion was seen, else 0
    Raises:
        IndexError if mh is empty
    '''
    if len(mh) == 0:
        raise IndexError("mh must contain at least one frame difference image")

    # OR together the "moving" masks of all frames in the history, however many there are.
    moving = np.logical_or.reduce([frame == 255 for frame in mh])

    dst = np.zeros(np.shape(mh[0]), dtype=np.uint8)
    dst[moving] = 255
    return dst



In [166]:
def hand_capture():
    '''
    Function that interactively captures a background reference and three hand gesture
    templates from camera no. 0
    Phase 1 shows the live video until space is pressed; the skin mask of that frame
    (converted to a 3-channel image) becomes the background.
    Phase 2 shows the cleaned contour image of the live video and stores it as a gesture
    template each time space is pressed, until three templates have been collected.
    The on-screen prompt is drawn on a copy, so neither the background mask nor the stored
    templates contain the prompt text.
    The camera is released and all windows are closed before returning, including on error.
    Returns:
        gestures List of the captured contour images (fewer than three if the video
                 stream ended early)
        background The 3-channel skin mask of the scene without hands
    Raises:
        RuntimeError if the video stream ended before a background was captured
        SystemExit if the camera cannot be opened
    '''
    window_name = "Webcam"
    space_key = 32
    gesture_prompts = (
        "Please make a hand gesture, press space to capture when ready.",
        "Please make a second hand gesture, press space to capture when ready.",
        "Please make the last gesture, press space to capture when ready.",
    )

    def draw_prompt(image, text):
        # Single place for the overlay style used by every prompt.
        cv2.putText(image, text, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)

    cap = cv2.VideoCapture(0)
    gestures = []
    background = None

    # if not successful, exit program
    if not cap.isOpened():
        print("Cannot open the video cam")
        sys.exit()

    try:
        cv2.namedWindow(window_name, cv2.WINDOW_AUTOSIZE)

        # Phase 1: capture the background (scene without hands).
        while background is None:
            ret, frame = cap.read()
            if not ret:
                print("Cannot read a frame from video stream")
                break

            # Draw on a copy so the prompt text does not end up in the background mask.
            preview = frame.copy()
            draw_prompt(preview, "Please take a picture with no hands in view, press space to capture when ready.")
            cv2.imshow(window_name, preview)

            # Mask to the low byte: some platforms set extra high bits in the key code.
            if cv2.waitKey(30) & 0xFF == space_key:
                background = cv2.cvtColor(my_skin_detect(frame), cv2.COLOR_GRAY2BGR)

        if background is None:
            raise RuntimeError("Video stream ended before a background image was captured")

        # Phase 2: capture one template per prompt.
        while len(gestures) < len(gesture_prompts):
            ret, frame = cap.read()
            if not ret:
                print("Cannot read a frame from video stream")
                break

            gesture = clean_gesture(frame, background)

            # The template must stay free of overlay text, so annotate a copy for display.
            preview = gesture.copy()
            draw_prompt(preview, gesture_prompts[len(gestures)])
            cv2.imshow(window_name, preview)

            if cv2.waitKey(30) & 0xFF == space_key:
                gestures.append(gesture)
    finally:
        # Always give the camera back, even if gesture cleaning raised.
        cap.release()
        cv2.destroyAllWindows()

    return gestures, background



In [167]:
def clean_gesture(frame, background):
    '''
    Function that isolates the largest skin-colored region that differs from the
    background and renders it as a contour image
    Args:
        frame The current color image
        background The 3-channel skin mask of the scene without hands (as returned by hand_capture)
    Returns:
        contour_output A 3-channel uint8 image with the same height and width as frame. It is
                       black except for the contour with the largest area, which is filled
                       with (255, 0, 0) and outlined with (0, 0, 255). If no contour is found
                       the image is entirely black.
    '''
    # Skin mask of the current frame, as 3 channels so it can be differenced against background.
    skin_bgr = cv2.cvtColor(my_skin_detect(frame), cv2.COLOR_GRAY2BGR)
    foreground = my_frame_differencing(background, skin_bgr)

    _, thres_output = cv2.threshold(foreground, 125, max_thresh, cv2.THRESH_BINARY)

    # OpenCV 3.x returns (image, contours, hierarchy) while 2.x/4.x return
    # (contours, hierarchy); taking the last two values works for all of them.
    contours, _hierarchy = cv2.findContours(thres_output, cv2.RETR_TREE, cv2.CHAIN_APPROX_SIMPLE)[-2:]

    contour_output = np.zeros(thres_output.shape + (3,), dtype=np.uint8)

    # Nothing differs from the background: return the blank image instead of
    # letting max() fail on an empty sequence.
    if len(contours) == 0:
        return contour_output

    max_id = max(range(len(contours)), key=lambda idx: cv2.contourArea(contours[idx]))

    cv2.drawContours(contour_output, contours, max_id, (255, 0, 0), cv2.FILLED, 8)
    cv2.drawContours(contour_output, contours, max_id, (0, 0, 255), 2, 8)

    return contour_output


In [168]:
# ----------------
# a) Reading a stream of images from a webcam, and displaying the video
# ----------------
# For more information on reading and writing video: http://docs.opencv.org/modules/highgui/doc/reading_and_writing_images_and_video.html

# Capture the background and the gesture templates first. hand_capture() opens and
# releases camera no. 0 itself, so the camera must not already be held open here.
gestures, background = hand_capture()

# open the video camera no. 0
cap = cv2.VideoCapture(0)

# if not successful, exit program
if not cap.isOpened():
    print("Cannot open the video cam")
    sys.exit()

try:
    while True:
        # read a new frame from video
        ret, frame = cap.read()
        # if not successful, break loop
        if not ret:
            print("Cannot read a frame from video stream")
            break

        clean = clean_gesture(frame, background)

        # Report the first template whose normalized correlation reaches 0.8.
        label = "No Gesture Detected"
        for gesture_number, template in enumerate(gestures, start=1):
            # matchTemplate returns an array of scores (1x1 when image and template
            # have the same size); reduce it to a scalar before comparing.
            score = cv2.matchTemplate(clean, template, cv2.TM_CCOEFF_NORMED).max()
            if score >= 0.8:
                label = "Gesture %d Detected" % gesture_number
                break

        cv2.putText(frame, label, (50, 50), cv2.FONT_HERSHEY_SIMPLEX, 1, (0, 255, 0), 2, cv2.LINE_AA)
        cv2.imshow("Webcam", frame)

        # wait for 'esc' key press for 30ms. If 'esc' key is pressed, break loop
        if cv2.waitKey(30) & 0xFF == 27:
            print("esc key is pressed by user")
            break
finally:
    # When everything done (or on error), release the capture
    cap.release()
    cv2.destroyAllWindows()
    # Give the GUI a few event-loop ticks; presumably needed on some platforms
    # for the window to actually close.
    for i in range(1, 5):
        cv2.waitKey(1)

esc key is pressed by user
