In [24]:
import sys
import os
import numpy as np
import cv2
# Break the OpenCV version string into its dot-separated components and
# report them. Unpacking raises ValueError unless there are exactly three.
version_parts = cv2.__version__.split('.')
major_ver, minor_ver, subminor_ver = version_parts
print('OpenCV Version: {}.{}.{}'.format(*version_parts))

OpenCV Version: 4.4.0


In [25]:
# OpenCV feature tracker.
def setup_tracker(ttype):
    """Create an OpenCV tracker selected by numeric index.

    Args:
        ttype: Index into the list
            ``['BOOSTING', 'MIL', 'KCF', 'TLD', 'MEDIANFLOW', 'GOTURN']``.
            Plain list indexing is used, so negative values count from the
            end of the list.

    Returns:
        A freshly constructed tracker object of the selected type.

    Raises:
        IndexError: If ``ttype`` is outside the list.
        AttributeError: If the installed OpenCV build provides the selected
            tracker neither as ``cv2.<factory>`` nor as
            ``cv2.legacy.<factory>``.
    """
    tracker_types = ['BOOSTING', 'MIL', 'KCF', 'TLD', 'MEDIANFLOW', 'GOTURN']
    tracker_type = tracker_types[ttype]

    # Tracker name -> name of the OpenCV factory function that builds it.
    factory_names = {
        'BOOSTING': 'TrackerBoosting_create',
        'MIL': 'TrackerMIL_create',
        'KCF': 'TrackerKCF_create',
        'TLD': 'TrackerTLD_create',
        'MEDIANFLOW': 'TrackerMedianFlow_create',
        'GOTURN': 'TrackerGOTURN_create',
    }
    factory_name = factory_names[tracker_type]

    # Prefer the top-level factory (as in OpenCV 4.4); fall back to the
    # ``cv2.legacy`` namespace, which newer OpenCV builds use for some of
    # these trackers.
    for namespace in (cv2, getattr(cv2, 'legacy', None)):
        factory = getattr(namespace, factory_name, None)
        if factory is not None:
            return factory()

    raise AttributeError(
        "OpenCV build does not provide {} (tracker type {})".format(
            factory_name, tracker_type))


# Helper function for applying a mask to an array
def mask_array(array, imask):
    """Return a copy of ``array`` with everything outside ``imask`` zeroed.

    Args:
        array: NumPy array whose first two dimensions match ``imask``
            (for example an image with or without a trailing channel axis).
        imask: Array-like mask with shape ``array.shape[:2]``. It is
            converted to boolean, so any non-zero entry selects that
            position.

    Returns:
        A new ``uint8`` array with the shape of ``array``. Selected positions
        hold the values of ``array`` (cast to ``uint8``); all others are 0.

    Raises:
        Exception: If ``array.shape[:2]`` differs from the mask's shape.
    """
    imask = np.asarray(imask, dtype=bool)
    if array.shape[:2] != imask.shape:
        raise Exception("Shapes of input and imask are incompatible")
    output = np.zeros_like(array, dtype=np.uint8)
    # One vectorised boolean-index assignment instead of a Python loop over
    # rows; trailing axes (e.g. colour channels) are copied along with it.
    output[imask] = array[imask]
    return output

In [26]:
# Gesture labels: class index -> pose name.
classes = {0: 'fist', 1: 'five', 2: 'point', 3: 'swing'}

# Pose recorded in this session; used as the sub-directory name and the
# file-name prefix of saved crops.
CURR_POSE = 'fist'
DATASET_DIR = os.path.join(os.getcwd(), 'datasets', 'training_set')
# DATASET_DIR = os.path.join(os.getcwd(), 'datasets', 'valid_set')

# Open capture device 0.
video = cv2.VideoCapture(0)
if not video.isOpened():
    print("Could not open video")
    # Release the handle before bailing out so the device is not left held.
    video.release()
    sys.exit()

# Grab one frame up front; it becomes the initial background reference.
ok, frame = video.read()
if not ok:
    print("Cannot read video")
    video.release()
    sys.exit()

# OpenCV dilation and erosion kernel
kernel = np.ones((3, 3), np.uint8)

# Tracking bounding box -> (TopLeftX, TopLeftY, Width, Height)
# NOTE(review): the original comment called this a 128x128 box, but with the
# (x, y, w, h) layout used when cropping and drawing it spans 188x188 pixels.
# Use (60, 60, 128, 128) if a 128x128 box was intended.
bbox_initial = (60, 60, 188, 188)
bbox = bbox_initial

# Tracking status, -1 for not tracking, 0 for unsuccessful tracking, 1 for successful tracking
tracking = -1

# Text anchor positions (x, y) for on-screen overlays.
positions = {'hand_pose': (15, 40), 'fps': (15, 20)}

# Number of save attempts so far; used to number the saved crop files.
img_count = 0
# Background reference frame for frame differencing.
bg = frame.copy()

# Main capture loop.
# Keys: ESC quits; r or p re-captures the background and resets the box;
# t starts tracking from the current box; s saves the current crop.
# The try/finally guarantees the windows are closed and the camera is released
# even if an exception (or a kernel interrupt) ends the loop.
try:
    while True:
        ok, frame = video.read()
        # Check the read result before using the frame: copying the frame
        # of a failed read would raise instead of ending the loop cleanly.
        if not ok:
            break
        display = frame.copy()

        timer = cv2.getTickCount()

        # Background subtraction: absolute difference against the reference
        # frame, reduced to a single grayscale channel.
        diff = cv2.absdiff(bg, frame)
        mask = cv2.cvtColor(diff, cv2.COLOR_BGR2GRAY)

        # Binarise: any difference above 10 counts as foreground.
        th, thresh = cv2.threshold(mask, 10, 255, cv2.THRESH_BINARY)

        # Clean up the binary mask: open, then close, then dilate once.
        opening = cv2.morphologyEx(thresh, cv2.MORPH_OPEN, kernel)
        closing = cv2.morphologyEx(opening, cv2.MORPH_CLOSE, kernel)
        img_dilation = cv2.dilate(closing, kernel, iterations=1)

        imask = img_dilation > 0
        # Get foreground from mask
        foreground = mask_array(frame, imask)

        # If tracking is active, update the tracker
        if tracking != -1:
            tracking, bbox = tracker.update(foreground)
            tracking = int(tracking)

        # Crop the cleaned mask to the current box (rows = y, columns = x).
        hand_crop = img_dilation[int(bbox[1]):int(bbox[1] + bbox[3]),
                                 int(bbox[0]):int(bbox[0] + bbox[2])]

        # Draw bounding box
        p1 = (int(bbox[0]), int(bbox[1]))
        p2 = (int(bbox[0] + bbox[2]), int(bbox[1] + bbox[3]))
        cv2.rectangle(display, p1, p2, (255, 0, 0), 2, 1)

        # Calculate Frames per second (FPS)
        fps = cv2.getTickFrequency() / (cv2.getTickCount() - timer)
        cv2.putText(display, "FPS : " + str(int(fps)), positions['fps'],
                    cv2.FONT_HERSHEY_SIMPLEX, 0.65, (50, 170, 50), 2)

        # Display result
        cv2.imshow("display", display)
        # Display diff
        cv2.imshow("diff", diff)
        # Display thresh
        cv2.imshow("thresh", thresh)
        # Display mask
        cv2.imshow("img_dilation", img_dilation)
        # Display hand_crop; an empty crop (e.g. a degenerate box) cannot be
        # shown, so skip it rather than swallowing every exception.
        if hand_crop.size > 0:
            cv2.imshow("hand_crop", hand_crop)

        # waitKey returns -1 when no key is pressed, which becomes 255 here.
        k = cv2.waitKey(1) & 0xff

        if k == 27:
            # ESC pressed
            break
        elif k in (ord('r'), ord('p')):
            # r or p pressed: take the current frame as the new background
            # and go back to the initial, untracked box.
            bg = frame.copy()
            bbox = bbox_initial
            tracking = -1
        elif k == ord('t'):
            # t pressed
            # Initialize tracker with the current frame and bounding box
            # NOTE(review): the tracker is initialised on the raw frame but
            # updated with the masked foreground above - confirm intended.
            tracker = setup_tracker(2)
            tracking = tracker.init(frame, bbox)
        elif k == ord('s'):
            # s pressed: write the current crop to
            # DATASET_DIR/CURR_POSE/CURR_POSE_<n>.jpg
            img_count += 1
            dname = os.path.join(DATASET_DIR, CURR_POSE)
            fname = os.path.join(dname,
                                 "{}_{}.jpg".format(CURR_POSE, img_count))
            # Create missing parent directories too; no error if present.
            os.makedirs(dname, exist_ok=True)
            done = False
            try:
                done = cv2.imwrite(fname, hand_crop)
            except cv2.error as err:
                # e.g. an empty crop; report it instead of hiding it.
                print('imwrite error: ', err)
            if not done:
                print('Fail to save to ', fname)
            else:
                print('Save to ', fname)
        elif k != 255:
            # Unhandled key: echo its code to help discover key values.
            print(k)
finally:
    cv2.destroyAllWindows()
    video.release()

Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_1.jpg
Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_2.jpg
Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_3.jpg
Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_4.jpg
Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_5.jpg
Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_6.jpg
Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_7.jpg
Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_8.jpg
Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_9.jpg
Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_10.jpg
Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_11.jpg
Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_12.jpg
Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_13.jpg
Save to  /home/wei/git/itir_cv/datasets/valid_set/swing/swing_14.jpg
Save to  /home/wei/git/itir_cv/datasets/val