In [1]:
!pip install scikit-image



In [2]:
import cv2
import time
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
from skimage.feature import peak_local_max

# Caffe network description and trained weights, relative to the notebook.
protoFile = "hand/pose_deploy.prototxt"
weightsFile = "hand/pose_iter_102000.caffemodel"

# Number of network output channels that are scanned for a keypoint.
# NOTE(review): POSE_PAIRS only references indices 0-20, so channel 21 is
# never drawn -- presumably a background map; confirm against the model.
nPoints = 22

# Pairs of keypoint indices that are joined by a line when drawing.
# Each row is one chain rooted at keypoint 0 (presumably one finger).
POSE_PAIRS = [
    [0, 1], [1, 2], [2, 3], [3, 4],
    [0, 5], [5, 6], [6, 7], [7, 8],
    [0, 9], [9, 10], [10, 11], [11, 12],
    [0, 13], [13, 14], [14, 15], [15, 16],
    [0, 17], [17, 18], [18, 19], [19, 20],
]

# Load the network once; it is reused for every frame.
net = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)

# Minimum confidence-map value for a detected peak to be kept as a keypoint.
threshold = 0.2

In [11]:
def draw_skeleton(frame, points, color):
    """Draw one hand skeleton onto ``frame`` in place.

    For every index pair in ``POSE_PAIRS`` whose two keypoints are both
    present, a yellow connecting line is drawn and each endpoint is marked
    with a filled circle.

    Args:
        frame: Image that is drawn on (modified in place).
        points: Sequence indexed by keypoint id; each entry is an ``(x, y)``
            tuple or ``None`` when the keypoint was not detected.
        color: BGR colour used for the keypoint circles.
    """
    bone_color = (0, 255, 255)
    joint_radius = 5

    for idx_from, idx_to in POSE_PAIRS:
        # Skip the pair unless both of its keypoints were detected.
        if not (points[idx_from] and points[idx_to]):
            continue

        start = points[idx_from]
        end = points[idx_to]
        cv2.line(frame, start, end, bone_color, 2, lineType=cv2.LINE_AA)
        for joint in (start, end):
            cv2.circle(frame, joint, joint_radius, color, thickness=-1, lineType=cv2.FILLED)

def regroup_points(pointsA, pointsB):
    """Swap keypoints between the two candidate lists so chains stay together.

    Walks ``POSE_PAIRS`` in order. For a pair ``(parent, child)`` where list A
    has both the parent and the child and list B has the child, the child
    entries of A and B are exchanged when B's child lies closer to A's parent
    than A's own child does.

    Both lists are modified in place; nothing is returned.

    Args:
        pointsA: Keypoints of the first candidate, ``(x, y)`` tuples or ``None``.
        pointsB: Keypoints of the second candidate, same layout as ``pointsA``.
    """
    for parent, child in POSE_PAIRS:
        # All three points are needed to compare the two candidate children.
        if not (pointsA[parent] and pointsA[child] and pointsB[child]):
            continue

        anchor = pointsA[parent]
        dist_own = cv2.norm(anchor, pointsA[child])
        dist_other = cv2.norm(anchor, pointsB[child])

        # The other list's child is nearer to our parent: exchange the two.
        if dist_own > dist_other:
            pointsA[child], pointsB[child] = pointsB[child], pointsA[child]
            
# Live demo: grab frames from the default camera, run the hand keypoint
# network, keep up to two peaks per keypoint channel (one per hand), regroup
# them into two skeletons and show the result. Press 'q' in the window to stop.
cap = cv2.VideoCapture(0)

try:
    while True:
        # Capture frame-by-frame
        ret, frame = cap.read()
        if not ret or frame is None:
            # Camera missing or stream ended: there is no image to process,
            # so stop cleanly instead of failing on frame.shape below.
            print("Could not read a frame from the camera; stopping.")
            break

        t = time.time()
        # Separate copy that receives the numbered single-peak markers.
        frameCopy = np.copy(frame)
        frameHeight, frameWidth = frame.shape[:2]

        aspect_ratio = frameWidth / frameHeight

        # Network input size: fixed height, width follows the frame's aspect
        # ratio rounded down to a multiple of 8 (never below 8).
        inHeight = 320
        inWidth = max(8, int(aspect_ratio * inHeight // 8) * 8)

        inpBlob = cv2.dnn.blobFromImage(frame, 1.0 / 255, (inWidth, inHeight),
                                        (0, 0, 0), swapRB=False, crop=False)

        net.setInput(inpBlob)

        output = net.forward()

        # Detected keypoints for the two candidates; None marks "not found".
        pointsA = []
        pointsB = []
        for i in range(nPoints):
            # Confidence map of the i-th keypoint, scaled to the frame size so
            # peak coordinates are directly usable as pixel positions.
            probMap = output[0, i, :, :]
            probMap = cv2.resize(probMap, (frameWidth, frameHeight))

            # Up to two local maxima, returned as (row, col) coordinates.
            local_peak = peak_local_max(probMap, min_distance=10, threshold_abs=0.05, num_peaks=2)
            if len(local_peak) == 2:
                pointA = local_peak[0]
                pointB = local_peak[1]
                valA = probMap[pointA[0], pointA[1]]
                valB = probMap[pointB[0], pointB[1]]
                # Peaks are (row, col); stored points are (x, y).
                if valA > threshold:
                    pointsA.append((int(pointA[1]), int(pointA[0])))
                else:
                    pointsA.append(None)
                if valB > threshold:
                    pointsB.append((int(pointB[1]), int(pointB[0])))
                else:
                    pointsB.append(None)
            else:
                # Fewer than two peaks: fall back to the global maximum of the
                # map and assign it to the first candidate only.
                minVal, prob, minLoc, point = cv2.minMaxLoc(probMap)

                if prob > threshold:
                    cv2.circle(frameCopy, (int(point[0]), int(point[1])), 6, (0, 255, 255), thickness=-1, lineType=cv2.FILLED)
                    cv2.putText(frameCopy, "{}".format(i), (int(point[0]), int(point[1])), cv2.FONT_HERSHEY_SIMPLEX, .8, (0, 0, 255), 2, lineType=cv2.LINE_AA)

                    # Add the point to the list if the probability is greater than the threshold
                    pointsA.append((int(point[0]), int(point[1])))
                else:
                    pointsA.append(None)
                pointsB.append(None)

        print('pointsA:', pointsA)
        print('pointsB:', pointsB)
        regroup_points(pointsA, pointsB)

        # Draw Skeleton: first candidate in green, second in blue (BGR).
        draw_skeleton(frame, pointsA, (0, 255, 0))
        draw_skeleton(frame, pointsB, (255, 0, 0))

        print("Time Taken for frame = {}".format(time.time() - t))

        cv2.imshow('Output-Skeleton', frame)
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the camera and close the window, including when the kernel
    # is interrupted or an exception is raised while processing a frame.
    cap.release()
    cv2.destroyAllWindows()

pointsA: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
pointsB: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
Time Taken for frame = 1.9685170650482178
pointsA: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
pointsB: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
Time Taken for frame = 1.5381081104278564
pointsA: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
pointsB: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
Time Taken for frame = 1.5169649124145508
pointsA: [None, None, 

pointsA: [(515, 329), (230, 318), (254, 318), (278, 317), (409, 305), (266, 270), (290, 246), (538, 222), (325, 210), (492, 258), (491, 222), (290, 209), (302, 186), (480, 257), (468, 221), (266, 198), (278, 186), (468, 258), (444, 234), (432, 221), (432, 197), None]
pointsB: [(195, 318), (491, 317), (467, 317), (443, 306), (302, 305), (515, 269), (526, 246), (313, 222), (539, 198), (254, 258), (278, 222), (491, 197), (480, 173), (242, 257), (255, 221), (467, 197), (456, 162), (219, 246), (231, 222), (242, 209), (243, 197), None]
Time Taken for frame = 1.8297007083892822
pointsA: [(183, 306), (218, 306), (456, 306), (266, 306), (290, 306), (254, 270), (278, 246), (302, 233), (314, 210), (243, 257), (278, 222), (290, 209), (301, 197), (456, 246), (444, 221), (266, 198), (278, 186), (444, 258), (420, 234), (409, 221), (397, 198), None]
pointsB: [(492, 305), (479, 305), (242, 306), (432, 305), (408, 305), (492, 257), (503, 234), (503, 209), (503, 185), (479, 246), (468, 221), (467, 186), 

pointsA: [(468, 378), None, (527, 317), (539, 282), (563, 269), (515, 258), (515, 210), (515, 185), (515, 149), None, None, None, None, None, (290, 209), (313, 174), (325, 161), (278, 269), (302, 234), (325, 221), (337, 198), None]
pointsB: [(207, 330), (183, 294), (171, 258), (183, 233), (172, 198), (230, 222), (242, 174), (255, 150), (278, 138), (254, 234), (278, 185), (290, 161), (313, 138), (266, 246), None, None, None, None, None, None, None, None]
Time Taken for frame = 1.831683874130249
pointsA: [(468, 378), None, (527, 317), (539, 282), (563, 269), (515, 258), (515, 210), (515, 185), (515, 149), (491, 258), (480, 198), None, (467, 138), None, (290, 209), (313, 185), (325, 162), (278, 270), (302, 234), (314, 221), (326, 198), None]
pointsB: [(207, 330), (183, 294), (171, 269), (183, 233), (171, 198), (219, 233), (242, 185), (254, 161), (266, 138), (243, 234), (266, 186), (290, 161), (302, 138), (266, 257), None, None, None, None, None, None, None, None]
Time Taken for frame = 1.

pointsA: [None, (218, 306), (207, 281), (219, 246), (219, 222), (266, 257), (279, 210), (302, 197), (314, 174), (278, 269), (313, 222), (337, 209), (349, 186), (290, 281), (326, 246), (349, 233), (361, 210), (302, 294), (326, 281), (349, 258), (361, 246), None]
pointsB: [(230, 341), None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
Time Taken for frame = 1.816939353942871
pointsA: [None, (207, 306), (207, 281), (218, 246), (218, 222), (255, 246), (278, 210), (290, 186), (314, 174), (278, 258), (302, 222), (326, 198), (349, 185), (290, 281), (325, 246), (338, 222), (361, 209), (302, 294), (326, 270), (349, 258), (361, 246), None]
pointsB: [(219, 341), None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
Time Taken for frame = 1.8354299068450928
pointsA: [None, (207, 306), (207, 281), (219, 246), (218, 221), (255, 246), (278, 209), (290, 186

pointsA: [(480, 389), None, (515, 317), (539, 294), (563, 269), (503, 317), (504, 270), (515, 246), (527, 221), (479, 317), (479, 269), None, None, (455, 330), (433, 282), (444, 246), (433, 222), (432, 354), (420, 341), (397, 318), (384, 294), None]
pointsB: [None, None, None, None, None, None, None, None, None, None, None, (480, 234), (491, 209), None, None, None, None, None, None, None, None, None]
Time Taken for frame = 1.8004157543182373
pointsA: [(468, 377), (491, 330), (515, 317), (539, 293), (563, 258), (503, 306), (515, 270), (515, 245), None, (468, 306), (479, 269), None, None, (444, 329), (443, 282), (444, 246), (432, 222), (432, 353), (420, 330), (397, 317), (385, 294), None]
pointsB: [None, None, None, None, None, None, None, None, (527, 221), None, None, (480, 234), (491, 198), None, None, None, None, None, None, None, None, None]
Time Taken for frame = 1.869591236114502
pointsA: [(491, 401), None, (527, 330), (539, 318), (551, 305), (455, 318), (432, 294), (421, 270), (42

pointsA: [None, None, (527, 245), (480, 317), None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
pointsB: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
Time Taken for frame = 1.8077247142791748
pointsA: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
pointsB: [None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None, None]
Time Taken for frame = 1.836348056793213
pointsA: [(302, 341), (325, 329), (361, 329), (361, 306), (373, 293), None, (267, 329), (266, 330), (255, 342), None, None, None, None, None, None, None, None, None, None, None, None, None]
pointsB: [None, None, None, None, None, (302, 293), (314, 245), (302, 245), None, None, None, None, None, None, None, None, None, None, None, None,