In [1]:
import matplotlib.pyplot as plt
from copy import deepcopy
import numpy as np
import time
import cv2
%matplotlib inline

## Specify the model to be used
COCO and MPI are body pose estimation models. COCO outputs 18 keypoints and MPI outputs 15 keypoints.

HAND is a hand keypoint estimation model. It outputs 22 points.

Ensure that the model files are available in the folders.

In [2]:
# Which pre-trained body-pose model to load: "COCO" or "MPI".
MODE = "MPI"

# Strings are compared with `==`. The previous `is` tested object identity,
# which only happens to work for interned literals and triggers a
# SyntaxWarning on Python 3.8+.
if MODE == "COCO":
    protoFile = "Models/coco/pose_deploy_linevec.prototxt"
    weightsFile = "Models/coco/pose_iter_440000.caffemodel"
    nPoints = 18
    # Pairs of keypoint indices that are joined by a line when drawing.
    POSE_PAIRS = [ [1,0],[1,2],[1,5],[2,3],[3,4],[5,6],[6,7],[1,8],[8,9],[9,10],[1,11],[11,12],[12,13],[0,14],[0,15],[14,16],[15,17]]

elif MODE == "MPI":
    protoFile = "Models/mpi/pose_deploy_linevec_faster_4_stages.prototxt"
    weightsFile = "Models/mpi/pose_iter_160000.caffemodel"
    nPoints = 15
    # Pairs of keypoint indices that are joined by a line when drawing.
    POSE_PAIRS = [[0,1], [1,2], [2,3], [3,4], [1,5], [5,6], [6,7], [1,14], [14,8], [8,9], [9,10], [14,11], [11,12], [12,13] ]

else:
    # Fail here with a clear message instead of a NameError on `protoFile`
    # in a later cell.
    raise ValueError('Unsupported MODE: {!r} (expected "COCO" or "MPI")'.format(MODE))
    

<b>COCO Output Format</b> Nose – 0, Neck – 1, Right Shoulder – 2, Right Elbow – 3, Right Wrist – 4, Left Shoulder – 5, Left Elbow – 6, Left Wrist – 7, Right Hip – 8, Right Knee – 9, Right Ankle – 10, Left Hip – 11, Left Knee – 12, Left Ankle – 13, Right Eye – 14, Left Eye – 15, Right Ear – 16, Left Ear – 17, Background – 18

<b>MPII Output Format</b> Head – 0, Neck – 1, Right Shoulder – 2, Right Elbow – 3, Right Wrist – 4, Left Shoulder – 5, Left Elbow – 6, Left Wrist – 7, Right Hip – 8, Right Knee – 9, Right Ankle – 10, Left Hip – 11, Left Knee – 12, Left Ankle – 13, Chest – 14, Background – 15

#### Load the network and set the input size

In [3]:
# Blob size used when the skeleton network is fed the whole frame; the main
# loop also uses it as the minimum size of the person crop.
inWidth, inHeight = 368, 368

# Build the skeleton (pose) network from the Caffe definition and weights
# selected by MODE.
skNet = cv2.dnn.readNetFromCaffe(protoFile, weightsFile)

### Functions

In [4]:
def showPoints(frame, output, W, H, frameWidth, frameHeight, sPoint, threshold):
    """Draw the detected body keypoints and the limbs joining them on `frame`.

    Reads the module-level `nPoints` (number of keypoints to look for) and
    `POSE_PAIRS` (index pairs to join with a line).

    Parameters:
        frame: image that cv2.circle / cv2.line draw on directly.
        output: network output; `output[0, i, :, :]` is read as the
            confidence map of keypoint `i`.
        W, H: width and height the map coordinates are divided by when
            rescaling to the image.
        frameWidth, frameHeight: image size the map coordinates are scaled to.
        sPoint: scale factor; circles get radius 8*sPoint and lines get
            thickness 3*sPoint.
        threshold: a keypoint is kept only if its peak confidence is
            strictly greater than this value.

    Returns:
        The same `frame` object that was passed in.
    """
    keypoints = []  # one entry per keypoint: (x, y) in image pixels, or None

    for part in range(nPoints):
        heatmap = output[0, part, :, :]

        # Only the global maximum and its location are needed.
        _, confidence, _, peak = cv2.minMaxLoc(heatmap)

        # Map the peak from heat-map coordinates to image coordinates.
        px = (frameWidth * peak[0]) / W
        py = (frameHeight * peak[1]) / H

        if confidence > threshold:
            centre = (int(px), int(py))
            cv2.circle(frame, centre, 8*sPoint, (0, 0, 255), thickness=-1, lineType=cv2.FILLED)
            keypoints.append(centre)
        else:
            # Keep the slot so list indices still match keypoint ids.
            keypoints.append(None)

    # Join every pair for which both ends were detected.
    for idxA, idxB in POSE_PAIRS:
        startPt, endPt = keypoints[idxA], keypoints[idxB]
        if startPt and endPt:
            cv2.line(frame, startPt, endPt, (0, 255, 255), 3*sPoint)

    return frame

In [5]:
def getBoxes(outs):
    """Collect the candidate boxes found in the detector outputs.

    Reads the module-level `frameWidth` and `frameHeight` to scale the box
    coordinates.

    Parameters:
        outs: iterable of detector outputs. Each one is iterated row by row;
            in a row, entries 0-3 are read as centre x, centre y, width and
            height (multiplied by the frame size, so presumably normalised),
            and the entries from index 5 on as per-class scores.

    Returns:
        (class_ids, confidences, boxes): three parallel lists holding, for
        each row whose best class score is above 0.1, the index of that
        class, its score as a float, and
        [xUpLeft, yUpLeft, widthObject, heightObject].
    """
    class_ids, confidences, boxes = [], [], []

    for layerOut in outs:
        for det in layerOut:
            classScores = det[5:]
            best = np.argmax(classScores)
            score = classScores[best]
            if not score > 0.1:
                continue

            boxW = int(det[2] * frameWidth)
            boxH = int(det[3] * frameHeight)
            # The detector reports the box centre; convert it to the
            # upper-left corner.
            left = int(det[0] * frameWidth) - boxW / 2
            top = int(det[1] * frameHeight) - boxH / 2

            class_ids.append(best)
            confidences.append(float(score))
            boxes.append([left, top, boxW, boxH])

    return class_ids, confidences, boxes
    
def getPeople(indices, class_ids, confidences, boxes, showBox = True):
    """Select the detections that are people and return their areas and boxes.

    When `showBox` is True this also draws a rectangle and the class label
    of every person on the module-level `frame`, using the module-level
    `classes` list for the label text.

    Parameters:
        indices: indices into `boxes` to keep, as returned by
            cv2.dnn.NMSBoxes. Both the (N, 1) layout and the flat (N,)
            layout are accepted, as is an empty result.
        class_ids: class index of every box; class 0 is treated as "person".
        confidences: unused; kept so existing callers keep working.
        boxes: list of [xUpLeft, yUpLeft, width, height].
        showBox: draw the person boxes and labels on `frame` when True.

    Returns:
        (area, personBoxes): parallel lists with width*height of every
        person box and the box itself.
    """
    area = [] # Rectangle area (width*height) of every person
    personBoxes = []

    # Depending on the OpenCV version, NMSBoxes yields an (N, 1) array or a
    # flat (N,) array (and an empty tuple when nothing survives). Flattening
    # handles all of them; the previous `i[0]` failed on the flat layout.
    for i in np.asarray(indices).reshape(-1):
        i = int(i)
        if class_ids[i] != 0:
            continue # Not a person

        box = boxes[i]
        area.append(box[2]*box[3]) # width*height
        personBoxes.append(box)

        if showBox:
            # The label is only needed for drawing, so `classes` is only
            # consulted here.
            label = str(classes[class_ids[i]])
            cv2.rectangle(frame, (round(box[0]),round(box[1])), (round(box[0]+box[2]),round(box[1]+box[3])), (0, 0, 255), 2)
            cv2.putText(frame, label, (round(box[0])-10,round(box[1])-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)
    return area, personBoxes

# YOLO & Skeleton Detection

In [6]:
from math import floor
from os import getcwd

# Prefix for the YOLO files: the current working directory with every
# occurrence of "OpenPose" removed.
path = getcwd().replace("OpenPose","")

# Class labels, one per line of coco.names, with surrounding whitespace
# stripped.
classes = None
namesPath = path+'coco.names'
with open(namesPath, 'r') as namesFile:
    classes = [label.strip() for label in namesFile]

# Person detector, built from the YOLOv3-SPP weights and configuration.
yoloWeights = path+'yolov3-spp.weights'
yoloConfig = path+'yolov3-spp.cfg'
yoloNet = cv2.dnn.readNet(yoloWeights, yoloConfig)

In [7]:
name = "VideoSkier"

cap = cv2.VideoCapture("Dataset\\"+name+".mp4")

# Stop right away if the video cannot be opened; carrying on would only fail
# a few lines below with an opaque error on `frame.shape`.
if not cap.isOpened():
    raise IOError("Error opening video stream or file")

# The output is written at half the source frame rate so it plays in slow motion
fps = cap.get(cv2.CAP_PROP_FPS)/2 # /2 for slowmo

# Read one frame to learn the frame size.
# NOTE(review): this probe frame is consumed here and is never processed or
# written to the output video - confirm that dropping it is intended.
ret, frame = cap.read()
if not ret:
    cap.release()
    raise IOError("Error opening video stream or file")

frameWidth = frame.shape[1]
frameHeight = frame.shape[0]

# Margin added around the biggest person's box before cropping, as a
# fraction of the box width/height
mConf = 0.3

fourcc = cv2.VideoWriter_fourcc(*'MP4V')

# input() always returns a str, so the answers need no type re-validation.
save = input("Save model? 0: no, Other: Name of file. ")
blurr = input("Calculate skeleton with blurred frames? 1. Yes, Other. No ")

sPoint = 1        # Drawing scale passed to showPoints
numFrame = 0      # Number of frames processed so far

farObject = True  # Starts True so that YOLO runs on the first processed frame
threshold = 0.1   # Keypoint confidence threshold passed to showPoints
thresArea = 50    # Minimum person area for the person to be cropped
thrs2Yolo = 0.04  # Person/frame area ratio below which the person is cropped

# Number of frames between periodic YOLO runs (about one second of output
# video). It must be a positive integer: `numFrame % fps` with a fractional
# fps is almost never 0, and with fps == 0 it raises ZeroDivisionError.
yoloInterval = max(1, int(round(fps)))

# The YOLO output layers do not change between frames, so resolve them once.
# getUnconnectedOutLayers() yields an (N, 1) or a flat (N,) array depending
# on the OpenCV version; flattening handles both.
layer_names = yoloNet.getLayerNames()
output_layers = [layer_names[int(i) - 1]
                 for i in np.asarray(yoloNet.getUnconnectedOutLayers()).reshape(-1)]

out = None
try:
    if save != "0":
        out = cv2.VideoWriter('Output\\'+save+'.mp4', fourcc, fps, (frameWidth, frameHeight))

    while cap.isOpened():
        ret, frame = cap.read()
        if not ret:
            break

        ###############################YOLO PERSON DETECTION############################

        # While a far person is being tracked YOLO runs on every frame;
        # otherwise only once every `yoloInterval` frames.
        if numFrame % yoloInterval == 0 or farObject:
            yoloNet.setInput(cv2.dnn.blobFromImage(frame, 0.00392, (416,416), (0,0,0), True, crop=False))
            outs = yoloNet.forward(output_layers)

            class_ids, confidences, boxes = getBoxes(outs)

            indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.1, 0.1)

            area, personBoxes = getPeople(indices, class_ids, confidences, boxes, showBox = False)

            # A person is "far" when the biggest person box is small
            # relative to the frame but still larger than thresArea.
            if area != [] and thrs2Yolo > max(area)/(frameWidth*frameHeight) and max(area) > thresArea:
                farObject = True
            else:
                farObject = False

        ###############################SKELETON DETECTION#############################

        if farObject: # Far person detected: crop the frame around the biggest person
            idx = area.index(max(area))
            box = personBoxes[idx]

            # Position of the biggest person + security range, clamped to the frame:
            # [xMin, xMax, yMin, yMax]
            posBiggest = [max(floor(box[0]-(mConf*box[2])),0), min(floor((box[0]+box[2])+(mConf*box[2])), frameWidth-1),
                        max(floor(box[1]-(mConf*box[3])),0), min(floor((box[1]+box[3])+(mConf*box[3])), frameHeight-1)]

            # Grow the crop until it reaches the skeleton network input size.
            # The target is capped at the largest span the frame allows;
            # without the cap these loops never end on frames that are not
            # larger than inWidth x inHeight.
            minCropWidth = min(inWidth, frameWidth-1)
            while posBiggest[1]-posBiggest[0] < minCropWidth:
                posBiggest[0] = max(posBiggest[0]-1,0)
                posBiggest[1] = min(posBiggest[1]+1,frameWidth-1)
            minCropHeight = min(inHeight, frameHeight-1)
            while posBiggest[3]-posBiggest[2] < minCropHeight:
                posBiggest[2] = max(posBiggest[2]-1,0)
                posBiggest[3] = min(posBiggest[3]+1,frameHeight-1)

            # `person` is a view into `frame`, so drawing on it draws on the frame
            person = frame[posBiggest[2]:posBiggest[3],posBiggest[0]:posBiggest[1]]
            skImage = person
            blobSize = (person.shape[1], person.shape[0])
        else:
            # No far person: feed the whole frame at the fixed input size
            skImage = frame
            blobSize = (inWidth, inHeight)

        if blurr == "1": # If blurred frames option is activated
            skImage = cv2.GaussianBlur(deepcopy(skImage),(5,5), sigmaX = 0, sigmaY = 1)

        inpBlob = cv2.dnn.blobFromImage(skImage, 1.0 / 255, blobSize,
                                        (0, 0, 0), swapRB=False, crop=False)

        skNet.setInput(inpBlob)

        output = skNet.forward()
        H = output.shape[2]
        W = output.shape[3]

        if farObject:
            person = showPoints(person, output, W, H, person.shape[1],
                               person.shape[0], sPoint, threshold)
            frame[posBiggest[2]:posBiggest[3],posBiggest[0]:posBiggest[1]] = person
        else:
            frame = showPoints(frame, output, W, H, frameWidth, frameHeight, sPoint, threshold)

        if save != "0": out.write(frame.astype('uint8'))
        cv2.imshow('Frame', frame.astype('uint8'))

        numFrame += 1

        # Press Q on keyboard to  exit
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Always free the capture, the writer and the preview window, even when
    # a frame fails to process.
    cap.release()
    if out is not None: out.release()
    cv2.destroyAllWindows()

Save model? 0: no, Other: Name of file. 1_1BlurrBoth
Calculate skeleton with blurred frames? 1. Yes, Other. No 1
