In [1]:
import cv2
import numpy as np
from imutils.video import VideoStream
import imutils
import time

Caffe .prototxt and weights downloaded from https://github.com/LZQthePlane/Face-detection-base-on-ResnetSSD

Pyimagesearch Notebooks used:

[Face Detection](https://www.pyimagesearch.com/2018/02/26/face-detection-with-opencv-and-deep-learning/)

[Streaming Video Into OpenCV](https://www.pyimagesearch.com/2019/04/15/live-video-streaming-over-network-with-opencv-and-imagezmq/)

[Blob explanation: how OpenCV's blobFromImage works](https://www.pyimagesearch.com/2017/11/06/deep-learning-opencvs-blobfromimage-works/)

[OBS](https://obsproject.com/download) + [OBS virtual cam](https://obsproject.com/forum/resources/obs-virtualcam.949/)

[How to use yolov3 with OpenCV](https://www.pyimagesearch.com/2018/11/12/yolo-object-detection-with-opencv/)

[Live cam at Bryant Park in New York](https://www.webcamtaxi.com/en/usa/new-york/bryant-park.html)

[Yolov3 pretrained for face detection](https://github.com/sthanhng/yoloface)


# Caffe Loaded model, MobileNetSSD, image test

In [10]:
# Load the serialized Caffe detector (network definition + trained weights) from disk.
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe("MobileNetSSD_deploy.prototxt", "MobileNetSSD_deploy10695.caffemodel")

# Load the input image. cv2.imread() returns None (it does not raise) when the
# file is missing or cannot be decoded, so fail here with a clear message
# instead of an AttributeError on `image.shape` below.
image_path = 'pedestriantestimg.jpg'
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError("could not read image '{}'".format(image_path))
(h, w) = image.shape[:2]

# Build the network input: the image resized to 300x300, scale factor 1.0,
# with the per-channel mean (104, 177, 123) subtracted.
# NOTE(review): scale and mean must match the preprocessing the weights were
# trained with -- confirm against the model's source repository.
blob = cv2.dnn.blobFromImage(cv2.resize(image, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))

# Run a forward pass; `detections` is indexed as [0, 0, i, :] by the next cell.
print("[INFO] computing object detections...")
net.setInput(blob)
detections = net.forward()

[INFO] loading model...
[INFO] computing object detections...


In [11]:
# Walk over every candidate detection row produced by the forward pass.
for detection in detections[0, 0]:
    # Element 2 of a row is the confidence of that prediction.
    confidence = detection[2]

    # Ignore weak detections.
    if not (confidence > 0.5):
        continue

    # Elements 3..6 are the box corners; scale them by the image
    # width/height to obtain integer pixel coordinates.
    corners = detection[3:7] * np.array([w, h, w, h])
    (startX, startY, endX, endY) = corners.astype("int")

    # Put the label above the box, or just inside it when the box is too
    # close to the top edge for the text to fit.
    text = "{:.2f}%".format(confidence * 100)
    if startY - 10 > 10:
        y = startY - 10
    else:
        y = startY + 10

    # Red box plus the confidence label.
    cv2.rectangle(image, (startX, startY), (endX, endY), (0, 0, 255), 2)
    cv2.putText(image, text, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

# Display the annotated image until a key is pressed, then close the window.
cv2.imshow("Output", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

# Caffe Loaded model, MobileNetSSD, video stream test

In [7]:
# Load the Caffe detector from disk.
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe("MobileNetSSD_deploy.prototxt", "MobileNetSSD_deploy10695.caffemodel")

# Open video source 1 and pause two seconds before reading from it.
print("[INFO] starting video stream...")
vs = VideoStream(src=1).start()
time.sleep(2.0)

# try/finally guarantees that the windows are closed and the stream is stopped
# even when an exception is raised inside the loop, so the capture device is
# not left open by a failed run.
try:
    while True:
        # Grab the next frame; stop cleanly when the stream delivers nothing
        # instead of crashing inside imutils.resize().
        frame = vs.read()
        if frame is None:
            print("[INFO] no frame received from the video stream, stopping...")
            break

        # Limit the frame to a width of 400 pixels.
        frame = imutils.resize(frame, width=400)

        # Frame dimensions are needed to scale the boxes back to pixels.
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (300, 300)), 1.0, (300, 300), (104.0, 177.0, 123.0))

        # Forward pass through the network.
        net.setInput(blob)
        detections = net.forward()

        for face in range(0, detections.shape[2]):
            # Confidence associated with this prediction.
            confidence = detections[0, 0, face, 2]

            # Filter out weak detections.
            if confidence < 0.5:
                continue

            # Scale the box corners to pixel coordinates.
            box = detections[0, 0, face, 3:7] * np.array([w, h, w, h])
            (startX, startY, endX, endY) = box.astype("int")

            # Draw the bounding box and its confidence label; the label goes
            # inside the box when there is no room above it.
            text = "{:.2f}%".format(confidence * 100)
            y = startY - 10 if startY - 10 > 10 else startY + 10
            cv2.rectangle(frame, (startX, startY), (endX, endY), (0, 0, 255), 2)
            cv2.putText(frame, text, (startX, y), cv2.FONT_HERSHEY_SIMPLEX, 0.45, (0, 0, 255), 2)

        # Show the annotated frame.
        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # Pressing 'q' ends the loop.
        if key == ord("q"):
            break
finally:
    # Cleanup runs on normal exit and on error alike.
    cv2.destroyAllWindows()
    vs.stop()

[INFO] loading model...
[INFO] starting video stream...


# HOGDescriptor tests

In [13]:
from imutils.object_detection import non_max_suppression

# HOG descriptor configured with OpenCV's default people-detector SVM.
hog = cv2.HOGDescriptor()
hog.setSVMDetector(cv2.HOGDescriptor_getDefaultPeopleDetector())

# cv2.imread() returns None (it does not raise) when the file is missing or
# cannot be decoded, so check before touching `image.shape`.
image_path = 'pedestriantestimg.jpg'
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError("could not read image '{}'".format(image_path))

# Shrink to at most 400 pixels wide (never enlarge).
image = imutils.resize(image, width=min(400, image.shape[1]))

# Keep an untouched copy to show the raw detections before suppression.
orig = image.copy()

(rects, weights) = hog.detectMultiScale(image, winStride=(4,4), padding=(8,8), scale=1.05)

# Raw detections in red on the copy.
for (x,y,w,h) in rects:
    cv2.rectangle(orig, (x,y), (x+w, y+h), (0,0,255), 2)

# Convert (x, y, w, h) boxes to (x1, y1, x2, y2) corners, the layout passed to
# non_max_suppression, then merge boxes that overlap heavily.
rects = np.array([[x,y,x+w, y+h] for (x,y,w,h) in rects])
pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

# Surviving boxes in green on the working image.
for (xA, yA, xB, yB) in pick:
    cv2.rectangle(image, (xA, yA), (xB, yB), (0,255,0), 2)

print("[INFO] {} original boxes, {} after suppression".format(len(rects), len(pick)))

# Show both versions until a key is pressed.
cv2.imshow("Before NMS", orig)
cv2.imshow("After NMS", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

[INFO] 6 original boxes, 6 after suppression


In [16]:
# Open video source 1 and pause two seconds before reading from it.
# This cell reuses the `hog` detector configured in the previous cell.
print("[INFO] starting video stream...")
vs = VideoStream(src=1).start()
time.sleep(2.0)

# try/finally guarantees that the windows are closed and the stream is stopped
# even when an exception is raised inside the loop.
try:
    while True:
        # Stop cleanly when the stream delivers nothing instead of crashing
        # on `frame.shape`.
        frame = vs.read()
        if frame is None:
            print("[INFO] no frame received from the video stream, stopping...")
            break

        # Shrink to at most 400 pixels wide and keep a copy for the raw boxes.
        frame = imutils.resize(frame, width=min(400, frame.shape[1]))
        orig = frame.copy()

        (rects, weights) = hog.detectMultiScale(frame, winStride=(4,4), padding=(8,8), scale=1.01)

        # Raw detections in green on the copy.
        for (x,y,w,h) in rects:
            cv2.rectangle(orig, (x, y), (x+w, y+h), (0,255,0), 2)

        # Convert (x, y, w, h) to corner form and suppress overlapping boxes.
        rects = np.array([[x,y,x+w,y+h] for (x,y,w,h) in rects])
        pick = non_max_suppression(rects, probs=None, overlapThresh=0.65)

        # Surviving boxes in red on the live frame.
        for (x1,y1,x2,y2) in pick:
            cv2.rectangle(frame, (x1, y1), (x2,y2), (0,0,255), 2)

        cv2.imshow("Frame", frame)
        cv2.imshow("Orig", orig)
        key = cv2.waitKey(1) & 0xFF

        # Pressing 'q' ends the loop.
        if key == ord("q"):
            break
finally:
    # Cleanup runs on normal exit and on error alike.
    cv2.destroyAllWindows()
    vs.stop()

[INFO] starting video stream...


# Yolov4 Pretrained 80 classes (COCO dataset)

In [55]:
import time
# Load the serialized Darknet model (config + weights) from disk.
print("[INFO] loading model...")
net = cv2.dnn.readNetFromDarknet("yolov4.cfg", "yolov4.weights")

# Class names and one colour per class. They are loaded here so that this cell
# runs on a fresh kernel; previously LABELS/COLORS only existed if a later
# cell had already been executed. The same seed gives the same colours.
with open("./coco.names") as names_file:
    LABELS = names_file.read().strip().split("\n")
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype='uint8')

# Keep only the names of the output layers. getUnconnectedOutLayers() returns
# 1-based indices, either nested ([[i], ...]) or flat ([i, ...]) depending on
# the OpenCV version; flattening handles both layouts.
layer_names = net.getLayerNames()
ln = [layer_names[int(i) - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

# Load the input image. cv2.imread() returns None (it does not raise) when the
# file is missing or cannot be decoded.
image_path = 'pedestriantestimg.jpg'
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError("could not read image '{}'".format(image_path))
(h, w) = image.shape[:2]

# Build a 416x416 blob: pixel values scaled by 1/255 and the first and last
# colour channels swapped (swapRB).
blob = cv2.dnn.blobFromImage(cv2.resize(image, (416, 416) ), 1/ 255.0, (416, 416), swapRB=True, crop=False)

# Forward pass, timed.
print("[INFO] computing object detections...")
net.setInput(blob)
start = time.time()
layerOutputs = net.forward(ln)
end = time.time()

print("[INFO] YOLO took {:.6f} seconds".format(end - start))

# Detected bounding boxes, confidences and class IDs, index-aligned.
boxes = []
confidences = []
classIDs = []

for output in layerOutputs:
    for detection in output:
        # Entries from index 5 onward are the per-class scores; keep the best one.
        scores = detection[5:]
        classID = np.argmax(scores)
        confidence = scores[classID]

        if confidence > 0.5:
            # The first four entries are (centre x, centre y, width, height);
            # scale them by the image size to get pixels.
            box = detection[0:4] * np.array([w, h, w, h])
            (centerX, centerY, width, height) = box.astype("int")

            # Convert centre-based coordinates to the top-left corner.
            x = int(centerX - (width/2))
            y = int(centerY - (height/2))

            boxes.append([x, y, int(width), int(height)])
            confidences.append(float(confidence))
            classIDs.append(classID)

# Non-maxima suppression (score threshold 0.5, overlap threshold 0.3).
idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.3)

# The result may be empty, nested or flat depending on the OpenCV version;
# flattening covers every case and makes the loop a no-op when empty.
for obj in np.array(idxs).flatten():
    obj = int(obj)
    (x, y) = (boxes[obj][0], boxes[obj][1])
    # Separate names so the image-level `w`/`h` above are not overwritten.
    (box_w, box_h) = (boxes[obj][2], boxes[obj][3])

    color = [int(c) for c in COLORS[classIDs[obj]]]
    cv2.rectangle(image, (x, y), (x+box_w, y+box_h), color, 2)
    text = "{}: {:.4f}".format(LABELS[classIDs[obj]], confidences[obj])
    cv2.putText(image, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

# Show the annotated image until a key is pressed.
cv2.imshow("image", image)
cv2.waitKey(0)
cv2.destroyAllWindows()

[INFO] loading model...
[INFO] computing object detections...
[INFO] YOLO took 0.610000 seconds


In [9]:
import os
import numpy as np
import cv2
import numpy as np
from imutils.video import VideoStream
import imutils
import time

# Class names for the detector, one per line in coco.names.
labelsPath = "./coco.names"
# The context manager closes the file as soon as it has been read; the bare
# open(...).read() used previously never closed the handle explicitly.
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

# One colour per class; the fixed seed keeps colours identical between runs.
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype='uint8')

In [3]:
print("[INFO] loading model...")
net = cv2.dnn.readNetFromDarknet("yolov4.cfg", "yolov4.weights")

# Keep only the names of the output layers. getUnconnectedOutLayers() returns
# 1-based indices, either nested ([[i], ...]) or flat ([i, ...]) depending on
# the OpenCV version; flattening handles both layouts.
layer_names = net.getLayerNames()
ln = [layer_names[int(i) - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

from imutils.video import VideoStream
vs = VideoStream(src=1).start()

# try/finally guarantees that the windows are closed and the capture is
# released even when an exception is raised inside the loop.
try:
    while True:
        # Stop cleanly when the stream delivers nothing instead of crashing
        # on `frame.shape`.
        frame = vs.read()
        if frame is None:
            print("[INFO] no frame received from the video stream, stopping...")
            break

        # Build a 608x608 blob: pixel values scaled by 1/255, channels swapped.
        (h, w) = frame.shape[:2]
        blob = cv2.dnn.blobFromImage(cv2.resize(frame, (608, 608) ), 1/ 255.0, (608, 608), swapRB=True, crop=False)

        # Forward pass through the output layers.
        net.setInput(blob)
        layerOutputs = net.forward(ln)

        # Detected bounding boxes, confidences and class IDs, index-aligned.
        boxes = []
        confidences = []
        classIDs = []

        for output in layerOutputs:
            for detection in output:
                # Entries from index 5 onward are the per-class scores.
                scores = detection[5:]
                classID = np.argmax(scores)
                confidence = scores[classID]

                if confidence > 0.6:
                    # (centre x, centre y, width, height) scaled to pixels.
                    box = detection[0:4] * np.array([w, h, w, h])
                    (centerX, centerY, width, height) = box.astype("int")

                    # Convert centre-based coordinates to the top-left corner.
                    x = int(centerX - (width/2))
                    y = int(centerY - (height/2))

                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        # Non-maxima suppression (score threshold 0.6, overlap threshold 0.3).
        idxs = cv2.dnn.NMSBoxes(boxes, confidences, 0.6, 0.3)

        # The result may be empty, nested or flat depending on the OpenCV
        # version; flattening covers every case.
        for obj in np.array(idxs).flatten():
            obj = int(obj)
            (x, y) = (boxes[obj][0], boxes[obj][1])
            # Separate names so the frame-level `w`/`h` are not overwritten.
            (box_w, box_h) = (boxes[obj][2], boxes[obj][3])

            color = [int(c) for c in COLORS[classIDs[obj]]]
            cv2.rectangle(frame, (x, y), (x+box_w, y+box_h), color, 2)
            text = "{}: {:.4f}".format(LABELS[classIDs[obj]], confidences[obj])
            cv2.putText(frame, text, (x, y - 5), cv2.FONT_HERSHEY_SIMPLEX, 0.5, color, 2)

        cv2.imshow("Frame", frame)
        key = cv2.waitKey(1) & 0xFF

        # Pressing 'q' ends the loop.
        if key == ord("q"):
            break
finally:
    # Cleanup runs on normal exit and on error alike.
    cv2.destroyAllWindows()
    vs.stop()
    vs.stream.release()

[INFO] loading model...


# Yolov3 pretrained on face detection.

In [11]:
# Detection thresholds for the face-detection section.
CONF_THRESHOLD, NMS_THRESHOLD = 0.5, 0.4

# Network input resolution (square).
IMG_WIDTH = IMG_HEIGHT = 416

# Default drawing colours as 3-tuples, in the channel order OpenCV draws with.
COLOR_BLUE   = (255, 0, 0)
COLOR_GREEN  = (0, 255, 0)
COLOR_RED    = (0, 0, 255)
COLOR_WHITE  = (255, 255, 255)
COLOR_YELLOW = (0, 255, 255)


def get_outputs_names(net):
    """Return the names of the network's output layers.

    Output layers are the ones reported by ``net.getUnconnectedOutLayers()``.

    Args:
        net: object exposing ``getLayerNames()`` and
            ``getUnconnectedOutLayers()`` (e.g. a ``cv2.dnn`` network).

    Returns:
        list of layer names, in the order the indices are reported.
    """
    # Names of all the layers in the network.
    layers_names = net.getLayerNames()

    # The reported indices are 1-based. Depending on the OpenCV version they
    # arrive nested ([[i], ...]) or flat ([i, ...]); flattening accepts both,
    # whereas indexing each entry with [0] fails on the flat layout.
    out_indices = np.array(net.getUnconnectedOutLayers()).flatten()
    return [layers_names[int(i) - 1] for i in out_indices]

# Draw the predicted bounding box
def draw_predict(frame, conf, left, top, right, bottom):
    """Draw a yellow box with a white confidence label on ``frame``.

    The rectangle spans (left, top) to (right, bottom); ``conf`` is written
    with two decimals just above the top-left corner. Nothing is returned.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX

    # Bounding box outline.
    cv2.rectangle(frame, (left, top), (right, bottom), COLOR_YELLOW, 2)

    label = format(conf, '.2f')

    # Measure the label so its anchor is never placed higher than the text
    # height allows.
    (_, text_height), _ = cv2.getTextSize(label, font, 0.5, 1)
    label_top = max(top, text_height)

    cv2.putText(frame, label, (left, label_top - 4), font, 0.4, COLOR_WHITE, 1)
    
def refined_box(left, top, width, height):
    """Turn a (left, top, width, height) box into a squared-off corner box.

    The top edge is moved down by 15% of the original height and the bottom
    edge up by 5%; the box is then widened or narrowed horizontally so that
    its width equals the new height.

    Returns:
        (left, top, right, bottom)
    """
    right = left + width
    bottom = top + height

    # Trim the box vertically, relative to its original height.
    full_height = bottom - top
    top = int(top + full_height * 0.15)
    bottom = int(bottom - full_height * 0.05)

    # Half of the height/width difference is applied to each side ...
    margin = ((bottom - top) - (right - left)) // 2

    # ... and an odd difference puts the leftover pixel on the left edge.
    if (bottom - top - right + left) % 2:
        left = left - margin - 1
    else:
        left = left - margin

    right = right + margin

    return left, top, right, bottom

def crop_pic(frame, left, top, right, bottom):
    """Return the region of ``frame`` spanning the given box, edges inclusive.

    Coordinates are clamped at 0 first. A negative start or stop would
    otherwise be read by numpy as "counted from the end" and yield an empty
    or unrelated region; ``refined_box`` can produce negative ``left`` values
    for boxes near the image border. Coordinates past the far edges are
    already clipped by slicing itself.

    Args:
        frame: 2-D or 3-D array indexed as [row, column, ...].
        left, top, right, bottom: box corners in pixels.

    Returns:
        The sliced sub-array (empty when the box lies outside the frame).
    """
    row_start = max(0, top)
    col_start = max(0, left)
    # +1 because the bottom/right edges are part of the crop.
    row_stop = max(0, bottom + 1)
    col_stop = max(0, right + 1)
    return frame[row_start:row_stop, col_start:col_stop]

In [14]:
import os
import cv2
import numpy as np
import imutils
import time


# Load the Darknet face detector (config + weights).
net = cv2.dnn.readNetFromDarknet("yolov3-face.cfg", "yolov3-wider_16000.weights")

# cv2.imread() returns None (it does not raise) when the file is missing or
# cannot be decoded, so check before resizing.
image_path = 'profpicalex.jpg'
image = cv2.imread(image_path)
if image is None:
    raise FileNotFoundError("could not read image '{}'".format(image_path))
image = imutils.resize(image, width=1000)
(h, w) = image.shape[:2]

# NOTE(review): unlike the other YOLO cells in this notebook, this blob is
# built without swapRB=True -- confirm which channel order these weights expect.
blob = cv2.dnn.blobFromImage(cv2.resize(image, (416, 416)), 1/255.0, (416,416), crop=False)

# Keep only the names of the output layers. getUnconnectedOutLayers() returns
# 1-based indices, either nested ([[i], ...]) or flat ([i, ...]) depending on
# the OpenCV version; flattening handles both layouts.
layer_names = net.getLayerNames()
ln = [layer_names[int(i) - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

net.setInput(blob)
Outs = net.forward(ln)

# Candidate boxes as [left, top, width, height] with index-aligned confidences.
confidences = []
boxes = []
final_boxes = []
for out in Outs:
    for detection in out:
        # Entries from index 5 onward are the per-class scores.
        scores  = detection[5:]
        class_id = np.argmax(scores)
        confidence = scores[class_id]
        if confidence > 0.5:
            # Scale the centre/size values by the image size to get pixels,
            # then derive the top-left corner.
            center_x = int(detection[0] * w)
            center_y = int(detection[1] * h)
            width = int(detection[2] * w)
            height = int(detection[3] * h)
            left = int(center_x - width/2)
            top = int(center_y - height/2)
            confidences.append(float(confidence))
            boxes.append([left, top, width, height])

# NOTE(review): the literals 0.5 / 0.3 are used here rather than
# CONF_THRESHOLD / NMS_THRESHOLD (0.5 / 0.4) -- confirm which is intended.
indices = cv2.dnn.NMSBoxes(boxes, confidences, 0.5, 0.3)

# `crop` stays None when nothing survives suppression, so the display code
# below does not fail with a NameError.
crop = None

# The result may be empty, nested or flat depending on the OpenCV version;
# flattening covers every case.
for i in np.array(indices).flatten():
    i = int(i)
    box = boxes[i]
    left, top, width, height = box
    final_boxes.append(box)
    left, top, right, bottom = refined_box(left, top, width, height)
    draw_predict(image, confidences[i], left, top, right, bottom)
    # Only the crop of the last surviving box is kept for display.
    crop = crop_pic(image, left, top, right, bottom)


cv2.imshow("Image", image)
# Skip the crop window when no face was found or the crop came out empty.
if crop is not None and crop.size > 0:
    cv2.imshow("Crop", crop)
else:
    print("[INFO] no face crop to display")
cv2.waitKey(0)
cv2.destroyAllWindows()

0.9731691

(507, 85)