In [1]:
import numpy as np, cv2 as cv, imutils

In [2]:
# Build a HOG descriptor and attach OpenCV's default people-detector SVM
# coefficients so that `hog.detectMultiScale` can be used further down.
people_detector = cv.HOGDescriptor_getDefaultPeopleDetector()
hog = cv.HOGDescriptor()
hog.setSVMDetector(people_detector)

In [4]:
# Basic Object detection with pre-trained HOG + SVM
path = "images/pedestrians.jpg"
me = "images/me.jpg"
img = cv.imread(path)
# cv.imread reports a missing/unreadable file by returning None instead of
# raising, so fail here with a clear message rather than inside the detector.
if img is None:
    raise FileNotFoundError(f"Could not read image: {path}")

(rects, probs) = hog.detectMultiScale(img, winStride=(5,5), padding=(8,8), scale=1.02)

# Draw on a copy so the loaded image stays untouched.
copy = img.copy()

# Each detection is an (x, y, w, h) box paired with its SVM score.
for (x, y, w, h), score in zip(rects, probs):
    # Plain Python ints keep the drawing calls independent of the NumPy
    # integer type the detector happens to return.
    x, y, w, h = int(x), int(y), int(w), int(h)
    cv.rectangle(copy, (x, y), (x + w, y + h), (0, 255, 0), 2)
    cv.putText(copy, str(score), (x, y - 10), cv.FONT_HERSHEY_SIMPLEX, 0.5, (0, 0, 255), 2)

# Blocks until a key is pressed in the "Image" window.
cv.imshow("Image", copy)
cv.waitKey(0)
cv.destroyAllWindows()

# SSD with MobileNet

In [7]:
# Class names for the MobileNet-SSD detector; a detection's class id is used
# directly as an index into this list (see `play`).
CLASSES = [
    "background", "aeroplane", "bicycle", "bird", "boat",
    "bottle", "bus", "car", "cat", "chair", "cow", "diningtable",
    "dog", "horse", "motorbike", "person", "pottedplant", "sheep",
    "sofa", "train", "tvmonitor",
]

# One colour per class. A private, seeded generator makes the palette the
# same on every run without touching NumPy's global random state.
COLORS = np.random.RandomState(42).uniform(0, 255, size=(len(CLASSES), 3))

In [3]:
# Read the MobileNet-SSD Caffe network: the first argument is the prototxt
# definition, the second the trained .caffemodel weights.
print("Loading model...")
net = cv.dnn.readNetFromCaffe(
    "MobileNetSSD_deploy.prototxt.txt",
    "MobileNetSSD_deploy.caffemodel",
)

def play(size=400, source="/home/walt/Videos/Farm.mp4", min_conf=0.3):
    """Real-time object detection with pre-trained Mobilenet-SSD.

    Reads frames from ``source``, runs each one through the module-level
    ``net`` and draws a labelled box (using ``CLASSES`` and ``COLORS``) for
    every detection whose confidence exceeds ``min_conf``. Playback stops at
    the end of the stream or when "q" is pressed in the "Video" window.

    Parameters
    ----------
    size : int
        Width, in pixels, that each frame is resized to before display.
    source : str or int
        Video source handed to ``cv.VideoCapture``. Defaults to the path
        that was previously hard-coded.
    min_conf : float
        Detections with a confidence at or below this value are skipped.

    Raises
    ------
    IOError
        If the video source cannot be opened.

    Prints the approximate number of frames processed per second on exit.
    """
    import time  # local import: only needed for the FPS measurement

    cap = cv.VideoCapture(source)
    if not cap.isOpened():
        cap.release()
        raise IOError(f"Could not open video source: {source!r}")

    frames_done = 0
    started = time.perf_counter()
    try:
        while True:
            ok, frame = cap.read()
            if not ok:
                # End of stream (or a read error): there is no frame to process.
                break

            frame = imutils.resize(frame, width=size)
            (h, w) = frame.shape[:2]

            # The frame is resized to 300x300 for the network, so the blob is
            # built at that same size rather than being rescaled a second
            # time to (size, size).
            blob = cv.dnn.blobFromImage(cv.resize(frame, (300, 300)), 0.007843, (300, 300), 127.5)
            net.setInput(blob)
            detections = net.forward()

            for i in range(detections.shape[2]):
                conf = detections[0, 0, i, 2]
                if conf <= min_conf:
                    continue
                idx = int(detections[0, 0, i, 1])

                # Box corners are scaled by the displayed frame's width and
                # height to get pixel coordinates.
                rect = detections[0, 0, i, 3:7] * np.array([w, h, w, h])
                xA, yA, xB, yB = rect.astype(int).tolist()

                label = f"{CLASSES[idx]}: {conf*100:.2f}%"
                cv.rectangle(frame, (xA, yA), (xB, yB), COLORS[idx], 2)
                # Put the label above the box unless that would run off the top.
                y = yA - 15 if yA - 15 > 15 else yA + 15
                cv.putText(frame, label, (xA, y), cv.FONT_HERSHEY_COMPLEX, 0.5,
                           COLORS[idx])

            frames_done += 1
            cv.imshow("Video", frame)
            if cv.waitKey(1) & 0xFF == ord("q"):
                break
    finally:
        # Always free the capture and close the window, even on an error.
        elapsed = time.perf_counter() - started
        cap.release()
        cv.destroyAllWindows()

    fps = frames_done / elapsed if elapsed > 0 else 0.0
    print(f"Approximate FPS: {fps:.2f}")

Loading model...


In [4]:
# Run the MobileNet-SSD video demo with frames resized to a width of 600
# pixels; press "q" in the video window to stop.
play(size=600)

Approximate FPS: 11.67


In [2]:
# Class labels for the YOLO model, one name per line of the file.
labelsPath = "yolo-object-detection/yolo-coco/coco.names"
# A context manager closes the file handle as soon as the names are read.
with open(labelsPath) as labels_file:
    LABELS = labels_file.read().strip().split("\n")

# Fixed seed so every label gets the same colour on each run.
# NOTE(review): this rebinds the global COLORS that play() also reads —
# confirm the two demos are not meant to be run interleaved.
np.random.seed(42)
COLORS = np.random.randint(0, 255, size=(len(LABELS), 3), dtype="uint8")

In [3]:
# YOLOv3 weights file; the network config sits in the same directory, so its
# path is derived from the directory part of the weights path.
w_path = "yolo-object-detection/yolo-coco/yolov3.weights"
config = w_path.rpartition("/")[0] + "/" + "yolov3.cfg"

In [4]:
# Load the YOLOv3 network from its Darknet config and weights files.
# NOTE(review): this rebinds the global `net` that play() reads, so calling
# play() after this cell would run it against this network instead of the
# Caffe model — confirm that is intended.
net = cv.dnn.readNetFromDarknet(config, w_path)

In [14]:
# YOLOv3 object detection over a video file; press "q" in the "Vid" window
# to stop early.
cap = cv.VideoCapture("/home/walt/Videos/Farm.mp4")
if not cap.isOpened():
    print("Failed to open")

# The output-layer names do not change between frames, so resolve them once.
# getUnconnectedOutLayers() yields 1-based layer indices, either as scalars or
# as one-element arrays depending on the OpenCV build; flattening handles both.
layer_names = net.getLayerNames()
ln = [layer_names[i - 1] for i in np.array(net.getUnconnectedOutLayers()).flatten()]

try:
    while True:
        ret, img = cap.read()
        if not ret:
            # No frame was returned (end of stream, or the source never
            # opened): stop instead of dereferencing a missing image.
            break

        (h, w) = img.shape[:2]

        blob = cv.dnn.blobFromImage(img, 1/255., (416,416), swapRB=True, crop=False)
        net.setInput(blob)
        layerOutputs = net.forward(ln)
        boxes, confidences, classIDs = [], [], []

        for output in layerOutputs:
            for detection in output:
                # Entries from index 5 onwards are the per-class scores; the
                # first four are the box, scaled below by the frame size.
                scores = detection[5:]
                classID = int(np.argmax(scores))
                confidence = scores[classID]

                if confidence > 0.5:
                    box = detection[:4] * np.array([w, h, w, h])
                    (xc, yc, width, height) = box.astype("int")
                    # Convert the centre-based box to a top-left corner.
                    x = int(xc - (width / 2))
                    y = int(yc - (height / 2))
                    boxes.append([x, y, int(width), int(height)])
                    confidences.append(float(confidence))
                    classIDs.append(classID)

        # Non-maximum suppression drops overlapping boxes for the same object.
        idxs = cv.dnn.NMSBoxes(boxes, confidences, 0.5, 0.4)
        if len(idxs) > 0:
            # loop over the indexes we are keeping
            for i in np.array(idxs).flatten():
                # extract the bounding box coordinates (named so they do not
                # shadow the frame's own w/h)
                (x, y) = (boxes[i][0], boxes[i][1])
                (box_w, box_h) = (boxes[i][2], boxes[i][3])
                # draw a bounding box rectangle and label on the image
                color = [int(c) for c in COLORS[classIDs[i]]]
                cv.rectangle(img, (x, y), (x + box_w, y + box_h), color, 2)
                text = "{}: {:.4f}".format(LABELS[classIDs[i]], confidences[i])
                cv.putText(img, text, (x, y - 5), cv.FONT_HERSHEY_SIMPLEX,
                           0.5, color, 2)

        # show the output image
        cv.imshow("Vid", img)
        if cv.waitKey(1) & 0xFF == ord("q"):
            break
finally:
    # Always free the capture and close the window, even on an error.
    cap.release()
    cv.destroyAllWindows()