# Object Detection with YOLO

This notebook provides a ready-to-run setup of the You Only Look Once (YOLO) v3 network for object detection. The [YOLO family of models](https://www.cv-foundation.org/openaccess/content_cvpr_2016/html/Redmon_You_Only_Look_CVPR_2016_paper.html) was created by [Joseph Chet Redmon](https://pjreddie.com/).  Training these models requires large data sets like [ImageNet](https://www.image-net.org/) and [Microsoft COCO](https://cocodataset.org/) and significant compute resources, making it infeasible for most users to train their own models.  Thankfully, the researchers have released the weights of their trained models, and the community has developed code that allows these models to be run with frameworks like PyTorch and TensorFlow.  This notebook is based on [code](https://github.com/experiencor/keras-yolo3) released under the MIT license by [Huynh Ngoc Anh](https://github.com/experiencor) to run the pre-trained YOLO model in Keras; the cells below run the pre-trained model with OpenCV's DNN module (`cv2.dnn`) instead.

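To use this notebook, you will need to download the YOLOv3 model weights from [https://pjreddie.com/media/files/yolov3.weights](https://pjreddie.com/media/files/yolov3.weights) and the matching network configuration file `yolov3.cfg` from the [Darknet repository](https://github.com/pjreddie/darknet/blob/master/cfg/yolov3.cfg).  Place both the `yolov3.weights` and `yolov3.cfg` files in the same directory as this notebook.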

You will need to change the `included_images` list so that it names your own image files. Annotated copies of the images are written to the directory given by `output_dir`.

In [1]:
import cv2
import numpy as np
import os
from pathlib import Path
import urllib.request

In [2]:
# Parameters

# Darknet model files, looked up relative to the notebook's working directory.
weights_path, config_path = "yolov3.weights", "yolov3.cfg"

# Folder that receives the annotated images; created here if it does not exist yet.
output_dir = "outputs"
Path(output_dir).mkdir(parents=False, exist_ok=True)

# Supports .png, .jpg, and .jpeg files
included_images = [
    "IMG_1703.jpeg", "IMG_1704.jpeg", "IMG_4134.jpeg",
    "IMG_5415.jpg", "IMG_7207.jpg",
]

# Optional: download a couple of web images to use as "own" images
web_images = {
    "web_dog.jpg": "https://raw.githubusercontent.com/pjreddie/darknet/master/data/dog.jpg",
    "web_giraffe.jpg": "https://raw.githubusercontent.com/pjreddie/darknet/master/data/giraffe.jpg",
}


def _download_if_missing(url, filename, timeout=30):
    """Fetch ``url`` into ``filename`` unless that file already exists.

    The payload is first written to a temporary ``.part`` sibling and renamed
    onto ``filename`` only after the whole response has been received. An
    interrupted transfer therefore never leaves a truncated file behind that
    a later run would mistake for a finished download.

    Args:
        url: Address to download from.
        filename: Destination path of the downloaded file.
        timeout: Seconds to wait on the connection before giving up.

    Raises:
        urllib.error.URLError: Propagated from ``urlopen`` when the request fails.
    """
    target = Path(filename)
    if target.exists():
        return

    partial = target.with_name(target.name + ".part")
    try:
        with urllib.request.urlopen(url, timeout=timeout) as response:
            partial.write_bytes(response.read())
        partial.replace(target)
    finally:
        # Nothing to remove after a successful rename; cleans up after a failure.
        if partial.exists():
            partial.unlink()


for filename, url in web_images.items():
    _download_if_missing(url, filename)

own_images = list(web_images)

# Height and width (in pixels) of the input blob fed to the network.
net_h = net_w = 416

# obj_thresh: score a detection must exceed to be kept.
# nms_thresh: threshold handed to non-maximum suppression.
obj_thresh = 0.5
nms_thresh = 0.45

# Class names, indexed by the class id the network predicts.
labels = [
    "person", "bicycle", "car", "motorbike", "aeroplane", "bus", "train", "truck",
    "boat", "traffic light", "fire hydrant", "stop sign", "parking meter", "bench",
    "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", "zebra", "giraffe",
    "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard",
    "sports ball", "kite", "baseball bat", "baseball glove", "skateboard", "surfboard",
    "tennis racket", "bottle", "wine glass", "cup", "fork", "knife", "spoon", "bowl", "banana",
    "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza", "donut", "cake",
    "chair", "sofa", "pottedplant", "bed", "diningtable", "toilet", "tvmonitor", "laptop", "mouse",
    "remote", "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator",
    "book", "clock", "vase", "scissors", "teddy bear", "hair drier", "toothbrush",
]



In [3]:
# Load YOLOv3 model via OpenCV DNN
# Check for both model files first so a missing download produces a clear
# FileNotFoundError before OpenCV is asked to parse anything.
if not os.path.exists(weights_path):
    raise FileNotFoundError(f"Missing weights file: {weights_path}")
if not os.path.exists(config_path):
    raise FileNotFoundError(f"Missing config file: {config_path}")

# Build the network from the Darknet config/weights pair and remember the
# names of its unconnected output layers for the forward pass.
net = cv2.dnn.readNetFromDarknet(config_path, weights_path)
output_layer_names = net.getUnconnectedOutLayersNames()


def _class_label(class_id):
    """Return the entry of ``labels`` for ``class_id``, or the id as a string if it is out of range."""
    return labels[class_id] if class_id < len(labels) else str(class_id)


def detect_and_draw(image_path, output_path):
    """Detect objects in one image and save an annotated copy of it.

    The image is read with OpenCV, passed through the module-level ``net``,
    and every prediction whose best class score exceeds ``obj_thresh`` becomes
    a candidate box. Candidates are filtered with non-maximum suppression
    (``nms_thresh``); the surviving boxes are drawn onto the image together
    with their label and score, and the result is written to ``output_path``.

    Args:
        image_path: Path of the image to analyse.
        output_path: Path the annotated image is written to.

    Returns:
        A dict with the keys ``"image_path"``, ``"output_path"`` and
        ``"detections"``. Each detection is a dict with ``"label"``,
        ``"confidence"`` and ``"box"`` (``[x, y, w, h]`` in pixels of the
        input image, ``x``/``y`` being the top-left corner).

    Raises:
        FileNotFoundError: If ``cv2.imread`` cannot read ``image_path``.
        OSError: If ``cv2.imwrite`` reports that the annotated image could
            not be written.
    """
    image = cv2.imread(image_path)
    if image is None:
        raise FileNotFoundError(f"Could not read image: {image_path}")

    height, width = image.shape[:2]
    blob = cv2.dnn.blobFromImage(
        image, 1 / 255.0, (net_w, net_h), swapRB=True, crop=False
    )

    net.setInput(blob)
    outputs = net.forward(output_layer_names)

    boxes = []
    confidences = []
    class_ids = []

    for output in outputs:
        for detection in output:
            # Entries from index 5 onwards are the per-class scores.
            scores = detection[5:]
            class_id = int(np.argmax(scores))
            confidence = float(scores[class_id])
            if confidence <= obj_thresh:
                continue
            # The first four entries are scaled by the image size and read as
            # box centre (x, y) plus box width and height.
            center_x = int(detection[0] * width)
            center_y = int(detection[1] * height)
            w = int(detection[2] * width)
            h = int(detection[3] * height)
            x = int(center_x - w / 2)
            y = int(center_y - h / 2)
            boxes.append([x, y, w, h])
            confidences.append(confidence)
            class_ids.append(class_id)

    indices = cv2.dnn.NMSBoxes(boxes, confidences, obj_thresh, nms_thresh)
    # Normalise the result to a flat list of Python ints; this also covers an
    # empty, non-array return value.
    kept = np.asarray(indices).flatten().tolist()

    detections = [
        {
            "label": _class_label(class_ids[i]),
            "confidence": confidences[i],
            "box": boxes[i],
        }
        for i in kept
    ]

    for found in detections:
        x, y, w, h = found["box"]
        label = found["label"]
        score = found["confidence"]
        cv2.rectangle(image, (x, y), (x + w, y + h), (0, 255, 0), 2)
        cv2.putText(
            image,
            f"{label} {score:.2f}",
            (x, max(y - 10, 0)),
            cv2.FONT_HERSHEY_SIMPLEX,
            0.5,
            (0, 255, 0),
            2,
        )

    # imwrite signals failure through its return value; surface it instead of
    # returning an output_path that was never written.
    if not cv2.imwrite(output_path, image):
        raise OSError(f"Could not write image: {output_path}")

    return {
        "image_path": image_path,
        "output_path": output_path,
        "detections": detections,
    }


In [4]:
# Run detection on the bundled images followed by the downloaded ones and
# collect one result dict per image.
results = []

for image_path in [*included_images, *own_images]:
    image_file = Path(image_path)
    # Output keeps the input's extension: <stem>_detected<suffix> inside output_dir.
    output_path = os.path.join(
        output_dir, image_file.stem + "_detected" + image_file.suffix
    )
    results.append(detect_and_draw(image_path, output_path))

results

[{'image_path': 'IMG_1703.jpeg',
  'output_path': 'outputs\\IMG_1703_detected.jpeg',
  'detections': [{'label': 'person',
    'confidence': 0.9990772008895874,
    'box': [587, 1217, 220, 660]},
   {'label': 'person',
    'confidence': 0.9962482452392578,
    'box': [2517, 1354, 186, 405]},
   {'label': 'person',
    'confidence': 0.9955952167510986,
    'box': [3720, 1324, 241, 791]},
   {'label': 'person',
    'confidence': 0.9912199378013611,
    'box': [9, 1304, 430, 705]},
   {'label': 'person',
    'confidence': 0.9796616435050964,
    'box': [2356, 1325, 179, 412]},
   {'label': 'person',
    'confidence': 0.9664183855056763,
    'box': [1473, 1280, 185, 319]},
   {'label': 'person',
    'confidence': 0.962527871131897,
    'box': [3571, 1334, 183, 674]},
   {'label': 'person',
    'confidence': 0.9616000056266785,
    'box': [2118, 1391, 151, 392]},
   {'label': 'person',
    'confidence': 0.9453965425491333,
    'box': [1724, 1359, 144, 352]},
   {'label': 'chair',
    'confid