In [2]:
import cv2
import numpy as np
import urllib.request
import tensorflow as tf
import tarfile


In [3]:

# Download the object detection model
MODEL_URL = 'http://download.tensorflow.org/models/object_detection/ssd_mobilenet_v2_coco_2018_03_29.tar.gz'
MODEL_PATH = 'ssd_mobilenet_v2_coco_2018_03_29/saved_model'
MODEL_ARCHIVE = 'model.tar.gz'

# NOTE(review): the archive is fetched over plain HTTP and extracted without
# any member filtering; consider an HTTPS mirror and/or an extraction filter.
urllib.request.urlretrieve(MODEL_URL, MODEL_ARCHIVE)

# Bind the opened archive to its own name. Assigning it to `tarfile` would
# shadow the module and make this cell fail when it is run a second time.
# The `with` block also closes the archive once extraction is done.
with tarfile.open(MODEL_ARCHIVE, 'r:gz') as model_archive:
    model_archive.extractall()


In [4]:

# Restore the SavedModel found under MODEL_PATH and keep it in `model`
model = tf.saved_model.load(export_dir=MODEL_PATH)

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


In [5]:

# Where the label map lives: local file name and the remote file it is fetched from
label_map_path = 'mscoco_label_map.pbtxt'
label_map_url = 'https://raw.githubusercontent.com/tensorflow/models/master/research/object_detection/data/mscoco_label_map.pbtxt'

# Pick the model's 'serving_default' signature; this is the detection callable
detect_fn = model.signatures['serving_default']


In [6]:

# Fetch the label map from label_map_url and store it at label_map_path
urllib.request.urlretrieve(url=label_map_url, filename=label_map_path)


('mscoco_label_map.pbtxt', <http.client.HTTPMessage at 0x27a49ffaf50>)

In [7]:
# Load the label map into memory
def _parse_label_map(pbtxt_lines):
    """Build an ``{id: label}`` dict from the lines of a label-map text file.

    Every ``item { ... }`` entry is read field by field rather than by fixed
    line offsets, so blank lines, a trailing newline or reordered fields do
    not shift the parse. ``display_name`` is used as the label; entries that
    lack it fall back to ``name``. Entries with no ``id`` or no label at all
    are skipped.

    Args:
        pbtxt_lines: iterable of text lines (an open text file works).

    Returns:
        dict mapping the integer class id to its label string.
    """
    parsed = {}
    fields = {}
    for raw_line in pbtxt_lines:
        line = raw_line.strip()
        if line.startswith('item') and line.endswith('{'):
            # Start of a new entry: forget anything left from a broken one.
            fields = {}
        elif line.startswith('}'):
            label = fields.get('display_name', fields.get('name'))
            if 'id' in fields and label is not None:
                parsed[int(fields['id'])] = label
            fields = {}
        elif ':' in line:
            # partition() splits on the first colon only, so a label that
            # itself contains ':' is kept whole.
            key, _, value = line.partition(':')
            fields[key.strip()] = value.strip().strip('"')
    return parsed


with open(label_map_path, 'r') as f:
    category_index = _parse_label_map(f)

In [8]:
# Set the threshold for object detection
THRESHOLD = 0.25

# Define the color map for different object classes.
# Boxes are drawn on the RGB version of the frame (it is converted back to BGR
# only for display), so these tuples are in RGB order.
COLORS = [(255, 0, 0), (0, 255, 0), (0, 0, 255), (0,255,255), (255,0,255), (128,128,128), (128,0,0)]  # Red, Green, Blue, cyan, magenta, gray, maroon

# Open the webcam
cap = cv2.VideoCapture(0)

# try/finally guarantees the webcam is released and the window closed even
# when detection raises or the kernel is interrupted mid-loop.
try:
    if not cap.isOpened():
        raise RuntimeError('Could not open webcam (device 0)')

    while True:
        # Read a frame from the webcam; stop if no frame was delivered
        # (frame is None in that case and cannot be converted).
        ret, frame = cap.read()
        if not ret:
            break

        # Convert the frame to RGB
        frame = cv2.cvtColor(frame, cv2.COLOR_BGR2RGB)
        frame_height, frame_width = frame.shape[:2]

        # Run object detection on the frame (a leading batch axis is added)
        input_tensor = tf.convert_to_tensor(frame)[tf.newaxis, ...]
        detections = detect_fn(input_tensor)

        # Extract the detected objects and their scores
        boxes = detections['detection_boxes'][0].numpy()
        scores = detections['detection_scores'][0].numpy()
        classes = detections['detection_classes'][0].numpy().astype(np.int32)

        # Filter out objects with low scores; the mask is computed once and
        # reused for all three arrays.
        keep = scores > THRESHOLD

        # Draw a box around each detected object and label it
        for box, cls, score in zip(boxes[keep], classes[keep], scores[keep]):
            # Box coordinates are scaled by the frame size to get pixels.
            ymin, xmin, ymax, xmax = box
            x, y = int(xmin * frame_width), int(ymin * frame_height)
            w, h = int((xmax - xmin) * frame_width), int((ymax - ymin) * frame_height)

            cls = int(cls)
            color = COLORS[cls % len(COLORS)]
            cv2.rectangle(frame, (x, y), (x + w, y + h), color, 2)

            # Class ids missing from the label map get a generic label
            # instead of aborting the loop with a KeyError.
            label = category_index.get(cls, f"id {cls}")
            text = f"{label}: {score:.2f}"
            label_size, baseline = cv2.getTextSize(text, cv2.FONT_HERSHEY_DUPLEX, 0.6, 1)
            cv2.rectangle(frame, (x, y - label_size[1] - 10), (x + label_size[0], y), color, -1)
            cv2.putText(frame, text, (x, y - 10), cv2.FONT_HERSHEY_DUPLEX, 0.6, (0, 0, 0), 1)

        # Display the resulting frame (converted back to BGR for imshow)
        cv2.imshow('Object Detection', cv2.cvtColor(frame, cv2.COLOR_RGB2BGR))

        # Exit the loop if the 'q' key is pressed
        if cv2.waitKey(1) & 0xFF == ord('q'):
            break
finally:
    # Release the webcam and close the window
    cap.release()
    cv2.destroyAllWindows()