In [8]:
# !pip install opencv-python
# !pip install tensorflow
!pip install --upgrade tensorflow-hub

Collecting tensorflow-hub
  Downloading tensorflow_hub-0.13.0-py2.py3-none-any.whl (100 kB)
     -------------------------------------- 100.6/100.6 kB 1.9 MB/s eta 0:00:00
Installing collected packages: tensorflow-hub
Successfully installed tensorflow-hub-0.13.0



[notice] A new release of pip available: 22.3.1 -> 23.1.2
[notice] To update, run: python.exe -m pip install --upgrade pip


In [1]:
import cv2
import tensorflow as tf
import numpy as np

In [2]:
def preprocess(image):
    # Convert the image to RGB
    image_rgb = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    
    # Convert the image to a tensor
    input_tensor = tf.convert_to_tensor(image_rgb)
    
    # Resize the image to the expected size
    resized_tensor = tf.image.resize(input_tensor, [640, 640])
    
    # Add an extra dimension for the batch size
    input_tensor = tf.expand_dims(resized_tensor, 0)
    
    # Convert the tensor to tf.uint8
    input_tensor = tf.cast(input_tensor, tf.uint8)
    
    return input_tensor


In [3]:
def postprocess(outputs):
    # Get the number of detections
    num_detections = len(outputs['detection_scores'])

    # Get the detection classes and convert to numpy array
    detection_classes = outputs['detection_classes'].numpy()

    # Get the detection boxes and convert to numpy array
    detection_boxes = outputs['detection_boxes'].numpy()

    # Get the detection scores and convert to numpy array
    detection_scores = outputs['detection_scores'].numpy()

    # Filter out detections with low scores
    min_score = 0.5
    indices = np.where(detection_scores >= min_score)

    # Return filtered detections
    final_boxes = detection_boxes[indices]
    final_classes = detection_classes[indices]
    final_scores = detection_scores[indices]

    return final_boxes, final_classes, final_scores


In [4]:
class_names = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", 
    "train", "truck", "boat", "traffic light", "fire hydrant", 
    "street sign", "stop sign", "parking meter", "bench", "bird", 
    "cat", "dog", "horse", "sheep", "cow", "elephant", "bear", 
    "zebra", "giraffe", "hat", "backpack", "umbrella", "shoe", 
    "eye glasses", "handbag", "tie", "suitcase", "frisbee", 
    "skis", "snowboard", "sports ball", "kite", "baseball bat", 
    "baseball glove", "skateboard", "surfboard", "tennis racket", 
    "bottle", "plate", "wine glass", "cup", "fork", "knife", 
    "spoon", "bowl", "banana", "apple", "sandwich", "orange", 
    "broccoli", "carrot", "hot dog", "pizza", "donut", "cake", 
    "chair", "couch", "potted plant", "bed", "mirror", "dining table", 
    "window", "desk", "toilet", "door", "tv", "laptop", "mouse", 
    "remote", "keyboard", "cell phone", "microwave", "oven", 
    "toaster", "sink", "refrigerator", "blender", "book", "clock", 
    "vase", "scissors", "teddy bear", "hair drier", "toothbrush", 
    "hair brush"
]

In [5]:
def get_top_person(boxes, classes, scores):
    # Define the class index for "person"
    person_index = class_names.index('person') + 1
    print(person_index)
    # Initialize the highest score and corresponding box and class
    highest_score = -1
    highest_box = None
    highest_class = None

    # Iterate over all the classes
    for i in range(len(classes)):
        # Check if the class is "person" and if its score is higher than the highest score found so far
        if classes[i] == person_index and scores[i] > highest_score:
            highest_score = scores[i]
            highest_box = boxes[i]
            highest_class = classes[i]

    # Return the highest scoring "person" box and class
    return highest_box, highest_class, highest_score

In [10]:
def visualize_detections(image, boxes, classes, scores):

    # image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = np.array(image, dtype=np.uint8)
    
    for i in range(len(boxes)):
        box = boxes[i]
        class_id = classes[i]
        score = scores[i]

        # Get the bounding box coordinates
        ymin, xmin, ymax, xmax = box

        # Convert coordinates to integer
        ymin = int(ymin * image.shape[0])
        xmin = int(xmin * image.shape[1])
        ymax = int(ymax * image.shape[0])
        xmax = int(xmax * image.shape[1])

        # Draw the bounding box on the image
        cv2.rectangle(image, (xmin, ymin), (xmax, ymax), (0, 255, 0), 2)

        # Prepare the label
        label = f"{class_names[int(class_id)-1]}: {score:.2f}"

        # Put the label on the image
        cv2.putText(image, label, (xmin, ymin-10), cv2.FONT_HERSHEY_SIMPLEX, 0.9, (0, 255, 0), 2)

    return image

# Use the function


In [19]:
import tensorflow as tf
import tensorflow_hub as hub

# Specify the model's URL from TensorFlow Hub
module_handle = "https://tfhub.dev/tensorflow/ssd_mobilenet_v2/2"
# Load the model
detector = hub.load(module_handle)

# Save the model locally
tf.saved_model.save(detector, "./model/")



INFO:tensorflow:Assets written to: ./model/assets


INFO:tensorflow:Assets written to: ./model/assets


In [7]:
model = tf.saved_model.load('./model/')

In [11]:
# Open the webcam
cap = cv2.VideoCapture(0)

while True:
    # Read a frame from the webcam
    ret, frame = cap.read()

    if not ret:
        break

    # Preprocess the frame
    input_frame = preprocess(frame)

    # Run the model
    outputs = model(input_frame)

    # Postprocess the outputs
    boxes, classes, scores = postprocess(outputs)
    print(classes)

    if boxes is not None:
        frame = visualize_detections(frame, boxes, classes, scores)

    # Display the frame
    cv2.imshow('Webcam Stream', frame)

    # Break the loop if 'q' is pressed
    if cv2.waitKey(1) & 0xFF == ord('q'):
        break

# Release the webcam and close the window
cap.release()
cv2.destroyAllWindows()

[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1. 1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[ 1. 77.]
[ 1. 77.]
[ 1. 77.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1.]
[1. 1.]
[1. 1.]
[1. 1.]
[1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 1.]
[1. 1. 