In [1]:
#1 run each of the models and have them output images generated into a separate folder --- unique to the model name

# Faster R-CNN
# FCOS
# RetinaNet
# SSD
# SSDlite


In [2]:
import os
from torchvision.io.image import read_image
from torchvision.models.detection import (
    # Faster R-CNN
    fasterrcnn_resnet50_fpn_v2,
    FasterRCNN_ResNet50_FPN_V2_Weights,
    # FCOS
    fcos_resnet50_fpn,
    FCOS_ResNet50_FPN_Weights,
    # RetinaNet
    retinanet_resnet50_fpn_v2,
    RetinaNet_ResNet50_FPN_V2_Weights,
    # SSD
    ssd300_vgg16,
    SSD300_VGG16_Weights,
    # SSDlite
    ssdlite320_mobilenet_v3_large,
    SSDLite320_MobileNet_V3_Large_Weights
)
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image
from PIL import ImageFont
from IPython.display import display

In [3]:
def object_detection(
    model, weights, image_path, output_directory="outputs", threshold=0.9
):
    img = read_image(image_path)
    font_path = os.path.abspath("fonts/OpenSans-Regular.ttf")
    # Step 1: Initialize model with the best available weights
    weights = weights.DEFAULT
    model_name = model.__name__
    model = model(weights=weights, box_score_thresh=threshold)
    model.eval()

    # Step 2: Initialize the inference transforms
    preprocess = weights.transforms()

    # Step 3: Apply inference preprocessing transforms
    batch = [preprocess(img)]

    # Step 4: Use the model and visualize the prediction
    prediction = model(batch)[0]
    labels = [weights.meta["categories"][i] for i in prediction["labels"]]
    scores = prediction["scores"]
    labels_with_scores = [
        f"{label} {score:.2f}" for label, score in zip(labels, scores)
    ]

    box = draw_bounding_boxes(
        img,
        boxes=prediction["boxes"],
        labels=labels_with_scores,
        colors="red",
        width=4,
        font=font_path,
        font_size=20,
    )
    im = to_pil_image(box.detach())

    # creating the general output dirctory
    if not os.path.exists(output_directory):
        os.makedirs(output_directory)

    # creating the model's output directory:
    model_output_directory = os.path.join(output_directory, model_name)
    if not os.path.exists(model_output_directory):
        os.makedirs(model_output_directory)
    # extracting the image base name:
    base_name = os.path.basename(image_path)

    # creating the full image output file name
    output_path = os.path.join(model_output_directory, base_name)
    print(f"Model Output saved to {output_path}")
    im.save(output_path, "JPEG")

In [6]:
for image in sorted((os.listdir("images")[:2])):
    path = "images/" + image

    object_detection(
        fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights, path
    # )
    # object_detection(fcos_resnet50_fpn, FCOS_ResNet50_FPN_Weights, path)

    # object_detection(retinanet_resnet50_fpn_v2, RetinaNet_ResNet50_FPN_V2_Weights, path)

    # object_detection(ssd300_vgg16, SSD300_VGG16_Weights, path)

    # object_detection(
        # ssdlite320_mobilenet_v3_large, SSDLite320_MobileNet_V3_Large_Weights, path
    )

Model Output saved to outputs/fasterrcnn_resnet50_fpn_v2/image_15.jpg




Model Output saved to outputs/fasterrcnn_resnet50_fpn_v2/image_29.jpg
