In [2]:
import os

import torch
from torchvision.io.image import decode_image
from torchvision.models.detection import fasterrcnn_resnet50_fpn_v2, FasterRCNN_ResNet50_FPN_V2_Weights
from torchvision.utils import draw_bounding_boxes
from torchvision.transforms.functional import to_pil_image

In [3]:
# Step 0: Load and decode input image and label image
# NOTE(review): these are machine-specific absolute paths; anyone else running
# the notebook has to edit them.
img_dir = r"C:\Users\andre\Desktop\mlcvprac\objdetectionpipeline\data\images"
label_dir = r"C:\Users\andre\Desktop\mlcvprac\objdetectionpipeline\data\labels"

# os.listdir() returns entries in arbitrary order, so sort them to make the
# "first image" the same on every run, and skip anything that is not a regular
# file (e.g. sub-directories).
img_names = sorted(
    name
    for name in os.listdir(path=img_dir)
    if os.path.isfile(os.path.join(img_dir, name))
)
if not img_names:
    # Fail here with a clear message instead of leaving img_tensor undefined
    # and hitting a NameError in a later cell.
    raise FileNotFoundError(f"no image files found in {img_dir}")

# Only the first image is decoded for this walkthrough.
img_name = img_names[0]
img_path = os.path.join(img_dir, img_name)
img_tensor = decode_image(img_path)
print(f"image path: {img_path}")
print(f"image tensor shape {img_tensor.shape}")

image path: C:\Users\andre\Desktop\mlcvprac\objdetectionpipeline\data\images\P0000.png
image tensor shape torch.Size([3, 5502, 3875])


In [29]:
print(img_tensor.shape)

# Step 1: Initialize model with the best weights
weights = FasterRCNN_ResNet50_FPN_V2_Weights.DEFAULT
# NOTE(review): no box_score_thresh is passed, so the model's default score
# threshold applies; consider passing a higher one if the drawing is cluttered
# with low-confidence boxes.
model = fasterrcnn_resnet50_fpn_v2(weights=weights)
model.eval()  # inference mode: the model returns predictions rather than losses

# Step 2: Initialize and apply the preprocessing transforms that match the
# pretrained weights; weights.transforms() builds them for us.
preprocess = weights.transforms()
# torchvision detection models take a *list* of [C, H, W] tensors (one per
# image), not a stacked batch, so the single image is wrapped in a list rather
# than unsqueezed.
batch_img_tensor = [preprocess(img_tensor)]

# Step 3: Inference
# No gradients are needed for prediction; disabling autograd avoids building
# the graph and keeps memory usage down.
with torch.no_grad():
    prediction = model(batch_img_tensor)[0]

# convert prediction class indices to actual class words
categories = weights.meta["categories"]
labels = [categories[class_idx] for class_idx in prediction["labels"].tolist()]

# Boxes are drawn on the original uint8 image, not the preprocessed float one.
img_tensor_with_bbs = draw_bounding_boxes(image=img_tensor,
                                     boxes=prediction["boxes"],
                                     labels=labels,
                                     colors="red",
                                     width=4)
# The forward pass ran under no_grad, so no .detach() is needed here.
image = to_pil_image(img_tensor_with_bbs)
image.show()


torch.Size([3, 5502, 3875])
