In [None]:
# The following was run in Colab with the 'T4 GPU' runtime, selected under notebook settings.
# The code below is the result of the following prompt to ChatGPT:

# Give me a simple working example of following:
# Develop a pipeline using transformers or deep learning models that processes an input image 
# to segment, identify, and analyze objects within the image, and outputs a summary table with mapped 
# data for each object.

# Install the packages the notebook imports below: the PyTorch packages and
# EasyOCR are installed by name, Detectron2 directly from its GitHub repository.
# NOTE(review): no versions are pinned, so a later re-run may pull different releases.
!pip install torch torchvision torchaudio easyocr
!pip install 'git+https://github.com/facebookresearch/detectron2.git'


In [None]:
import torch
import torchvision
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2 import model_zoo
import cv2
import easyocr
import pandas as pd


In [None]:
# Load image
image_path = "/content/sample_data/test_image.jpg"
image = cv2.imread(image_path)
# cv2.imread reports failure by returning None rather than raising, so stop
# here with a clear message instead of failing later inside the predictor.
if image is None:
    raise FileNotFoundError(
        f"Could not read image at {image_path!r} (missing file or unsupported format)"
    )

# Set up Detectron2 model (Mask R-CNN for segmentation)
# The same model-zoo entry supplies both the config file and the weights.
MODEL_ZOO_CONFIG = "COCO-InstanceSegmentation/mask_rcnn_R_50_FPN_3x.yaml"
cfg = get_cfg()
# COCO (Common Objects in Context) dataset is a large collection of images and annotations used for training
# and evaluating computer vision models
cfg.merge_from_file(model_zoo.get_config_file(MODEL_ZOO_CONFIG))
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # Set threshold for this model
cfg.MODEL.WEIGHTS = model_zoo.get_checkpoint_url(MODEL_ZOO_CONFIG)
# Use the GPU when one is available and fall back to the CPU otherwise, so the
# cell also runs on a runtime without CUDA.
cfg.MODEL.DEVICE = "cuda" if torch.cuda.is_available() else "cpu"
predictor = DefaultPredictor(cfg)

# Get predictions
outputs = predictor(image)

In [None]:
# Pull the per-detection fields out of the prediction and turn each one into
# a NumPy array on the CPU.
instances = outputs["instances"]
boxes, masks, classes, scores = (
    field.cpu().numpy()
    for field in (
        instances.pred_boxes.tensor,  # Bounding boxes
        instances.pred_masks,         # Segmentation masks
        instances.pred_classes,       # Class labels
        instances.scores,             # Confidence scores
    )
)

# COCO class names, indexed by the class id the model predicts.
# Detectron2's `pred_classes` are zero-based indices over the 80 COCO "thing"
# categories and have no background slot, so index 0 must be "person".
# A leading "__background__" entry would shift every label by one.
COCO_CLASSES = [
    "person", "bicycle", "car", "motorcycle", "airplane", "bus", "train", "truck", "boat", "traffic light",
    "fire hydrant", "stop sign", "parking meter", "bench", "bird", "cat", "dog", "horse", "sheep", "cow", "elephant", "bear",
    "zebra", "giraffe", "backpack", "umbrella", "handbag", "tie", "suitcase", "frisbee", "skis", "snowboard", "sports ball",
    "kite", "baseball bat", "baseball glove", "skateboard", "surfboard", "tennis racket", "bottle", "wine glass", "cup",
    "fork", "knife", "spoon", "bowl", "banana", "apple", "sandwich", "orange", "broccoli", "carrot", "hot dog", "pizza",
    "donut", "cake", "chair", "couch", "potted plant", "bed", "dining table", "toilet", "TV", "laptop", "mouse", "remote",
    "keyboard", "cell phone", "microwave", "oven", "toaster", "sink", "refrigerator", "book", "clock", "vase", "scissors",
    "teddy bear", "hair drier", "toothbrush"
]


In [None]:
# Initialize EasyOCR Reader
reader = easyocr.Reader(['en'])

# Image bounds, used to keep every crop inside the picture
img_height, img_width = image.shape[:2]

# Loop through each detected object
object_data = []
for box, class_id, score in zip(boxes, classes, scores):
    # Crop the detected object from the image. Coordinates are truncated to
    # int and clamped to the image so a negative value cannot wrap around.
    x1, y1, x2, y2 = map(int, box)
    x1, x2 = max(x1, 0), min(x2, img_width)
    y1, y2 = max(y1, 0), min(y2, img_height)
    cropped_object = image[y1:y2, x1:x2]

    # Optionally, extract text from the object. A box that collapses to zero
    # width or height yields an empty crop; skip OCR for it instead of
    # handing an empty array to the reader.
    text = reader.readtext(cropped_object, detail=0) if cropped_object.size else []

    # Store object data
    object_data.append({
        "Object": COCO_CLASSES[int(class_id)],
        "Bounding Box": box,
        "Confidence Score": score,
        "Extracted Text": " ".join(text) if text else "N/A"
    })


In [None]:
# Convert to pandas DataFrame for easy viewing.
# Naming the columns explicitly keeps the table header intact even when no
# objects were detected and object_data is empty.
df = pd.DataFrame(
    object_data,
    columns=["Object", "Bounding Box", "Confidence Score", "Extracted Text"],
)

# Show the table: a bare last expression gets the notebook's rich table
# rendering, which print() would bypass.
df


In [None]:
# NOTE(review): cv2 is already imported and used by earlier cells, so this
# install looks redundant at this point in the notebook — confirm, then move
# it into the first install cell or drop it.
!pip install opencv-python

In [None]:
import cv2
import matplotlib.pyplot as plt

# Define function to draw bounding boxes and labels
def draw_bounding_boxes(image, boxes, classes, scores):
    """Draw a labelled green rectangle on ``image`` for every detection.

    The image is modified in place; nothing is returned.

    Args:
        image: Image array passed to the OpenCV drawing functions.
        boxes: Sequence of ``(x1, y1, x2, y2)`` boxes; coordinates are
            truncated to ``int`` before drawing.
        classes: Per-box indices into ``COCO_CLASSES``.
        scores: Per-box confidence values, shown with two decimals.
    """
    font = cv2.FONT_HERSHEY_SIMPLEX
    font_scale = 0.6
    # One thickness for both measuring and drawing the label: measuring with
    # a thinner stroke than the one drawn makes the background too small.
    text_thickness = 2
    box_color = (0, 255, 0)
    text_color = (0, 0, 0)

    for i, box in enumerate(boxes):
        x1, y1, x2, y2 = map(int, box)
        label = f"{COCO_CLASSES[classes[i]]}: {scores[i]:.2f}"

        # Draw rectangle (bounding box)
        cv2.rectangle(image, (x1, y1), (x2, y2), box_color, 2)

        # Put label text above the box, or just inside it when the box sits
        # too close to the top edge of the image.
        (label_w, label_h), baseline = cv2.getTextSize(label, font, font_scale, text_thickness)
        label_x1 = x1
        label_y1 = y1 - 10 if y1 - 10 > 10 else y1 + 10
        # Extend the filled background down by the baseline so characters
        # with descenders are covered as well.
        cv2.rectangle(
            image,
            (label_x1, label_y1 - label_h),
            (label_x1 + label_w, label_y1 + baseline),
            box_color,
            -1,
        )
        cv2.putText(image, label, (label_x1, label_y1), font, font_scale, text_color, text_thickness)

# Draw bounding boxes and labels on a copy of the image.
# draw_bounding_boxes paints in place, and the OCR cell crops from `image`:
# drawing on the original would make a re-run of that cell read the painted
# labels, and a re-run of this cell would stack annotations on top of each other.
annotated_image = image.copy()
draw_bounding_boxes(annotated_image, boxes, classes, scores)

# Convert image from BGR (OpenCV) to RGB (Matplotlib)
image_rgb = cv2.cvtColor(annotated_image, cv2.COLOR_BGR2RGB)

# Display the image with bounding boxes
plt.figure(figsize=(10, 10))
plt.imshow(image_rgb)
plt.axis("off")
plt.show()
