In [None]:
# Core detectron2 package; its logger is set up right away, before the remaining imports run.
import detectron2
from detectron2.utils.logger import setup_logger
setup_logger()

# import some common libraries
import numpy as np
import cv2
import torch
# from google.colab.patches import cv2_imshow  (Colab-only display helper, left disabled; this notebook displays images with matplotlib)
# import some common detectron2 utilities
from detectron2 import model_zoo
from detectron2.engine import DefaultPredictor
from detectron2.config import get_cfg
from detectron2.utils.visualizer import Visualizer, ColorMode
from detectron2.data import MetadataCatalog
# Metadata entry for "coco_2017_val"; later cells hand it to the Visualizer and
# read its "thing_classes" list to turn predicted class indices into names.
coco_metadata = MetadataCatalog.get("coco_2017_val")
import matplotlib.pyplot as plt

# import PointRend project
from detectron2.projects import point_rend

In [None]:
# !wget http://images.cocodataset.org/val2017/000000005477.jpg -q -O input.jpg
# Image to segment. This is an absolute path on the original machine; point it
# at your own file when running elsewhere.
IMAGE_PATH = "/p/openvocabdustr/probing_midlevel_vision/data/rgb/taskonomy/marstons/point_0_view_0_domain_rgb.png"

# cv2.imread does not raise when the file is missing or cannot be decoded; it
# returns None. Fail here with a clear message instead of letting a later cell
# crash with an AttributeError on `im.shape`.
im = cv2.imread(IMAGE_PATH)
if im is None:
    raise FileNotFoundError(f"cv2.imread could not read an image from {IMAGE_PATH!r}")

In [None]:
# Show the dimensions of the loaded array as a quick sanity check
# (later cells index it as a rank-3 array via `im[:, :, ::-1]`).
im.shape

In [None]:
# OpenCV delivers pixels in BGR channel order while matplotlib expects RGB,
# so reorder the channels before plotting.
im_rgb = cv2.cvtColor(im, cv2.COLOR_BGR2RGB)

# Draw the converted image on an explicit figure/axes pair with the axis
# decorations switched off.
fig, ax = plt.subplots()
ax.imshow(im_rgb)
ax.axis('off')
plt.show()

In [None]:
# Build the inference configuration: detectron2 defaults, then the PointRend
# additions, then the model-specific YAML, then the overrides set in this cell.
cfg = get_cfg()
# Add PointRend-specific config (done before merging the PointRend YAML below)
point_rend.add_pointrend_config(cfg)
# Load a config from file (absolute path into a local detectron2 checkout)
cfg.merge_from_file("/p/openvocabdustr/probing_midlevel_vision/code/detectron2_repo/projects/PointRend/configs/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_coco.yaml")
cfg.MODEL.ROI_HEADS.SCORE_THRESH_TEST = 0.5  # set threshold for this model
# Use a model from PointRend model zoo: https://github.com/facebookresearch/detectron2/tree/master/projects/PointRend#pretrained-models
cfg.MODEL.WEIGHTS = "https://dl.fbaipublicfiles.com/detectron2/PointRend/InstanceSegmentation/pointrend_rcnn_X_101_32x8d_FPN_3x_coco/28119989/model_final_ba17b9.pkl"
# detectron2's default config targets CUDA; fall back to the CPU when no GPU is
# visible so the predictor can still be constructed on CPU-only machines.
if not torch.cuda.is_available():
    cfg.MODEL.DEVICE = "cpu"
predictor = DefaultPredictor(cfg)

In [None]:
# Run the predictor on the image exactly as loaded by cv2.imread (no colour
# conversion applied). Later cells read the predictions from outputs["instances"].
outputs = predictor(im)

In [None]:
from detectron2.utils.visualizer import ColorMode

# Overlay only the predicted masks (no boxes or labels) on the input image.
# The Visualizer expects RGB input, so the BGR image from OpenCV is channel-reversed here.
# NOTE(review): instance_mode=ColorMode.IMAGE_BW may only take effect through
# draw_instance_predictions, not overlay_instances — confirm if a grayscale background is intended.
v = Visualizer(im[:, :, ::-1], coco_metadata, scale=1.2, instance_mode=ColorMode.IMAGE_BW)
point_rend_result = v.overlay_instances(masks=outputs["instances"].pred_masks.to("cpu")).get_image()

# get_image() already returns an RGB array, which is what matplotlib expects.
# The extra [:, :, ::-1] flip from the Colab `cv2_imshow` recipe would swap the
# red and blue channels here, so the result is shown as-is.
plt.imshow(point_rend_result)
plt.axis('off')  # Hide axis for cleaner visualization
plt.show()

In [None]:
import matplotlib.pyplot as plt
import torch

# Bring the predictions onto the CPU and pull out the per-instance masks and
# their predicted class indices.
instances = outputs["instances"].to("cpu")
pred_masks, pred_classes = instances.pred_masks, instances.pred_classes

# Class-name list from the COCO metadata, indexed by predicted class index.
coco_classes = coco_metadata.get("thing_classes", None)

# Walk masks and class indices in lockstep; each instance gets a printed line
# and its own figure showing the mask in grayscale.
for i, (mask_tensor, class_tensor) in enumerate(zip(pred_masks, pred_classes)):
    class_name = coco_classes[class_tensor.item()]
    caption = f"Object {i + 1}: {class_name}"
    print(caption)

    plt.imshow(mask_tensor.numpy(), cmap="gray")
    plt.title(caption)
    plt.axis('off')
    plt.show()