# Generic pipeline for segmentation tests

We will use some pretrained models to test their semantic segmentation performance.

In [47]:
# --------------------------------------
from transformers import SegformerFeatureExtractor
from transformers import SegformerForSemanticSegmentation

# --------------------------------------
import torchvision as tvi

# --------------------------------------
import torch as pt
import torch.nn.functional as F

# --------------------------------------
from PIL import Image

# --------------------------------------
import requests

# --------------------------------------
import numpy as np

# --------------------------------------
import matplotlib.pyplot as plt

# --------------------------------------
from rsms import conf

Download some pretrained models.

In [None]:
# Available SegFormer (B2) checkpoints, keyed by the dataset they were
# fine-tuned on. Only the selected one is downloaded and instantiated;
# loading both and overwriting the first wastes time, bandwidth and memory.
# ==================================================
checkpoints = {
    "ade20k": "nvidia/segformer-b2-finetuned-ade-512-512",
    "cityscapes": "nvidia/segformer-b2-finetuned-cityscapes-1024-1024",
}

# Change the key to switch models.
checkpoint = checkpoints["cityscapes"]

# The feature extractor and the model must come from the same checkpoint
# so that the preprocessing matches what the model was trained with.
feature_extractor = SegformerFeatureExtractor.from_pretrained(checkpoint)
model = SegformerForSemanticSegmentation.from_pretrained(checkpoint)

# Switch to inference mode
model.eval()

In [49]:
# Pick ONE input image; the alternatives are kept commented out.
# Every option is converted to RGB because the cells below index
# `image[:, :, 0]` and append a fourth (alpha) channel, which only works
# for an H x W x 3 array.

# Cats
# ==================================================
# image = np.array(Image.open(requests.get("http://images.cocodataset.org/val2017/000000039769.jpg", stream=True).raw).convert("RGB"))

# Concrete (concrete.jpeg)
# ==================================================
# image = np.array(Image.open(conf.DATA_DIR / "concrete.jpeg").convert("RGB"))

# Building with bricks
# ==================================================
image = np.array(Image.open(conf.DATA_DIR / "bricks.jpeg").convert("RGB"))

Extract the input features and compute the per-class logits, upsampled to the size of the input image

In [50]:
# Preprocess the image into the tensors the model expects.
inputs = feature_extractor(images=image, return_tensors="pt")

# Inference only: disable autograd so no computation graph is built.
# `model.eval()` alone does not turn off gradient tracking.
with pt.no_grad():
    outputs = model(**inputs)

    # Resize the logits to the height and width of the original image so
    # that every pixel gets a prediction.
    # NOTE(review): the Hugging Face SegFormer examples appear to upsample
    # with align_corners=False - confirm that True is intended here.
    logits = F.interpolate(
        outputs.logits, size=image.shape[:2], mode="bilinear", align_corners=True
    )

Extract the most probable class for each pixel

In [None]:
# Only one image was fed to the model, so take the first batch entry and
# pick, for every pixel, the index with the highest logit along the class
# dimension.
first_logits = logits[0]
classes = first_logits.argmax(dim=0)

# Display the shape of the resulting class map.
classes.shape

Add an alpha channel to the image so that we can overlay the segmentation results and still be able to see the original image.

In [None]:
# Alpha value (0-255) given to every pixel of the original image.
image_opacity = 192

# Constant single-channel plane with the same height, width and dtype as the
# image, then a trailing axis so it can be stacked as a fourth channel.
alpha_plane = np.full_like(image[:, :, 0], image_opacity)
opacity = alpha_plane[:, :, np.newaxis]

# RGB + alpha -> RGBA
image_with_opacity = np.concatenate([image, opacity], axis=2)
plt.imshow(image_with_opacity)

Assign a random colour to each predicted class and overlay it on the image

In [53]:
# Seeded generator so that the class colours are the same on every run.
rng = np.random.default_rng(0)

# NumPy copy of the class map, used to build boolean masks that index NumPy
# arrays directly.
class_map = classes.detach().cpu().numpy()

# Fraction of every output pixel that comes from the original image; the
# remainder comes from the class colour.
image_weight = image_opacity / 255.0

# Start from the untouched RGB image instead of mutating the previous
# overlay in place, so re-running this cell gives the same result.
# Blending in float avoids the uint8 wrap-around (e.g. 200 + 100 -> 44) that
# a plain uint8 addition would produce.
blended = image[..., :3].astype(np.float32)

# Maps class id (plain int) -> RGBA colour (uint8 array of length 4).
colours = {}
for cls in np.unique(class_map):
    # Upper bound is exclusive, so 256 covers the full 0-255 range.
    colour = rng.integers(0, 256, size=4, dtype=np.uint8)
    colour[3] = 255 - image_opacity
    colours[int(cls)] = colour

    idx = class_map == cls
    blended[idx] = image_weight * blended[idx] + (1.0 - image_weight) * colour[:3]

# Image alpha (image_opacity) plus colour alpha (255 - image_opacity) is
# fully opaque, so the final alpha channel is 255 everywhere.
full_alpha = np.full(blended.shape[:2] + (1,), 255, dtype=np.uint8)
image_with_opacity = np.concatenate(
    (np.rint(blended).astype(np.uint8), full_alpha), axis=-1
)

Show the segmentation results

In [None]:
# Draw the image with the class colours overlaid on an explicit figure/axes
# pair.
fig, ax = plt.subplots()
ax.imshow(image_with_opacity)