In [1]:
# Import required libraries
import torch
import torchvision.transforms as transforms
from transformers import SegformerImageProcessor, SegformerForSemanticSegmentation, AutoImageProcessor
from PIL import Image
import requests
import numpy as np

# Both the image processor and the segmentation model come from the same
# Hugging Face checkpoint, so the identifier is spelled out once.
checkpoint_name = "nvidia/segformer-b0-finetuned-ade-512-512"

processor = SegformerImageProcessor.from_pretrained(checkpoint_name)
model = SegformerForSemanticSegmentation.from_pretrained(checkpoint_name)

print("Loaded model and processor")



Loaded model and processor


In [2]:
# Switch the model to evaluation mode before running inference.
# eval() is the last expression in the cell, so the module it returns is
# echoed as the cell output (the layer listing shown below).
model.eval()

SegformerForSemanticSegmentation(
  (segformer): SegformerModel(
    (encoder): SegformerEncoder(
      (patch_embeddings): ModuleList(
        (0): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(3, 32, kernel_size=(7, 7), stride=(4, 4), padding=(3, 3))
          (layer_norm): LayerNorm((32,), eps=1e-05, elementwise_affine=True)
        )
        (1): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(32, 64, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (layer_norm): LayerNorm((64,), eps=1e-05, elementwise_affine=True)
        )
        (2): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(64, 160, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (layer_norm): LayerNorm((160,), eps=1e-05, elementwise_affine=True)
        )
        (3): SegformerOverlapPatchEmbeddings(
          (proj): Conv2d(160, 256, kernel_size=(3, 3), stride=(2, 2), padding=(1, 1))
          (layer_norm): LayerNorm((256,), eps=1e-05, elementwise_affine=True)
  

In [3]:
# Define preprocessing transforms for the manual (torchvision) inference path.
#
# The checkpoint's own image processor normalizes each channel with the
# ImageNet mean/std; feeding the model un-normalized [0, 1] tensors gives it
# inputs from a different distribution than it was trained on, so the same
# normalization is applied here.
IMAGENET_MEAN = [0.485, 0.456, 0.406]
IMAGENET_STD = [0.229, 0.224, 0.225]

preprocess = transforms.Compose([
    # 1024x1024 is a choice made for this notebook, not a requirement of the
    # model: the logits come out at 1/4 of the input resolution, so this
    # yields a 256x256 segmentation mask.
    transforms.Resize((1024, 1024)),
    transforms.ToTensor(),  # Convert PIL image to a float tensor in [0, 1]
    transforms.Normalize(mean=IMAGENET_MEAN, std=IMAGENET_STD),
])


In [4]:
# Load and preprocess input data
# NOTE(review): absolute, machine-specific path — point IMAGE_PATH at a local
# file (or use the URL alternative below) before running on another machine.
IMAGE_PATH = '/home/baraa/Downloads/pic2.jpeg'

# Force three RGB channels: grayscale, CMYK or RGBA files would otherwise
# produce a tensor with the wrong channel count for the model.
image = Image.open(IMAGE_PATH).convert("RGB")

# Alternative input: a sample image fetched over HTTP.
#url = "http://images.cocodataset.org/val2017/000000039769.jpg"
#image = Image.open(requests.get(url, stream=True).raw).convert("RGB")

input_tensor = preprocess(image).unsqueeze(0)  # Add batch dimension

In [11]:
# Perform inference
with torch.no_grad():
    outputs = model(input_tensor)

# Get segmentation mask
#pitaj kaj radi????????????
%timeit predicted_masks = torch.argmax(outputs.logits, dim=1).squeeze().cpu().numpy()


7.98 ms ± 109 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [10]:
# Perform inference
with torch.no_grad():
    outputs = model(input_tensor)

# Per-pixel class id: argmax over the class dimension, batch dimension removed.
predicted_masks = torch.argmax(outputs.logits, dim=1).squeeze().cpu().numpy()

# RGB color for each of the first eight class ids. Pixels predicted as any
# other class keep the black initial value of the overlay.
# NOTE(review): the checkpoint is ADE20K-finetuned, so ids 0 and 1 are
# presumably not "background"/"person" — look the names up in
# model.config.id2label before labelling these colors.
color_map = {
    0: [0, 0, 0],        # black
    1: [255, 0, 0],      # red
    2: [0, 255, 0],      # green
    3: [0, 0, 255],      # blue
    4: [255, 255, 255],  # white
    5: [255, 0, 255],    # magenta
    6: [255, 255, 0],    # yellow
    7: [0, 255, 255],    # cyan
}

# Take the overlay size from the mask itself instead of hard-coding 256x256,
# so the cell keeps working if the preprocessing resolution changes.
mask_height, mask_width = predicted_masks.shape

# PIL's resize expects (width, height). convert("RGB") guarantees three
# channels so the array can be blended with the colored mask.
image_resized = image.convert("RGB").resize((mask_width, mask_height))
image_array = np.array(image_resized)

# Generate colored segmentation image
mask_colors = np.zeros((mask_height, mask_width, 3), dtype=np.uint8)
for class_idx, color in color_map.items():
    mask_colors[predicted_masks == class_idx] = color

# Blend the segmentation colors with the resized input image (30% / 70%)
blended = mask_colors * 0.3 + image_array * 0.7

# Save and display the output image
output_image = Image.fromarray(blended.astype(np.uint8))
output_image.save('output_image.jpg')
output_image.show()

In [7]:
# Inference through the checkpoint's own image processor, which handles the
# preprocessing and returns PyTorch tensors ready to be passed to the model.
inputs = processor(images=image, return_tensors="pt")

# Inference only: skip autograd bookkeeping, as in the other inference cells.
# Note that this rebinds `outputs` from the earlier manual-preprocessing run.
with torch.no_grad():
    outputs = model(**inputs)

logits = outputs.logits  # shape (batch_size, num_labels, height/4, width/4)

In [8]:
# Dump the raw model output object (loss and logits) as plain text.
print(outputs)

SemanticSegmenterOutput(loss=None, logits=tensor([[[[ -9.3994,  -9.8428, -10.5961,  ..., -11.3233, -11.1045, -10.8612],
          [ -9.6254, -11.0639, -11.3256,  ..., -11.7620, -11.5964, -11.5009],
          [-10.2251, -11.2533, -11.3996,  ..., -11.8099, -11.6692, -11.6239],
          ...,
          [ -9.9257, -10.9861, -11.8562,  ..., -11.1314, -11.1905, -11.1675],
          [-10.4855, -11.0725, -11.5887,  ..., -10.1375,  -9.8133,  -9.4531],
          [-10.3399, -11.4263, -11.5931,  ..., -10.2541,  -9.8501,  -9.4078]],

         [[ -7.5692,  -7.9152,  -8.7077,  ..., -14.0262, -13.7718, -13.4739],
          [ -7.5292,  -9.5364,  -9.6885,  ..., -14.9601, -14.8346, -14.6939],
          [ -8.2252,  -9.6685,  -9.6745,  ..., -14.9079, -14.8393, -14.8410],
          ...,
          [-12.1810, -13.2342, -13.9694,  ..., -13.1005, -13.1549, -13.0469],
          [-12.9779, -13.5377, -13.8197,  ..., -12.3978, -12.0402, -11.3865],
          [-12.7486, -13.7415, -13.7786,  ..., -12.3733, -12.0282, -