In [1]:
import os
import numpy as np
import torch
import evaluate
from tqdm.notebook import tqdm
from PIL import Image
from torch import nn
from transformers import SegformerImageProcessor
from model import SegFormer1, SegFormer2
from utils import preprocessor
from torch.utils.data import DataLoader


In [6]:
# Image processor for the "nvidia/mit-b0" pretrained checkpoint.
image_processor = SegformerImageProcessor.from_pretrained("nvidia/mit-b0")

# get_datasets is unpacked into three values and only the last is kept; judging by
# the variable name it is the test split (presumably train/val/test order -- confirm
# against utils.preprocessor).
_, _, test_dataset = preprocessor.get_datasets(task=2)

# Batches of 12 with DataLoader defaults for everything else.
test_dataloader = DataLoader(dataset=test_dataset, batch_size=12)


Number of training examples: 300
Number of validation examples: 60
Number of test examples: 240


In [3]:
# use CUDA if available; resolved first so the checkpoint can be mapped onto it
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Build the model and restore the saved weights.
model = SegFormer2.get_model()
# map_location lets a checkpoint that was saved from a GPU run load on a CPU-only
# machine instead of failing while deserialising CUDA tensors.
# NOTE(review): torch.load unpickles the file -- only load checkpoints you trust.
checkpoint = torch.load("output/task2_best_model.pth", map_location=device)
model.load_state_dict(checkpoint)
model.to(device)
model.eval()  # switch to evaluation mode before running the test set

# Build {label id -> label name} from data/labels.txt. The first whitespace-separated
# token has its last character stripped before int() (presumably a trailing separator
# such as ":" -- confirm against the file); the second token is the name.
id2label = {}
with open("data/labels.txt") as labels_file:  # context manager closes the handle
    for line in labels_file:
        fields = line.split()
        if not fields:
            # Skip blank lines (e.g. a trailing empty line) instead of raising IndexError.
            continue
        id2label[int(fields[0][:-1])] = fields[1]

test_metric = evaluate.load("mean_iou")

Some weights of the model checkpoint at nvidia/mit-b0 were not used when initializing SegformerForSemanticSegmentation: ['classifier.weight', 'classifier.bias']
- This IS expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing SegformerForSemanticSegmentation from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).
Some weights of SegformerForSemanticSegmentation were not initialized from the model checkpoint at nvidia/mit-b0 and are newly initialized: ['decode_head.linear_c.3.proj.weight', 'decode_head.linear_c.2.proj.bias', 'decode_head.linear_c.0.proj.bias', 'decode_head.batch_norm.running_var', 'decode_head.batch_norm.bias', 'decode_head.linear_

In [4]:
# Run the model over every test batch and accumulate predictions in the metric.
for batch in tqdm(test_dataloader):
    with torch.no_grad():  # inference only, no autograd bookkeeping
        pixel_values = batch["pixel_values"].to(device)
        labels = batch["labels"].to(device)

        logits = model(pixel_values=pixel_values).logits

        # Resize the logits to the label map's spatial size, then take the
        # highest-scoring class along dim 1 for every pixel.
        upsampled_logits = nn.functional.interpolate(
            logits,
            size=labels.shape[-2:],
            mode="bilinear",
            align_corners=False,
        )
        predicted_labels = upsampled_logits.argmax(dim=1)

        test_metric.add_batch(
            predictions=predicted_labels.cpu().numpy(),
            references=labels.cpu().numpy(),
        )

# Aggregate over all accumulated batches: 7 classes, pixels labelled 255 are ignored.
test_metrics = test_metric.compute(num_labels=7, ignore_index=255)


  0%|          | 0/20 [00:00<?, ?it/s]

In [5]:
# Report the aggregate test metrics.
print("Test Mean_iou:", test_metrics["mean_iou"])
print("Test Mean accuracy:", test_metrics["mean_accuracy"])

# The per-category arrays are indexed from 0, while label ids come from the labels
# file. Subtracting the smallest id (instead of a hard-coded 1) gives the same result
# for 1-based ids and avoids silently wrapping to index -1 (the last class) when the
# ids are 0-based. Assumes the ids are contiguous -- confirm against data/labels.txt.
_label_id_offset = min(id2label, default=0)


def _per_class(metric_name):
    """Return {label name: value} for one per-category entry of test_metrics."""
    values = test_metrics[metric_name]
    return {name: values[label_id - _label_id_offset] for label_id, name in id2label.items()}


print("Test iou for each class:", _per_class("per_category_iou"))
print("Test accuracy for each class:", _per_class("per_category_accuracy"))

Test Mean_iou: 0.5871265122397915
Test Mean accuracy: 0.7415054855590087
Test iou for each class: {'background': 0.49401247518361086, 'skin': 0.9369229141196831, 'hair': 0.6763181153556446, 'tshirt': 0.6519135317564225, 'shoes': 0.36113826076400296, 'pants': 0.435149316618306, 'dress': 0.5544309718808705}
Test accuracy for each class: {'background': 0.65428734326724, 'skin': 0.9623686083986278, 'hair': 0.88391799726913, 'tshirt': 0.8353590748272944, 'shoes': 0.5408527209706663, 'pants': 0.6130508178067916, 'dress': 0.7007018363733105}
