# Load the model

In [2]:
from transformers import SegformerForSemanticSegmentation

# Load the model
# Restores a SegFormer semantic-segmentation model (weights + config) from a
# local directory; presumably the checkpoint written by the training script
# -- verify the path matches where training saved its output.
model_directory = './scripts/segformer_output'
model = SegformerForSemanticSegmentation.from_pretrained(model_directory)

# Load the dataset

## GTA

In [3]:
from datasets import concatenate_datasets, DatasetDict, load_from_disk
import os

def load_batches(split_name, directory):
    """Load and concatenate the consecutively numbered batches of one split.

    Batches are expected at ``<directory>/<split_name>_batch_<i>.arrow`` for
    i = 0, 1, 2, ...; scanning stops at the first index that does not exist,
    so any batch after a gap in the numbering is not loaded.

    Args:
        split_name: Prefix of the batch names (e.g. ``'train'``).
        directory: Folder that contains the saved batches.

    Returns:
        The concatenation of all batches found, or ``None`` when there is no
        batch with index 0.
    """
    def shard_path(index):
        return os.path.join(directory, f"{split_name}_batch_{index}.arrow")

    shards = []
    # len(shards) is always the index of the next batch to look for.
    while os.path.exists(shard_path(len(shards))):
        shards.append(load_from_disk(shard_path(len(shards))))

    if not shards:
        return None
    return concatenate_datasets(shards)

# Load each split
dataset_path = './gta_dataset'

train_dataset = load_batches('train', dataset_path)
validation_dataset = load_batches('validation', dataset_path)
test_dataset = load_batches('test', dataset_path)

# load_batches returns None when a split has no batches on disk; fail here
# with a clear message instead of an AttributeError on None further down.
missing_splits = [
    split for split, loaded in (
        ('train', train_dataset),
        ('validation', validation_dataset),
        ('test', test_dataset),
    )
    if loaded is None
]
if missing_splits:
    raise FileNotFoundError(
        f"No batches found under {dataset_path!r} for split(s): {', '.join(missing_splits)}"
    )

# Create a DatasetDict
hf_datasets = DatasetDict({
    'train': train_dataset,
    'validation': validation_dataset,
    'test': test_dataset
})

# Fixed seed so the 10% evaluation subsets are identical on every run;
# without it the reported metrics change from one execution to the next.
SPLIT_SEED = 42

gta_train_ds = hf_datasets["train"]
gta_test_ds = hf_datasets["test"].train_test_split(test_size=0.1, seed=SPLIT_SEED)['test']
gta_val_ds = hf_datasets["validation"].train_test_split(test_size=0.1, seed=SPLIT_SEED)['test']

## Cityscapes

# Perform evaluation

In [4]:
from transformers import SegformerImageProcessor
from PIL import Image
import numpy as np

# Image processor with its default configuration; it is used both to
# preprocess evaluation batches and to read `do_reduce_labels` for the metric.
processor = SegformerImageProcessor()


import json
# `cached_download` is deprecated and no longer exists in recent
# huggingface_hub releases, so importing it breaks the cell there;
# `hf_hub_download` is the supported replacement. `hf_hub_url` stays
# imported in case another cell still refers to it.
from huggingface_hub import hf_hub_download, hf_hub_url

# Fetch the Cityscapes id -> label mapping from the Hub dataset repo.
repo_id = "huggingface/label-files"
filename = "cityscapes-id2label.json"
label_file = hf_hub_download(repo_id=repo_id, filename=filename, repo_type="dataset")
# Context manager so the file handle is closed after parsing.
with open(label_file, "r") as label_fp:
    id2label = json.load(label_fp)
# JSON object keys are strings; the rest of the notebook indexes by int id.
id2label = {int(k): v for k, v in id2label.items()}
label2id = {v: k for k, v in id2label.items()}


# Extra class id 19 is used as the "ignore" label (val_transforms clamps
# larger mask values to it and compute_metrics passes it as ignore_index).
id2label[19] = 'ignore'
label2id['ignore'] = 19
num_labels = len(id2label)


def val_transforms(example_batch):
    """Preprocess a batch of examples for evaluation.

    Reads the ``'image'`` and ``'mask'`` columns of ``example_batch``, converts
    every entry to a uint8 PIL image (masks in mode ``'L'``), clamps mask
    values to at most ``num_labels - 1`` and passes both lists to the
    module-level ``processor``.

    Args:
        example_batch: Mapping with ``'image'`` and ``'mask'`` sequences.

    Returns:
        Whatever ``processor`` returns when called with
        ``return_tensors="pt"``.
    """
    highest_label = num_labels - 1

    images = []
    for raw_image in example_batch['image']:
        images.append(Image.fromarray(np.array(raw_image, dtype=np.uint8)))

    labels = []
    for raw_mask in example_batch['mask']:
        mask_image = Image.fromarray(np.array(raw_mask, dtype=np.uint8), mode='L')
        # Ensure labels are within the expected range
        clamped = np.minimum(np.array(mask_image), highest_label)
        labels.append(Image.fromarray(clamped, mode='L'))

    return processor(images=images, segmentation_maps=labels, return_tensors="pt")


import torch
from torch import nn
import evaluate

# Mean-IoU metric object; compute_metrics calls its .compute() method.
metric = evaluate.load("mean_iou")

def compute_metrics(eval_pred):
    """Turn raw evaluation output into a flat dictionary of metrics.

    Args:
        eval_pred: A ``(logits, labels)`` pair of NumPy arrays. The logits are
            resized to the last two dimensions of ``labels`` before the
            per-pixel argmax over dim 1 is taken.

    Returns:
        The dict produced by ``metric.compute`` with its
        ``per_category_accuracy`` / ``per_category_iou`` arrays replaced by
        one ``accuracy_<label>`` and one ``iou_<label>`` entry per class,
        named via ``id2label``.
    """
    logits, labels = eval_pred

    with torch.no_grad():
        # scale the logits to the size of the label, then pick the best class
        upsampled = nn.functional.interpolate(
            torch.from_numpy(logits),
            size=labels.shape[-2:],
            mode="bilinear",
            align_corners=False,
        )
        pred_labels = upsampled.argmax(dim=1).detach().cpu().numpy()

    metrics = metric.compute(
        predictions=pred_labels,
        references=labels,
        num_labels=len(id2label),
        ignore_index=19,
        reduce_labels=processor.do_reduce_labels,
    )

    # add per category metrics as individual key-value pairs
    accuracy_by_class = metrics.pop("per_category_accuracy").tolist()
    iou_by_class = metrics.pop("per_category_iou").tolist()

    for class_id, value in enumerate(accuracy_by_class):
        metrics[f"accuracy_{id2label[class_id]}"] = value
    for class_id, value in enumerate(iou_by_class):
        metrics[f"iou_{id2label[class_id]}"] = value

    return metrics
  
# Register val_transforms as the transform of the GTA test subset (mutates
# gta_test_ds in place); per the `datasets` API the transform is then applied
# to examples when they are accessed.
gta_test_ds.set_transform(val_transforms)




In [5]:
from transformers import Trainer, TrainingArguments

# Evaluation-only Trainer configuration: no training is requested
# (do_train=False), so the Trainer is used purely as an evaluation loop.
# NOTE(review): eval_steps / logging_steps presumably have no effect when only
# trainer.evaluate() is called -- confirm before relying on them.
eval_args = TrainingArguments(
    output_dir='./segformer_evaluation/sgf-v0-gta',  # Directory to store evaluation results
    do_train=False,
    do_eval=True,
    eval_steps=10,
    logging_steps=5,
    per_device_eval_batch_size=10,
    dataloader_num_workers=0,
    # Keep the raw dataset columns: val_transforms reads 'image' and 'mask'.
    remove_unused_columns=False,

)

# Wire the loaded model, the transformed GTA test subset and the metric
# function together.
trainer = Trainer(
    model=model,
    args=eval_args,
    eval_dataset=gta_test_ds,
    compute_metrics=compute_metrics  # Adds mean IoU / per-class metrics to the results
)

In [6]:
# Run evaluation over the Trainer's eval_dataset and show the returned
# metrics (the captured output below shows them as a dict).
eval_results = trainer.evaluate()
print(eval_results)

  return F.conv2d(input, weight, bias, self.stride,


  acc = total_area_intersect / total_area_label


{'eval_loss': 0.5304863452911377, 'eval_mean_iou': 0.3863717727599142, 'eval_mean_accuracy': 0.4594820622645841, 'eval_overall_accuracy': 0.877796907688061, 'eval_accuracy_road': 0.9489809431222332, 'eval_accuracy_sidewalk': 0.8455630469421747, 'eval_accuracy_building': 0.9184319410853448, 'eval_accuracy_wall': 0.44873866548323804, 'eval_accuracy_fence': 0.2258698878655819, 'eval_accuracy_pole': 0.20655266367158562, 'eval_accuracy_traffic light': 0.0, 'eval_accuracy_traffic sign': 0.0, 'eval_accuracy_vegetation': 0.8701828218735161, 'eval_accuracy_terrain': 0.48447198018374665, 'eval_accuracy_sky': 0.9759494728206349, 'eval_accuracy_person': 0.46963642497888913, 'eval_accuracy_rider': 0.0, 'eval_accuracy_car': 0.9183919255986837, 'eval_accuracy_truck': 0.7639999478392405, 'eval_accuracy_bus': 0.6533894615622279, 'eval_accuracy_train': 0.0, 'eval_accuracy_motorcycle': 0.0, 'eval_accuracy_bicycle': 0.0, 'eval_accuracy_ignore': nan, 'eval_iou_road': 0.9180418984751986, 'eval_iou_sidewalk'

In [13]:
# Formatting the output in a pretty table using Python's tabulate library

from tabulate import tabulate

# Build the table from the metrics returned by trainer.evaluate() rather than
# from a hand-copied literal: a pasted copy silently goes stale whenever the
# evaluation is re-run and had turned the NaN entry into the string 'nan'.
# `output` is kept as a plain-dict copy for any cell that refers to it.
output = dict(eval_results)

# Convert the dictionary to a list of [metric, value] rows for tabulate
data = [[key, value] for key, value in output.items()]

# Create the table
table = tabulate(data, headers=["Metric", "Value"], tablefmt="pretty")

print(table)


+-----------------------------+---------------------+
|           Metric            |        Value        |
+-----------------------------+---------------------+
|          eval_loss          | 0.5304863452911377  |
|        eval_mean_iou        | 0.3863717727599142  |
|     eval_mean_accuracy      | 0.4594820622645841  |
|    eval_overall_accuracy    |  0.877796907688061  |
|     eval_accuracy_road      | 0.9489809431222332  |
|   eval_accuracy_sidewalk    | 0.8455630469421747  |
|   eval_accuracy_building    | 0.9184319410853448  |
|     eval_accuracy_wall      | 0.44873866548323804 |
|     eval_accuracy_fence     | 0.2258698878655819  |
|     eval_accuracy_pole      | 0.20655266367158562 |
| eval_accuracy_traffic light |         0.0         |
| eval_accuracy_traffic sign  |         0.0         |
|  eval_accuracy_vegetation   | 0.8701828218735161  |
|    eval_accuracy_terrain    | 0.48447198018374665 |
|      eval_accuracy_sky      | 0.9759494728206349  |
|    eval_accuracy_person   

: 