# Load the models

#### Original model (No - LoRA)

In [11]:
from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor

# Load the base model
model_directory = 'guimCC/segformer-v0-gta'
original_model = SegformerForSemanticSegmentation.from_pretrained(model_directory)

processor = SegformerImageProcessor()



#### LoRA Model

In [12]:
from peft import PeftConfig, PeftModel
from transformers import SegformerForSemanticSegmentation, SegformerImageProcessor


base_model_id = "guimCC/segformer-v0-gta"
lora_model_id = "guimCC/segformer-v0-gta-cityscapes"

processor = SegformerImageProcessor()

model = SegformerForSemanticSegmentation.from_pretrained(base_model_id)

config = PeftConfig.from_pretrained(lora_model_id)

# Load the Lora model
lora_model = PeftModel.from_pretrained(model, lora_model_id)


# Load the datasets

#### GTA

In [13]:
from datasets import load_dataset

# Load dataset from hugginface
hf_datasets = load_dataset("guimCC/gta5-cityscapes-labeling")

gta_train_ds = hf_datasets["train"]
gta_test_ds = hf_datasets["test"].train_test_split(test_size=0.1)['test']
gta_val_ds = hf_datasets["validation"].train_test_split(test_size=0.1)['test']

Resolving data files:   0%|          | 0/53 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/27 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/26 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/53 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/27 [00:00<?, ?it/s]

Resolving data files:   0%|          | 0/26 [00:00<?, ?it/s]

Loading dataset shards:   0%|          | 0/27 [00:00<?, ?it/s]

#### Cityscapes

In [14]:
from datasets import load_from_disk

# Load the dataset from disk
path_to_cityscapes_dataset = "./../cityscapes_train_1000_dataset_v3"
loaded_dataset = load_from_disk(path_to_cityscapes_dataset)

# Prepare train and test splits
loaded_dataset = loaded_dataset.train_test_split(test_size=0.1)
cty_test_ds = loaded_dataset["test"]


# Preprocessing

#### GTA

In [15]:
from PIL import Image
import numpy as np
import json
from huggingface_hub import cached_download, hf_hub_url
import torch
from torch import nn
import evaluate

# ID handling

repo_id = "huggingface/label-files"
filename = "cityscapes-id2label.json"
id2label = json.load(open(cached_download(hf_hub_url(repo_id, filename, repo_type="dataset")), "r"))
id2label = {int(k): v for k, v in id2label.items()}
label2id = {v: k for k, v in id2label.items()}
id2label[19] = 'ignore'
label2id['ignore'] = 19
num_labels = len(id2label)

# Transformations

def val_transforms(example_batch):
    images = [Image.fromarray(np.array(x, dtype=np.uint8)) for x in example_batch['image']]
    labels = [Image.fromarray(np.array(x, dtype=np.uint8), mode='L') for x in example_batch['mask']]
    
    # Ensure labels are within the expected range
    labels = [Image.fromarray(np.minimum(np.array(label), num_labels - 1), mode='L') for label in labels]
    
    inputs = processor(images=images, segmentation_maps=labels, return_tensors="pt")
    return inputs

gta_test_ds.set_transform(val_transforms)

# Metrics

metric = evaluate.load("mean_iou")

def compute_metrics(eval_pred):
  with torch.no_grad():
    logits, labels = eval_pred
    logits_tensor = torch.from_numpy(logits)
    # scale the logits to the size of the label
    logits_tensor = nn.functional.interpolate(
        logits_tensor,
        size=labels.shape[-2:],
        mode="bilinear",
        align_corners=False,
    ).argmax(dim=1)

    pred_labels = logits_tensor.detach().cpu().numpy()
    metrics = metric.compute(
        predictions=pred_labels,
        references=labels,
        num_labels=len(id2label),
        ignore_index=19,
        reduce_labels=processor.do_reduce_labels,
    )
    
    # add per category metrics as individual key-value pairs
    per_category_accuracy = metrics.pop("per_category_accuracy").tolist()
    per_category_iou = metrics.pop("per_category_iou").tolist()

    metrics.update({f"accuracy_{id2label[i]}": v for i, v in enumerate(per_category_accuracy)})
    metrics.update({f"iou_{id2label[i]}": v for i, v in enumerate(per_category_iou)})
    
    return metrics



#### Cityscapes

In [16]:
from PIL import Image
import numpy as np
import json
from huggingface_hub import cached_download, hf_hub_url
from torchvision.transforms import ColorJitter
from torch import nn
import evaluate

# ID handling

repo_id = "huggingface/label-files"
filename = "cityscapes-id2label.json"
id2label = json.load(open(cached_download(hf_hub_url(repo_id, filename, repo_type="dataset")), "r"))
id2label = {int(k): v for k, v in id2label.items()}
label2id = {v: k for k, v in id2label.items()}


id2label[19] = 'ignore'
label2id['ignore'] = 19
num_labels = len(id2label)


# Transformations
jitter = ColorJitter(brightness=0.25, contrast=0.25, saturation=0.25, hue=0.1)

def handle_grayscale_image(image):
    np_image = np.array(image)
    if np_image.ndim == 2:
        tiled_image = np.tile(np.expand_dims(np_image, -1), 3)
        return Image.fromarray(tiled_image)
    else:
        return Image.fromarray(np_image)


def val_transforms(example_batch):
    images = [handle_grayscale_image(x) for x in example_batch["image"]]
    labels = [x for x in example_batch["annotation"]]
    inputs = processor(images, labels)
    return inputs

cty_test_ds.set_transform(val_transforms)

# Metrics

metric = evaluate.load("mean_iou")

def compute_metrics(eval_pred):
    
    with torch.no_grad(): # Don't want to store the gradients while computing this metric since it's validation
        logits, labels = eval_pred
        logits_tensor = torch.from_numpy(logits)
        # scale the logits to the size of the label
        logits_tensor = nn.functional.interpolate(
            logits_tensor,
            size=labels.shape[-2:],
            mode="bilinear",
            align_corners=False,
        ).argmax(dim=1)

        pred_labels = logits_tensor.detach().cpu().numpy()
        # currently using _compute instead of compute
        # see this issue for more info: https://github.com/huggingface/evaluate/pull/328#issuecomment-1286866576
        metrics = metric._compute(
            predictions=pred_labels,
            references=labels,
            num_labels=len(id2label),
            ignore_index=0,
            reduce_labels=processor.do_reduce_labels,
        )

        # add per category metrics as individual key-value pairs
        per_category_accuracy = metrics.pop("per_category_accuracy").tolist()
        per_category_iou = metrics.pop("per_category_iou").tolist()

        metrics.update({f"accuracy_{id2label[i]}": v for i, v in enumerate(per_category_accuracy)})
        metrics.update({f"iou_{id2label[i]}": v for i, v in enumerate(per_category_iou)})

        return metrics

# Evaluation

#### ORIGINAL model (SegFormer) and GTA

In [17]:
from transformers import Trainer, TrainingArguments

# Assuming evaluation doesn't require gradient updates
original_gta_eval_args = TrainingArguments(
    output_dir='./segformer_evaluation/sgf-v0-cty',  # Directory to store evaluation results
    do_train=False,
    do_eval=True,
    eval_steps=10,
    logging_steps=5,
    per_device_eval_batch_size=10,
    dataloader_num_workers=0,
    remove_unused_columns=False,

)

original_gta_trainer = Trainer(
    model=original_model,
    args=original_gta_eval_args,
    eval_dataset=gta_test_ds,
    compute_metrics=compute_metrics  # Your metrics function as defined earlier
)

In [18]:
# NOTE: Since the dataset is BIG, it takes several minutes to perform the evaluation

from tabulate import tabulate

original_gta_eval_results = original_gta_trainer.evaluate()

# Convert the dictionary to a list of lists for tabulate
data = [[key, value] for key, value in original_gta_eval_results.items()]

# Create the table
table = tabulate(data, headers=["Metric", "Value"], tablefmt="pretty")

print(table)

  return F.conv2d(input, weight, bias, self.stride,


+-----------------------------+---------------------+
|           Metric            |        Value        |
+-----------------------------+---------------------+
|          eval_loss          | 0.5294152498245239  |
|        eval_mean_iou        | 0.3657142979584772  |
|     eval_mean_accuracy      | 0.4490071911589912  |
|    eval_overall_accuracy    | 0.8048245155313702  |
|     eval_accuracy_road      |         nan         |
|   eval_accuracy_sidewalk    | 0.8854939033079142  |
|   eval_accuracy_building    | 0.9221117802639245  |
|     eval_accuracy_wall      | 0.4531118964645282  |
|     eval_accuracy_fence     | 0.18434359327693234 |
|     eval_accuracy_pole      | 0.19585926209927154 |
| eval_accuracy_traffic light |         0.0         |
| eval_accuracy_traffic sign  |         0.0         |
|  eval_accuracy_vegetation   | 0.8764858918100011  |
|    eval_accuracy_terrain    | 0.5068282016669517  |
|      eval_accuracy_sky      | 0.9757929543468974  |
|    eval_accuracy_person   

  acc = total_area_intersect / total_area_label


#### ORIGINAL model (SegFormer) and CityScapes

In [19]:
from transformers import Trainer, TrainingArguments

# Assuming evaluation doesn't require gradient updates
original_cty_eval_args = TrainingArguments(
    output_dir='./segformer_evaluation/sgf-v0-cty',  # Directory to store evaluation results
    do_train=False,
    do_eval=True,
    eval_steps=10,
    logging_steps=5,
    per_device_eval_batch_size=10,
    dataloader_num_workers=0,
    remove_unused_columns=False,

)

original_cty_trainer = Trainer(
    model=original_model,
    args=original_cty_eval_args,
    eval_dataset=cty_test_ds,
    compute_metrics=compute_metrics  # Your metrics function as defined earlier
)

In [20]:
from tabulate import tabulate

original_cty_eval_results = original_cty_trainer.evaluate()

# Convert the dictionary to a list of lists for tabulate
data = [[key, value] for key, value in original_cty_eval_results.items()]

# Create the table
table = tabulate(data, headers=["Metric", "Value"], tablefmt="pretty")

print(table)

  return F.conv2d(input, weight, bias, self.stride,


+-----------------------------+----------------------+
|           Metric            |        Value         |
+-----------------------------+----------------------+
|          eval_loss          |  1.3254294395446777  |
|        eval_mean_iou        | 0.08238448517048705  |
|     eval_mean_accuracy      |  0.3562056136454559  |
|    eval_overall_accuracy    |  0.5914905134656971  |
|     eval_accuracy_road      |         nan          |
|   eval_accuracy_sidewalk    |  0.2697358350534592  |
|   eval_accuracy_building    |  0.6456586171711773  |
|     eval_accuracy_wall      | 0.34087879595744164  |
|     eval_accuracy_fence     |  0.1841422404263489  |
|     eval_accuracy_pole      |  0.1184297041191259  |
| eval_accuracy_traffic light |         0.0          |
| eval_accuracy_traffic sign  |         0.0          |
|  eval_accuracy_vegetation   |  0.8448956095643261  |
|    eval_accuracy_terrain    |  0.8293257496975742  |
|      eval_accuracy_sky      |  0.9877007309439114  |
|    eval_

  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label


#### ORIGINAL model (SegFormer) and CityScapes

In [21]:
from transformers import Trainer, TrainingArguments

# Assuming evaluation doesn't require gradient updates
lora_cty_eval_args = TrainingArguments(
    output_dir='./segformer_evaluation/sgf-v0-lora-cty',  # Directory to store evaluation results
    do_train=False,
    do_eval=True,
    eval_steps=10,
    logging_steps=5,
    per_device_eval_batch_size=10,
    dataloader_num_workers=0,
    remove_unused_columns=False,
    label_names=["labels"]

)

# Compute Metrics Issue: https://github.com/huggingface/transformers/issues/29186
lora_cty_trainer = Trainer(
    model=lora_model,
    args=lora_cty_eval_args,
    eval_dataset=cty_test_ds,
    compute_metrics=compute_metrics  # Your metrics function as defined earlier
)

In [22]:
from tabulate import tabulate

lora_cty_eval_results = lora_cty_trainer.evaluate()

# Convert the dictionary to a list of lists for tabulate
data = [[key, value] for key, value in lora_cty_eval_results.items()]

# Create the table
table = tabulate(data, headers=["Metric", "Value"], tablefmt="pretty")

print(table)

  return F.conv2d(input, weight, bias, self.stride,


+-----------------------------+----------------------+
|           Metric            |        Value         |
+-----------------------------+----------------------+
|          eval_loss          | 0.17324452102184296  |
|        eval_mean_iou        | 0.18430768490184848  |
|     eval_mean_accuracy      |  0.8627898017208178  |
|    eval_overall_accuracy    |  0.9071604023011562  |
|     eval_accuracy_road      |         nan          |
|   eval_accuracy_sidewalk    |  0.8530138924322193  |
|   eval_accuracy_building    |  0.8949419367377679  |
|     eval_accuracy_wall      |  0.9192215409224207  |
|     eval_accuracy_fence     |  0.8682961170132303  |
|     eval_accuracy_pole      |  0.8069361180945014  |
| eval_accuracy_traffic light |  0.740641158221303   |
| eval_accuracy_traffic sign  |  0.830272932142559   |
|  eval_accuracy_vegetation   |  0.8866166828131362  |
|    eval_accuracy_terrain    |  0.9078715592436172  |
|      eval_accuracy_sky      |  0.994060500236837   |
|    eval_

  iou = total_area_intersect / total_area_union
  acc = total_area_intersect / total_area_label
