In [1]:
import torch
import numpy as np
import torchvision.transforms as T
import matplotlib.pyplot as plt
from collections import defaultdict
from PIL import Image
from Helper.ml_models import * 
import ray.cloudpickle as pickle
from tqdm import tqdm


2025-03-04 13:59:03.249466: I tensorflow/core/platform/cpu_feature_guard.cc:210] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


In [2]:
# Mapillary Vistas label IDs grouped by the Cityscapes class index they collapse into.
# Keys are Cityscapes indices (0-19); each tuple lists the Mapillary IDs folded into
# that class, in the order of the original hand-written table. The class names in the
# comments are taken from that table.
_mapillary_ids_by_cityscapes_id = {
    # Road: Road, Driveway, Parking, Parking Aisle, Road Shoulder, Service Lane
    0: (21, 16, 17, 18, 22, 23),
    # Sidewalk: Sidewalk, Pedestrian Area, Curb
    1: (24, 19, 4),
    # Building: Building, Garage, Tunnel, Bridge
    2: (27, 28, 29, 26),
    # Wall: Wall
    3: (12,),
    # Fence: Fence, Guard Rail, Barrier, Ambiguous Barrier, Concrete Block,
    #        Road Median, Road Side, Temporary Barrier
    4: (5, 6, 7, 2, 3, 8, 9, 11),
    # Pole: Pole, Pole Group, Utility Pole
    5: (85, 86, 88),
    # Traffic Light: General, Pedestrians, Upright, Horizontal, Cyclists, Other
    6: (90, 91, 92, 93, 94, 95),
    # Traffic Sign: Traffic Sign (Front), Traffic Sign (Front), Temporary (Front)
    # NOTE(review): the original table labels both 99 and 100 as
    # "Traffic Sign (Front)" -- TODO confirm the class name of one of them.
    7: (99, 100, 103),
    # Vegetation: Vegetation
    8: (64,),
    # Terrain: Terrain, Mountain, Sand, Snow
    9: (63, 59, 60, 62),
    # Sky: Sky
    10: (61,),
    # Person: Person, Person Group
    11: (30, 31),
    # Rider: Bicyclist, Motorcyclist, Other Rider
    12: (32, 33, 34),
    # Car: Car, Vehicle Group
    13: (108, 115),
    # Truck: Truck, Trailer
    14: (114, 113),
    # Bus: Bus
    15: (107,),
    # Train: On Rails
    16: (111,),
    # Motorcycle: Motorcycle
    17: (110,),
    # Bicycle: Bicycle
    18: (105,),
    # Unlabeled: Unlabeled
    19: (123,),
}

# Flat lookup: Mapillary Vistas ID -> Cityscapes class index.
mapillary_to_cityscapes = {
    mapillary_id: cityscapes_id
    for cityscapes_id, mapillary_ids in _mapillary_ids_by_cityscapes_id.items()
    for mapillary_id in mapillary_ids
}

# Any Mapillary class that is not listed above is treated as "unlabeled".
default_cityscapes_label = 19


In [3]:

def map_model_output_to_cityscapes(output_tensor, mapping=None, default_label=None):
    """
    Remap a tensor of Mapillary Vistas label IDs to Cityscapes label IDs.

    Every element of ``output_tensor`` is looked up in ``mapping``; elements whose
    value is not a key of the mapping (including negative, out-of-range or
    non-integral values) receive ``default_label``. The input is not modified.

    The remapping is done with a single lookup-table gather instead of one masked
    assignment per mapping entry, so the cost no longer grows with the number of
    mapped classes.

    Args:
        output_tensor (torch.Tensor): Tensor of label IDs. The caller in this
            notebook passes a 2D (H x W) argmax result, but any shape works.
        mapping (dict[int, int] | None): Source ID -> target ID. Defaults to the
            module-level ``mapillary_to_cityscapes``. Keys are expected to be
            integers spanning a small range, because the lookup table has
            ``max(key) - min(key) + 1`` entries.
        default_label (int | None): Value for IDs missing from ``mapping``.
            Defaults to the module-level ``default_cityscapes_label``.

    Returns:
        torch.Tensor: Tensor with the same shape, dtype and device as
        ``output_tensor`` holding the remapped label IDs.
    """
    if mapping is None:
        mapping = mapillary_to_cityscapes
    if default_label is None:
        default_label = default_cityscapes_label

    fallback = torch.full_like(output_tensor, fill_value=default_label)
    if not mapping:
        # Nothing to look up: every element keeps the default label.
        return fallback

    # Shift by the smallest key so that negative keys would index correctly too.
    lowest_id = min(mapping)
    table_size = max(mapping) - lowest_id + 1

    lookup = torch.full(
        (table_size,),
        default_label,
        dtype=output_tensor.dtype,
        device=output_tensor.device,
    )
    source_ids = torch.tensor(
        list(mapping.keys()), dtype=torch.long, device=output_tensor.device
    )
    target_ids = torch.tensor(
        list(mapping.values()), dtype=output_tensor.dtype, device=output_tensor.device
    )
    lookup[source_ids - lowest_id] = target_ids

    ids = output_tensor.long()
    offsets = ids - lowest_id
    # `ids == output_tensor` rejects non-integral values of a floating-point input,
    # which the previous equality-based implementation also left at the default.
    known = (ids == output_tensor) & (offsets >= 0) & (offsets < table_size)

    # Clamp only to keep the gather in bounds; clamped positions are discarded below.
    gathered = lookup[offsets.clamp(min=0, max=table_size - 1)]
    return torch.where(known, gathered, fallback)


In [4]:
# Directories handed to the Cityscapes K-fold dataset wrapper.
cityscapes_image_dir = '/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/CityscapesDaten/images'
cityscapes_annotation_dir = '/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/CityscapesDaten/semantic'
cityscapes_k_fold_csv_dir = '/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/Daten/CityscapesDaten'

# Build the dataset with fold 0 left out.
# NOTE(review): presumably the left-out fold is what `test_dataset` exposes later
# in this notebook -- confirm against K_Fold_Dataset.
cityscapes_dataset = K_Fold_Dataset(
    image_dir=cityscapes_image_dir,
    annotation_dir=cityscapes_annotation_dir,
    k_fold_csv_dir=cityscapes_k_fold_csv_dir,
    leave_out_fold=0,
)

# Run the dataset's own data-leak check.
cityscapes_dataset.check_for_data_leaks()


No data leaks found.


In [5]:
# Model-loading helper
def load_checkpointed_model_ray(model_name, checkpoint_path, num_classes=None):
    """
    Build a ``MapillaryTrainedModel`` and restore its weights from a pickled checkpoint.

    The checkpoint file is read with the notebook's ``pickle`` name and must hold a
    dict with a ``"model_state"`` entry; an ``"optimizer_state"`` entry is restored
    into the wrapper's optimizer when present.

    Args:
        model_name: Architecture name forwarded to ``MapillaryTrainedModel``.
        checkpoint_path: Path of the pickled checkpoint file.
        num_classes: Accepted for interface compatibility; not used by this function.

    Returns:
        The ``MapillaryTrainedModel`` instance with the checkpoint state loaded.

    Note:
        Unpickling can execute arbitrary code -- only load checkpoints from a
        trusted source.
    """
    # skip_local_load=True has to be passed explicitly; the original marks it as important.
    wrapper_kwargs = dict(
        model_name=model_name,
        width=520,
        height=520,
        weights_name='',
        skip_local_load=True,
    )
    restored = MapillaryTrainedModel(**wrapper_kwargs)

    with open(checkpoint_path, "rb") as checkpoint_file:
        state = pickle.load(checkpoint_file)

    # strict=True: the checkpoint must match the model's parameters exactly.
    restored.model.load_state_dict(state["model_state"], strict=True)

    if "optimizer_state" in state:
        restored.optimizer.load_state_dict(state["optimizer_state"])

    return restored


In [6]:
import os
import json
import torch
from torch.utils.data import DataLoader
from tqdm import tqdm

# Load the JSON file that maps each model name to the path of its best checkpoint.
best_checkpoints_path = "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/best_checkpoints_Mapillary.json"
with open(best_checkpoints_path, "r") as f:
    best_checkpoints = json.load(f)

# DataLoader over the Cityscapes test split, one image per batch.
test_loader = DataLoader(cityscapes_dataset.test_dataset, batch_size=1, shuffle=False, num_workers=4, pin_memory=True)

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Number of Cityscapes classes used for the confusion matrix (indices 0-19).
NUM_CITYSCAPES_CLASSES = 20


def _confusion_counts(target, prediction, num_classes):
    """
    Count (ground truth, prediction) pairs for one batch.

    Returns an int64 tensor of shape (num_classes, num_classes) where entry
    [i, j] is the number of pixels with ground-truth class i and predicted
    class j. Pixels whose ground truth or prediction lies outside
    [0, num_classes) are ignored, matching the per-class equality test this
    replaces.
    """
    target = target.reshape(-1).long()
    prediction = prediction.reshape(-1).long()
    valid = (
        (target >= 0) & (target < num_classes)
        & (prediction >= 0) & (prediction < num_classes)
    )
    # Encode each pair as a single index so one bincount yields the whole matrix.
    pair_index = target[valid] * num_classes + prediction[valid]
    counts = torch.bincount(pair_index, minlength=num_classes * num_classes)
    return counts.reshape(num_classes, num_classes)


evaluation_results = {}

for model_name, checkpoint_path in best_checkpoints.items():
    print(f"\nEvaluierung für Modell: {model_name}")
    if not checkpoint_path or not os.path.isfile(checkpoint_path):
        print(f"[WARNING] Checkpoint für {model_name} ungültig oder nicht gefunden. Überspringe dieses Modell.")
        continue

    try:
        # Restore the model from its checkpoint.
        model_loaded = load_checkpointed_model_ray(model_name, checkpoint_path)
    except Exception as e:
        # Best effort: report the failure and continue with the next model.
        print(f"[ERROR] Fehler beim Laden von {model_name}: {e}")
        continue

    model_loaded.model.to(device)
    model_loaded.model.eval()

    # Rows = ground truth, columns = prediction; allocated directly on the device.
    confusion_matrix = torch.zeros(
        NUM_CITYSCAPES_CLASSES, NUM_CITYSCAPES_CLASSES, dtype=torch.int64, device=device
    )

    # Inference over the test set with a progress bar.
    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc=f"Evaluating {model_name}", leave=True):
            images = images.to(device)
            labels = labels.to(device)  # ground truth, expected in Cityscapes indices 0-19

            # The 'out' head holds per-class scores; argmax over dim 1 gives label IDs.
            output = model_loaded.model(images)['out']
            predicted_mapillary = output.argmax(1).squeeze(0)

            # Collapse the Mapillary IDs onto the Cityscapes classes.
            predicted_cityscapes = map_model_output_to_cityscapes(predicted_mapillary)

            # One vectorised update per image instead of a 20x20 loop with a
            # device-to-host sync (.item()) for every cell.
            confusion_matrix += _confusion_counts(
                labels, predicted_cityscapes, NUM_CITYSCAPES_CLASSES
            )

    # Per-class IoU and mean IoU from the confusion matrix.
    intersection = confusion_matrix.diag().float()
    gt_total = confusion_matrix.sum(dim=1).float()
    pred_total = confusion_matrix.sum(dim=0).float()
    union = gt_total + pred_total - intersection
    iou_per_class = intersection / (union + 1e-6)  # epsilon avoids division by zero
    # NOTE(review): the mean runs over all 20 indices, so index 19 (the
    # "unlabeled" default) and any class with an empty union (IoU 0) are
    # included -- confirm this is the intended mIoU definition.
    miou = iou_per_class.mean().item()

    evaluation_results[model_name] = {
        "confusion_matrix": confusion_matrix.cpu().tolist(),
        "mIoU": miou,
        "iou_per_class": iou_per_class.cpu().tolist()
    }

    print(f"Ergebnisse für {model_name}: mIoU = {miou:.4f}")

# Persist the evaluation results as JSON.
save_path = "/home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/evaluation_results_mapillary_on_cityscapes.json"
os.makedirs(os.path.dirname(save_path), exist_ok=True)
with open(save_path, "w") as f:
    json.dump(evaluation_results, f, indent=4)

print(f"\nEvaluationsergebnisse wurden gespeichert in: {save_path}")



Evaluierung für Modell: fcn_resnet101
Using CUDA GPU
Model loaded: fcn_resnet101 | Device: cuda 
Error loading Model with Epoch latest: Error(s) in loading state_dict for FCN:
	size mismatch for classifier.4.weight: copying a param with shape torch.Size([20, 512, 1, 1]) from checkpoint, the shape in current model is torch.Size([124, 512, 1, 1]).
	size mismatch for classifier.4.bias: copying a param with shape torch.Size([20]) from checkpoint, the shape in current model is torch.Size([124]).
Skipping local .pth load due to error above.


Evaluating fcn_resnet101: 100%|██████████| 695/695 [00:40<00:00, 17.19it/s]


Ergebnisse für fcn_resnet101: mIoU = 0.4804

Evaluierung für Modell: deeplabv3_resnet50
Using CUDA GPU
Model loaded: deeplabv3_resnet50 | Device: cuda 


Evaluating deeplabv3_resnet50: 100%|██████████| 695/695 [00:36<00:00, 19.05it/s]


Ergebnisse für deeplabv3_resnet50: mIoU = 0.4747

Evaluierung für Modell: fcn_resnet50
Using CUDA GPU
Model loaded: fcn_resnet50 | Device: cuda 


Evaluating fcn_resnet50: 100%|██████████| 695/695 [00:34<00:00, 20.13it/s]


Ergebnisse für fcn_resnet50: mIoU = 0.4728

Evaluierung für Modell: deeplabv3_resnet101
Using CUDA GPU
Model loaded: deeplabv3_resnet101 | Device: cuda 


Evaluating deeplabv3_resnet101: 100%|██████████| 695/695 [00:42<00:00, 16.36it/s]

Ergebnisse für deeplabv3_resnet101: mIoU = 0.4726

Evaluationsergebnisse wurden gespeichert in: /home/jan/studienarbeit/Studienarbeit-CODE_Semantische_Segmentation/FINAL_DATEN/evaluation_results_mapillary_on_cityscapes.json



