In [None]:
import os
import cv2
import numpy as np
from glob import glob
import torch
import torch.optim as optim
from torch.utils.data import DataLoader, Dataset
import segmentation_models_pytorch as smp
import matplotlib.pyplot as plt
from torch.amp import GradScaler
from tqdm import tqdm
from torch import autocast
from torchmetrics import JaccardIndex, F1Score

In [None]:
# Dataset locations (relative to the notebook's working directory).
Image_dir_train = "Datasets/Combined_Augmented/train/images"
Mask_dir_train = "Datasets/Combined_Augmented/train/labels"
Image_dir_val = "Datasets/Combined_Augmented/val/images"
Mask_dir_val = "Datasets/Combined_Augmented/val/labels"
Image_dir_test = "Datasets/PrivateDataset_BuildingsOnly/test/images"
Mask_dir_test= "Datasets/PrivateDataset_BuildingsOnly/test/labels"

# Run settings.
batch_size = 9
patience = 20
epochs = 65

# `is_available` has to be *called*: the bare function object is always truthy,
# which would select 'cuda' even on a machine without a GPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

In [None]:
# Input-normalisation function that smp associates with the
# 'timm-efficientnet-b8' encoder and its 'imagenet' weights.
# NOTE(review): this single function preprocesses the inputs of every model in
# the ensemble, not only the EfficientNet-B8 one — confirm the other encoders
# were trained with the same normalisation.
preprocess_input = smp.encoders.get_preprocessing_fn('timm-efficientnet-b8', pretrained='imagenet')

# Dataset of (image, mask) pairs read from disk with OpenCV.
class SegmentationDataset(Dataset):
    """Pairs of RGB images and single-channel label masks, resized to 512x512.

    Parameters:
    img_paths (list): Paths of the input images.
    mask_paths (list): Paths of the label masks; ``mask_paths[i]`` is used
                       together with ``img_paths[i]``.
    transform: Stored on the instance but not applied anywhere in this class.
    """

    def __init__(self, img_paths: list, mask_paths: list, transform=None):
        self.img_paths = img_paths
        self.mask_paths = mask_paths
        self.transform = transform

    def __len__(self):
        """Number of samples, taken from the image list."""
        return len(self.img_paths)

    def __getitem__(self, index):
        """Load, resize and normalise the sample at ``index``.

        Returns:
        tuple: ``(image, mask)`` where ``image`` is a (3, 512, 512) tensor
               produced by ``preprocess_input`` and ``mask`` is a
               (1, 512, 512) int8 tensor holding the raw mask values.

        Raises:
        FileNotFoundError: If OpenCV cannot read the image or the mask.
        """
        image_path = self.img_paths[index]
        image = cv2.imread(image_path)
        if image is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise FileNotFoundError(f"Could not read image: {image_path}")
        image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
        image = cv2.resize(image, (512, 512))
        image = image.astype('float32') / 255.0

        image = preprocess_input(image)

        mask_path = self.mask_paths[index]
        mask = cv2.imread(mask_path, cv2.IMREAD_GRAYSCALE)
        if mask is None:
            raise FileNotFoundError(f"Could not read mask: {mask_path}")
        # Nearest-neighbour keeps the values discrete; the default bilinear
        # interpolation would blend neighbouring class ids into invalid labels.
        mask = cv2.resize(mask, (512, 512), interpolation=cv2.INTER_NEAREST)
        # NOTE(review): int8 assumes mask values stay below 128 — confirm.
        mask = mask.astype('int8')
        mask = np.expand_dims(mask, axis=0)  # (1, 512, 512)

        image = torch.tensor(image).permute(2, 0, 1)  # HWC -> CHW
        mask = torch.tensor(mask)

        return image, mask


def _list_pngs(directory):
    """Return the sorted paths of the ``.png`` files directly inside ``directory``."""
    return sorted(glob(os.path.join(directory, "*.png")))


# List the image / mask files of each split. Both lists are sorted, so the
# i-th image is paired with the i-th mask.
train_images = _list_pngs(Image_dir_train)
train_mask   = _list_pngs(Mask_dir_train)

val_images = _list_pngs(Image_dir_val)
val_mask   = _list_pngs(Mask_dir_val)

test_images = _list_pngs(Image_dir_test)
test_mask   = _list_pngs(Mask_dir_test)

# Positional pairing only makes sense when both lists have the same length;
# fail here instead of silently mispairing samples later.
for _split, _images, _masks in (
    ("train", train_images, train_mask),
    ("val", val_images, val_mask),
    ("test", test_images, test_mask),
):
    assert len(_images) == len(_masks), (
        f"{_split}: {len(_images)} images but {len(_masks)} masks"
    )

# Build the datasets (no transform is passed to any of them).
train_dataset = SegmentationDataset(train_images, train_mask)
val_dataset   = SegmentationDataset(val_images, val_mask)
test_dataset  = SegmentationDataset(test_images, test_mask)

# Only the training loader shuffles; validation / test keep file order.
train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
val_loader   = DataLoader(val_dataset, batch_size=batch_size, shuffle=False)
test_loader  = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
def load_model(encoder_name, checkpoint_path, device):
    """Build an SMP U-Net for ``encoder_name`` and load its trained weights.

    Parameters:
    encoder_name (str): Encoder identifier understood by ``smp.Unet``.
    checkpoint_path (str): Path to a state dict saved for this architecture.
    device (str): Device the model and the checkpoint tensors are placed on.

    Returns:
    The model (3 input channels, 3 output classes) in eval mode.
    """
    # encoder_weights=None: the strict load_state_dict below replaces every
    # parameter and buffer, so fetching ImageNet weights first would only cost
    # a download that is immediately thrown away.
    model = smp.Unet(encoder_name=encoder_name, encoder_weights=None, in_channels=3, classes=3).to(device)
    # weights_only=True restricts unpickling to tensors and plain containers,
    # so a tampered checkpoint file cannot execute arbitrary code.
    state_dict = torch.load(checkpoint_path, map_location=device, weights_only=True)
    model.load_state_dict(state_dict)
    model.eval()
    return model

# Loss terms handed to the evaluation routine.
# CrossEntropyLoss applies log-softmax internally, so it expects unnormalised
# scores (logits) rather than probabilities.
# NOTE(review): DiceLoss is created with its defaults; per the smp docs that
# means from_logits=True (it also applies softmax itself) — confirm for the
# installed version.
criterion_ce = torch.nn.CrossEntropyLoss()  # Cross-Entropy Loss
criterion_dice = smp.losses.DiceLoss(mode="multiclass")  # Dice Loss

In [None]:
# Ensemble weight of each model, matched by position with the `models` list.
# The ensemble functions re-normalise these so they sum to 1.
weights = [0.2,0.6,0.2]

In [None]:
def infer_and_visualize(models,image_path,device, weights=None):
    """
    Run weighted-ensemble inference on a single image and plot the result.

    The image is read from disk, converted to RGB, resized to 512x512, scaled
    to [0, 1] and normalised with ``preprocess_input`` (the same steps the
    dataset class applies). Each model's softmax output is combined as a
    weighted arithmetic mean, and the arg-max class map is shown next to the
    input image and as a semi-transparent overlay.

    Parameters:
    models (list): PyTorch models to ensemble.
    image_path (str): Path of the image to segment.
    device (str): Device used for inference ('cuda' or 'cpu').
    weights (list): One weight per model (same length as ``models``).
                    If None, every model gets the same weight.

    Returns:
    numpy.ndarray: (512, 512) array with the predicted class index per pixel.

    Raises:
    AssertionError: If ``weights`` and ``models`` differ in length.
    FileNotFoundError: If OpenCV cannot read ``image_path``.
    """

    # Fall back to equal weights when none are given.
    if weights is None:
        weights = [1.0] * len(models)

    # One weight per model is required.
    assert len(weights) == len(models), "O número de pesos deve ser igual ao número de modelos"

    # Normalise the weights so they sum to 1.0.
    weights = np.array(weights) / np.sum(weights)

    # cv2.imread's second argument is a *read flag*, not a colour-conversion
    # code; the BGR -> RGB swap has to be a separate cvtColor call.
    image = cv2.imread(image_path)
    if image is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError(f"Could not read image: {image_path}")
    image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)
    image = cv2.resize(image, (512, 512))

    # Same scaling + normalisation as SegmentationDataset.__getitem__.
    image_array = preprocess_input(image.astype('float32') / 255.0)  # (512, 512, 3)
    image_tensor = torch.tensor(image_array).permute(2, 0, 1).unsqueeze(0).to(device).float()  # (1, 3, 512, 512)

    # Per-model class probabilities, moved to NumPy: (1, num_classes, 512, 512).
    with torch.no_grad():
        probabilities = [
            torch.softmax(model(image_tensor), dim=1).cpu().numpy()
            for model in models
        ]

    # Direct weighted (arithmetic) mean of the probabilities.
    final_output = np.zeros_like(probabilities[0])
    for weight, probs in zip(weights, probabilities):
        final_output += weight * probs

    # Most probable class per pixel.
    final_mask = np.argmax(final_output, axis=1).squeeze()  # (512, 512)

    fig, (ax_image, ax_mask, ax_overlay) = plt.subplots(1, 3, figsize=(15, 5))

    ax_image.set_title("Original Image")
    ax_image.imshow(image)

    ax_mask.set_title("Mask (Ensemble)")
    ax_mask.imshow(final_mask, cmap='jet')

    ax_overlay.set_title("Overlap")
    ax_overlay.imshow(image)
    ax_overlay.imshow(final_mask, cmap='jet', alpha=0.5)

    plt.show()

    return final_mask

Modelos disponíveis para carregar (nome do encoder | caminho do checkpoint salvo):

In [None]:
# (encoder name, checkpoint path) pair for each trained ensemble member.
# The order matters: ensemble weights are matched to models by position.
model_configs = [
    ("timm-efficientnet-b8", "uNetB8CombinedAug.pth"),
    ("tu-maxvit_small_tf_512", "UNetMaxVitSCombinedAug.pth"),
    ("tu-convformer_s36", "UNetConvFormerSCombinedAug.pth"),
]

Carrega todos os modelos:

In [None]:
# Instantiate every ensemble member; the list follows the order of model_configs.
models = [load_model(encoder, path, device) for encoder, path in model_configs]
# Separate second instance of the first configuration (not an alias of models[0]).
# NOTE(review): model_novo is not referenced in the cells visible here — confirm
# it is still needed, since it keeps a second copy of that model on the device.
model_novo = load_model(model_configs[0][0], model_configs[0][1], device)

Fazer a predição de várias imagens:

In [None]:
# Segment one sample image with the weighted ensemble and show the plots.
# The trailing semicolon keeps the cell from echoing any return value.
infer_and_visualize(
    models,
    "Datasets/CITY_OSM/test/images/32.png",
    device,
    weights=weights,
);

In [None]:
def test_ensemble_models(models, test_loader, device, criterion_ce, criterion_dice, weights=None, num_classes=3):
    """
    Evaluate a weighted ensemble on a test set and report loss, IoU and F1.

    For every batch, each model's softmax output is combined as a weighted
    arithmetic mean. The loss is ``0.5 * CE + 0.5 * Dice`` of that ensemble
    prediction; IoU and F1 are accumulated per class over the whole loader.

    Parameters:
    models (list): PyTorch models to ensemble.
    test_loader (DataLoader): Yields ``(images, mask)`` batches; ``mask`` is
                              squeezed on dim 1 and cast to long.
    device (torch.device or str): Device used for inference.
    criterion_ce (torch.nn.Module): Cross-entropy criterion expecting logits.
    criterion_dice (torch.nn.Module): Dice criterion; assumed to expect logits
                                      as well (smp's default ``from_logits=True``).
    weights (list): One weight per model (None for equal weights).
    num_classes (int): Number of segmentation classes (default 3).

    Returns:
    tuple: (avg_loss, per_class_iou, mean_iou, per_class_f1, mean_f1)

    Raises:
    AssertionError: If ``weights`` and ``models`` differ in length.
    ValueError: If ``test_loader`` yields no batches.
    """
    # Fall back to equal weights when none are given.
    if weights is None:
        weights = [1.0] * len(models)

    # One weight per model is required.
    assert len(weights) == len(models), "O número de pesos deve ser igual ao número de modelos"

    # Without batches the average loss is undefined; say so explicitly instead
    # of ending in a bare ZeroDivisionError after the loop.
    if len(test_loader) == 0:
        raise ValueError("test_loader is empty; nothing to evaluate")

    # Normalise the weights so they sum to 1.0.
    weights = torch.tensor(weights, device=device) / sum(weights)

    test_loss = 0.0

    # average='none' keeps one value per class instead of a single mean.
    jaccard = JaccardIndex(num_classes=num_classes, average='none', task="multiclass").to(device)
    f1score = F1Score(num_classes=num_classes, average='none', task="multiclass").to(device)

    with torch.no_grad():
        for images, mask in tqdm(test_loader, desc="Testing Ensemble"):
            images, mask = images.to(device).float(), mask.to(device)
            mask = mask.squeeze(1).long()  # (B, 1, H, W) -> (B, H, W)

            # Weighted arithmetic mean of the per-model class probabilities.
            final_output = None
            for weight, model in zip(weights, models):
                weighted = weight * torch.softmax(model(images), dim=1)
                final_output = weighted if final_output is None else final_output + weighted

            # Both criteria apply softmax themselves, so feeding them
            # probabilities would squash the prediction a second time. The log
            # of a probability vector is a valid logit vector for it
            # (softmax(log p) == p), so the losses are evaluated on the actual
            # ensemble distribution. The clamp avoids log(0).
            eps = torch.finfo(final_output.dtype).eps
            ensemble_logits = torch.log(final_output.clamp_min(eps))
            loss_ce = criterion_ce(ensemble_logits, mask)
            loss_dice = criterion_dice(ensemble_logits, mask)
            loss = 0.5 * loss_ce + 0.5 * loss_dice

            test_loss += loss.item()

            # Most probable class per pixel.
            predicted_classes = torch.argmax(final_output, dim=1)

            # Accumulate the metrics for the current batch.
            jaccard.update(predicted_classes, mask)
            f1score.update(predicted_classes, mask)

    # Final metrics over the whole loader.
    avg_loss = test_loss / len(test_loader)
    per_class_iou = jaccard.compute()  # one IoU value per class
    per_class_f1 = f1score.compute()   # one F1 value per class

    mean_iou = per_class_iou.mean()
    mean_f1 = per_class_f1.mean()

    # Report.
    print(f"Test Loss: {avg_loss:.4f}")
    for i in range(num_classes):
        print(f"Class {i}: IoU = {per_class_iou[i]:.4f}, F1 = {per_class_f1[i]:.4f}")
    print(f"Mean IoU: {mean_iou:.4f}")
    print(f"Mean F1: {mean_f1:.4f}")

    return avg_loss, per_class_iou, mean_iou, per_class_f1, mean_f1

# Evaluate the weighted ensemble on the test split. The unpacked values are, in
# order: avg_loss, per_class_iou, mean_iou, per_class_f1, mean_f1.
x , y, z, w, u = test_ensemble_models(models, test_loader, device, criterion_ce, criterion_dice, weights)
