In [1]:
import os
import pandas as pd
import numpy as np
import random
from PIL import Image
import time
import matplotlib.pyplot as plt
import torch
from torch import nn
from torchvision import models, transforms
from torch.utils.data import Dataset, DataLoader
from torch.utils.tensorboard import SummaryWriter
from torchvision.transforms import functional as F
from torch.cuda.amp import autocast, GradScaler
import segmentation_models_pytorch as smp
from sklearn.metrics import classification_report, jaccard_score, confusion_matrix
import albumentations as A
from albumentations.pytorch import ToTensorV2

## Test all models

In [2]:
# Release cached, currently unused GPU memory held by PyTorch's allocator
# before the evaluation cells below load their models.
torch.cuda.empty_cache()

### General classes, functions and parameters

In [3]:
class SegmentationDataset(Dataset):
    """Image/mask pairs read from two directories.

    Both directory listings are sorted and paired by position, so the two
    folders must hold the same number of entries.

    Args:
        images_dir: directory containing the input images.
        masks_dir: directory containing the label masks.
        transform: optional callable invoked as ``transform(image=..., mask=...)``
            that returns a mapping with ``"image"`` and ``"mask"`` entries; the
            returned mask must support ``.long()``.
    """

    def __init__(self, images_dir, masks_dir, transform=None):
        self.images_dir = images_dir
        self.masks_dir = masks_dir
        self.transform = transform
        self.images = sorted(os.listdir(images_dir))
        self.masks = sorted(os.listdir(masks_dir))

        assert len(self.images) == len(self.masks)

    def __len__(self):
        """Number of image/mask pairs."""
        return len(self.images)

    def __getitem__(self, idx):
        """Load pair ``idx`` and return ``(image, mask)``; the mask is a long tensor."""
        image_file = os.path.join(self.images_dir, self.images[idx])
        mask_file = os.path.join(self.masks_dir, self.masks[idx])

        rgb = np.array(Image.open(image_file).convert("RGB"))
        target = np.array(Image.open(mask_file))

        # Masks stored with several channels: only the first one is used.
        if target.ndim == 3:
            target = target[:, :, 0]

        if not self.transform:
            # No transform supplied: HWC uint8 -> CHW float scaled to [0, 1].
            image_tensor = torch.from_numpy(rgb).permute(2, 0, 1).float() / 255.0
            return image_tensor, torch.from_numpy(target).long()

        result = self.transform(image=rgb, mask=target)
        return result["image"], result["mask"].long()

In [None]:
def compute_metrics(preds, labels, num_classes, ignore_class=0):
    """Compute per-class and mean segmentation metrics from flat predictions.

    Pixels whose ground-truth label equals ``ignore_class`` are discarded and
    that class is removed from the confusion matrix, so the per-class arrays
    hold one entry per remaining class in ascending label order.

    Args:
        preds: array-like of predicted class indices.
        labels: array-like of ground-truth class indices, same size as ``preds``.
        num_classes: total number of classes, including ``ignore_class``.
        ignore_class: label to exclude from evaluation. ``None`` keeps every
            class.

    Returns:
        dict with the scalar entries ``mean_iou``, ``mean_accuracy``,
        ``mean_precision`` and ``mean_recall`` and the per-class arrays
        ``iou``, ``precision`` and ``recall``.
    """
    preds = np.asarray(preds).reshape(-1).astype(np.int64)
    labels = np.asarray(labels).reshape(-1).astype(np.int64)

    # Drop pixels labelled with the ignored class. The filter must follow the
    # parameter: a hard-coded 0 here would filter one class and delete another
    # whenever ignore_class != 0.
    if ignore_class is not None:
        keep = labels != ignore_class
        preds = preds[keep]
        labels = labels[keep]

    # Confusion matrix (rows = ground truth, columns = prediction) in a single
    # pass. Values outside [0, num_classes) are dropped.
    in_range = (
        (labels >= 0) & (labels < num_classes)
        & (preds >= 0) & (preds < num_classes)
    )
    flat_index = labels[in_range] * num_classes + preds[in_range]
    cm = np.bincount(flat_index, minlength=num_classes * num_classes)
    cm = cm.reshape(num_classes, num_classes)

    # Remove the ignored class from both axes (only if it is a real class index).
    if ignore_class is not None and 0 <= ignore_class < num_classes:
        cm = np.delete(cm, ignore_class, axis=0)
        cm = np.delete(cm, ignore_class, axis=1)

    TP = np.diag(cm)
    FP = cm.sum(axis=0) - TP
    FN = cm.sum(axis=1) - TP
    TN = cm.sum() - (TP + FP + FN)

    # The small epsilon keeps classes that never occur from dividing by zero.
    iou = TP / (TP + FP + FN + 1e-7)
    accuracy = (TP + TN) / (TP + TN + FP + FN + 1e-7)
    precision = TP / (TP + FP + 1e-7)
    recall = TP / (TP + FN + 1e-7)

    return {
        "mean_iou": np.mean(iou),
        "mean_accuracy": np.mean(accuracy),
        "mean_precision": np.mean(precision),
        "mean_recall": np.mean(recall),
        "iou": iou,
        "precision": precision,
        "recall": recall
    }

In [5]:
def test_model(model, dataloader, device, num_classes, class_names=None):
    """Run ``model`` over ``dataloader``, print and return segmentation metrics.

    Args:
        model: network returning per-class scores with the class axis at dim 1.
        dataloader: iterable yielding ``(images, masks)`` batches.
        device: device the model lives on; image batches are moved there.
        num_classes: total number of classes, including the ignored class 0.
        class_names: names printed for the evaluated classes, in metric order.
            When omitted, the notebook-level ``class_names`` variable is used
            (older cells rely on that global).

    Returns:
        The metrics dict produced by ``compute_metrics``.

    Raises:
        RuntimeError: if ``dataloader`` yields no batch.
    """
    if class_names is None:
        # Backward compatibility with cells that define `class_names` globally.
        class_names = globals().get("class_names", [])

    model.eval()
    pred_batches = []
    label_batches = []

    with torch.no_grad():
        for images, masks in dataloader:
            # Only the images are needed on the device; masks are just collected.
            images = images.to(device)

            outputs = model(images)
            preds = torch.argmax(outputs, dim=1)

            # Move each batch to the CPU immediately so GPU memory use does not
            # grow with the size of the test set.
            pred_batches.append(preds.cpu())
            label_batches.append(masks.cpu())

    if not pred_batches:
        raise RuntimeError("test_model: the dataloader produced no batches")

    all_preds = torch.cat(pred_batches).reshape(-1)
    all_labels = torch.cat(label_batches).reshape(-1)

    metrics = compute_metrics(
        all_preds.numpy(), all_labels.numpy(),
        num_classes=num_classes, ignore_class=0
    )

    print(f"\nGENERAL METRICS:")
    print(f"-Mean IoU: {metrics['mean_iou']:.4f}")
    print(f"-Mean Accuracy: {metrics['mean_accuracy']:.4f}")
    print(f"-Mean Precision: {metrics['mean_precision']:.4f}")
    print(f"-Mean Recall: {metrics['mean_recall']:.4f}")

    print("\nCLASS METRICS:")
    for idx, name in enumerate(class_names):
        try:
            # idx + 1 because class 0 is ignored and removed from the metrics.
            print(f"\nClass '{name}' (index {idx + 1})")
            print(f"-IoU: {metrics['iou'][idx]:.4f}")
            print(f"-Precision: {metrics['precision'][idx]:.4f}")
            print(f"-Recall: {metrics['recall'][idx]:.4f}")
        except IndexError:
            # More names than evaluated classes: report it and keep going.
            print(f"(Class'{name}' not found — Index out of range)")

    return metrics

### Model 1 - Only trained on CMP

In [None]:
# Test model 1 (trained on CMP only) on the test split and save its metrics.

# Names of the evaluated classes in metric order (label 0 is ignored).
# test_model reads this variable as a global.
class_names = ['background', 'porosity', 'wall']

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 4
ignore_class = 0
image_size = (512, 1024)  # (height, width) passed to A.Resize
model_path = "../best_deeplabmodels/V1_only_cmp_.pth"
test_images_dir = "../training_images/segmentation_splits/test_images"
test_masks_dir = "../training_images/segmentation_splits/test_labels"
output_dir = "../test_results"
results_file_name = "V1_only_cmp_stats_.csv"
os.makedirs(output_dir, exist_ok=True)

# Positions inside the per-class metric arrays (the ignored class is removed).
idx_porosity = 1
idx_wall = 2

transform = A.Compose([
    A.Resize(height=image_size[0], width=image_size[1]),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2()])

# encoder_weights=None: the checkpoint loaded below overwrites every weight,
# so fetching the ImageNet encoder weights first would be wasted work.
model = smp.DeepLabV3Plus(
    encoder_name="resnet101",
    encoder_weights=None,
    in_channels=3,
    classes=num_classes
)

# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# Dataset and loader.
# drop_last=False: every test image must contribute to the reported metrics.
test_dataset = SegmentationDataset(test_images_dir, test_masks_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, drop_last=False,
                         pin_memory=(device.type == "cuda"))

metrics = test_model(model, test_loader, device, num_classes=num_classes)

df = pd.DataFrame({
    "metric": ["mean_iou", "mean_accuracy", "mean_precision", "mean_recall",
               "iou_porosity", "precision_porosity", "recall_porosity",
               "iou_wall", "precision_wall", "recall_wall"],
    "value": [metrics['mean_iou'], metrics['mean_accuracy'], metrics['mean_precision'], metrics['mean_recall'],
              metrics['iou'][idx_porosity], metrics['precision'][idx_porosity], metrics['recall'][idx_porosity],
              metrics['iou'][idx_wall], metrics['precision'][idx_wall], metrics['recall'][idx_wall]]
})

output_file = os.path.join(output_dir, results_file_name)
df.to_csv(output_file, index=False)


GENERAL METRICS:
-Mean IoU: 0.4319
-Mean Accuracy: 0.7552
-Mean Precision: 0.6402
-Mean Recall: 0.5910

CLASS METRICS:

Class 'background' (index 1)
-IoU: 0.5598
-Precision: 0.6192
-Recall: 0.8538

Class 'porosity' (index 2)
-IoU: 0.3834
-Precision: 0.6383
-Recall: 0.4899

Class 'wall' (index 3)
-IoU: 0.3524
-Precision: 0.6632
-Recall: 0.4292


### Model 2, only trained on Mapillary

In [None]:
# Test model 2 (trained on Mapillary only) on the test split and save its metrics.

# Names of the evaluated classes in metric order (label 0 is ignored).
# test_model reads this variable as a global.
class_names = ['background', 'porosity', 'wall']

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 4
ignore_class = 0
image_size = (512, 1024)  # (height, width) passed to A.Resize
model_path = "../best_deeplabmodels/V2_only_mapillary_.pth"
test_images_dir = "../training_images/segmentation_splits/test_images"
test_masks_dir = "../training_images/segmentation_splits/test_labels"
output_dir = "../test_results"
results_file_name = "V2_only_mapillary_stats_.csv"
os.makedirs(output_dir, exist_ok=True)

# Positions inside the per-class metric arrays (the ignored class is removed).
idx_porosity = 1
idx_wall = 2

transform = A.Compose([
    A.Resize(height=image_size[0], width=image_size[1]),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2()])

# encoder_weights=None: the checkpoint loaded below overwrites every weight,
# so fetching the ImageNet encoder weights first would be wasted work.
model = smp.DeepLabV3Plus(
    encoder_name="resnet101",
    encoder_weights=None,
    in_channels=3,
    classes=num_classes
)

# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# Dataset and loader.
# drop_last=False: every test image must contribute to the reported metrics.
test_dataset = SegmentationDataset(test_images_dir, test_masks_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, drop_last=False,
                         pin_memory=(device.type == "cuda"))

metrics = test_model(model, test_loader, device, num_classes=num_classes)

df = pd.DataFrame({
    "metric": ["mean_iou", "mean_accuracy", "mean_precision", "mean_recall",
               "iou_porosity", "precision_porosity", "recall_porosity",
               "iou_wall", "precision_wall", "recall_wall"],
    "value": [metrics['mean_iou'], metrics['mean_accuracy'], metrics['mean_precision'], metrics['mean_recall'],
              metrics['iou'][idx_porosity], metrics['precision'][idx_porosity], metrics['recall'][idx_porosity],
              metrics['iou'][idx_wall], metrics['precision'][idx_wall], metrics['recall'][idx_wall]]
})

output_file = os.path.join(output_dir, results_file_name)
df.to_csv(output_file, index=False)


GENERAL METRICS:
-Mean IoU: 0.6951
-Mean Accuracy: 0.8925
-Mean Precision: 0.8111
-Mean Recall: 0.8274

CLASS METRICS:

Class 'background' (index 1)
-IoU: 0.7874
-Precision: 0.8658
-Recall: 0.8969

Class 'porosity' (index 2)
-IoU: 0.6065
-Precision: 0.7151
-Recall: 0.7997

Class 'wall' (index 3)
-IoU: 0.6914
-Precision: 0.8523
-Recall: 0.7855


### Model 3, trained on both datasets

In [None]:
# Test model 3 (trained on both datasets) on the test split and save its metrics.

# Names of the evaluated classes in metric order (label 0 is ignored).
# test_model reads this variable as a global.
class_names = ['background', 'porosity', 'wall']

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 4
ignore_class = 0
image_size = (512, 1024)  # (height, width) passed to A.Resize
model_path = "../best_deeplabmodels/V3_combined_.pth"
test_images_dir = "../training_images/segmentation_splits/test_images"
test_masks_dir = "../training_images/segmentation_splits/test_labels"
output_dir = "../test_results"
results_file_name = "V3_combined_stats_.csv"
os.makedirs(output_dir, exist_ok=True)

# Positions inside the per-class metric arrays (the ignored class is removed).
idx_porosity = 1
idx_wall = 2

transform = A.Compose([
    A.Resize(height=image_size[0], width=image_size[1]),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2()])

# encoder_weights=None: the checkpoint loaded below overwrites every weight,
# so fetching the ImageNet encoder weights first would be wasted work.
model = smp.DeepLabV3Plus(
    encoder_name="resnet101",
    encoder_weights=None,
    in_channels=3,
    classes=num_classes
)

# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# Dataset and loader.
# drop_last=False: every test image must contribute to the reported metrics.
test_dataset = SegmentationDataset(test_images_dir, test_masks_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, drop_last=False,
                         pin_memory=(device.type == "cuda"))

metrics = test_model(model, test_loader, device, num_classes=num_classes)

df = pd.DataFrame({
    "metric": ["mean_iou", "mean_accuracy", "mean_precision", "mean_recall",
               "iou_porosity", "precision_porosity", "recall_porosity",
               "iou_wall", "precision_wall", "recall_wall"],
    "value": [metrics['mean_iou'], metrics['mean_accuracy'], metrics['mean_precision'], metrics['mean_recall'],
              metrics['iou'][idx_porosity], metrics['precision'][idx_porosity], metrics['recall'][idx_porosity],
              metrics['iou'][idx_wall], metrics['precision'][idx_wall], metrics['recall'][idx_wall]]
})

output_file = os.path.join(output_dir, results_file_name)
df.to_csv(output_file, index=False)


GENERAL METRICS:
-Mean IoU: 0.6872
-Mean Accuracy: 0.8906
-Mean Precision: 0.8225
-Mean Recall: 0.8076

CLASS METRICS:

Class 'background' (index 1)
-IoU: 0.7777
-Precision: 0.8232
-Recall: 0.9336

Class 'porosity' (index 2)
-IoU: 0.5958
-Precision: 0.7645
-Recall: 0.7297

Class 'wall' (index 3)
-IoU: 0.6881
-Precision: 0.8797
-Recall: 0.7595


### Model 4: trained on CMP, fine-tuned with Mapillary on the decoder

In [None]:
# Test model 4 (trained on CMP, fine-tuned with Mapillary on the decoder)
# on the test split and save its metrics.

# Names of the evaluated classes in metric order (label 0 is ignored).
# test_model reads this variable as a global.
class_names = ['background', 'porosity', 'wall']

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 4
ignore_class = 0
image_size = (512, 1024)  # (height, width) passed to A.Resize
model_path = "../best_deeplabmodels/V4_cmp_ft_decoder_.pth"
test_images_dir = "../training_images/segmentation_splits/test_images"
test_masks_dir = "../training_images/segmentation_splits/test_labels"
output_dir = "../test_results"
results_file_name = "V4_cmp_ft_decoder_stats_.csv"
os.makedirs(output_dir, exist_ok=True)

# Positions inside the per-class metric arrays (the ignored class is removed).
idx_porosity = 1
idx_wall = 2

transform = A.Compose([
    A.Resize(height=image_size[0], width=image_size[1]),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2()])

# encoder_weights=None: the checkpoint loaded below overwrites every weight,
# so fetching the ImageNet encoder weights first would be wasted work.
model = smp.DeepLabV3Plus(
    encoder_name="resnet101",
    encoder_weights=None,
    in_channels=3,
    classes=num_classes
)

# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# Dataset and loader.
# drop_last=False: every test image must contribute to the reported metrics.
test_dataset = SegmentationDataset(test_images_dir, test_masks_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, drop_last=False,
                         pin_memory=(device.type == "cuda"))

metrics = test_model(model, test_loader, device, num_classes=num_classes)

df = pd.DataFrame({
    "metric": ["mean_iou", "mean_accuracy", "mean_precision", "mean_recall",
               "iou_porosity", "precision_porosity", "recall_porosity",
               "iou_wall", "precision_wall", "recall_wall"],
    "value": [metrics['mean_iou'], metrics['mean_accuracy'], metrics['mean_precision'], metrics['mean_recall'],
              metrics['iou'][idx_porosity], metrics['precision'][idx_porosity], metrics['recall'][idx_porosity],
              metrics['iou'][idx_wall], metrics['precision'][idx_wall], metrics['recall'][idx_wall]]
})

output_file = os.path.join(output_dir, results_file_name)
df.to_csv(output_file, index=False)


GENERAL METRICS:
-Mean IoU: 0.6325
-Mean Accuracy: 0.8643
-Mean Precision: 0.7567
-Mean Recall: 0.7965

CLASS METRICS:

Class 'background' (index 1)
-IoU: 0.7460
-Precision: 0.8681
-Recall: 0.8413

Class 'porosity' (index 2)
-IoU: 0.5224
-Precision: 0.5979
-Recall: 0.8052

Class 'wall' (index 3)
-IoU: 0.6290
-Precision: 0.8041
-Recall: 0.7429


### Model 5: trained on CMP, fine-tuned on Mapillary only on layer 4

In [None]:
# Test model 5 (trained on CMP, fine-tuned on Mapillary only on layer 4)
# on the test split and save its metrics.

# Names of the evaluated classes in metric order (label 0 is ignored).
# test_model reads this variable as a global.
class_names = ['background', 'porosity', 'wall']

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 4
ignore_class = 0
image_size = (512, 1024)  # (height, width) passed to A.Resize
model_path = "../best_deeplabmodels/V5_cmp_ft_encoder_.pth"
test_images_dir = "../training_images/segmentation_splits/test_images"
test_masks_dir = "../training_images/segmentation_splits/test_labels"
output_dir = "../test_results"
results_file_name = "V5_cmp_ft_encoder_stats_.csv"
os.makedirs(output_dir, exist_ok=True)

# Positions inside the per-class metric arrays (the ignored class is removed).
idx_porosity = 1
idx_wall = 2

transform = A.Compose([
    A.Resize(height=image_size[0], width=image_size[1]),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2()])

# encoder_weights=None: the checkpoint loaded below overwrites every weight,
# so fetching the ImageNet encoder weights first would be wasted work.
model = smp.DeepLabV3Plus(
    encoder_name="resnet101",
    encoder_weights=None,
    in_channels=3,
    classes=num_classes
)

# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# Dataset and loader.
# drop_last=False: every test image must contribute to the reported metrics.
test_dataset = SegmentationDataset(test_images_dir, test_masks_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, drop_last=False,
                         pin_memory=(device.type == "cuda"))

metrics = test_model(model, test_loader, device, num_classes=num_classes)

df = pd.DataFrame({
    "metric": ["mean_iou", "mean_accuracy", "mean_precision", "mean_recall",
               "iou_porosity", "precision_porosity", "recall_porosity",
               "iou_wall", "precision_wall", "recall_wall"],
    "value": [metrics['mean_iou'], metrics['mean_accuracy'], metrics['mean_precision'], metrics['mean_recall'],
              metrics['iou'][idx_porosity], metrics['precision'][idx_porosity], metrics['recall'][idx_porosity],
              metrics['iou'][idx_wall], metrics['precision'][idx_wall], metrics['recall'][idx_wall]]
})

output_file = os.path.join(output_dir, results_file_name)
df.to_csv(output_file, index=False)


GENERAL METRICS:
-Mean IoU: 0.6935
-Mean Accuracy: 0.8944
-Mean Precision: 0.8100
-Mean Recall: 0.8228

CLASS METRICS:

Class 'background' (index 1)
-IoU: 0.7905
-Precision: 0.8787
-Recall: 0.8873

Class 'porosity' (index 2)
-IoU: 0.5799
-Precision: 0.7035
-Recall: 0.7674

Class 'wall' (index 3)
-IoU: 0.7100
-Precision: 0.8478
-Recall: 0.8138


### Model 6: trained on CMP, fine-tuned on Mapillary on layers 3 and 4

In [None]:
# Test model 6 (trained on CMP, fine-tuned on Mapillary on layers 3 and 4)
# on the test split and save its metrics.

# Names of the evaluated classes in metric order (label 0 is ignored).
# test_model reads this variable as a global.
class_names = ['background', 'porosity', 'wall']

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
num_classes = 4
ignore_class = 0
image_size = (512, 1024)  # (height, width) passed to A.Resize
model_path = "../best_deeplabmodels/V6_cmp_ft_encoder_l34.pth"
test_images_dir = "../training_images/segmentation_splits/test_images"
test_masks_dir = "../training_images/segmentation_splits/test_labels"
output_dir = "../test_results"
results_file_name = "V6_cmp_ft_encoder_l34_stats.csv"
os.makedirs(output_dir, exist_ok=True)

# Positions inside the per-class metric arrays (the ignored class is removed).
idx_porosity = 1
idx_wall = 2

transform = A.Compose([
    A.Resize(height=image_size[0], width=image_size[1]),
    A.Normalize(mean=(0.485, 0.456, 0.406),
                std=(0.229, 0.224, 0.225)),
    ToTensorV2()])

# encoder_weights=None: the checkpoint loaded below overwrites every weight,
# so fetching the ImageNet encoder weights first would be wasted work.
model = smp.DeepLabV3Plus(
    encoder_name="resnet101",
    encoder_weights=None,
    in_channels=3,
    classes=num_classes
)

# map_location lets a checkpoint saved on a GPU load on a CPU-only machine.
model.load_state_dict(torch.load(model_path, map_location=device))
model.to(device)
model.eval()

# Dataset and loader.
# drop_last=False: every test image must contribute to the reported metrics.
test_dataset = SegmentationDataset(test_images_dir, test_masks_dir, transform=transform)
test_loader = DataLoader(test_dataset, batch_size=4, shuffle=False, drop_last=False,
                         pin_memory=(device.type == "cuda"))

metrics = test_model(model, test_loader, device, num_classes=num_classes)

df = pd.DataFrame({
    "metric": ["mean_iou", "mean_accuracy", "mean_precision", "mean_recall",
               "iou_porosity", "precision_porosity", "recall_porosity",
               "iou_wall", "precision_wall", "recall_wall"],
    "value": [metrics['mean_iou'], metrics['mean_accuracy'], metrics['mean_precision'], metrics['mean_recall'],
              metrics['iou'][idx_porosity], metrics['precision'][idx_porosity], metrics['recall'][idx_porosity],
              metrics['iou'][idx_wall], metrics['precision'][idx_wall], metrics['recall'][idx_wall]]
})

output_file = os.path.join(output_dir, results_file_name)
df.to_csv(output_file, index=False)


GENERAL METRICS:
-Mean IoU: 0.7272
-Mean Accuracy: 0.9097
-Mean Precision: 0.8327
-Mean Recall: 0.8459

CLASS METRICS:

Class 'background' (index 1)
-IoU: 0.8284
-Precision: 0.9076
-Recall: 0.9046

Class 'porosity' (index 2)
-IoU: 0.6084
-Precision: 0.7260
-Recall: 0.7896

Class 'wall' (index 3)
-IoU: 0.7448
-Precision: 0.8643
-Recall: 0.8433
