In [1]:
import csv
import logging
import os
from typing import Any, Dict, Tuple

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
from skimage.metrics import structural_similarity as ssim

## Helpers

In [2]:
# Visualization and export utility functions for the project pipeline
# All functions in this file are documented in English and follow snake_case naming convention

'''
Example usage:
from src.utils.helpers import plot_metrics, plot_confusion_matrix, plot_histograms, plot_samples, export_metrics_csv, get_true_labels

plot_metrics(results, 'fid', './results')
plot_confusion_matrix(y_true, y_pred, labels=['Class 0', 'Class 1'], filename='./results/confusion_matrix.png')
plot_histograms(originals, generated, 'scenario1', './results')
plot_samples(originals, generated, 'scenario1', './results')
export_metrics_csv(results, perf_results, './results/metrics.csv')
labels = get_true_labels(test_loader)
# Performance metrics keys: 'total_time', 'memory_MB', 'gpu_memory_MB', 'cost_per_image', 'flops', 'params'
'''

# --- Visualization functions ---
import csv
import logging
import os
from logging.handlers import RotatingFileHandler

import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
import torch
from tqdm import tqdm

from src.metrics.image_metrics import ImageQualityMetric
from src.metrics.segmentation_metrics import AssertivenessMetric


def train_model(model, dataloader, optimizer, device, num_epochs=1, model_type='cnn'):
    """
    Train a model for a given number of epochs.
    Args:
        model: The model to train.
        dataloader: DataLoader for training data.
        optimizer: Optimizer for model parameters.
        device: Device to use ('cuda' or 'cpu').
        num_epochs: Number of epochs to train.
        model_type: Type of model ('cnn', 'gan', 'diffusion').
    Returns:
        Trained model.
    """
    model.train()
    for epoch in range(num_epochs):
        losses = []
        for batch in tqdm(dataloader, desc=f'Training {model_type} - Epoch {epoch+1}/{num_epochs}'):
            if model_type == 'cnn':
                x, y = batch
                x, y = x.to(device), y.to(device)
                optimizer.zero_grad()
                _, loss = model(x, y)
                loss.backward()
                optimizer.step()
                losses.append(loss.item())
            elif model_type == 'gan':
                d_loss, g_loss = model.train_step(batch[0])
                losses.append((d_loss, g_loss))
            elif model_type == 'diffusion':
                loss = model.train_step(batch[0], optimizer)
                losses.append(loss)
        if model_type == 'gan' and losses:
            d_losses = [dl for dl, _ in losses]
            g_losses = [gl for _, gl in losses]
            print(f"Average D loss in epoch {epoch+1}: {sum(d_losses)/len(d_losses):.4f}, G loss: {sum(g_losses)/len(g_losses):.4f}")
        else:
            print(f"Average loss in epoch {epoch+1}: {sum(losses)/len(losses) if losses else 0}")
    return model

def validate_model(model, dataloader, device, model_type='cnn'):
    """
    Validate a model on a given dataset.
    Args:
        model: The model to validate.
        dataloader: DataLoader for validation data.
        device: Device to use ('cuda' or 'cpu').
        model_type: Type of model ('cnn', 'gan', 'diffusion').
    Returns:
        Average loss over the validation set.
    """
    model.eval()
    losses = []
    with torch.no_grad():
        for batch in tqdm(dataloader, desc=f'Validation {model_type}'):
            if model_type == 'cnn':
                x, y = batch
                x, y = x.to(device), y.to(device)
                _, loss = model(x, y)
                losses.append(loss.item())
            elif model_type == 'gan':
                loss = model.validate_step(batch)
                losses.append(loss)
            elif model_type == 'diffusion':
                loss = model.validate_step(batch)
                losses.append(loss)
    return sum(losses)/len(losses) if losses else 0

def save_checkpoint(model, path):
    """
    Save model weights to a file.
    Args:
        model: The model to save.
        path: Path to the file.
    """
    torch.save(model.state_dict(), path)

def load_checkpoint(model, path, device):
    """
    Load model weights from a file.
    Args:
        model: The model to load weights into.
        path: Path to the file.
        device: Device to map the model to.
    Returns:
        The model with loaded weights.
    """
    model.load_state_dict(torch.load(path, map_location=device))
    model.to(device)
    return model

def setup_logging(log_dir="./logs", log_file="pipeline.log", level=logging.INFO):
    """
    Set up logging to file and console.
    Args:
        log_dir: Directory for log files.
        log_file: Log file name.
        level: Logging level.
    Returns:
        Configured logger.
    """
    os.makedirs(log_dir, exist_ok=True)
    log_path = os.path.join(log_dir, log_file)
    handler = RotatingFileHandler(log_path, maxBytes=5*1024*1024, backupCount=3)
    formatter = logging.Formatter('[%(asctime)s] %(levelname)s %(name)s: %(message)s')
    handler.setFormatter(formatter)
    logger = logging.getLogger()
    logger.setLevel(level)
    if not logger.handlers:
        logger.addHandler(handler)
    # Also log to console
    console = logging.StreamHandler()
    console.setFormatter(formatter)
    logger.addHandler(console)
    return logger

def plot_metrics(results, metric, results_dir):
    """
    Plot a bar chart comparing a given metric across scenarios.
    Args:
        results (dict): Dictionary with scenario keys and metric values.
        metric (str): Metric name to compare (e.g., 'fid', 'ssim', 'psnr').
        results_dir (str): Directory to save the plot.
    """
    plt.figure()
    labels = []
    values = []
    for key, res in results.items():
        labels.append(key)
        values.append(res[metric])
    plt.bar(labels, values)
    plt.title(f'Comparison of {metric.upper()}')
    plt.ylabel(metric.upper())
    plt.savefig(os.path.join(results_dir, f"{metric}_comparison.png"))
    plt.close()


def plot_confusion_matrix(y_true, y_pred, labels, filename):
    """
    Plot and save a confusion matrix as a heatmap.
    Args:
        y_true (array-like): True class labels.
        y_pred (array-like): Predicted class labels.
        labels (list): List of class label names.
        filename (str): Path to save the confusion matrix plot.
    """
    cm = AssertivenessMetric('confusion').confusion(y_true, y_pred)
    plt.figure(figsize=(6, 5))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues', xticklabels=labels, yticklabels=labels)
    plt.xlabel('Predicted')
    plt.ylabel('True')
    plt.title('Confusion Matrix')
    plt.savefig(filename)
    plt.close()


def plot_histograms(originals, generated, key, results_dir):
    """
    Plot and save RGB and intensity histograms for a sample image.
    Args:
        originals (Tensor): Batch of original images.
        generated (Tensor): Batch of generated images.
        key (str): Scenario key for file naming.
        results_dir (str): Directory to save the plots.
    """
    metric = ImageQualityMetric('hist')
    orig = originals[0].permute(1, 2, 0).cpu().numpy()
    gen = generated[0].permute(1, 2, 0).cpu().numpy()
    # RGB histogram
    hist_orig = metric.calculate_histogram((orig * 255).astype(np.uint8), mode='rgb')
    hist_gen = metric.calculate_histogram((gen * 255).astype(np.uint8), mode='rgb')
    plt.figure(figsize=(10, 4))
    plt.plot(hist_orig, label='Original')
    plt.plot(hist_gen, label='Generated')
    plt.title('RGB Histogram (sample)')
    plt.legend()
    plt.savefig(f"{results_dir}/histogram_rgb_{key}.png")
    plt.close()
    # Intensity histogram
    hist_orig_int = metric.calculate_histogram((orig * 255).astype(np.uint8), mode='intensity')
    hist_gen_int = metric.calculate_histogram((gen * 255).astype(np.uint8), mode='intensity')
    plt.figure(figsize=(10, 4))
    plt.plot(hist_orig_int, label='Original')
    plt.plot(hist_gen_int, label='Generated')
    plt.title('Intensity Histogram (sample)')
    plt.legend()
    plt.savefig(f"{results_dir}/histogram_intensity_{key}.png")
    plt.close()


def plot_samples(originals, generated, key, results_dir):
    """
    Plot and save a comparison of original and generated image samples.
    Args:
        originals (Tensor): Batch of original images.
        generated (Tensor): Batch of generated images.
        key (str): Scenario key for file naming.
        results_dir (str): Directory to save the plot.
    """
    n = min(5, originals.shape[0])
    plt.figure(figsize=(10, 4))
    for i in range(n):
        plt.subplot(2, n, i+1)
        plt.imshow(originals[i].permute(1, 2, 0).cpu().numpy().clip(0, 1))
        plt.axis('off')
        if i == 0:
            plt.ylabel('Original')
        plt.subplot(2, n, n+i+1)
        plt.imshow(generated[i].permute(1, 2, 0).cpu().numpy().clip(0, 1))
        plt.axis('off')
        if i == 0:
            plt.ylabel('Generated')
    plt.suptitle('Samples: Original vs. Generated')
    plt.savefig(f"{results_dir}/samples_{key}.png")
    plt.close()


def export_metrics_csv(results, perf_results, filename):
    """
    Export metrics and performance results to a CSV file.
    Args:
        results (dict): Dictionary with scenario keys and metric values.
        perf_results (dict): Dictionary with scenario keys and performance metrics.
        filename (str): Path to save the CSV file.
    """
    fields = ['scenario', 'fid', 'ssim', 'psnr', 'total_time', 'memory_MB', 'gpu_memory_MB', 'cost_per_image', 'flops', 'params']
    with open(filename, 'w', newline='') as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        for scenario, metrics in results.items():
            row = {'scenario': scenario}
            row.update({k: metrics.get(k, None) for k in ['fid', 'ssim', 'psnr']})
            perf = perf_results.get(scenario, {})
            row['total_time'] = perf.get('total_time', None)
            row['memory_MB'] = perf.get('memory_MB', None)
            row['gpu_memory_MB'] = perf.get('gpu_memory_MB', None)
            row['cost_per_image'] = perf.get('cost_per_image', None)
            row['flops'] = perf.get('flops', None)
            row['params'] = perf.get('params', None)
            writer.writerow(row)

def get_true_labels(test_loader):
    """
    Extract true labels from a test data loader.
    Args:
        test_loader (DataLoader): DataLoader for the test set.
    Returns:
        np.ndarray: Array of true labels.
    """
    y_true = []
    for _, labels in test_loader:
        y_true.extend(labels.cpu().numpy())
    return np.array(y_true)

# --- Data export and helper functions --- 

In [3]:
# Global configuration (could be moved to a config file or argparse)
DEVICE = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
DATA_DIR = "./Amostras_celeba"
BATCH_SIZE = 32
IMAGE_SIZE = 64
NUM_CLASSES = 2

RESULTS_DIR = "./resultados"
os.makedirs(RESULTS_DIR, exist_ok=True)

SCENARIOS = [
    {"model": "GAN", "mask": False},
    {"model": "GAN", "mask": True},
    {"model": "Diffusion", "mask": False},
    {"model": "Diffusion", "mask": True},
]

# MODELS

## DiffusionModel

In [4]:
import torch
import torch.nn as nn
from tqdm import tqdm


class DiffusionModel(nn.Module):
    """
    Diffusion model for image generation.
    Args:
        denoise_network: Neural network for denoising.
        T: Number of diffusion steps.
        beta_start: Initial beta value.
        beta_end: Final beta value.
        device: Device to use ('cuda' or 'cpu').
    """
    def __init__(self, denoise_network, T=1000, beta_start=1e-4, beta_end=0.02, device='cuda'):
        super(DiffusionModel, self).__init__()
        self.T = T
        self.device = device
        self.denoise_network = denoise_network.to(device)
        self.register_buffer('betas', torch.linspace(beta_start, beta_end, T, device=device))
        self.register_buffer('alphas', 1. - self.betas)
        self.register_buffer('alpha_bars', torch.cumprod(self.alphas, dim=0))

    def validate_step(self, batch):
        """
        Perform a validation step for the diffusion model.
        Args:
            batch: Batch of real data.
        Returns:
            Validation loss.
        """
        self.denoise_network.eval()
        x0, _ = batch
        x0 = x0.to(self.device)
        with torch.no_grad():
            t = torch.randint(1, self.T + 1, (x0.shape[0],), device=self.device)
            xt, noise = self.forward_diffusion(x0, t - 1)
            predicted_noise = self.denoise_network(xt, t)
            loss = nn.functional.mse_loss(predicted_noise, noise)
        return loss.item()

    def forward_diffusion(self, x0, t):
        """
        Perform the forward diffusion process.
        Args:
            x0: Original images.
            t: Diffusion step indices.
        Returns:
            Tuple of (noisy images, noise).
        """
        noise = torch.randn_like(x0)
        sqrt_alpha_bar = torch.sqrt(self.alpha_bars[t])[:, None, None, None]
        sqrt_one_minus_alpha_bar = torch.sqrt(1. - self.alpha_bars[t])[:, None, None, None]
        xt = sqrt_alpha_bar * x0 + sqrt_one_minus_alpha_bar * noise
        return xt, noise

    def train_step(self, x0, optimizer):
        """
        Perform a training step for the diffusion model.
        Args:
            x0: Original images.
            optimizer: Optimizer for the denoise network.
        Returns:
            Training loss.
        """
        x0 = x0.to(self.device)
        self.denoise_network.train()
        optimizer.zero_grad()
        t = torch.randint(1, self.T + 1, (x0.shape[0],), device=self.device)
        xt, noise = self.forward_diffusion(x0, t - 1)
        predicted_noise = self.denoise_network(xt, t)
        loss = nn.functional.mse_loss(predicted_noise, noise)
        loss.backward()
        optimizer.step()
        return loss.item()

    def sample(self, num_samples, image_size):
        """
        Generate samples from the diffusion model.
        Args:
            num_samples: Number of samples to generate.
            image_size: Size of the generated images.
        Returns:
            Generated images.
        """
        self.denoise_network.eval()
        xt = torch.randn((num_samples, 3, image_size, image_size), device=self.device)
        for t in tqdm(range(self.T, 0, -1), desc='Sampling'):
            t_tensor = torch.full((num_samples,), t, device=self.device)
            with torch.no_grad():
                predicted_noise = self.denoise_network(xt, t_tensor)
            alpha_t = self.alphas[t-1]
            alpha_bar_t = self.alpha_bars[t-1]
            sqrt_one_minus_alpha_bar_t = torch.sqrt(1. - alpha_bar_t)
            mean = (xt - ((1. - alpha_t) / sqrt_one_minus_alpha_bar_t) * predicted_noise) / torch.sqrt(alpha_t)
            if t > 1:
                noise = torch.randn_like(xt)
            else:
                noise = torch.zeros_like(xt)
            xt = mean + torch.sqrt(self.betas[t-1]) * noise
        x0 = xt.clamp(-1., 1.)
        return x0

class DenoiseNetwork(nn.Module):
    """
    Neural network for denoising in the diffusion model.
    Args:
        T: Number of diffusion steps (for time embedding).
    """
    def __init__(self, T):
        super().__init__()
        self.T = T
        self.conv1 = nn.Conv2d(3, 64, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(64, 128, kernel_size=3, padding=1)
        self.conv3 = nn.Conv2d(128, 256, kernel_size=3, padding=1)
        self.time_embed = nn.Embedding(T, 256)
        self.conv4 = nn.Conv2d(256 + 256, 128, kernel_size=3, padding=1)
        self.conv5 = nn.Conv2d(128, 64, kernel_size=3, padding=1)
        self.conv6 = nn.Conv2d(64, 3, kernel_size=3, padding=1)

    def forward(self, x, t):
        """
        Forward pass for the denoise network.
        Args:
            x: Noisy images.
            t: Diffusion step indices.
        Returns:
            Denoised images.
        """
        h1 = torch.relu(self.conv1(x))
        h2 = torch.relu(self.conv2(h1))
        h3 = torch.relu(self.conv3(h2))
        temb = self.time_embed(t - 1)
        temb = temb.view(temb.shape[0], temb.shape[1], 1, 1)
        temb = temb.expand(-1, -1, h3.shape[2], h3.shape[3])
        h = torch.cat([h3, temb], dim=1)
        h = torch.relu(self.conv4(h))
        h = torch.relu(self.conv5(h))
        h = self.conv6(h)
        return h 

## GAN Model

In [5]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from tqdm import tqdm


class Generator(nn.Module):
    def __init__(self, noise_dim, img_channels=3, feature_map_size=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.ConvTranspose2d(noise_dim, feature_map_size*8, 4, 1, 0, bias=False),
            nn.BatchNorm2d(feature_map_size*8),
            nn.ReLU(True),
            nn.ConvTranspose2d(feature_map_size*8, feature_map_size*4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_map_size*4),
            nn.ReLU(True),
            nn.ConvTranspose2d(feature_map_size*4, feature_map_size*2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_map_size*2),
            nn.ReLU(True),
            nn.ConvTranspose2d(feature_map_size*2, feature_map_size, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_map_size),
            nn.ReLU(True),
            nn.ConvTranspose2d(feature_map_size, img_channels, 4, 2, 1, bias=False),
            nn.Tanh()
        )
    def forward(self, x):
        return self.net(x)

class Discriminator(nn.Module):
    def __init__(self, img_channels=3, feature_map_size=64):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(img_channels, feature_map_size, 4, 2, 1, bias=False),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(feature_map_size, feature_map_size*2, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_map_size*2),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(feature_map_size*2, feature_map_size*4, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_map_size*4),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(feature_map_size*4, feature_map_size*8, 4, 2, 1, bias=False),
            nn.BatchNorm2d(feature_map_size*8),
            nn.LeakyReLU(0.2, inplace=True),
            nn.Conv2d(feature_map_size*8, 1, 4, 1, 0, bias=False),
            nn.Sigmoid()
        )
    def forward(self, x):
        return self.net(x)

class GANTrainer:
    """
    Trainer class for GAN models.
    Args:
        generator: Generator model.
        discriminator: Discriminator model.
        noise_dim: Dimension of the noise vector.
        lr_D: Learning rate for discriminator.
        lr_G: Learning rate for generator.
        batch_size: Batch size.
        device: Device to use ('cuda' or 'cpu').
    """
    def __init__(self, generator, discriminator, noise_dim,
                 lr_D=0.0002, lr_G=0.0002, batch_size=64, device='cuda'):
        self.device = device
        self.batch_size = batch_size
        self.noise_dim = noise_dim
        self.current_losses = {'g_loss': 0, 'd_loss': 0}
        self.G = generator.to(device)
        self.D = discriminator.to(device)
        self.optimizer_D = optim.Adam(self.D.parameters(), lr=lr_D, betas=(0.5, 0.999))
        self.optimizer_G = optim.Adam(self.G.parameters(), lr=lr_G, betas=(0.5, 0.999))
        self.criterion = nn.BCELoss()

    @property
    def generator(self):
        return self.G

    def eval(self):
        self.G.eval()
        self.D.eval()

    def train(self):
        self.G.train()
        self.D.train()

    def validate_step(self, batch):
        """
        Perform a validation step for the GAN.
        Args:
            batch: Batch of real data.
        Returns:
            Validation loss.
        """
        real_data = batch[0].to(self.device)
        batch_size = real_data.size(0)
        with torch.no_grad():
            real_labels = torch.ones(batch_size, 1, device=self.device)
            fake_labels = torch.zeros(batch_size, 1, device=self.device)
            real_output = self.D(real_data).view(batch_size, 1)
            d_loss_real = self.criterion(real_output, real_labels)
            noise = torch.randn(batch_size, self.noise_dim, 1, 1, device=self.device)
            fake_data = self.G(noise)
            fake_output = self.D(fake_data).view(batch_size, 1)
            d_loss_fake = self.criterion(fake_output, fake_labels)
            d_loss = d_loss_real + d_loss_fake
            output = self.D(fake_data).view(batch_size, 1)
            g_loss = self.criterion(output, real_labels)
            total_loss = (d_loss + g_loss) / 2
        return total_loss.item()

    def train_step(self, real_data):
        """
        Perform a training step for the GAN.
        Args:
            real_data: Batch of real data.
        Returns:
            Tuple of discriminator and generator losses.
        """
        real_data = real_data.to(self.device)
        batch_size = real_data.size(0)
        self.optimizer_D.zero_grad()
        real_labels = torch.ones(batch_size, 1, device=self.device)
        real_output = self.D(real_data).view(batch_size, 1)
        d_loss_real = self.criterion(real_output, real_labels)
        noise = torch.randn(batch_size, self.noise_dim, 1, 1, device=self.device)
        fake_data = self.G(noise)
        fake_labels = torch.zeros(batch_size, 1, device=self.device)
        fake_output = self.D(fake_data.detach()).view(batch_size, 1)
        d_loss_fake = self.criterion(fake_output, fake_labels)
        d_loss = d_loss_real + d_loss_fake
        d_loss.backward()
        self.optimizer_D.step()
        self.optimizer_G.zero_grad()
        noise = torch.randn(batch_size, self.noise_dim, 1, 1, device=self.device)
        fake_data = self.G(noise)
        output = self.D(fake_data).view(batch_size, 1)
        g_loss = self.criterion(output, real_labels)
        g_loss.backward()
        self.optimizer_G.step()
        return d_loss.item(), g_loss.item()

    def train_full(self, dataloader, num_epochs):
        """
        Train the GAN for a given number of epochs.
        Args:
            dataloader: DataLoader for training data.
            num_epochs: Number of epochs to train.
        """
        for epoch in range(num_epochs):
            d_losses, g_losses = [], []
            for real_data, _ in tqdm(dataloader, desc=f'Epoch {epoch+1}/{num_epochs}'):
                d_loss, g_loss = self.train_step(real_data)
                d_losses.append(d_loss)
                g_losses.append(g_loss)
            self.current_losses['d_loss'] = np.mean(d_losses)
            self.current_losses['g_loss'] = np.mean(g_losses)
            print(f'Epoch {epoch+1}: D loss = {self.current_losses["d_loss"]:.4f}, G loss = {self.current_losses["g_loss"]:.4f}')

    def get_current_losses(self):
        """
        Get the current average losses for discriminator and generator.
        Returns:
            Dictionary with 'd_loss' and 'g_loss'.
        """
        return self.current_losses 

## RecursiveCNN

In [6]:
import torch.nn as nn
import torch.nn.functional as F


class RecursiveCNN(nn.Module):
    """
    Recursive Convolutional Neural Network for image classification.
    Args:
        in_channels (int): Number of input channels.
        out_channels (int): Number of output channels.
        kernel_size (int): Size of the convolutional kernel.
        num_iterations (int): Number of recursive iterations.
        num_classes (int): Number of output classes.
    """
    def __init__(self, in_channels, out_channels, kernel_size, num_iterations, num_classes):
        super(RecursiveCNN, self).__init__()
        self.num_iterations = num_iterations
        self.kernel_size = kernel_size
        self.init_conv = nn.Conv2d(in_channels, out_channels, kernel_size, padding=kernel_size//2)
        self.init_relu = nn.ReLU()
        self.recursive_block = nn.Sequential(
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1)
        )
        self.conv_layer = nn.Conv2d(out_channels, out_channels, kernel_size, padding=kernel_size//2)
        self.relu = nn.ReLU()
        self.bn = nn.BatchNorm2d(out_channels)
        self.global_pool = nn.AdaptiveAvgPool2d(1)
        self.fc = nn.Linear(out_channels, num_classes)

    def forward(self, x, y=None):
        """
        Forward pass for the RecursiveCNN.
        Args:
            x: Input images.
            y: Target labels (optional).
        Returns:
            Tuple of (predictions, loss).
        """
        f = self.init_conv(x)
        f = self.init_relu(f)
        for t in range(self.num_iterations):
            f_residual = self.recursive_block(f)
            f_conv = self.conv_layer(f_residual)
            f = self.relu(f_conv + f)
            f = self.bn(f)
        f_pool = self.global_pool(f)
        f_pool = f_pool.view(f_pool.size(0), -1)
        y_pred = F.softmax(self.fc(f_pool), dim=1)
        loss = None
        if y is not None:
            loss = F.cross_entropy(self.fc(f_pool), y)
        return y_pred, loss 

# Metrics

## ImageMetrics

In [7]:
from abc import ABC, abstractmethod

import cv2
import numpy as np
import torch
from piq import LPIPS
# from skimage import metrics
from skimage.metrics import structural_similarity as ssim
from torchmetrics.image.fid import FrechetInceptionDistance
from torchvision import transforms


class MetricBase(ABC):
    """
    Abstract base class for all metrics.
    """
    def __init__(self, name):
        self.name = name
        self.results = []

    @abstractmethod
    def calculate(self, *args, **kwargs):
        pass

    def add_result(self, value):
        self.results.append(value)

    def get_results(self):
        return self.results

class ImageQualityMetric(MetricBase):
    """
    Class for image quality metrics.
    """
    def __init__(self, name):
        super().__init__(name)

    def calculate(self, *args, **kwargs):
        # Default implementation (placeholder)
        pass

    def calculate_fid(self, real_images, fake_images, device='cuda'):
        """
        Calculate Frechet Inception Distance (FID) between real and fake images.
        Args:
            real_images: Tensor of real images.
            fake_images: Tensor of generated images.
            device: Device to use ('cuda' or 'cpu').
        Returns:
            FID value (float).
        """
        fid = FrechetInceptionDistance(feature=2048).to(device)
        real_tensors = real_images.to(device)
        fake_tensors = fake_images.to(device)
        fid.update(real_tensors, real=True)
        fid.update(fake_tensors, real=False)
        value = fid.compute().item()
        self.add_result(value)
        return value

    def calculate_ssim_psnr(self, real_img, fake_img):
        """
        Calculate SSIM and PSNR between two images.
        Args:
            real_img: Numpy array of real image.
            fake_img: Numpy array of generated image.
        Returns:
            Tuple (ssim, psnr).
        """
        ssim_value = ssim(real_img, fake_img, channel_axis=2, win_size=3, data_range=1.0 if real_img.max() <= 1 else 255)
        psnr_value = self.psnr(real_img, fake_img, data_range=1.0 if real_img.max() <= 1 else 255)
        self.add_result({'ssim': ssim_value, 'psnr': psnr_value})
        return ssim_value, psnr_value

    def calculate_lpips(self, real_img, fake_img, device='cuda'):
        """
        Calculate LPIPS metric between two images.
        Args:
            real_img: Numpy array or PIL image of real image.
            fake_img: Numpy array or PIL image of generated image.
            device: Device to use ('cuda' or 'cpu').
        Returns:
            LPIPS value (float).
        """
        lpips = LPIPS(replace_pooling=True).to(device)
        transform = transforms.Compose([
            transforms.ToTensor(),
            transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
        ])
        real_tensor = transform(real_img).unsqueeze(0).to(device)
        fake_tensor = transform(fake_img).unsqueeze(0).to(device)
        value = lpips(real_tensor, fake_tensor).item()
        self.add_result({'lpips': value})
        return value

    def psnr(self, real_img, fake_img, data_range=None):
        """
        Calculate Peak Signal-to-Noise Ratio (PSNR) between two images.
        Args:
            real_img: Numpy array of real image.
            fake_img: Numpy array of generated image.
            data_range: Value range of the images.
        Returns:
            PSNR value (float).
        """
        mse = np.mean((real_img.astype(np.float64) - fake_img.astype(np.float64)) ** 2)
        if mse == 0:
            return float('inf')
        if data_range is None:
            data_range = 1.0 if real_img.max() <= 1.0 else 255
        return 20 * np.log10(data_range / np.sqrt(mse))

    def calculate_histogram(self, img, mode='rgb', bins=256):
        """Calcula histograma RGB ou de intensidade."""
        if mode == 'rgb':
            chans = cv2.split(img)
            hist = [cv2.calcHist([c], [0], None, [bins], [0, 256]) for c in chans]
            hist = np.concatenate(hist).ravel()
        else:  # intensidade
            gray = cv2.cvtColor(img, cv2.COLOR_RGB2GRAY)
            hist = cv2.calcHist([gray], [0], None, [bins], [0, 256]).ravel()
        hist = hist / (hist.sum() + 1e-8)  # normalização
        return hist

    def compare_histograms(self, hist1, hist2, method='chi-square'):
        if method == 'chi-square':
            score = cv2.compareHist(hist1.astype('float32'), hist2.astype('float32'), cv2.HISTCMP_CHISQR)
        elif method == 'correlation':
            score = cv2.compareHist(hist1.astype('float32'), hist2.astype('float32'), cv2.HISTCMP_CORREL)
        elif method == 'intersection':
            score = cv2.compareHist(hist1.astype('float32'), hist2.astype('float32'), cv2.HISTCMP_INTERSECT)
        elif method == 'bhattacharyya':
            score = cv2.compareHist(hist1.astype('float32'), hist2.astype('float32'), cv2.HISTCMP_BHATTACHARYYA)
        else:
            raise ValueError('Método de comparação de histograma não suportado')
        self.add_result({f'hist_{method}': score})
        return score

    def batch_ssim(self, imgs1, imgs2):
        """Calcula SSIM para batches de imagens."""
        return [self.calculate_ssim_psnr(i1, i2)[0] for i1, i2 in zip(imgs1, imgs2)]

    def batch_psnr(self, imgs1, imgs2):
        """Calcula PSNR para batches de imagens."""
        return [self.calculate_ssim_psnr(i1, i2)[1] for i1, i2 in zip(imgs1, imgs2)]

## PerformanceMetrics

In [8]:
import time

import psutil
import torch

from src.metrics.image_metrics import MetricBase

try:
    from ptflops import get_model_complexity_info
    PT_FLOPS_AVAILABLE = True
except ImportError:
    PT_FLOPS_AVAILABLE = False

class PerformanceMetric(MetricBase):
    """
    Class for performance metrics (time, memory, FLOPs, etc).
    """
    def __init__(self, name):
        super().__init__(name)

    def measure_time(self, func, *args, **kwargs):
        """
        Measure execution time of a function.
        Args:
            func: Function to measure.
            *args, **kwargs: Arguments for the function.
        Returns:
            Tuple (result, elapsed_time).
        """
        start = time.time()
        result = func(*args, **kwargs)
        elapsed = time.time() - start
        self.add_result({'execution_time': elapsed})
        return result, elapsed

    def measure_time_batch(self, func, dataloader, *args, **kwargs):
        """
        Measure execution time for each batch in a dataloader.
        Args:
            func: Function to measure.
            dataloader: DataLoader to iterate over.
            *args, **kwargs: Arguments for the function.
        Returns:
            List of times per batch.
        """
        times = []
        for batch in dataloader:
            start = time.time()
            func(batch, *args, **kwargs)
            times.append(time.time() - start)
        self.add_result({'batch_time': times})
        return times

    def measure_time_image(self, func, images, *args, **kwargs):
        """
        Measure execution time for each image in a list.
        Args:
            func: Function to measure.
            images: List of images.
            *args, **kwargs: Arguments for the function.
        Returns:
            List of times per image.
        """
        times = []
        for img in images:
            start = time.time()
            func(img, *args, **kwargs)
            times.append(time.time() - start)
        self.add_result({'image_time': times})
        return times

    def measure_memory(self):
        """
        Measure current process memory usage (RAM).
        Returns:
            Memory usage in MB (float).
        """
        process = psutil.Process()
        mem = process.memory_info().rss / (1024 * 1024)  # in MB
        self.add_result({'memory_MB': mem})
        return mem

    def measure_memory_gpu(self):
        """
        Measure current GPU memory usage (if available).
        Returns:
            GPU memory usage in MB (float) or None.
        """
        if torch.cuda.is_available():
            mem = torch.cuda.max_memory_allocated() / (1024 * 1024)
            self.add_result({'gpu_memory_MB': mem})
            return mem
        return None

    def measure_flops(self, model, input_res):
        """
        Measure FLOPs and parameter count for a model.
        Args:
            model: Model to analyze.
            input_res: Input resolution tuple (C, H, W).
        Returns:
            Tuple (FLOPs, params) or (None, None) if not available.
        """
        if PT_FLOPS_AVAILABLE:
            macs, params = get_model_complexity_info(model, input_res, as_strings=False, print_per_layer_stat=False)
            self.add_result({'FLOPs': macs * 2, 'params': params})
            return macs * 2, params
        else:
            self.add_result({'FLOPs': None, 'params': None})
            return None, None

    def cost_per_image(self, total_time, n_images):
        """
        Calculate computational cost per image.
        Args:
            total_time: Total execution time.
            n_images: Number of images processed.
        Returns:
            Cost per image (float) or None.
        """
        cost = total_time / n_images if n_images > 0 else None
        self.add_result({'cost_per_image': cost})
        return cost

    def cost_per_batch(self, batch_times):
        """
        Calculate computational cost per batch.
        Args:
            batch_times: List of times per batch.
        Returns:
            List of costs per batch.
        """
        costs = [t for t in batch_times]
        self.add_result({'cost_per_batch': costs})
        return costs

    def add_custom_cost(self, cost):
        """
        Add a custom computational cost value.
        Args:
            cost: Custom cost value.
        Returns:
            The cost value.
        """
        self.add_result({'custom_cost': cost})
        return cost

    def calculate(self, *args, **kwargs):
        # Default implementation (placeholder)
        pass 

## AssertivenessMetrics

In [9]:
import numpy as np
import torch
from sklearn.metrics import confusion_matrix
from torchmetrics import F1Score, JaccardIndex, Precision, Recall

from src.metrics.image_metrics import MetricBase


class AssertivenessMetric(MetricBase):
    """
    Class for assertiveness (segmentation) metrics.
    """
    def __init__(self, name):
        super().__init__(name)

    def calculate(self, *args, **kwargs):
        # Default implementation (placeholder)
        pass

    def calculate_segmentation_metrics(self, mask_real, mask_pred, num_classes=2, device='cuda'):
        """
        Calculate segmentation metrics (IoU, precision, recall, F1-score).
        Args:
            mask_real: Ground truth mask (numpy array).
            mask_pred: Predicted mask (numpy array).
            num_classes: Number of classes.
            device: Device to use ('cuda' or 'cpu').
        Returns:
            Dictionary with metrics.
        """
        mask_real = torch.from_numpy(mask_real).long()
        mask_pred = torch.from_numpy(mask_pred).long()
        task = 'binary' if num_classes == 2 else 'multiclass'
        jaccard = JaccardIndex(task=task, num_classes=num_classes).to(device)
        iou = jaccard(mask_pred, mask_real)
        precision = Precision(task=task, num_classes=num_classes).to(device)
        prec = precision(mask_pred, mask_real)
        recall = Recall(task=task, num_classes=num_classes).to(device)
        rec = recall(mask_pred, mask_real)
        f1 = F1Score(task=task, num_classes=num_classes).to(device)
        f1_score = f1(mask_pred, mask_real)
        result = {
            'iou': iou.item(),
            'precision': prec.item(),
            'recall': rec.item(),
            'f1': f1_score.item()
        }
        self.add_result(result)
        return result

    def accuracy(self, y_true, y_pred):
        """
        Calculate accuracy between true and predicted labels.
        Args:
            y_true: Ground truth labels.
            y_pred: Predicted labels.
        Returns:
            Accuracy (float).
        """
        acc = np.mean(np.array(y_true) == np.array(y_pred))
        self.add_result({'accuracy': acc})
        return acc

    def confusion(self, y_true, y_pred):
        """
        Calculate confusion matrix.
        Args:
            y_true: Ground truth labels.
            y_pred: Predicted labels.
        Returns:
            Confusion matrix (numpy array).
        """
        cm = confusion_matrix(y_true, y_pred)
        self.add_result({'confusion_matrix': cm})
        return cm

    def mae(self, y_true, y_pred):
        """
        Calculate Mean Absolute Error (MAE).
        Args:
            y_true: Ground truth values.
            y_pred: Predicted values.
        Returns:
            MAE (float).
        """
        mae = np.mean(np.abs(np.array(y_true) - np.array(y_pred)))
        self.add_result({'mae': mae})
        return mae

    def mse(self, y_true, y_pred):
        """
        Calculate Mean Squared Error (MSE).
        Args:
            y_true: Ground truth values.
            y_pred: Predicted values.
        Returns:
            MSE (float).
        """
        mse = np.mean((np.array(y_true) - np.array(y_pred)) ** 2)
        self.add_result({'mse': mse})
        return mse

    def rmse(self, y_true, y_pred):
        """
        Calculate Root Mean Squared Error (RMSE).
        Args:
            y_true: Ground truth values.
            y_pred: Predicted values.
        Returns:
            RMSE (float).
        """
        rmse = np.sqrt(self.mse(y_true, y_pred))
        self.add_result({'rmse': rmse})
        return rmse

    def mape(self, y_true, y_pred):
        """
        Calculate Mean Absolute Percentage Error (MAPE).
        Args:
            y_true: Ground truth values.
            y_pred: Predicted values.
        Returns:
            MAPE (float).
        """
        y_true, y_pred = np.array(y_true), np.array(y_pred)
        mape = np.mean(np.abs((y_true - y_pred) / (y_true + 1e-8))) * 100
        self.add_result({'mape': mape})
        return mape 

## Execution

In [10]:
import torch
from torch.utils.data import DataLoader, random_split
from torchvision import transforms

from src.data.dataset import CelebADataset


def prepare_data(data_dir, batch_size, image_size, with_mask=False):
    """
    Prepare data loaders for training, validation, and testing.
    Args:
        data_dir (str): Directory containing the dataset.
        batch_size (int): Batch size for data loaders.
        image_size (int): Size to resize images to (image_size, image_size).
        with_mask (bool): Whether to use masks in the dataset.
    Returns:
        tuple: (train_loader, val_loader, test_loader)
    """
    transform = transforms.Compose([
        transforms.Resize((image_size, image_size)),
        transforms.ToTensor(),
        transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
    ])
    dataset = CelebADataset(root_dir=data_dir, transform=transform, with_mask=with_mask)
    total_size = len(dataset)
    train_size = int(0.7 * total_size)
    val_size = int(0.1 * total_size)
    test_size = total_size - train_size - val_size
    train_dataset, val_dataset, test_dataset = random_split(
        dataset, [train_size, val_size, test_size], 
        generator=torch.Generator().manual_seed(42)
    )
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=4)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    val_loader = DataLoader(val_dataset, batch_size=batch_size, shuffle=False, num_workers=4)
    print(f"Data split: Train={train_size}, Validation={val_size}, Test={test_size}")
    return train_loader, val_loader, test_loader 

In [11]:
def prepare_dataloaders(with_mask: bool) -> Tuple[Any, Any, Any]:
    """Prepare train, validation, and test dataloaders."""
    return prepare_data(DATA_DIR, BATCH_SIZE, IMAGE_SIZE, with_mask=with_mask)

In [12]:
import numpy as np
import torch
from PIL import Image, ImageFilter


def apply_post_processing(images):
    """
    Apply additional post-processing to a batch of images (Gaussian blur).
    Args:
        images (Tensor): Batch of images (N, C, H, W).
    Returns:
        Tensor: Batch of processed images.
    """
    print("Applying additional post-processing to images...")
    batch_size, channels, height, width = images.shape
    processed_images = images.clone()
    kernel_size = 5
    sigma = 1.0
    for i in range(batch_size):
        for c in range(channels):
            processed_images[i, c] = torch.from_numpy(
                np.array(
                    Image.fromarray(
                        np.uint8(((images[i, c].cpu().numpy() + 1) * 127.5))
                    ).filter(
                        ImageFilter.GaussianBlur(sigma)
                    )
                ) / 127.5 - 1
            )
    return processed_images 

In [13]:
def initialize_models() -> Tuple[DiffusionModel, GANTrainer, RecursiveCNN]:
    """Initialize all required models."""
    denoise_net = DenoiseNetwork(T=1000)
    diffusion_model = DiffusionModel(
        denoise_network=denoise_net,
        T=1000,
        beta_start=1e-4,
        beta_end=0.02,
        device=DEVICE
    )
    noise_dim = 100
    generator = Generator(noise_dim=noise_dim)
    discriminator = Discriminator()
    gan_trainer = GANTrainer(
        generator=generator,
        discriminator=discriminator,
        noise_dim=noise_dim,
        lr_D=0.0002,
        lr_G=0.0002,
        batch_size=BATCH_SIZE,
        device=DEVICE
    )
    cnn_model = RecursiveCNN(
        in_channels=3,
        out_channels=64,
        kernel_size=3,
        num_iterations=5,
        num_classes=NUM_CLASSES
    ).to(DEVICE)
    return diffusion_model, gan_trainer, cnn_model

def generate_images(test_loader, model_type, diffusion_model, gan_trainer):
    """Generate images using the specified model."""
    originals = []
    generated = []
    with torch.no_grad():
        for images, _ in test_loader:
            images = images.to(DEVICE)
            batch_size = images.size(0)
            originals.append(images.cpu())
            generated_images = None
            if model_type == "Diffusion":
                t = torch.ones(batch_size, device=DEVICE).long() * 500
                # Placeholder: replace with actual diffusion sampling
                generated_images = images + 0.1 * torch.randn_like(images)
            elif model_type == "GAN":
                # noise = torch.randn(batch_size, gan_trainer.noise_dim, device=DEVICE)
                noise = torch.randn(batch_size, gan_trainer.noise_dim, 1, 1, device=DEVICE)
                generated_images = gan_trainer.generator(noise)
            generated.append(generated_images.cpu())
    originals = torch.cat(originals, dim=0)
    generated = torch.cat(generated, dim=0)
    return originals, generated

def classify_images(images, cnn_model):
    """Classify images using the CNN model."""
    cnn_model.eval()
    results = []
    with torch.no_grad():
        for i in range(0, len(images), BATCH_SIZE):
            batch = images[i:i+BATCH_SIZE].to(DEVICE)
            preds, _ = cnn_model(batch)
            results.append(preds.cpu())
    return torch.cat(results, dim=0)

def to_uint8_tensor(imgs):
    """Convert images to uint8 tensor."""
    imgs = imgs.clone()
    if imgs.min() < 0:
        imgs = (imgs + 1) / 2
    imgs = (imgs * 255).clamp(0, 255).to(torch.uint8)
    return imgs

def evaluate_images(originals, generated, device) -> Dict[str, float]:
    """Evaluate generated images using FID, SSIM, and PSNR."""
    orig_uint8 = to_uint8_tensor(originals)
    gen_uint8 = to_uint8_tensor(generated)
    metric = ImageQualityMetric('comparison')
    fid = metric.calculate_fid(orig_uint8, gen_uint8, device=device)
    ssim_vals, psnr_vals = zip(*[
        metric.calculate_ssim_psnr(
            o.permute(1, 2, 0).cpu().numpy(),
            g.permute(1, 2, 0).cpu().numpy()
        )
        for o, g in zip(orig_uint8, gen_uint8)
    ])
    ssim = sum(ssim_vals) / len(ssim_vals)
    psnr = sum(psnr_vals) / len(psnr_vals)
    return {"fid": fid, "ssim": ssim, "psnr": psnr}


In [14]:
def evaluate_images(originals, generated, device) -> Dict[str, float]:
    """Evaluate generated images using FID, SSIM, and PSNR."""
    orig_uint8 = to_uint8_tensor(originals)
    gen_uint8 = to_uint8_tensor(generated)
    metric = ImageQualityMetric('comparison')
    fid = metric.calculate_fid(orig_uint8, gen_uint8, device=device)
    ssim_vals, psnr_vals = zip(*[
        metric.calculate_ssim_psnr(
            o.permute(1, 2, 0).cpu().numpy(),
            g.permute(1, 2, 0).cpu().numpy()
        )
        for o, g in zip(orig_uint8, gen_uint8)
    ])
    ssim = sum(ssim_vals) / len(ssim_vals)
    psnr = sum(psnr_vals) / len(psnr_vals)
    return {"fid": fid, "ssim": ssim, "psnr": psnr}


In [15]:
def main():
    """
    Main pipeline for comparing GAN and Diffusion models on image data.
    """
    logger = setup_logging()
    logger.info('Starting the model comparison pipeline.')
    try:
        diffusion_model, gan_trainer, cnn_model = initialize_models()
        results = {}
        perf_results = {}
        num_epochs = 5
        perf_metric = PerformanceMetric('performance')
        for scenario in SCENARIOS:
            logger.info(f"Running scenario: {scenario['model']} {'with mask' if scenario['mask'] else 'without mask'}")
            train_loader, val_loader, test_loader = prepare_dataloaders(with_mask=scenario["mask"])
            # Training
            if scenario["model"] == "Diffusion":
                optimizer_diff = torch.optim.Adam(diffusion_model.denoise_network.parameters(), lr=2e-4)
                logger.info("Training Diffusion model...")
                _, train_time = perf_metric.measure_time(
                    train_model, diffusion_model, train_loader, optimizer_diff, DEVICE, num_epochs, 'diffusion'
                )
                val_loss = validate_model(diffusion_model, val_loader, DEVICE, model_type='diffusion')
                logger.info(f"Diffusion validation: mean loss = {val_loss}")
                save_checkpoint(diffusion_model.denoise_network, f"{RESULTS_DIR}/diffusion_{'mask' if scenario['mask'] else 'no_mask'}.pth")
                originals, generated = generate_images(test_loader, "Diffusion", diffusion_model, gan_trainer)
            else:
                logger.info("Training GAN model...")
                _, train_time = perf_metric.measure_time(
                    train_model, gan_trainer, train_loader, None, DEVICE, num_epochs, 'gan'
                )
                val_loss = validate_model(gan_trainer, val_loader, DEVICE, model_type='gan')
                logger.info(f"GAN validation: mean loss = {val_loss}")
                save_checkpoint(gan_trainer.G, f"{RESULTS_DIR}/ganG_{'mask' if scenario['mask'] else 'no_mask'}.pth")
                save_checkpoint(gan_trainer.D, f"{RESULTS_DIR}/ganD_{'mask' if scenario['mask'] else 'no_mask'}.pth")
                originals, generated = generate_images(test_loader, "GAN", diffusion_model, gan_trainer)
            # CNN training
            optimizer_cnn = torch.optim.Adam(cnn_model.parameters(), lr=1e-3)
            logger.info("Training CNN model...")
            _, train_time_cnn = perf_metric.measure_time(
                train_model, cnn_model, train_loader, optimizer_cnn, DEVICE, num_epochs, 'cnn'
            )
            val_loss_cnn = validate_model(cnn_model, val_loader, DEVICE, model_type='cnn')
            logger.info(f"CNN validation: mean loss = {val_loss_cnn}")
            save_checkpoint(cnn_model, f"{RESULTS_DIR}/cnn_{scenario['model']}_{'mask' if scenario['mask'] else 'no_mask'}.pth")
            processed_generated = apply_post_processing(generated)
            class_cnn = classify_images(processed_generated, cnn_model)
            metrics = evaluate_images(originals, processed_generated, DEVICE)
            key = f"{scenario['model']}_{'mask' if scenario['mask'] else 'no_mask'}"
            results[key] = metrics
            logger.info(f"Metrics for {key}: {metrics}")
            # Confusion matrix and visualizations
            y_true = get_true_labels(test_loader)
            y_pred = torch.argmax(class_cnn, dim=1).cpu().numpy()
            plot_confusion_matrix(y_true, y_pred, labels=["Class 0", "Class 1"], filename=f"{RESULTS_DIR}/confusion_matrix_{key}.png")
            logger.info(f"Confusion matrix saved at {RESULTS_DIR}/confusion_matrix_{key}.png")
            plot_histograms(originals, processed_generated, key, RESULTS_DIR)
            plot_samples(originals, processed_generated, key, RESULTS_DIR)
            logger.info(f"Histograms and samples saved for {key}")
            # Performance metrics
            mem = perf_metric.measure_memory()
            mem_gpu = perf_metric.measure_memory_gpu()
            total_time = train_time + train_time_cnn
            cost_img = perf_metric.cost_per_image(total_time, len(test_loader.dataset))
            logger.info(f"Performance: total time={total_time:.2f}s, memory={mem:.2f}MB, GPU memory={mem_gpu}, cost per image={cost_img}")
            # FLOPs (example for CNN)
            flops, params = None, None
            if hasattr(cnn_model, 'fc'):
                input_res = (3, IMAGE_SIZE, IMAGE_SIZE)
                flops, params = perf_metric.measure_flops(cnn_model, input_res)
                logger.info(f"CNN FLOPs: {flops}, parameters: {params}")
            # Save performance metrics
            with open(f"{RESULTS_DIR}/performance_{key}.txt", "w") as f:
                for r in perf_metric.get_results():
                    f.write(str(r) + "\n")
            perf_results[key] = {
                'total_time': total_time,
                'memory_MB': mem,
                'gpu_memory_MB': mem_gpu,
                'cost_per_image': cost_img,
                'flops': flops,
                'params': params
            }
        # Plot and export results
        for metric in ["fid", "ssim", "psnr"]:
            plot_metrics(results, metric, RESULTS_DIR)
        logger.info("Final Comparison Report:")
        for key, metrics in results.items():
            logger.info(f"{key}: {metrics}")
        best = min(results.items(), key=lambda x: x[1]["fid"])
        logger.info(f"Best model (lowest FID): {best[0]}")
        export_metrics_csv(results, perf_results, f"{RESULTS_DIR}/comparative_metrics.csv")
        logger.info(f"Numerical metrics exported to {RESULTS_DIR}/comparative_metrics.csv")
    except Exception as e:
        logger.error(f"Error during pipeline execution: {e}", exc_info=True)
    logger.info('End of pipeline execution.')


In [16]:
main()

[2025-04-29 21:54:57,967] INFO root: Starting the model comparison pipeline.
[2025-04-29 21:54:58,156] INFO root: Running scenario: GAN without mask
[2025-04-29 21:54:58,157] INFO root: Training GAN model...


Found 13 images in ./Amostras_celeba
Data split: Train=9, Validation=1, Test=3


Training gan - Epoch 1/5: 100%|██████████| 1/1 [00:00<00:00,  4.70it/s]


Average D loss in epoch 1: 1.0911, G loss: 1.8647


Training gan - Epoch 2/5: 100%|██████████| 1/1 [00:00<00:00, 13.01it/s]


Average D loss in epoch 2: 0.6592, G loss: 3.1662


Training gan - Epoch 3/5: 100%|██████████| 1/1 [00:00<00:00, 12.46it/s]


Average D loss in epoch 3: 0.2529, G loss: 4.4729


Training gan - Epoch 4/5: 100%|██████████| 1/1 [00:00<00:00,  9.48it/s]


Average D loss in epoch 4: 0.0815, G loss: 4.6975


Training gan - Epoch 5/5: 100%|██████████| 1/1 [00:00<00:00, 12.10it/s]


Average D loss in epoch 5: 0.1044, G loss: 4.8440


Validation gan: 100%|██████████| 1/1 [00:00<00:00, 16.06it/s]
[2025-04-29 21:54:58,786] INFO root: GAN validation: mean loss = 1.1445976495742798
[2025-04-29 21:54:58,869] INFO root: Training CNN model...
Training cnn - Epoch 1/5: 100%|██████████| 1/1 [00:00<00:00,  8.06it/s]


Average loss in epoch 1: 0.7069723606109619


Training cnn - Epoch 2/5: 100%|██████████| 1/1 [00:00<00:00, 10.37it/s]


Average loss in epoch 2: 0.7201285362243652


Training cnn - Epoch 3/5: 100%|██████████| 1/1 [00:00<00:00, 11.04it/s]


Average loss in epoch 3: 0.6968564987182617


Training cnn - Epoch 4/5: 100%|██████████| 1/1 [00:00<00:00, 10.82it/s]


Average loss in epoch 4: 0.7020423412322998


Training cnn - Epoch 5/5: 100%|██████████| 1/1 [00:00<00:00, 11.48it/s]


Average loss in epoch 5: 0.6902891397476196


Validation cnn: 100%|██████████| 1/1 [00:00<00:00, 17.96it/s]
[2025-04-29 21:54:59,426] INFO root: CNN validation: mean loss = 0.8147716522216797


Applying additional post-processing to images...


[2025-04-29 21:55:00,641] INFO root: Metrics for GAN_no_mask: {'fid': 364.22698974609375, 'ssim': np.float64(0.38879928778600564), 'psnr': np.float64(9.939539419576933)}
[2025-04-29 21:55:00,767] INFO root: Confusion matrix saved at ./resultados/confusion_matrix_GAN_no_mask.png
[2025-04-29 21:55:00,902] INFO root: Histograms and samples saved for GAN_no_mask
[2025-04-29 21:55:00,904] INFO root: Performance: total time=1.06s, memory=1479.52MB, GPU memory=502.6728515625, cost per image=0.3541439374287923
[2025-04-29 21:55:00,904] INFO root: CNN FLOPs: None, parameters: None
[2025-04-29 21:55:00,904] INFO root: Running scenario: GAN with mask
[2025-04-29 21:55:00,905] INFO root: Training GAN model...


Found 13 images in ./Amostras_celeba
Data split: Train=9, Validation=1, Test=3


Training gan - Epoch 1/5: 100%|██████████| 1/1 [00:00<00:00, 12.28it/s]


Average D loss in epoch 1: 0.0873, G loss: 4.7473


Training gan - Epoch 2/5: 100%|██████████| 1/1 [00:00<00:00, 12.26it/s]


Average D loss in epoch 2: 0.0786, G loss: 4.6411


Training gan - Epoch 3/5: 100%|██████████| 1/1 [00:00<00:00, 11.15it/s]


Average D loss in epoch 3: 0.0661, G loss: 5.4260


Training gan - Epoch 4/5: 100%|██████████| 1/1 [00:00<00:00, 12.07it/s]


Average D loss in epoch 4: 0.0862, G loss: 5.8009


Training gan - Epoch 5/5: 100%|██████████| 1/1 [00:00<00:00, 12.77it/s]


Average D loss in epoch 5: 0.0452, G loss: 6.1814


Validation gan: 100%|██████████| 1/1 [00:00<00:00, 15.56it/s]
[2025-04-29 21:55:01,391] INFO root: GAN validation: mean loss = 1.7833818197250366
[2025-04-29 21:55:01,480] INFO root: Training CNN model...
Training cnn - Epoch 1/5: 100%|██████████| 1/1 [00:00<00:00, 10.69it/s]


Average loss in epoch 1: 0.6899327635765076


Training cnn - Epoch 2/5: 100%|██████████| 1/1 [00:00<00:00, 10.14it/s]


Average loss in epoch 2: 0.6965973377227783


Training cnn - Epoch 3/5: 100%|██████████| 1/1 [00:00<00:00,  9.42it/s]


Average loss in epoch 3: 0.6815457344055176


Training cnn - Epoch 4/5: 100%|██████████| 1/1 [00:00<00:00, 10.16it/s]


Average loss in epoch 4: 0.6756211519241333


Training cnn - Epoch 5/5: 100%|██████████| 1/1 [00:00<00:00, 10.38it/s]


Average loss in epoch 5: 0.6722304821014404


Validation cnn: 100%|██████████| 1/1 [00:00<00:00, 16.37it/s]
[2025-04-29 21:55:02,044] INFO root: CNN validation: mean loss = 0.7562283873558044


Applying additional post-processing to images...


[2025-04-29 21:55:03,302] INFO root: Metrics for GAN_mask: {'fid': 427.75677490234375, 'ssim': np.float64(0.3570487293974938), 'psnr': np.float64(9.843522739373057)}
[2025-04-29 21:55:03,423] INFO root: Confusion matrix saved at ./resultados/confusion_matrix_GAN_mask.png
[2025-04-29 21:55:03,551] INFO root: Histograms and samples saved for GAN_mask
[2025-04-29 21:55:03,551] INFO root: Performance: total time=0.92s, memory=1660.88MB, GPU memory=723.3671875, cost per image=0.3072348435719808
[2025-04-29 21:55:03,552] INFO root: CNN FLOPs: None, parameters: None
[2025-04-29 21:55:03,552] INFO root: Running scenario: Diffusion without mask
[2025-04-29 21:55:03,553] INFO root: Training Diffusion model...


Found 13 images in ./Amostras_celeba
Data split: Train=9, Validation=1, Test=3


Training diffusion - Epoch 1/5: 100%|██████████| 1/1 [00:00<00:00,  6.22it/s]


Average loss in epoch 1: 1.0055468082427979


Training diffusion - Epoch 2/5: 100%|██████████| 1/1 [00:00<00:00, 10.44it/s]


Average loss in epoch 2: 1.0025639533996582


Training diffusion - Epoch 3/5: 100%|██████████| 1/1 [00:00<00:00,  8.88it/s]


Average loss in epoch 3: 1.0038663148880005


Training diffusion - Epoch 4/5: 100%|██████████| 1/1 [00:00<00:00,  9.44it/s]


Average loss in epoch 4: 0.9958980679512024


Training diffusion - Epoch 5/5: 100%|██████████| 1/1 [00:00<00:00,  9.41it/s]


Average loss in epoch 5: 0.9912746548652649


Validation diffusion: 100%|██████████| 1/1 [00:00<00:00, 14.01it/s]
[2025-04-29 21:55:04,214] INFO root: Diffusion validation: mean loss = 0.9800335764884949
[2025-04-29 21:55:04,290] INFO root: Training CNN model...
Training cnn - Epoch 1/5: 100%|██████████| 1/1 [00:00<00:00,  9.42it/s]


Average loss in epoch 1: 0.6679841876029968


Training cnn - Epoch 2/5: 100%|██████████| 1/1 [00:00<00:00,  9.36it/s]


Average loss in epoch 2: 0.6693249344825745


Training cnn - Epoch 3/5: 100%|██████████| 1/1 [00:00<00:00,  9.92it/s]


Average loss in epoch 3: 0.6601555943489075


Training cnn - Epoch 4/5: 100%|██████████| 1/1 [00:00<00:00,  9.76it/s]


Average loss in epoch 4: 0.6558259129524231


Training cnn - Epoch 5/5: 100%|██████████| 1/1 [00:00<00:00,  9.65it/s]


Average loss in epoch 5: 0.6520708203315735


Validation cnn: 100%|██████████| 1/1 [00:00<00:00, 15.18it/s]
[2025-04-29 21:55:04,885] INFO root: CNN validation: mean loss = 0.7394930720329285


Applying additional post-processing to images...


[2025-04-29 21:55:06,654] INFO root: Metrics for Diffusion_no_mask: {'fid': 339.1529235839844, 'ssim': np.float64(0.6317402597754299), 'psnr': np.float64(16.748415962127442)}
[2025-04-29 21:55:06,783] INFO root: Confusion matrix saved at ./resultados/confusion_matrix_Diffusion_no_mask.png
[2025-04-29 21:55:07,020] INFO root: Histograms and samples saved for Diffusion_no_mask
[2025-04-29 21:55:07,021] INFO root: Performance: total time=1.12s, memory=1889.74MB, GPU memory=1170.55859375, cost per image=0.37186169624328613
[2025-04-29 21:55:07,021] INFO root: CNN FLOPs: None, parameters: None
[2025-04-29 21:55:07,021] INFO root: Running scenario: Diffusion with mask
[2025-04-29 21:55:07,022] INFO root: Training Diffusion model...


Found 13 images in ./Amostras_celeba
Data split: Train=9, Validation=1, Test=3


Training diffusion - Epoch 1/5: 100%|██████████| 1/1 [00:00<00:00,  8.98it/s]


Average loss in epoch 1: 0.9937778115272522


Training diffusion - Epoch 2/5: 100%|██████████| 1/1 [00:00<00:00,  8.40it/s]


Average loss in epoch 2: 0.9888983964920044


Training diffusion - Epoch 3/5: 100%|██████████| 1/1 [00:00<00:00,  8.82it/s]


Average loss in epoch 3: 0.9810404181480408


Training diffusion - Epoch 4/5: 100%|██████████| 1/1 [00:00<00:00,  8.69it/s]


Average loss in epoch 4: 0.972320556640625


Training diffusion - Epoch 5/5: 100%|██████████| 1/1 [00:00<00:00,  8.71it/s]


Average loss in epoch 5: 0.9726639986038208


Validation diffusion: 100%|██████████| 1/1 [00:00<00:00, 13.72it/s]
[2025-04-29 21:55:07,677] INFO root: Diffusion validation: mean loss = 0.9783014059066772
[2025-04-29 21:55:07,765] INFO root: Training CNN model...
Training cnn - Epoch 1/5: 100%|██████████| 1/1 [00:00<00:00,  9.21it/s]


Average loss in epoch 1: 0.6476728916168213


Training cnn - Epoch 2/5: 100%|██████████| 1/1 [00:00<00:00,  9.15it/s]


Average loss in epoch 2: 0.6461507081985474


Training cnn - Epoch 3/5: 100%|██████████| 1/1 [00:00<00:00,  8.93it/s]


Average loss in epoch 3: 0.6387321949005127


Training cnn - Epoch 4/5: 100%|██████████| 1/1 [00:00<00:00,  9.10it/s]


Average loss in epoch 4: 0.6433608531951904


Training cnn - Epoch 5/5: 100%|██████████| 1/1 [00:00<00:00,  9.20it/s]


Average loss in epoch 5: 0.6315460801124573


Validation cnn: 100%|██████████| 1/1 [00:00<00:00, 13.32it/s]
[2025-04-29 21:55:08,400] INFO root: CNN validation: mean loss = 0.689679741859436


Applying additional post-processing to images...


[2025-04-29 21:55:10,190] INFO root: Metrics for Diffusion_mask: {'fid': 357.6922912597656, 'ssim': np.float64(0.6267254322373179), 'psnr': np.float64(16.58528720173405)}
[2025-04-29 21:55:10,328] INFO root: Confusion matrix saved at ./resultados/confusion_matrix_Diffusion_mask.png
[2025-04-29 21:55:10,473] INFO root: Histograms and samples saved for Diffusion_mask
[2025-04-29 21:55:10,473] INFO root: Performance: total time=1.14s, memory=1906.97MB, GPU memory=1170.55859375, cost per image=0.37881962458292645
[2025-04-29 21:55:10,474] INFO root: CNN FLOPs: None, parameters: None
[2025-04-29 21:55:10,569] INFO root: Final Comparison Report:
[2025-04-29 21:55:10,570] INFO root: GAN_no_mask: {'fid': 364.22698974609375, 'ssim': np.float64(0.38879928778600564), 'psnr': np.float64(9.939539419576933)}
[2025-04-29 21:55:10,570] INFO root: GAN_mask: {'fid': 427.75677490234375, 'ssim': np.float64(0.3570487293974938), 'psnr': np.float64(9.843522739373057)}
[2025-04-29 21:55:10,570] INFO root: Dif