In [None]:
from google.colab import drive
# drive.mount() takes a local mount point inside the Colab VM, not a share URL.
# The shared dataset folder
# (https://drive.google.com/drive/folders/1YFE9hfhnGYcKQeLIAkjk2sBEuDdEhZ9v?usp=drive_link)
# has to be reached through the mounted tree afterwards (for example through a
# shortcut to it placed in "My Drive").
drive.mount('/content/drive')

In [None]:
import torch

# Report the installed PyTorch build and pick the compute device:
# CUDA when a GPU is usable, otherwise the CPU.
print(torch.__version__)
has_cuda = torch.cuda.is_available()
device = torch.device("cuda") if has_cuda else torch.device("cpu")
print("CUDA:", has_cuda)

2.9.1+cpu
CUDA: False


## Integridad de los datos

In [None]:
import os

# NOTE: absolute, machine-specific dataset location; adjust when running elsewhere.
base_path = r"C:\Proyectos\ServicioSocial\contaminante\contaminante"
data_types = ["train", "valid", "test"]


def count_split_files(base_path, split):
    """Count the images and label files of one split and cross-check their names.

    Args:
        base_path: dataset root containing one sub-directory per split.
        split: split name; ``<base_path>/<split>/images`` and
            ``<base_path>/<split>/labels`` are inspected.

    Returns:
        Tuple ``(num_images, num_labels, images_without_label,
        labels_without_image)``. The first two entries are file counts, the
        last two are sorted lists of base names (no extension) that exist on
        one side only.

    Raises:
        FileNotFoundError: if either directory does not exist.
    """
    images_path = os.path.join(base_path, split, "images")
    labels_path = os.path.join(base_path, split, "labels")

    image_names = [
        f for f in os.listdir(images_path)
        if f.lower().endswith((".png", ".jpg", ".jpeg"))
    ]
    # Only *.txt files are counted as labels; counting every file in the
    # directory would let stray files make the totals match by accident.
    label_names = [
        f for f in os.listdir(labels_path)
        if f.lower().endswith(".txt")
    ]

    # Equal totals do not prove a one-to-one pairing, so compare base names too.
    image_stems = {os.path.splitext(f)[0] for f in image_names}
    label_stems = {os.path.splitext(f)[0] for f in label_names}

    return (
        len(image_names),
        len(label_names),
        sorted(image_stems - label_stems),
        sorted(label_stems - image_stems),
    )


for split in data_types:
    print(f"{split.upper()}:")
    try:
        num_images, num_labels, missing_labels, orphan_labels = count_split_files(base_path, split)
    except FileNotFoundError as error:
        # Report the missing directory and keep checking the remaining splits.
        print(f"  Directorio no encontrado: {error.filename}")
        continue

    print(f"  Imágenes: {num_images}")
    print(f"  Labels:   {num_labels}")
    # The extra lines appear only when the pairing is not one-to-one.
    if missing_labels:
        print(f"  Imágenes sin label: {len(missing_labels)} (p. ej. {missing_labels[:3]})")
    if orphan_labels:
        print(f"  Labels sin imagen:  {len(orphan_labels)} (p. ej. {orphan_labels[:3]})")


TRAIN:
  Imágenes: 3704
  Labels:   3704
VALID:
  Imágenes: 1236
  Labels:   1236
TEST:
  Imágenes: 1232
  Labels:   1232


In [None]:
import torch
from torch.utils.data import Dataset, DataLoader
import cv2
import numpy as np
import os
from PIL import Image
import torchvision.transforms as transforms
import glob

class YOLODataset(Dataset):
    """Detection dataset that pairs images with YOLO-format ``.txt`` annotations.

    Each item is ``(image, target)``. ``target`` is a dict with ``"boxes"``
    (float32 tensor of shape ``(N, 4)`` holding ``x1, y1, x2, y2`` scaled by
    ``img_size``) and ``"labels"`` (int64 tensor of shape ``(N,)``).

    Caveats:
        * ``transform`` is applied to the image only. Geometric augmentations
          (flips, crops, rotations) are NOT mirrored on the boxes.
        * Boxes are scaled by ``img_size`` on both axes, so they line up with
          the image only when ``transform`` outputs ``img_size`` x ``img_size``.
    """

    # Lower-case extensions recognised as images, in order of preference when
    # several files share the same base name.
    VALID_EXTENSIONS = ('.jpg', '.jpeg', '.png', '.bmp', '.tiff')

    def __init__(self, images_dir, labels_dir, img_size=640, transform=None):
        """
        Args:
            images_dir: path to the folder with the images
            labels_dir: path to the folder with the YOLO label files
            img_size: side length used to scale the boxes (and to resize the
                image when no ``transform`` is given)
            transform: optional callable applied to the PIL image
        """
        self.images_dir = images_dir
        self.labels_dir = labels_dir
        self.img_size = img_size
        self.transform = transform

        # One image per base name (duplicates by base name are dropped)
        self.image_files = self._get_unique_image_files()

        print(f"Encontradas {len(self.image_files)} imágenes ÚNICAS en {images_dir}")

    def _get_unique_image_files(self):
        """Return one image path per base name, sorted by path.

        Extensions are compared case-insensitively, so mixed-case names such as
        ``photo.Jpg`` are also picked up on case-sensitive file systems. When
        several files share a base name, the winner follows the order of
        ``VALID_EXTENSIONS`` (lower-case spelling first).
        """
        candidates = []
        for file_path in glob.glob(os.path.join(self.images_dir, '*')):
            extension = os.path.splitext(file_path)[1]
            lowered = extension.lower()
            if lowered in self.VALID_EXTENSIONS:
                rank = self.VALID_EXTENSIONS.index(lowered)
                candidates.append((rank, extension != lowered, file_path))

        # Visit candidates in preference order; the first path seen for a base
        # name is the one that is kept.
        unique_files_dict = {}
        for _, _, file_path in sorted(candidates):
            base_name = os.path.splitext(os.path.basename(file_path))[0]
            unique_files_dict.setdefault(base_name, file_path)

        return sorted(unique_files_dict.values())

    def __len__(self):
        """Number of unique images found in ``images_dir``."""
        return len(self.image_files)

    def _load_target(self, img_path):
        """Read the YOLO label file that matches ``img_path``.

        Each valid line is ``class x_center y_center width height``. Lines
        that do not have exactly five fields are skipped. A missing label
        file yields empty tensors.

        Returns:
            Dict with ``"boxes"`` (float32, ``(N, 4)``, corner format scaled
            by ``img_size``) and ``"labels"`` (int64, ``(N,)``).
        """
        base_name = os.path.splitext(os.path.basename(img_path))[0]
        label_path = os.path.join(self.labels_dir, f"{base_name}.txt")

        boxes = []
        labels = []

        if os.path.exists(label_path):
            with open(label_path, 'r') as f:
                for line in f:
                    data = line.split()
                    if len(data) != 5:  # blank line or not a YOLO box line
                        continue

                    class_id = int(data[0])
                    x_center, y_center, width, height = (float(v) for v in data[1:])

                    # Centre/size -> corner coordinates (x1, y1, x2, y2)
                    x1 = (x_center - width / 2) * self.img_size
                    y1 = (y_center - height / 2) * self.img_size
                    x2 = (x_center + width / 2) * self.img_size
                    y2 = (y_center + height / 2) * self.img_size

                    boxes.append([x1, y1, x2, y2])
                    labels.append(class_id)

        # Keep a fixed (0, 4) / (0,) shape when the image has no objects
        boxes = torch.tensor(boxes, dtype=torch.float32) if boxes else torch.zeros((0, 4), dtype=torch.float32)
        labels = torch.tensor(labels, dtype=torch.int64) if labels else torch.zeros((0,), dtype=torch.int64)

        return {"boxes": boxes, "labels": labels}

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the image at position ``idx``."""
        img_path = self.image_files[idx]

        # Image.open keeps the file open until the data is read; the context
        # manager closes it once convert() has produced the RGB copy.
        with Image.open(img_path) as raw_image:
            image = raw_image.convert('RGB')

        if self.transform is not None:
            image = self.transform(image)
        else:
            # Default: resize to img_size x img_size and convert to a tensor
            default_transform = transforms.Compose([
                transforms.Resize((self.img_size, self.img_size)),
                transforms.ToTensor(),
            ])
            image = default_transform(image)

        return image, self._load_target(img_path)

In [None]:
def collate_fn(batch):
    """
    Collate ``(image, target)`` samples whose targets hold a varying number of objects.

    Images are stacked along a new leading batch dimension; targets are
    returned as a plain list, one entry per sample, in the same order.
    """
    image_list = [img for img, _ in batch]
    target_list = [target for _, target in batch]

    return torch.stack(image_list, 0), target_list

In [None]:
import torchvision.transforms as transforms

# Image transforms (customise as needed).
#
# Training uses photometric augmentation only. YOLODataset applies `transform`
# to the image alone and builds the boxes straight from the label file, so a
# geometric augmentation such as RandomHorizontalFlip would mirror the image
# while leaving its boxes where they were. Add flips/crops only together with
# a box-aware transform.
train_transform = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Validation/test: deterministic resize + the same normalisation as training.
val_transform = transforms.Compose([
    transforms.Resize((640, 640)),
    transforms.ToTensor(),
    transforms.Normalize(mean=[0.485, 0.456, 0.406],
                         std=[0.229, 0.224, 0.225])
])

# Dataset root (absolute, machine-specific path; adjust when running elsewhere)
base_path = r"C:\Proyectos\ServicioSocial\contaminante\contaminante"

BATCH_SIZE = 16
# On Windows, DataLoader workers are separate processes started with "spawn"
# and have to re-import the dataset class. A class defined in a notebook cell
# typically cannot be re-imported there, which makes iterating the loader fail
# or hang, so data is loaded in the main process on that platform.
NUM_WORKERS = 0 if os.name == "nt" else 4


def _make_dataset(split, transform):
    """Build the YOLODataset for ``<base_path>/<split>/images`` and ``.../labels``."""
    return YOLODataset(
        images_dir=os.path.join(base_path, split, "images"),
        labels_dir=os.path.join(base_path, split, "labels"),
        img_size=640,
        transform=transform
    )


def _make_loader(dataset, shuffle):
    """Wrap ``dataset`` in a DataLoader that uses the shared batch settings."""
    return DataLoader(
        dataset,
        batch_size=BATCH_SIZE,
        shuffle=shuffle,
        num_workers=NUM_WORKERS,
        collate_fn=collate_fn,
        # Pinned host memory is only requested when a CUDA device is available
        pin_memory=torch.cuda.is_available()
    )


# Datasets: augmentation for training, deterministic transform for valid/test
train_dataset = _make_dataset("train", train_transform)
val_dataset = _make_dataset("valid", val_transform)
test_dataset = _make_dataset("test", val_transform)

# DataLoaders: only the training split is shuffled
train_loader = _make_loader(train_dataset, shuffle=True)
val_loader = _make_loader(val_dataset, shuffle=False)
test_loader = _make_loader(test_dataset, shuffle=False)

Encontradas 3704 imágenes ÚNICAS en C:\Proyectos\ServicioSocial\contaminante\contaminante\train\images
Encontradas 1236 imágenes ÚNICAS en C:\Proyectos\ServicioSocial\contaminante\contaminante\valid\images
Encontradas 1232 imágenes ÚNICAS en C:\Proyectos\ServicioSocial\contaminante\contaminante\test\images


In [None]:
def basic_checks(train_loader, val_loader, test_loader):
    """Print basic sanity information about the three loaders.

    Reports whether each loader has batches, how many, and the shape and value
    range of the first training batch.

    Args:
        train_loader: loader for the training split (one batch is fetched).
        val_loader: loader for the validation split (only its length is read).
        test_loader: loader for the test split (only its length is read).

    Returns:
        ``(images, targets)`` of the first training batch, or ``(None, None)``
        when that batch could not be fetched.
    """
    print("=== CHEQUEOS BÁSICOS ===")

    # 1. Make sure the loaders are not empty
    print(f"1. Train loader tiene batches: {len(train_loader) > 0}")
    print(f"   Val loader tiene batches: {len(val_loader) > 0}")
    print(f"   Test loader tiene batches: {len(test_loader) > 0}")

    # 2. Number of batches per loader
    print(f"\n2. Número de batches:")
    print(f"   Train: {len(train_loader)} batches")
    print(f"   Val: {len(val_loader)} batches")
    print(f"   Test: {len(test_loader)} batches")

    # Defined up front so the return below is valid even when fetching fails;
    # otherwise an UnboundLocalError would hide the error reported here.
    images, targets = None, None

    # 3. Shape of the first batch
    try:
        images, targets = next(iter(train_loader))
        print(f"\n3. Forma del primer batch de train:")
        print(f"   Images shape: {images.shape}")  # Expected [batch_size, 3, H, W]
        print(f"   Número de targets: {len(targets)}")  # Expected batch_size

        # Value ranges of the image tensor
        print(f"\n4. Rangos de valores:")
        print(f"   Images min: {images.min().item():.3f}, max: {images.max().item():.3f}")
        print(f"   Images mean: {images.mean().item():.3f}")

    except Exception as e:
        # Diagnostic helper: report the problem instead of aborting the cell
        print(f"ERROR obteniendo batch: {e}")

    return images, targets

# Run the checks; `images` and `targets` receive whatever basic_checks returns
images, targets = basic_checks(train_loader, val_loader, test_loader)

=== CHEQUEOS BÁSICOS ===
1. Train loader tiene batches: True
   Val loader tiene batches: True
   Test loader tiene batches: True

2. Número de batches:
   Train: 232 batches
   Val: 78 batches
   Test: 77 batches


In [None]:
def check_targets_structure(loader, name="Train", num_batches=2):
    """Print the structure of the targets in the first batches of ``loader``.

    For each of the first ``num_batches`` batches, prints the number of images
    and targets, then for the first two targets prints the box/label counts,
    the first box and label, and the min/max of every box coordinate column.

    Args:
        loader: iterable yielding ``(images, targets)`` batches.
        name: label shown in the section header.
        num_batches: how many batches to report.
    """

    def _column_range(box_tensor, column):
        # (min, max) of one coordinate column as Python floats
        values = box_tensor[:, column]
        return values.min().item(), values.max().item()

    print(f"\n=== ESTRUCTURA DE TARGETS ({name}) ===")

    for batch_idx, (images, targets) in enumerate(loader):
        if batch_idx >= num_batches:
            break

        print(f"\nBatch {batch_idx}:")
        print(f"  Número de imágenes: {len(images)}")
        print(f"  Número de targets: {len(targets)}")

        # Only the first two samples of the batch are inspected
        for i, target in enumerate(targets[:2]):
            print(f"\n  Imagen {i}:")
            boxes = target['boxes']
            print(f"    Número de boxes: {len(boxes)}")
            print(f"    Número de labels: {len(target['labels'])}")

            # Nothing more to show for an image without objects
            if len(boxes) == 0:
                continue

            print(f"    Box 0: {boxes[0]}")
            print(f"    Label 0: {target['labels'][0]}")

            # Coordinate ranges are printed for visual inspection
            # (they should lie between 0 and img_size)
            print(f"    Coordenadas válidas?")
            low, high = _column_range(boxes, 0)
            print(f"      x1 min: {low:.1f}, max: {high:.1f}")
            low, high = _column_range(boxes, 1)
            print(f"      y1 min: {low:.1f}, max: {high:.1f}")
            low, high = _column_range(boxes, 2)
            print(f"      x2 min: {low:.1f}, max: {high:.1f}")
            low, high = _column_range(boxes, 3)
            print(f"      y2 min: {low:.1f}, max: {high:.1f}")

# Inspect the target structure: two training batches and one validation batch
check_targets_structure(train_loader, "Train", num_batches=2)
check_targets_structure(val_loader, "Validation", num_batches=1)