In [3]:
import json
from pathlib import Path

# ==========================================================
# PATH CONFIGURATION
# ==========================================================

# Dataset root (adjust to your environment).
# A raw string is used on purpose: in a regular literal "\M", "\H" and "\d"
# are invalid escape sequences, which Python warns about and plans to reject.
# The resulting path value is unchanged.
BASE_DIR = Path(r"G:\My Drive\HerdNet\data")

# Image folders and COCO-format annotation files for each split
TRAIN_IMG_DIR = BASE_DIR / "images" / "train"
VAL_IMG_DIR = BASE_DIR / "images" / "val"
TEST_IMG_DIR = BASE_DIR / "images" / "test"

TRAIN_JSON = BASE_DIR / "coco" / "train" / "train_annotations.json"
VAL_JSON = BASE_DIR / "coco" / "val" / "val_annotations.json"
TEST_JSON = BASE_DIR / "coco" / "test" / "test_annotations.json"

# ==========================================================
# EXISTENCE CHECK
# ==========================================================

# Fail fast on the first missing folder/file so later cells never run
# against a half-configured dataset.
for path in [TRAIN_IMG_DIR, VAL_IMG_DIR, TEST_IMG_DIR,
             TRAIN_JSON, VAL_JSON, TEST_JSON]:
    if not path.exists():
        raise FileNotFoundError(f"Ruta no encontrada: {path}")

print("Todas las rutas fueron encontradas correctamente.")

# ==========================================================
# COCO STRUCTURE PREVIEW (sanity check only)
# ==========================================================

with open(TRAIN_JSON, "r", encoding="utf-8") as f:
    coco_data = json.load(f)

print("Claves principales del JSON COCO:")
print(list(coco_data.keys()))

# Plain literals: these strings have no placeholders, so no f-prefix is needed.
print("\nEjemplo de imagen:")
print(coco_data["images"][0])

print("\nEjemplo de anotación:")
print(coco_data["annotations"][0])

print("\nNúmero total de categorías:", len(coco_data["categories"]))


Todas las rutas fueron encontradas correctamente.
Claves principales del JSON COCO:
['images', 'annotations', 'categories', '_meta']

Ejemplo de imagen:
{'id': 1, 'file_name': 'L_07_05_16_DSC00126.JPG', 'orig_file': 'L_07_05_16_DSC00126.JPG', 'width': 6000, 'height': 4000, 'source_stage': 'clean'}

Ejemplo de anotación:
{'segmentation': [[]], 'area': 2610.0, 'iscrowd': 0, 'image_id': 824, 'bbox': [1072.0, 2068.0, 58.0, 45.0], 'category_id': 6.0, 'id': 1, 'source_stage': 'clean'}

Número total de categorías: 6


In [4]:
import json
import numpy as np
import torch
from torch.utils.data import Dataset
from PIL import Image
import cv2
from pathlib import Path


class COCODualDataset(Dataset):
    """
    COCODualDataset
    ----------------
    Dataset compatible with LiteDualNet.
    Converts COCO annotations into (image, (density map, class label)) samples.

    Purpose
    -------
    Produce ready-to-use inputs for the counting + classification model:
      - Image: float32 tensor 3xHxW scaled to [0, 1].
      - Density map: float32 tensor 1xHxW with one blurred dot per annotation.
      - Class: integer index (category_id - 1) of the most frequent category
        in the image, or ``background_label`` when the image has no
        annotations.

    Parameters
    ----------
    img_dir : str or Path
        Directory that contains the images.
    ann_file : str or Path
        Path to the COCO-format JSON file.
    image_size : tuple(int, int)
        Size the images are resized to, as (height, width).
    num_classes : int
        Total number of classes (background not included).
    transform : callable, optional
        Called as ``transform(image=..., mask=...)`` and expected to return a
        mapping with "image" and "mask" entries (Albumentations-style call).
    background_label : int, optional
        Label returned for images without annotations. Defaults to
        ``num_classes - 1`` (the historical behaviour). Note that this default
        is the same index that the category with id ``num_classes`` maps to;
        pass a distinct value (e.g. ``num_classes``) to keep empty images
        separate — the classifier then needs one extra output.

    Raises
    ------
    FileNotFoundError
        If ``img_dir`` or ``ann_file`` does not exist.
    """

    def __init__(self, img_dir, ann_file, image_size=(512, 512),
                 num_classes=6, transform=None, background_label=None):
        self.img_dir = Path(img_dir)
        self.image_size = image_size
        self.num_classes = num_classes
        self.transform = transform
        self.background_label = (
            num_classes - 1 if background_label is None else background_label
        )

        # Validate that both paths exist before doing any work
        if not self.img_dir.exists():
            raise FileNotFoundError(f"Image directory not found: {self.img_dir}")
        if not Path(ann_file).exists():
            raise FileNotFoundError(f"Annotation file not found: {ann_file}")

        # Load the COCO file
        with open(ann_file, "r", encoding="utf-8") as f:
            data = json.load(f)

        self.images = data["images"]
        self.annotations = data["annotations"]

        # Index annotations by image id so __getitem__ is a dict lookup
        self.ann_index = {}
        for ann in self.annotations:
            self.ann_index.setdefault(ann["image_id"], []).append(ann)

    def __len__(self):
        """Return the number of images listed in the annotation file."""
        return len(self.images)

    def _dominant_label(self, anns):
        """Return the zero-based label of the most frequent category in ``anns``.

        Category ids are cast to ``int`` first because the annotation file may
        store them as floats (e.g. ``6.0``). Ties are resolved in favour of the
        smallest category id so the result is deterministic. An empty list
        yields ``self.background_label``.
        """
        if not anns:
            return self.background_label

        counts = {}
        for ann in anns:
            category = int(ann["category_id"])
            counts[category] = counts.get(category, 0) + 1

        # Highest count first; smallest id wins among equal counts.
        winner = min(counts, key=lambda category: (-counts[category], category))
        return winner - 1

    def __getitem__(self, idx):
        """Load sample ``idx`` and return ``(image, (density_map, class_label))``."""
        # Image metadata
        img_info = self.images[idx]
        img_id = img_info["id"]
        img_path = self.img_dir / img_info["file_name"]

        # Load the image (closing the file handle right away) and resize it
        with Image.open(img_path) as pil_img:
            image = np.array(pil_img.convert("RGB"))
        src_h, src_w = image.shape[:2]
        h_target, w_target = self.image_size
        image = cv2.resize(image, (w_target, h_target))

        # Scale factors from original to resized coordinates. The COCO
        # metadata is used when present; otherwise fall back to the size of
        # the decoded image.
        scale_x = w_target / img_info.get("width", src_w)
        scale_y = h_target / img_info.get("height", src_h)

        # Empty density map
        density_map = np.zeros((h_target, w_target), dtype=np.float32)

        # Annotations that belong to this image
        anns = self.ann_index.get(img_id, [])

        for ann in anns:
            # Bounding box (x, y, w, h) -> centre point
            x_center = ann["bbox"][0] + ann["bbox"][2] / 2
            y_center = ann["bbox"][1] + ann["bbox"][3] / 2

            # Map the centre to the resized image
            x = int(x_center * scale_x)
            y = int(y_center * scale_y)

            # Draw a small fixed-size filled disk (constant kernel).
            # NOTE(review): the disk is written with value 1 and overlapping
            # disks do not add up, so the sum of the map is not the object
            # count — confirm this is what the counting loss expects.
            if 0 <= x < w_target and 0 <= y < h_target:
                cv2.circle(density_map, (x, y), 2, 1, -1)

        # Light smoothing
        density_map = cv2.GaussianBlur(density_map, (7, 7), 0)

        # Dominant class, or the background label for empty images
        class_label = self._dominant_label(anns)

        # Apply the optional transform to image and density map together
        if self.transform:
            transformed = self.transform(image=image, mask=density_map)
            image = transformed["image"]
            density_map = transformed["mask"]

        # Convert to PyTorch tensors (HWC uint8 -> CHW float in [0, 1])
        image = torch.tensor(image.transpose(2, 0, 1), dtype=torch.float32) / 255.0
        density_map = torch.tensor(density_map, dtype=torch.float32).unsqueeze(0)
        class_label = torch.tensor(class_label, dtype=torch.long)

        return image, (density_map, class_label)

In [None]:
from torch.utils.data import DataLoader

# Target (height, width) every image is resized to
IMAGE_SIZE = (512, 512)

# Build the training and validation datasets
# (arguments in order: image folder, annotation file, target size, class count)
train_dataset = COCODualDataset(TRAIN_IMG_DIR, TRAIN_JSON, IMAGE_SIZE, 6)
val_dataset = COCODualDataset(VAL_IMG_DIR, VAL_JSON, IMAGE_SIZE, 6)

# Quick sanity check: fetch the first training sample and report its shapes
print("Ejemplo de salida:")
img, (dmap, cls) = train_dataset[0]
print(f"Imagen: {img.shape}")
print(f"Mapa de densidad: {dmap.shape}")
print(f"Clase: {cls.item()}")

Ejemplo de salida:
Imagen: torch.Size([3, 512, 512])
Mapa de densidad: torch.Size([1, 512, 512])
Clase: 2


In [None]:
import torch
from torch.utils.data import DataLoader

import sys

# ==========================================================
# TRAINING PARAMETERS
# ==========================================================

# Batch size (tune to the available VRAM)
BATCH_SIZE = 8

# Number of classes (matches the dataset)
NUM_CLASSES = 6

# Worker processes for parallel loading.
# On Windows and macOS worker processes are started with "spawn" and have to
# re-import the dataset class; a class defined in a notebook cell cannot be
# found by those workers, which makes the first batch fetch fail or hang.
# Load in the main process there, and keep parallel loading elsewhere.
NUM_WORKERS = 0 if sys.platform in ("win32", "darwin") else 4

# Pinned host memory only helps host-to-GPU copies; enabling it without CUDA
# just triggers a warning.
PIN_MEMORY = torch.cuda.is_available()

# ==========================================================
# DATASETS
# ==========================================================

train_dataset = COCODualDataset(
    img_dir=TRAIN_IMG_DIR,
    ann_file=TRAIN_JSON,
    image_size=IMAGE_SIZE,
    num_classes=NUM_CLASSES
)

val_dataset = COCODualDataset(
    img_dir=VAL_IMG_DIR,
    ann_file=VAL_JSON,
    image_size=IMAGE_SIZE,
    num_classes=NUM_CLASSES
)

test_dataset = COCODualDataset(
    img_dir=TEST_IMG_DIR,
    ann_file=TEST_JSON,
    image_size=IMAGE_SIZE,
    num_classes=NUM_CLASSES
)

# ==========================================================
# DATALOADERS
# ==========================================================

# Only the training loader shuffles; validation and test keep file order.
train_loader = DataLoader(
    train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=True,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY
)

val_loader = DataLoader(
    val_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY
)

test_loader = DataLoader(
    test_dataset,
    batch_size=BATCH_SIZE,
    shuffle=False,
    num_workers=NUM_WORKERS,
    pin_memory=PIN_MEMORY
)

# ==========================================================
# QUICK VALIDATION
# ==========================================================

print("Número de lotes (train):", len(train_loader))
print("Número de lotes (val):", len(val_loader))
print("Número de lotes (test):", len(test_loader))

# Pull one batch to confirm that collation produces the expected shapes
images, (dmap, cls) = next(iter(train_loader))
print("Batch de imágenes:", images.shape)
print("Batch de mapas:", dmap.shape)
print("Batch de clases:", cls.shape)


Número de lotes (train): 1226
Número de lotes (val): 28
Número de lotes (test): 65


In [None]:
import torch
import torch.nn as nn


class LiteDualNet(nn.Module):
    """
    LiteDualNet
    ------------
    Small convolutional network with a shared backbone and two heads:
      - Density head: 1-channel map used for counting.
      - Classification head: one logit per class for the whole image.

    The backbone contains three 2x2 max-pooling layers, so the density map is
    produced at 1/8 of the input height and width.
    NOTE(review): COCODualDataset builds its density target at the full
    resized resolution — confirm that the training loss reconciles the two
    sizes.

    Parameters
    ----------
    num_classes : int
        Number of logits produced by the classification head.
    """

    def __init__(self, num_classes=6):
        super().__init__()

        # Backbone: four conv -> batch-norm -> ReLU stages; every stage except
        # the last is followed by a 2x2 max-pool. Layers are appended in the
        # same order as a hand-written Sequential, so module indices (and
        # therefore state-dict keys) stay backbone.0 ... backbone.14.
        layers = []
        in_channels = 3
        for out_channels, downsample in ((32, True), (64, True), (128, True), (256, False)):
            layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
            layers.append(nn.BatchNorm2d(out_channels))
            layers.append(nn.ReLU())
            if downsample:
                layers.append(nn.MaxPool2d(kernel_size=2))
            in_channels = out_channels
        self.backbone = nn.Sequential(*layers)

        # Head 1: density regression, single output channel
        self.density_head = nn.Sequential(
            nn.Conv2d(256, 128, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.Conv2d(128, 1, kernel_size=1),
        )

        # Head 2: classification from globally average-pooled features
        self.class_head = nn.Sequential(
            nn.AdaptiveAvgPool2d(1),
            nn.Flatten(),
            nn.Linear(256, 128),
            nn.ReLU(),
            nn.Linear(128, num_classes),
        )

    def forward(self, x):
        """Run the shared backbone once and return (density_map, class_logits)."""
        shared = self.backbone(x)
        density_map = self.density_head(shared)
        class_logits = self.class_head(shared)
        return density_map, class_logits
