In [1]:
!pip install torchvision
import os
import torch
import torchvision

Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch==2.6.0->torchvision)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch==2.6.0->torchvision)
  Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)
Collecting nvidia-cublas-cu12==12.4.5.8 (from torch==2.6.0->torchvision)
  Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cufft-cu12==11.2.1.3 (from torch==2.6.0->torchvision)
  Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86

In [2]:
import os
import json
import torch
import pandas as pd
from PIL import Image
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms


In [4]:
import os
from google.colab import drive

# Mount Google Drive unless the mount point is already an active mount
drive_mount_point = "/content/drive"
if os.path.ismount(drive_mount_point):
    print("Google Drive ya estaba montado.")
else:
    drive.mount(drive_mount_point)
    print("Google Drive montado.")


Mounted at /content/drive
Google Drive montado.


### Descargo el dataset original y agrego imágenes aumentadas donde ya esté el original

In [None]:
import kagglehub
import zipfile
import os
import shutil

# Download the original dataset (kagglehub returns the local folder it was stored in)
path = kagglehub.dataset_download("kneroma/tacotrashdataset")
print("Dataset original descargado en:", path)

# Folder that will hold the combined (original + augmented) dataset
augmented_dataset_dir = '/content/augmented_dataset'
os.makedirs(augmented_dataset_dir, exist_ok=True)

# Copy the original data into the working folder.
# dirs_exist_ok=True keeps this cell re-runnable: without it copytree raises
# FileExistsError as soon as the destination exists from a previous run.
original_data_path = os.path.join(augmented_dataset_dir, 'data')
shutil.copytree(path, original_data_path, dirs_exist_ok=True)
print("Carpeta de datos detectada:", original_data_path)

# Archive with the augmented images, stored in Google Drive
augmented_zip_path = "/content/drive/MyDrive/VC2/augmented.zip"

# Extract the augmented images on top of the copied original data
with zipfile.ZipFile(augmented_zip_path, 'r') as zip_ref:
    zip_ref.extractall(original_data_path)

print(f"Dataset aumentado descomprimido en: {original_data_path}")

In [None]:
import os
import shutil

# Folder where the extracted archive left the augmented batches
augmented_batches_root = "/content/augmented_dataset/data/kaggle/working"

# Destination: the folder that already holds the original batches
target_data_dir = "/content/augmented_dataset/data/data"

# Relocate every directory whose name starts with "aug_"
for folder in os.listdir(augmented_batches_root):
    source = os.path.join(augmented_batches_root, folder)
    destination = os.path.join(target_data_dir, folder)

    if not (os.path.isdir(source) and folder.startswith("aug_")):
        continue

    if os.path.isdir(destination):
        # shutil.move would place `source` INSIDE an existing destination
        # (aug_x/aug_x) when the cell is re-run after a new extraction, so
        # merge the contents into the existing folder instead.
        shutil.copytree(source, destination, dirs_exist_ok=True)
        shutil.rmtree(source)
    else:
        shutil.move(source, destination)
    print(f"Movido: {folder}")

print("Todas las carpetas 'aug_' fueron movidas a la carpeta del dataset original.")


### Dataset combinado

In [None]:
import os
import json
import pandas as pd
from PIL import Image
import torch
from torch.utils.data import Dataset
import hashlib

def stable_int_id(value: str) -> int:
    """Map a string to a reproducible non-negative integer.

    The SHA-256 digest of the encoded string is read as one big-endian
    integer and reduced modulo ``2**31 - 1``, so the same input always
    yields the same id.
    """
    digest = hashlib.sha256(value.encode()).digest()
    as_integer = int.from_bytes(digest, "big")
    return as_integer % (2**31 - 1)


class TacoCombinedDataset(Dataset):
    """Object-detection dataset built from an annotations CSV and an id->file map.

    Args:
        image_root: Root folder that is searched recursively for image files.
        annotations_csv_path: CSV with one annotation per row. The columns read
            here are ``image_id``, ``bbox``, ``category_id`` and ``new_category``.
        image_map_json_path: JSON object mapping ``str(image_id)`` to a dict
            that has a ``"file_name"`` entry.
        transforms: Optional callable applied to the PIL image only (boxes are
            not transformed).

    Attributes:
        image_id_to_path: ``image_id`` -> resolved image path on disk.
        image_ids: Ids of the images that were found; defines the sample order.
        classes: ``{0: 'background', 1: <name>, ...}`` where names are numbered
            in order of first appearance of ``new_category`` in the CSV.
            NOTE(review): labels returned by ``__getitem__`` are
            ``category_id + 1``; confirm that this numbering agrees with the
            ``new_category`` order used here.
    """

    def __init__(self,
                 image_root,                         # Root folder containing all images
                 annotations_csv_path,               # CSV with every annotation
                 image_map_json_path,                # JSON mapping image_id -> file_name
                 transforms=None):

        self.image_root = image_root
        self.transforms = transforms

        # Read the full annotations CSV
        self.annotations_df = pd.read_csv(annotations_csv_path)

        # Load the id -> file_name mapping
        with open(image_map_json_path, 'r') as f:
            self.image_map = json.load(f)

        # Group annotations by image id
        self.anns_by_image_id = self.annotations_df.groupby("image_id")

        # Walk the image tree ONCE and index files by lower-cased name; the
        # previous per-image os.walk made construction quadratic.
        files_by_name = self._index_files_by_name(self.image_root)

        # Map every annotated id to the path of its image file
        self.image_id_to_path = {}
        for image_id in self.anns_by_image_id.groups.keys():
            str_id = str(image_id)
            if str_id not in self.image_map:
                print(f"❌ image_id {image_id} no está en el JSON")
                continue

            file_name = self.image_map[str_id]["file_name"]
            full_path = self._resolve_image_path(file_name, files_by_name)

            if full_path is not None:
                self.image_id_to_path[image_id] = full_path
            else:
                print(f"⚠️ Imagen no encontrada: {file_name} (id={image_id})")

        self.image_ids = list(self.image_id_to_path.keys())

        category_names = self.annotations_df["new_category"].unique()
        self.classes = {
            0: 'background',
            **{index + 1: name for index, name in enumerate(category_names)},
        }

    @staticmethod
    def _index_files_by_name(image_root):
        """Return ``{lower-cased file name: [full paths in os.walk order]}``."""
        index = {}
        for root, _, files in os.walk(image_root):
            for name in files:
                index.setdefault(name.lower(), []).append(os.path.join(root, name))
        return index

    @staticmethod
    def _path_components(path):
        """Split a path into lower-cased components, accepting '/' and '\\'."""
        normalized = path.replace("\\", "/").lower()
        return [part for part in normalized.split("/") if part not in ("", ".")]

    @classmethod
    def _resolve_image_path(cls, file_name, files_by_name):
        """Find the indexed file that best matches ``file_name``.

        Matching is case-insensitive. Among files sharing the base name, the
        one whose path agrees with ``file_name`` on the most trailing
        components wins, so ``batch_2/0001.jpg`` is not confused with
        ``batch_1/0001.jpg``. Ties keep the first file in walk order, which is
        what a base-name-only search returns. Returns ``None`` when no file
        has that base name.
        """
        wanted = cls._path_components(file_name)
        if not wanted:
            return None

        best_path, best_score = None, 0
        for candidate in files_by_name.get(wanted[-1], ()):
            have = cls._path_components(candidate)
            score = 0
            for wanted_part, have_part in zip(reversed(wanted), reversed(have)):
                if wanted_part != have_part:
                    break
                score += 1
            if score > best_score:
                best_path, best_score = candidate, score
        return best_path

    def __len__(self):
        """Number of images that have annotations and were found on disk."""
        return len(self.image_ids)

    def __getitem__(self, idx):
        """Return ``(image, target)`` for the sample at position ``idx``.

        ``target`` holds ``boxes`` (float32, ``[x1, y1, x2, y2]`` per row),
        ``labels`` (int64, ``category_id + 1``) and ``image_id`` (int64 tensor
        with one hashed id).
        """
        image_id = self.image_ids[idx]
        img_path = self.image_id_to_path[image_id]
        image = Image.open(img_path).convert("RGB")

        anns_df = self.anns_by_image_id.get_group(image_id)
        boxes, labels = [], []

        for _, row in anns_df.iterrows():
            # bbox may arrive as a JSON string or as an already-parsed sequence
            bbox = json.loads(row['bbox']) if isinstance(row['bbox'], str) else row['bbox']
            x, y, w, h = bbox
            # Convert [x, y, width, height] into corner format [x1, y1, x2, y2]
            boxes.append([x, y, x + w, y + h])
            # Shift by one because label 0 is reserved for background
            labels.append(row['category_id'] + 1)

        boxes = torch.as_tensor(boxes, dtype=torch.float32)
        labels = torch.as_tensor(labels, dtype=torch.int64)

        target = {
            "boxes": boxes,
            "labels": labels,
            "image_id": torch.tensor([stable_int_id(str(image_id))], dtype=torch.int64)
        }

        if self.transforms:
            image = self.transforms(image)

        return image, target


### Defino transformaciones de preentrenamiento

In [None]:
from torchvision import transforms

# torchvision detection models expect images in the 0-1 range and apply their
# own mean/std normalization internally, so normalizing here as well would
# normalize the input twice. Only the PIL -> float tensor conversion is done.
transform = transforms.Compose([
    transforms.ToTensor(),
])

### Creo dataset

In [None]:
# Build the combined dataset from the extracted images plus the annotation
# files kept in Google Drive (the call was missing its closing parenthesis).
dataset = TacoCombinedDataset(
    image_root="/content/augmented_dataset/data/data",
    annotations_csv_path="/content/drive/MyDrive/VC2/augmented_annotations_fixed.csv",
    image_map_json_path="/content/drive/MyDrive/VC2/image_map_augmented.json",
    transforms=transform,
)


In [None]:
# Fetch the first sample and print its basic structure as a quick sanity check
image, target = dataset[0]
print("Shape:", image.shape)
print("Target keys:", target.keys())
print("Boxes:", target["boxes"].shape)
print("Labels:", target["labels"])


### Separo en Train y Val

In [None]:
from torch.utils.data import random_split, DataLoader

# Fraction of the samples assigned to training; the remainder goes to validation
train_ratio = 0.8
train_size = int(train_ratio * len(dataset))
val_size = len(dataset) - train_size

# The generator is seeded so the random split is the same on every run
train_dataset, val_dataset = random_split(dataset, [train_size, val_size], generator=torch.Generator().manual_seed(42))



### Data Loader

In [None]:
def collate_fn(batch):
    """Regroup a list of samples into one tuple per sample field.

    A batch ``[(img_a, tgt_a), (img_b, tgt_b)]`` becomes
    ``((img_a, img_b), (tgt_a, tgt_b))``; nothing is stacked into a tensor.
    """
    per_field = zip(*batch)
    return tuple(per_field)

# Training batches are reshuffled on every pass; validation keeps a fixed order.
# Both loaders batch 16 samples through collate_fn using two worker processes.
train_loader = DataLoader(train_dataset, batch_size=16, shuffle=True, collate_fn=collate_fn, num_workers=2)
val_loader = DataLoader(val_dataset, batch_size=16, shuffle=False, collate_fn=collate_fn, num_workers=2)


In [None]:
# Pull a single batch from the training loader to check what collate_fn produces
images, targets = next(iter(train_loader))
print("Nuevo batch size:", len(images))
print("Shape imagen 0:", images[0].shape)
print("Keys del target:", targets[0].keys())


### Importar RetinaNet y configurar modelo

In [None]:
from torchvision.models.detection import retinanet_resnet50_fpn
from torchvision.models.detection.retinanet import RetinaNetClassificationHead, RetinaNet_ResNet50_FPN_Weights

# Load the pretrained model
weights = RetinaNet_ResNet50_FPN_Weights.DEFAULT
model = retinanet_resnet50_fpn(weights=weights)


# Replace the classification head so it predicts this dataset's classes.
# NOTE(review): dataset.classes already contains a 'background' entry at key 0,
# so the extra +1 below gives the head one more output than there are entries
# in dataset.classes. Confirm whether that is intended before changing it:
# checkpoints saved with this head depend on its size.
num_classes = len(dataset.classes) + 1  # +1 for the background
# Reuse the channel and anchor counts of the pretrained head for the new one
in_features = model.head.classification_head.conv[0][0].in_channels
num_anchors = model.head.classification_head.num_anchors

model.head.classification_head = RetinaNetClassificationHead(
    in_channels=in_features,
    num_anchors=num_anchors,
    num_classes=num_classes
)


### Preparar dispositivo, optimizador y carpeta de checkpoints

In [None]:
import torch.optim as optim
import os

# Use the GPU when one is available, otherwise fall back to the CPU
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Adam over every model parameter with a learning rate of 1e-4
optimizer = optim.Adam(model.parameters(), lr=1e-4)

# Folder (in Google Drive) where checkpoints are saved
checkpoint_dir = "/content/drive/MyDrive/checkpoints_retinanet_datauag"
os.makedirs(checkpoint_dir, exist_ok=True)


### Cargar checkpoint si lo hay

In [None]:
import glob
import os
import re

# Per-epoch loss history used for tracking
train_losses_per_epoch = []
val_losses_per_epoch = []


def _checkpoint_order(checkpoint_path):
    """Return ``(epoch, batch)`` parsed from a checkpoint file name.

    Used as a numeric sort key: plain string sorting would place
    ``epoch10`` before ``epoch2`` and ``batch100`` before ``batch50``.
    Names that do not match sort first.
    """
    match = re.search(r"retinanet_epoch(\d+)_batch(\d+)\.pth$", os.path.basename(checkpoint_path))
    if match is None:
        return (-1, -1)
    return (int(match.group(1)), int(match.group(2)))


# Look for existing checkpoints, oldest first
checkpoint_files = sorted(
    glob.glob(os.path.join(checkpoint_dir, "retinanet_epoch*_batch*.pth")),
    key=_checkpoint_order,
)

if checkpoint_files:
    latest_checkpoint = checkpoint_files[-1]
    checkpoint = torch.load(latest_checkpoint, map_location=device)

    model.load_state_dict(checkpoint["model_state_dict"])
    optimizer.load_state_dict(checkpoint["optimizer_state_dict"])
    # The training loop stores 'epoch' as a 1-based number while that epoch is
    # still in progress, whereas the loop itself counts from 0. Subtracting one
    # resumes at the interrupted epoch instead of skipping the rest of it.
    start_epoch = checkpoint["epoch"] - 1

    print(f"Checkpoint cargado desde: {latest_checkpoint}")
    print(f"Retomando desde la epoch {start_epoch + 1}")

else:
    start_epoch = 0
    print("No se encontraron checkpoints previos. Comenzando desde cero.")


### Loop de entrenamiento con guardado por batch

In [None]:
import os
import torch
import glob

num_epochs = 50
best_val_loss = float("inf")
save_every_n_batches = 50

# `start_epoch` and `checkpoint_dir` are intentionally NOT reassigned here.
# They come from the earlier setup / checkpoint-loading cells; resetting them
# in this cell discarded the resume point and redirected checkpoints to a
# local "checkpoints" folder instead of the Drive folder that is searched
# when resuming.
os.makedirs(checkpoint_dir, exist_ok=True)

# Per-epoch mean losses, kept for plotting afterwards
train_losses_per_epoch = []
val_losses_per_epoch = []

# Training
for epoch in range(start_epoch, num_epochs):
    model.train()
    epoch_loss = 0.0

    for batch_idx, (images, targets) in enumerate(train_loader):
        images = [img.to(device) for img in images]
        targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

        # In training mode the model returns a dict of loss terms
        loss_dict = model(images, targets)
        losses = sum(loss for loss in loss_dict.values())

        optimizer.zero_grad()
        losses.backward()
        optimizer.step()

        epoch_loss += losses.item()

        # Periodic mid-epoch checkpoint so an interrupted session can resume
        if (batch_idx + 1) % save_every_n_batches == 0:
            checkpoint_path = os.path.join(
                checkpoint_dir,
                f"retinanet_epoch{epoch+1}_batch{batch_idx+1}.pth"
            )
            torch.save({
                'epoch': epoch + 1,
                'batch': batch_idx + 1,
                'model_state_dict': model.state_dict(),
                'optimizer_state_dict': optimizer.state_dict(),
                'loss': epoch_loss,  # running sum of the batch losses so far
            }, checkpoint_path)
            print(f"Checkpoint guardado: epoch {epoch+1}, batch {batch_idx+1}")

    # Report the mean loss per batch so it is comparable with the validation loss
    train_loss = epoch_loss / max(len(train_loader), 1)
    print(f"Epoch {epoch+1}/{num_epochs} - Train Loss: {train_loss:.4f}")
    train_losses_per_epoch.append(train_loss)

    # Validation
    # torchvision detection models return the loss dict only in training mode;
    # in eval mode they return detections, so the validation loss was never
    # accumulated and stayed at 0. The model therefore stays in train mode and
    # torch.no_grad() guarantees that no gradients are computed.
    # NOTE(review): assumes no layer updates internal state in train mode
    # (torchvision's pretrained RetinaNet backbone uses frozen batch norm);
    # confirm if the architecture changes.
    model.train()
    val_loss = 0.0
    with torch.no_grad():
        for images, targets in val_loader:
            images = [img.to(device) for img in images]
            targets = [{k: v.to(device) for k, v in t.items()} for t in targets]

            loss_dict = model(images, targets)
            val_loss += sum(loss for loss in loss_dict.values()).item()

    # max(..., 1) avoids a ZeroDivisionError when the validation loader is empty
    val_loss /= max(len(val_loader), 1)
    val_losses_per_epoch.append(val_loss)
    print(f"Validation Loss: {val_loss:.4f}")

    if val_loss < best_val_loss:
        best_val_loss = val_loss
        torch.save(model.state_dict(), os.path.join(checkpoint_dir, "best_model.pth"))
        print(f"Mejor modelo actualizado (Val Loss: {val_loss:.4f})")

    # Remove this epoch's batch checkpoints once the epoch has finished
    batch_ckpts = glob.glob(os.path.join(checkpoint_dir, f"retinanet_epoch{epoch+1}_batch*.pth"))
    for ckpt_file in batch_ckpts:
        os.remove(ckpt_file)
    print(f"Checkpoints de batches eliminados para epoch {epoch+1}")
