
# 🛩️ Detección de Aviones con Bounding Boxes

Este notebook entrena un modelo basado en `ResNet18` para detectar la ubicación de aviones en imágenes. Utiliza coordenadas corregidas de los bounding boxes, data augmentation y entrenamiento sobre imágenes redimensionadas. Se calcula el IoU promedio como métrica de desempeño.


In [None]:

import pandas as pd

# Load the bounding-box annotation table from the CSV file.
df = pd.read_csv('Airplanes.csv')

# Remap the raw label values into [x, y, width, height] order.
def corners_to_coco(x_top, y_top, x_bottom, y_bottom):
    """Return ``[x, y, width, height]`` built from the four raw label values.

    The mapping is not a plain corner-to-size conversion: ``x`` is taken from
    ``x_bottom`` and ``y`` from ``x_top``, while ``width`` is
    ``y_bottom - x_bottom`` and ``height`` is ``y_top - x_top``.
    NOTE(review): presumably the CSV column names do not match their actual
    contents and this is the intended correction -- confirm against the data.
    """
    left, top = x_bottom, x_top
    return [left, top, y_bottom - left, y_top - top]

# Apply the label correction to every row; the four values returned by
# corners_to_coco become the 'x', 'y', 'width' and 'height' columns.
df[['x', 'y', 'width', 'height']] = df.apply(
    lambda rec: pd.Series(
        corners_to_coco(
            *(rec[name] for name in ('x_top', 'y_top', 'x_bottom', 'y_bottom'))
        )
    ),
    axis=1,
)

# Far corner of each box (origin plus size).
df['x2'] = df['x'].add(df['width'])
df['y2'] = df['y'].add(df['height'])

# Normalize the corners by the image size used for the labels (512 x 512).
IMAGE_WIDTH = IMAGE_HEIGHT = 512
df['x_norm'] = df['x'].div(IMAGE_WIDTH)
df['y_norm'] = df['y'].div(IMAGE_HEIGHT)
df['x2_norm'] = df['x2'].div(IMAGE_WIDTH)
df['y2_norm'] = df['y2'].div(IMAGE_HEIGHT)
# One [x1, y1, x2, y2] list per row, used as the regression target.
df['bbox_norm'] = df[['x_norm', 'y_norm', 'x2_norm', 'y2_norm']].to_numpy().tolist()

# Path of each image file, relative to the working directory.
df['image_path'] = df['Image'].map(lambda name: f"airplanes/{name}")
df.head()


In [None]:

import torch
from torch.utils.data import Dataset, DataLoader
from torchvision import transforms
from PIL import Image

class AirplaneDataset(Dataset):
    """Dataset yielding ``(image, bbox)`` pairs described by a DataFrame.

    Each row must provide ``image_path`` (opened with PIL and converted to
    RGB) and ``bbox_norm`` (turned into a float32 tensor).
    """

    def __init__(self, dataframe, transform=None):
        # ``transform`` is an optional callable applied to the image only;
        # the bounding box is returned untouched.
        self.transform = transform
        self.dataframe = dataframe

    def __len__(self):
        """Return the number of rows, i.e. the number of samples."""
        return len(self.dataframe)

    def __getitem__(self, idx):
        """Load the sample at position ``idx`` and return ``(image, bbox)``."""
        record = self.dataframe.iloc[idx]
        picture = Image.open(record['image_path']).convert('RGB')
        if self.transform:
            picture = self.transform(picture)
        target = torch.tensor(record['bbox_norm'], dtype=torch.float32)
        return picture, target

# Augmentation and tensor conversion.
# NOTE: no geometric augmentation is used here. The transform only receives the
# image, so a RandomHorizontalFlip would mirror the picture while the
# bounding-box target stayed unchanged, producing wrong labels for every
# flipped sample. A flip can be reintroduced once the box is mirrored together
# with the image.
transform = transforms.Compose([
    transforms.Resize((224, 224)),  # targets are normalized, so resizing keeps them valid
    transforms.ColorJitter(brightness=0.2, contrast=0.2),  # photometric only
    transforms.ToTensor(),
])

# Build the dataset and the shuffled training loader.
dataset = AirplaneDataset(df, transform=transform)
dataloader = DataLoader(dataset, batch_size=8, shuffle=True)


In [None]:

import torch.nn as nn
from torchvision.models import resnet18

# Pick the compute device: NVIDIA CUDA first, then Apple MPS, otherwise CPU.
# ``torch.backends.mps`` is missing on older PyTorch builds, so it is looked up
# defensively instead of raising AttributeError there.
if torch.cuda.is_available():
    device = torch.device("cuda")
    print("Usando GPU NVIDIA")
elif (
    getattr(torch.backends, "mps", None) is not None
    and torch.backends.mps.is_available()
):
    device = torch.device("mps")
    print("Usando GPU Apple Silicon (MPS)")
else:
    device = torch.device("cpu")
    print("Usando CPU")

# Bounding-box regressor built on a ResNet18 backbone.
class ResNetBackbone(nn.Module):
    """ResNet18 feature extractor followed by a 4-value regression head."""

    def __init__(self):
        super().__init__()
        backbone = resnet18(pretrained=True)
        # Keep every child module except the last one (the final layer).
        layers = list(backbone.children())
        layers.pop()
        self.features = nn.Sequential(*layers)
        # Flatten -> Linear(512, 4) -> Sigmoid; the sigmoid keeps the four
        # outputs between 0 and 1.
        self.regressor = nn.Sequential(
            nn.Flatten(),
            nn.Linear(512, 4),
            nn.Sigmoid(),
        )

    def forward(self, x):
        """Run ``x`` through the backbone and return the regressor output."""
        return self.regressor(self.features(x))

# Instantiate the network and move it to the selected device, then set up the
# regression loss (mean squared error) and the Adam optimizer (lr = 1e-3).
model = ResNetBackbone()
model = model.to(device)
criterion = nn.MSELoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=1e-3)


In [None]:

import numpy as np
import copy

def compute_iou(box1, box2):
    """Return the intersection-over-union of two ``[x1, y1, x2, y2]`` boxes.

    The result is 0 when the union area is zero.
    """
    # Corners of the overlap rectangle.
    left = max(box1[0], box2[0])
    top = max(box1[1], box2[1])
    right = min(box1[2], box2[2])
    bottom = min(box1[3], box2[3])

    # Negative extents mean the boxes do not overlap along that axis.
    overlap_w = max((right - left), 0)
    overlap_h = max((bottom - top), 0)
    inter_area = overlap_w * overlap_h

    areas = [(b[2] - b[0]) * (b[3] - b[1]) for b in (box1, box2)]
    union_area = areas[0] + areas[1] - inter_area
    if union_area == 0:
        return 0
    return inter_area / union_area

# Training loop: optimize the MSE loss, then track the mean IoU on the first
# few dataset samples after every epoch and keep the best-scoring weights.
NUM_EPOCHS = 15
EVAL_SAMPLES = 20

best_model = None
best_iou = 0.0
iou_per_epoch = []

for epoch in range(NUM_EPOCHS):
    model.train()
    running_loss = 0.0
    for images, targets in dataloader:
        images, targets = images.to(device), targets.to(device)
        outputs = model(images)
        loss = criterion(outputs, targets)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    avg_loss = running_loss / len(dataloader)
    print(f"🔁 Epoch {epoch+1}/{NUM_EPOCHS} - Loss promedio: {avg_loss:.4f}")

    # Evaluate IoU on the first samples of the dataset. The count is capped so
    # that datasets smaller than EVAL_SAMPLES do not raise an IndexError.
    model.eval()
    ious = []
    with torch.no_grad():  # inference only: no autograd graph needed
        for i in range(min(EVAL_SAMPLES, len(dataset))):
            image, target = dataset[i]
            image = image.unsqueeze(0).to(device)
            pred = model(image).cpu().numpy().flatten()
            true = target.numpy().flatten()
            # Scale the normalized corners back to the 512-pixel label space.
            pred_box = [coord * 512 for coord in pred[:4]]
            true_box = [coord * 512 for coord in true[:4]]
            ious.append(compute_iou(pred_box, true_box))

    avg_iou = np.mean(ious)
    iou_per_epoch.append(avg_iou)
    print(f"🔍 IoU promedio: {avg_iou:.3f}")

    if avg_iou > best_iou:
        best_model = copy.deepcopy(model.state_dict())
        best_iou = avg_iou
        print("✅ Nuevo mejor modelo guardado.")

# If no epoch ever improved on the initial IoU of 0, fall back to the final
# weights so the checkpoint never contains ``None``.
if best_model is None:
    best_model = copy.deepcopy(model.state_dict())

torch.save(best_model, "mejor_modelo_bbox.pth")
print("📦 Modelo guardado.")


In [None]:

import matplotlib.pyplot as plt
import cv2
import random

def visualize_predictions(model, dataset, num_images=5):
    """Plot ground-truth (green) and predicted (red) boxes for random samples.

    Args:
        model: network returning four normalized box coordinates per image.
        dataset: indexable dataset returning ``(image_tensor, bbox_tensor)``.
        num_images: number of random samples to show; capped at the dataset
            size. Nothing is drawn when the resulting count is zero.
    """
    model.eval()
    num_images = min(num_images, len(dataset))
    if num_images <= 0:
        return
    indices = random.sample(range(len(dataset)), num_images)
    # squeeze=False keeps ``axes`` two-dimensional, so indexing also works
    # when a single image is requested.
    _, axes = plt.subplots(1, num_images, figsize=(20, 5), squeeze=False)
    # Prefer the dataset's own table so paths always match the drawn samples;
    # fall back to the module-level ``df`` for datasets without one.
    frame = getattr(dataset, 'dataframe', df)

    for ax, idx in zip(axes[0], indices):
        ax.axis('off')  # also hides the frame of panels skipped below
        image, true_bbox = dataset[idx]
        image_input = image.unsqueeze(0).to(device)
        with torch.no_grad():  # inference only
            pred_bbox = model(image_input).cpu().numpy().flatten()
        # Scale normalized corners back to the 512-pixel label space.
        true = [int(x * 512) for x in true_bbox]
        pred = [int(x * 512) for x in pred_bbox]
        image_path = frame.iloc[idx]['image_path']
        img = cv2.imread(image_path)
        if img is None:
            # cv2.imread returns None for unreadable files; skip the panel.
            continue
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        cv2.rectangle(img, (true[0], true[1]), (true[2], true[3]), (0, 255, 0), 2)
        cv2.rectangle(img, (pred[0], pred[1]), (pred[2], pred[3]), (255, 0, 0), 2)
        ax.imshow(img)
        ax.set_title(f'Img {idx}')
    plt.tight_layout()
    plt.show()

# Show boxes for five randomly chosen samples using the current `model`.
visualize_predictions(model, dataset, num_images=5)
