# Baseline OVD - BDD100K Detection Pipeline

Pipeline completo para establecer el baseline de detección con Grounding-DINO en BDD100K.

**Fases:**
1. Configuración del modelo y dataset
2. Inferencia sobre val_eval
3. Evaluación de métricas
4. Preparación para calibración
5. Generación de artefactos

**Hardware requerido:** GPU con CUDA (mínimo 8GB VRAM)

## 1. Imports y Setup

In [2]:
#pip install seaborn

Collecting seaborn
  Using cached seaborn-0.13.2-py3-none-any.whl.metadata (5.4 kB)
Using cached seaborn-0.13.2-py3-none-any.whl (294 kB)
Installing collected packages: seaborn
Successfully installed seaborn-0.13.2
Note: you may need to restart the kernel to use updated packages.


In [3]:
#!pip install torch torchvision
#!pip install pycocotools
#!pip install pandas matplotlib seaborn
#!pip install Pillow tqdm pyyaml

Collecting torch
  Using cached torch-2.9.0-cp312-cp312-win_amd64.whl.metadata (30 kB)
Collecting torchvision
  Using cached torchvision-0.24.0-cp312-cp312-win_amd64.whl.metadata (5.9 kB)
Collecting sympy>=1.13.3 (from torch)
  Using cached sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)
  Using cached mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Using cached torch-2.9.0-cp312-cp312-win_amd64.whl (109.3 MB)
Using cached torchvision-0.24.0-cp312-cp312-win_amd64.whl (4.3 MB)
Using cached sympy-1.14.0-py3-none-any.whl (6.3 MB)
Using cached mpmath-1.3.0-py3-none-any.whl (536 kB)
Installing collected packages: mpmath, sympy, torch, torchvision

   ---------------------------------------- 0/4 [mpmath]
   ---------------------------------------- 0/4 [mpmath]
   ---------------------------------------- 0/4 [mpmath]
   ---------------------------------------- 0/4 [mpmath]
   ---------------------------------------- 0/4 [mpmath]
   ----

In [None]:
print("All required packages are installed.")
#pip install ipykernel

In [None]:
#%pip install pandas

Collecting pandas
  Downloading pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl.metadata (91 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m91.2/91.2 kB[0m [31m1.7 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
Collecting tzdata>=2022.7 (from pandas)
  Downloading tzdata-2025.2-py2.py3-none-any.whl.metadata (1.4 kB)
Downloading pandas-2.3.3-cp310-cp310-manylinux_2_24_x86_64.manylinux_2_28_x86_64.whl (12.8 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m4.3 MB/s[0m eta [36m0:00:00[0m00:01[0m00:01[0m
[?25hDownloading tzdata-2025.2-py2.py3-none-any.whl (347 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m347.8/347.8 kB[0m [31m4.1 MB/s[0m eta [36m0:00:00[0ma [36m0:00:01[0m
[?25hInstalling collected packages: tzdata, pandas
Successfully installed pandas-2.3.3 tzdata-2025.2
[0mNote: you may need to restart the kernel to use updated packages.


In [4]:
import os
import sys
import json
import yaml
import time
import torch
import numpy as np
import pandas as pd
from pathlib import Path
from PIL import Image
from tqdm import tqdm
from collections import defaultdict
import matplotlib.pyplot as plt
from datetime import datetime
from pycocotools.coco import COCO
from pycocotools.cocoeval import COCOeval

In [2]:
import groundingdino, os
print("GroundingDINO en:", os.path.dirname(groundingdino.__file__))

GroundingDINO en: /opt/conda/lib/python3.10/site-packages/groundingdino-0.1.0-py3.10-linux-x86_64.egg/groundingdino


In [5]:
for i, p in enumerate(sys.path):
    print(i, p)

0 /opt/conda/lib/python310.zip
1 /opt/conda/lib/python3.10
2 /opt/conda/lib/python3.10/lib-dynload
3 
4 /opt/conda/lib/python3.10/site-packages
5 /opt/conda/lib/python3.10/site-packages/groundingdino-0.1.0-py3.10-linux-x86_64.egg
6 /opt/program/GroundingDINO


In [6]:
#sys.path.append('../installing_dino/GroundingDINO')
from groundingdino.util.inference import load_model, load_image, predict
from groundingdino.util import box_ops

torch.manual_seed(42)
np.random.seed(42)
if torch.cuda.is_available():
    torch.cuda.manual_seed_all(42)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False





In [7]:
import torch
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
print(f"Device: {device}")
print(f"PyTorch: {torch.__version__}")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"GPU: {torch.cuda.get_device_name(0)}")
    print(f"CUDA version: {torch.version.cuda}")

Device: cuda
PyTorch: 2.3.1+cu121
CUDA available: True
GPU: NVIDIA GeForce RTX 4060 Laptop GPU
CUDA version: 12.1


## 2. Configuración Baseline (1.1, 1.2)

In [8]:
BASE_DIR = Path('../data')
OUTPUT_DIR = Path('./outputs/baseline')
QUALITATIVE_DIR = Path('./outputs/qualitative/baseline')
CONFIG_DIR = Path('./configs')

OUTPUT_DIR.mkdir(parents=True, exist_ok=True)
QUALITATIVE_DIR.mkdir(parents=True, exist_ok=True)
CONFIG_DIR.mkdir(parents=True, exist_ok=True)
    
baseline_config = {
    'model': {
        'name': 'Grounding-DINO',
        'checkpoint': '/opt/program/GroundingDINO/weights/groundingdino_swint_ogc.pth',
        'config': '/opt/program/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py',
        'architecture': 'SwinT-OGC',
        'input_size': [800, 1333],
        'device': str(device)
    },
    'dataset': {
        'image_dir': str(BASE_DIR / 'bdd100k/bdd100k/bdd100k/images/100k/val'),
        'val_eval_json': str(BASE_DIR / 'bdd100k_coco/val_eval.json'),
        'val_calib_json': str(BASE_DIR / 'bdd100k_coco/val_calib.json')
    },
    'inference': {
        'conf_threshold': 0.30,
        'nms_iou': 0.65,
        'batch_size': 1,
        'max_detections': 300
    },
    'prompts_file': str(BASE_DIR / 'prompts/bdd100k.txt'),
    'seed': 42,
    'timestamp': datetime.now().isoformat()
}

with open(CONFIG_DIR / 'baseline.yaml', 'w') as f:
    yaml.dump(baseline_config, f, default_flow_style=False)

print("Configuración baseline:")
print(yaml.dump(baseline_config, default_flow_style=False))

Configuración baseline:
dataset:
  image_dir: ../data/bdd100k/bdd100k/bdd100k/images/100k/val
  val_calib_json: ../data/bdd100k_coco/val_calib.json
  val_eval_json: ../data/bdd100k_coco/val_eval.json
inference:
  batch_size: 1
  conf_threshold: 0.3
  max_detections: 300
  nms_iou: 0.65
model:
  architecture: SwinT-OGC
  checkpoint: /opt/program/GroundingDINO/weights/groundingdino_swint_ogc.pth
  config: /opt/program/GroundingDINO/groundingdino/config/GroundingDINO_SwinT_OGC.py
  device: cuda
  input_size:
  - 800
  - 1333
  name: Grounding-DINO
prompts_file: ../data/prompts/bdd100k.txt
seed: 42
timestamp: '2025-11-10T22:15:34.737010'



## 3. Definición del Vocabulario (1.3, 1.4)

In [9]:
BDD_COCO_CATEGORIES = {
    1: 'person',
    2: 'rider',
    3: 'car',
    4: 'truck',
    5: 'bus',
    6: 'train',
    7: 'motorcycle',
    8: 'bicycle',
    9: 'traffic light',
    10: 'traffic sign'
}

PROMPTS = [
    'person',
    'rider',
    'car',
    'truck',
    'bus',
    'train',
    'motorcycle',
    'bicycle',
    'traffic light',
    'traffic sign'
]

PROMPT_SYNONYMS = {
    'bike': 'bicycle',
    'motorbike': 'motorcycle',
    'motor': 'motorcycle',
    'stop sign': 'traffic sign',
    'red light': 'traffic light',
    'signal': 'traffic light',
    'pedestrian': 'person',
    'vehicle': 'car',
    'bicyclist': 'rider'
}

CAT_ID_TO_PROMPT = {cat_id: name for cat_id, name in BDD_COCO_CATEGORIES.items()}
PROMPT_TO_CAT_ID = {name: cat_id for cat_id, name in BDD_COCO_CATEGORIES.items()}
PROMPT_IDX_TO_CAT_ID = {i: i+1 for i in range(len(PROMPTS))}

PROMPTS_DIR = BASE_DIR / 'prompts'
PROMPTS_DIR.mkdir(parents=True, exist_ok=True)

with open(PROMPTS_DIR / 'bdd100k.txt', 'w') as f:
    for prompt in PROMPTS:
        f.write(f"{prompt}\n")

TEXT_PROMPT = '. '.join(PROMPTS) + '.'

print(f"Clases BDD100K COCO: {len(BDD_COCO_CATEGORIES)}")
print(f"Text prompt para Grounding-DINO:\n{TEXT_PROMPT}")
print(f"\nMapeo guardado en: {PROMPTS_DIR / 'bdd100k.txt'}")

Clases BDD100K COCO: 10
Text prompt para Grounding-DINO:
person. rider. car. truck. bus. train. motorcycle. bicycle. traffic light. traffic sign.

Mapeo guardado en: ../data/prompts/bdd100k.txt


## 4. Carga del Modelo Grounding-DINO

In [10]:
print("Cargando Grounding-DINO...")
model = load_model(baseline_config['model']['config'],baseline_config['model']['checkpoint'])
model.to(device)
model.eval()
print("Modelo cargado exitosamente")

Cargando Grounding-DINO...




final text_encoder_type: bert-base-uncased




Modelo cargado exitosamente


In [11]:
for name, param in model.named_parameters():
    print(f"Primer parámetro ({name}) en:", param.device)
    break

Primer parámetro (transformer.level_embed) en: cuda:0


In [12]:
if next(model.parameters()).is_cuda:
    print("✅ El modelo está en GPU (CUDA)")
else:
    print("⚠️ El modelo está en CPU")

✅ El modelo está en GPU (CUDA)


## 5. Funciones de Post-procesamiento (2.2)

In [13]:
def normalize_label(label):
    label_lower = label.lower().strip()
    if label_lower in PROMPT_SYNONYMS:
        return PROMPT_SYNONYMS[label_lower]
    for canonical in PROMPTS:
        if canonical in label_lower:
            return canonical
    return label_lower

def cxcywh_to_xywh(bbox):
    cx, cy, w, h = bbox
    x = cx - w / 2
    y = cy - h / 2
    return [x, y, w, h]

def clip_bbox(bbox, img_w, img_h):
    x, y, w, h = bbox
    x = max(0, min(x, img_w))
    y = max(0, min(y, img_h))
    w = max(0, min(w, img_w - x))
    h = max(0, min(h, img_h - y))
    return [x, y, w, h]

def apply_nms(boxes, scores, labels, iou_threshold=0.65):
    if len(boxes) == 0:
        return boxes, scores, labels
    
    keep_indices = []
    boxes_tensor = torch.tensor(boxes)
    scores_tensor = torch.tensor(scores)
    labels_tensor = torch.tensor(labels)
    
    for label_id in torch.unique(labels_tensor):
        mask = labels_tensor == label_id
        if mask.sum() == 0:
            continue
        
        class_boxes = boxes_tensor[mask]
        class_scores = scores_tensor[mask]
        class_indices = torch.where(mask)[0]
        
        x1 = class_boxes[:, 0]
        y1 = class_boxes[:, 1]
        x2 = class_boxes[:, 0] + class_boxes[:, 2]
        y2 = class_boxes[:, 1] + class_boxes[:, 3]
        areas = class_boxes[:, 2] * class_boxes[:, 3]
        
        order = class_scores.argsort(descending=True)
        
        keep = []
        while order.numel() > 0:
            if order.numel() == 1:
                keep.append(order.item())
                break
            
            i = order[0].item()
            keep.append(i)
            
            xx1 = torch.maximum(x1[i], x1[order[1:]])
            yy1 = torch.maximum(y1[i], y1[order[1:]])
            xx2 = torch.minimum(x2[i], x2[order[1:]])
            yy2 = torch.minimum(y2[i], y2[order[1:]])
            
            w = torch.maximum(torch.tensor(0.0), xx2 - xx1)
            h = torch.maximum(torch.tensor(0.0), yy2 - yy1)
            inter = w * h
            
            iou = inter / (areas[i] + areas[order[1:]] - inter)
            
            mask_keep = iou <= iou_threshold
            order = order[1:][mask_keep]
        
        keep_indices.extend(class_indices[keep].tolist())
    
    keep_indices = sorted(keep_indices)
    return (
        [boxes[i] for i in keep_indices],
        [scores[i] for i in keep_indices],
        [labels[i] for i in keep_indices]
    )

print("Funciones de post-procesamiento definidas")

Funciones de post-procesamiento definidas


## 6. Inferencia sobre val_eval (2.1, 2.3, 2.4)

In [14]:
coco_val = COCO(baseline_config['dataset']['val_eval_json'])
image_ids = sorted(coco_val.getImgIds())

predictions = []
perf_metrics = {
    'times': [],
    'num_detections': [],
    'gpu_memory_mb': []
}

print(f"Iniciando inferencia sobre {len(image_ids)} imágenes de val_eval...")

sample_images = []
sample_interval = len(image_ids) // 50 if len(image_ids) > 50 else 1

for idx, img_id in enumerate(tqdm(image_ids)):
    img_info = coco_val.loadImgs(img_id)[0]
    img_path = Path(baseline_config['dataset']['image_dir']) / img_info['file_name']
    
    if not img_path.exists():
        continue
    
    image_pil = Image.open(img_path).convert('RGB')
    img_w, img_h = image_pil.size
    
    image_source, image_transformed = load_image(str(img_path))
    
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    start_time = time.time()
    
    with torch.no_grad():
        boxes, logits, phrases = predict(
            model=model,
            image=image_transformed,
            caption=TEXT_PROMPT,
            box_threshold=baseline_config['inference']['conf_threshold'],
            text_threshold=0.25,
            device=device
        )
    
    if torch.cuda.is_available():
        torch.cuda.synchronize()
    elapsed = time.time() - start_time
    perf_metrics['times'].append(elapsed)
    
    if torch.cuda.is_available():
        perf_metrics['gpu_memory_mb'].append(torch.cuda.max_memory_allocated() / 1024 / 1024)
    
    boxes_xyxy = box_ops.box_cxcywh_to_xyxy(boxes) * torch.tensor([img_w, img_h, img_w, img_h])
    boxes_xywh = []
    for box in boxes_xyxy:
        x1, y1, x2, y2 = box.tolist()
        boxes_xywh.append([x1, y1, x2 - x1, y2 - y1])
    
    scores = logits.tolist()
    labels_raw = [normalize_label(p) for p in phrases]
    category_ids = [PROMPT_TO_CAT_ID.get(lbl, -1) for lbl in labels_raw]
    
    valid_mask = [cid != -1 for cid in category_ids]
    boxes_xywh = [b for b, m in zip(boxes_xywh, valid_mask) if m]
    scores = [s for s, m in zip(scores, valid_mask) if m]
    category_ids = [c for c, m in zip(category_ids, valid_mask) if m]
    
    if len(boxes_xywh) > 0:
        boxes_xywh, scores, category_ids = apply_nms(
            boxes_xywh, scores, category_ids, 
            iou_threshold=baseline_config['inference']['nms_iou']
        )
    
    if len(boxes_xywh) > baseline_config['inference']['max_detections']:
        sorted_indices = sorted(range(len(scores)), key=lambda i: scores[i], reverse=True)
        sorted_indices = sorted_indices[:baseline_config['inference']['max_detections']]
        boxes_xywh = [boxes_xywh[i] for i in sorted_indices]
        scores = [scores[i] for i in sorted_indices]
        category_ids = [category_ids[i] for i in sorted_indices]
    
    for box, score, cat_id in zip(boxes_xywh, scores, category_ids):
        box_clipped = clip_bbox(box, img_w, img_h)
        predictions.append({
            'image_id': int(img_id),
            'category_id': int(cat_id),
            'bbox': [float(b) for b in box_clipped],
            'score': float(score)
        })
    
    perf_metrics['num_detections'].append(len(boxes_xywh))
    
    if idx % sample_interval == 0 and len(sample_images) < 50:
        sample_images.append({
            'image_id': img_id,
            'image_path': str(img_path),
            'num_dets': len(boxes_xywh)
        })

preds_path = OUTPUT_DIR / 'preds_raw.json'
with open(preds_path, 'w') as f:
    json.dump(predictions, f)

print(f"\nInferencia completada!")
print(f"Total predicciones: {len(predictions)}")
print(f"Predicciones guardadas en: {preds_path}")
print(f"Tiempo promedio por imagen: {np.mean(perf_metrics['times']):.3f}s")
print(f"FPS: {1.0 / np.mean(perf_metrics['times']):.2f}")
print(f"Detecciones promedio por imagen: {np.mean(perf_metrics['num_detections']):.1f}")

loading annotations into memory...
Done (t=0.18s)
creating index...
index created!
Iniciando inferencia sobre 2000 imágenes de val_eval...


100%|██████████| 2000/2000 [12:05<00:00,  2.76it/s]



Inferencia completada!
Total predicciones: 22162
Predicciones guardadas en: outputs/baseline/preds_raw.json
Tiempo promedio por imagen: 0.275s
FPS: 3.64
Detecciones promedio por imagen: 11.1


## 7. Guardado de Métricas de Rendimiento (2.4)

In [15]:
perf_summary = {
    'avg_time_per_image_s': float(np.mean(perf_metrics['times'])),
    'std_time_per_image_s': float(np.std(perf_metrics['times'])),
    'fps': float(1.0 / np.mean(perf_metrics['times'])),
    'avg_detections_per_image': float(np.mean(perf_metrics['num_detections'])),
    'std_detections_per_image': float(np.std(perf_metrics['num_detections'])),
    'total_predictions': len(predictions),
    'total_images': len(image_ids)
}

if torch.cuda.is_available():
    perf_summary['avg_gpu_memory_mb'] = float(np.mean(perf_metrics['gpu_memory_mb']))
    perf_summary['peak_gpu_memory_mb'] = float(np.max(perf_metrics['gpu_memory_mb']))

with open(OUTPUT_DIR / 'perf.txt', 'w') as f:
    f.write("=" * 60 + "\n")
    f.write("BASELINE PERFORMANCE METRICS\n")
    f.write("=" * 60 + "\n\n")
    for key, value in perf_summary.items():
        f.write(f"{key}: {value}\n")
    f.write("\n" + "=" * 60 + "\n")

print("\nMétricas de rendimiento guardadas en:", OUTPUT_DIR / 'perf.txt')


Métricas de rendimiento guardadas en: outputs/baseline/perf.txt


## 8. Evaluación COCO (3.1)

In [16]:
print("Ejecutando evaluación COCO...")

coco_dt = coco_val.loadRes(str(preds_path))
coco_eval = COCOeval(coco_val, coco_dt, 'bbox')
coco_eval.evaluate()
coco_eval.accumulate()
coco_eval.summarize()

metrics_coco = {
    'mAP': float(coco_eval.stats[0]),
    'AP50': float(coco_eval.stats[1]),
    'AP75': float(coco_eval.stats[2]),
    'AP_small': float(coco_eval.stats[3]),
    'AP_medium': float(coco_eval.stats[4]),
    'AP_large': float(coco_eval.stats[5]),
    'AR_max1': float(coco_eval.stats[6]),
    'AR_max10': float(coco_eval.stats[7]),
    'AR_max100': float(coco_eval.stats[8]),
    'AR_small': float(coco_eval.stats[9]),
    'AR_medium': float(coco_eval.stats[10]),
    'AR_large': float(coco_eval.stats[11])
}

per_class_metrics = {}
for cat_id, cat_name in BDD_COCO_CATEGORIES.items():
    coco_eval_class = COCOeval(coco_val, coco_dt, 'bbox')
    coco_eval_class.params.catIds = [cat_id]
    coco_eval_class.evaluate()
    coco_eval_class.accumulate()
    
    per_class_metrics[cat_name] = {
        'mAP': float(coco_eval_class.stats[0]) if coco_eval_class.stats[0] >= 0 else 0.0,
        'AP50': float(coco_eval_class.stats[1]) if coco_eval_class.stats[1] >= 0 else 0.0,
        'AP75': float(coco_eval_class.stats[2]) if coco_eval_class.stats[2] >= 0 else 0.0
    }

metrics_coco['per_class'] = per_class_metrics

with open(OUTPUT_DIR / 'metrics.json', 'w') as f:
    json.dump(metrics_coco, f, indent=2)

print("\nMétricas principales:")
print(f"  mAP@[.50:.95]: {metrics_coco['mAP']:.4f}")
print(f"  AP@50: {metrics_coco['AP50']:.4f}")
print(f"  AP@75: {metrics_coco['AP75']:.4f}")
print(f"\nMétricas guardadas en: {OUTPUT_DIR / 'metrics.json'}")

Ejecutando evaluación COCO...
Loading and preparing results...
DONE (t=0.24s)
creating index...
index created!
Running per image evaluation...
Evaluate annotation type *bbox*
DONE (t=3.34s).
Accumulating evaluation results...
DONE (t=0.52s).
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.170
 Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.279
 Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.171
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.063
 Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.182
 Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.377
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.188
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.284
 Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.285
 Average Recall     (AR) @[ IoU=0.50:0

IndexError: list index out of range

## 9. Curvas Precision-Recall (3.1)

In [None]:
PR_DIR = OUTPUT_DIR / 'pr_curves'
PR_DIR.mkdir(exist_ok=True)

for cat_id, cat_name in BDD_COCO_CATEGORIES.items():
    coco_eval_class = COCOeval(coco_val, coco_dt, 'bbox')
    coco_eval_class.params.catIds = [cat_id]
    coco_eval_class.evaluate()
    coco_eval_class.accumulate()
    
    precision = coco_eval_class.eval['precision'][0, :, cat_id-1, 0, 2]
    recall = np.linspace(0, 1, 101)
    
    valid_mask = precision > -1
    precision_valid = precision[valid_mask]
    recall_valid = recall[valid_mask]
    
    if len(precision_valid) > 0:
        plt.figure(figsize=(8, 6))
        plt.plot(recall_valid, precision_valid, linewidth=2)
        plt.xlabel('Recall', fontsize=12)
        plt.ylabel('Precision', fontsize=12)
        plt.title(f'Precision-Recall: {cat_name}', fontsize=14)
        plt.grid(True, alpha=0.3)
        plt.xlim([0, 1])
        plt.ylim([0, 1])
        plt.tight_layout()
        plt.savefig(PR_DIR / f'{cat_name}_pr.png', dpi=150)
        plt.close()

print(f"Curvas PR guardadas en: {PR_DIR}")

## 10. Sensibilidad a Umbrales (3.2)

In [None]:
threshold_sweep = []
conf_thresholds = [0.05, 0.10, 0.15, 0.20, 0.25, 0.30, 0.35, 0.40, 0.50, 0.60, 0.75]

print("Barrido de umbrales de confianza...")
for conf_th in tqdm(conf_thresholds):
    preds_filtered = [p for p in predictions if p['score'] >= conf_th]
    
    if len(preds_filtered) == 0:
        continue
    
    temp_path = OUTPUT_DIR / f'temp_preds_{conf_th}.json'
    with open(temp_path, 'w') as f:
        json.dump(preds_filtered, f)
    
    coco_dt_temp = coco_val.loadRes(str(temp_path))
    coco_eval_temp = COCOeval(coco_val, coco_dt_temp, 'bbox')
    coco_eval_temp.evaluate()
    coco_eval_temp.accumulate()
    coco_eval_temp.summarize()
    
    threshold_sweep.append({
        'conf_threshold': conf_th,
        'mAP': float(coco_eval_temp.stats[0]),
        'AP50': float(coco_eval_temp.stats[1]),
        'AP75': float(coco_eval_temp.stats[2]),
        'num_predictions': len(preds_filtered)
    })
    
    temp_path.unlink()

threshold_df = pd.DataFrame(threshold_sweep)

plt.figure(figsize=(12, 5))
plt.subplot(1, 2, 1)
plt.plot(threshold_df['conf_threshold'], threshold_df['mAP'], 'o-', label='mAP')
plt.plot(threshold_df['conf_threshold'], threshold_df['AP50'], 's-', label='AP50')
plt.plot(threshold_df['conf_threshold'], threshold_df['AP75'], '^-', label='AP75')
plt.xlabel('Confidence Threshold')
plt.ylabel('Average Precision')
plt.title('AP vs Confidence Threshold')
plt.legend()
plt.grid(True, alpha=0.3)

plt.subplot(1, 2, 2)
plt.plot(threshold_df['conf_threshold'], threshold_df['num_predictions'], 'o-')
plt.xlabel('Confidence Threshold')
plt.ylabel('Number of Predictions')
plt.title('Predictions vs Threshold')
plt.grid(True, alpha=0.3)

plt.tight_layout()
plt.savefig(OUTPUT_DIR / 'threshold_sensitivity.png', dpi=150)
plt.close()

threshold_df.to_csv(OUTPUT_DIR / 'threshold_sweep.csv', index=False)
print(f"\nBarrido de umbrales guardado en: {OUTPUT_DIR / 'threshold_sweep.csv'}")

## 11. Visualización Cualitativa (3.4)

In [None]:
import matplotlib.patches as patches

colors = plt.cm.tab10(np.linspace(0, 1, 10))
cat_colors = {cat_id: colors[i] for i, cat_id in enumerate(BDD_COCO_CATEGORIES.keys())}

print("Generando visualizaciones cualitativas...")
for i, sample in enumerate(tqdm(sample_images[:50])):
    img_id = sample['image_id']
    img_path = Path(sample['image_path'])
    
    if not img_path.exists():
        continue
    
    image = Image.open(img_path).convert('RGB')
    img_w, img_h = image.size
    
    img_preds = [p for p in predictions if p['image_id'] == img_id]
    
    fig, ax = plt.subplots(1, 1, figsize=(16, 9))
    ax.imshow(image)
    
    for pred in img_preds:
        bbox = pred['bbox']
        cat_id = pred['category_id']
        score = pred['score']
        
        x, y, w, h = bbox
        rect = patches.Rectangle(
            (x, y), w, h,
            linewidth=2,
            edgecolor=cat_colors[cat_id],
            facecolor='none'
        )
        ax.add_patch(rect)
        
        label = f"{CAT_ID_TO_PROMPT[cat_id]} {score:.2f}"
        ax.text(
            x, y - 5,
            label,
            fontsize=8,
            color='white',
            bbox=dict(facecolor=cat_colors[cat_id], alpha=0.7, pad=2)
        )
    
    ax.axis('off')
    ax.set_title(f'Image ID: {img_id} | Detections: {len(img_preds)}', fontsize=12)
    plt.tight_layout()
    plt.savefig(QUALITATIVE_DIR / f'{img_id:07d}.jpg', dpi=100, bbox_inches='tight')
    plt.close()

print(f"Visualizaciones guardadas en: {QUALITATIVE_DIR}")

## 12. Preparación para Calibración (4.1, 4.2, 4.3)

In [None]:
if Path(baseline_config['dataset']['val_calib_json']).exists():
    print("Generando inputs para calibración sobre val_calib...")
    
    coco_calib = COCO(baseline_config['dataset']['val_calib_json'])
    calib_image_ids = sorted(coco_calib.getImgIds())
    
    calib_records = []
    
    for img_id in tqdm(calib_image_ids):
        img_info = coco_calib.loadImgs(img_id)[0]
        img_path = Path(baseline_config['dataset']['image_dir']) / img_info['file_name']
        
        if not img_path.exists():
            continue
        
        image_pil = Image.open(img_path).convert('RGB')
        img_w, img_h = image_pil.size
        
        image_source, image_transformed = load_image(str(img_path))
        
        with torch.no_grad():
            boxes, logits, phrases = predict(
                model=model,
                image=image_transformed,
                caption=TEXT_PROMPT,
                box_threshold=0.05,
                text_threshold=0.25,
                device=device
            )
        
        boxes_xyxy = box_ops.box_cxcywh_to_xyxy(boxes) * torch.tensor([img_w, img_h, img_w, img_h])
        boxes_xywh = []
        for box in boxes_xyxy:
            x1, y1, x2, y2 = box.tolist()
            boxes_xywh.append([x1, y1, x2 - x1, y2 - y1])
        
        scores = logits.tolist()
        labels_raw = [normalize_label(p) for p in phrases]
        category_ids = [PROMPT_TO_CAT_ID.get(lbl, -1) for lbl in labels_raw]
        
        valid_mask = [cid != -1 for cid in category_ids]
        boxes_xywh = [b for b, m in zip(boxes_xywh, valid_mask) if m]
        scores = [s for s, m in zip(scores, valid_mask) if m]
        category_ids = [c for c, m in zip(category_ids, valid_mask) if m]
        
        ann_ids = coco_calib.getAnnIds(imgIds=img_id)
        anns = coco_calib.loadAnns(ann_ids)
        
        for pred_box, pred_score, pred_cat in zip(boxes_xywh, scores, category_ids):
            pred_x1, pred_y1, pred_w, pred_h = pred_box
            pred_x2 = pred_x1 + pred_w
            pred_y2 = pred_y1 + pred_h
            pred_area = pred_w * pred_h
            
            best_iou = 0.0
            best_match = None
            
            for ann in anns:
                if ann['category_id'] != pred_cat:
                    continue
                
                gt_x, gt_y, gt_w, gt_h = ann['bbox']
                gt_x2 = gt_x + gt_w
                gt_y2 = gt_y + gt_h
                gt_area = gt_w * gt_h
                
                inter_x1 = max(pred_x1, gt_x)
                inter_y1 = max(pred_y1, gt_y)
                inter_x2 = min(pred_x2, gt_x2)
                inter_y2 = min(pred_y2, gt_y2)
                
                inter_w = max(0, inter_x2 - inter_x1)
                inter_h = max(0, inter_y2 - inter_y1)
                inter_area = inter_w * inter_h
                
                union_area = pred_area + gt_area - inter_area
                iou = inter_area / union_area if union_area > 0 else 0.0
                
                if iou > best_iou:
                    best_iou = iou
                    best_match = ann['id']
            
            is_correct = best_iou >= 0.5
            
            calib_records.append({
                'image_id': int(img_id),
                'bbox': [float(b) for b in pred_box],
                'category_id_pred': int(pred_cat),
                'score': float(pred_score),
                'iou': float(best_iou),
                'is_correct': bool(is_correct),
                'gt_ann_id': int(best_match) if best_match else -1
            })
    
    calib_df = pd.DataFrame(calib_records)
    calib_df.to_parquet(OUTPUT_DIR / 'calib_inputs.parquet', index=False)
    
    print(f"\nCalibration inputs generados: {len(calib_records)} detecciones")
    print(f"Guardado en: {OUTPUT_DIR / 'calib_inputs.parquet'}")
    
    class_counts = calib_df.groupby('category_id_pred').size().to_dict()
    print("\nCobertura por clase en val_calib:")
    for cat_id, count in sorted(class_counts.items()):
        cat_name = CAT_ID_TO_PROMPT[cat_id]
        correct = calib_df[(calib_df['category_id_pred'] == cat_id) & (calib_df['is_correct'])].shape[0]
        print(f"  {cat_name}: {count} predicciones ({correct} correctas, {correct/count*100:.1f}%)")
else:
    print("val_calib.json no encontrado, saltando generación de inputs de calibración")

  0%|          | 29/8000 [02:42<12:15:45,  5.54s/it]

## 13. Tabla Resumen Baseline (5.2)

In [None]:
summary_table = []

for _, row in threshold_df.iterrows():
    conf_th = row['conf_threshold']
    
    is_baseline = conf_th == baseline_config['inference']['conf_threshold']
    marker = '⭐' if is_baseline else ''
    
    summary_table.append({
        'Config': marker,
        'Conf_Threshold': conf_th,
        'NMS_IoU': baseline_config['inference']['nms_iou'],
        'mAP': row['mAP'],
        'AP50': row['AP50'],
        'AP75': row['AP75'],
        'FPS': perf_summary['fps'],
        'GPU_MB': perf_summary.get('peak_gpu_memory_mb', 0),
        'Detections/Img': row['num_predictions'] / perf_summary['total_images']
    })

summary_df = pd.DataFrame(summary_table)

print("\n" + "="*80)
print("TABLA RESUMEN BASELINE")
print("="*80)
print(summary_df.to_string(index=False))
print("="*80)

summary_df.to_csv(OUTPUT_DIR / 'summary_table.csv', index=False)
print(f"\nTabla resumen guardada en: {OUTPUT_DIR / 'summary_table.csv'}")

## 14. Análisis de Errores (3.4)

In [None]:
error_analysis = {
    'false_positives_high_conf': [],
    'false_negatives': [],
    'confusion_pairs': defaultdict(int)
}

print("Analizando errores...")

for img_id in tqdm(image_ids[:100]):
    img_preds = [p for p in predictions if p['image_id'] == img_id]
    
    ann_ids = coco_val.getAnnIds(imgIds=img_id)
    anns = coco_val.loadAnns(ann_ids)
    
    matched_gts = set()
    
    for pred in img_preds:
        pred_box = pred['bbox']
        pred_cat = pred['category_id']
        pred_score = pred['score']
        
        px1, py1, pw, ph = pred_box
        px2 = px1 + pw
        py2 = py1 + ph
        p_area = pw * ph
        
        best_iou = 0.0
        best_ann = None
        
        for ann in anns:
            gx, gy, gw, gh = ann['bbox']
            gx2 = gx + gw
            gy2 = gy + gh
            g_area = gw * gh
            
            ix1 = max(px1, gx)
            iy1 = max(py1, gy)
            ix2 = min(px2, gx2)
            iy2 = min(py2, gy2)
            
            iw = max(0, ix2 - ix1)
            ih = max(0, iy2 - iy1)
            inter = iw * ih
            
            union = p_area + g_area - inter
            iou = inter / union if union > 0 else 0.0
            
            if iou > best_iou:
                best_iou = iou
                best_ann = ann
        
        if best_iou >= 0.5 and best_ann['category_id'] == pred_cat:
            matched_gts.add(best_ann['id'])
        elif pred_score >= 0.5:
            error_analysis['false_positives_high_conf'].append({
                'image_id': img_id,
                'pred_category': CAT_ID_TO_PROMPT[pred_cat],
                'score': pred_score,
                'iou': best_iou,
                'gt_category': CAT_ID_TO_PROMPT.get(best_ann['category_id'], 'none') if best_ann else 'none'
            })
            
            if best_ann and best_ann['category_id'] != pred_cat:
                pred_name = CAT_ID_TO_PROMPT[pred_cat]
                gt_name = CAT_ID_TO_PROMPT[best_ann['category_id']]
                error_analysis['confusion_pairs'][(pred_name, gt_name)] += 1
    
    for ann in anns:
        if ann['id'] not in matched_gts:
            error_analysis['false_negatives'].append({
                'image_id': img_id,
                'category': CAT_ID_TO_PROMPT[ann['category_id']],
                'bbox': ann['bbox']
            })

print(f"\nFalsos positivos (conf >= 0.5): {len(error_analysis['false_positives_high_conf'])}")
print(f"Falsos negativos (en 100 imágenes): {len(error_analysis['false_negatives'])}")

print("\nPares de confusión más comunes:")
confusion_sorted = sorted(error_analysis['confusion_pairs'].items(), key=lambda x: x[1], reverse=True)
for (pred, gt), count in confusion_sorted[:10]:
    print(f"  {pred} ← {gt}: {count} veces")

with open(OUTPUT_DIR / 'error_analysis.json', 'w') as f:
    json.dump({
        'false_positives_high_conf': error_analysis['false_positives_high_conf'][:20],
        'false_negatives': error_analysis['false_negatives'][:20],
        'confusion_pairs': {f"{p}->{g}": c for (p,g), c in confusion_sorted[:10]}
    }, f, indent=2)

print(f"\nAnálisis de errores guardado en: {OUTPUT_DIR / 'error_analysis.json'}")

## 15. Resumen Final y Criterios Go/No-Go (6)

In [None]:
final_report = {
    'baseline_config': baseline_config,
    'metrics': metrics_coco,
    'performance': perf_summary,
    'artefacts': {
        'predictions': str(OUTPUT_DIR / 'preds_raw.json'),
        'metrics': str(OUTPUT_DIR / 'metrics.json'),
        'pr_curves': str(PR_DIR),
        'qualitative': str(QUALITATIVE_DIR),
        'performance': str(OUTPUT_DIR / 'perf.txt'),
        'calib_inputs': str(OUTPUT_DIR / 'calib_inputs.parquet'),
        'threshold_sweep': str(OUTPUT_DIR / 'threshold_sweep.csv'),
        'summary_table': str(OUTPUT_DIR / 'summary_table.csv'),
        'error_analysis': str(OUTPUT_DIR / 'error_analysis.json')
    },
    'go_criteria': {
        'mAP_reasonable': metrics_coco['mAP'] > 0.05,
        'AP50_reasonable': metrics_coco['AP50'] > 0.10,
        'latency_measured': True,
        'artefacts_complete': True,
        'calib_inputs_generated': Path(OUTPUT_DIR / 'calib_inputs.parquet').exists(),
        'errors_identified': len(error_analysis['false_positives_high_conf']) > 0
    }
}

print("\n" + "="*80)
print("RESUMEN FINAL BASELINE")
print("="*80)
print(f"\nModelo: {baseline_config['model']['name']} ({baseline_config['model']['architecture']})")
print(f"Checkpoint: {baseline_config['model']['checkpoint']}")
print(f"Device: {baseline_config['model']['device']}")

print(f"\nMétricas de Detección:")
print(f"  mAP@[.50:.95]: {metrics_coco['mAP']:.4f}")
print(f"  AP@50: {metrics_coco['AP50']:.4f}")
print(f"  AP@75: {metrics_coco['AP75']:.4f}")

print(f"\nRendimiento:")
print(f"  Tiempo/imagen: {perf_summary['avg_time_per_image_s']:.3f}s")
print(f"  FPS: {perf_summary['fps']:.2f}")
if 'peak_gpu_memory_mb' in perf_summary:
    print(f"  GPU memoria pico: {perf_summary['peak_gpu_memory_mb']:.0f} MB")

print(f"\nArtefactos Generados:")
for name, path in final_report['artefacts'].items():
    exists = Path(path).exists()
    status = "✓" if exists else "✗"
    print(f"  {status} {name}: {path}")

print(f"\nCriterios Go/No-Go para Fase 3:")
all_pass = all(final_report['go_criteria'].values())
for criterion, passed in final_report['go_criteria'].items():
    status = "✓" if passed else "✗"
    print(f"  {status} {criterion}")

print(f"\n{'='*80}")
if all_pass:
    print("✅ BASELINE COMPLETADO - LISTO PARA FASE 3 (Incertidumbre y Calibración)")
else:
    print("⚠️  BASELINE INCOMPLETO - Revisar criterios fallidos")
print("="*80)

with open(OUTPUT_DIR / 'final_report.json', 'w') as f:
    json.dump(final_report, f, indent=2, default=str)

print(f"\nReporte final guardado en: {OUTPUT_DIR / 'final_report.json'}")