In [1]:
import torch

# Report the installed PyTorch build and whether a CUDA device is usable.
print("Torch version:", torch.__version__)
print("CUDA version:", torch.version.cuda)
print("CUDA disponible:", torch.cuda.is_available())

# Name of device 0 when CUDA is usable, otherwise a fallback notice.
print(*(("GPU:", torch.cuda.get_device_name(0)) if torch.cuda.is_available() else ("No GPU disponible",)))

Torch version: 2.9.1+cu128
CUDA version: 12.8
CUDA disponible: True
GPU: NVIDIA GeForce RTX 5060


In [2]:
import os
import cv2
import shutil
import random
from ultralytics import YOLO
import torch
import numpy as np

In [3]:
def dividir_dataset_con_anotaciones(dataset_original, dataset_dividido,
                                    train_ratio=0.7, val_ratio=0.15, test_ratio=0.15,
                                    seed=None):
    """
    Split a flat folder of PNG images and their ``.txt`` annotations into
    train/val/test subsets.

    Creates ``<dataset_dividido>/<split>/images`` and ``.../labels`` for each
    split and copies every image together with the annotation file that shares
    its base name. Images without an annotation file are reported and skipped,
    so the number of files copied can be lower than the counts printed.

    Args:
        dataset_original: Folder holding the ``.png`` images and ``.txt`` labels.
        dataset_dividido: Destination root; created if missing.
        train_ratio: Fraction of the images assigned to ``train``.
        val_ratio: Fraction of the images assigned to ``val``.
        test_ratio: Accepted for API symmetry but not used: ``test`` receives
            every image left over after ``train`` and ``val`` are taken.
        seed: Optional seed for a reproducible shuffle. ``None`` (default)
            keeps using the global ``random`` state.
    """
    # Create the destination folder tree.
    for split in ("train", "val", "test"):
        for subcarpeta in ("images", "labels"):
            os.makedirs(os.path.join(dataset_dividido, split, subcarpeta), exist_ok=True)

    # os.listdir returns entries in arbitrary order; sorting first makes a
    # seeded shuffle give the same split on every machine.
    imagenes = sorted(f for f in os.listdir(dataset_original)
                      if f.lower().endswith(".png"))

    if seed is None:
        random.shuffle(imagenes)
    else:
        random.Random(seed).shuffle(imagenes)

    n = len(imagenes)
    n_train = int(n * train_ratio)
    n_val = int(n * val_ratio)

    particiones = {
        "train": imagenes[:n_train],
        "val": imagenes[n_train:n_train + n_val],
        "test": imagenes[n_train + n_val:],  # remainder, regardless of test_ratio
    }

    print(f"Total: {n} imágenes | Train: {len(particiones['train'])} | "
          f"Val: {len(particiones['val'])} | Test: {len(particiones['test'])}")

    # One copy loop shared by all three splits.
    for split_name, lista in particiones.items():
        folder_img = os.path.join(dataset_dividido, split_name, "images")
        folder_lbl = os.path.join(dataset_dividido, split_name, "labels")

        for img in lista:
            base_name = os.path.splitext(img)[0]
            ruta_lbl = os.path.join(dataset_original, base_name + ".txt")

            if not os.path.exists(ruta_lbl):
                print(f"⚠ La imagen {img} NO tiene anotación TXT. Saltando en {split_name}.")
                continue

            shutil.copy(os.path.join(dataset_original, img), os.path.join(folder_img, img))

            # Build the label name from the base name rather than
            # str.replace('.png', ...): the filter above is case-insensitive,
            # so 'X.PNG' would otherwise be written as 'labels/X.PNG'.
            shutil.copy(ruta_lbl, os.path.join(folder_lbl, base_name + ".txt"))

    print("✅ División de dataset completada (con anotaciones).")

In [4]:
import os
import random
import cv2
import numpy as np
from pathlib import Path
import shutil

# ------------------ Configuration ------------------
# Project root (machine-specific absolute path) and the dataset folders under it.
BASE_DIR = Path(r"C:\Universidad\Quinto\PS\CodigoPS\Gestion_Documental_Inteligente")
dataset_dividido = BASE_DIR.joinpath("ML_module", "dataset_dividido")
dataset_augmented = BASE_DIR.joinpath("ML_module", "dataset_dividido_augmented")

# Augmentation hyperparameters consumed by apply_hsv / augment_image.
hyp_dict = dict(
    flipud=0.5,      # probability of a vertical flip
    fliplr=0.5,      # probability of a horizontal flip
    degrees=15.0,    # rotation angle is drawn from [-degrees, +degrees]
    translate=0.2,   # shift is drawn from +/- this fraction of the padded width/height
    scale=0.2,       # not read by the augmentation code in this notebook
    shear=0.1,       # x-shear factor is drawn from [-shear, +shear]
    hsv_h=0.03,      # hue shift: +/- hsv_h * 180
    hsv_s=0.5,       # saturation gain: 1 +/- hsv_s
    hsv_v=0.5,       # value gain: 1 +/- hsv_v
)

# ------------------ Funciones auxiliares ------------------
def apply_hsv(img, hyp_dict):
    """
    Apply a random hue shift and random saturation/value gains to a BGR image.

    Args:
        img: Image accepted by ``cv2.cvtColor(..., COLOR_BGR2HSV)``; the result
            is converted through ``uint8``, so 8-bit BGR input is assumed.
        hyp_dict: Mapping with ``'hsv_h'``, ``'hsv_s'`` and ``'hsv_v'``.
            The hue shift is drawn from ``+/- hsv_h * 180``; the saturation and
            value channels are multiplied by ``1 +/- hsv_s`` and ``1 +/- hsv_v``.

    Returns:
        The jittered image, converted back to BGR.
    """
    img_hsv = cv2.cvtColor(img, cv2.COLOR_BGR2HSV).astype(np.float32)

    # Draw in the same order as before (hue, saturation, value).
    delta_h = random.uniform(-hyp_dict['hsv_h'], hyp_dict['hsv_h']) * 180
    gain_s = 1 + random.uniform(-hyp_dict['hsv_s'], hyp_dict['hsv_s'])
    gain_v = 1 + random.uniform(-hyp_dict['hsv_v'], hyp_dict['hsv_v'])

    # Hue is an angle: 8-bit OpenCV stores it as degrees/2 in [0, 180), so it
    # must wrap around instead of saturating (clipping sent every hue that
    # shifted below 0 to exactly 0).
    img_hsv[..., 0] = (img_hsv[..., 0] + delta_h) % 180
    # Saturation and value are plain magnitudes: saturate at the 8-bit limits.
    img_hsv[..., 1] = np.clip(img_hsv[..., 1] * gain_s, 0, 255)
    img_hsv[..., 2] = np.clip(img_hsv[..., 2] * gain_v, 0, 255)

    return cv2.cvtColor(img_hsv.astype(np.uint8), cv2.COLOR_HSV2BGR)

def _warp_boxes_px(boxes_px, matrix):
    """Map pixel boxes [cls, xc, yc, w, h] through a 2x3 affine matrix and
    return the axis-aligned boxes enclosing the transformed corners."""
    matrix = np.asarray(matrix, dtype=np.float64)
    warped = []
    for cls_id, xc, yc, bw, bh in boxes_px:
        x1, y1 = xc - bw / 2, yc - bh / 2
        x2, y2 = xc + bw / 2, yc + bh / 2
        # Homogeneous corner coordinates so the translation column applies.
        corners = np.array([[x1, y1, 1.0], [x2, y1, 1.0], [x2, y2, 1.0], [x1, y2, 1.0]])
        moved = corners @ matrix.T
        xs, ys = moved[:, 0], moved[:, 1]
        warped.append([cls_id,
                       (xs.min() + xs.max()) / 2, (ys.min() + ys.max()) / 2,
                       xs.max() - xs.min(), ys.max() - ys.min()])
    return warped


def augment_image(img, bboxes, hyp_dict):
    """
    Produce one randomly augmented copy of an image and its bounding boxes.

    Steps, in order: replicate-pad the image to a square whose side is the
    image diagonal, random horizontal/vertical flip, random rotation plus
    translation, random x-shear, then HSV jitter via ``apply_hsv``.

    Args:
        img: Image array; ``img.shape[:2]`` is read as (height, width).
        bboxes: List of ``[class, x_center, y_center, w, h]`` boxes, normalized
            to the original image size.
        hyp_dict: Augmentation hyperparameters. Keys read here: ``fliplr``,
            ``flipud``, ``degrees``, ``translate``, ``shear`` (plus the
            ``hsv_*`` keys read by ``apply_hsv``). ``scale`` is not applied.

    Returns:
        ``(img_aug, boxes_norm)``: the augmented (padded) image and the boxes
        normalized to the padded image size. Boxes are clipped to the image
        bounds; a box that ends up with no area inside the image is dropped,
        so ``boxes_norm`` may be shorter than ``bboxes``.
    """
    h0, w0 = img.shape[:2]

    # ------------------ Padding so rotation does not cut content ------------------
    max_dim = int(np.ceil(np.sqrt(h0**2 + w0**2)))
    pad_h = (max_dim - h0) // 2
    pad_w = (max_dim - w0) // 2
    img = cv2.copyMakeBorder(img, pad_h, pad_h, pad_w, pad_w, cv2.BORDER_REPLICATE)
    h, w = img.shape[:2]

    # Normalized boxes -> pixel coordinates inside the padded image.
    boxes_px = [[cls_id, xc * w0 + pad_w, yc * h0 + pad_h, bw * w0, bh * h0]
                for cls_id, xc, yc, bw, bh in bboxes]

    # ------------------ Flips ------------------
    if random.random() < hyp_dict['fliplr']:
        img = cv2.flip(img, 1)
        for box in boxes_px:
            box[1] = w - box[1]  # mirror x center
    if random.random() < hyp_dict['flipud']:
        img = cv2.flip(img, 0)
        for box in boxes_px:
            box[2] = h - box[2]  # mirror y center

    # ------------------ Rotation + translation ------------------
    angle = random.uniform(-hyp_dict['degrees'], hyp_dict['degrees'])
    tx = random.uniform(-hyp_dict['translate'], hyp_dict['translate']) * w
    ty = random.uniform(-hyp_dict['translate'], hyp_dict['translate']) * h
    M = cv2.getRotationMatrix2D((w/2, h/2), angle, 1.0)
    M[:, 2] += [tx, ty]
    img = cv2.warpAffine(img, M, (w, h), borderMode=cv2.BORDER_REPLICATE)
    boxes_px = _warp_boxes_px(boxes_px, M)

    # ------------------ Shear ------------------
    shear = random.uniform(-hyp_dict['shear'], hyp_dict['shear'])
    M_shear = np.array([[1, shear, 0], [0, 1, 0]], dtype=np.float32)
    img = cv2.warpAffine(img, M_shear, (w, h), borderMode=cv2.BORDER_REPLICATE)
    boxes_px = _warp_boxes_px(boxes_px, M_shear)

    # ------------------ HSV ------------------
    img = apply_hsv(img, hyp_dict)

    # Back to normalized boxes. Clip the corners rather than center and size
    # independently: clipping them separately yields a box that no longer
    # matches the visible part of the object once it crosses the image border.
    boxes_norm = []
    for cls_id, xc, yc, bw, bh in boxes_px:
        x1 = min(max(xc - bw / 2, 0.0), w)
        y1 = min(max(yc - bh / 2, 0.0), h)
        x2 = min(max(xc + bw / 2, 0.0), w)
        y2 = min(max(yc + bh / 2, 0.0), h)
        if x2 - x1 <= 0 or y2 - y1 <= 0:
            continue  # the box was pushed completely out of the frame
        boxes_norm.append([cls_id,
                           float((x1 + x2) / 2 / w), float((y1 + y2) / 2 / h),
                           float((x2 - x1) / w), float((y2 - y1) / h)])

    return img, boxes_norm

# ------------------ Generar dataset augmentado ------------------
def generar_dataset_augmentado(splits=("train","val","test"), n_aug=2):
    """
    Build an augmented copy of the split dataset.

    For every ``.jpg``/``.jpeg``/``.png`` image under
    ``dataset_dividido/<split>/images`` the original image and its label file
    (when present) are copied to ``dataset_augmented/<split>/...`` and
    ``n_aug`` augmented variants named ``<stem>_aug<i>`` are written next to
    them, each with its own label file.

    Args:
        splits: Split folder names to process.
        n_aug: Number of augmented variants generated per image.
    """
    # NOTE(review): the default also augments "val" and "test"; confirm that
    # augmenting the evaluation splits is intended.
    for split in splits:
        images_src = dataset_dividido / split / "images"
        labels_src = dataset_dividido / split / "labels"

        images_dst = dataset_augmented / split / "images"
        labels_dst = dataset_augmented / split / "labels"
        images_dst.mkdir(parents=True, exist_ok=True)
        labels_dst.mkdir(parents=True, exist_ok=True)

        for img_file in images_src.iterdir():
            if img_file.suffix.lower() not in (".jpg",".jpeg",".png"):
                continue

            img = cv2.imread(str(img_file))
            if img is None:
                # cv2.imread signals an unreadable file by returning None;
                # without this check augment_image fails on img.shape.
                print(f"⚠ No se pudo leer la imagen {img_file.name}. Saltando.")
                continue

            # Read the boxes; blank lines (e.g. a trailing newline) are ignored.
            label_file = labels_src / f"{img_file.stem}.txt"
            tiene_label = label_file.exists()
            bboxes = []
            if tiene_label:
                with open(label_file, "r") as f:
                    for line in f:
                        parts = line.split()
                        if not parts:
                            continue
                        bboxes.append([int(parts[0])] + [float(v) for v in parts[1:]])

            # Copy the original image and label unchanged.
            shutil.copy(img_file, images_dst / img_file.name)
            if tiene_label:
                shutil.copy(label_file, labels_dst / label_file.name)

            # Generate the augmented variants.
            for i in range(n_aug):
                img_aug, boxes_aug = augment_image(img, bboxes, hyp_dict)
                new_name = f"{img_file.stem}_aug{i}{img_file.suffix}"
                # str(): same convention as the imread call above; OpenCV
                # builds differ in whether they accept pathlib.Path here.
                cv2.imwrite(str(images_dst / new_name), img_aug)

                new_label_name = f"{img_file.stem}_aug{i}.txt"
                with open(labels_dst / new_label_name, "w") as f:
                    for box in boxes_aug:
                        f.write(" ".join(map(str, box)) + "\n")

    print("✅ Dataset augmentado generado en:", dataset_augmented)



In [5]:
import torch
from ultralytics import YOLO

def entrenar_yolo(config_yaml, epochs=30, imgsz=1280):
    """
    Train a YOLOv8n model starting from the ``yolov8n.pt`` weights.

    The run uses a small batch, a single dataloader worker and no image
    caching (the original author describes this as tuned for low RAM), and
    trains on CUDA when ``torch.cuda.is_available()`` is true, else on CPU.

    Args:
        config_yaml: Dataset configuration passed to ``model.train(data=...)``.
        epochs: Number of training epochs.
        imgsz: Training image size.

    Returns:
        ``(model, results)``: the YOLO object and whatever ``model.train`` returned.
    """
    print("Cargando modelo YOLOv8n...")
    model = YOLO("yolov8n.pt")

    if torch.cuda.is_available():
        device = "cuda"
    else:
        device = "cpu"

    # Collected in one place so the run configuration reads as a table.
    train_kwargs = {
        "data": config_yaml,
        "epochs": epochs,
        "imgsz": imgsz,
        "batch": 6,
        "device": device,
        "augment": True,
        "patience": 5,
        "workers": 1,
        "amp": True,
        "cache": False,
    }

    print("Iniciando entrenamiento optimizado...")
    results = model.train(**train_kwargs)
    print("Entrenamiento finalizado.")

    return model, results


# Ejecución del pipeline

In [6]:
from pathlib import Path

# Project root (machine-specific absolute path).
BASE_DIR = Path(r"C:\Universidad\Quinto\PS\CodigoPS\Gestion_Documental_Inteligente")

# Input images, split output, preprocessed output and the YOLO dataset YAML.
dataset_original = BASE_DIR.joinpath("dataset", "dataset_imagenes")
dataset_dividido = BASE_DIR.joinpath("ML_module", "dataset_dividido")
dataset_preprocesado = BASE_DIR.joinpath("ML_module", "dataset_preprocesado")
ruta_yaml = BASE_DIR.joinpath("ML_module", "etiquetas_yolo.yaml")

# Sanity check: only the source dataset is tested for existence; the rest are echoed.
print("Existe dataset_original:", dataset_original.exists())
print("Ruta dataset_dividido:", dataset_dividido)
print("Ruta dataset_preprocesado:", dataset_preprocesado)
print("Ruta ruta_yaml:", ruta_yaml)

Existe dataset_original: True
Ruta dataset_dividido: C:\Universidad\Quinto\PS\CodigoPS\Gestion_Documental_Inteligente\ML_module\dataset_dividido
Ruta dataset_preprocesado: C:\Universidad\Quinto\PS\CodigoPS\Gestion_Documental_Inteligente\ML_module\dataset_preprocesado
Ruta ruta_yaml: C:\Universidad\Quinto\PS\CodigoPS\Gestion_Documental_Inteligente\ML_module\etiquetas_yolo.yaml


In [7]:
# Split the source dataset into train/val/test folders (images + labels),
# using the function's default ratios.
dividir_dataset_con_anotaciones(dataset_original, dataset_dividido)

Total: 6000 im√°genes | Train: 4200 | Val: 900 | Test: 900
‚úÖ Divisi√≥n de dataset completada (con anotaciones).


In [8]:
# Preprocesamiento - data augmentation
#generar_dataset_augmentado(splits=("train","val","test"), n_aug=2)

In [8]:
# Train the model with the dataset YAML defined above; keep both the model
# object and the training results, which later cells read
# (resultados.save_dir, validar_modelo(modelo, ...)).
modelo, resultados = entrenar_yolo(ruta_yaml, epochs=30)
print(modelo)
print(resultados)

Cargando modelo YOLOv8n...
Iniciando entrenamiento optimizado...
New https://pypi.org/project/ultralytics/8.4.7 available  Update with 'pip install -U ultralytics'
Ultralytics 8.3.247  Python-3.10.19 torch-2.9.1+cu128 CUDA:0 (NVIDIA GeForce RTX 5060, 8151MiB)
[34m[1mengine\trainer: [0magnostic_nms=False, amp=True, augment=True, auto_augment=randaugment, batch=6, bgr=0.0, box=7.5, cache=False, cfg=None, classes=None, close_mosaic=10, cls=0.5, compile=False, conf=None, copy_paste=0.0, copy_paste_mode=flip, cos_lr=False, cutmix=0.0, data=C:\Universidad\Quinto\PS\CodigoPS\Gestion_Documental_Inteligente\ML_module\etiquetas_yolo.yaml, degrees=0.0, deterministic=True, device=0, dfl=1.5, dnn=False, dropout=0.0, dynamic=False, embed=None, epochs=30, erasing=0.4, exist_ok=False, fliplr=0.5, flipud=0.0, format=torchscript, fraction=1.0, freeze=None, half=False, hsv_h=0.015, hsv_s=0.7, hsv_v=0.4, imgsz=1280, int8=False, iou=0.7, keras=False, kobj=1.0, line_width=None, lr0=0.01, lrf=0.01, mask_r

In [9]:
# Show the directory the training results report as their save location;
# requires `resultados` from the training cell.
print("Modelo guardado en:", resultados.save_dir)

Modelo guardado en: C:\Users\vales\runs\detect\train17


In [10]:
import os
import shutil
from ultralytics import YOLO # Importar para verificar el guardado

# --- 1. PATH CONFIGURATION ---
# Name of the training run folder under runs/detect/ (see the save_dir printed above).
NOMBRE_RUN = 'train17'

# Source: best checkpoint of that run. Relative path, so it is resolved
# against the kernel's current working directory.
RUTA_ORIGEN = os.path.join('runs', 'detect', NOMBRE_RUN, 'weights', 'best.pt')

# Destination folder and final file name for the exported model.
CARPETA_FINAL = 'models'
NOMBRE_MODELO_FINAL = 'model_yolo8n_v4_best.pt'
RUTA_FINAL = os.path.join(CARPETA_FINAL, NOMBRE_MODELO_FINAL)

# --- 2. COPY ---

print(f"Buscando modelo en: {RUTA_ORIGEN}")

# Create the destination folder if it does not exist yet.
os.makedirs(CARPETA_FINAL, exist_ok=True)

try:
    # Only the copy itself can raise the FileNotFoundError handled below.
    shutil.copy(RUTA_ORIGEN, RUTA_FINAL)
except FileNotFoundError:
    print(f"\n❌ ERROR: No se encontró el modelo en la ruta de origen: {RUTA_ORIGEN}")
    # Report the run name actually used instead of a hard-coded 'train'.
    print(f"Verifica que la carpeta 'runs/detect/' exista y que el NOMBRE_RUN ('{NOMBRE_RUN}') sea correcto.")
else:
    print(f"\n✅ Modelo copiado exitosamente a: {RUTA_FINAL}")
    print(f"Ruta Absoluta: {os.path.abspath(RUTA_FINAL)}")

    # --- 3. VERIFICATION (disabled) ---
    # Load the copied weights to confirm the file is usable:
    # modelo_final = YOLO(RUTA_FINAL)
    # print("\nModelo cargado y verificado: ¡listo para usarse!")

Buscando modelo en: runs\detect\train17\weights\best.pt

‚úÖ Modelo copiado exitosamente a: models\model_yolo8n_v4_best.pt
Ruta Absoluta: C:\Users\vales\models\model_yolo8n_v4_best.pt


In [11]:
import json

def validar_modelo(modelo, ruta_yaml):
    """
    Run validation on the ``test`` split and return the box metrics as floats.

    Args:
        modelo: Object exposing ``val(data=..., split=...)`` whose result has a
            ``box`` attribute with ``mp``, ``mr``, ``map50``, ``map75`` and ``map``.
        ruta_yaml: Dataset configuration forwarded as ``data``.

    Returns:
        Dict with keys ``precision``, ``recall``, ``map50``, ``map75`` and
        ``map50_95`` (in that order), each converted with ``float``.
    """
    print("Validando modelo...")
    box = modelo.val(data=ruta_yaml, split="test").box

    # Output key -> attribute on the box metrics object.
    campos = (
        ("precision", "mp"),     # mean precision
        ("recall", "mr"),        # mean recall
        ("map50", "map50"),      # AP50
        ("map75", "map75"),      # AP75
        ("map50_95", "map"),     # mAP@0.5:0.95
    )
    return {clave: float(getattr(box, atributo)) for clave, atributo in campos}

# Evaluate the trained model and persist the metrics next to the notebook.
metricas = validar_modelo(modelo, ruta_yaml)

with open("metricas.json", "w") as f:
    f.write(json.dumps(metricas, indent=4))



Validando modelo...
Ultralytics 8.3.247  Python-3.10.19 torch-2.9.1+cu128 CUDA:0 (NVIDIA GeForce RTX 5060, 8151MiB)
Model summary (fused): 72 layers, 3,007,013 parameters, 0 gradients, 8.1 GFLOPs
[34m[1mval: [0mFast image access  (ping: 0.10.0 ms, read: 514.7133.9 MB/s, size: 213.8 KB)
[K[34m[1mval: [0mScanning C:\Universidad\Quinto\PS\CodigoPS\Gestion_Documental_Inteligente\ML_module\dataset_dividido\test\labels... 900 images, 0 backgrounds, 0 corrupt: 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 900/900 1.7Kit/s 0.5s0.1ss
[34m[1mval: [0mNew cache created: C:\Universidad\Quinto\PS\CodigoPS\Gestion_Documental_Inteligente\ML_module\dataset_dividido\test\labels.cache
[K                 Class     Images  Instances      Box(P          R      mAP50  mAP50-95): 100% ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ‚îÅ 57/57 2.6it/s 21.5s0.2ss
                   all        900       6300          1          1      0.995      0.995
          tipo_factura        900        900          1          1    