In [1]:
import tensorflow as tf
# Environment check: print the installed TensorFlow version and the list of
# physical GPU devices TensorFlow reports.
print(tf.__version__)
print(tf.config.list_physical_devices('GPU'))

2.10.0
[PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]


In [2]:
# Turn on TensorFlow's "memory growth" option for every GPU it reports.
# Nothing is configured (and nothing is printed) when no GPU is found.
gpus = tf.config.list_physical_devices('GPU')
if gpus:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
        # Only reached when the option was set on all GPUs without an error.
        print("✓ Memoria GPU configurada correctamente")
    except RuntimeError as e:
        # NOTE(review): presumably raised when the GPUs were already
        # initialised before this cell ran — confirm against the TF docs.
        # The error is only printed, so the notebook keeps running.
        print(e)

✓ Memoria GPU configurada correctamente


In [3]:
import os
import cv2
import numpy as np
from glob import glob


# ============================================================
# 1) DETECTAR AUTOMÁTICAMENTE LA ZONA ÚTIL (HAZ DE ULTRASONIDO)
# ============================================================

def detect_ultrasound_field(img, thresh=12, kernel_size=7):
    """
    Crop a grayscale ultrasound frame to its useful acoustic field.

    The frame is binarised at ``thresh``, small bright structures are removed
    with a morphological opening, and the image is cropped to the bounding
    rectangle of the largest remaining external contour. Because only the
    bounding rectangle is used, this works for rectangular as well as
    trapezoidal fields.

    Parameters
    ----------
    img : np.ndarray
        Single-channel 8-bit image, e.g. as returned by
        ``cv2.imread(path, cv2.IMREAD_GRAYSCALE)``.
    thresh : int, optional
        Pixels strictly brighter than this value are treated as foreground.
    kernel_size : int, optional
        Side length of the square structuring element used for the opening;
        bright structures smaller than this are discarded.

    Returns
    -------
    np.ndarray
        A view of ``img`` cropped to the detected field, or ``img`` itself
        when the input is empty or no contour survives the opening.
    """
    # A zero-sized array has nothing to detect and would make OpenCV raise.
    if img.size == 0:
        return img

    # 1) Soft binarisation (drops black borders and dim background).
    #    cv2.threshold writes to a new array, so no defensive copy is needed.
    _, bw = cv2.threshold(img, thresh, 255, cv2.THRESH_BINARY)

    # 2) Opening removes small bright noise such as thin text or markers.
    kernel = np.ones((kernel_size, kernel_size), np.uint8)
    clean = cv2.morphologyEx(bw, cv2.MORPH_OPEN, kernel)

    # 3) Keep the LARGEST contour (usually the acoustic field).
    #    findContours returns (contours, hierarchy) in OpenCV 2.x/4.x but
    #    (image, contours, hierarchy) in 3.x; index -2 is the contour list
    #    in every version.
    contours = cv2.findContours(
        clean, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE
    )[-2]
    if len(contours) == 0:
        return img  # fallback: nothing detected, leave the frame untouched

    largest = max(contours, key=cv2.contourArea)
    x, y, w, h = cv2.boundingRect(largest)

    return img[y:y+h, x:x+w]


# ============================================================
# 2) ELIMINAR SOLO PARTES SUPERIORES QUE NO SON TEJIDO
# ============================================================

def remove_headers_safely(img, max_cut_ratio=0.35, threshold=0.08):
    """
    Trim a dark band from the top of a grayscale image, within a safe limit.

    The mean intensity of every row is min-max normalised to [0, 1]. The
    first row whose normalised mean exceeds ``threshold`` is taken as the
    start of real tissue, and everything above it is cut off — but only if
    that row lies within the top ``max_cut_ratio`` fraction of the image.
    Otherwise the image is returned untouched, so a mostly dark frame is
    never cropped aggressively.

    Parameters
    ----------
    img : np.ndarray
        2-D (single-channel) image.
    max_cut_ratio : float, optional
        Maximum fraction of the image height that may be removed.
    threshold : float, optional
        Normalised row brightness (0-1) above which a row counts as tissue.

    Returns
    -------
    np.ndarray
        ``img`` itself, or a view of it starting at the first tissue row.
    """
    height, _ = img.shape

    # An image without pixels has no intensity profile; min()/max() on it
    # would raise, so hand it back unchanged.
    if img.size == 0:
        return img

    # Vertical intensity profile: one mean value per row.
    row_mean = img.mean(axis=1)

    # Min-max normalisation; the epsilon avoids 0/0 on a perfectly flat image
    # (which then normalises to all zeros and is returned unchanged below).
    lowest = row_mean.min()
    row_norm = (row_mean - lowest) / (row_mean.max() - lowest + 1e-6)

    # First sufficiently bright row marks where tissue begins.
    tissue_rows = np.flatnonzero(row_norm > threshold)
    if tissue_rows.size == 0:
        return img

    first_row = int(tissue_rows[0])

    # Only crop when the cut stays inside the allowed top fraction.
    if first_row < int(height * max_cut_ratio):
        return img[first_row:, :]
    return img


# ============================================================
# 3) NORMALIZACIÓN
# ============================================================

def enhance_clahe(img, clip_limit=2.0, tile_grid_size=(8, 8)):
    """
    Apply CLAHE (contrast-limited adaptive histogram equalisation) to an image.

    Parameters
    ----------
    img : np.ndarray
        Single-channel image accepted by ``cv2.CLAHE.apply``.
    clip_limit : float, optional
        Contrast-limiting threshold passed to ``cv2.createCLAHE``.
    tile_grid_size : tuple of int, optional
        Grid size passed to ``cv2.createCLAHE`` as ``tileGridSize``.

    Returns
    -------
    np.ndarray
        The contrast-enhanced image returned by ``clahe.apply``.
    """
    clahe = cv2.createCLAHE(
        clipLimit=clip_limit, tileGridSize=tuple(tile_grid_size)
    )
    return clahe.apply(img)


# ============================================================
# 4) REDIMENSIONAR CON PADDING
# ============================================================

def resize_with_padding(img, target=(384,384)):
    """
    Resize a 2-D image to fit inside ``target`` and centre it on a black canvas.

    The image is scaled by a single factor so that its aspect ratio is kept
    and it fits entirely within ``target``; the remaining area is filled with
    zeros (black), split evenly on both sides.

    Parameters
    ----------
    img : np.ndarray
        2-D (single-channel) image with non-zero height and width.
    target : tuple of int, optional
        Output size as ``(height, width)``.

    Returns
    -------
    np.ndarray
        ``uint8`` array of shape ``target`` containing the resized image.
    """
    h, w = img.shape
    th, tw = target

    # One scale factor for both axes keeps the aspect ratio.
    scale = min(th / h, tw / w)

    # Round instead of truncating so float error (e.g. 383.9999) does not
    # lose a pixel, and clamp to [1, target]: a very elongated input would
    # otherwise produce a 0-pixel side, which cv2.resize rejects.
    nh = min(th, max(1, int(round(h * scale))))
    nw = min(tw, max(1, int(round(w * scale))))

    # cv2.resize takes the size as (width, height).
    resized = cv2.resize(img, (nw, nh), interpolation=cv2.INTER_AREA)

    # Black canvas with the resized image pasted in the centre.
    canvas = np.zeros((th, tw), dtype=np.uint8)
    y0 = (th - nh) // 2
    x0 = (tw - nw) // 2
    canvas[y0:y0+nh, x0:x0+nw] = resized

    return canvas


# ============================================================
# 5) PIPELINE COMPLETO DE PREPROCESADO ROBUSTO
# ============================================================

def preprocess_image(input_path, output_path):
    """
    Run the full preprocessing pipeline on one image file and save the result.

    Steps: read as grayscale, crop to the ultrasound field, trim a dark top
    band, enhance contrast with CLAHE, resize with padding to 384x384, and
    write the result to ``output_path`` (JPEG quality 95 when the target is
    a JPEG). Failures are reported on stdout instead of raising, so a batch
    run is not interrupted by a single bad file.

    Parameters
    ----------
    input_path : str
        Path of the image to read.
    output_path : str
        Path of the file to write; its parent folder is created if needed.

    Returns
    -------
    bool
        ``True`` when the processed image was written, ``False`` when the
        input could not be read or the output could not be written.
    """
    img = cv2.imread(input_path, cv2.IMREAD_GRAYSCALE)
    if img is None:
        print("ERROR leyendo:", input_path)
        return False

    # 1) Crop to the acoustic field
    img = detect_ultrasound_field(img)

    # 2) Remove top headers without touching tissue
    img = remove_headers_safely(img)

    # 3) Improve contrast with CLAHE
    img = enhance_clahe(img)

    # 4) Resize with padding
    img = resize_with_padding(img, target=(384,384))

    # 5) Save
    img_to_save = img.astype(np.uint8, copy=False)

    # dirname is "" for a bare file name, and os.makedirs("") raises, so only
    # create the folder when the path actually has a directory part.
    out_dir = os.path.dirname(output_path)
    if out_dir:
        os.makedirs(out_dir, exist_ok=True)

    # cv2.imwrite signals failure through its return value, not an exception.
    written = cv2.imwrite(
        output_path, img_to_save, [int(cv2.IMWRITE_JPEG_QUALITY), 95]
    )
    if not written:
        print("ERROR guardando:", output_path)
        return False

    return True


# ============================================================
# 6) PROCESAMIENTO MASIVO
# ============================================================

# Root folders: raw images are read from base_in and the processed copies are
# written to base_out, using the same per-center sub-folder name in both.
base_in = r"../data/imgs"
base_out = r"../data/processed_2"

# Sub-folder names (one per center) looked up under base_in.
centers = [
    "Albacete","Barcelona","Caceres","Getafe",
    "Goya_Madrid","Guadalajara","La_Paz","Navarra","Salamanca"
]

for c in centers:
    in_folder = os.path.join(base_in, c)
    out_folder = os.path.join(base_out, c)

    # Only files matching "*.jpeg" directly inside the center folder are
    # collected; the search is not recursive.
    files = glob(os.path.join(in_folder, "*.jpeg"))

    print(f"Procesando {len(files)} imágenes de {c}...")

    for f in files:
        # The output keeps the input's file name, so an existing file with
        # that name in out_folder is overwritten.
        fname = os.path.basename(f)
        outp = os.path.join(out_folder, fname)
        preprocess_image(f, outp)

# NOTE(review): printed unconditionally — the result of preprocess_image is
# not checked in this loop, so this message does not guarantee that every
# file was processed successfully.
print("✔ Preprocesado completado.")

Procesando 10490 imágenes de Albacete...
Procesando 10016 imágenes de Barcelona...
Procesando 5906 imágenes de Caceres...
Procesando 1161 imágenes de Getafe...
Procesando 3566 imágenes de Goya_Madrid...
Procesando 1714 imágenes de Guadalajara...
Procesando 7286 imágenes de La_Paz...
Procesando 8713 imágenes de Navarra...
Procesando 6325 imágenes de Salamanca...
✔ Preprocesado completado.
