In [1]:
import os, glob, random, uuid
import math
import cv2
import matplotlib.pyplot as plt
import numpy as np
import imutils
from pathlib import Path
import torch
from torchvision import models, transforms
from torch.utils.data import DataLoader
from PIL import Image

**Este script se usa para separar las bubbles que tienen cruz de las que no tienen cruz. Se usará para entrenar la red neuronal (en el siguiente script)**

INPUT: 4 crops de cada examen (student_name, grida, gridb, gridc)

OUTPUT: dos carpetas con bubbles con cruz o sin cruz

### Envio de TARGET_SAMPLES recuadros a la carpeta para posterior clasificacion

In [None]:
# configuro rutas
CROPS_DIR     = "cropped_columns"
CROSSED_DIR   = "cropped_columns/0-CROSSED"     # no usado en este script
UNCROSSED_DIR = "cropped_columns/1-UNCROSSED"

os.makedirs(CROSSED_DIR, exist_ok=True)
os.makedirs(UNCROSSED_DIR, exist_ok=True)

#  especificaciones de grilla (n_filas, n_columnas)
grid_specs = {
    "col1": (14, 4),
    "col2": (14, 4),
    "cpl3": (12, 4),
}

# recolectar todas las grillas válidas
candidates = []
for folder in os.listdir(CROPS_DIR):
    fp = os.path.join(CROPS_DIR, folder)
    if not os.path.isdir(fp):
        continue
    for fname in os.listdir(fp):
        name_no_ext, ext = os.path.splitext(fname)
        if name_no_ext in grid_specs:
            candidates.append((fp, fname, grid_specs[name_no_ext]))

if not candidates:
    raise RuntimeError("No grid images found in your crops directory!")

# construir universo de celdas únicas (sin cargar imágenes todavía)
universe = []  # (folder_path, fname, n_rows, n_cols, i, j)
for folder_path, fname, (n_rows, n_cols) in candidates:
    for i in range(n_rows):
        for j in range(n_cols):
            universe.append((folder_path, fname, n_rows, n_cols, i, j))

# muestreo sin reemplazo
TARGET_SAMPLES = 4000
total_unique = len(universe)
if total_unique == 0:
    raise RuntimeError("No hay celdas disponibles en las grillas.")

if total_unique < TARGET_SAMPLES:
    print(f"[AVISO] Solo hay {total_unique} celdas únicas. Se guardarán todas.")
    TARGET_SAMPLES = total_unique

random.seed(18)  # reproducible
selection = random.sample(universe, k=TARGET_SAMPLES)

# guardar las celdas en UNCROSSED automáticamente
saved = 0
for (folder_path, fname, n_rows, n_cols, i, j) in selection:
    img_path = os.path.join(folder_path, fname)
    img = cv2.imread(img_path)
    if img is None:
        print(f"[WARN] No pude leer {img_path}, salto.")
        continue
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)

    h, w = img.shape[:2]
    cell_h, cell_w = h / n_rows, w / n_cols

    y1, y2 = int(i * cell_h), int((i + 1) * cell_h)
    x1, x2 = int(j * cell_w), int((j + 1) * cell_w)

    # sanitizar límites por posibles redondeos
    y1 = max(0, min(y1, h - 1))
    y2 = max(y1 + 1, min(y2, h))
    x1 = max(0, min(x1, w - 1))
    x2 = max(x1 + 1, min(x2, w))

    cell = img[y1:y2, x1:x2]
    if cell.size == 0:
        print(f"[WARN] Celda vacía {img_path} r{i+1} c{j+1}, salto.")
        continue

    base = os.path.basename(folder_path)
    name_no_ext, ext = os.path.splitext(fname)
    out_name = f"{base}_{name_no_ext}_r{i+1}c{j+1}_{uuid.uuid4().hex}{ext}"
    out_path = os.path.join(UNCROSSED_DIR, out_name)

    ok = cv2.imwrite(out_path, cv2.cvtColor(cell, cv2.COLOR_RGB2BGR))
    if ok:
        saved += 1
        if saved % 50 == 0:
            print(f"→ guardados {saved}/{TARGET_SAMPLES}")
    else:
        print(f"[WARN] Falló al guardar {out_path}")

print(f"Listo. Guardados {saved} recuadros en {UNCROSSED_DIR}.")


→ guardados 50/4000
→ guardados 100/4000
→ guardados 150/4000
→ guardados 200/4000
→ guardados 250/4000
→ guardados 300/4000
→ guardados 350/4000
→ guardados 400/4000
→ guardados 450/4000
→ guardados 500/4000
→ guardados 550/4000
→ guardados 600/4000
→ guardados 650/4000
→ guardados 700/4000
→ guardados 750/4000
→ guardados 800/4000
→ guardados 850/4000
→ guardados 900/4000
→ guardados 950/4000
→ guardados 1000/4000
→ guardados 1050/4000
→ guardados 1100/4000
→ guardados 1150/4000
→ guardados 1200/4000
→ guardados 1250/4000
→ guardados 1300/4000
→ guardados 1350/4000
→ guardados 1400/4000
→ guardados 1450/4000
→ guardados 1500/4000
→ guardados 1550/4000
→ guardados 1600/4000
→ guardados 1650/4000
→ guardados 1700/4000
→ guardados 1750/4000
→ guardados 1800/4000
→ guardados 1850/4000
→ guardados 1900/4000
→ guardados 1950/4000
→ guardados 2000/4000
→ guardados 2050/4000
→ guardados 2100/4000
→ guardados 2150/4000
→ guardados 2200/4000
→ guardados 2250/4000
→ guardados 2300/4000
→ guarda

### Seleccion de X o noX manual

In [None]:
# configuro paths
CROPS_DIR     = "cropped_columns"
CROSSED_DIR   = "cropped_columns/0-CROSSED"
UNCROSSED_DIR = "cropped_columns/0-UNCROSSED"

os.makedirs(CROSSED_DIR, exist_ok=True)
os.makedirs(UNCROSSED_DIR, exist_ok=True)

# especificaciones de grilla (n_filas, n_columnas)
grid_specs = {
    "col1": (14, 4),
    "col2": (14, 4),
    "cpl3": (12, 4),
}

# recolecto todas las imagenes de grillas válidas
candidates = []
for folder in os.listdir(CROPS_DIR):
    fp = os.path.join(CROPS_DIR, folder)
    if not os.path.isdir(fp): continue
    for fname in os.listdir(fp):
        cls, ext = os.path.splitext(fname)
        if cls in grid_specs:
            candidates.append((fp, fname, grid_specs[cls]))

if not candidates:
    raise RuntimeError("No grid images found in your crops directory!")

# annotation loop
while True:
    # pick random image + spec
    folder_path, fname, (n_rows, n_cols) = random.choice(candidates)
    img_path = os.path.join(folder_path, fname)

    # load & compute cell dims
    img = cv2.imread(img_path)
    img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
    h, w, _ = img.shape
    cell_h, cell_w = h / n_rows, w / n_cols

    # pick random cell
    i = random.randrange(n_rows)
    j = random.randrange(n_cols)
    y1, y2 = int(i*cell_h), int((i+1)*cell_h)
    x1, x2 = int(j*cell_w), int((j+1)*cell_w)
    cell = img[y1:y2, x1:x2]

    # show popup
    cv2.namedWindow("ANNOTATOR — x=yes, n=no, q=quit", cv2.WINDOW_NORMAL)
    cv2.imshow("ANNOTATOR — x=yes, n=no, q=quit", cv2.cvtColor(cell, cv2.COLOR_RGB2BGR))
    key = cv2.waitKey(0) & 0xFF
    cv2.destroyAllWindows()

    # handle response
    if key == ord('q'):
        print("Quitting annotation.")
        break
    elif key == ord('x'):
        dest = CROSSED_DIR
    elif key == ord('n'):
        dest = UNCROSSED_DIR
    else:
        print("Unrecognized key, skipping this cell.")
        continue

    # build unique filename and save
    base = os.path.basename(folder_path)
    name_no_ext, ext = os.path.splitext(fname)
    out_name = f"{base}_{name_no_ext}_r{i+1}c{j+1}_{uuid.uuid4().hex}{ext}"
    out_path = os.path.join(dest, out_name)
    cv2.imwrite(out_path, cv2.cvtColor(cell, cv2.COLOR_RGB2BGR))
    print(f"→ saved to {out_path}")


→ saved to cropped_columns/0-CROSSED/sample337_col2_r3c3_a2d7673f5b3042adba6e2efd69086fe1.png
→ saved to cropped_columns/0-UNCROSSED/sample337_col1_r4c3_c2b1070e4d4e4a65b3b944da75df0eb1.png
→ saved to cropped_columns/0-CROSSED/sample337_col2_r11c2_4a602eafbcfa4498b16d98961614b232.png
→ saved to cropped_columns/0-CROSSED/sample337_col2_r3c3_c3af26f3cc294774a28b0176eeb32a45.png
→ saved to cropped_columns/0-CROSSED/sample338_col1_r5c1_fca68fce0dfc4cd399f9a3bf72c0b6b5.png
→ saved to cropped_columns/0-CROSSED/sample337_col2_r7c3_05c6109c0b8b47ef9dbfad0f8553f30f.png
→ saved to cropped_columns/0-UNCROSSED/sample338_col2_r2c3_9bbe84b0e3554c5d86d3158b8f912174.png
→ saved to cropped_columns/0-UNCROSSED/sample337_col2_r2c3_6b45efab1a394819a8b8011fa93314bf.png
→ saved to cropped_columns/0-UNCROSSED/sample337_col2_r12c1_123d01365ff44eb98a31ed167843a203.png
→ saved to cropped_columns/0-UNCROSSED/sample338_col2_r2c4_e27da28e674c41d38ffaad19224e7e93.png
→ saved to cropped_columns/0-UNCROSSED/sample337