# OCR planillas de volquetas (Colab) - detección automática de tablas

Este notebook detecta tablas y celdas automáticamente usando las líneas de la tabla.
Si la detección falla, ajusta los parámetros en la celda de configuración (MIN_TABLE_AREA, LINE_SCALE_H, LINE_SCALE_V).

Pasos:
1. Ejecuta las celdas en orden.
2. Sube la foto.
3. Revisa la visualización (DEBUG).
4. Genera el Excel.


In [None]:
!pip -q install easyocr opencv-python-headless matplotlib pandas openpyxl


In [None]:
import cv2, numpy as np, matplotlib.pyplot as plt, pandas as pd, re
from google.colab import files
import easyocr

# General settings
USE_GPU = False  # forwarded as gpu= when the easyocr.Reader is created
DEBUG = True  # when True, intermediate images are displayed with show_img
AUTO_WARP = True  # when True, the uploaded photo goes through auto_warp_document
MAX_ROWS = 42  # extract_table_rows stops once the row index reaches this value
SKIP_HEADER_ROWS = 1  # number of leading rows of each table that are skipped

# Table and line detection
MIN_TABLE_AREA = 0.08  # fraction of the total image area
LINE_SCALE_H = 0.5     # fraction of the table width
LINE_SCALE_V = 0.5     # fraction of the table height
MERGE_TOL = 10  # tolerance used when merging nearby line positions

# Known plates; when non-empty, best_plate maps an OCR result to the closest
# entry if it is within an edit distance of 2. Example entries:
PLATE_LIST = [
    # 'TXN733', 'KOL645', 'OCF331'
]
REMOVE_LINES = True  # when True, prep_cell erases ruling lines before OCR


In [None]:
def show_img(img, title=None, size=8):
    """Display an image inline with matplotlib.

    Args:
        img: Image array, or None. Two-dimensional arrays are drawn with a
            gray colormap; anything else is converted from BGR to RGB first.
        title: Optional figure title.
        size: Side length passed to ``figsize`` for a square figure.

    Returns:
        None. Prints 'No image' and returns early when ``img`` is None.
    """
    if img is None:
        print('No image')
        return

    plt.figure(figsize=(size, size))
    single_channel = len(img.shape) == 2
    if single_channel:
        plt.imshow(img, cmap='gray')
    else:
        # OpenCV stores colour as BGR; matplotlib expects RGB.
        rgb = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)
        plt.imshow(rgb)
    if title:
        plt.title(title)
    plt.axis('off')
    plt.show()

def order_points(pts):
    """Order four corner points as top-left, top-right, bottom-right, bottom-left.

    Args:
        pts: Array of four 2-D points (assumed to be (x, y) columns).

    Returns:
        A (4, 2) float32 array. Row 0 has the smallest coordinate sum and
        row 2 the largest; row 1 has the smallest (second - first) coordinate
        difference and row 3 the largest.
    """
    coord_sum = pts.sum(axis=1)
    coord_diff = np.diff(pts, axis=1)  # second column minus first column
    corners = [
        pts[np.argmin(coord_sum)],
        pts[np.argmin(coord_diff)],
        pts[np.argmax(coord_sum)],
        pts[np.argmax(coord_diff)],
    ]
    return np.array(corners, dtype='float32')

def four_point_transform(image, pts):
    """Warp the quadrilateral described by ``pts`` onto an upright rectangle.

    Args:
        image: Source image.
        pts: Four corner points, in any order (they are sorted by
            ``order_points``).

    Returns:
        The perspective-corrected image. Its width and height are the longer
        of the two opposing edge lengths, truncated to int.
    """
    rect = order_points(pts)
    tl, tr, br, bl = rect

    # Output size: longest horizontal edge by longest vertical edge.
    out_w = int(max(np.linalg.norm(br - bl), np.linalg.norm(tr - tl)))
    out_h = int(max(np.linalg.norm(tr - br), np.linalg.norm(tl - bl)))

    target = np.array(
        [
            [0, 0],
            [out_w - 1, 0],
            [out_w - 1, out_h - 1],
            [0, out_h - 1],
        ],
        dtype='float32',
    )
    matrix = cv2.getPerspectiveTransform(rect, target)
    return cv2.warpPerspective(image, matrix, (out_w, out_h))

def find_document_contour(image):
    """Look for a four-sided contour among the ten largest edge contours.

    Args:
        image: BGR image.

    Returns:
        The first polygon approximation with exactly four vertices, scanning
        contours from largest to smallest area, or None if none qualifies.
    """
    grayscale = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
    smoothed = cv2.GaussianBlur(grayscale, (5, 5), 0)
    edge_map = cv2.Canny(smoothed, 75, 200)

    found = cv2.findContours(edge_map, cv2.RETR_LIST, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    contours = found[0] if len(found) == 2 else found[1]

    largest_first = sorted(contours, key=cv2.contourArea, reverse=True)
    for candidate in largest_first[:10]:
        perimeter = cv2.arcLength(candidate, True)
        polygon = cv2.approxPolyDP(candidate, 0.02 * perimeter, True)
        if len(polygon) == 4:
            return polygon
    return None

def auto_warp_document(image):
    """Perspective-correct ``image`` when a four-sided outline is found.

    Args:
        image: BGR image.

    Returns:
        ``(warped, contour)``. When no outline is found the original image is
        returned unchanged together with None.
    """
    quad = find_document_contour(image)
    if quad is not None:
        return four_point_transform(image, quad.reshape(4, 2)), quad
    return image, None

def binarize(gray):
    """Threshold a grayscale image with a mean-based adaptive threshold.

    Args:
        gray: Single-channel image.

    Returns:
        Binary image with values 0 or 255 (THRESH_BINARY, block size 31,
        constant 10).
    """
    block_size = 31
    offset = 10
    return cv2.adaptiveThreshold(
        gray,
        255,
        cv2.ADAPTIVE_THRESH_MEAN_C,
        cv2.THRESH_BINARY,
        block_size,
        offset,
    )

def detect_lines(bin_img, scale_h=0.4, scale_v=0.4):
    """Extract long horizontal and vertical strokes from a binary image.

    Args:
        bin_img: Two-dimensional binary image.
        scale_h: Horizontal kernel length as a fraction of the image width.
        scale_v: Vertical kernel length as a fraction of the image height.

    Returns:
        ``(h_lines, v_lines, inv)``: the horizontal-line mask, the
        vertical-line mask, and the inverted input (``255 - bin_img``).
    """
    rows, cols = bin_img.shape
    inverted = 255 - bin_img
    thicken = cv2.getStructuringElement(cv2.MORPH_RECT, (3, 3))

    def _long_strokes(kernel_size):
        # Opening keeps only strokes at least as long as the kernel; the
        # 3x3 dilation afterwards thickens what survived.
        kernel = cv2.getStructuringElement(cv2.MORPH_RECT, kernel_size)
        opened = cv2.morphologyEx(inverted, cv2.MORPH_OPEN, kernel, iterations=1)
        return cv2.dilate(opened, thicken, iterations=1)

    # Kernel lengths never drop below 40 pixels.
    horizontal = _long_strokes((max(40, int(cols * scale_h)), 1))
    vertical = _long_strokes((1, max(40, int(rows * scale_v))))
    return horizontal, vertical, inverted

def merge_close_positions(pos, tol=10):
    """Collapse positions that lie within ``tol`` of the previous kept value.

    Args:
        pos: Collection of numeric positions, in any order.
        tol: Maximum distance at which a position is merged into the last
            kept one.

    Returns:
        Sorted list of positions. A merged position is replaced by the
        truncated midpoint of the kept value and the new one.
    """
    if not pos:
        return []

    ordered = sorted(pos)
    kept = ordered[:1]
    for value in ordered[1:]:
        anchor = kept[-1]
        if abs(value - anchor) > tol:
            kept.append(value)
        else:
            # Running midpoint: the kept value drifts toward the newcomer.
            kept[-1] = int((anchor + value) / 2)
    return kept

def add_boundaries(pos, max_val, tol=10):
    """Merge nearby positions and make sure both outer edges are present.

    Args:
        pos: Line positions along one axis.
        max_val: Size of that axis; the last valid index is ``max_val - 1``.
        tol: Merge tolerance, also the distance beyond which an outer edge
            is considered missing.

    Returns:
        List of positions with 0 prepended when the first position is more
        than ``tol`` from it (or there are none), and ``max_val - 1`` appended
        when the last position is more than ``tol`` from it.
    """
    edges = merge_close_positions(pos, tol=tol)
    far_edge = max_val - 1
    if not edges or edges[0] > tol:
        edges = [0, *edges]
    if far_edge - edges[-1] > tol:
        edges = [*edges, far_edge]
    return edges

def get_line_positions(line_img, axis='h', tol=10):
    """Return the merged centre coordinate of every line blob in a mask.

    Args:
        line_img: Line mask.
        axis: 'h' to collect the vertical centre (y) of each blob; any other
            value collects the horizontal centre (x).
        tol: Tolerance passed to ``merge_close_positions``.

    Returns:
        Sorted list of merged centre positions.
    """
    found = cv2.findContours(line_img, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    contours = found[0] if len(found) == 2 else found[1]
    boxes = [cv2.boundingRect(contour) for contour in contours]
    if axis == 'h':
        centres = [top + height // 2 for _, top, _, height in boxes]
    else:
        centres = [left + width // 2 for left, _, width, _ in boxes]
    return merge_close_positions(centres, tol=tol)

def split_wide_table(grid, rect):
    """Try to split one wide table rectangle into a left and a right half.

    The cut is placed at the column with the fewest grid pixels, judged on a
    25-column moving average and searched only in the central 30%-70% of the
    rectangle.

    Args:
        grid: Line mask; pixels > 0 count as grid pixels.
        rect: ``(x, y, w, h)`` of the table inside ``grid``.

    Returns:
        ``[left_rect, right_rect]`` when a cut is found more than 10 pixels
        away from both sides; otherwise ``[rect]``.
    """
    x, y, w, h = rect
    unsplit = [rect]

    ink_per_column = np.sum(grid[y:y + h, x:x + w] > 0, axis=0).astype('float32')
    if ink_per_column.size < 10:
        return unsplit

    # Moving average so isolated gaps do not win the argmin.
    window = 25
    profile = np.convolve(ink_per_column, np.ones(window) / window, mode='same')

    lo, hi = int(w * 0.3), int(w * 0.7)
    if hi <= lo:
        return unsplit

    cut = x + (np.argmin(profile[lo:hi]) + lo)
    margin = 10
    if not (x + margin < cut < x + w - margin):
        return unsplit
    return [(x, y, cut - x, h), (cut, y, x + w - cut, h)]

def find_tables(gray):
    """Locate table regions in a grayscale page from its ruling lines.

    Args:
        gray: Grayscale image.

    Returns:
        ``(tables, grid)``: a list of ``(x, y, w, h)`` rectangles sorted by
        x, and the combined horizontal/vertical line mask. A region is kept
        only if its bounding box covers at least ``MIN_TABLE_AREA`` of the
        image and spans at least 20% of both its width and height. A single
        region wider than 80% of the image is handed to ``split_wide_table``.
    """
    h_lines, v_lines, _ = detect_lines(binarize(gray), scale_h=0.2, scale_v=0.2)
    grid = cv2.bitwise_or(h_lines, v_lines)

    found = cv2.findContours(grid, cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)
    # findContours returns 2 or 3 values depending on the OpenCV version.
    contours = found[0] if len(found) == 2 else found[1]

    img_h, img_w = gray.shape[:2]
    min_area = MIN_TABLE_AREA * (img_h * img_w)

    def _is_table_sized(box):
        _, _, box_w, box_h = box
        if box_w * box_h < min_area:
            return False
        return not (box_w < img_w * 0.2 or box_h < img_h * 0.2)

    tables = [box for box in map(cv2.boundingRect, contours) if _is_table_sized(box)]

    # Split a wide table if it is the only region found.
    if len(tables) == 1 and tables[0][2] > img_w * 0.8:
        tables = split_wide_table(grid, tables[0])

    return sorted(tables, key=lambda box: box[0]), grid

def draw_table_boxes(img, tables):
    """Return a copy of ``img`` with each table outlined and labelled.

    Args:
        img: Image to annotate; it is not modified.
        tables: Iterable of ``(x, y, w, h)`` rectangles.

    Returns:
        Annotated copy with a rectangle and a 'T<n>' label (1-based) for
        every table.
    """
    canvas = img.copy()
    box_color = (0, 255, 0)
    label_color = (255, 0, 0)
    for number, (x, y, w, h) in enumerate(tables, start=1):
        cv2.rectangle(canvas, (x, y), (x + w, y + h), box_color, 2)
        cv2.putText(canvas, f'T{number}', (x + 5, y + 25),
                    cv2.FONT_HERSHEY_SIMPLEX, 0.8, label_color, 2)
    return canvas


In [None]:
# Upload a photo, optionally rectify its perspective, and locate the tables.
uploaded = files.upload()
if not uploaded:
    # Without this check an empty upload fails below with a bare StopIteration.
    raise RuntimeError('No file was uploaded.')
image_path = next(iter(uploaded.keys()))
img = cv2.imread(image_path)
if img is None:
    # cv2.imread does not raise on an unreadable or unsupported file; it
    # returns None, which would otherwise fail later inside cvtColor.
    raise ValueError(f'Could not read image: {image_path}')
if AUTO_WARP:
    warped, contour = auto_warp_document(img)
else:
    warped, contour = img, None

gray = cv2.cvtColor(warped, cv2.COLOR_BGR2GRAY)
tables, grid = find_tables(gray)
print('tables detected:', len(tables))
if DEBUG:
    show_img(warped, 'warped')
    show_img(draw_table_boxes(warped, tables), 'tables')
    show_img(grid, 'grid lines')


In [None]:
def remove_table_lines(gray):
    """Erase long horizontal and vertical strokes from a grayscale image.

    Args:
        gray: Single-channel image.

    Returns:
        Binary image (dark ink on white) in which strokes that survive an
        opening with a 40-pixel horizontal or vertical kernel are removed.
    """
    binary = cv2.adaptiveThreshold(
        gray, 255, cv2.ADAPTIVE_THRESH_MEAN_C, cv2.THRESH_BINARY, 31, 10
    )
    ink = 255 - binary

    horizontal, vertical = (
        cv2.morphologyEx(
            ink,
            cv2.MORPH_OPEN,
            cv2.getStructuringElement(cv2.MORPH_RECT, shape),
            iterations=1,
        )
        for shape in ((40, 1), (1, 40))
    )
    rulings = cv2.bitwise_or(horizontal, vertical)

    # Remove the rulings from the ink mask, then flip back to dark-on-white.
    return 255 - cv2.subtract(ink, rulings)

def prep_cell(img):
    """Prepare a cell crop for OCR.

    The crop is converted to grayscale, upscaled 2x, bilateral-filtered,
    optionally stripped of ruling lines (``REMOVE_LINES``), and thresholded
    with a Gaussian adaptive threshold.

    Args:
        img: BGR cell image.

    Returns:
        Three-channel RGB image of the thresholded cell.
    """
    cell = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    cell = cv2.resize(cell, None, fx=2, fy=2, interpolation=cv2.INTER_CUBIC)
    cell = cv2.bilateralFilter(cell, 9, 75, 75)
    if REMOVE_LINES:
        cell = remove_table_lines(cell)
    binary = cv2.adaptiveThreshold(
        cell, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY, 31, 10
    )
    return cv2.cvtColor(binary, cv2.COLOR_GRAY2RGB)

def is_blank(img):
    """Tell whether a cell crop contains almost no ink.

    Args:
        img: BGR cell image.

    Returns:
        True when fewer than 1% of the pixels are classified as ink by an
        Otsu threshold.
    """
    grayscale = cv2.cvtColor(img, cv2.COLOR_BGR2GRAY)
    _, binary = cv2.threshold(grayscale, 0, 255, cv2.THRESH_BINARY + cv2.THRESH_OTSU)
    ink_mask = (255 - binary) > 0
    return np.mean(ink_mask) < 0.01

def crop_cell(img, y1, y2, x1, x2, pad=2):
    """Slice a cell out of ``img``, shrunk by ``pad`` pixels on every side.

    Args:
        img: Image array indexed as [row, column].
        y1, y2: Top and bottom bounds of the cell.
        x1, x2: Left and right bounds of the cell.
        pad: Pixels trimmed from each side.

    Returns:
        The cropped view. Start indices are clamped to 0 and end indices to
        the image size.
    """
    height, width = img.shape[:2]
    top = max(y1 + pad, 0)
    bottom = min(y2 - pad, height)
    left = max(x1 + pad, 0)
    right = min(x2 - pad, width)
    return img[top:bottom, left:right]

def ocr_cell(reader, img):
    """Run OCR on one cell and return its text as a single string.

    Args:
        reader: Object exposing ``readtext`` (an EasyOCR reader in this
            notebook).
        img: BGR cell image; it is preprocessed with ``prep_cell`` first.

    Returns:
        The recognised fragments joined with spaces, or '' when nothing was
        recognised.
    """
    fragments = reader.readtext(prep_cell(img), detail=0, paragraph=True)
    return ' '.join(fragments) if fragments else ''

# Field patterns applied to the OCR text of each cell.
# Regex escapes use a single backslash: inside a raw string a doubled
# backslash makes the pattern look for a literal "\" character, so it would
# never match real cell text.
PLATE_RE = re.compile(r'([A-Z]{2,3}\s?\d{2,3})')  # 2-3 letters, optional space, 2-3 digits
TIME_RE = re.compile(r'(\d{1,2})[:.](\d{2})\s*([AP]M)?', re.I)  # hh:mm or hh.mm, optional AM/PM
M3_RE = re.compile(r'(\d+)\s*(m3|m\^3)?', re.I)  # integer, optional m3 / m^3 unit

def normalize_plate(text):
    """Strip everything but ASCII letters and digits, then upper-case.

    Args:
        text: Raw plate text.

    Returns:
        The cleaned, upper-cased string.
    """
    kept_runs = re.findall(r'[A-Za-z0-9]+', text)
    return ''.join(kept_runs).upper()

def edit_distance(a, b):
    """Compute the Levenshtein distance between two strings.

    Args:
        a: First string.
        b: Second string.

    Returns:
        Minimum number of single-character insertions, deletions and
        substitutions that turn ``a`` into ``b``.
    """
    if a == b:
        return 0
    if not a or not b:
        return len(b) if not a else len(a)

    # previous[j] is the distance between the processed prefix of ``a`` and b[:j].
    previous = list(range(len(b) + 1))
    for row, char_a in enumerate(a, start=1):
        current = [row]
        for col, char_b in enumerate(b, start=1):
            if char_a == char_b:
                cost = previous[col - 1]
            else:
                cost = 1 + min(previous[col - 1], previous[col], current[col - 1])
            current.append(cost)
        previous = current
    return previous[-1]

def best_plate(text, plate_list):
    """Snap an OCR plate reading to the closest known plate.

    Args:
        text: Raw plate text.
        plate_list: Known plates; may be empty.

    Returns:
        The normalised known plate with the smallest edit distance when that
        distance is at most 2 (the first one wins ties); otherwise the
        normalised input.
    """
    raw = normalize_plate(text)
    if not plate_list:
        return raw

    winner, winner_dist = None, 999
    scored = ((plate, edit_distance(raw, plate))
              for plate in map(normalize_plate, plate_list))
    for plate, dist in scored:
        if dist < winner_dist:
            winner, winner_dist = plate, dist

    return winner if winner_dist <= 2 else raw

def parse_plate(text):
    """Extract a plate from OCR text and match it against ``PLATE_LIST``.

    Args:
        text: OCR text of the plate cell.

    Returns:
        Result of ``best_plate`` applied to the first ``PLATE_RE`` match in
        the upper-cased text, or to the whole upper-cased text when nothing
        matches.
    """
    upper = text.upper()
    hit = PLATE_RE.search(upper)
    candidate = hit.group(1) if hit else upper
    return best_plate(candidate, PLATE_LIST)

def parse_time(text):
    """Normalise an OCR time reading to 'HH:MM' or 'HH:MM AM/PM'.

    Args:
        text: OCR text of the time cell.

    Returns:
        The zero-padded time, with an upper-cased AM/PM suffix when one was
        matched. When ``TIME_RE`` finds nothing in the space-stripped text,
        the input is returned with surrounding whitespace removed.
    """
    hit = TIME_RE.search(text.replace(' ', ''))
    if not hit:
        return text.strip()

    hours, minutes = int(hit.group(1)), int(hit.group(2))
    meridiem = hit.group(3)
    if not meridiem:
        return f'{hours:02d}:{minutes:02d}'
    return f'{hours:02d}:{minutes:02d} {meridiem.upper()}'

def parse_m3(text):
    """Normalise an OCR volume reading to '<number> m3'.

    Args:
        text: OCR text of the volume cell.

    Returns:
        '<digits> m3' built from the first ``M3_RE`` match in the lower-cased
        text, or the stripped input when nothing matches.
    """
    hit = M3_RE.search(text.lower())
    return f'{hit.group(1)} m3' if hit else text.strip()

def extract_table_rows(warped, table_rect, table_idx):
    """OCR the plate, time and volume cells of every row in one table.

    Row and column boundaries come from the ruling lines detected inside the
    table crop. The OCR engine is the module-level ``reader``.

    Args:
        warped: BGR page image.
        table_rect: ``(x, y, w, h)`` of the table inside ``warped``.
        table_idx: Zero-based table index, used for labels.

    Returns:
        List of dicts, one per non-blank row, holding the raw OCR text and
        the parsed value of each of the three data columns. Rows before
        ``SKIP_HEADER_ROWS`` are skipped and scanning stops at ``MAX_ROWS``.
    """
    left, top, width, height = table_rect
    table = warped[top:top + height, left:left + width]
    table_bin = binarize(cv2.cvtColor(table, cv2.COLOR_BGR2GRAY))
    h_lines, v_lines, _ = detect_lines(
        table_bin, scale_h=LINE_SCALE_H, scale_v=LINE_SCALE_V
    )

    row_edges = add_boundaries(
        get_line_positions(h_lines, axis='h', tol=MERGE_TOL), height, tol=MERGE_TOL
    )
    col_edges = add_boundaries(
        get_line_positions(v_lines, axis='v', tol=MERGE_TOL), width, tol=MERGE_TOL
    )
    row_bounds = list(zip(row_edges, row_edges[1:]))
    col_bounds = list(zip(col_edges, col_edges[1:]))

    if len(col_bounds) < 4:
        # Fall back to 4 equal-width columns.
        step = width // 4
        col_bounds = [(i * step, (i + 1) * step) for i in range(4)]

    # Columns are [N, plate, time, m3] -> keep 1..3. At this point there are
    # always at least 4 column bounds, so the slice yields three entries.
    data_cols = col_bounds[1:4]

    rows = []
    for r_idx, (row_top, row_bottom) in enumerate(row_bounds):
        if r_idx < SKIP_HEADER_ROWS:
            continue
        if r_idx >= MAX_ROWS:
            break

        plate_img, time_img, m3_img = (
            crop_cell(table, row_top, row_bottom, col_left, col_right)
            for col_left, col_right in data_cols
        )
        # A row whose plate cell is blank is dropped entirely.
        if is_blank(plate_img):
            continue

        plate_txt = ocr_cell(reader, plate_img)
        time_txt = ocr_cell(reader, time_img)
        m3_txt = ocr_cell(reader, m3_img)
        rows.append({
            'tabla': f't{table_idx + 1}',
            'fila': r_idx + 1,
            'placa_raw': plate_txt,
            'hora_raw': time_txt,
            'm3_raw': m3_txt,
            'placa': parse_plate(plate_txt),
            'hora': parse_time(time_txt),
            'm3': parse_m3(m3_txt),
        })

    if DEBUG:
        label = table_idx + 1
        show_img(table, f'table {label}')
        show_img(h_lines, f'h_lines {label}')
        show_img(v_lines, f'v_lines {label}')
    return rows


In [None]:
# Build the OCR engine for Spanish and English; extract_table_rows reads this
# module-level `reader`, so it must exist before the loop below runs.
reader = easyocr.Reader(['es', 'en'], gpu=USE_GPU)
# Collect the row dicts of every detected table, in table order.
data = []
for idx, rect in enumerate(tables):
    data.extend(extract_table_rows(warped, rect, idx))
df = pd.DataFrame(data)
# Last expression of the cell: the notebook displays the first 10 rows.
df.head(10)


In [None]:
# Write the parsed rows to an Excel workbook (without the index column) and
# hand the file to Colab's download helper.
out = 'ocr_volquetas.xlsx'
df.to_excel(out, index=False)
files.download(out)
