In [None]:
# Optional: install dependencies on Colab
# If running locally with dependencies installed, you can skip this cell.
#
# Use: Runtime > Run all
#
# Note: Uncomment the following line when running on Colab.
!pip install -q ultralytics opencv-python tqdm


In [None]:
# Imports
import os, csv, glob, re
from pathlib import Path
import cv2
import numpy as np
from ultralytics import YOLO


In [None]:
# Settings
# Directory that main() scans for input images.
IMAGES_DIR = "images"
# Directory that receives the annotated images and the CSV file.
OUTPUT_DIR = "outputs"
# File name of the results CSV, created inside OUTPUT_DIR.
CSV_NAME   = "results.csv"

# Path handed to YOLO() below; presumably the trained digit-detector weights.
DIGIT_MODEL_PATH = "digit.pt"

# Passed as `conf` to digit_model.predict() in process_image().
CONF_DIGIT = 0.1
# IoU threshold used by nms_agnostic_keep_best(): a box whose IoU with an
# already kept, higher-confidence box is >= this value is dropped.
MERGE_IOU  = 0.35

# When True, process_image() draws the PCA axis arrow on the output image.
DEBUG_DRAW_AXIS  = False
# When True, process_image() draws the "#<index>" label on each ROI.
DEBUG_DRAW_INDEX = True

# Colors passed to the OpenCV drawing calls. OpenCV reads color tuples as
# (B, G, R), so e.g. (0, 0, 255) is red.
COLOR_ROI_BOX   = (0, 255, 0)
COLOR_TEXT      = (0, 0, 255)
COLOR_INDEX     = (128, 0, 0)
COLOR_AXIS      = (0, 255, 255)

# Load model
# Runs as soon as this cell executes; process_image() uses this global.
digit_model = YOLO(DIGIT_MODEL_PATH)


In [None]:
# Utils

def natural_key(s):
    """Build a sort key that orders embedded numbers numerically ("img2" < "img10").

    The string is split on runs of decimal digits; text pieces are lower-cased
    and digit runs are converted to ``int``.

    Args:
        s: String to build the key for.

    Returns:
        list: Alternating ``str`` / ``int`` parts, always starting with a
        (possibly empty) ``str``.
    """
    parts = re.split(r'(\d+)', s)
    # With one capturing group, re.split() places the matched digit runs at the
    # odd indices. Choosing by position instead of str.isdigit() avoids calling
    # int() on characters such as "²" (isdigit() is True, int() raises) and
    # keeps the str/int layout the same for every key, so keys always compare.
    return [int(part) if idx % 2 else part.lower() for idx, part in enumerate(parts)]

# Geometry helpers

def iou_xyxy(a, b):
    """Return the intersection-over-union of two boxes given as (x1, y1, x2, y2).

    Returns 0.0 when the boxes do not overlap with positive area or when the
    union area is not positive.
    """
    a_left, a_top, a_right, a_bottom = a
    b_left, b_top, b_right, b_bottom = b

    # Width/height of the overlap rectangle, clamped at zero for disjoint boxes.
    overlap_w = max(0.0, min(a_right, b_right) - max(a_left, b_left))
    overlap_h = max(0.0, min(a_bottom, b_bottom) - max(a_top, b_top))
    overlap = overlap_w * overlap_h
    if overlap <= 0:
        return 0.0

    size_a = max(0.0, a_right - a_left) * max(0.0, a_bottom - a_top)
    size_b = max(0.0, b_right - b_left) * max(0.0, b_bottom - b_top)
    combined = size_a + size_b - overlap
    if combined > 0:
        return overlap / combined
    return 0.0


def nms_agnostic_keep_best(items, iou_thr):
    """Greedy, class-agnostic non-maximum suppression.

    Items are visited from highest to lowest "conf". An item is kept unless its
    box has IoU >= ``iou_thr`` with the box of an item that was already kept.

    Args:
        items: List of dicts with "conf" and "box" (x1, y1, x2, y2) entries.
        iou_thr: Overlap at or above which the lower-confidence item is dropped.

    Returns:
        list: The kept items, ordered by descending confidence.
    """
    if not items:
        return []

    ranked = sorted(items, key=lambda d: d["conf"], reverse=True)
    survivors = []
    for candidate in ranked:
        overlaps_survivor = any(
            iou_xyxy(winner["box"], candidate["box"]) >= iou_thr
            for winner in survivors
        )
        if not overlaps_survivor:
            survivors.append(candidate)
    return survivors


In [None]:
# ROI from digits + ordering helpers

import numpy as np


def rois_from_digits(digit_items, img_shape, link_ratio=0.12, min_cluster=1):
    """Cluster digit boxes by center distance and return one bounding ROI per cluster.

    Two digits are linked when the distance between their box centers is at most
    ``max(8.0, link_ratio * max(H, W))``. Connected groups with at least
    ``min_cluster`` members each yield one ROI covering all member boxes. If no
    group reaches ``min_cluster``, a single ROI covering every digit is returned.

    Args:
        digit_items: List of dicts with a "box" entry (x1, y1, x2, y2).
        img_shape: Image shape; only the first two entries (H, W) are used.
        link_ratio: Link distance as a fraction of the longer image side.
        min_cluster: Minimum number of digits a group needs to become an ROI.

    Returns:
        list: Dicts of the form {"cls": None, "conf": 1.0, "box": (x1, y1, x2, y2)}.
    """
    if not digit_items:
        return []

    height, width = img_shape[:2]
    centers = np.array(
        [
            [(d["box"][0] + d["box"][2]) / 2.0, (d["box"][1] + d["box"][3]) / 2.0]
            for d in digit_items
        ],
        dtype=np.float32,
    )
    max_link_dist = max(8.0, link_ratio * max(height, width))

    # Undirected neighbor lists: an edge for every pair of close-enough centers.
    count = len(centers)
    neighbors = [[] for _ in range(count)]
    for a in range(count):
        for b in range(a + 1, count):
            if np.linalg.norm(centers[a] - centers[b]) <= max_link_dist:
                neighbors[a].append(b)
                neighbors[b].append(a)

    # Collect connected components with an explicit stack.
    visited = [False] * count
    components = []
    for seed in range(count):
        if visited[seed]:
            continue
        visited[seed] = True
        stack = [seed]
        members = [seed]
        while stack:
            node = stack.pop()
            for nxt in neighbors[node]:
                if visited[nxt]:
                    continue
                visited[nxt] = True
                stack.append(nxt)
                members.append(nxt)
        if len(members) >= min_cluster:
            components.append(members)

    if not components:
        # Fallback: one ROI spanning all digits.
        xs = [d["box"][0] for d in digit_items] + [d["box"][2] for d in digit_items]
        ys = [d["box"][1] for d in digit_items] + [d["box"][3] for d in digit_items]
        return [{"cls": None, "conf": 1.0, "box": (min(xs), min(ys), max(xs), max(ys))}]

    rois = []
    for members in components:
        xs, ys = [], []
        for idx in members:
            x1, y1, x2, y2 = digit_items[idx]["box"]
            xs.extend((x1, x2))
            ys.extend((y1, y2))
        rois.append({"cls": None, "conf": 1.0, "box": (min(xs), min(ys), max(xs), max(ys))})
    return rois


def order_rois_pca(rois, prefer_top_first=True):
    """Sort ROIs by the y coordinate of their centers and report their principal axis.

    The order depends only on the center y coordinates. The principal axis
    (first right-singular vector of the mean-centered ROI centers) is returned
    alongside the order and does not influence it.

    Args:
        rois: List of dicts with a "box" entry (x1, y1, x2, y2).
        prefer_top_first: If true, ROIs are sorted by ascending center y and the
            axis is flipped so its y component is <= 0. Otherwise the order is
            descending and the axis y component is >= 0.

    Returns:
        tuple: ``(ordered_rois, origin, axis)``. ``origin`` is the mean of the
        ROI centers and ``axis`` a float32 2-vector. For an empty input the
        result is ``([], None, None)``.
    """
    if not rois:
        return [], None, None

    centers = np.array(
        [[(r["box"][0] + r["box"][2]) / 2.0, (r["box"][1] + r["box"][3]) / 2.0] for r in rois],
        dtype=np.float32
    )
    origin = centers.mean(axis=0)

    if len(rois) == 1:
        # A single point has no principal direction; use a vertical unit vector
        # that follows the same sign convention as the multi-ROI case below.
        axis = np.array([0.0, -1.0 if prefer_top_first else 1.0], dtype=np.float32)
        return rois, origin, axis

    _, _, vh = np.linalg.svd(centers - origin, full_matrices=False)
    axis = vh[0].astype(np.float32)

    # The sign of a singular vector is arbitrary; normalise it.
    points_wrong_way = axis[1] > 0 if prefer_top_first else axis[1] < 0
    if points_wrong_way:
        axis = -axis

    sort_keys = centers[:, 1] if prefer_top_first else -centers[:, 1]
    # A stable sort keeps the input order for ROIs with equal center y, so the
    # resulting ROI indices are deterministic.
    order = np.argsort(sort_keys, kind="stable")
    ordered = [rois[i] for i in order.tolist()]
    return ordered, origin, axis


def draw_axis(img, origin, vec, scale=120):
    """Draw an arrow from ``origin`` along ``vec`` and label it "PCA axis".

    Does nothing when ``img``, ``origin`` or ``vec`` is None. The arrow ends at
    ``origin + vec * scale``; the image is modified in place.
    """
    if any(arg is None for arg in (img, origin, vec)):
        return

    start_x = float(origin[0])
    start_y = float(origin[1])
    tip_x = start_x + float(vec[0]) * scale
    tip_y = start_y + float(vec[1]) * scale

    # OpenCV drawing calls take integer pixel coordinates.
    tail = (int(round(start_x)), int(round(start_y)))
    head = (int(round(tip_x)), int(round(tip_y)))

    cv2.arrowedLine(img, tail, head, COLOR_AXIS, 2, tipLength=0.25)
    label_pos = (tail[0] + 5, tail[1] - 5)
    cv2.putText(img, "PCA axis", label_pos, cv2.FONT_HERSHEY_SIMPLEX, 0.6, COLOR_AXIS, 2)


In [None]:
# Row grouping and formatting

def group_rows(digits):
    """Group digit detections into text rows, top to bottom, left to right.

    Digits are visited by ascending center y. The first unused digit seeds a
    row, and every other unused digit whose center y lies within ``row_gap`` of
    the seed's center y joins that row. ``row_gap`` is 60% of the median digit
    height, but at least 10 pixels.

    Args:
        digits: List of dicts with a "box" entry (x1, y1, x2, y2).

    Returns:
        list: Rows ordered by mean center y; each row is a list of the input
        dicts ordered by center x.
    """
    if not digits:
        return []

    centers_y = [(d["box"][1] + d["box"][3]) / 2.0 for d in digits]
    heights = [d["box"][3] - d["box"][1] for d in digits]
    # Use a true median: with the mean, a single oversized box inflates the gap
    # and merges rows that are clearly separate.
    median_h = max(1.0, float(np.median(heights)))
    row_gap = max(10.0, median_h * 0.60)

    order_y = sorted(range(len(digits)), key=lambda i: centers_y[i])
    used = [False] * len(digits)

    rows = []
    for i in order_y:
        if used[i]:
            continue
        cy_ref = centers_y[i]
        this_row_idx = [i]
        used[i] = True
        for j in order_y:
            if used[j]:
                continue
            if abs(centers_y[j] - cy_ref) <= row_gap:
                this_row_idx.append(j)
                used[j] = True
        row_digits = [digits[k] for k in this_row_idx]
        row_digits.sort(key=lambda d: (d["box"][0] + d["box"][2]) / 2.0)
        rows.append(row_digits)

    rows.sort(key=lambda row: sum((d["box"][1] + d["box"][3]) / 2.0 for d in row) / len(row))
    return rows


def format_two_rows_per_line(rows):
    """Render digit rows as text, placing two consecutive rows on each output line.

    Each row becomes the concatenation of its digits' integer "cls" values.
    Rows are then taken in pairs, joined by one space, and the pairs are joined
    by newlines; a trailing unpaired row stands alone on the last line.
    """
    texts = []
    for row in rows:
        texts.append("".join(str(int(digit["cls"])) for digit in row))

    paired = [" ".join(texts[start:start + 2]) for start in range(0, len(texts), 2)]
    return "\n".join(paired)


In [None]:
# Inference and CSV writing

def digits_inside_roi(roi_box, digit_items):
    """Return the digit items whose box center lies inside ``roi_box`` (edges included).

    Args:
        roi_box: ROI as (x1, y1, x2, y2).
        digit_items: List of dicts with a "box" entry (x1, y1, x2, y2).

    Returns:
        list: Matching items in their original order.
    """
    left, top, right, bottom = roi_box

    def _center_in_roi(item):
        bx1, by1, bx2, by2 = item["box"]
        return left <= (bx1 + bx2) / 2 <= right and top <= (by1 + by2) / 2 <= bottom

    return [item for item in digit_items if _center_in_roi(item)]


def process_image(img_path, writer):
    """Detect digits in one image, write one CSV row per ROI and save an annotated copy.

    Steps: read the image, run the digit model, suppress overlapping detections,
    cluster the remaining digits into ROIs, order the ROIs, then for each ROI
    read its digits row by row, draw the ROI and its text on the image and emit
    a CSV row ``[file name, roi index, text or "-"]``. The annotated image is
    written to OUTPUT_DIR under the original file name.

    Args:
        img_path: Path of the image to process.
        writer: Object with a ``writerow`` method (a ``csv.writer`` in main()).

    Returns:
        int: Number of CSV rows written; 0 if the image could not be read or
        no ROI was found.
    """
    frame = cv2.imread(img_path)
    if frame is None:
        return 0

    prediction = digit_model.predict(frame, conf=CONF_DIGIT, verbose=False)[0]
    detections = []
    if prediction.boxes is not None:
        for det in prediction.boxes:
            left, top, right, bottom = (float(v) for v in det.xyxy[0].cpu().numpy())
            detections.append({
                "cls": int(det.cls[0].cpu().numpy()),
                "conf": float(det.conf[0].cpu().numpy()),
                "box": (left, top, right, bottom),
            })

    digit_items = nms_agnostic_keep_best(detections, MERGE_IOU)

    regions = rois_from_digits(digit_items, frame.shape, link_ratio=0.12, min_cluster=1)
    regions = nms_agnostic_keep_best(regions, MERGE_IOU)
    regions, origin, axis_vec = order_rois_pca(regions, prefer_top_first=True)

    if DEBUG_DRAW_AXIS and len(regions) > 0:
        draw_axis(frame, origin, axis_vec, scale=max(frame.shape[:2]) * 0.12)

    file_name = Path(img_path).name
    font = cv2.FONT_HERSHEY_SIMPLEX
    line_gap = 22
    rows_written = 0

    for idx, roi in enumerate(regions):
        rx1, ry1, rx2, ry2 = roi["box"]
        roi_left, roi_top = int(rx1), int(ry1)

        members = digits_inside_roi(roi["box"], digit_items)
        text_multi = format_two_rows_per_line(group_rows(members))

        cv2.rectangle(frame, (roi_left, roi_top), (int(rx2), int(ry2)), COLOR_ROI_BOX, 2)

        # The label starts just above the ROI, clamped to the top image edge.
        label_y = max(0, roi_top - 5)
        if text_multi:
            for li, line in enumerate(text_multi.split("\n")):
                cv2.putText(frame, line, (roi_left, label_y + li * line_gap),
                            font, 0.7, COLOR_TEXT, 2)
        else:
            cv2.putText(frame, "-", (roi_left, label_y), font, 0.7, COLOR_TEXT, 2)

        if DEBUG_DRAW_INDEX:
            cv2.putText(frame, f"#{idx}", (roi_left, roi_top + 20), font, 0.7, COLOR_INDEX, 2)

        writer.writerow([file_name, idx, text_multi or "-"])
        rows_written += 1

    out_dir = Path(OUTPUT_DIR)
    out_dir.mkdir(exist_ok=True)
    cv2.imwrite(str(out_dir / file_name), frame)
    return rows_written


In [None]:
# Entry point

def main():
    """Process every image in IMAGES_DIR and write the results CSV to OUTPUT_DIR.

    Images are picked by file extension (.jpg, .jpeg, .png, .bmp, compared
    case-insensitively) and processed in natural sort order. The CSV uses ";"
    as delimiter and has the columns image, roi_index, text. An image for which
    process_image() wrote no row gets a placeholder row with roi_index -1.
    """
    out_dir = Path(OUTPUT_DIR)
    # parents=True so that a nested OUTPUT_DIR (e.g. "runs/exp1") also works.
    out_dir.mkdir(parents=True, exist_ok=True)
    csv_path = out_dir / CSV_NAME

    # Match extensions case-insensitively so files such as "a.BMP" or "b.Png"
    # are not skipped on case-sensitive filesystems. Globbing with "*" still
    # ignores dot-files, as the per-extension patterns did.
    extensions = {".jpg", ".jpeg", ".png", ".bmp"}
    images = [
        path for path in glob.glob(os.path.join(IMAGES_DIR, "*"))
        if os.path.splitext(path)[1].lower() in extensions
    ]
    images.sort(key=natural_key)

    if not images:
        print("Görsel bulunamadı.")
        return

    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f, delimiter=';')
        w.writerow(["image","roi_index","text"])
        for p in images:
            n = process_image(p, w)
            if n == 0:
                # Keep one row per image even when nothing was detected/read.
                w.writerow([Path(p).name, -1, "-"])
    print("CSV hazır ->", csv_path)

if __name__ == "__main__":
    main()
