Blok 0

In [None]:
# ✅ Environment & Backend Check (Auto Fallback)
import sys, os, platform
print(f"Python: {sys.version.split()[0]}  OS: {platform.system()} {platform.release()}")

DETECTRON2_AVAILABLE = False
try:
    import detectron2  # type: ignore
    DETECTRON2_AVAILABLE = True
    print("✅ Detectron2 available - primary pipeline can run")
except Exception as e:
    print(f"⚠️ Detectron2 not available: {e}")
    print("➡️ Will use TorchVision RetinaNet fallback pipeline")

# Core deps for fallback
%pip -q install --upgrade pillow numpy matplotlib opencv-python-headless pytesseract tqdm pyyaml torchvision torch || true

# Tesseract (Colab)
try:
    import subprocess
    if platform.system() == 'Linux':
        subprocess.run(['apt','update','-y'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
        subprocess.run(['apt','install','-y','tesseract-ocr'], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
    print('✅ Tesseract ready')
except Exception as _:
    print('⚠️ Tesseract install skipped')

Blok 1

In [None]:
# 📦 Dataset Resolve (COCO zip)
import glob, json, zipfile, pathlib
from pathlib import Path

# Exported COCO archive and the directory it is unpacked into.
COCO_ZIP = "/content/Slab 3.v6i.coco.zip"
DATA_ROOT = Path("/content/data_coco")

# Stop early with a clear message when the archive is missing.
_archive_path = Path(COCO_ZIP)
if not _archive_path.exists():
    raise FileNotFoundError(f"Zip not found: {COCO_ZIP}")

# Unpack the whole archive below DATA_ROOT (created on first run).
DATA_ROOT.mkdir(exist_ok=True)
with zipfile.ZipFile(_archive_path, mode='r') as archive:
    archive.extractall(path=DATA_ROOT)

def _find_first(pattern):
    """Return the first path matching ``pattern`` (recursive glob), or ``None``.

    Matches are sorted before picking one so the result is deterministic;
    ``glob.glob`` returns paths in arbitrary order.
    """
    matches = sorted(glob.glob(pattern, recursive=True))
    return matches[0] if matches else None

# Locate each split's COCO annotation file anywhere below the extraction root.
TRAIN_JSON = _find_first(f"{DATA_ROOT}/**/train/_annotations.coco.json")
VAL_JSON   = _find_first(f"{DATA_ROOT}/**/valid/_annotations.coco.json")
TEST_JSON  = _find_first(f"{DATA_ROOT}/**/test/_annotations.coco.json")
# Explicit raise instead of `assert`, which is stripped under `python -O`.
if not (TRAIN_JSON and VAL_JSON):
    raise FileNotFoundError("train/valid COCO json bulunamadı")

# Each split directory is the folder that holds its annotation file.
TRAIN_DIR = str(pathlib.Path(TRAIN_JSON).parent)
VAL_DIR   = str(pathlib.Path(VAL_JSON).parent)
# The test split is optional; fall back to the validation directory.
TEST_DIR  = str(pathlib.Path(TEST_JSON).parent) if TEST_JSON else VAL_DIR

print("TRAIN:", TRAIN_DIR)
print("VALID:", VAL_DIR)
print("TEST :", TEST_DIR)

Blok 2

In [None]:
# 🔁 Digits -> Textline COCO (with transcript)
import numpy as np

def coco_digits_to_textlines(src_json, dst_json, y_tol_ratio=0.06, gap_ratio=0.08):
    """Merge per-digit COCO boxes into text-line boxes that carry a transcript.

    Reads the COCO file ``src_json``, groups the non-crowd annotations of each
    image into rows (by vertical centre) and then into runs within a row (by
    horizontal gap), and writes a new COCO file ``dst_json`` with a single
    ``textline`` category. Each output annotation is the union box of its
    group plus a ``transcript`` string built from the digits left to right.

    Args:
        src_json: Path of the source COCO annotation file.
        dst_json: Path the converted COCO file is written to.
        y_tol_ratio: Row tolerance as a fraction of the image height; a box
            joins the current row when its vertical centre is within this
            distance of the previous box's centre.
        gap_ratio: Split threshold as a fraction of the row's centre-to-centre
            span; a larger horizontal gap starts a new group.
    """
    with open(src_json, encoding="utf-8") as f:
        coco = json.load(f)
    id2img = {img["id"]: img for img in coco["images"]}
    id2name = {c["id"]: str(c.get("name", c["id"])) for c in coco.get("categories", [])}

    # Bucket annotations per image, ignoring crowd regions.
    anns_by_img = {}
    for ann in coco["annotations"]:
        if ann.get("iscrowd",0)==1:
            continue
        anns_by_img.setdefault(ann["image_id"], []).append(ann)

    new = {"images": coco["images"], "type":"instances",
           "categories":[{"id":1,"name":"textline"}], "annotations":[]}
    ann_id = 1

    for img_id, anns in anns_by_img.items():
        if not anns:
            continue
        H = id2img[img_id]["height"]

        boxes, digits = [], []
        for a in anns:
            x,y,w,h = a["bbox"]
            boxes.append([x,y,w,h])
            name = id2name.get(a["category_id"], str(a["category_id"]))
            # Use the category name when it is numeric, otherwise the raw id.
            try:
                d = str(int(name))
            except ValueError:
                d = str(a["category_id"])
            digits.append(d)

        boxes = np.array(boxes, float)

        # Rows: walk the boxes top to bottom and chain those whose vertical
        # centres are within y_tol of the previously added box.
        y_ctr = boxes[:,1] + boxes[:,3]/2
        x_ctr = boxes[:,0] + boxes[:,2]/2
        idx = np.argsort(y_ctr)
        rows, row = [], [idx[0]]
        y_tol = y_tol_ratio * H
        for i in idx[1:]:
            if abs(y_ctr[i]-y_ctr[row[-1]]) <= y_tol:
                row.append(i)
            else:
                rows.append(row); row=[i]
        rows.append(row)

        for r in rows:
            r_sorted = sorted(r, key=lambda i: x_ctr[i])
            xs = [x_ctr[i] for i in r_sorted]
            # NOTE(review): the threshold is a fraction of the row's own span,
            # so with the default gap_ratio evenly spaced digits are split into
            # one group each unless the row has more than ~13 of them. Confirm
            # this is intended before relying on multi-digit transcripts.
            gap_thr = float('inf') if len(xs)<=1 else gap_ratio*(max(xs)-min(xs)+1e-6)

            groups=[]; grp=[r_sorted[0]]
            for k in range(1,len(r_sorted)):
                if xs[k]-xs[k-1] > gap_thr:
                    groups.append(grp); grp=[r_sorted[k]]
                else:
                    grp.append(r_sorted[k])
            groups.append(grp)

            for g in groups:
                # Union box of the group, stored in COCO [x, y, w, h] form.
                member_boxes = boxes[g]
                x0 = float(np.min(member_boxes[:,0])); y0 = float(np.min(member_boxes[:,1]))
                x1 = float(np.max(member_boxes[:,0]+member_boxes[:,2]))
                y1 = float(np.max(member_boxes[:,1]+member_boxes[:,3]))
                w  = x1-x0; h = y1-y0
                transcript = "".join(digits[i] for i in g)

                new["annotations"].append({
                    "id": ann_id, "image_id": img_id, "category_id": 1,
                    "bbox":[x0,y0,w,h], "area":float(w*h), "iscrowd":0,
                    "transcript": transcript
                })
                ann_id += 1

    # Context manager guarantees the output is flushed and closed.
    with open(dst_json, "w", encoding="utf-8") as f:
        json.dump(new, f)
    print(f"[OK] wrote {dst_json}  anns={len(new['annotations'])}")

# The text-line annotation files sit beside the original per-split files and
# are only generated when they do not exist yet.
TRAIN_TL_JSON = str(pathlib.Path(TRAIN_JSON).with_name("_annotations_textline.coco.json"))
VAL_TL_JSON = str(pathlib.Path(VAL_JSON).with_name("_annotations_textline.coco.json"))

if os.path.exists(TRAIN_TL_JSON):
    print("[SKIP] train textline JSON zaten var")
else:
    coco_digits_to_textlines(TRAIN_JSON, TRAIN_TL_JSON)

if os.path.exists(VAL_TL_JSON):
    print("[SKIP] valid textline JSON zaten var")
else:
    coco_digits_to_textlines(VAL_JSON, VAL_TL_JSON)

Blok 3

In [None]:
# ===== TorchVision Fallback: train + infer + OCR + export =====
if not DETECTRON2_AVAILABLE:
    import torch
    import torchvision
    from torch.utils.data import Dataset, DataLoader
    from torchvision.models.detection import retinanet_resnet50_fpn, RetinaNet_ResNet50_FPN_Weights
    from torchvision.transforms import functional as F
    import cv2, numpy as np, json, os, pathlib
    from PIL import Image
    from tqdm import tqdm
    import pytesseract

    class CocoTextlineDataset(Dataset):
        """Detection dataset over a split folder holding images and a text-line COCO file.

        Each item is ``(image_tensor, target)`` where ``target`` has ``boxes``
        in ``[x1, y1, x2, y2]`` form and ``labels`` that are all 0 (the single
        text-line class).
        """

        def __init__(self, root_dir, split, ann_name='_annotations_textline.coco.json'):
            self.root_dir = pathlib.Path(root_dir)
            self.split = split
            with open(self.root_dir/ann_name, 'r', encoding='utf-8') as f:
                coco = json.load(f)
            self.images = coco['images']
            self.anns = coco['annotations']
            # image_id -> list of its annotations
            self.img2anns = {}
            for a in self.anns:
                self.img2anns.setdefault(a['image_id'], []).append(a)
            self.id2file = {im['id']: im['file_name'] for im in self.images}

        def __len__(self): return len(self.images)

        def __getitem__(self, idx):
            iminfo = self.images[idx]
            # Resolve the image against this dataset's own root instead of the
            # global TRAIN_DIR/VAL_DIR, so any split folder works.
            p = self.root_dir/iminfo['file_name']
            img = Image.open(p).convert('RGB')
            anns = self.img2anns.get(iminfo['id'], [])
            boxes, labels = [], []
            for a in anns:
                x,y,w,h = a['bbox']
                # Skip boxes that are at most one pixel wide or tall.
                if w>1 and h>1:
                    boxes.append([x,y,x+w,y+h]); labels.append(0)
            if not boxes:
                # Keep the (0, 4) / (0,) shapes for images without boxes.
                boxes = np.zeros((0,4), dtype=np.float32)
                labels = np.zeros((0,), dtype=np.int64)
            target={'boxes':torch.as_tensor(boxes,dtype=torch.float32),
                    'labels':torch.as_tensor(labels,dtype=torch.int64)}
            return F.to_tensor(img), target

    def collate_fn(b):
        """Turn a list of (image, target) pairs into (images, targets) tuples."""
        return tuple(zip(*b))

    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    train_ds = CocoTextlineDataset(TRAIN_DIR, 'train')
    val_ds   = CocoTextlineDataset(VAL_DIR,   'valid')
    train_loader = DataLoader(train_ds, batch_size=2, shuffle=True, collate_fn=collate_fn)
    val_loader   = DataLoader(val_ds,   batch_size=2, shuffle=False, collate_fn=collate_fn)

    # Build model with correct num_classes (no background class in RetinaNet)
    model = retinanet_resnet50_fpn(weights=None, num_classes=1)
    print('RetinaNet num_classes =', getattr(model.head.classification_head, 'num_classes', 'unknown'))
    model.to(device)
    opt = torch.optim.AdamW([p for p in model.parameters() if p.requires_grad], lr=1e-4, weight_decay=1e-4)

    def epoch(model, loader, train=True):
        """Run one pass over ``loader`` and return the mean summed loss per batch.

        With ``train=True`` the optimizer steps after every batch. With
        ``train=False`` the same losses are computed with gradient tracking
        disabled, so no autograd graph is built during validation.
        """
        # Train mode is used for validation too: the model is called with
        # targets and its output is consumed as a dict of losses.
        model.train(); tot=0.0
        for imgs,tgts in tqdm(loader, desc=('train' if train else 'valid')):
            imgs=[i.to(device) for i in imgs]
            tgts=[{k:v.to(device) for k,v in t.items()} for t in tgts]
            with torch.set_grad_enabled(train):
                lossd=model(imgs,tgts); loss=sum(lossd.values())
            if train:
                opt.zero_grad(); loss.backward(); opt.step()
            tot+=loss.item()
        return tot/max(1,len(loader))

    # Train for 50 epochs, checkpointing whenever the validation loss improves.
    best=float('inf')
    for e in range(1,51):
        tl=epoch(model,train_loader,True); vl=epoch(model,val_loader,False)
        print(f"E{e:03d} tl={tl:.4f} vl={vl:.4f}")
        if vl<best:
            best=vl; torch.save(model.state_dict(), '/content/retina_textline_best.pth')
            print('saved best')

    @torch.no_grad()
    def infer(img_path, th=0.3):
        """Detect text lines in one image.

        Returns ``(boxes, scores, rgb)``: the boxes and scores whose score is
        at least ``th`` as NumPy arrays, and the decoded RGB image as an array.
        """
        model.eval()
        img = Image.open(img_path).convert('RGB')
        t=F.to_tensor(img).to(device)
        out=model([t])[0]
        b=out['boxes'].cpu().numpy(); s=out['scores'].cpu().numpy()
        keep=s>=th
        return b[keep], s[keep], np.array(img)

Blok 4 (22.08)

In [None]:
# 🖼️ Interactive image picker → RetinaNet(best) + OCR
# Works in Jupyter/Colab using ipywidgets. It lists images from TEST_DIR, VAL_DIR and TRAIN_DIR.

import glob
from ipywidgets import widgets
from IPython.display import display
from pathlib import Path
import cv2
import numpy as np

# Helper functions for OCR and drawing (error tolerance)
def yellow_pre(bgr):
    """Blank out pixels in a fixed HSV band and return a cleaned grayscale image.

    Pixels whose HSV value lies between ``[15, 60, 120]`` and
    ``[40, 255, 255]`` (the band this helper is named after) are zeroed.
    The remainder is converted to grayscale, median-filtered with a 3x3
    window and min-max normalised to the 0..255 range.
    """
    hsv_img = cv2.cvtColor(bgr, cv2.COLOR_BGR2HSV)
    band_lo = np.array([15,60,120])
    band_hi = np.array([40,255,255])
    # Mask of everything outside the band.
    outside_band = cv2.bitwise_not(cv2.inRange(hsv_img, band_lo, band_hi))
    kept = cv2.bitwise_and(bgr, bgr, mask=outside_band)
    gray = cv2.cvtColor(kept, cv2.COLOR_BGR2GRAY)
    gray = cv2.medianBlur(gray, 3)
    return cv2.normalize(gray, None, 0, 255, cv2.NORM_MINMAX)

def ocr(g):
    """Run Tesseract in single-line, digits-only mode on image ``g``.

    Returns ``(text, confidence)``. ``text`` keeps only the digit characters
    of the recognised string. ``confidence`` is the mean of the non-negative
    entries of the ``conf`` column from ``image_to_data`` scaled to 0..1, or
    0.0 when there are none.
    """
    cfg='--oem 1 --psm 7 -c tessedit_char_whitelist=0123456789'
    txt=pytesseract.image_to_string(g, config=cfg)
    txt=''.join(ch for ch in txt if ch.isdigit())
    data=pytesseract.image_to_data(g,config=cfg,output_type=pytesseract.Output.DICT)
    conf=[]
    for c in data.get('conf',[]):
        # Parse as float so fractional confidences (e.g. '96.5') are kept;
        # a plain isdigit() check would silently discard them.
        try:
            value=float(c)
        except (TypeError, ValueError):
            continue
        # Negative entries are excluded from the mean.
        if value>=0:
            conf.append(value)
    m=(np.mean(conf)/100.0) if conf else 0.0
    return txt,m
    
def draw_box_with_alpha(img_bgr, x1, y1, x2, y2, color=(0,200,0), alpha=0.75, label_text=None):
    """Draw a translucent filled box, a solid outline and an optional label.

    ``img_bgr`` is modified in place. The fill is blended with weight
    ``alpha`` against ``1 - alpha`` for the existing pixels. When
    ``label_text`` is non-empty it is written at ``x1``, six pixels above
    ``y1`` but never higher than y=16.
    """
    top_left = (x1, y1)
    bottom_right = (x2, y2)

    # Paint the solid fill on a copy, then blend it back into img_bgr.
    filled = img_bgr.copy()
    cv2.rectangle(filled, top_left, bottom_right, color, -1)
    cv2.addWeighted(filled, alpha, img_bgr, 1 - alpha, 0, img_bgr)

    cv2.rectangle(img_bgr, top_left, bottom_right, color, 2)
    if not label_text:
        return
    label_y = max(16, y1-6)
    cv2.putText(img_bgr, label_text, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2, cv2.LINE_AA)

# Detection score threshold used by the interactive picker.
CONF_THRESH = 0.50
# "ocr" labels boxes with the recognised text; "confidence" with the score.
LABEL_MODE = "ocr"

# gather images
_IMAGE_EXTS = ('.jpg','.jpeg','.png','.bmp','.tif','.tiff')
_candidates = []
for d in [TEST_DIR, VAL_DIR, TRAIN_DIR]:
    if not d:
        continue
    for p in glob.glob(f"{d}/*.*"):
        if not p.lower().endswith(_IMAGE_EXTS):
            continue
        # The picker saves "<stem>_annot.jpg" next to the source image; leave
        # those generated files out of the list.
        if Path(p).stem.endswith('_annot'):
            continue
        _candidates.append(p)

# De-duplicate (TEST_DIR may be the same folder as VAL_DIR) and sort.
_candi_sorted = sorted(set(_candidates))
if not _candi_sorted:
    print("No images found in TEST/VALID/TRAIN directories.")
else:
    dd = widgets.Dropdown(options=_candi_sorted, description='Image:')
    btn = widgets.Button(description='Run detection + OCR', button_style='success')
    out = widgets.Output()

    def _on_click(_):
        """Detect text lines in the selected image, OCR each box and show the result.

        Loads the best checkpoint, runs ``infer``, OCRs every detected region
        after ``yellow_pre``, saves an annotated copy next to the source image
        as ``<stem>_annot.jpg`` and previews it inline.
        """
        out.clear_output()
        with out:
            img_path = dd.value
            print("Selected:", img_path)
            model.load_state_dict(torch.load('/content/retina_textline_best.pth', map_location=device))
            model.eval()
            boxes, scores, rgb = infer(img_path, th=CONF_THRESH)
            bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
            # Draw on a copy so every ROI is cropped from untouched pixels;
            # otherwise overlapping boxes would be OCR'd through the overlay
            # of a previously drawn box.
            annotated = bgr.copy()
            results = []
            for bx, sc in zip(boxes, scores):
                x1, y1, x2, y2 = map(int, bx)
                roi = bgr[max(0,y1):y2, max(0,x1):x2]
                if roi.size == 0:
                    continue
                g = yellow_pre(roi)
                txt, mconf = ocr(g)
                results.append({'box': bx.tolist(), 'det': float(sc), 'text': txt, 'conf': float(mconf)})
                label = f"{int(sc*100)}%" if LABEL_MODE=="confidence" else txt
                draw_box_with_alpha(annotated, x1, y1, x2, y2, color=(0,200,0), label_text=label)

            OUT_ANNOT = str(Path(img_path).with_name(Path(img_path).stem + "_annot.jpg"))
            cv2.imwrite(OUT_ANNOT, annotated)
            # Report detections left to right.
            results.sort(key=lambda r: r['box'][0])
            print("Saved:", OUT_ANNOT)
            print("Detections:", len(results))
            print(results[:10])

            # inline preview (optional)
            try:
                import matplotlib.pyplot as plt
                plt.figure(figsize=(12,8))
                plt.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
                plt.axis('off')
                plt.show()
            except Exception as err:
                # Best effort: the preview must not fail the run, but say why it was skipped.
                print("Preview skipped:", err)

    btn.on_click(_on_click)
    display(widgets.VBox([dd, btn, out]))

Blok 5(22.08)

In [None]:
# ✅ Two-stage pipeline: RetinaNet boxes -> Tesseract OCR inside boxes (batch)
import csv
from pathlib import Path

def retinanet_tesseract_pipeline(input_dir, out_dir="retina_ocr_out", det_th=0.30, pad=4, use_yellow=True, save_annot=True):
    """Detect text lines in every image of ``input_dir``, OCR each box and write a CSV.

    Args:
        input_dir: Folder scanned (non-recursively) for image files.
        out_dir: Output folder; receives ``results.csv`` and ``annotated/``.
        det_th: Detection score threshold passed to the detector.
        pad: Pixels added on every side of a box before cropping, clamped to
            the image bounds.
        use_yellow: Preprocess crops with ``yellow_pre`` when true, otherwise
            convert them to plain grayscale.
        save_annot: When true, save an annotated copy of each image.

    Returns:
        The path of the written CSV file as a string.
    """
    input_dir = Path(input_dir)
    out_dir = Path(out_dir); out_dir.mkdir(parents=True, exist_ok=True)
    ann_dir = out_dir / "annotated"; ann_dir.mkdir(exist_ok=True)
    csv_path = out_dir / "results.csv"

    # ensure weights loaded
    model.load_state_dict(torch.load('/content/retina_textline_best.pth', map_location=device))
    model.to(device)
    # The training loop leaves the model in train mode; switch to inference
    # mode before detecting.
    model.eval()

    image_exts = {".jpg",".jpeg",".png",".bmp",".tif",".tiff"}

    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["image","det_id","x1","y1","x2","y2","det_score","ocr_text","ocr_conf"])

        for img_path in sorted(input_dir.glob('*.*')):
            if img_path.suffix.lower() not in image_exts:
                continue
            bgr = cv2.imread(str(img_path))
            if bgr is None:
                # Unreadable or unsupported file.
                continue
            H, W = bgr.shape[:2]

            # NOTE(review): detect_boxes_retinanet is not defined in the visible
            # part of this notebook; later revisions of this cell call infer()
            # instead. Confirm the helper exists before running this version.
            boxes, scores = detect_boxes_retinanet(bgr, det_th=det_th)
            # Process detections left to right.
            order = np.argsort(boxes[:, 0]) if len(boxes) else []
            drawn = bgr.copy()

            for det_id, idx in enumerate(order):
                x1, y1, x2, y2 = boxes[idx].astype(int)
                x1 = max(0, x1 - pad); y1 = max(0, y1 - pad)
                x2 = min(W, x2 + pad); y2 = min(H, y2 + pad)
                roi = bgr[y1:y2, x1:x2]
                if roi.size == 0:
                    continue

                gray = yellow_pre(roi) if use_yellow else cv2.cvtColor(roi, cv2.COLOR_BGR2GRAY)
                text, mconf = ocr(gray)

                w.writerow([img_path.name, det_id, x1, y1, x2, y2, float(scores[idx]), text, round(float(mconf), 3)])

                if save_annot:
                    label = f"{int(scores[idx]*100)}%" if LABEL_MODE=="confidence" else text
                    draw_box_with_alpha(drawn, x1, y1, x2, y2, color=(0, 200, 0), label_text=label)

            if save_annot:
                cv2.imwrite(str(ann_dir / img_path.name), drawn)

    return str(csv_path)

# Example usage (edit input folder):
# csv_file = retinanet_tesseract_pipeline(input_dir=TEST_DIR, out_dir="/content/retina_ocr_out", det_th=0.35)
# print("CSV:", csv_file)

Blok 4(25.08)

In [None]:
# 🖼️ Interactive image picker → RetinaNet(best) + OCR
# Works in Jupyter/Colab using ipywidgets. It lists images from TEST_DIR, VAL_DIR and TRAIN_DIR.

import glob
from ipywidgets import widgets
from IPython.display import display
from pathlib import Path
import cv2
import numpy as np

# Helper functions for OCR and drawing (error tolerance)
def preprocess_for_ocr(roi_bgr):
    """Return a binarised (inverted) version of a BGR crop for OCR.

    Steps: grayscale conversion, CLAHE contrast equalisation, inverted
    Gaussian adaptive threshold (block size 11, C=2), then a morphological
    opening followed by a closing.
    """
    gray = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2GRAY)
    equalised = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)).apply(gray)
    binary = cv2.adaptiveThreshold(
        equalised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
    )
    # NOTE(review): a 1x1 structuring element should make both morphology
    # steps no-ops - confirm whether a larger kernel was intended.
    kernel = np.ones((1,1), np.uint8)
    opened = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=1)
    return cv2.morphologyEx(opened, cv2.MORPH_CLOSE, kernel, iterations=1)

def ocr(g):
    """Run Tesseract in single-line, digits-only mode on image ``g``.

    Returns ``(text, confidence)``. ``text`` keeps only the digit characters
    of the recognised string. ``confidence`` is the mean of the non-negative
    entries of the ``conf`` column from ``image_to_data`` scaled to 0..1, or
    0.0 when there are none.
    """
    cfg='--oem 1 --psm 7 -c tessedit_char_whitelist=0123456789'
    txt=pytesseract.image_to_string(g, config=cfg)
    txt=''.join(ch for ch in txt if ch.isdigit())
    data=pytesseract.image_to_data(g,config=cfg,output_type=pytesseract.Output.DICT)
    conf=[]
    for c in data.get('conf',[]):
        # Parse as float so fractional confidences (e.g. '96.5') are kept;
        # a plain isdigit() check would silently discard them.
        try:
            value=float(c)
        except (TypeError, ValueError):
            continue
        # Negative entries are excluded from the mean.
        if value>=0:
            conf.append(value)
    m=(np.mean(conf)/100.0) if conf else 0.0
    return txt,m

def draw_box_with_alpha(img_bgr, x1, y1, x2, y2, color=(0,200,0), alpha=0.75, label_text=None):
    """Draw a translucent filled box, a solid outline and an optional label.

    ``img_bgr`` is modified in place. The fill is blended with weight
    ``alpha`` against ``1 - alpha`` for the existing pixels. When
    ``label_text`` is non-empty it is written at ``x1``, six pixels above
    ``y1`` but never higher than y=16.
    """
    top_left = (x1, y1)
    bottom_right = (x2, y2)

    # Paint the solid fill on a copy, then blend it back into img_bgr.
    filled = img_bgr.copy()
    cv2.rectangle(filled, top_left, bottom_right, color, -1)
    cv2.addWeighted(filled, alpha, img_bgr, 1 - alpha, 0, img_bgr)

    cv2.rectangle(img_bgr, top_left, bottom_right, color, 2)
    if not label_text:
        return
    label_y = max(16, y1-6)
    cv2.putText(img_bgr, label_text, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2, cv2.LINE_AA)

# Detection score threshold used by the interactive picker.
CONF_THRESH = 0.50
# "ocr" labels boxes with the recognised text; "confidence" with the score.
LABEL_MODE = "ocr"

# gather images
_IMAGE_EXTS = ('.jpg','.jpeg','.png','.bmp','.tif','.tiff')
_candidates = []
for d in [TEST_DIR, VAL_DIR, TRAIN_DIR]:
    if not d:
        continue
    for p in glob.glob(f"{d}/*.*"):
        if not p.lower().endswith(_IMAGE_EXTS):
            continue
        # The picker saves "<stem>_annot.jpg" next to the source image; leave
        # those generated files out of the list.
        if Path(p).stem.endswith('_annot'):
            continue
        _candidates.append(p)

# De-duplicate (TEST_DIR may be the same folder as VAL_DIR) and sort.
_candi_sorted = sorted(set(_candidates))
if not _candi_sorted:
    print("No images found in TEST/VALID/TRAIN directories.")
else:
    dd = widgets.Dropdown(options=_candi_sorted, description='Image:')
    btn = widgets.Button(description='Run detection + OCR', button_style='success')
    out = widgets.Output()

    def _on_click(_):
        """Detect text lines in the selected image, OCR each box and show the result.

        Loads the best checkpoint, runs ``infer``, OCRs every detected region
        after ``preprocess_for_ocr``, saves an annotated copy next to the
        source image as ``<stem>_annot.jpg`` and previews it inline.
        """
        out.clear_output()
        with out:
            img_path = dd.value
            print("Selected:", img_path)
            model.load_state_dict(torch.load('/content/retina_textline_best.pth', map_location=device))
            model.eval()
            boxes, scores, rgb = infer(img_path, th=CONF_THRESH)
            bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
            # Draw on a copy so every ROI is cropped from untouched pixels;
            # otherwise overlapping boxes would be OCR'd through the overlay
            # of a previously drawn box.
            annotated = bgr.copy()
            results = []
            for bx, sc in zip(boxes, scores):
                x1, y1, x2, y2 = map(int, bx)
                roi = bgr[max(0,y1):y2, max(0,x1):x2]
                if roi.size == 0:
                    continue

                # Apply the CLAHE + adaptive-threshold preprocessing
                preprocessed_roi = preprocess_for_ocr(roi)

                txt, mconf = ocr(preprocessed_roi)
                results.append({'box': bx.tolist(), 'det': float(sc), 'text': txt, 'conf': float(mconf)})
                label = f"{int(sc*100)}%" if LABEL_MODE=="confidence" else txt
                draw_box_with_alpha(annotated, x1, y1, x2, y2, color=(0,200,0), label_text=label)

            OUT_ANNOT = str(Path(img_path).with_name(Path(img_path).stem + "_annot.jpg"))
            cv2.imwrite(OUT_ANNOT, annotated)
            # Report detections left to right.
            results.sort(key=lambda r: r['box'][0])
            print("Saved:", OUT_ANNOT)
            print("Detections:", len(results))
            print(results[:10])

            # inline preview (optional)
            try:
                import matplotlib.pyplot as plt
                plt.figure(figsize=(12,8))
                plt.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
                plt.axis('off')
                plt.show()
            except Exception as err:
                # Best effort: the preview must not fail the run, but say why it was skipped.
                print("Preview skipped:", err)

    btn.on_click(_on_click)
    display(widgets.VBox([dd, btn, out]))

Blok 5(25.08)

In [None]:
# ✅ Two-stage pipeline: RetinaNet boxes -> Tesseract OCR inside boxes (batch)
import csv
from pathlib import Path

# The preprocess_for_ocr function from the block above should be accessible here.
# If running this cell independently, redefine it.
def preprocess_for_ocr(roi_bgr):
    """Return a binarised (inverted) version of a BGR crop for OCR.

    Steps: grayscale conversion, CLAHE contrast equalisation, inverted
    Gaussian adaptive threshold (block size 11, C=2), then a morphological
    opening followed by a closing.
    """
    gray = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2GRAY)
    equalised = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8,8)).apply(gray)
    binary = cv2.adaptiveThreshold(
        equalised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
    )
    # NOTE(review): a 1x1 structuring element should make both morphology
    # steps no-ops - confirm whether a larger kernel was intended.
    kernel = np.ones((1,1), np.uint8)
    opened = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=1)
    return cv2.morphologyEx(opened, cv2.MORPH_CLOSE, kernel, iterations=1)

def retinanet_tesseract_pipeline(input_dir, out_dir="retina_ocr_out", det_th=0.30, pad=4, save_annot=True):
    """Detect text lines in every image of ``input_dir``, OCR each box and write a CSV.

    Args:
        input_dir: Folder scanned (non-recursively) for image files.
        out_dir: Output folder; receives ``results.csv`` and ``annotated/``.
        det_th: Detection score threshold passed to ``infer``.
        pad: Pixels added on every side of a box before cropping, clamped to
            the image bounds.
        save_annot: When true, save an annotated copy of each image.

    Returns:
        The path of the written CSV file as a string.
    """
    input_dir = Path(input_dir)
    out_dir = Path(out_dir); out_dir.mkdir(parents=True, exist_ok=True)
    ann_dir = out_dir / "annotated"; ann_dir.mkdir(exist_ok=True)
    csv_path = out_dir / "results.csv"

    # ensure weights loaded
    model.load_state_dict(torch.load('/content/retina_textline_best.pth', map_location=device))
    model.to(device)
    model.eval()

    image_exts = {".jpg",".jpeg",".png",".bmp",".tif",".tiff"}

    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["image","det_id","x1","y1","x2","y2","det_score","ocr_text","ocr_conf"])

        image_paths = sorted(p for p in input_dir.glob('*.*') if p.suffix.lower() in image_exts)

        for img_path in tqdm(image_paths, desc="Processing Images"):
            # infer() decodes the file with PIL, which does not apply EXIF
            # orientation, whereas cv2.imread does by default. Ignore the
            # orientation tag here so the boxes line up with these pixels.
            bgr = cv2.imread(str(img_path), cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            if bgr is None:
                # Unreadable or unsupported file.
                continue
            H, W = bgr.shape[:2]

            # Re-using the infer function for consistency
            boxes, scores, _ = infer(str(img_path), th=det_th)

            # Process detections left to right.
            order = np.argsort(boxes[:, 0]) if len(boxes) > 0 else []
            drawn = bgr.copy()

            for det_id, idx in enumerate(order):
                x1, y1, x2, y2 = boxes[idx].astype(int)
                x1 = max(0, x1 - pad); y1 = max(0, y1 - pad)
                x2 = min(W, x2 + pad); y2 = min(H, y2 + pad)
                roi = bgr[y1:y2, x1:x2]
                if roi.size == 0:
                    continue

                # Apply the CLAHE + adaptive-threshold preprocessing
                preprocessed_roi = preprocess_for_ocr(roi)
                text, mconf = ocr(preprocessed_roi)

                w.writerow([img_path.name, det_id, x1, y1, x2, y2, float(scores[idx]), text, round(float(mconf), 3)])

                if save_annot:
                    label = f"{int(scores[idx]*100)}%" if LABEL_MODE=="confidence" else text
                    draw_box_with_alpha(drawn, x1, y1, x2, y2, color=(0, 200, 0), label_text=label)

            if save_annot:
                cv2.imwrite(str(ann_dir / img_path.name), drawn)

    return str(csv_path)

# Example usage (edit input folder):
# from tqdm.notebook import tqdm # Use notebook-friendly progress bar
# csv_file = retinanet_tesseract_pipeline(input_dir=TEST_DIR, out_dir="/content/retina_ocr_out_v2", det_th=0.35)
# print("CSV:", csv_file)

Blok 4(26.08)

In [None]:
# 🖼️ Interactive image picker → RetinaNet(best) + OCR
# Works in Jupyter/Colab using ipywidgets. It lists images from TEST_DIR and VAL_DIR.

import glob
from ipywidgets import widgets
from IPython.display import display
from pathlib import Path
import cv2
import numpy as np

# Helper functions for OCR and drawing (error tolerance)
def preprocess_and_segment_digits(roi_bgr, min_area_ratio=0.01, aspect_ratio_range=(0.1, 2.5)):
    """Binarise a BGR crop and cut it into per-component images, left to right.

    The crop is converted to grayscale, CLAHE-equalised, adaptively
    thresholded (inverted) and cleaned with a 2x2 opening and closing.
    4-connected components are then filtered: a component is kept when its
    area is at least ``min_area_ratio`` of the crop area and its width/height
    ratio lies strictly inside ``aspect_ratio_range``. Each kept component is
    cropped from the binary image and padded with a 10-pixel black border.

    Returns:
        The padded component images ordered by their left edge.
    """
    gray = cv2.cvtColor(roi_bgr, cv2.COLOR_BGR2GRAY)
    equalised = cv2.createCLAHE(clipLimit=2.0, tileGridSize=(8, 8)).apply(gray)
    binary = cv2.adaptiveThreshold(
        equalised, 255, cv2.ADAPTIVE_THRESH_GAUSSIAN_C, cv2.THRESH_BINARY_INV, 11, 2
    )
    kernel = np.ones((2, 2), np.uint8)
    cleaned = cv2.morphologyEx(binary, cv2.MORPH_OPEN, kernel, iterations=1)
    cleaned = cv2.morphologyEx(cleaned, cv2.MORPH_CLOSE, kernel, iterations=1)

    num_labels, _labels, stats, _centroids = cv2.connectedComponentsWithStats(cleaned, 4, cv2.CV_32S)
    min_area = roi_bgr.shape[0] * roi_bgr.shape[1] * min_area_ratio

    pieces = []
    # Label 0 is skipped; the loop starts at 1.
    for label in range(1, num_labels):
        x, y, w, h, area = stats[label]
        if area < min_area:
            continue
        ratio = w / h
        if not (aspect_ratio_range[0] < ratio < aspect_ratio_range[1]):
            continue
        glyph = cleaned[y:y+h, x:x+w]
        padded = cv2.copyMakeBorder(glyph, 10, 10, 10, 10, cv2.BORDER_CONSTANT, value=[0,0,0])
        pieces.append((x, padded))

    pieces.sort(key=lambda item: item[0])
    return [img for _x, img in pieces]

def ocr_single_digit(digit_image):
    """OCR one segmented image in Tesseract's single-character, digits-only mode.

    Returns ``(text, confidence)`` where ``text`` keeps only digit characters
    and ``confidence`` is a placeholder: 1.0 when any digit was read,
    otherwise 0.0 (no per-character score is queried from Tesseract).
    """
    raw = pytesseract.image_to_string(
        digit_image,
        config='--oem 1 --psm 10 -c tessedit_char_whitelist=0123456789',
    )
    digits = ''.join(filter(str.isdigit, raw))
    score = 100 if digits else 0
    return digits, score / 100.0

def draw_box_with_alpha(img_bgr, x1, y1, x2, y2, color=(0,200,0), alpha=0.75, label_text=None):
    """Draw a translucent filled box, a solid outline and an optional label.

    ``img_bgr`` is modified in place. The fill is blended with weight
    ``alpha`` against ``1 - alpha`` for the existing pixels. When
    ``label_text`` is non-empty it is written at ``x1``, six pixels above
    ``y1`` but never higher than y=16.
    """
    top_left = (x1, y1)
    bottom_right = (x2, y2)

    # Paint the solid fill on a copy, then blend it back into img_bgr.
    filled = img_bgr.copy()
    cv2.rectangle(filled, top_left, bottom_right, color, -1)
    cv2.addWeighted(filled, alpha, img_bgr, 1 - alpha, 0, img_bgr)

    cv2.rectangle(img_bgr, top_left, bottom_right, color, 2)
    if not label_text:
        return
    label_y = max(16, y1-6)
    cv2.putText(img_bgr, label_text, (x1, label_y), cv2.FONT_HERSHEY_SIMPLEX, 0.6, color, 2, cv2.LINE_AA)

# Detection score threshold used by the interactive picker.
CONF_THRESH = 0.50
# "ocr" labels boxes with the recognised text; "confidence" with the score.
LABEL_MODE = "ocr"

# gather images
# Keep image files only: "*.*" also matches the COCO annotation JSON files in
# these folders, and the "<stem>_annot.jpg" copies this picker writes there.
_IMAGE_EXTS = ('.jpg','.jpeg','.png','.bmp','.tif','.tiff')
_candidates = sorted({
    p for p in glob.glob(f"{TEST_DIR}/*.*") + glob.glob(f"{VAL_DIR}/*.*")
    if p.lower().endswith(_IMAGE_EXTS) and not Path(p).stem.endswith('_annot')
})
if not _candidates:
    print("No images found.")
else:
    dd = widgets.Dropdown(options=_candidates, description='Image:')
    btn = widgets.Button(description='Run detection + OCR', button_style='success')
    out = widgets.Output()

    def _on_click(_):
        """Detect text lines in the selected image and OCR them digit by digit.

        Loads the best checkpoint, runs ``infer``, segments every detected
        region with ``preprocess_and_segment_digits``, OCRs each segment with
        ``ocr_single_digit``, saves an annotated copy next to the source image
        as ``<stem>_annot.jpg`` and previews it inline.
        """
        out.clear_output()
        with out:
            img_path = dd.value
            print("Selected:", img_path)
            model.load_state_dict(torch.load('/content/retina_textline_best.pth', map_location=device))
            model.eval()
            boxes, scores, rgb = infer(img_path, th=CONF_THRESH)
            bgr = cv2.cvtColor(rgb, cv2.COLOR_RGB2BGR)
            # Draw on a copy so every ROI is cropped from untouched pixels;
            # otherwise overlapping boxes would be segmented through the
            # overlay of a previously drawn box.
            annotated = bgr.copy()
            results = []

            # ===== UPDATED MAIN LOOP =====
            for bx, sc in zip(boxes, scores):
                x1, y1, x2, y2 = map(int, bx)
                roi = bgr[max(0, y1):y2, max(0, x1):x2]
                if roi.size == 0: continue

                segmented_digit_images = preprocess_and_segment_digits(roi)

                # Read the segments left to right and concatenate the digits.
                digit_texts = []
                confidences = []
                for digit_img in segmented_digit_images:
                    digit_txt, digit_conf = ocr_single_digit(digit_img)
                    digit_texts.append(digit_txt)
                    if digit_conf > 0: confidences.append(digit_conf)
                full_text = "".join(digit_texts)

                avg_conf = np.mean(confidences) if confidences else 0.0

                results.append({'box': bx.tolist(), 'det': float(sc), 'text': full_text, 'conf': float(avg_conf)})
                label = f"{int(sc*100)}%" if LABEL_MODE == "confidence" else full_text
                draw_box_with_alpha(annotated, x1, y1, x2, y2, color=(0, 200, 0), label_text=label)

            OUT_ANNOT = str(Path(img_path).with_name(Path(img_path).stem + "_annot.jpg"))
            cv2.imwrite(OUT_ANNOT, annotated)
            # Report detections left to right.
            results.sort(key=lambda r: r['box'][0])
            print("Saved:", OUT_ANNOT)
            print("Detections:", len(results))
            print(results)

            import matplotlib.pyplot as plt
            plt.figure(figsize=(12,8))
            plt.imshow(cv2.cvtColor(annotated, cv2.COLOR_BGR2RGB))
            plt.axis('off'); plt.show()

    btn.on_click(_on_click)
    display(widgets.VBox([dd, btn, out]))

Blok 5(26.08)

In [None]:
# ✅ Two-stage pipeline: RetinaNet boxes -> Tesseract OCR inside boxes (batch)
import csv
from pathlib import Path

# Note: the required helper functions are already defined in the previous cell, so they are not redefined here.
# If you run this cell on its own, copy preprocess_and_segment_digits and ocr_single_digit
# into it as well.

def retinanet_tesseract_pipeline(input_dir, out_dir="retina_ocr_out", det_th=0.30, pad=4, save_annot=True):
    """Detect text lines in every image of ``input_dir``, OCR them digit by digit and write a CSV.

    Args:
        input_dir: Folder scanned (non-recursively) for .jpg/.jpeg/.png files.
        out_dir: Output folder; receives ``results.csv`` and ``annotated/``.
        det_th: Detection score threshold passed to ``infer``.
        pad: Pixels added on every side of a box before cropping, clamped to
            the image bounds.
        save_annot: When true, save an annotated copy of each image.

    Returns:
        The path of the written CSV file as a string.
    """
    input_dir = Path(input_dir)
    out_dir = Path(out_dir); out_dir.mkdir(parents=True, exist_ok=True)
    ann_dir = out_dir / "annotated"; ann_dir.mkdir(exist_ok=True)
    csv_path = out_dir / "results.csv"

    model.load_state_dict(torch.load('/content/retina_textline_best.pth', map_location=device))
    model.to(device)
    model.eval()

    image_exts = {".jpg",".jpeg",".png"}

    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        w = csv.writer(f)
        w.writerow(["image","det_id","x1","y1","x2","y2","det_score","ocr_text","ocr_conf"])

        image_paths = sorted(p for p in input_dir.glob('*.*') if p.suffix.lower() in image_exts)

        for img_path in tqdm(image_paths, desc="Processing Images"):
            # infer() decodes the file with PIL, which does not apply EXIF
            # orientation, whereas cv2.imread does by default. Ignore the
            # orientation tag here so the boxes line up with these pixels.
            bgr = cv2.imread(str(img_path), cv2.IMREAD_COLOR | cv2.IMREAD_IGNORE_ORIENTATION)
            if bgr is None: continue
            H, W = bgr.shape[:2]

            boxes, scores, _ = infer(str(img_path), th=det_th)
            # Process detections left to right.
            order = np.argsort(boxes[:, 0]) if len(boxes) > 0 else []
            drawn = bgr.copy()

            # ===== UPDATED MAIN LOOP =====
            for det_id, idx in enumerate(order):
                x1, y1, x2, y2 = boxes[idx].astype(int)
                x1 = max(0, x1 - pad); y1 = max(0, y1 - pad)
                x2 = min(W, x2 + pad); y2 = min(H, y2 + pad)
                roi = bgr[y1:y2, x1:x2]
                if roi.size == 0: continue

                segmented_digit_images = preprocess_and_segment_digits(roi)
                # Read the segments left to right and concatenate the digits.
                digit_texts = []
                confidences = []
                for digit_img in segmented_digit_images:
                    digit_txt, digit_conf = ocr_single_digit(digit_img)
                    digit_texts.append(digit_txt)
                    if digit_conf > 0: confidences.append(digit_conf)
                full_text = "".join(digit_texts)

                avg_conf = np.mean(confidences) if confidences else 0.0

                w.writerow([img_path.name, det_id, x1, y1, x2, y2, float(scores[idx]), full_text, round(float(avg_conf), 3)])

                if save_annot:
                    label = f"{int(scores[idx]*100)}%" if LABEL_MODE == "confidence" else full_text
                    draw_box_with_alpha(drawn, x1, y1, x2, y2, color=(0, 200, 0), label_text=label)

            if save_annot:
                cv2.imwrite(str(ann_dir / img_path.name), drawn)

    return str(csv_path)

# Örnek Kullanım:
# from tqdm.notebook import tqdm
# csv_file = retinanet_tesseract_pipeline(input_dir=TEST_DIR, out_dir="/content/retina_ocr_out_v2", det_th=0.35)
# print("CSV:", csv_file)