
# 01 — PDF Blocks with MCP OCR (Tesseract, EasyOCR, Paddle, Surya)

Compare multiple OCR engines **via MCP-friendly REST** (POST `/ocr`) on each PDF page.
This notebook:
- Extracts **native** text (PyMuPDF) for vector PDFs and rasterizes every page with PyMuPDF for OCR (PyMuPDF is required; there is no pdf2image fallback)
- Calls any configured **MCP OCR servers**:
  - **Tesseract MCP** (`TESS_MCP_URL`)
  - **EasyOCR MCP** (`EASYOCR_MCP_URL`)
  - **PaddleOCR MCP** (`PADDLE_MCP_URL`)
  - **Surya OCR MCP** (`SURYA_MCP_URL`)
  - **Docling MCP** (`DOCLING_MCP_URL`)
- Normalizes blocks, **merges & deduplicates**, saves per-page JSON and overlay PNGs
- Writes a **comparison CSV**: per-engine page stats (blocks, chars, mean conf)

> **Server contract**: Endpoint must accept `POST /ocr` with form `image=<file>, lang=<code>` and return JSON with either:
> - `{"text": "...", "avg_confidence": 0.0}` (text-only), **or**
> - `{"blocks":[{"text": "...", "confidence": 0.95, "bbox":[x0,y0,x1,y1]}, ...]}` (or `lines`/`results` with poly boxes)


In [8]:
# %% [markdown]
# # 01 — PDF Blocks with MCP OCR (Tesseract, EasyOCR, Paddle, Surya)
# 
# Compare multiple OCR engines via MCP-friendly REST (POST `/ocr`) on each PDF page.

# %%
# --------------------------- CONFIG ----------------------------------------
import os, sys, json, time, base64, math, traceback
from pathlib import Path
from typing import Dict, Any, List, Tuple, Optional
import requests
from PIL import Image
import fitz  # PyMuPDF

# --- Input document, output folder, raster resolution, OCR language ---------
pdf_path = "input_pdfs/ET1-Adobe Scan 10 Sept 2025.pdf"
output_dir = "outputs/run_mcp/01_blocks"
dpi = 300
ocr_lang = "en"  # one of: 'en', 'hi', 'te', 'mr', 'ta'

# --- Engine switches: only engines set to True are pinged and called --------
USE_NATIVE = False
USE_TESS_MCP = False
USE_EASYOCR_MCP = False
USE_PADDLE_MCP = True
USE_SURYA_MCP = False
USE_DOCLING_MCP = False

# --- Endpoint URLs -----------------------------------------------------------
# Each URL comes from its environment variable when set, otherwise from the
# local default; a value that is empty after stripping becomes None.
MCP_ENDPOINTS: Dict[str, Optional[str]] = {
    engine: (os.getenv(env_var, fallback).strip() or None)
    for engine, env_var, fallback in (
        ("tesseract", "TESS_MCP_URL", "http://127.0.0.1:8089/ocr"),
        ("easyocr", "EASYOCR_MCP_URL", "http://127.0.0.1:8092/ocr"),
        ("paddle", "PADDLE_MCP_URL", "http://127.0.0.1:8090/ocr"),
        ("surya", "SURYA_MCP_URL", "http://127.0.0.1:8091/ocr"),
        ("docling", "DOCLING_MCP_URL", "http://127.0.0.1:8093/ocr"),
    )
}

# --- Preprocessing: grey out top/bottom bands before OCR --------------------
mask_banners = True
banner_top_pct = 0.18
banner_bot_pct = 0.20

# --- Merge / filtering thresholds --------------------------------------------
min_conf = 0.50          # OCR blocks below this confidence are dropped
line_join_px = 14        # max top-edge distance for blocks to share a line
para_join_px = 26        # max gap between lines merged into one paragraph
dedup_iou_thr = 0.50     # box overlap needed before two blocks are compared
dedup_sim_thr = 0.92     # text similarity (0..1) that marks a duplicate
native_len_gate = 100    # min native characters before native text is merged

# --- Visualization -----------------------------------------------------------
make_viz_png = True

# --- Output directory (created on first run) ---------------------------------
out_dir = Path(output_dir).expanduser().resolve()
out_dir.mkdir(parents=True, exist_ok=True)

print("[INFO] PDF:", Path(pdf_path).expanduser().resolve())
print("[INFO] Out:", out_dir)
print("[INFO] Endpoints:", json.dumps(MCP_ENDPOINTS, indent=2))

# %%
# --------------------------- IMPORTS & SETUP -------------------------------
import io, json, math, warnings
warnings.filterwarnings("ignore")

from typing import List, Dict, Any
import numpy as np
from PIL import Image, ImageDraw

# Probe for PyMuPDF. Only a failed import should flip the flag; a bare
# `except:` would also swallow KeyboardInterrupt/SystemExit.
try:
    import fitz as _fitz
    HAVE_FITZ = True
except ImportError:
    HAVE_FITZ = False

# Prefer rapidfuzz for text similarity; fall back to the standard library
# when it is not installed.
try:
    from rapidfuzz.fuzz import ratio as fuzz_ratio
except ImportError:
    from difflib import SequenceMatcher

    def fuzz_ratio(a, b):
        """Return the similarity of strings *a* and *b* on a 0-100 scale."""
        return int(100 * SequenceMatcher(None, a, b).ratio())

# Language fallback chains: English stands alone, every other supported code
# is followed by English.
LANG_MAP = {
    code: ([code] if code == "en" else [code, "en"])
    for code in ("en", "hi", "te", "mr", "ta")
}
# Unknown language codes fall back to English only.
langs = LANG_MAP.get(ocr_lang.lower(), ["en"])

# %%
# ------------------------------ UTILITIES ----------------------------------
def page_to_image(doc, page_index: int, dpi: int=300) -> Image.Image:
    """Rasterize page *page_index* of *doc* into an RGB PIL image.

    The page is rendered without an alpha channel through a uniform scaling
    matrix whose factor is ``dpi / 72``.
    """
    scale = dpi / 72
    pixmap = doc[page_index].get_pixmap(matrix=_fitz.Matrix(scale, scale), alpha=False)
    return Image.frombytes("RGB", [pixmap.width, pixmap.height], pixmap.samples)

def mask_bands(pil: Image.Image, top_pct: float, bot_pct: float) -> Image.Image:
    """Return a copy of *pil* with its top and bottom bands painted light grey.

    *top_pct* / *bot_pct* are fractions of the image height and are clamped to
    the range 0..0.45. When the module-level ``mask_banners`` switch is off
    the input image itself is returned unchanged.
    """
    if not mask_banners:
        return pil

    width, height = pil.size

    def band_px(fraction):
        # Clamp the fraction, then convert it to a whole number of pixel rows.
        return int(height * max(0, min(0.45, fraction)))

    top_px = band_px(top_pct)
    bot_px = band_px(bot_pct)

    masked = pil.copy()
    painter = ImageDraw.Draw(masked)
    grey = (240,240,240)
    bands = (
        (top_px, [0, 0, width, top_px]),
        (bot_px, [0, height - bot_px, width, height]),
    )
    for rows, box in bands:
        if rows > 0:
            painter.rectangle(box, fill=grey)
    return masked

def blocks_sort_key(b):
    """Sort key for a block: (top, left) of its bbox, each rounded to 0.1."""
    left, top = b["bbox"][0], b["bbox"][1]
    return (round(top, 1), round(left, 1))

def iou(a, b) -> float:
    """Return the intersection-over-union of two ``(x0, y0, x1, y1)`` boxes.

    Boxes that do not overlap (or only touch) yield 0.0.
    """
    ax0, ay0, ax1, ay1 = a
    bx0, by0, bx1, by1 = b

    overlap_w = max(0.0, min(ax1, bx1) - max(ax0, bx0))
    overlap_h = max(0.0, min(ay1, by1) - max(ay0, by0))
    overlap = overlap_w * overlap_h
    if overlap <= 0:
        return 0.0

    area_a = (ax1 - ax0) * (ay1 - ay0)
    area_b = (bx1 - bx0) * (by1 - by0)
    # The floor on the denominator guards against division by zero.
    return overlap / max(1e-6, area_a + area_b - overlap)

def section_of(bbox, page_h):
    """Classify a box as "header", "footer" or "body" by its vertical center.

    Centers above 25% of *page_h* are headers, centers below 85% are footers,
    everything else is body.
    """
    center_y = 0.5 * (bbox[1] + bbox[3])
    if center_y < 0.25*page_h:
        return "header"
    return "footer" if center_y > 0.85*page_h else "body"

# %%
# ------------------------------ NATIVE -------------------------------------
def extract_native(page) -> List[Dict[str,Any]]:
    """Collect the embedded (non-OCR) text blocks of a PyMuPDF page.

    Each block tuple from ``page.get_text("blocks")`` becomes a dict with
    ``bbox`` (floats, in the coordinates the page reports), stripped ``text``,
    ``source`` = "native" and ``confidence`` = 1.0. Blocks with no text are
    skipped. The result is sorted top-to-bottom, then left-to-right.
    """
    out = []
    for b in page.get_text("blocks"):
        if len(b) < 5:
            continue
        # PyMuPDF block tuples are (x0, y0, x1, y1, text, block_no, block_type);
        # a non-zero block_type marks an image block whose "text" is only a
        # placeholder description, so it must not be reported as page text.
        if len(b) >= 7 and b[6] != 0:
            continue
        x0, y0, x1, y1, txt = b[:5]
        text = str(txt).strip() if txt else ""
        if not text:
            continue
        out.append({
            "bbox": [float(x0), float(y0), float(x1), float(y1)],
            "text": text,
            "source": "native",
            "confidence": 1.0,
        })
    return sorted(out, key=blocks_sort_key)

# %%
# ------------------------------ MCP CLIENTS --------------------------------
def pick_lang(langs):
    """Return the first entry of *langs* found in the supported set, else "en"."""
    supported = {"en","hi","te","mr","ta"}
    for code in langs:
        if code in supported:
            return code
    return "en"

def parse_mcp_json(js, w, h, source_tag="mcp"):
    """Parse an MCP OCR response into normalized blocks.

    Args:
        js: Decoded JSON response. Anything that is not a dict yields ``[]``.
        w, h: Size of the image that was sent; used as the bounding box when
            the response carries no usable box.
        source_tag: Value stored under ``"source"`` in every returned block.

    Returns:
        A list of ``{"bbox": [x0, y0, x1, y1], "text", "confidence", "source"}``
        dicts. The first non-empty list found under one of the known container
        keys is used; items need both text and a box to be kept. If no
        container is present, a top-level ``"text"`` becomes one full-image
        block scored by ``"avg_confidence"``.

    Text that is ``None`` or not a string is tolerated (a null ``"text"``
    used to raise ``AttributeError``), and ``<br>`` markers are replaced
    before stripping so no stray whitespace or whitespace-only block remains.
    """
    if not isinstance(js, dict):
        return []

    def is_numeric_seq(seq, length):
        return (
            isinstance(seq, (list, tuple))
            and len(seq) == length
            and all(isinstance(v, (int, float)) for v in seq)
        )

    def to_bbox(bb):
        if not bb:
            return (0,0,w,h)
        # [x0,y0,x1,y1]
        if is_numeric_seq(bb, 4):
            return tuple(float(x) for x in bb)
        # flat polygon [x0,y0,x1,y1,x2,y2,x3,y3]
        if is_numeric_seq(bb, 8):
            xs, ys = bb[0::2], bb[1::2]
            return (float(min(xs)), float(min(ys)), float(max(xs)), float(max(ys)))
        # list of [x,y] points
        if isinstance(bb, (list, tuple)) and isinstance(bb[0], (list, tuple)) and len(bb[0]) == 2:
            xs = [p[0] for p in bb]; ys = [p[1] for p in bb]
            return (float(min(xs)), float(min(ys)), float(max(xs)), float(max(ys)))
        # Unknown shape: fall back to the whole image.
        return (0,0,w,h)

    def norm_conf(c):
        # Accept 0..1 as well as 0..100 scales; anything unparsable counts as 0.
        try:
            c = float(c)
        except (TypeError, ValueError, OverflowError):
            c = 0.0
        if c > 1.0:
            c = c / 100.0
        return max(0.0, min(1.0, c))

    def clean_text(value):
        if value is None:
            return ""
        if not isinstance(value, str):
            value = str(value)
        # Replace line-break markers first, then strip, so a trailing "<br>"
        # does not leave whitespace behind.
        return value.replace("<br>", " ").replace("<BR>", " ").strip()

    # Check for blocks/lines/results containers
    for key in ("blocks","lines","results","predictions","preds","data"):
        arr = js.get(key)
        if not (isinstance(arr, list) and arr):
            continue
        out = []
        for item in arr:
            if not isinstance(item, dict):
                continue
            txt = clean_text(item.get("text"))
            conf = item.get("confidence", item.get("score", 1.0))
            box = item.get("bbox", item.get("box", item.get("points", item.get("polygon"))))
            if txt and box is not None:
                x0, y0, x1, y1 = to_bbox(box)
                out.append({
                    "bbox": [x0,y0,x1,y1],
                    "text": txt,
                    "confidence": norm_conf(conf),
                    "source": source_tag
                })
        # The first non-empty container wins, even if none of its items parsed.
        return out

    # Fallback: plain text covering the whole image
    txt = clean_text(js.get("text"))
    if not txt:
        return []
    return [{
        "bbox": [0,0,w,h],
        "text": txt,
        "confidence": norm_conf(js.get("avg_confidence", 0.0)),
        "source": source_tag
    }]

def mcp_ocr(pil_img, url, langs, tag, engine_tag=None):
    """POST *pil_img* as PNG to an MCP OCR endpoint and return its blocks.

    Returns ``[]`` when *url* is falsy or when the request / JSON decoding
    fails (a warning is printed). Otherwise the response is normalized with
    ``parse_mcp_json``, blocks below the module-level ``min_conf`` are
    dropped, and the rest is returned in reading order. *tag* labels the
    blocks and log lines; *engine_tag* only selects the request timeout.
    """
    if not url:
        return []

    try:
        # Heavy models such as docling get a longer timeout.
        request_timeout = 120
        if engine_tag == "docling":
            request_timeout = 600

        png = io.BytesIO()
        pil_img.save(png, format="PNG")
        png.seek(0)
        resp = requests.post(
            url,
            data={"lang": pick_lang(langs)},
            files={"image": ("page.png", png.getvalue(), "image/png")},
            timeout=request_timeout,
        )
        resp.raise_for_status()
        js = resp.json()

        # Debug logging
        if isinstance(js, dict) and "blocks" in js:
            print(f"  [{tag}] Server returned {len(js['blocks'])} blocks")
    except Exception as e:
        print(f"  [WARN] {tag} request failed: {e}")
        return []

    w, h = pil_img.size
    parsed = parse_mcp_json(js, w, h, source_tag=tag)
    print(f"  [{tag}] After parsing: {len(parsed)} blocks")

    # Confidence filter
    kept = []
    for blk in parsed:
        if blk.get("text") and float(blk.get("confidence",0.0)) >= min_conf:
            kept.append(blk)
    print(f"  [{tag}] After filtering (>={min_conf}): {len(kept)} blocks")

    return sorted(kept, key=blocks_sort_key)

# %%
# --------------------------- POST-PROCESSING --------------------------------
def _combine_blocks(members: List[Dict[str,Any]]) -> Dict[str,Any]:
    """Fuse *members* into one block: joined text, union bbox, merged source tags, mean confidence."""
    # Sources may already be "+"-joined from an earlier merge, so split them
    # back into individual tags before de-duplicating.
    tags = set()
    for m in members:
        tags.update(m["source"].split("+"))
    return {
        "bbox": [
            min(m["bbox"][0] for m in members),
            min(m["bbox"][1] for m in members),
            max(m["bbox"][2] for m in members),
            max(m["bbox"][3] for m in members),
        ],
        "text": " ".join(m["text"] for m in members if m["text"]).strip(),
        "source": "+".join(sorted(tags)),
        "confidence": sum(m.get("confidence",1.0) for m in members)/len(members),
    }


def regroup_lines(blocks: List[Dict[str,Any]], line_gap:int=14, para_gap:int=26)->List[Dict[str,Any]]:
    """Regroup blocks into lines and then into paragraphs.

    Blocks are visited in reading order. A block joins the current line when
    its top edge is within *line_gap* of the previous block's top edge; a
    line joins the current paragraph when its top edge is within *para_gap*
    of the previous line's bottom edge.

    Returns one dict per paragraph with the union ``bbox``, space-joined
    ``text``, ``source`` (unique tags, sorted, joined with "+") and the mean
    ``confidence`` of its lines. An empty input yields ``[]``.
    """
    if not blocks:
        return []
    ordered = sorted(blocks, key=blocks_sort_key)

    # Pass 1: split the ordered blocks into rows.
    rows = [[ordered[0]]]
    for b in ordered[1:]:
        if abs(b["bbox"][1] - rows[-1][-1]["bbox"][1]) <= line_gap:
            rows[-1].append(b)
        else:
            rows.append([b])

    # Each row becomes one line, read left to right.
    lines = [_combine_blocks(sorted(row, key=lambda x: x["bbox"][0])) for row in rows]

    # Pass 2: stack consecutive lines into paragraphs.
    groups = [[lines[0]]]
    for ln in lines[1:]:
        if abs(ln["bbox"][1] - groups[-1][-1]["bbox"][3]) <= para_gap:
            groups[-1].append(ln)
        else:
            groups.append([ln])

    return [_combine_blocks(group) for group in groups]

def deduplicate(blocks: List[Dict[str,Any]], iou_thr:float=0.45, sim_thr:float=0.90)->List[Dict[str,Any]]:
    """Drop blocks that repeat an already accepted block.

    Candidates are visited by descending confidence (ties: shorter text
    first). A candidate is discarded when some accepted block overlaps it
    with IoU >= *iou_thr* and their lower-cased texts are at least *sim_thr*
    similar (0..1). Blocks with blank text are dropped. The survivors are
    returned in reading order.
    """
    ranked = sorted(blocks, key=lambda x: (-x.get("confidence",1.0), len(x.get("text",""))))
    kept = []
    for cand in ranked:
        text = (cand.get("text","") or "").strip()
        if not text:
            continue
        is_duplicate = any(
            iou(cand["bbox"], prev["bbox"]) >= iou_thr
            and fuzz_ratio(text.lower(), prev["text"].lower())/100.0 >= sim_thr
            for prev in kept
        )
        if not is_duplicate:
            kept.append(cand)
    return sorted(kept, key=blocks_sort_key)

def merge_ensemble(native: List[Dict], ocrs: List[List[Dict]], page_h: int) -> List[Dict]:
    """Pool native and OCR blocks, regroup and deduplicate them, and tag sections.

    Native blocks come first, followed by each engine's blocks in the order
    given. Regrouping and deduplication use the module-level join and dedup
    thresholds. Every surviving block gets a ``"section"`` key derived from
    its bbox and *page_h*.
    """
    pooled = list(native)
    for engine_blocks in ocrs:
        pooled += engine_blocks

    result = deduplicate(
        regroup_lines(pooled, line_join_px, para_join_px),
        dedup_iou_thr,
        dedup_sim_thr,
    )
    for blk in result:
        blk["section"] = section_of(blk["bbox"], page_h)
    return result

# %%
# ------------------------------- HEALTH CHECK -------------------------------
def ping_endpoint(url, langs=langs, engine_tag=None):
    """Check an OCR endpoint by POSTing a blank 32x32 white PNG to it.

    Returns ``(True, "ok")`` when the server answers with a success status
    and a JSON body, ``(False, "unset")`` when *url* is falsy, and
    ``(False, <error text>)`` for any failure. *engine_tag* only selects the
    request timeout.
    """
    if not url:
        return False, "unset"

    try:
        # Heavy models such as docling get a longer timeout.
        timeout = 10 if engine_tag != "docling" else 300

        probe = io.BytesIO()
        Image.new("RGB", (32, 32), (255,255,255)).save(probe, format="PNG")
        probe.seek(0)
        resp = requests.post(
            url,
            data={"lang": pick_lang(langs)},
            files={"image":("tiny.png", probe.getvalue(), "image/png")},
            timeout=timeout,
        )
        resp.raise_for_status()
        resp.json()  # the body must decode as JSON for the endpoint to count as healthy
    except Exception as e:
        return False, str(e)
    return True, "ok"

# Warmup Docling if endpoint available (pre-loads model)
if MCP_ENDPOINTS.get("docling"):
    try:
        print("[INFO] Warming up Docling...")
        # Build the sibling /warmup URL by removing a literal trailing "/ocr".
        # str.rstrip("/ocr") strips any trailing run of the characters
        # '/', 'o', 'c', 'r' and can eat into the host name
        # (e.g. "http://doc/ocr" -> "http://d").
        warmup_base = MCP_ENDPOINTS["docling"].rstrip("/")
        if warmup_base.endswith("/ocr"):
            warmup_base = warmup_base[: -len("/ocr")]
        warmup_url = warmup_base + "/warmup"
        r = requests.get(warmup_url, timeout=600)
        r.raise_for_status()
        warmup_js = r.json()
        if warmup_js.get("ok"):
            print(f"[INFO] Docling warmup OK in {warmup_js.get('seconds', 'N/A')}s")
        else:
            print(f"[WARN] Docling warmup returned {warmup_js}")
    except Exception as e:
        # Warmup is best-effort: report the failure and carry on.
        print(f"[WARN] Docling warmup failed: {e}")

# Ping every engine that is switched on and has a URL; all others are
# recorded as (False, "disabled") without any network call.
health = {
    name: (
        ping_endpoint(MCP_ENDPOINTS[name], engine_tag=name)
        if enabled and MCP_ENDPOINTS.get(name)
        else (False, "disabled")
    )
    for name, enabled in (
        ("tesseract", USE_TESS_MCP),
        ("easyocr", USE_EASYOCR_MCP),
        ("paddle", USE_PADDLE_MCP),
        ("surya", USE_SURYA_MCP),
        ("docling", USE_DOCLING_MCP),
    )
}
print("Health:", health)

# %%
# --------------------------------- MAIN -------------------------------------
pdf_path_p = Path(pdf_path).expanduser().resolve()

# The pipeline cannot run without PyMuPDF: it supplies both the native text
# and the page rasters.
if not HAVE_FITZ:
    raise RuntimeError("PyMuPDF required")
doc = _fitz.open(pdf_path_p)
total_pages = len(doc)

# Record the run parameters next to the per-page outputs.
meta = dict(
    pages=total_pages,
    dpi=dpi,
    langs=langs,
    mcp_endpoints=MCP_ENDPOINTS,
)
(out_dir/"metadata.json").write_text(json.dumps(meta, indent=2), encoding="utf-8")

def page_metrics(blocks):
    """Return ``(total characters, mean confidence, block count)`` for *blocks*.

    Only blocks that carry a ``"confidence"`` key contribute to the mean; the
    mean is 0.0 when none do. An empty input yields ``(0, 0.0, 0)``.
    """
    if not blocks:
        return 0, 0.0, 0

    total_chars = 0
    conf_values = []
    for blk in blocks:
        total_chars += len(blk.get("text",""))
        if "confidence" in blk:
            conf_values.append(float(blk.get("confidence",0.0)))

    average = sum(conf_values)/len(conf_values) if conf_values else 0.0
    return total_chars, average, len(blocks)

summary_rows = []

# page_to_image renders with zoom = dpi / 72, so the same factor converts
# native block boxes (page units) into the pixel space shared by the OCR
# blocks, the section classifier (pil.height) and the overlay. Without it,
# native and OCR boxes are merged in two different coordinate systems.
native_to_px = dpi / 72

for i in range(total_pages):
    print(f"\n[Page {i+1}/{total_pages}]")
    
    # Native + raster (the native JSON keeps the coordinates reported by the page)
    page = doc[i]
    native = extract_native(page) if USE_NATIVE else []
    (out_dir/f"page_{i+1:03d}_native.json").write_text(json.dumps(native,indent=2,ensure_ascii=False), encoding="utf-8")
    pil = page_to_image(doc, i, dpi=dpi)

    # Preprocess
    pil_base = mask_bands(pil, banner_top_pct, banner_bot_pct) if mask_banners else pil

    # Call each MCP engine (force docling if endpoint exists, even if health failed)
    o_tess = mcp_ocr(pil_base, MCP_ENDPOINTS["tesseract"], langs, "tesseract_mcp", engine_tag="tesseract") if (USE_TESS_MCP and health["tesseract"][0]) else []
    o_easy = mcp_ocr(pil_base, MCP_ENDPOINTS["easyocr"],   langs, "easyocr_mcp", engine_tag="easyocr") if (USE_EASYOCR_MCP and health["easyocr"][0]) else []
    o_padl = mcp_ocr(pil_base, MCP_ENDPOINTS["paddle"],    langs, "paddle_mcp", engine_tag="paddle") if (USE_PADDLE_MCP and health["paddle"][0]) else []
    o_sur  = mcp_ocr(pil_base, MCP_ENDPOINTS["surya"],     langs, "surya_mcp", engine_tag="surya") if (USE_SURYA_MCP and health["surya"][0]) else []
    o_doc  = mcp_ocr(pil_base, MCP_ENDPOINTS["docling"],   langs, "docling_mcp", engine_tag="docling") if USE_DOCLING_MCP and MCP_ENDPOINTS.get("docling") else []

    # Save raw per engine (only engines that returned something)
    for name, blocks in [
        ("tesseract_mcp", o_tess),
        ("easyocr_mcp", o_easy),
        ("paddle_mcp", o_padl),
        ("surya_mcp", o_sur),
        ("docling_mcp", o_doc),
    ]:
        if blocks:
            (out_dir/f"page_{i+1:03d}_ocr_{name}.json").write_text(json.dumps(blocks,indent=2,ensure_ascii=False), encoding="utf-8")

    # Merge: native text only takes part when the page has enough of it, and
    # its boxes are first scaled into raster pixels (copies, so the saved
    # native blocks and their metrics stay untouched).
    ocr_heads = [o_tess, o_easy, o_padl, o_sur, o_doc]
    native_chars = sum(len(b.get("text","")) for b in native)
    if native_chars >= native_len_gate:
        native_px = [dict(b, bbox=[v * native_to_px for v in b["bbox"]]) for b in native]
    else:
        native_px = []
    merged = merge_ensemble(native_px, ocr_heads, pil.height)

    # Save merged
    (out_dir / f"page_{i+1:03d}_blocks.json").write_text(json.dumps(merged, indent=2, ensure_ascii=False), encoding="utf-8")

    # Visualization: translucent boxes over the unmasked raster, colored by source
    if make_viz_png:
        im = pil.copy()
        dr = ImageDraw.Draw(im, "RGBA")
        for b in merged:
            x0,y0,x1,y1 = map(int, b["bbox"])
            src = (b.get("source") or "").lower()
            col = (122,199,136,80)
            # Later matches override earlier ones when a block has several sources.
            if "native"       in src: col=(147,112,219,90)   # purple
            if "tesseract_mcp"in src: col=(70,130,180,90)    # steelblue
            if "easyocr_mcp"  in src: col=(255,165,0,90)     # orange
            if "paddle_mcp"   in src: col=(34,139,34,90)     # green
            if "surya_mcp"    in src: col=(220,20,60,90)     # crimson
            if "docling_mcp"  in src: col=(255,215,0,90)     # gold
            dr.rectangle([x0,y0,x1,y1], outline=(0,0,0,180), width=2, fill=col)
        im.save(out_dir/f"page_{i+1:03d}_viz.png")

    # Metrics: one CSV row per engine (and for the merged result) per page
    for name, blocks in [
        ("native", native),
        ("tesseract_mcp", o_tess),
        ("easyocr_mcp", o_easy),
        ("paddle_mcp", o_padl),
        ("surya_mcp", o_sur),
        ("docling_mcp", o_doc),
        ("merged", merged),
    ]:
        chars, mean_conf, n = page_metrics(blocks)
        summary_rows.append({
            "page": i+1, "engine": name, "n_blocks": n, "chars": chars, "mean_conf": round(mean_conf,4)
        })

    print(f"  Summary: native={len(native)} tess={len(o_tess)} easy={len(o_easy)} "
          f"paddle={len(o_padl)} surya={len(o_sur)} docling={len(o_doc)} → merged={len(merged)}")

# Write CSV
import csv
csv_path = out_dir / "engine_comparison.csv"
with open(csv_path, "w", newline="", encoding="utf-8") as f:
    w = csv.DictWriter(f, fieldnames=["page","engine","n_blocks","chars","mean_conf"])
    w.writeheader()
    w.writerows(summary_rows)

print(f"\n✅ Done → {out_dir}")
print(f"📄 CSV → {csv_path}")

[INFO] PDF: /Users/balijepalli/Documents/GitHub/entheory-ai/notebooks/input_pdfs/ET1-Adobe Scan 10 Sept 2025.pdf
[INFO] Out: /Users/balijepalli/Documents/GitHub/entheory-ai/notebooks/outputs/run_mcp/01_blocks
[INFO] Endpoints: {
  "tesseract": "http://127.0.0.1:8089/ocr",
  "easyocr": "http://127.0.0.1:8092/ocr",
  "paddle": "http://127.0.0.1:8090/ocr",
  "surya": "http://127.0.0.1:8091/ocr",
  "docling": "http://127.0.0.1:8093/ocr"
}
[INFO] Warming up Docling...
[WARN] Docling warmup failed: 500 Server Error: Internal Server Error for url: http://127.0.0.1:8093/warmup
Health: {'tesseract': (False, 'disabled'), 'easyocr': (False, 'disabled'), 'paddle': (False, "HTTPConnectionPool(host='127.0.0.1', port=8090): Max retries exceeded with url: /ocr (Caused by NewConnectionError('<urllib3.connection.HTTPConnection object at 0x1054b4c10>: Failed to establish a new connection: [Errno 61] Connection refused'))"), 'surya': (False, 'disabled'), 'docling': (False, 'disabled')}

[Page 1/4]
  Summa

In [39]:
# Test with actual PDF page
import io, requests, json
from PIL import Image
import fitz

# Ad-hoc check of the Surya endpoint with a real page.
# NOTE: this cell rebinds the notebook-level names `doc` and `page` used by
# the main cell, and relies on `pdf_path`, `dpi` and `MCP_ENDPOINTS` from the
# config cell.

# Get first page as image (same zoom = dpi / 72 rendering as page_to_image)
doc = fitz.open(pdf_path)
page = doc[0]
zoom = dpi / 72
mat = fitz.Matrix(zoom, zoom)
pix = page.get_pixmap(matrix=mat, alpha=False)
img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)

# Send to Surya: encode the page as PNG in memory and POST it as the
# `image` form file with lang fixed to "en"
buf = io.BytesIO()
img.save(buf, format="PNG")
buf.seek(0)

print(f"Sending image: {img.size}")
r = requests.post(MCP_ENDPOINTS["surya"],
                  data={"lang":"en"},
                  files={"image":("page.png", buf.getvalue(), "image/png")},
                  timeout=120)
print(f"Status: {r.status_code}")
# The status is only printed, not checked; the body is decoded as JSON
# regardless of the status code.
result = r.json()
# Show at most the first 1000 characters of the pretty-printed response
print(f"Response: {json.dumps(result, indent=2)[:1000]}")
print(f"Blocks found: {len(result.get('blocks', []))}")
if result.get('blocks'):
    print(f"First block: {result['blocks'][0]}")

Sending image: (2200, 3300)
Status: 200
Response: {
  "engine": "surya",
  "blocks": [
    {
      "text": "\u0c05\u0c16\u0c3f\u0c32 \u0c2d\u0c3e\u0c30\u0c24 \u0c35\u0c48\u0c26\u0c4d\u0c2f \u0c35\u0c3f\u0c1c\u0c4d\u0c1e\u0c3e\u0c28 \u0c38\u0c02\u0c38\u0c4d\u0c25<br>\u0905\u0916\u093f\u0932 \u092d\u093e\u0930\u0924\u0940\u092f \u0906\u092f\u0941\u0930\u094d\u0935\u093f\u091c\u094d\u091e\u093e\u0928 \u0938\u0902\u0938\u094d\u0925\u093e\u0928, \u092c\u0940\u092c\u0940\u0928\u0917\u0930",
      "confidence": 0.9953843077023824,
      "bbox": [
        781.0,
        193.0,
        1538.0,
        314.0
      ]
    },
    {
      "text": "All India Institute of Medical Sciences, Bibinagar",
      "confidence": 0.998117997096135,
      "bbox": [
        665.0,
        296.0,
        1607.0,
        362.0
      ]
    },
    {
      "text": "Hyderabad Metropolitan Region (HMR), Telangana-508126, India",
      "confidence": 0.9985745116587608,
      "bbox": [
        693.0,
        376.0,
     