In [None]:
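# Install the notebook's dependencies if needed (use %pip so the install
# targets this kernel's environment).
# NOTE: the import path surya.model.detection.segformer used below may differ
# between surya-ocr releases -- pin the version this notebook was tested with.
# %pip install surya-ocr pillow pymupdf matplotlib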

In [None]:
import os
from pathlib import Path
import fitz  # PyMuPDF
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches
from surya.detection import batch_text_detection
from surya.model.detection.segformer import load_model, load_processor

In [None]:
# Configuration: input PDF and the folder that receives the annotated page images.
PDF_PATH = "../data/raw/fomcprojtabl20230322.pdf"  # UPDATE THIS
OUTPUT_DIR = "surya_debug"

# Create the output folder up front (no error if it already exists).
Path(OUTPUT_DIR).mkdir(parents=True, exist_ok=True)

In [None]:
# Load the Surya detection model and its processor once; later cells read
# them as the module-level names `model` and `processor`.
print("Loading Surya models...")
model, processor = load_model(), load_processor()
print("Models loaded")

In [None]:
def pdf_page_to_image(pdf_path, page_num, zoom=2.0):
    """Render a single PDF page to a PIL RGB image.

    Args:
        pdf_path: Path of the PDF file; passed straight to ``fitz.open``.
        page_num: Zero-based index of the page to render.
        zoom: Scale factor applied to both axes when rasterizing. The default
            of 2.0 matches the previous hard-coded ``fitz.Matrix(2, 2)``
            (144 DPI, i.e. twice PDF's 72 units per inch).

    Returns:
        A ``PIL.Image.Image`` in mode "RGB" holding the rendered page.
    """
    # The context manager closes the document even if page lookup or
    # rasterization raises; the original leaked the handle in that case.
    with fitz.open(pdf_path) as doc:
        page = doc[page_num]
        # alpha=False keeps the sample buffer at 3 bytes per pixel, which is
        # what the "RGB" mode passed to Image.frombytes below requires.
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
        # Build the image before the document closes, as the original did.
        img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    return img

In [None]:
def detect_and_visualize(pdf_path, page_num, output_dir):
    """Run Surya detection on one PDF page and save/show the boxes it finds.

    The page is rendered with ``pdf_page_to_image``, passed to
    ``batch_text_detection`` together with the module-level ``model`` and
    ``processor``, and every returned bounding box is drawn as a red
    rectangle over the page image. The figure is written to
    ``<output_dir>/page_<page_num + 1>.png`` and displayed.

    NOTE(review): the detector called here is ``batch_text_detection``, so the
    regions are presumably text regions rather than tables/figures -- confirm
    against the Surya version in use.

    Args:
        pdf_path: Path of the PDF file.
        page_num: Zero-based page index (titles and file names are 1-based).
        output_dir: Directory for the annotated PNG; created if missing.

    Returns:
        The detection result for the page (first element of the list returned
        by ``batch_text_detection``); its ``bboxes`` items expose a ``bbox``
        of the form ``(x1, y1, x2, y2)``.
    """
    # Convert page to image
    img = pdf_page_to_image(pdf_path, page_num)

    # Run detection on a one-image batch and take that image's result.
    result = batch_text_detection([img], model, processor)[0]
    num_regions = len(result.bboxes)

    # Do not rely on the caller having created the output directory.
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"page_{page_num + 1}.png")

    fig, ax = plt.subplots(1, 1, figsize=(15, 20))
    try:
        ax.imshow(img)

        # Draw one unfilled red rectangle per detected box.
        for bbox in result.bboxes:
            x1, y1, x2, y2 = bbox.bbox
            ax.add_patch(
                patches.Rectangle(
                    (x1, y1), x2 - x1, y2 - y1,
                    linewidth=2,
                    edgecolor='red',
                    facecolor='none',
                )
            )

        ax.set_title(f"Page {page_num + 1} - Detected {num_regions} regions", fontsize=16)
        ax.axis('off')

        # Use the explicit figure object instead of pyplot's "current figure".
        fig.tight_layout()
        fig.savefig(output_path, dpi=150, bbox_inches='tight')
        plt.show()
    finally:
        # This function is called once per page; without an explicit close
        # the large figures can pile up in memory on non-inline backends.
        plt.close(fig)

    print(f"Page {page_num + 1}: {num_regions} regions detected")
    print(f"Saved to {output_path}")

    return result

In [None]:
# Process all pages in PDF
# The document is opened here only to read its page count.
doc = fitz.open(PDF_PATH)
num_pages = len(doc)
doc.close()

print(f"Processing {num_pages} pages from {Path(PDF_PATH).name}\n")

# Collect one detection result per page, in page order; `results` is read by
# the reporting cell that follows.
results = []
for page_num in range(num_pages):
    results.append(detect_and_visualize(PDF_PATH, page_num, OUTPUT_DIR))
    print()  # blank line between pages

In [None]:
# Print bbox details: one header per page, then each region's corner
# coordinates and its width x height.
for page_num, result in enumerate(results):
    print(f"\nPage {page_num + 1}:")
    for region_no, box in enumerate(result.bboxes, start=1):
        left, top, right, bottom = box.bbox
        box_w = right - left
        box_h = bottom - top
        print(f"  Region {region_no}: [{left:.1f}, {top:.1f}, {right:.1f}, {bottom:.1f}] - Size: {box_w:.1f}x{box_h:.1f}")