In [None]:
# Install surya-ocr and its companion packages if needed (uncomment the next line)
# !pip install surya-ocr pillow pymupdf matplotlib

In [None]:
import torch

# Report which accelerators PyTorch can see before loading any model.
# torch.backends.mps.is_available() is the documented MPS check; the
# torch.mps.is_available() spelling may be missing in older PyTorch releases.
# CUDA is reported too because this notebook mounts Google Drive via Colab,
# where an Apple MPS device is presumably never present.
print(f"CUDA available: {torch.cuda.is_available()}")
print(f"MPS available: {torch.backends.mps.is_available()}")

In [None]:
import os
from pathlib import Path
import fitz  # PyMuPDF
from PIL import Image
import matplotlib.pyplot as plt
import matplotlib.patches as patches

# NEW: Import the predictor class instead of the old function
from surya.detection import DetectionPredictor

In [None]:
# Configuration
# Mount Google Drive so the source PDF is reachable from the Colab runtime.
from google.colab import drive
drive.mount('/content/drive', force_remount=True)

# Input document and the folder that receives the annotated page images.
PDF_PATH = "/content/drive/MyDrive/fomcprojtabl20200610.pdf"
OUTPUT_DIR = "surya_debug"
os.makedirs(OUTPUT_DIR, exist_ok=True)

In [None]:
# Load Surya models
# Build one DetectionPredictor and keep it in the notebook-level name
# `predictor`, which detect_and_visualize() reads. The prints bracket the
# constructor call, which presumably loads the model weights and can be slow.
print("Loading Surya models...")
predictor = DetectionPredictor()
print("Models loaded")

In [None]:
def pdf_page_to_image(pdf_path, page_num, zoom=2.0):
    """Render a single PDF page to an RGB PIL image.

    Args:
        pdf_path: Path of the PDF file to open with PyMuPDF.
        page_num: Index of the page to render, as used to index the document.
        zoom: Scale factor applied on both axes when rasterising. The default
            of 2.0 matches the previous hard-coded Matrix(2, 2), i.e. 144 DPI.

    Returns:
        A PIL ``Image`` in "RGB" mode with the pixmap's width and height.

    Any error raised by PyMuPDF (unreadable file, invalid page index, ...)
    propagates to the caller; the document is closed in every case.
    """
    # The context manager closes the document even if page lookup or
    # rendering raises, which the earlier explicit close() did not.
    with fitz.open(pdf_path) as doc:
        page = doc[page_num]
        # alpha=False keeps the sample layout at 3 bytes per pixel so it
        # matches the "RGB" mode passed to Image.frombytes below.
        pix = page.get_pixmap(matrix=fitz.Matrix(zoom, zoom), alpha=False)
        img = Image.frombytes("RGB", (pix.width, pix.height), pix.samples)
    return img

In [None]:
def detect_and_visualize(pdf_path, page_num, output_dir, detector=None):
    """Run detection on one PDF page, draw the detected boxes, and save the figure.

    Args:
        pdf_path: Path of the PDF file.
        page_num: Zero-based page index; titles and file names use page_num + 1.
        output_dir: Directory that receives ``page_<n>.png``; created if missing.
        detector: Optional callable that takes a list of PIL images and returns
            one result per image. When omitted, the notebook-level ``predictor``
            is used, which keeps existing three-argument calls working.

    Returns:
        The detector's result for this page (first element of its output).
        Each item of ``result.bboxes`` exposes ``.bbox`` as ``[x1, y1, x2, y2]``.

    NOTE(review): the original docstring said "tables/figures", but the boxes
    drawn are whatever the detector returns; Surya's DetectionPredictor is
    presumably a text-line detector - confirm before relying on region types.
    """
    if detector is None:
        # Fall back to the predictor built in the model-loading cell.
        detector = predictor

    # Rasterise the page and run detection on a one-image batch.
    img = pdf_page_to_image(pdf_path, page_num)
    result = detector([img])[0]
    regions = result.bboxes

    # Make sure the target folder exists so savefig cannot fail on a fresh run.
    os.makedirs(output_dir, exist_ok=True)
    output_path = os.path.join(output_dir, f"page_{page_num + 1}.png")

    fig, ax = plt.subplots(1, 1, figsize=(15, 20))
    try:
        ax.imshow(img)

        # Outline every detected region in red on top of the page image.
        for region in regions:
            x1, y1, x2, y2 = region.bbox
            ax.add_patch(
                patches.Rectangle(
                    (x1, y1), x2 - x1, y2 - y1,
                    linewidth=2,
                    edgecolor='red',
                    facecolor='none'
                )
            )

        ax.set_title(f"Page {page_num + 1} - Detected {len(regions)} regions", fontsize=16)
        ax.axis('off')

        # Save before showing: some backends discard the figure once shown.
        fig.tight_layout()
        fig.savefig(output_path, dpi=150, bbox_inches='tight')
        plt.show()
    finally:
        # This function is called once per page in a loop; closing the figure
        # explicitly keeps open figures from accumulating on any backend.
        plt.close(fig)

    print(f"Page {page_num + 1}: {len(regions)} regions detected")
    print(f"Saved to {output_path}")

    return result

In [None]:
# Process all pages in PDF
# Open the document only long enough to read its page count.
doc = fitz.open(PDF_PATH)
num_pages = doc.page_count
doc.close()

pdf_name = Path(PDF_PATH).name
print(f"Processing {num_pages} pages from {pdf_name}\n")

# Collect one detection result per page; list index equals the page index.
results = []
for page_num in range(num_pages):
    result = detect_and_visualize(PDF_PATH, page_num, OUTPUT_DIR)
    results.append(result)
    print()  # blank line between the per-page reports

In [None]:
# Print bbox details
# For every page, list each detected box as [x1, y1, x2, y2] plus its size.
for page_num, result in enumerate(results):
    print(f"\nPage {page_num + 1}:")
    for i, bbox in enumerate(result.bboxes, 1):
        x1, y1, x2, y2 = bbox.bbox
        width, height = x2 - x1, y2 - y1
        print(f"  Region {i}: [{x1:.1f}, {y1:.1f}, {x2:.1f}, {y2:.1f}] - Size: {width:.1f}x{height:.1f}")

# Layout detector

In [None]:
# import os
# import fitz  # PyMuPDF
# from PIL import Image
# import matplotlib.pyplot as plt
# import matplotlib.patches as patches

# # 1. Correct Import Paths
# from surya.layout import LayoutPredictor
# from surya.foundation import FoundationPredictor

# # 2. Initialize the Foundation Model first
# print("Loading Foundation model...")
# foundation_predictor = FoundationPredictor()

# # 3. Initialize Layout Predictor with the foundation model
# print("Loading Layout model...")
# layout_predictor = LayoutPredictor(foundation_predictor)

# # Define your visualization function
# def detect_layout_and_visualize(pdf_path, page_num, output_dir):
#     """Detect layout regions (Table, Figure, Text, etc.) and visualize"""

#     # Load Image
#     doc = fitz.open(pdf_path)
#     page = doc[page_num]
#     pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
#     img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
#     doc.close()

#     # Run Prediction
#     predictions = layout_predictor([img])
#     result = predictions[0]

#     # Visualize
#     fig, ax = plt.subplots(1, 1, figsize=(15, 20))
#     ax.imshow(img)

#     colors = {
#         'Text': 'red', 'Title': 'blue', 'Picture': 'green', 'Figure': 'green',
#         'Table': 'orange', 'Section-header': 'purple', 'Caption': 'cyan', 'Formula': 'magenta'
#     }

#     print(f"Page {page_num + 1} Layout:")
#     for bbox in result.bboxes:
#         label = bbox.label
#         x1, y1, x2, y2 = bbox.bbox
#         width = x2 - x1
#         height = y2 - y1

#         color = colors.get(label, 'red')

#         rect = patches.Rectangle((x1, y1), width, height, linewidth=2, edgecolor=color, facecolor='none')
#         ax.add_patch(rect)

#         ax.text(x1, y1 - 5, f"{label}", color=color, fontsize=12, weight='bold',
#                 bbox=dict(facecolor='white', alpha=0.7, edgecolor='none'))

#         print(f"  - Found {label} at [{x1:.0f}, {y1:.0f}, {x2:.0f}, {y2:.0f}]")

#     ax.axis('off')
    
#     # Define output path
#     output_path = os.path.join(output_dir, f"page_{page_num + 1}.png")
    
#     # 1. SAVE FIRST (while the figure is still active)
#     plt.savefig(output_path, dpi=150, bbox_inches='tight')
#     print(f"Saved to {output_path}")

#     # 2. THEN SHOW (this clears the figure after displaying)
#     plt.show()

#     print(f"Page {page_num + 1}: {len(result.bboxes)} regions detected")
    
#     return result

In [None]:
# # Process all pages in PDF
# doc = fitz.open(PDF_PATH)
# num_pages = len(doc)
# doc.close()

# print(f"Processing {num_pages} pages from {Path(PDF_PATH).name}\n")

# results = []
# for page_num in range(num_pages):
#     result = detect_layout_and_visualize(PDF_PATH, page_num, OUTPUT_DIR)
#     results.append(result)
#     print()
# # Print bbox details
# for page_num, result in enumerate(results):
#     print(f"\nPage {page_num + 1}:")
#     for i, bbox in enumerate(result.bboxes, 1):
#         x1, y1, x2, y2 = bbox.bbox
#         print(f"  Region {i}: [{x1:.1f}, {y1:.1f}, {x2:.1f}, {y2:.1f}] - Size: {x2-x1:.1f}x{y2-y1:.1f}")

## LayoutPredictor

In [None]:
# import os
# import fitz  # PyMuPDF
# from PIL import Image
# import matplotlib.pyplot as plt
# import matplotlib.patches as patches
# from surya.layout import LayoutPredictor
# from surya.foundation import FoundationPredictor

# # 1. Initialize Models
# print("Loading Foundation model...")
# foundation_predictor = FoundationPredictor()

# print("Loading Layout model...")
# layout_predictor = LayoutPredictor(foundation_predictor)

# def detect_layout_and_visualize(pdf_path, page_num, output_dir):
#     """
#     Detect layout regions with higher resolution and lower sensitivity threshold.
#     """
    
#     # --- FIX 1: HIGH RESOLUTION ---
#     # Open PDF
#     doc = fitz.open(pdf_path)
#     page = doc[page_num]
    
#     # Change Matrix(2, 2) to (4, 4). 
#     # This creates a larger, sharper image (approx 300 DPI) which helps 
#     # the AI distinguish between 'Lines' (Tables) and 'Text' (Headers).
#     pix = page.get_pixmap(matrix=fitz.Matrix(4, 4))
#     img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
#     doc.close()

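#     # --- FIX 2: LOWER THRESHOLD (intended, not applied here) ---
#     # The model might be ignoring the tables because it's only 40% sure.
#     # The intent is to lower the confidence threshold to 0.3 so that it reports everything.
#     # NOTE: the call below passes no threshold argument, so the predictor's default still applies.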
#     predictions = layout_predictor([img])
#     result = predictions[0]

#     # Visualize
#     fig, ax = plt.subplots(1, 1, figsize=(20, 25)) # Increased figure size for the larger image
#     ax.imshow(img)

#     colors = {
#         'Text': 'red', 'Title': 'blue', 'Picture': 'green', 'Figure': 'green',
#         'Table': 'orange', 'Section-header': 'purple', 'Caption': 'cyan', 'Formula': 'magenta'
#     }

#     print(f"\nPage {page_num + 1} Analysis:")
    
#     if not result.bboxes:
#         print("  WARNING: No regions detected. The page might be empty or scanned poorly.")

#     for bbox in result.bboxes:
#         label = bbox.label
#         x1, y1, x2, y2 = bbox.bbox
#         width = x2 - x1
#         height = y2 - y1

#         color = colors.get(label, 'red')

#         # Draw box
#         rect = patches.Rectangle((x1, y1), width, height, linewidth=3, edgecolor=color, facecolor='none')
#         ax.add_patch(rect)

#         # Draw label
#         ax.text(x1, y1 - 10, f"{label}", color=color, fontsize=14, weight='bold',
#                 bbox=dict(facecolor='white', alpha=0.8, edgecolor='none'))

#         print(f"  - Found {label}")

#     ax.axis('off')
    
#     # Save Output
#     if not os.path.exists(output_dir):
#         os.makedirs(output_dir)
        
#     output_path = os.path.join(output_dir, f"page_{page_num + 1}_layout.png")
    
#     # Save BEFORE Show
#     plt.savefig(output_path, dpi=150, bbox_inches='tight')
#     print(f"  Saved visualization to {output_path}")

#     plt.show()
    
#     return result

# # ==========================================
# # Run the test
# # ==========================================
# # Make sure your PDF_PATH is correct
# # detect_layout_and_visualize(PDF_PATH, 0, "surya_debug") # Check Page 1 specifically

In [None]:
# import fitz
# from pathlib import Path

# # 1. Get total page count first
# doc = fitz.open(PDF_PATH)
# num_pages = len(doc)
# doc.close()

# print(f"Processing {num_pages} pages from {Path(PDF_PATH).name}\n")

# results = []

# # 2. Loop through all pages
# for page_num in range(num_pages):
#     print(f"--- Processing Page {page_num + 1} ---")
#     try:
#         # Call your function
#         result = detect_layout_and_visualize(PDF_PATH, page_num, OUTPUT_DIR)
        
#         # SAFETY CHECK: Only append if we actually got a result back
#         if result:
#             results.append(result)
#         else:
#             print(f"Warning: No result returned for Page {page_num + 1}")
#             results.append(None) # Append None to keep index aligned
            
#     except Exception as e:
#         print(f"Error processing page {page_num + 1}: {e}")
#         results.append(None)

# # 3. Print details safely
# print("\n" + "="*40)
# print("FINAL SUMMARY")
# print("="*40)

# for page_num, result in enumerate(results):
#     print(f"\nPage {page_num + 1}:")
    
#     # SAFETY CHECK: If result is None, skip it
#     if result is None:
#         print("  [No data returned for this page]")
#         continue
        
#     # SAFETY CHECK: If result exists but has no boxes
#     if not hasattr(result, 'bboxes') or not result.bboxes:
#         print("  [No layout regions detected]")
#         continue

#     # Loop through boxes
#     for i, bbox in enumerate(result.bboxes, 1):
#         x1, y1, x2, y2 = bbox.bbox
#         label = bbox.label
#         width = x2 - x1
#         height = y2 - y1
#         print(f"  Region {i} ({label}): [{x1:.1f}, {y1:.1f}, {x2:.1f}, {y2:.1f}] - Size: {width:.1f}x{height:.1f}")

# Detecting tables by numeric text density with PyMuPDF

In [None]:
# import fitz  # PyMuPDF
# import matplotlib.pyplot as plt
# import matplotlib.patches as patches
# from PIL import Image
# import numpy as np

# def detect_table_by_density(pdf_path, page_num):
#     doc = fitz.open(pdf_path)
#     page = doc[page_num]
    
#     # 1. Extract all words with their coordinates
#     words = page.get_text("words")  # Returns (x0, y0, x1, y1, "word", block_no, line_no, word_no)
    
#     # 2. Filter for "Numeric" words (digits, %, etc.)
#     numeric_boxes = []
#     for w in words:
#         text = w[4]
#         # Check if text contains digits or is a common table symbol
#         if any(char.isdigit() for char in text) or text in ['-', '.', '%']:
#             numeric_boxes.append(w[:4]) # Store just the bbox (x0, y0, x1, y1)

#     if not numeric_boxes:
#         print(f"Page {page_num+1}: No numeric clusters found.")
#         return

#     # 3. Find the "Cluster" (The area with the most numbers)
#     # We find the min/max coordinates of the numbers, ignoring outliers (headers/footers)
    
#     # Convert to numpy for easy math
#     boxes = np.array(numeric_boxes)
    
#     # Simple heuristic: The table is likely the bounding box of roughly 80% of the numbers
#     # (This filters out page numbers at the bottom or dates at the top)
#     x0 = np.percentile(boxes[:, 0], 10) # 10th percentile Left
#     y0 = np.percentile(boxes[:, 1], 15) # 15th percentile Top (ignore title dates)
#     x1 = np.percentile(boxes[:, 2], 90) # 90th percentile Right
#     y1 = np.percentile(boxes[:, 3], 90) # 90th percentile Bottom
    
#     # Pad the box slightly
#     padding = 20
#     final_bbox = [x0 - padding, y0 - padding, x1 + padding, y1 + padding]

#     # 4. Visualize
#     pix = page.get_pixmap(matrix=fitz.Matrix(2, 2))
#     img = Image.frombytes("RGB", [pix.width, pix.height], pix.samples)
    
#     fig, ax = plt.subplots(1, 1, figsize=(12, 16))
#     ax.imshow(img)
    
#     # Draw the detected "Density Table"
#     # Scale coordinates because image is 2x zoom
#     scale_x = pix.width / page.rect.width
#     scale_y = pix.height / page.rect.height
    
#     rect_x = final_bbox[0] * scale_x
#     rect_y = final_bbox[1] * scale_y
#     rect_w = (final_bbox[2] - final_bbox[0]) * scale_x
#     rect_h = (final_bbox[3] - final_bbox[1]) * scale_y
    
#     rect = patches.Rectangle((rect_x, rect_y), rect_w, rect_h, 
#                              linewidth=3, edgecolor='red', facecolor='none')
#     ax.add_patch(rect)
    
#     # Label it
#     ax.text(rect_x, rect_y - 10, "DETECTED TABLE (Density)", 
#             color='white', fontsize=12, weight='bold', 
#             bbox=dict(facecolor='red', edgecolor='none'))

#     ax.axis('off')
#     plt.title(f"Page {page_num + 1}: Heuristic Detection", fontsize=15)
#     plt.show()
    
#     print(f"Page {page_num + 1}: Table detected at {final_bbox}")
#     return final_bbox

# # ================================
# # RUN ON PAGE 1 (Where the table usually is)
# # ================================
# # Change page_num to 0, 1, or 2 to find where your table is
# detect_table_by_density(PDF_PATH, page_num=0)