# In [None]:
import cv2
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from paddleocr import PaddleOCR, draw_ocr
import os

# First, make sure layoutparser is properly installed with Detectron2
# pip install "layoutparser[detectron2]"
# If you're in Google Colab, you might need:
# !pip install "layoutparser[layoutmodels,detectron2]"

import layoutparser as lp

# Initialize PaddleOCR (English model, with the text-angle classifier enabled
# so rotated lines are handled).
ocr = PaddleOCR(use_angle_cls=True, lang="en", det_db_box_thresh=0.5)

# Load Image
image_path = "hdfc.png"  # Replace with your vendor statement
image = cv2.imread(image_path)
if image is None:
    # cv2.imread reports failure by returning None instead of raising, which
    # would otherwise surface as an opaque assertion error inside cvtColor.
    reason = (
        "file not found"
        if not os.path.isfile(image_path)
        else "file could not be decoded as an image"
    )
    raise OSError(f"Cannot load {image_path!r}: {reason}")

# OpenCV loads images as BGR; convert to RGB for layout detection and plotting.
image = cv2.cvtColor(image, cv2.COLOR_BGR2RGB)

# OCR Processing (PaddleOCR reads the file itself from the path)
ocr_results = ocr.ocr(image_path, cls=True)

# Extracted Text & Confidence Scores
# ocr_results holds one entry per input image. Recent PaddleOCR releases put
# None (not an empty list) in that slot when no text is detected, so normalise
# it to an empty list before iterating.
extracted_text = []
for result in (ocr_results[0] if ocr_results else None) or []:
    try:
        if len(result) < 2:
            continue
        # Each line is [box, (text, score)]; only the text and score are kept.
        extracted_text.append((result[1][0], result[1][1]))
    except (IndexError, TypeError) as e:
        print(f"Skipping result with unexpected format: {result}, Error: {e}")

# Print Extracted Text
print("🔹 Extracted Text from Invoice:")
for text, score in extracted_text:
    print(f"{text} (Confidence: {score:.2f})")

# Layout detection: find table regions with a Detectron2 model, OCR each
# region, and export the recognised cells as CSV.


def _group_lines_into_rows(ocr_lines, row_gap=20):
    """Group OCR text lines into table rows of cell strings.

    Each element of *ocr_lines* is expected to look like
    ``[box, (text, score)]`` where ``box`` is a sequence of ``[x, y]`` corner
    points (index 0 and index 2 are used as opposite corners).

    Lines are sorted by vertical centre; a new row starts whenever a line's
    centre is more than *row_gap* pixels away from the centre of the line that
    opened the current row. Cells inside a row are ordered left-to-right.
    This is a simple heuristic, not a full table-structure recogniser.

    Returns a list of rows, each a list of text strings.
    """

    def center_y(line):
        box = line[0]
        return (box[0][1] + box[2][1]) / 2

    grouped = []
    current_row = []
    row_anchor_y = None
    for line in sorted(ocr_lines, key=center_y):
        y = center_y(line)
        if row_anchor_y is None:
            row_anchor_y = y
        elif abs(y - row_anchor_y) > row_gap:
            grouped.append(current_row)
            current_row = []
            row_anchor_y = y
        current_row.append(line)
    if current_row:
        grouped.append(current_row)

    # Sorting by y alone leaves cells of one row in arbitrary order, so order
    # each row by the x of its first corner to restore the column sequence.
    return [
        [line[1][0] for line in sorted(row, key=lambda ln: ln[0][0][0])]
        for row in grouped
    ]


try:
    # Import the specific Detectron2LayoutModel
    from layoutparser.models import Detectron2LayoutModel

    # Initialize the layout model. LayoutParser config paths have the form
    # "lp://<dataset>/<model_name>/config"; a lighter alternative is
    # "lp://PubLayNet/faster_rcnn_R_50_FPN_3x/config".
    model = Detectron2LayoutModel(
        "lp://PubLayNet/mask_rcnn_X_101_32x8d_FPN_3x/config",
        extra_config=["MODEL.ROI_HEADS.SCORE_THRESH_TEST", 0.8],
        label_map={0: "Text", 1: "Title", 2: "List", 3: "Table", 4: "Figure"}
    )

    # Perform layout detection
    layout = model.detect(image)

    # Extract table regions
    table_blocks = [b for b in layout if b.type == 'Table']
    if table_blocks:
        print("\n🔹 Table Detected!")
        img_h, img_w = image.shape[:2]

        for idx, table in enumerate(table_blocks):
            x1, y1, x2, y2 = map(int, table.coordinates)

            # Clip to the image: a negative coordinate would otherwise be
            # treated as an index from the end and crop the wrong region.
            x1, x2 = max(0, x1), min(img_w, x2)
            y1, y2 = max(0, y1), min(img_h, y2)
            if x2 <= x1 or y2 <= y1:
                print(f"\n🔹 Skipping table {idx + 1}: empty region after clipping.")
                continue
            cropped_table = image[y1:y2, x1:x2]

            # Run OCR on the cropped table.
            # NOTE(review): the crop is RGB here; PaddleOCR presumably expects
            # OpenCV-style BGR for array input — confirm if accuracy matters.
            table_results = ocr.ocr(cropped_table, cls=True)

            # The per-image entry is None when no text is found in the crop.
            table_lines = (table_results[0] if table_results else None) or []

            # Attempt to create a structured table (simplified heuristic;
            # production use needs real row/column detection).
            rows = _group_lines_into_rows(table_lines)

            # Create DataFrame (short rows are padded with NaN by pandas)
            df = pd.DataFrame(rows)

            print(f"\n🔹 Extracted Table {idx + 1}:")
            print(df)

            # Save table to CSV
            df.to_csv(f"invoice_table_{idx + 1}.csv", index=False)
    else:
        print("\n🔹 No tables detected in the image.")

except ImportError as e:
    print(f"\n🔹 Layout detection error: {e}")
    print("Make sure you have the full LayoutParser installation with Detectron2:")
    print("pip install 'layoutparser[detectron2]'")

    # Fallback: Create a simple dataframe from all OCR text
    all_text = [text for text, _ in extracted_text]
    print("\n🔹 Fallback: Creating a simple data structure from all OCR text")
    df = pd.DataFrame({'text': all_text})
    print(df.head())
    df.to_csv("invoice_extracted_text.csv", index=False)

# Display Image with OCR Text Overlay
plt.figure(figsize=(10, 10))
plt.imshow(image)

# Add text annotations. The per-image OCR entry is None when nothing was
# detected, so fall back to an empty list and just show the bare image.
for result in (ocr_results[0] if ocr_results else None) or []:
    if len(result) < 2:
        continue

    # Convert box coordinates to integers
    box = np.array(result[0]).astype(np.int32)
    text = result[1][0]

    # Draw bounding box: repeat the first corner at the end to close the outline
    outline = np.vstack([box, box[:1]])
    plt.plot(outline[:, 0], outline[:, 1], 'r-')

    # Add text annotation at the box's first corner
    plt.text(box[0][0], box[0][1], text, color='blue', fontsize=8)

plt.axis("off")
plt.tight_layout()
plt.savefig("invoice_annotated.png", bbox_inches='tight')
plt.show()

print("\n🔹 Processing complete! Annotated image and extracted data saved.")