In [2]:
import fitz
import pandas as pd
import cv2
import json
# allow pandas to display everything in ipynb
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)
pd.set_option('display.width', None)
pd.set_option('display.max_colwidth', None)

doc_id = "a1e613ba-d726-4920-a7bc-f5fc9f7ab225"

metadata_path = f"/Users/bigo/Projects/timbergem/data/processed/{doc_id}/symbols/symbols_metadata.json"
project_data_path = f"/Users/bigo/Projects/timbergem/data/processed/{doc_id}/project_data.json"
document_path = f"/Users/bigo/Projects/timbergem/data/processed/{doc_id}/original.pdf"
legend_path = f"/Users/bigo/Projects/timbergem/data/processed/{doc_id}/clippings/page1/SymbolLegend_1_clipping.png"

"""
Debug the symbol coordinates in the legend by loading the legend image and applying the coordinates in metadata.symbols[].coordinates.legend_relative (utilizing left, top, width, height). Saves a clipping from the legend image in "symbol_debug"
"""
def debug_symbol_coordinates():
    with open(metadata_path, "r") as f:
        metadata_json = json.load(f)
    symbols = metadata_json["symbols"]
    metadata = pd.DataFrame(symbols)
    legend_image = cv2.imread(legend_path)

    for i, symbol in metadata.iterrows():
        coordinates = symbol['coordinates']['legend_clipping_relative']
        left = coordinates['left_clipping_pixels']
        top = coordinates['top_clipping_pixels']
        width = coordinates['width_clipping_pixels']
        height = coordinates['height_clipping_pixels']
        # extract the symbol clipping from the legend image
        symbol_clipping = legend_image[top:top + height, left:left + width]
        # save the symbol clipping as an image
        symbol_image_path = f"symbol_debug/image_symbol_{i}.png"
        cv2.imwrite(symbol_image_path, symbol_clipping)
        print(f"Saved symbol clipping to {symbol_image_path}")
debug_symbol_coordinates()


Saved symbol clipping to symbol_debug/image_symbol_0.png


In [None]:

"""
calculates the symbol dimensions in 300 dpi based on the contours of the symbol clippings
references each symbol in the metadata json "symbols" list. Utilizes "source_legend.page_number"(1-indexed) and "coordinates.pdf_absolute" to find the symbol clipping in the pdf.
prints the height and width of each symbol
saves the contour clippings in "symbol_debug" dir in root of the project
"""
def calculate_symbol_dimensions():
    with open(metadata_path, "r") as f:
        metadata_json = json.load(f)
    symbols = metadata_json["symbols"]
    metadata = pd.DataFrame(symbols)
    document = fitz.open(document_path)

    for i, symbol in metadata.iterrows():
        legend_id = symbol['source_legend']['annotation_id']
        with open(project_data_path, "r") as f:
            project_data = json.load(f)
        key_areas = project_data["projectData"]['keyAreas']
        for _, v in key_areas.items():
            for area in v:
                print(area)
                print(area["id"], legend_id)
                if area["id"] == legend_id:
                    page_number = area['pageNumber']
                    break
        if not page_number:
            print(f"XXX Page Number not found for Legend ID {legend_id} in project_data.json")
            continue
        legend_left_points = symbol['source_legend']['pdf_coordinates']['left_points']
        legend_top_points = symbol['source_legend']['pdf_coordinates']['top_points']
        legend_width_points = symbol['source_legend']['pdf_coordinates']['width_points']
        legend_height_points = symbol['source_legend']['pdf_coordinates']['height_points']
        symbol_left_points = symbol['coordinates']['pdf_absolute']['left_points']
        symbol_top_points = symbol['coordinates']['pdf_absolute']['top_points']
        symbol_width_points = symbol['coordinates']['pdf_absolute']['width_points']
        symbol_height_points = symbol['coordinates']['pdf_absolute']['height_points']
        page = document[page_number- 1]  # convert to 0-indexed
        # print all the coordinates and dimensions detected
        print(f"Symbol {i}: Left: {symbol_left_points}, Top: {symbol_top_points}, Width: {symbol_width_points}, Height: {symbol_height_points}")
        print(f"Legend: Left: {legend_left_points}, Top: {legend_top_points}, Width: {legend_width_points}, Height: {legend_height_points}")
        # draw a sample rectangle on the page for debugging
        # page.draw_rect(fitz.Rect(0, 0, 100, 100), color=(0, 1, 0), width=1)
        # page.draw_rect(fitz.Rect(100, 100, 200, 200), color=(1, 0, 0), width=1)
        # pix = page.get_pixmap()
        # pix.save(f"symbol_debug/pdf_symbol_debug_{i}.png")
        # save the symbol clipping as an image
        symbol_rect = fitz.Rect(symbol_left_points, symbol_top_points, symbol_left_points + symbol_width_points, symbol_top_points + symbol_height_points)
        legend_rect = fitz.Rect(legend_left_points, legend_top_points, legend_left_points + legend_width_points, legend_top_points + legend_height_points)
        # draw the rectangle on the page for debugging
        page.draw_rect(legend_rect, color=(0, 1, 0), width=1)
        page.draw_rect(symbol_rect, color=(1, 0, 0), width=1)
        pix = page.get_pixmap()
        # save the symbol clipping as an image
        symbol_image_path = f"symbol_debug/pdf_symbol_{i}.png"
        pix.save(symbol_image_path)
        print(f"Saved symbol clipping to {symbol_image_path}")

calculate_symbol_dimensions()


{'id': '1753736797620-1', 'tag': 'SymbolLegend', 'left': 269, 'top': 47.359375, 'width': 198, 'height': 166, 'pageNumber': 1, 'pdfCoordinates': {'left_points': 581.04, 'top_points': 1267.2, 'width_points': 427.68, 'height_points': 358.56}}
1753736797620-1 1753736797620-1
Symbol 0: Left: 611.52, Top: 1312.56, Width: 23.52, Height: 22.56
Legend: Left: 581.04, Top: 1267.2, Width: 427.68, Height: 358.56
Saved symbol clipping to symbol_debug/pdf_symbol_0.png


In [3]:
"""
"left": 275.5,
"top": 45.359375,
"width": 193,
"height": 179,
"""
import fitz

# load /Users/bigo/Projects/timbergem/data/Gasc_annot_debug.pdf and draw a rectangle at (275.5, 45.359375) with width 193 and height 179. Save the page as an image in "symbol_debug" dir
def draw_debug_rectangle():
    doc = fitz.open("/Users/bigo/Projects/timbergem/data/Gasc_annot_debug.pdf")
    page = doc[0]  # first page
    left = 275.5
    top = 45.359375
    width = 193
    height = 179
    rect = fitz.Rect(left, top, left + width, top + height)
    rect = fitz.Rect(595, 98, 1011, 484)
    # draw the rectangle on the page for debugging
    page.draw_rect(rect, color=(1, 0, 0), width=1)
    pix = page.get_pixmap(dpi=300)
    # save the page as an image
    debug_image_path = "symbol_debug/debug_rectangle.png"
    pix.save(debug_image_path)
    print(f"Saved debug rectangle image to {debug_image_path}")
draw_debug_rectangle()

Saved debug rectangle image to symbol_debug/debug_rectangle.png
