In [1]:
import json
import os

import cv2
import fitz
import pandas as pd
# Let pandas render frames in full inside the notebook (no row/column/width truncation).
for display_option in ("max_rows", "max_columns", "width", "max_colwidth"):
    pd.set_option(f"display.{display_option}", None)

doc_id = "fbccf66c-16f4-4e6d-a56d-1688ce3e0941"

# All artefacts of one processed document share this directory prefix.
processed_dir = f"/Users/bigo/Projects/timbergem/data/processed/{doc_id}"

metadata_path = f"{processed_dir}/symbols/symbols_metadata.json"
project_data_path = f"{processed_dir}/project_data.json"
document_path = f"{processed_dir}/original.pdf"
legend_path = f"{processed_dir}/clippings/page1/SymbolLegend_1_clipping.png"


In [None]:

"""
Debug the symbol coordinates in the legend by loading the legend image and applying the coordinates in metadata.symbols[].coordinates.legend_clipping_relative (utilizing left_clipping_pixels, top_clipping_pixels, width_clipping_pixels, height_clipping_pixels). Saves a clipping of each symbol from the legend image in "symbol_debug".
"""
def debug_symbol_coordinates():
    """
    Crop every symbol out of the legend image so its stored coordinates can be inspected.

    Reads the "symbols" list from the metadata JSON at ``metadata_path`` and the legend
    image at ``legend_path``. For each symbol the rectangle stored under
    ``coordinates.legend_clipping_relative`` (``left_clipping_pixels``,
    ``top_clipping_pixels``, ``width_clipping_pixels``, ``height_clipping_pixels``) is
    cut out of the legend image and written to ``symbol_debug/image_symbol_<i>.png``.

    Raises:
        FileNotFoundError: if the legend image cannot be read.
    """
    with open(metadata_path, "r", encoding="utf-8") as f:
        symbols = json.load(f)["symbols"]

    legend_image = cv2.imread(legend_path)
    if legend_image is None:
        # cv2.imread reports failure by returning None rather than raising.
        raise FileNotFoundError(f"Could not read legend image: {legend_path}")

    # cv2.imwrite returns False instead of raising when the target directory is missing.
    os.makedirs("symbol_debug", exist_ok=True)

    for i, symbol in enumerate(symbols):
        coordinates = symbol['coordinates']['legend_clipping_relative']
        left = coordinates['left_clipping_pixels']
        top = coordinates['top_clipping_pixels']
        width = coordinates['width_clipping_pixels']
        height = coordinates['height_clipping_pixels']
        # extract the symbol clipping from the legend image (rows are y, columns are x)
        symbol_clipping = legend_image[top:top + height, left:left + width]
        symbol_image_path = f"symbol_debug/image_symbol_{i}.png"
        if symbol_clipping.size == 0:
            # An empty crop means the stored rectangle lies outside the legend image;
            # cv2.imwrite would raise on it, so report and move on.
            print(f"Skipped {symbol_image_path}: clipping region is empty")
            continue
        # save the symbol clipping as an image, reporting the real outcome
        if cv2.imwrite(symbol_image_path, symbol_clipping):
            print(f"Saved symbol clipping to {symbol_image_path}")
        else:
            print(f"Failed to save symbol clipping to {symbol_image_path}")
debug_symbol_coordinates()


In [None]:
"""
Debug helper: outlines each symbol and its source legend on the PDF page.
References each symbol in the metadata json "symbols" list. Utilizes "page_number" (1-indexed), "source_legend.pdf_coordinates" and "coordinates.pdf_absolute" to locate the legend and the symbol in the pdf.
Prints the position and size (in points) of each symbol and of its legend.
Saves the annotated page renders in the "symbol_debug" dir (relative to the current working directory).
"""
def calculate_symbol_dimensions():
    """
    Outline each symbol and its source legend on the PDF page and save a page render.

    For every entry in the metadata "symbols" list this reads ``page_number``
    (1-indexed), ``source_legend.pdf_coordinates`` and ``coordinates.pdf_absolute``
    (``left_points``, ``top_points``, ``width_points``, ``height_points``), prints both
    rectangles, draws the legend rectangle in green and the symbol rectangle in red on
    the in-memory page, and saves a render of the page (default ``get_pixmap``
    settings) to ``symbol_debug/pdf_symbol_<i>.png``.

    Despite its name, this function does not measure anything. A later cell defines
    another function with the same name, which replaces this one once that cell runs.

    NOTE(review): the rectangles are drawn on the open document, so outlines from
    earlier symbols on the same page presumably remain visible in later renders.
    """
    with open(metadata_path, "r", encoding="utf-8") as f:
        symbols = json.load(f)["symbols"]

    # Make sure the output directory exists before any render is saved.
    os.makedirs("symbol_debug", exist_ok=True)

    # The context manager closes the document even if a symbol entry is malformed.
    with fitz.open(document_path) as document:
        for i, symbol in enumerate(symbols):
            page_number = symbol["page_number"]
            # get the coordinates of the legend and of the symbol in the pdf
            legend_box = symbol['source_legend']['pdf_coordinates']
            legend_left_points = legend_box['left_points']
            legend_top_points = legend_box['top_points']
            legend_width_points = legend_box['width_points']
            legend_height_points = legend_box['height_points']
            symbol_box = symbol['coordinates']['pdf_absolute']
            symbol_left_points = symbol_box['left_points']
            symbol_top_points = symbol_box['top_points']
            symbol_width_points = symbol_box['width_points']
            symbol_height_points = symbol_box['height_points']
            page = document[page_number - 1]  # convert to 0-indexed
            # print all the coordinates and dimensions detected
            print(f"Symbol {i}: Left: {symbol_left_points}, Top: {symbol_top_points}, Width: {symbol_width_points}, Height: {symbol_height_points}")
            print(f"Legend: Left: {legend_left_points}, Top: {legend_top_points}, Width: {legend_width_points}, Height: {legend_height_points}")
            # fitz.Rect takes two corners (x0, y0, x1, y1), so add the size to the origin
            symbol_rect = fitz.Rect(symbol_left_points, symbol_top_points, symbol_left_points + symbol_width_points, symbol_top_points + symbol_height_points)
            legend_rect = fitz.Rect(legend_left_points, legend_top_points, legend_left_points + legend_width_points, legend_top_points + legend_height_points)
            # draw the rectangles on the page for debugging (legend green, symbol red)
            page.draw_rect(legend_rect, color=(0, 1, 0), width=1)
            page.draw_rect(symbol_rect, color=(1, 0, 0), width=1)
            pix = page.get_pixmap()
            # save the annotated page render as an image
            symbol_image_path = f"symbol_debug/pdf_symbol_{i}.png"
            pix.save(symbol_image_path)
            print(f"Saved symbol clipping to {symbol_image_path}")

calculate_symbol_dimensions()


Symbol 0: Left: 605.52, Top: 145.2, Width: 31.919999999999998, Height: 26.16
Legend: Left: 563.76, Top: 97.92, Width: 455.76, Height: 365.03999999999996
Saved symbol clipping to symbol_debug/pdf_symbol_0.png
Symbol 1: Left: 608.88, Top: 235.68, Width: 31.919999999999998, Height: 18.24
Legend: Left: 563.76, Top: 97.92, Width: 455.76, Height: 365.03999999999996
Saved symbol clipping to symbol_debug/pdf_symbol_1.png


In [8]:
"""
loads the symbol from the pdf and detects the size of the symbol in pixel space (300 dpi) based on the contours
"""
import fitz
import json
import pandas as pd
import numpy as np
import cv2

def calculate_symbol_dimensions():
    """
    Print the pixel size of each symbol's drawn content, rendered from the PDF at 300 dpi.

    For every entry in the metadata "symbols" list, the rectangle given by
    ``coordinates.pdf_absolute`` on page ``page_number`` (1-indexed) is rendered at
    300 dpi. The bounding box enclosing all external contours found in the inverted
    grayscale render gives the symbol's height and width in pixels; both are 0 when no
    contour is found. The result is printed as ``<name>: Height: <h>, Width: <w>`` and
    the rendered clip is saved to ``symbol_debug/symbol_size_<i>.png``.
    """
    dpi = 300  # render resolution; constant for every symbol

    with open(metadata_path, "r", encoding="utf-8") as f:
        symbols = json.load(f)["symbols"]

    # Make sure the output directory exists before any clip is saved.
    os.makedirs("symbol_debug", exist_ok=True)

    # The context manager closes the document even if a symbol fails to process.
    with fitz.open(document_path) as document:
        for i, symbol in enumerate(symbols):
            page_number = symbol["page_number"]
            symbol_name = symbol["name"]
            # get the coordinates of the symbol in the pdf
            pdf_box = symbol['coordinates']['pdf_absolute']
            symbol_left_points = pdf_box['left_points']
            symbol_top_points = pdf_box['top_points']
            symbol_width_points = pdf_box['width_points']
            symbol_height_points = pdf_box['height_points']

            # render only the symbol's rectangle (two-corner form: x0, y0, x1, y1)
            page = document[page_number - 1]  # convert to 0-indexed
            symbol_rect = fitz.Rect(symbol_left_points, symbol_top_points, symbol_left_points + symbol_width_points, symbol_top_points + symbol_height_points)
            symbol_pix = page.get_pixmap(clip=symbol_rect, dpi=dpi)
            # view the raw pixmap bytes as a (height, width, channels) array
            img_data = np.frombuffer(symbol_pix.samples, dtype=np.uint8).reshape(symbol_pix.h, symbol_pix.w, symbol_pix.n)

            # Convert to grayscale if it's not already
            if img_data.shape[2] > 1:
                gray_image = cv2.cvtColor(img_data, cv2.COLOR_RGB2GRAY)
            else:
                gray_image = img_data.reshape(symbol_pix.h, symbol_pix.w)

            # Invert so that every non-white pixel becomes non-zero, which is what
            # findContours treats as foreground.
            contours, _ = cv2.findContours(cv2.bitwise_not(gray_image), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if contours:
                # Merge all contour points to get one overall bounding box
                _, _, symbol_width, symbol_height = cv2.boundingRect(np.concatenate(contours))
            else:
                symbol_height = 0
                symbol_width = 0

            print(f"{symbol_name}: Height: {symbol_height}, Width: {symbol_width}")
            # save the symbol_pix as an image
            symbol_pix.save(f"symbol_debug/symbol_size_{i}.png")

calculate_symbol_dimensions()

construction: Height: 93, Width: 92
door: Height: 43, Width: 84
window: Height: 76, Width: 66


In [2]:
# Examine the current metadata structure: report how many symbols the metadata file
# holds and pretty-print the first entry so its schema can be read at a glance.
with open(metadata_path, "r", encoding="utf-8") as f:
    metadata_json = json.load(f)

print("Current metadata structure:")
print(f"Total symbols: {len(metadata_json['symbols'])}")
print("\nFirst symbol structure:")
if metadata_json['symbols']:
    # json is already in scope (it is used to load the file above), so no re-import here
    print(json.dumps(metadata_json['symbols'][0], indent=2))
else:
    # Say so explicitly instead of leaving the header with nothing under it.
    print("(no symbols in metadata)")

Current metadata structure:
Total symbols: 3

First symbol structure:
{
  "id": "acac5acc-793d-492f-9841-3cbd0d3b9d24",
  "name": "construction",
  "description": "",
  "filename": "construction_1.png",
  "relative_path": "symbols/legend_1753764509365-1/construction_1.png",
  "coordinates": {
    "pdf_absolute": {
      "left_points": 608.16,
      "top_points": 144.72,
      "width_points": 29.759999999999998,
      "height_points": 27.84
    },
    "legend_clipping_relative": {
      "left_clipping_pixels": 86,
      "top_clipping_pixels": 213,
      "width_clipping_pixels": 124,
      "height_clipping_pixels": 116,
      "clipping_dpi": 300
    },
    "canvas_annotation": {
      "left_canvas_pixels": 41.0,
      "top_canvas_pixels": 100.81380001227154,
      "width_canvas_pixels": 59.0,
      "height_canvas_pixels": 55.045574987728465,
      "canvas_width_pixels": 796.0,
      "canvas_height_pixels": 745.0
    }
  },
  "source_legend": {
    "annotation_id": "1753764509365-1",
    

In [3]:
# Test the new symbol dimensions module integration
import sys
import os

# Make the backend package importable. The membership check keeps sys.path from
# growing by one duplicate entry every time this cell is re-run.
backend_path = '/Users/bigo/Projects/timbergem/backend'
if backend_path not in sys.path:
    sys.path.append(backend_path)

from utils.symbol_dimensions import SymbolDimensionCalculator
from utils.coordinate_mapping import PDFCoordinates

# Test with the first symbol from our metadata
with open(metadata_path, "r", encoding="utf-8") as f:
    metadata_json = json.load(f)

first_symbol = metadata_json['symbols'][0]
print(f"Testing symbol: {first_symbol['name']}")

# Create PDF coordinates object from the symbol's absolute PDF rectangle (in points)
pdf_absolute = first_symbol['coordinates']['pdf_absolute']
pdf_coords = PDFCoordinates(
    left=pdf_absolute['left_points'],
    top=pdf_absolute['top_points'],
    width=pdf_absolute['width_points'],
    height=pdf_absolute['height_points']
)

# Test the calculator. The context manager closes the document even when the
# calculator raises, which the trailing document.close() call did not guarantee.
calculator = SymbolDimensionCalculator()
with fitz.open(document_path) as document:
    dimensions = calculator.calculate_dimensions_from_pdf(
        document,
        first_symbol['page_number'],
        pdf_coords
    )

print(f"Calculated dimensions: {dimensions}")
print(f"This would be added to the metadata as: 'symbol_template_dimensions': {dimensions}")

# Reached only when nothing above raised.
print("\n✅ Integration test successful! The symbol_annotation.py will now automatically:")
print("   - Calculate these dimensions for each symbol during saving")
print("   - Store them in the 'symbol_template_dimensions' field in symbols_metadata.json")

Testing symbol: construction
Calculated dimensions: {'height_pixels_300dpi': 93, 'width_pixels_300dpi': 92}
This would be added to the metadata as: 'symbol_template_dimensions': {'height_pixels_300dpi': 93, 'width_pixels_300dpi': 92}

✅ Integration test successful! The symbol_annotation.py will now automatically:
   - Calculate these dimensions for each symbol during saving
   - Store them in the 'symbol_template_dimensions' field in symbols_metadata.json


In [None]:
# Test symbol dimension calculation directly in notebook
import numpy as np

def calculate_symbol_dimensions_simple(document_path, page_number, symbol_left, symbol_top, symbol_width, symbol_height, dpi=300):
    """Simple version of the symbol dimension calculator.

    Renders the given rectangle of one PDF page and measures the bounding box that
    encloses all external contours found in the inverted grayscale render.

    Args:
        document_path: Path of the PDF to open.
        page_number: 1-indexed page number.
        symbol_left: Left edge of the symbol rectangle (passed to ``fitz.Rect``).
        symbol_top: Top edge of the symbol rectangle.
        symbol_width: Width of the symbol rectangle.
        symbol_height: Height of the symbol rectangle.
        dpi: Resolution used for rendering.

    Returns:
        ``{"height_pixels_300dpi": h, "width_pixels_300dpi": w}``. The key names are
        fixed and say "300dpi" even when another ``dpi`` is passed. Both values are 0
        when no contour is found or when any error occurs; errors are printed, not
        raised.
    """
    try:
        # The context manager closes the document on every path, including errors;
        # previously it was only closed when everything succeeded.
        with fitz.open(document_path) as document:
            page = document[page_number - 1]  # Convert to 0-indexed
            symbol_rect = fitz.Rect(symbol_left, symbol_top, symbol_left + symbol_width, symbol_top + symbol_height)

            # Get pixmap at specified DPI
            symbol_pix = page.get_pixmap(clip=symbol_rect, dpi=dpi)

            # View the raw pixmap bytes as a (height, width, channels) array
            img_data = np.frombuffer(symbol_pix.samples, dtype=np.uint8).reshape(symbol_pix.h, symbol_pix.w, symbol_pix.n)

            # Convert to grayscale if needed
            if img_data.shape[2] > 1:
                gray_image = cv2.cvtColor(img_data, cv2.COLOR_RGB2GRAY)
            else:
                gray_image = img_data.reshape(symbol_pix.h, symbol_pix.w)

            # Invert so that every non-white pixel becomes non-zero, which is what
            # findContours treats as foreground.
            contours, _ = cv2.findContours(cv2.bitwise_not(gray_image), cv2.RETR_EXTERNAL, cv2.CHAIN_APPROX_SIMPLE)

            if contours:
                # Merge all contour points to get one overall bounding box
                _, _, width_pixels, height_pixels = cv2.boundingRect(np.concatenate(contours))
            else:
                height_pixels = 0
                width_pixels = 0

            return {"height_pixels_300dpi": height_pixels, "width_pixels_300dpi": width_pixels}

    except Exception as e:
        # Deliberate best-effort: report the problem and fall back to zero dimensions.
        print(f"Error: {e}")
        return {"height_pixels_300dpi": 0, "width_pixels_300dpi": 0}

# Run the simple calculator over every symbol listed in the metadata file
with open(metadata_path, "r") as metadata_file:
    metadata_json = json.load(metadata_file)

print("Testing symbol dimension calculation:")
print("-" * 50)

for index, entry in enumerate(metadata_json["symbols"]):
    label = entry["name"]
    page = entry["page_number"]

    # Absolute rectangle of the symbol in the PDF, in points
    box = entry['coordinates']['pdf_absolute']
    rect_args = (
        box['left_points'],
        box['top_points'],
        box['width_points'],
        box['height_points'],
    )

    # Measure the symbol's drawn content
    dimensions = calculate_symbol_dimensions_simple(document_path, page, *rect_args)
    width_px = dimensions['width_pixels_300dpi']
    height_px = dimensions['height_pixels_300dpi']

    print(f"Symbol {index}: '{label}'")
    print(f"  Dimensions: {width_px}x{height_px} pixels @ 300 DPI")
    print()