In [2]:
import fitz  # PyMuPDF
from pathlib import Path
import pandas as pd
from mlbox.settings import ROOT_DIR

# Scratch folders for this notebook live under <ROOT_DIR>/tmp/<name of the
# current working directory>/, split into "input" and "output".
CURRENT_DIR = Path.cwd()
_scratch_dir = ROOT_DIR / "tmp" / CURRENT_DIR.name
input_folder = _scratch_dir / "input"
result_folder = _scratch_dir / "output"

In [None]:
from pathlib import Path
import fitz  # PyMuPDF
from PIL import Image
import io

# Export every text block of the label PDF as a cropped 300-dpi PNG and echo
# the block's text, so each image can be matched to what was extracted.
pdf_path = ROOT_DIR / "assets" / "LabelGuard" / "input" / "Lovita_CC_Glazur_150g_UNI_v181224E__tt.pdf"
output_dir = ROOT_DIR / "assets" / "LabelGuard" / "blocks"
output_dir.mkdir(parents=True, exist_ok=True)

# The context manager closes the document even if rendering or saving fails
# part-way through (a trailing doc.close() would be skipped on an exception).
with fitz.open(pdf_path) as doc:
    for page_no, page in enumerate(doc, start=1):
        text_dict = page.get_text("dict")
        # Block numbers count ALL blocks on the page, so the numbering of the
        # saved files has gaps wherever a non-text block was skipped.
        for block_no, block in enumerate(text_dict["blocks"], start=1):
            if block["type"] != 0:  # only text blocks are exported
                continue

            # Extract and print text: every span of every line, space-separated
            text = " ".join(span["text"] for line in block["lines"] for span in line["spans"])
            print(f"Page {page_no}, Block {block_no} text:\n{text}\n")

            # Render only the block's bounding box, not the whole page
            clip = fitz.Rect(block["bbox"])
            pix = page.get_pixmap(clip=clip, dpi=300)
            image_path = output_dir / f"page{page_no}_block{block_no}.png"
            pix.save(image_path)



In [None]:
# Dump the plain text of every page of the Illustrator (.ai) source file.
# NOTE(review): this relies on PyMuPDF being able to open the .ai file as a
# document (presumably it was saved PDF-compatible) -- confirm for other inputs.
ai_path = ROOT_DIR / "assets" / "LabelGuard" / "input" / "Lovita_CC_Glazur_150g_UNI_v181224E копія.ai"

# Context manager guarantees the file handle is released; join avoids
# rebuilding the string once per page.
with fitz.open(ai_path) as doc:
    text = "".join(page.get_text() for page in doc)
print(text)

In [None]:
from pathlib import Path
from bs4 import BeautifulSoup
from PIL import Image, ImageDraw
from mlbox.settings import ROOT_DIR

# Draw every <rect> of the label SVG as a red outline on a blank white canvas
# sized to the SVG's user-space extent, then save and show the result.
svg_path = ROOT_DIR / "assets" / "LabelGuard" / "input" / "Lovita_CC_Glazur_150g_UNI_v181224E.svg"
# NOTE(review): jpeg_path is defined but not used by this cell -- the boxes are
# drawn on a blank image, not on the JPEG. Kept in case other cells rely on it.
jpeg_path = ROOT_DIR / "assets" / "LabelGuard" / "input" / "Lovita_CC_Glazur_150g_UNI_v181224E__tt__curv_1.jpg"
out_path = ROOT_DIR / "assets" / "LabelGuard" / "input" / "bbox_overlay.jpg"

# 1. Read SVG
with open(svg_path, "r", encoding="utf-8") as f:
    soup = BeautifulSoup(f, "xml")

svg_root = soup.find("svg")
if svg_root is None:
    # Fail with a clear message instead of an AttributeError on None below
    raise ValueError(f"No <svg> root element found in {svg_path}")

# 2. Get SVG viewBox for proper scaling.
# viewBox is "min-x min-y width height"; the numbers may be separated by
# whitespace and/or commas, so normalise commas before splitting.
viewbox = svg_root.get("viewBox")
if viewbox:
    min_x, min_y, svg_w, svg_h = map(float, viewbox.replace(",", " ").split())
else:
    min_x = min_y = 0.0
    # NOTE(review): width/height with a unit suffix (e.g. "100px", "210mm")
    # will raise ValueError here -- only bare numbers are supported.
    svg_w = float(svg_root.get("width", 1000))
    svg_h = float(svg_root.get("height", 1000))

# 3. Create blank image (one pixel per SVG user unit, fractional part truncated)
blank_img_w, blank_img_h = int(svg_w), int(svg_h)
img = Image.new("RGB", (blank_img_w, blank_img_h), color="white")
draw = ImageDraw.Draw(img)

# 4. Parse boxes as (x, y, width, height) in SVG user units; missing attributes
#    default to 0. Any transform= on the rect or its ancestors is NOT applied.
boxes = [
    (
        float(r.get("x", 0)),
        float(r.get("y", 0)),
        float(r.get("width", 0)),
        float(r.get("height", 0)),
    )
    for r in soup.find_all("rect")
]

# 5. Draw boxes, shifted by the viewBox origin so that user-space point
#    (min_x, min_y) lands on pixel (0, 0).
for x, y, w, h in boxes:
    if w <= 0 or h <= 0:
        # A rect without positive width and height is not rendered in SVG,
        # and a reversed rectangle is rejected by recent Pillow versions.
        continue
    left, top = x - min_x, y - min_y
    draw.rectangle([left, top, left + w, top + h], outline="red", width=2)

img.save(out_path)
img.show()


In [None]:
from fontTools.ttLib import TTFont

# Inspect the font's cmap (character-to-glyph) subtables: for each one, print
# its identifying header fields, its size, and the first five mappings.
font_path = ROOT_DIR / "assets" / "LabelGuard" / "input" / "fonts" / "NotoSans-ExtraCondensed.otf"
font = TTFont(font_path)

separator = "-" * 30
for subtable in font["cmap"].tables:
    print(f"Platform ID: {subtable.platformID}, Encoding ID: {subtable.platEncID}, Format: {subtable.format}")
    mapping = subtable.cmap
    print(f"Contains {len(mapping)} mappings")
    # Only a small sample is shown; a subtable can hold thousands of entries
    for codepoint, glyph_name in list(mapping.items())[:5]:
        print(f" U+{codepoint:04X} → {glyph_name}")
    print(separator)


In [None]:
import fitz  # PyMuPDF

# List the fonts referenced by each page of the label PDF.
pdf_path = ROOT_DIR / "assets" / "LabelGuard" / "input" / "Lovita_CC_Glazur_150g_UNI_v181224E__tt.pdf"

# Context manager closes the document when the listing is done (or fails).
with fitz.open(pdf_path) as doc:
    for page_no, page in enumerate(doc, start=1):
        print(f"--- Page {page_no} ---")
        # The loop variable is deliberately not called `font`, so it does not
        # overwrite the TTFont object bound to that name by an earlier cell.
        for font_entry in page.get_fonts(full=True):
            # Per the PyMuPDF docs each entry is
            # (xref, ext, type, basefont, name, encoding, referencer):
            # index 2 is the font TYPE (not subset info), index 3 the base name.
            font_type, basefont = font_entry[2], font_entry[3]
            # Subset fonts carry a six-uppercase-letter tag before a "+",
            # e.g. "ABCDEF+NotoSans".
            tag, plus, _ = basefont.partition("+")
            is_subset = bool(plus) and len(tag) == 6 and tag.isalpha() and tag.isupper()
            print(f"Font: {basefont} | Type: {font_type} | Subset: {is_subset}")
