<a href="https://colab.research.google.com/github/agmCorp/colab/blob/main/GenerateReport3.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# 1. Instalar las librerías necesarias
!pip install python-docx
!pip install requests

Collecting python-docx
  Downloading python_docx-1.2.0-py3-none-any.whl.metadata (2.0 kB)
Downloading python_docx-1.2.0-py3-none-any.whl (252 kB)
[?25l   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m0.0/253.0 kB[0m [31m?[0m eta [36m-:--:--[0m[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m253.0/253.0 kB[0m [31m11.2 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: python-docx
Successfully installed python-docx-1.2.0


In [3]:
import re
from docx import Document
from docx.shared import Cm, RGBColor
from docx.enum.text import WD_ALIGN_PARAGRAPH
from docx.text.paragraph import Paragraph
from docx.table import Table
from PIL import Image as PilImage, ImageDraw, ImageFont
import os
import io
import requests
import sys

# ----------------------------------------------------------------------
# CONVERSION FUNCTION
# ----------------------------------------------------------------------

def convert_docx_to_pdf_cloudmersive(docx_bytes: io.BytesIO, api_key: str) -> bytes | None:
    import requests, sys

    url = "https://api.cloudmersive.com/convert/docx/to/pdf"
    headers = {"Apikey": api_key}

    files = {
        "inputFile": (
            "input.docx",
            docx_bytes.getvalue(),
            "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
        )
    }

    try:
        r = requests.post(url, headers=headers, files=files, timeout=60)
        if r.status_code == 200:
            return r.content  # PDF bytes
        else:
            print(f"Error {r.status_code}. Response: {r.text[:300]}")
            return None
    except requests.RequestException as e:
        print(f"Network error: {e}", file=sys.stderr)
        return None

# ----------------------------------------------------------------------
# IMAGE SUPPORT CODE
# ----------------------------------------------------------------------

def get_unavailable_image(width_px: int = 400, height_px: int = 300) -> io.BytesIO:
    """Creates a PNG image with the text 'IMAGE NOT AVAILABLE'."""
    width_px = 400
    height_px = 300
    img = PilImage.new('RGB', (width_px, height_px), color='#E0E0E0')
    draw = ImageDraw.Draw(img)
    font_size = 400
    try:
        # Ensure the script can locate 'arial.ttf'
        font = ImageFont.truetype("arial.ttf", font_size)
    except IOError:
        font = ImageFont.load_default()
    text = "IMAGE NOT AVAILABLE"
    left, top, right, bottom = draw.textbbox((0, 0), text, font)
    textwidth = right - left
    textheight = bottom - top
    x = (width_px - textwidth) / 2
    y = (height_px - textheight) / 2
    draw.text((x, y), text, fill=(50, 50, 50), font=font)
    img_byte_arr = io.BytesIO()
    img.save(img_byte_arr, format='PNG')
    img_byte_arr.seek(0)
    return img_byte_arr

# ----------------------------------------------------------------------
# DOCX REPORT GENERATION CODE
# ----------------------------------------------------------------------

def generate_verification_report_bytes(template_path: str, replacements: dict, image_paths: dict) -> io.BytesIO:
    """Generates a DOCX report from a template and returns it as an io.BytesIO object."""
    try:
        document = Document(template_path)
    except Exception as e:
        print(f"Error loading template {template_path}: {e}")
        return None

    target_width_cm = 4.0
    target_width_emu = Cm(target_width_cm)

    def add_colored_text(paragraph, text: str):
        """Writes text with color for ✔ (green) and ✘ (red)."""
        for ch in text:
            run = paragraph.add_run(ch)
            if ch == "✔":
                run.font.color.rgb = RGBColor(0, 150, 0)   # green
            elif ch == "✘":
                run.font.color.rgb = RGBColor(200, 0, 0)   # red

    def replace_in_element(element):
        """Replaces text and image placeholders in a paragraph or table cell."""
        # Ensures iteration through all paragraphs if it's a table
        if isinstance(element, Paragraph):
            paragraphs = [element]
        elif isinstance(element, Table):
            paragraphs = []
            for row in element.rows:
                for cell in row.cells:
                    paragraphs.extend(cell.paragraphs)
        else:
            return

        for paragraph in paragraphs:
            # Combine all runs’ text to find placeholders
            full_text = "".join([run.text for run in paragraph.runs])
            image_inserted = False

            # 1. Image replacement (placeholders {{..._image}})
            # Loops to handle one image replacement per paragraph/cell,
            # and removes previous runs.
            for img_placeholder, img_path in image_paths.items():
                token = f"{{{{{img_placeholder}}}}}"
                if token in full_text:
                    # Remove existing content before inserting image
                    for run in reversed(paragraph.runs):
                        paragraph._element.remove(run._element)

                    final_img_source = None
                    new_width_emu = target_width_emu
                    new_height_emu = target_width_emu * (300 / 400)  # Default ratio for placeholder

                    if img_path and os.path.exists(img_path):
                        final_img_source = img_path
                        try:
                            img = PilImage.open(final_img_source)
                            original_width, original_height = img.size
                            aspect_ratio = original_height / original_width
                            new_height_emu = new_width_emu * aspect_ratio
                        except Exception as e:
                            print(f"Warning: Unable to open real image '{img_path}'. Using 'Not Available' placeholder. Error: {e}")
                            final_img_source = get_unavailable_image()
                    else:
                        print(f"Generating 'Not Available' placeholder for '{img_placeholder}' in memory.")
                        final_img_source = get_unavailable_image()

                    if final_img_source:
                        run = paragraph.add_run()
                        run.add_picture(final_img_source, width=new_width_emu, height=new_height_emu)
                        paragraph.alignment = WD_ALIGN_PARAGRAPH.CENTER
                        image_inserted = True
                    break

            # 2. Text replacement (remaining placeholders {{...}})
            if not image_inserted:
                # Recalculate text after possible run removals
                full_text = "".join([run.text for run in paragraph.runs])
                new_text = full_text

                for placeholder, value in replacements.items():
                    new_text = new_text.replace(placeholder, str(value))

                if new_text != full_text:
                    # If text has changed, replace paragraph content
                    for run in reversed(paragraph.runs):
                        paragraph._element.remove(run._element)
                    add_colored_text(paragraph, new_text)

    # Iterate through all paragraphs and tables in the document
    for paragraph in document.paragraphs:
        replace_in_element(paragraph)

    for table in document.tables:
        replace_in_element(table)  # Process each table

    # Save the document to an in-memory byte buffer
    docx_stream = io.BytesIO()
    document.save(docx_stream)
    docx_stream.seek(0)

    print("\nDOCX report successfully generated in memory.")
    return docx_stream

# --- Define text replacements ---
replacements = {
    "{{instance_id}}": "INS-20251010-001",
    "{{created_date}}": "2025/10/10",
    "{{created_time}}": "13:00:45",
    "{{requested_by_user_id}}": "user123",
    "{{requested_by_user_name}}": "Álvaro Morales",
    "{{requested_by_user_email}}": "alvaro.m@company.com",
    "{{requested_by_user_role}}": "Operator",
    "{{client_app_version}}": "1.5.2",
    "{{expected_prod_code}}": "PROD-ABC-456",
    "{{expected_prod_desc}}": "Paracetamol 500mg - Tablets",
    "{{expected_lot}}": "LOT-2025-09-001",
    "{{validation_lot_ok}}": "✔",
    "{{expected_exp_date}}": "2027/12/31",
    "{{validation_exp_date_ok}}": "✔",
    "{{expected_pack_date}}": "2025/09/20",
    "{{validation_pack_date_ok}}": "✘",
    "{{validation_barcode_detected_ok}}": "✘",
    "{{validation_barcode_legible_ok}}": "✔",
    "{{barcode_payload_decoded_value}}": "GS1-98765432101234",
    "{{barcode_payload_barcode_symbology}}": "DataMatrix",
    "{{input_container}}": "cont-in-2025",
    "{{input_blob_name}}": "input_001.jpg",
    "{{processed_image_container}}": "cont-proc-2025",
    "{{processed_image_blob_name}}": "processed_001.jpg",
    "{{ocr_overlay_container}}": "cont-ocr-2025",
    "{{ocr_overlay_blob_name}}": "ocr_overlay_001.png",
    "{{barcode_overlay_container}}": "cont-bar-2025",
    "{{barcode_overlay_blob_name}}": "barcode_overlay_001.png",
    "{{barcode_roi_container}}": "cont-roi-2025",
    "{{barcode_roi_blob_name}}": "barcode_roi_001.png",
    "{{VALOR_AND}}": "✔",
    "{{validation_barcode_ok}}": "✔",
    "{{validation_summary}}": "✘",
    "{{user_comment}}": "Product successfully verified. Dates are visible and legible.",
    "{{report_container}}": "cont-report-2025",
    "{{report_blob_name}}": "report_001.docx",
}

# --- Create dummy images (or simulate loading) ---
temp_images = {
    "input_image.png": "red",
    "processed_image.png": "lightgray",
    "ocr_overlay_image.png": "lightblue",
    "barcode_overlay_image.png": "lightgreen",
}

def create_dummy_image(filename, color):
    img = PilImage.new('RGB', (400, 300), color=color)
    img.save(filename)
    return filename

print("Creating test images...")
for name, color in temp_images.items():
    create_dummy_image(name, color)
print("Test images created.")

# --- Define image paths ---
# Note: The 'barcode_roi_image' remains None to test placeholder
image_paths = {
    "input_image": "input_image.png",
    "processed_image": "processed_image.png",
    "ocr_overlay_image": "ocr_overlay_image.png",
    "barcode_overlay_image": "barcode_overlay_image.png",
    "barcode_roi_image": None,
}

# --- Execution routine ---
rejected = False
TEMPLATE_NAME = "/content/sample_data/Plantilla_Informe_Verificacion_Rechazado.docx" if rejected else "/content/sample_data/Plantilla_Informe_Verificacion_Aceptado.docx"

# Generate DOCX in memory
docx_content_stream = generate_verification_report_bytes(TEMPLATE_NAME, replacements, image_paths)

pdf_bytes = None
if docx_content_stream:
    # Perform synchronous call and get PDF bytes
    CLOUDMERSIVE_API_KEY = "d2e4f77e-0140-4072-9390-1ffcfbe2b1e9"
    pdf_bytes = convert_docx_to_pdf_cloudmersive(docx_content_stream, CLOUDMERSIVE_API_KEY)

# Handle resulting PDF
if pdf_bytes:
    # Save to disk for verification
    with open("final_report.pdf", "wb") as f:
        f.write(pdf_bytes)
    print("\nPDF successfully generated and saved as 'final_report.pdf'.")
else:
    print("\nPDF generation failed.")


Creating test images...
Test images created.
Generating 'Not Available' placeholder for 'barcode_roi_image' in memory.

DOCX report successfully generated in memory.

PDF successfully generated and saved as 'final_report.pdf'.
