In [1]:
import copy
from typing import Any, Dict, List, Tuple

import pypdf
from pypdf import PdfReader, PdfWriter

def extract_all_annotations(pdf_path: str) -> List[Tuple[int, Any]]:
    """Collect every annotation in a PDF together with its page index.

    Args:
        pdf_path: Path of the PDF file to read.

    Returns:
        A list of ``(page_num, annotation)`` tuples in page order, where
        ``page_num`` is the zero-based page index and ``annotation`` is the
        resolved object behind one entry of that page's ``/Annots`` array.
        Pages without a usable ``/Annots`` array contribute nothing.

    Entries that cannot be resolved are reported on stdout and skipped, so a
    single broken annotation does not abort the whole extraction.
    """
    annotations = []
    # Give pypdf the path instead of a file handle that is closed on return:
    # the returned annotations may still hold indirect references that are
    # resolved lazily through the reader, which needs a readable stream.
    reader = PdfReader(pdf_path)
    for page_num, page in enumerate(reader.pages):
        annots = page.get("/Annots")
        if annots is None:
            continue
        # /Annots can itself be an indirect reference; resolve it, and skip
        # anything that is not an array (e.g. a null object).
        annots = annots.get_object()
        if not isinstance(annots, list):
            continue
        for annot_ref in annots:
            try:
                annotations.append((page_num, annot_ref.get_object()))
            except Exception as e:
                print(f"Failed to extract annotation: {e}")
    return annotations


def merge_annotations_to_pdf(base_pdf: str, source_pdf: str, output_pdf: str) -> None:
    """Merge annotations from source_pdf into base_pdf and save as output_pdf.

    Every page of ``base_pdf`` is copied into a new document. Each annotation
    found on page ``i`` of ``source_pdf`` is then cloned onto page ``i`` of
    that copy, and the result is written to ``output_pdf``.

    Args:
        base_pdf: Path of the PDF whose pages form the output document.
        source_pdf: Path of the PDF whose annotations are copied over.
        output_pdf: Path the merged PDF is written to.

    Annotations that cannot be transferred (for example because the base PDF
    has fewer pages than the source) are reported on stdout and skipped.
    """
    NameObject = pypdf.generic.NameObject
    ArrayObject = pypdf.generic.ArrayObject

    # Readers are created from paths so they stay usable for the whole
    # function, including lazy resolution of indirect references.
    base_reader = PdfReader(base_pdf)
    source_reader = PdfReader(source_pdf)
    writer = PdfWriter()

    # Copy pages from base PDF into writer
    for page in base_reader.pages:
        writer.add_page(page)

    # Add annotations to respective pages
    for page_num, source_page in enumerate(source_reader.pages):
        source_annots = source_page.get("/Annots")
        if source_annots is None:
            continue
        # /Annots may be an indirect reference or a non-array value.
        source_annots = source_annots.get_object()
        if not isinstance(source_annots, list):
            continue

        for annot_ref in source_annots:
            try:
                annot = annot_ref.get_object()
                target_page = writer.pages[page_num]

                # clone() copies the annotation and everything it references
                # into the writer. /P is left out because it points at the
                # *source* page, which must not be dragged into the output.
                cloned_annot = annot.clone(writer, ignore_fields=("/P",))
                cloned_ref = writer._add_object(cloned_annot)

                # Re-attach the annotation to the page it now lives on.
                page_ref = getattr(target_page, "indirect_reference", None)
                if page_ref is not None:
                    cloned_annot[NameObject("/P")] = page_ref

                # pypdf dictionaries only accept PdfObject keys and values,
                # so a plain str key / list value would raise ValueError.
                if "/Annots" not in target_page:
                    target_page[NameObject("/Annots")] = ArrayObject()

                target_page["/Annots"].append(cloned_ref)
            except Exception as e:
                print(f"Failed to add annotation to page {page_num}: {e}")

    # Write merged output
    with open(output_pdf, "wb") as out:
        writer.write(out)

    print(f"Annotations from '{source_pdf}' merged into '{base_pdf}' and saved as '{output_pdf}'.")


# === RUN THE MERGE ===
# Paths are relative to the notebook's working directory.
merge_annotations_to_pdf(
    base_pdf="pdf1.pdf",
    source_pdf="pdf2.pdf",
    output_pdf="merged_output.pdf",
)


Annotations from 'pdf2.pdf' merged into 'pdf1.pdf' and saved as 'merged_output.pdf'.


In [None]:
get 