In [7]:
import io
from typing import List, Dict, Any

import pypdf
from pypdf.generic import NameObject, ArrayObject, IndirectObject

# Object numbers of the annotations that should be kept.
keep_ids = {
    146,
    151,
    158,
    163,
    170,
}

def extract_annotations_by_ids(pdf_path: str, keep_ids: set) -> Dict[int, List[Any]]:
    """Collect the annotations of a PDF whose object numbers are in *keep_ids*.

    Each entry of a page's ``/Annots`` array is matched on its ``idnum``
    attribute; an entry without that attribute is compared as ``None``.

    Args:
        pdf_path: Path of the PDF file to scan.
        keep_ids: Object numbers of the annotations to keep.

    Returns:
        A mapping from zero-based page index to the list of matching
        annotation objects on that page (indirect references resolved),
        in the order they appear in ``/Annots``. Pages without a match
        are omitted.
    """
    print("=== Extracting annotations from:", pdf_path)

    # pypdf resolves indirect objects lazily from the reader's stream. Parsing
    # from an in-memory copy keeps the returned annotations -- and the nested
    # references inside them -- resolvable after the file handle is closed.
    with open(pdf_path, "rb") as file:
        reader = pypdf.PdfReader(io.BytesIO(file.read()))

    page_annots: Dict[int, List[Any]] = {}
    for page_num, page in enumerate(reader.pages):
        if "/Annots" not in page:
            continue

        matches = [
            annot_ref.get_object() if isinstance(annot_ref, IndirectObject) else annot_ref
            for annot_ref in page["/Annots"]
            if getattr(annot_ref, "idnum", None) in keep_ids
        ]
        if matches:
            page_annots[page_num] = matches

    return page_annots

def _copy_annotation_to_page(annot, source_reader, writer, page):
    """Deep-copy *annot* into *writer* and append it to *page*'s ``/Annots``."""
    # Re-read the annotation through ``source_reader``, whose file is open,
    # so that cloning can follow nested references (appearance streams,
    # pop-ups, ...) regardless of the state of the reader it came from.
    source_ref = getattr(annot, "indirect_reference", None)
    if source_ref is not None:
        reread = source_reader.get_object(
            IndirectObject(source_ref.idnum, source_ref.generation, source_reader)
        )
        if reread is not None:
            annot = reread

    # ``clone`` copies the object graph into the writer's object space.
    # "/P" (the owning page) is skipped so the source page is not dragged
    # along, then re-pointed at the page the annotation now lives on.
    # NOTE(review): link destinations that reference source pages are still
    # cloned as-is -- confirm this is acceptable for the PDFs being merged.
    copied = annot.clone(writer, ignore_fields=("/P",))
    copied[NameObject("/P")] = page.indirect_reference

    # Item access resolves an indirectly stored /Annots array, so the append
    # below always lands in the real array rather than wrapping a reference.
    if "/Annots" in page:
        page_annots = page["/Annots"]
    else:
        page_annots = ArrayObject()
        page[NameObject("/Annots")] = page_annots

    copied_ref = getattr(copied, "indirect_reference", None)
    page_annots.append(copied_ref if copied_ref is not None else copied)


def merge_annotations(source_pdf_path: str, target_pdf_path: str, output_path: str, keep_ids: set):
    """Copy selected annotations from one PDF onto the pages of another.

    Annotations are matched to pages by position: those selected from source
    page ``i`` are appended after any annotations already on target page
    ``i``. Selections on source pages beyond the last target page are
    ignored. Every target page is written to the output, annotated or not.

    Args:
        source_pdf_path: PDF to take the annotations from.
        target_pdf_path: PDF whose pages receive the annotations.
        output_path: Where the merged PDF is written.
        keep_ids: Object numbers of the source annotations to copy.
    """
    with open(source_pdf_path, "rb") as source_file, open(target_pdf_path, "rb") as target_file:
        source_reader = pypdf.PdfReader(source_file)
        target_reader = pypdf.PdfReader(target_file)
        writer = pypdf.PdfWriter()

        # The helper decides *which* annotations go on which page; the copy
        # itself is made from ``source_reader`` while its file is still open.
        selected_annots = extract_annotations_by_ids(source_pdf_path, keep_ids)

        for i, target_page in enumerate(target_reader.pages):
            # Add the page first: the annotations must reference the page
            # object that lives in the writer, not the one in the reader.
            new_page = writer.add_page(target_page)
            for annot in selected_annots.get(i, ()):
                _copy_annotation_to_page(annot, source_reader, writer, new_page)

    # Save output
    with open(output_path, "wb") as out_file:
        writer.write(out_file)

    print(f"Annotations merged and saved to {output_path}")

# Copy the selected annotations from pdf2.pdf onto pdf1.pdf and save the
# result as merged2.pdf.
merge_annotations(
    source_pdf_path="pdf2.pdf",
    target_pdf_path="pdf1.pdf",
    output_path="merged2.pdf",
    keep_ids=keep_ids,
)

=== Extracting annotations from: pdf2.pdf
Annotations merged and saved to merged2.pdf
