In [None]:
try:
    import fitz  # PyMuPDF
except ImportError:
    !pip install pymupdf
    import fitz

In [None]:
def parse_bookmarks_from_md(txt_path, offset):
    """
    Parse bookmark entries from a Markdown-formatted text file.

    Every header line of the form ``<hashes> <title> @ <page>`` becomes one
    bookmark. The bookmark level is the number of ``#`` characters at the very
    start of the line; the page number is whatever follows the *last* ``@``,
    so titles may themselves contain ``@`` (e.g. an e-mail address).
    Blank lines and lines that do not start with ``#`` are ignored.

    Args:
        txt_path (str): The path to the Markdown file to be parsed.
        offset (int): Page number offset. This offset is added to each parsed page number.

    Returns:
        list: A list where each element is another list containing the level,
        title, and page number (with offset applied).
        For example: [[1, 'Chapter 1', 10], [2, 'Section 1.1', 12]],
        where 1 and 2 represent the bookmark's level,
        'Chapter 1' and 'Section 1.1' are titles, 10 and 12 are page numbers.

    Raises:
        ValueError: If a header line has no ``@`` separator, or the text after
            the last ``@`` is not an integer.
    """
    bookmarks = []
    with open(txt_path, 'r', encoding='utf-8') as file:
        for line_number, line in enumerate(file, start=1):
            stripped_line = line.strip()

            # Only the leading run of '#' defines the level. Counting every
            # '#' before the first space would misread text such as "C# notes"
            # as a header and miscount when the line contains no space at all.
            heading_body = stripped_line.lstrip('#')
            level = len(stripped_line) - len(heading_body)
            if level == 0:
                continue  # Blank line or ordinary text: not a bookmark

            # Split on the LAST '@' so that titles containing '@' stay intact.
            title, separator, page = heading_body.rpartition('@')
            if not separator:
                raise ValueError(
                    f"{txt_path}, line {line_number}: header is missing "
                    f"'@ <page>': {stripped_line!r}")
            try:
                page_number = int(page)
            except ValueError:
                raise ValueError(
                    f"{txt_path}, line {line_number}: invalid page number "
                    f"{page.strip()!r} in header: {stripped_line!r}") from None

            bookmarks.append([level, title.strip(), page_number + offset])
    return bookmarks

In [None]:
def add_bookmarks_to_pdf(pdf_path, output_pdf_path, bookmarks):
    """
    Adds bookmark information to the specified PDF file and saves it to a new file path.

    The source file is opened with PyMuPDF, its table of contents (TOC) is
    replaced by ``bookmarks``, and the result is written to
    ``output_pdf_path``. A confirmation message is printed on success.

    Args:
        pdf_path (str): The path to the source PDF file.
        output_pdf_path (str): The path where the PDF file with added bookmarks will be saved.
        bookmarks (list): A list of bookmark information, where each bookmark is a
            sequence containing level, title, and page number. Page numbers are
            1-based, i.e. 1 refers to the first page of the PDF.
    """
    # PyMuPDF's set_toc takes 1-based page numbers (the same convention that
    # get_toc returns), so the page numbers are passed through unchanged;
    # subtracting 1 here would make every bookmark point one page too early.
    # Each entry is normalized to a [level, title, page] list.
    toc = [[level, title, page] for level, title, page in bookmarks]

    doc = fitz.open(pdf_path)
    try:
        # Replace the document's table of contents with the provided bookmarks
        doc.set_toc(toc)
        # Save to a new PDF file to avoid overwriting the original file
        doc.save(output_pdf_path)
    finally:
        # Release the document even if set_toc or save raised
        doc.close()
    print(f"Bookmarks have been added to the new PDF file: {output_pdf_path}")

In [None]:
# Example run: read bookmarks from a Markdown outline and write them into a PDF copy

# Input and output locations
pdf_path = "./input/Example.pdf"  # PDF that should receive the bookmarks
bookmarks_md_path = "./input/Example.md"  # Markdown outline describing the bookmarks
output_pdf_path = "./output/Example.pdf"  # Where the bookmarked copy is written

# Value added to every page number found in the Markdown file.
# Adjust it when the page numbers shown by the PDF reader
# differ from the actual page numbers.
offset = 11

# Step 1: turn the Markdown outline into a list of bookmark entries
bookmarks = parse_bookmarks_from_md(txt_path=bookmarks_md_path, offset=offset)

# Step 2: write those entries into a copy of the PDF
add_bookmarks_to_pdf(
    pdf_path=pdf_path,
    output_pdf_path=output_pdf_path,
    bookmarks=bookmarks,
)