In [18]:
#!pip install pdoc weasyprint
#!pip install markdown
#!pip install pdfkit
# pdfkit needs the wkhtmltopdf binary installed separately: https://wkhtmltopdf.org/downloads.html

In [22]:
import ast
import glob
import html
import os
import re
import shutil

import pdfkit

# Define folder and output file paths
FOLDER_DOC = "docs"
os.makedirs(FOLDER_DOC, exist_ok=True)
combined_markdown_path = f'{FOLDER_DOC}/AIzymes_Manual.md'
combined_html_path = f'{FOLDER_DOC}/AIzymes_Manual.html'
combined_pdf_path = '../AIzymes_Manual.pdf'

# Configure pdfkit to use wkhtmltopdf.
# Resolution order keeps the notebook portable across machines:
#   1. WKHTMLTOPDF_PATH environment variable (explicit override)
#   2. a wkhtmltopdf executable found on PATH
#   3. the default Windows install location (the previously hard-coded path)
WKHTMLTOPDF_DEFAULT = r"C:\Program Files\wkhtmltopdf\bin\wkhtmltopdf.exe"
wkhtmltopdf_path = (
    os.environ.get("WKHTMLTOPDF_PATH")
    or shutil.which("wkhtmltopdf")
    or WKHTMLTOPDF_DEFAULT
)
config = pdfkit.configuration(wkhtmltopdf=wkhtmltopdf_path)

# Function to extract docstrings and generate ToC entries with numbered headings
def extract_docstrings(filepath, file_number):
    """Render the docstrings of one Python file as HTML with matching ToC entries.

    Every function, async function and class that has a docstring gets a
    numbered <h3> section and a ToC <li>. Docstring text is HTML-escaped.
    Lines under "Parameters:" / "Returns:" are formatted as name/description
    pairs until the next blank line.

    Parameters:
    filepath: Path of the Python source file to parse.
    file_number: Index of the file in the manual, used in the "1.N" and "1.N.M" numbers.

    Returns:
    tuple: (toc_html, body_html), both newline-joined HTML strings.
    """
    filename = os.path.basename(filepath)
    # Strip the trailing "_<digits>.py" suffix to get the display name
    formatted_filename = html.escape(re.sub(r'_\d+\.py$', '', filename), quote=True)

    # Numbered heading for each script
    docstrings = [f"<h2 id='{formatted_filename}'>1.{file_number} {formatted_filename}</h2><br>"]
    toc_entries = [f"<li><a href='#{formatted_filename}'>1.{file_number} {formatted_filename}</a></li>"]

    # Read bytes so ast.parse applies Python's own source-decoding rules
    # (UTF-8 default, BOM, coding cookie) instead of the platform locale.
    with open(filepath, "rb") as file:
        tree = ast.parse(file.read(), filename=filepath)

    section_number = 1  # Sequential section number for both classes and functions
    seen_ids = set()    # Anchors already emitted for this file

    # NOTE: ast.walk is breadth-first, so methods are listed after all
    # top-level functions and classes rather than directly under their class.
    for node in ast.walk(tree):
        if not isinstance(node, (ast.FunctionDef, ast.AsyncFunctionDef, ast.ClassDef)):
            continue
        docstring = ast.get_docstring(node)
        if not docstring:
            continue

        # Generate a unique anchor and numbered heading for each class or function.
        # Same-named objects (e.g. methods of different classes) would otherwise
        # share an id; "-" cannot occur in an identifier, so the suffix is unambiguous.
        section_id = f"{formatted_filename}_{node.name}"
        if section_id in seen_ids:
            section_id = f"{section_id}-{section_number}"
        seen_ids.add(section_id)

        header = f"<h3 id='{section_id}'>1.{file_number}.{section_number}: {node.name}</h3><br>"
        toc_entries.append(f"<li style='margin-left: 20px;'><a href='#{section_id}'>1.{file_number}.{section_number}: {node.name}</a></li>")
        section_number += 1

        formatted_docstring = []
        inside_parameters_or_returns = False

        for raw_line in docstring.splitlines():
            # Escape so that "<", ">" and "&" in docstrings cannot break the markup
            line = html.escape(raw_line.strip(), quote=False)

            # Start of Parameters or Returns section with gray styling and line break before
            if line.startswith(("Parameters:", "Returns:")):
                formatted_docstring.append(f"<br><span style='color:gray; font-weight:bold;'>{line}</span><br>")
                inside_parameters_or_returns = True
            # Format parameter or return line with bold name followed by its description
            elif inside_parameters_or_returns and ": " in line:
                name, description = line.split(": ", 1)
                formatted_docstring.append(f"<span style='font-family:Lucida Console; font-weight:bold;'>{name.strip()}</span>:&nbsp;&nbsp;{description.strip()}<br>")
            elif not line:
                # A blank line ends the current Parameters/Returns section
                inside_parameters_or_returns = False
                formatted_docstring.append("<br>")
            elif inside_parameters_or_returns:
                # Continuation of an entry: keep it on its own line
                formatted_docstring.append(f"{line}<br>")
            else:
                # Plain text: trailing space keeps words from fusing across line breaks
                formatted_docstring.append(f"{line} ")

        docstrings.append(header + "".join(formatted_docstring) + "<br>")

    return "\n".join(toc_entries), "\n".join(docstrings)

# Step 1: Collect docstrings and ToC from all matching files
combined_docstring_content = ["<h1 id='code'>1. Code</h1><br>"]
combined_toc_entries = ["<h1>Table of Contents</h1><ul>", "<li><a href='#code'>1. Code</a></li>"]

file_number = 1
# glob.glob returns matches in arbitrary order; sort so that the section
# numbering is the same on every run and every machine.
for filepath in sorted(glob.glob("*.py")):
    # Use regex to match filenames that end with an underscore followed by three digits and .py
    if re.search(r"_\d{3}\.py$", filepath):
        toc_entries, docstring_content = extract_docstrings(filepath, file_number)
        combined_toc_entries.append(toc_entries)
        combined_docstring_content.append(docstring_content)
        file_number += 1

combined_toc_entries.append("</ul>")
toc_html = "\n".join(combined_toc_entries)
doc_html = "\n".join(combined_docstring_content)

# Step 2: Combine ToC and docstring content
html_content = toc_html + "<br>" + doc_html

# CSS for styling and page numbering
# NOTE(review): the @page margin-box rule is presumably ignored by wkhtmltopdf;
# the 'footer-right' option below is what produces the page numbers - confirm.
css = """
<style>
    body { font-family: Calibri, sans-serif; font-size: 12px; margin: 20px; }
    h1 { color: #4CAF50; font-size: 1.2em; margin-bottom: 0.5em; }
    h2 { color: #333; font-size: 1.1em; margin-bottom: 0.0em; }
    h3 { color: #555; font-size: 1.0em; margin-bottom: 0.0em; }
    p { font-size: 11px; line-height: 1.1; margin-bottom: 0; }
    p + p { margin-top: 0.0em; }
    @page { size: A4; margin: 1in; }
    @page { @bottom-right { content: "Page " counter(page); } }
</style>
"""

# Declare the charset so the file renders correctly when opened on its own
html_content = "<meta charset='utf-8'>\n" + css + html_content

# Step 3: Write the HTML explicitly as UTF-8; the platform default encoding
# (e.g. cp1252 on Windows) cannot represent every character a docstring may contain.
with open(combined_html_path, "w", encoding="utf-8") as f:
    f.write(html_content)

# Step 4: Generate PDF with page numbers
pdfkit.from_file(combined_html_path, combined_pdf_path, configuration=config, options={
    'quiet': '',
    'dpi': 300,
    'disable-smart-shrinking': '',
    'enable-local-file-access': '',
    'encoding': 'UTF-8',  # Match the encoding the HTML file was written with
    'footer-right': 'Page [page] of [topage]'  # Footer with page numbers
})

print(f"Combined PDF generated at {combined_pdf_path}")


Combined PDF generated at ../AIzymes_Manual.pdf
