In [31]:
"""
Documentation Generator using LaTeX

This script:
  1. Scans for Python files (with names matching *_###.py) and extracts module,
     function, and class docstrings.
  2. Reads Markdown files for non-code sections.
  3. Generates a LaTeX document combining these sections.
  4. Compiles the LaTeX file to produce a final PDF.
"""

import ast
import os
import glob
import re
import subprocess
import shutil

tab_space = 3.5

# List of .py scripts that should be described first, in the given order.
PRIORITY_SCRIPTS = [
    "aizymes.py",
    "main_startup_002.py",
    "main_running_003.py",
    "setup_system_001.py"
]

# Define folder and output file paths.
combined_tex_path = "AIzymes_Manual.tex"
pdf_path = os.path.join("..", "..", "AIzymes_Manual.pdf")
generated_pdf = os.path.basename(pdf_path)

# Remove old generated files if they exist.
for f in (generated_pdf, pdf_path):
    if os.path.exists(f):
        os.remove(f)

def latex_escape(text):
    """
    Escape LaTeX special characters.
    """
    replacements = {
        '&': r'\&',
        '%': r'\%',
        '$': r'\$',
        '#': r'\#',
        '_': r'\_',
        '{': r'\{',
        '}': r'\}',
        '~': r'\textasciitilde{}',
        '^': r'\textasciicircum{}',
        '\\': r'\textbackslash{}',
        '|': r'\textbar{}'
    }
    pattern = re.compile("|".join(re.escape(key) for key in replacements.keys()))
    return pattern.sub(lambda match: replacements[match.group()], text)

def format_text_with_specific_headers(text, keywords):
    """
    Convert a docstring section into LaTeX-formatted text.

    When a line starts with one of the specified keywords (e.g. "Parameters:"), this
    function outputs the header in dark grey (mpgDarkGrey) and then collects subsequent
    lines as key–value items.

    Rules:
      - A line containing ": " starts a new item.
      - A nonblank line that does NOT contain ": " is appended as a continuation.
      - A blank line ends the collection.

    If items are collected, they are wrapped in a description environment with reduced
    spacing (locally set to 1.15) and appropriate enumitem options.
    """
    lines = text.splitlines()
    result = []
    i = 0
    while i < len(lines):
        line = lines[i].strip()
        handled = False
        for keyword in keywords:
            if line.startswith(keyword + ":"):
                header_text = line.split(":")[0]
                header_line = (
                    r"\par\vspace*{0.5\baselineskip}"
                    r"\noindent{\color{mpgAccentBlue}\textbf{" + header_text + r":}}"
                )
                result.append(header_line)
                items = []
                i += 1
                while i < len(lines):
                    current_line = lines[i].rstrip()  # preserve trailing spaces for continuation
                    stripped = current_line.strip()
                    if not stripped:
                        i += 1
                        break  # blank line ends collection
                    if ": " in stripped:
                        parts = stripped.split(": ", 1)
                        key_part = parse_formatting(parts[0].strip())
                        val_part = parse_formatting(parts[1].strip())
                        items.append(
                            r"\item[\textcolor{mpgAccentBlue!75!white}{" + key_part + r"\dotfill}] " + val_part
                        )

                    else:
                        if items:
                            items[-1] += " " + parse_formatting(stripped)
                    i += 1
                if items:
                    result.append(r"{\setstretch{1.15}")
                    result.append(r"\begin{description}[noitemsep,topsep=0pt,parsep=0pt,labelwidth=5cm,leftmargin=!," +
                                  r"labelindent=0pt,labelsep=0.2cm,itemsep=0pt]")
                    result.extend(items)
                    result.append(r"\end{description}")
                    result.append("}")
                handled = True
                break
        if not handled:
            result.append(parse_formatting(line))
            i += 1
    return "\n".join(result)

def extract_docstrings(filepath):
    """
    Extracts docstrings from a Python file and returns LaTeX-formatted content.
    The module becomes a subsection; functions and classes become subsubsections.
    (Functions have "()" appended.)
    """
    filename = os.path.basename(filepath)
    formatted_filename = re.sub(r'_\d+\.py$', '', filename)
    doc_parts = [r"\subsection{" + parse_formatting(f'{formatted_filename}.py') + "}"]
    
    with open(filepath, "r", encoding="utf-8") as file:
        tree = ast.parse(file.read())
    
    module_docstring = ast.get_docstring(tree)
    if module_docstring:
        formatted_docstring = format_text_with_specific_headers(
            module_docstring,
            ["Parameters", "Optional Parameters", "Returns", "Functions", "Classes", "Modules Required", "Usage"]
        )
        doc_parts.append(formatted_docstring)
    
    for node in ast.walk(tree):
        if isinstance(node, ast.FunctionDef):
            doc_parts.append(r"\subsubsection{" + parse_formatting(node.name + "()") + "}")
            docstring = ast.get_docstring(node)
            if docstring:
                formatted_docstring = format_text_with_specific_headers(
                    docstring,
                    ["Parameters", "Optional Parameters", "Returns", "Functions", "Classes", "Modules Required", "Usage"]
                )
                doc_parts.append(formatted_docstring)
        elif isinstance(node, ast.ClassDef):
            doc_parts.append(r"\subsubsection{" + parse_formatting(node.name) + "}")
            docstring = ast.get_docstring(node)
            if docstring:
                formatted_docstring = format_text_with_specific_headers(
                    docstring,
                    ["Parameters", "Optional Parameters", "Returns", "Functions", "Classes", "Modules Required", "Usage"]
                )
                doc_parts.append(formatted_docstring)
    return "\n".join(doc_parts)

def create_code_latex(doc_tex):
    """
    Processes all Python files matching *_###.py and adds their documentation.
    Files in PRIORITY_SCRIPTS are processed first.
    """
    code_parts = [r"\section{Code}"]
    all_files = [f for f in glob.glob(os.path.join("..", "aizymes", "*.py")) if re.search(r"_\d{3}\.py$", f)]

    print(all_files)
    priority_files = [f for f in all_files if f in PRIORITY_SCRIPTS]
    others = [f for f in all_files if f not in PRIORITY_SCRIPTS]
    ordered_priority = [p for p in PRIORITY_SCRIPTS if p in priority_files]
    ordered_files = ordered_priority + others
    for filepath in ordered_files:
        code_parts.append(extract_docstrings(filepath))
    doc_tex += "\n".join(code_parts)
    return doc_tex

def create_section_latex(section, doc_tex):
    """
    Reads a Markdown file (section.md) and converts it to LaTeX.
    Markdown headers become LaTeX sections. Code blocks (delimited by ```),
    GitHub-style callouts (e.g., > [!WARNING]), and images are supported.

    For the cover, unnumbered sections (\section*) are used so that it does not appear in the TOC.
    """
    if section.lower() == "cover":
        return doc_tex

    filepath = f"{section}.md"
    if not os.path.exists(filepath):
        return doc_tex

    with open(filepath, "r", encoding="utf-8") as file:
        content = file.read()

    lines = content.splitlines()
    processed_lines = []
    in_code_block = False
    i = 0

    while i < len(lines):
        line = lines[i]

        if line.startswith("```"):
            if not in_code_block:
                in_code_block = True
                code_block_start = i
                code_lines = []  # <-- add this line
                i += 1
                continue  # Skip ```
            else:
                in_code_block = False

                processed_lines.append(r"\vspace*{0.5\baselineskip}")
                processed_lines.append(
                    r"\begin{lstlisting}[basicstyle=\color{black}\fontsize{9}{11}\selectfont\ttfamily, frame=single, rulecolor=\color{black}, breaklines=true]"
                ) 
                \
                processed_lines.extend(code_lines)
                processed_lines.append(r"\end{lstlisting}")
                processed_lines.append(r"\vspace*{0.5\baselineskip}")
                i += 1
                continue

        # Handlen table
        if line.lstrip().startswith("**Tab."):
            table_lines = []
            # Include the table header line (with caption info)
            table_lines.append(line.strip())
            i += 1
            # Collect subsequent lines that start with "|" (the actual table rows)
            while i < len(lines) and lines[i].lstrip().startswith("|"):
                table_lines.append(lines[i])
                i += 1
            table_markdown = "\n".join(table_lines)
            # Convert the collected markdown table to LaTeX and add it to processed_lines
            processed_lines.append(convert_markdown_table_to_latex(table_markdown))
            continue  # Skip further processing for this block

        if in_code_block:
            code_lines.append(line)
            i += 1
            continue

        # Handle GitHub-style callouts
        if re.match(r'> \[!\w+\]', line):
            callout_lines = []
            while i < len(lines) and lines[i].startswith(">"):
                callout_lines.append(lines[i][2:].rstrip())
                i += 1
            callout_text = "\n".join(callout_lines)
            match = re.match(r'\[!(\w+)\](.*)', callout_text, re.DOTALL)
            if match:
                box_type, message = match.groups()
                box_type = box_type.upper()
                box_styles = {
                    "WARNING": (r"mpgAccentOrange!20!white", r"mpgAccentOrange!80!black"),
                    "IMPORTANT": (r"mpgAccentGreen!20!white", r"mpgAccentGreen!80!black"),
                    "NOTE": (r"mpgAccentBlue!20!white", r"mpgAccentBlue!80!black"),
                    "TIP": (r"mpgAccentCyan!20!white", r"mpgAccentCyan!80!black"),
                }
                bg, border = box_styles.get(box_type, ("gray!10!white", "gray!80!black"))
                processed_lines.append(
                    rf"\begin{{tcolorbox}}[colback={bg},colframe={border},title={box_type.capitalize()}]"
                )
                processed_lines.append(parse_formatting(message.strip()))
                processed_lines.append(r"\end{tcolorbox}")
            continue

        # Handle markdown headers
        if line.startswith("# "):
            processed_lines.append(r"\section{" + parse_formatting(line[2:].strip()) + "}")
        elif line.startswith("## "):
            processed_lines.append(r"\subsection{" + parse_formatting(line[3:].strip()) + "}")
        elif line.startswith("### "):
            processed_lines.append(r"\subsubsection{" + parse_formatting(line[4:].strip()) + "}")

        # Handle markdown images
        elif line.startswith("!["):
            match = re.match(r"!\[(.*?)\]\((.*?)\)", line)
            if match:
                alt_text, image_path = match.groups()
                caption = ""
                # Check if the next line is a <sub> line containing the caption
                if i+1 < len(lines) and lines[i+1].strip().startswith("<sub>") and lines[i+1].strip().endswith("</sub>"):
                    caption_match = re.search(r"<sub>(.*?)</sub>", lines[i+1].strip(), re.DOTALL)
                if caption_match:
                    caption_raw = caption_match.group(1).strip()
                    caption =  re.sub(r'(?i)^(?:\*\*?)?\s*fig\.\s*\d+\s*\|\s*', "", caption_raw)
                    caption = "**" + caption                    
                    i += 1  # Skip the caption line so it doesn't get processed again
                processed_lines.append(r"\begin{figure}[htbp]")
                processed_lines.append(r"\centering")
                processed_lines.append(r"\includegraphics[width=16cm]{" + image_path + r"}")
                if caption:
                    processed_lines.append(r"\caption{" + parse_formatting(caption) + r"}")
                else:
                    processed_lines.append(r"\caption{" + parse_formatting(alt_text) + r"}")
                processed_lines.append(r"\end{figure}")

        # Default: treat as plain paragraph
        else:
            processed_lines.append(parse_formatting(line))

        i += 1

    section_tex = "\n".join(processed_lines)
    doc_tex += section_tex
    return doc_tex

def parse_formatting(text):
    """
    Convert Markdown bold (**text**), italics (*text*), and callouts to LaTeX.
    """
    text = latex_escape(text)

    # Define replacement functions for our custom tokens.
    def tab_plain(match):
        left = match.group(1).rstrip()  # Text before <t>
        right = match.group(2).lstrip()   # Text after <t>
        # Use a fixed-width makebox (3.0cm), left-aligned, with no additional dots.
        return r'\makebox[' + str(tab_space) + r'cm][l]{' + left + r'} ' + right

    def tab_dots(match):
        left = match.group(1).rstrip()  # Text before <d>
        right = match.group(2).lstrip()   # Text after <d>
        # Use a fixed-width makebox (3.0cm) with \dotfill appended.
        return r'\makebox[' + str(tab_space) + r'cm][l]{' + left + r'\dotfill} ' + right

    # Process each line separately so we can catch our custom tokens.
    lines = text.splitlines()
    processed_lines = []
    for line in lines:
        # First, process the <d> token (with dotfill)
        if '<d>' in line:
            line = re.sub(r'(.*?)<d>(.*)', tab_dots, line)
        # Then, process the <t> token (plain, no dotfill)
        if '<t>' in line:
            line = re.sub(r'(.*?)<t>(.*)', tab_plain, line)
        processed_lines.append(line)
    text = "\n".join(processed_lines)

    # Add chapter number
    text = text.replace("**Full Manual**", "**5. Full Manual**")

    # GitHub callouts
    callout_patterns = {
        "[!WARNING]": r"\begin{tcolorbox}[colback=red!5!white,colframe=red!75!black,title=Warning]",
        "[!IMPORTANT]": r"\begin{tcolorbox}[colback=yellow!10!white,colframe=yellow!60!black,title=Important]",
        "[!NOTE]": r"\begin{tcolorbox}[colback=blue!5!white,colframe=blue!75!black,title=Note]",
        "[!TIP]": r"\begin{tcolorbox}[colback=green!5!white,colframe=green!75!black,title=Tip]",
    }

    for marker, latex_box in callout_patterns.items():
        if text.strip().startswith(f"> {marker}"):
            content = text.strip()[len(f"> {marker}"):].strip()
            content = latex_escape(content)
            content = re.sub(r'\*\*(.+?)\*\*', r'\\textbf{\1}', content)
            content = re.sub(r'\*(.+?)\*', r'\\textit{\1}', content)
            return f"{latex_box}\n{content}\n\\end{{tcolorbox}}"

    # Naked URLs → \href{url}{url}
    text = re.sub(r'(?<!href\{)(https?://[^\s}]+)', r'\\href{\1}{\1}', text)
    # Markdown bold and italic
    text = re.sub(r'\*\*(.+?)\*\*', r'\\textbf{\1}', text)
    text = re.sub(r'\*(.+?)\*', r'\\textit{\1}', text)
    # <sub>...</sub> → smaller font
    #text = re.sub(r'<sub>(.+?)</sub>', r'{\\fontsize{8}{10}\\selectfont \1}', text)
    # <br> → line break
    text = text.replace("<br>", r"\\")
    
    return text

# Define the markdown sections to process.
markdown_sections = ["installation", "quick_start", "available_tools", "introduction", "full_manual"]

# Now build the document in proper order:
# 1. Start with the preamble.
doc_tex = r"""\documentclass[10pt]{extarticle}
\usepackage{etoolbox}
\AtBeginEnvironment{tabular}{\fontsize{9}{11}\selectfont}
\usepackage[most]{tcolorbox}
\usepackage[colorlinks=true, linkcolor=black, urlcolor=blue]{hyperref}
\usepackage{graphicx}
\usepackage[utf8]{inputenc}
\DeclareUnicodeCharacter{221B}{\ensuremath{\sqrt[3]{}}} % Map ∛ to cube-root
\usepackage{xcolor}
\definecolor{mpgDarkGreen}{HTML}{005555}
\definecolor{mpgLightGreen}{HTML}{006c66}
\definecolor{mpgDarkGrey}{HTML}{777777}
\definecolor{mpgAccentOrange}{HTML}{ef7c00}
\definecolor{mpgAccentGreen}{HTML}{c6d325}
\definecolor{mpgAccentBlue}{HTML}{29485d}
\definecolor{mpgAccentCyan}{HTML}{00b1ea}
\usepackage[a4paper, left=2.5cm, right=2.5cm, top=2.5cm, bottom=2.5cm]{geometry}
\usepackage{setspace}
\setstretch{1.5}
\usepackage{titlesec}

\renewcommand{\figurename}{Fig.}
\renewcommand{\thefigure}{\arabic{figure}\,\textbar}
\usepackage{caption}
\DeclareCaptionFont{figfont}{\fontsize{9}{11}\selectfont}
\captionsetup[figure]{labelfont={bf,figfont}, textfont=figfont, labelsep=space}

\usepackage{enumitem} % for description formatting

\titleformat{\section}
  {\color{mpgDarkGreen}\normalfont\fontsize{16}{18}\selectfont\bfseries}
  {\thesection}{1em}{}

\titleformat{\subsection}
  {\color{mpgLightGreen}\normalfont\fontsize{14}{16}\selectfont\bfseries}
  {\thesubsection}{1em}{}

\titleformat{\subsubsection}
  {\color{mpgLightGreen}\normalfont\fontsize{12}{14}\selectfont\bfseries}
  {\thesubsubsection}{1em}{}

\usepackage{listings}
\lstset{
    basicstyle=\color{black}\fontsize{9}{11}\selectfont\ttfamily,
    backgroundcolor=\color{gray!20},
    frame=single,
    rulecolor=\color{black},
    breaklines=true
}
\usepackage{pdfpages}
\usepackage[sfdefault]{roboto}
\renewcommand*\familydefault{\sfdefault}
\setlength{\parindent}{0pt}
\usepackage{hyperref}
\usepackage{longtable}
\usepackage{fancyhdr}
\pagestyle{fancy}
\fancyhf{} % clear all headers and footers
\fancyhead[R]{\textsf{\nouppercase{\leftmark}}}
\fancyfoot[C]{\textsf{\thepage}} 
\renewcommand{\headrulewidth}{0.4pt}
\renewcommand{\footrulewidth}{0pt}
% Make each new section start on a new page.
\let\oldsection\section
\renewcommand{\section}[1]{\clearpage\oldsection{#1}}
\begin{document}
"""
# 2. Insert the Cover page (it uses unnumbered section commands so it will not appear in the TOC).
doc_tex += r"""
\includepdf[pages=1]{cover.pdf}
\clearpage
\pagenumbering{roman}
\setcounter{page}{1}
"""
doc_tex += "\n\\clearpage\\pagenumbering{roman}\\setcounter{page}{1}\n"
doc_tex += "% Temporarily restore the original section command for the TOC\n\\let\\sectionTemp=\\section\n\\let\\section=\\oldsection\n\\clearpage\n{\\setstretch{1.15}\\tableofcontents}\n\\clearpage\n\\let\\section=\\sectionTemp\n"
doc_tex += "\n\\clearpage\\pagenumbering{arabic}\\setcounter{page}{1}\n"

# 3. Process the other markdown sections.
for section in markdown_sections:
    doc_tex = create_section_latex(section, doc_tex)

# 4. Finally, add the code documentation.
doc_tex = create_code_latex(doc_tex)

doc_tex += r"\end{document}"

with open(combined_tex_path, "w", encoding="utf-8") as f:
    f.write(doc_tex)

# Compile the LaTeX file into a PDF using pdflatex.
latex_executable = r'C:\Users\bunzelh\AppData\Local\Programs\MiKTeX\miktex\bin\x64\pdflatex.exe'
cmd = [
    latex_executable,
    '-output-directory', ".",
    '-interaction=nonstopmode',
    combined_tex_path
]

try:
    result = subprocess.run(
        cmd,
        check=True,
        stdout=subprocess.PIPE,
        stderr=subprocess.PIPE,
        text=True,
        timeout=60
    )
    print("LaTeX compilation finished successfully.")
except subprocess.TimeoutExpired:
    print("LaTeX compilation timed out!")
except subprocess.CalledProcessError as err:
    print("LaTeX compilation failed with errors:")
    print(err.stderr)

if os.path.exists(generated_pdf):
    shutil.copy(generated_pdf, pdf_path)
    print(f"Final PDF generated at {pdf_path}")
else:
    print("PDF generation failed. Please check for errors during compilation.")


  """


['..\\aizymes\\design_AlphaFold3_001.py', '..\\aizymes\\design_ESMfold_001.py', '..\\aizymes\\design_match_001.py', '..\\aizymes\\design_MDMin_001.py', '..\\aizymes\\design_MPNN_001.py', '..\\aizymes\\design_RosettaDesign_001.py', '..\\aizymes\\design_RosettaRelax_001.py', '..\\aizymes\\helper_002.py', '..\\aizymes\\main_design_001.py', '..\\aizymes\\main_running_003.py', '..\\aizymes\\main_scripts_001.py', '..\\aizymes\\main_startup_002.py', '..\\aizymes\\plotting_002.py', '..\\aizymes\\plotting_tree_001.py', '..\\aizymes\\scoring_BioDC_001.py', '..\\aizymes\\scoring_efields_001.py', '..\\aizymes\\setup_system_001.py']
LaTeX compilation failed with errors:

Final PDF generated at ..\..\AIzymes_Manual.pdf
