# Workbench

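Development notebook: converts Markdown documents to LaTeX, merges them into a template, and compiles the result to PDF with xelatex.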

In [None]:
import pypandoc
import os
import subprocess
import yaml
import re

def compile_latex_with_xelatex(latex_content: str
                               , output_directory: str='out'
                               , tex_filename: str='document.tex'
                               , pdf_filename: str='document.pdf') -> str:
    """
    Write LaTeX source to disk and compile it to a PDF using xelatex.

    Args:
        latex_content (str): The complete LaTeX source to compile.
        output_directory (str): Directory receiving the .tex file and all
            xelatex output. Created if missing. Default is 'out'.
        tex_filename (str): Name of the .tex file written inside
            output_directory. Default is 'document.tex'.
        pdf_filename (str): Name the resulting PDF should have inside
            output_directory. Default is 'document.pdf'.

    Returns:
        str: The path to the generated PDF file if compilation is successful,
        None otherwise (the reason is printed).

    Note: Written with ChatGPT
    """

    # Ensure the output directory exists
    os.makedirs(output_directory, exist_ok=True)

    # Write the LaTeX content to a .tex file. xelatex reads its input as
    # UTF-8, so do not depend on the platform's default encoding.
    tex_filepath = os.path.join(output_directory, tex_filename)
    with open(tex_filepath, 'w', encoding='utf-8') as tex_file:
        tex_file.write(latex_content)

    # xelatex names its output after the .tex file (the job name), not after
    # pdf_filename, so work out where the PDF will really appear.
    jobname = os.path.splitext(os.path.basename(tex_filename))[0]
    generated_pdf = os.path.join(output_directory, jobname + '.pdf')
    requested_pdf = os.path.join(output_directory, pdf_filename)

    # Compile the .tex file to a PDF using xelatex
    try:
        process = subprocess.run(
            # nonstopmode keeps xelatex from waiting for keyboard input when
            # it hits an error.
            ['xelatex', '-interaction=nonstopmode',
             '-output-directory', output_directory, tex_filepath],
            stdout=subprocess.PIPE, stderr=subprocess.PIPE,
            # Compiler logs may contain bytes that are not valid text;
            # replace them instead of failing while decoding the output.
            text=True, errors='replace')

        # Check for errors
        if process.returncode != 0:
            print("Error during LaTeX compilation:")
            print(process.stdout)
            print(process.stderr)
            return None

        print("Compilation successful")

        # Give the PDF the name the caller asked for, so the returned path
        # always points at an existing file.
        if os.path.abspath(generated_pdf) != os.path.abspath(requested_pdf):
            os.replace(generated_pdf, requested_pdf)

        # Return the path to the generated PDF
        return requested_pdf

    except (OSError, subprocess.SubprocessError) as e:
        # Typically: xelatex is not installed, or the PDF could not be renamed.
        print(f"An error occurred: {e}")
        return None

class SingleDocument:
    """Class holding information for a single document

    A document starts as a markdown file on disk. Its text is read lazily,
    passed through a markdown modification step, converted to LaTeX with
    pandoc, and finally passed through a LaTeX modification step that
    prepends a title macro taken from the YAML header. Every intermediate
    result is cached on the instance.
    """

    ### CLASS VARIABLES
    filepath_source: str = ''
    """Path to which the filename is relative"""

    verbose: bool = True
    """Set to True for verbose info"""

    def __init__(self,filename: str):
        """Remember the filename; nothing is read from disk yet."""
        # set instance variables
        self.filename = filename
        self._markdown_raw: str = None #markdown file content as text (raw = unmodified)
        self._markdown_mod: str = None #markdown file content as text (modified)
        self._latex_raw: str = None #latex representation of file (raw = unmodified)
        self._latex_mod: str = None #latex representation of file (modified)

        self._metadata: dict = None #dictionary holding metadata
        return
    
    ### PUBLIC
    
    def get_markdown_text(self) -> str:
        """return the (modified) markdown text of this document
        
        if it has not been loaded, it will do so"""

        if self._markdown_mod is not None:
            return self._markdown_mod
        
        if self._markdown_raw is None:
            self._read_markdown_text()

        return self._modify_markdown()
    
    def get_latex_text(self) -> str:
        """return the latex text of this document
        
        loads and converts it if needed"""

        if self._latex_mod is not None:
            return self._latex_mod
        
        if self._latex_raw is None:
            self.get_markdown_text() #get and modify markdown
            self._convert_to_latex()

        return self._modify_latex()

    
    ### PRIVATE

    def _read_markdown_text(self) -> str:
        """Reads the file specified by the filename, stores it, and returns it
        
        also loads the metadata"""
        if SingleDocument.verbose:
            print("Reading content of file "+self.filename+"...")

        # os.path.join inserts the separator only when needed, so the source
        # directory works with or without a trailing slash.
        filepath = os.path.join(SingleDocument.filepath_source, self.filename)
        assert os.path.exists(filepath), \
            "Document "+filepath+" does not exist!"
        
        with open(filepath,'r',encoding='utf-8') as file:
            assert file.readable(), "File "+filepath+" is not readable!"
            content = file.read()

        ### Extract Metadata
        self._metadata = self._extract_yaml_header(content)

        self._markdown_raw = content
        return content
    
    def _modify_markdown(self) -> str:
        """Modifies the raw markdown string, stores it, and returns it

        Currently no modification is applied: the raw text is stored as is."""
        if SingleDocument.verbose:
            print("Mopdifying raw markdown of file "+self.filename+"...")

        assert self._markdown_raw is not None, "need to load raw markdown first!"

        ### CONVERTIONS
        converted = self._markdown_raw

        ### STORING
        self._markdown_mod = converted
        return converted
    
    def _extract_yaml_header(self, markdown_content: str):
        """Parses the YAML header at the very start of the markdown text

        Returns whatever the YAML parser produces for the text between the
        leading '---' line and the next '---', or None if there is no header."""
        # Use a regular expression to find the YAML header at the beginning of the file
        yaml_header = re.match(r'^---\n(.*?)\n---', markdown_content, re.DOTALL)
        if yaml_header:
            yaml_content = yaml_header.group(1)
            # Parse the YAML content
            return yaml.safe_load(yaml_content)
        return None
    
    # LATEX RELATED STUFF
    
    def _convert_to_latex(self) -> str:
        """Converts the modified markdown to latex, stores it, and returns it"""
        if SingleDocument.verbose:
            print("Converting modified markdown of file "+self.filename+" to Latex...")

        assert self._markdown_mod  is not None, "need to have a modified markdown text first!"

        converted = pypandoc.convert_text(source=self._markdown_mod, to='latex',format='md')

        self._latex_raw = converted
        return converted
    
    def _modify_latex(self) -> str:
        """Modifies the raw latex string, stores it, and returns it

        If the metadata provides a title, a \\mezdoctitle{...} line is put in
        front of the raw latex."""
        if SingleDocument.verbose:
            print("Modifying raw latex of file "+self.filename+"...")

        assert self._latex_raw is not None, "need to convert to latex first!"

        ### CONVERTIONS

        # use metadata if available
        header = ''

        # A YAML header may parse to something other than a mapping (e.g. a
        # plain string), and a title may be a number, a date or empty, so
        # check the shape and convert the title to text before concatenating.
        # NOTE(review): the title is inserted verbatim; LaTeX special
        # characters in it are not escaped.
        metadata = self._metadata
        if isinstance(metadata, dict) and metadata.get("title") is not None:
            header += "\\mezdoctitle{"+str(metadata["title"])+"}\n\n"

        converted = header + self._latex_raw

        ### STORING
        self._latex_mod = converted
        return converted
    
    


def merge_documents(documents: "list[SingleDocument]",
                    template_path: str = 'template/template_outputfile.tex',
                    placeholder: str = '% content_placeholder') -> str:
    """Merges a bunch of documents into a single latex string

    Each document's latex text is wrapped in its own minipage, the wrapped
    pieces are concatenated in the given order, and the result replaces the
    placeholder in the template file.

    Args:
        documents: objects providing get_latex_text(), in output order.
        template_path: path of the latex template file to fill in.
        placeholder: text inside the template that is replaced by the content.

    Returns:
        The template text with the placeholder replaced by the merged content.

    Raises:
        ValueError: if the template does not contain the placeholder, since
            the merged content would otherwise be dropped without notice.
    """

    # wrap every document into a minipage; the backslashes are doubled
    # because "\c" and "\e" are not valid string escape sequences
    wrapped = [
        "\\begin{minipage}{\\columnwidth}\n"
        + document.get_latex_text() + "\n"
        + "\\end{minipage}\n"
        for document in documents
    ]
    concat = ''.join(wrapped)

    # put it into the template
    with open(template_path, 'r', encoding='utf-8') as templatefile:
        template_latex = templatefile.read()

    if placeholder not in template_latex:
        raise ValueError(
            "Template " + template_path + " does not contain the placeholder "
            + repr(placeholder))

    return template_latex.replace(placeholder, concat)

In [None]:
### Settings

# directory holding the markdown sources, and the files to merge (in order)
filepath_source = 'sample/'
filenames = ['sample1.md','sample3.md']

SingleDocument.verbose = True

### Run

# all documents resolve their filename relative to the same source directory
SingleDocument.filepath_source = filepath_source
documents = [SingleDocument(name) for name in filenames]

# show the markdown of every document (this also loads the files)
for document in documents:
    print(document.get_markdown_text())

# convert everything to latex and put it into the template
latex_total = merge_documents(documents=documents)

# print(latex_total)

# build the PDF; as the last expression of the cell its result is displayed
compile_latex_with_xelatex(latex_total)
