# latex.formatting

> Adjust formatting for text from LaTeX files to be more usable by Markdown files for `Obsidian.md` 

In [None]:
#| default_exp latex.formatting

In [None]:
#| export
import re
from typing import Union

import regex

from trouver.helper.files_and_folders import (
    text_from_file
)
from trouver.helper.regex import inline_latex_indices, separate_indices_from_str
from trouver.helper.latex.comments import remove_comments
from trouver.helper.latex.macros_and_commands import custom_commands, regex_pattern_detecting_command
from trouver.latex.preamble import divide_preamble 


See `helper.latex.macros_and_commands`, which contains some functions originally written for this module.

## Numbers

In [None]:
#| export
def is_number(
        x: Union[float, int, complex, str, None]
        ) -> bool:
    """Return `True` if the input `x` represents a number.

    Numeric Python objects (`float`, `int`, `complex`) always count as
    numbers. A string counts as a number if it consists of an optional
    leading `-` followed by digits with at most one decimal point
    (e.g. `"3.45"`, `"-2"`, `".5"`, `"3."`). `None` and strings without
    any digit (e.g. `""`, `"-"`, `"."`) are not numbers.

    This function is different from Python's built-in `str.isdigit` and
    `str.isnumeric` methods, which return `True` only when all characters
    of a string are digits/numeric characters (so they reject `"3.45"`).
    """
    if isinstance(x, (float, int, complex)):
        return True
    # `None` is tolerated so that missing values can be passed in directly.
    if x is None:
        return False
    if x.startswith('-'):
        x = x[1:]
    # Drop at most one decimal point. Deleting it (rather than substituting
    # a digit for it) keeps a lone "." from being accepted as a number.
    return x.replace(".", "", 1).isdigit()

In [None]:
# Numeric-looking strings and numeric Python objects are numbers.
assert is_number("3.45")
assert is_number(1 + 5j)
assert is_number(5)
assert is_number(0.0)
# Strings that are not a plain decimal number, and `None`, are not.
assert not is_number("3.43.55")
assert not is_number("hie")
assert not is_number("[^1]")
assert not is_number(None)

In [None]:
#| export
# Environment names used as the default list of "numbered" environments.
# NOTE(review): the consumers of this list are outside this section.
DEFAULT_NUMBERED_ENVIRONMENTS = [
    'theorem',
    'corollary',
    'lemma',
    'proposition',
    'definition',
    'conjecture',
    'remark',
    'example',
    'question',
]

In [None]:
from fastcore.test import test_eq

from trouver.helper.tests import _test_directory# , non_utf8_chars_in_file


## Formatting modifications

### Identify macros and commands to replace

Authors usually define a lot of custom commands and macros in their LaTeX files. Such customizations vary from author to author and most customized commands are not recognized by Obsidian. 

See `nbs/_tests/latex_examples/commands_example/main.tex` for some examples of custom commands.

In [None]:
#| export
def replace_command_in_text(
        text: str,
        command_tuple: tuple[str, int, Union[None, str], str], # Consists of 1. the name of the custom command 2. the number of parameters 3. The default argument if specified or `None` otherwise, and 4. the display text of the command.
    ) -> str:
    r"""
    Replaces all invocations of the specified command in `text` with the display text
    with the arguments used in the display text.

    Assumes that '\1', '\2', '\3', etc. are not part of the display text.
    """
    display_text = command_tuple[3]
    command_pattern = regex_pattern_detecting_command(command_tuple)
    # Turn the display text into a regex replacement template: literal
    # backslashes are escaped first, then every LaTeX parameter `#n` becomes
    # the backreference `\n`.
    replace_pattern = display_text.replace('\\', r'\\')
    # `(?!\d)` rather than `\b`: a parameter directly followed by a letter or
    # an underscore (e.g. `#1_0`, `#1a`) must still be substituted, whereas
    # `\b` would skip it. Multi-digit sequences such as `#10` are still left
    # untouched, as before.
    replace_pattern = re.sub(r'#(\d)(?!\d)', r'\\\1', replace_pattern)
    # Each match is handled individually because whether the default optional
    # argument applies depends on the specific invocation.
    text = regex.sub(
        command_pattern,
        lambda match: _replace_command(match, command_tuple, command_pattern, replace_pattern),
        text)
    return text


def _replace_command(
        match: regex.match,
        command_tuple: tuple[str, int, Union[None, str], str],
        command_pattern: regex.Pattern,
        replace_pattern: re.Pattern) -> str:
    """
    Replace the matched command with the display text
    
    This is a helper function to `replace_command_in_text`.
    """
    command_name, num_parameters, optional_arg, display_text = command_tuple
    start, end = match.span()
    matched_string_to_replace = match.string[start:end]
    if len(match.groups()) > 0 and match.group(1) is None:
        replace_pattern = replace_pattern.replace(r'\1', optional_arg)
        replaced_string = regex.sub(command_pattern, replace_pattern, matched_string_to_replace)
        return replaced_string
    else:
        return regex.sub(command_pattern, replace_pattern, matched_string_to_replace)


# def _replace_nonexplicit_instances_of_command(
#         text: str,
#         command_tuple: tuple[str, int, Union[None, str], str], # Consists of 1. the name of the custom command 2. the number of parameters 3. The default argument if specified or `None` otherwise, and 4. the display text of the command.
#     ) -> str:
#     """
#     Replace the nonexplicit instances of a custom command.

#     Sometimes, a LaTeX command is used nonexplicitly, i.e. the arguments are not
#     explicitly typed with surrounding curly braces `{}`.  An example of this phenomenon
#     is a command named `\til` defined by `\newcommand{\til}[1]{{\widetilde{#1}}}`
#     that is later invoked using `$\til \calh_g$`.

#     This function is only a workaround.

#     This is a helper function to `replace_command_in_text`.
#     """



In [None]:
#| hide


In [None]:
# Basic
command_tuple = ('Sur', 0, None, r'\mathrm{Sur}')
text = r'The number of element of $\Sur(\operatorname{Cl} \mathcal{O}_L, A)$ is ... Perhaps $\Sur$ is nonempty.'
test_eq(replace_command_in_text(text, command_tuple), r'The number of element of $\mathrm{Sur}(\operatorname{Cl} \mathcal{O}_L, A)$ is ... Perhaps $\mathrm{Sur}$ is nonempty.')


# One parameter
command_tuple = ('field', 1, None, r'\mathbb{#1}')
text = r'$\field{Q}$ is the field of rational numbers. $\field{C}$ is the field of complex numbers'
test_eq(replace_command_in_text(text, command_tuple), r'$\mathbb{Q}$ is the field of rational numbers. $\mathbb{C}$ is the field of complex numbers')

# Multiple parameters
command_tuple = ('mat', 4, None, r'\left[\begin{array}{cc}#1 & #2 \\ #3 & #4\end{array}\right]')
text = r'\mat{{123}}{asdfasdf{}{}}{{{}}}{{asdf}{asdf}{}}' # This is a balanced str.
test_eq(replace_command_in_text(text, command_tuple), r'\left[\begin{array}{cc}{123} & asdfasdf{}{} \\ {{}} & {asdf}{asdf}{}\end{array}\right]')

# Multiple parameters, one of which is an optional parameter
command_tuple = ('plusbinomial', 3, '2', r'(#2 + #3)^#1')
# When the optional argument is omitted, the default value is used.
text = r'\plusbinomial{x}{y}'
test_eq(replace_command_in_text(text, command_tuple), r'(x + y)^2')

# When the optional argument is supplied explicitly
text = r'\plusbinomial[4]{x}{y}'
test_eq(replace_command_in_text(text, command_tuple), r'(x + y)^4')


# One parameter that is optional.
command_tuple = ('greet', 1, 'world', r'Hello #1!')
# When the optional argument is omitted, the default value is used.
text = r'\greet'
test_eq(replace_command_in_text(text, command_tuple), r'Hello world!')

# When the optional argument is supplied explicitly
text = r'\greet[govna]'
test_eq(replace_command_in_text(text, command_tuple), r'Hello govna!')

# In the following example, `\del` is a command defined as `\delta`.
# Any invocation of `\delta` should NOT be replaced into `\deltata`.
command_tuple = (r'del', 0, None, r'\delta')
text = r'\del should be replaced. \delta should not.'
test_eq(replace_command_in_text(text, command_tuple), r'\delta should be replaced. \delta should not.')


In [None]:
#| export
def replace_commands_in_text(
        text: str, # The text in which to replace the commands. This should not include the preamble of a latex document.
        command_tuples: list[tuple[str, int, Union[None, str], str]], # An output of `custom_commands`. Each tuple Consists of 1. the name of the custom command 2. the number of parameters 3. The default argument if specified or `None` otherwise, and 4. the display text of the command.
        repeat: int = 1 # The number of times to repeat replacing the commands throughout the text; note that some custom commands could be "nested", i.e. the custom commands are defined in terms of other custom commands. Defaults to `1`, in which custom commands are replaced throughout the entire document once. If set to -1, then this function attempts to replace custom commands until no commands to replace are found. 
    ) -> str:
    r"""
    Replaces all invocations of the specified commands in `text` with the
    display text with the arguments used in the display text.

    Within one pass, the commands are replaced one after another in the
    order in which they appear in `command_tuples`. Passes stop early as
    soon as a pass leaves the text unchanged.

    Assumes that '\1', '\2', '\3', etc. are not part of the display text.

    If `repeat` is set to `-1`, then this function attempts to replace
    custom commands until no commands to replace are found. However, this
    might cause infinite loops for some documents.

    """
    # A negative `repeat` never reaches 0 by decrementing, so in that case
    # only the "nothing changed" check below ends the loop.
    while repeat != 0:
        old_text = text
        for command_tuple in command_tuples:
            text = replace_command_in_text(text, command_tuple)
        repeat -= 1
        if old_text == text:
            break
    return text

The `replace_commands_in_text` function replaces custom commands in (the main part of) a LaTeX document.

In [None]:
# Two independent custom commands, `\field` and `\mat`, are used in the text.
text = r'''Here is a matrix over $\field{Q}$: $\mat{1/2}{2}{-1}{5/7}$.
           Note that it is not over $\field{F}_7$ and not over $\field{F}_2$.'''

command_tuples = [
    ('field', 1, None, r'\mathbb{#1}'),
    ('mat', 4, None, r'\left[\begin{array}{cc}#1 & #2 \\ #3 & #4\end{array}\right]')]

sample_output = replace_commands_in_text(text, command_tuples)

# Every invocation is expanded; the text around the invocations is unchanged.
test_eq(sample_output, 
        r'''Here is a matrix over $\mathbb{Q}$: $\left[\begin{array}{cc}1/2 & 2 \\ -1 & 5/7\end{array}\right]$.
           Note that it is not over $\mathbb{F}_7$ and not over $\mathbb{F}_2$.''')


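Note that some writers define custom commands using other custom commands. By default, the `replace_commands_in_text` function makes a single pass over the text, replacing the custom commands one at a time in the order in which they are listed. In the following example, the custom command `\Gm` is defined using other custom commands; because `\Gm` is listed before them, the "inner" commands introduced by replacing the "outer" command `\Gm` are also replaced within the same pass: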

In [None]:
# TODO: continue this example
text = r'''$\Gm$'''

# `\Gm` is defined in terms of `\bG` and `\mult`, and is listed before them.
command_tuples = [
    ('Gm', 0, None, r'\bG_\mult'),
    ('bG', 0, None, r'\mathbb{G}'),
    ('mult', 0, None, r'\times'),
]

# With the default `repeat=1`, the commands are applied once each, in list order.
sample_output = replace_commands_in_text(text, command_tuples)
test_eq(sample_output, 
        r'''$\mathbb{G}_\times$''')

In [None]:
#| export 
def replace_commands_in_latex_document(
        document: str,
        repeat: int = 1 # The number of times to repeat replacing the commands throughout the text; note that some custom commands could be "nested", i.e. the custom commands are defined in terms of other custom commands. Defaults to `1`, in which custom commands are replaced throughout the entire document once. If set to -1, then this function attempts to replace custom commands until no commands to replace are found.  See also `replace_commands_in_text`
        ) -> str:
    """Return the part of the latex document after the preamble with
    invocations of custom commands/operators replaced with their display
    text.

    `document` is expected to include the preamble: the custom commands
    are read from the preamble only, so all custom commands and operators
    are assumed to be defined there.

    If commands with the same name are defined multiple times, only the
    last definition is used.

    Invocations in commented out text are replaced as well.
    """
    preamble, body = divide_preamble(document)
    # Keyed by command name (the first entry of each tuple); a later
    # definition overwrites an earlier one with the same name.
    latest_definitions = {}
    for command_tuple in custom_commands(preamble):
        name = command_tuple[0]
        latest_definitions[name] = command_tuple
    return replace_commands_in_text(
        body, list(latest_definitions.values()), repeat)
    

In [None]:
# Load a sample document whose preamble defines custom commands.
file = _test_directory() / 'latex_examples' / 'commands_recursive_example' / 'main.tex'
document = text_from_file(file)
commands_replaced = replace_commands_in_latex_document(document)
# The preamble is not part of the output.
assert commands_replaced.startswith(r'\begin{document}')
assert commands_replaced.endswith(r'\end{document}')
assert r'\S' not in commands_replaced
assert r'\mathbb{S}1' in commands_replaced  # Note that $\S$ is defined twice in the preamble; only the latter definition is used.
assert r'\field{Q}$' not in commands_replaced
assert r'\mathbb{Q}$' in commands_replaced
# Invocations inside LaTeX comments are replaced too.
assert r'\commentedout' not in commands_replaced
assert r'This is actually a command that is commented out, but it is also replaced!' in commands_replaced
print(commands_replaced)

\begin{document}

$\mathbb{S}1$
%$\mathbf{Q}$
%$\mathbf{Q}$
%This is actually a command that is commented out, but it is also replaced!
$\mathbb{Q}$

\end{document}


### Replace commonly used syntax

Obsidian does not compile all LaTeX syntax. For example

- `\( \)` and `\[ \]` are not recognized as math mode delimiters.
- `\begin{equation} \end{equation}` and `\begin{align} \end{align}` (as well as their unnumbered versions with `*`) require surrounding `$$`.

The `adjust_common_syntax_to_markdown` function gives options to modify formatting from LaTeX text.

In [None]:
#| export

def _replace_math_mode_delimiters(text: str):
    """Helper function to `adjust_common_syntax_to_markdown."""
    text = re.sub(r'\\\(|\\\)', '$', text)
    text = re.sub(r'\\\[|\\]', '$$', text)
    return text


def _replace_equationlike_envs(text: str):
    """Helper function to `adjust_common_syntax_to_markdown."""
    text = re.sub(r'(\\begin\{(?:align|displaymath|equation|eqnarray)\*?\})', r'$$\1', text)
    text = re.sub(r'(\\end\{(?:align|displaymath|equation|eqnarray)\*?\})', r'\1$$', text)
    return text


def _replace_backtick_and_apostrophe_quotes(text: str):
    """Helper function to `adjust_common_syntax_to_markdown."""
    text = re.sub(r"``(.*?)''", r'"\1"', text, flags=re.DOTALL)
    return text



In [None]:
#| export

def _inline_mathmode_to_own_paragraph(text: str):
    """Separate each `$$...$$` math mode string in `text` from the
    surrounding text by a blank line, adding newlines only where fewer
    than two are present.

    When newlines are added next to a math mode string, one blank space
    character `' '` directly adjacent to the math mode string is removed
    if it exists.

    Helper function to `adjust_common_syntax_to_markdown`.
    """
    parts = _separate_inline_latex(text)
    # Walk over consecutive pairs. The list is read by index (not via a
    # snapshot) because a part modified in one step is reread in the next.
    for idx in range(1, len(parts)):
        before, after = parts[idx - 1], parts[idx]
        if before.endswith('$$') and _starts_with_less_than_two_newlines(after):
            after = _remove_one_blank_space_if_exists(after, 'start')
            parts[idx] = _make_start_with_two_newlines(after)
        if after.startswith('$$') and _ends_with_less_than_two_newlines(before):
            before = _remove_one_blank_space_if_exists(before, 'end')
            parts[idx - 1] = _make_end_with_two_newlines(before)
    return ''.join(parts)


def _separate_inline_latex(
        text: str
        ) -> list[str]: # Each str is a substring of `text`, either an inline mathmode string or a substring in between the inline mathmode strings.
    """Divide `text` into parts along the inline mathmode strings.

    The split positions are obtained from `inline_latex_indices` and the
    splitting itself is done by `separate_indices_from_str`.

    Invoking `"".join(output)` where `output` is an output to this
    function should recover `text`.

    If `text` starts with an inline-mathmode string, then the outputted
    list is expected to start with the empty string `''`
    (NOTE(review): this is determined by `separate_indices_from_str`).

    Helper function to `_inline_mathmode_to_own_paragraph`.
    """
    return separate_indices_from_str(text, inline_latex_indices(text))


def _starts_with_less_than_two_newlines(text: str):
    """Helper function to `_inline_mathmode_to_own_paragraph`."""
    return bool(re.match('(?!\n\n)', text))


def _ends_with_less_than_two_newlines(text: str):
    """Helper function to `_inline_mathmode_to_own_paragraph`."""
    return not text.endswith('\n\n')

def _make_start_with_two_newlines(text: str):
    """Helper function to `_inline_mathmode_to_own_paragraph`."""
    if text.startswith('\n'):
        return f'\n{text}'
    else:
        return f'\n\n{text}'


def _make_end_with_two_newlines(text: str):
    """Helper function to `_inline_mathmode_to_own_paragraph`."""
    if text.endswith('\n'):
        return f'{text}\n'
    else:
        return f'{text}\n\n'


def _remove_one_blank_space_if_exists(
        text: str,
        start_or_end: str #'start' or 'end'
        ):
    """Remove one blank space character `' '` from either the start or
    end of `text` if such a character exists.

    Helper function to `_inline_mathmode_to_own_paragraph`.
    """
    if start_or_end == 'start':
        if text.startswith(' '):
            return text[1:]
    else:
        if text.endswith(' '):
            return text[:-1]
    return text


In [None]:
#| hide
# A `$$...$$` string in the middle of a line becomes its own part; the
# single-dollar `$hi$` stays inside the surrounding text.
text = r"""lalala $$\operatorname{Gal}$$ lalala $hi$."""
sample_output = _separate_inline_latex(text)
test_eq(sample_output, ['lalala ', '$$\\operatorname{Gal}$$', ' lalala $hi$.'])
test_eq("".join(sample_output), text)


# A `$$...$$` string on its own line.
text = r"""lalala
$$\begin{align*}\operatorname{Gal}\end{align*}$$
lalala $hi$."""
sample_output = _separate_inline_latex(text)
test_eq(sample_output, ['lalala\n', '$$\\begin{align*}\\operatorname{Gal}\\end{align*}$$', '\nlalala $hi$.'])
test_eq("".join(sample_output), text)

# A `$$...$$` string spanning multiple lines.
text = r"""lalala
$$
\begin{align*}
\operatorname{Gal}
\end{align*}
$$
lalala $hi$."""
sample_output = _separate_inline_latex(text)
test_eq(sample_output, ['lalala\n', '$$\n\\begin{align*}\n\\operatorname{Gal}\n\\end{align*}\n$$', '\nlalala $hi$.'])
test_eq("".join(sample_output), text)

# Text that starts with a `$$...$$` string; joining the parts recovers the text.
text = r"""$$
\begin{align*}
\operatorname{Gal}
\end{align*}
$$
lalala $hi$."""
sample_output = _separate_inline_latex(text)
test_eq("".join(sample_output), text)

In [None]:
#| hide
# After the adjustment, the text before the `$$...$$` string ends with a
# blank line and the text after it starts with a blank line.
text = r"""lalala $$\operatorname{Gal}$$ lalala $hi$."""
sample_output = _inline_mathmode_to_own_paragraph(text)
listy = _separate_inline_latex(sample_output)
assert listy[0].endswith('\n\n')
assert listy[2].startswith('\n\n')


In [None]:
#| export

def _merge_multilines(text: str):
    """Merge the multi-line paragraphs of `text` into single lines, part
    by part, where the parts are those given by `_separate_inline_latex`.

    Helper function to `adjust_common_syntax_to_markdown`.
    """
    # TODO: account for enumerate and itemizes
    return ''.join(
        _merge_multilines_for_non_mathmode_part(piece)
        for piece in _separate_inline_latex(text))


def _merge_multilines_for_non_mathmode_part(text: str):
    """Join the lines of each paragraph of `text` into one line.

    A new paragraph begins at every blank line and at every "special"
    line (see `_is_special_line`). The lines of a paragraph are joined by
    single spaces and the paragraphs are joined by blank lines. The
    leading and trailing whitespace of `text` is kept as is.

    A `text` starting with `$$` is treated as a mathmode string and is
    returned unchanged.

    Helper function to `_merge_multilines`.
    """
    if text.startswith('$$'):
        return text
    leading, body, trailing = _strip_and_return_whitespaces(text)
    # Start with one empty paragraph so that lines preceding the first
    # paragraph break have a group to go into.
    paragraphs = [[]]
    for line in body.splitlines():
        starts_new_paragraph = not line.strip() or _is_special_line(line)
        if starts_new_paragraph:
            paragraphs.append([])
        paragraphs[-1].append(line)
    merged = "\n\n".join(
        ' '.join(paragraph).strip() for paragraph in paragraphs)
    return f'{leading}{merged.strip()}{trailing}'


def _is_special_line(line: str):
    """Helper function to `_merge_multilines."""
    stripped = line.strip()
    return (stripped.startswith('\\begin')
            or stripped.startswith('\\section') 
            or stripped.startswith('\\subsection')
            or stripped.startswith('\\subsubsection')
            or stripped.startswith('\\item')
            # or line.strip().startswith('$$')
            )

def _strip_and_return_whitespaces(
        text: str) -> tuple[str, str, str]: # The leading whitespaces, the sripped string, and the trailing whitespaces 
    """
    Strip `text` and return the leading and trailing whitespaces as well.

    Helper function to `_merge_multilines.
    """
    lstripped = text.lstrip()
    leading_whitespaces = text[:-len(lstripped)]
    rstripped = text.rstrip()
    trailing_whitespaces = text[len(rstripped):]
    return leading_whitespaces, text.strip(), trailing_whitespaces

In [None]:
#| hide
# Splitting the concatenation gives back the three pieces it was built from.
leading_whitespaces = '   '
stripped = 'asdf'
trailing_whitespaces = ' '
test_eq((leading_whitespaces, stripped, trailing_whitespaces), _strip_and_return_whitespaces(leading_whitespaces + stripped + trailing_whitespaces))

In [None]:
#| export
# TODO: give the option to replace emph with `****`, e.g. ``\emph{special}``.
# TODO: get everything that is tabbed to the left.
# TODO: merge multi-line text into singular lines.
# TODO: replace enumerated environments with markdown enumerated lists
# and itemizes with markdown bulleted lists
# TODO: replace \ref's with links and numbers if in mathmode.

# Names of the formatting options accepted by `adjust_common_syntax_to_markdown`.
MATH_MODE_DELIMITERS = 'math_mode_delimiters'
BEGIN_END_EQUATIONLIKE_ENV = 'begin_end_equationlike_env'
REPLACE_BACKTICK_AND_APOSTROPHE_QUOTES = 'replace_backtick_and_apostrophe_quotes'
REMOVE_COMMENTS = 'remove_comments'
INLINE_MATHMODE_TO_OWN_PARAGRAPH = 'inline_mathmode_to_own_paragraph'
MERGE_MULTILINE_PARAGRAPH = 'merge_multiline_paragraph'
def adjust_common_syntax_to_markdown(
        text: str,  # The LaTeX code to adjust to Markdown.
        options: Union[list[str], None] = None,  # Each `str` specifies what formatting should be done. Defaults to `None`, in which case all of the options listed in the docstring are applied.
        ) -> str:
    r"""
    Adjust some common syntax, such as math mode delimiters and equation/align
    environments, for Markdown.

    Assumes that the tokens for math mode delimiters (e.g. `\( \)` and `\[ \]`)
    are not used otherwise.

    The selected adjustments are always applied in the order in which they
    are listed below, regardless of their order in `options`.

    The following lists admissible parameters in the `options` parameter and
    the effects that including them have:

    - `"math_mode_delimiters"`
        - Replace `\( \)` as math mode delimiters with `$ $`.
        - Replace `\[ \]` as math mode delimiters with `$$ $$`.
    - `"begin_end_equationlike_env"`
        - Replace `\begin{...} \end{...}` with `$$\begin{...} \end{...}$$`
          and `\begin{...*} \end{...*}` with `$$\begin{...*} \end{...*}$$` for
          the following environments:
            - `align`
            - `displaymath`
            - `equation`
            - `eqnarray`
    - `"replace_backtick_and_apostrophe_quotes"`
        - replace ` `` ''` as quotation delimiters with `" "`.
    - `"remove_comments"`
        - remove LaTeX comments.
    - `"inline_mathmode_to_own_paragraph"`
        - Make it so that each inline-math mode string (of the form `$$...$$`)
          has at least one blank line before and after it. Also delete one blank
          space character `' '` from the text immediately before and after the
          in-line math mode string if they exist.
    - `"merge_multiline_paragraph"`
        - Some writers will type paragraphs in multiple lines, likely because
          their LaTeX editor of choice does not wrap text within a single
          line. Including this option merges "normal" paragraphs into a single
          line. 
            - Inline-mathmode text are not affected by this option.
    """
    # The default is built per call instead of being a (mutable) list in
    # the signature, which would be shared between all calls.
    if options is None:
        options = [
            MATH_MODE_DELIMITERS,
            BEGIN_END_EQUATIONLIKE_ENV,
            REPLACE_BACKTICK_AND_APOSTROPHE_QUOTES,
            REMOVE_COMMENTS,
            INLINE_MATHMODE_TO_OWN_PARAGRAPH,
            MERGE_MULTILINE_PARAGRAPH,
        ]
    if MATH_MODE_DELIMITERS in options:
        text = _replace_math_mode_delimiters(text)
    if BEGIN_END_EQUATIONLIKE_ENV in options:
        text = _replace_equationlike_envs(text)
    if REPLACE_BACKTICK_AND_APOSTROPHE_QUOTES in options:
        text = _replace_backtick_and_apostrophe_quotes(text)
    if REMOVE_COMMENTS in options:
        text = remove_comments(text)
    if INLINE_MATHMODE_TO_OWN_PARAGRAPH in options:
        text = _inline_mathmode_to_own_paragraph(text)
    if MERGE_MULTILINE_PARAGRAPH in options:
        text = _merge_multilines(text)
    return text

In [None]:
# A paragraph typed over several lines, with single-dollar math inside it.
text = r'''
are cellular and the DG category one gets by replacing the
topological spaces ${Mor}_{{\mathcal K}_{{top}}}(X_i,X_j)$ by their
cellular chain complexes equals ${\mathcal K}$.
'''
print(_merge_multilines(text))


are cellular and the DG category one gets by replacing the topological spaces ${Mor}_{{\mathcal K}_{{top}}}(X_i,X_j)$ by their cellular chain complexes equals ${\mathcal K}$.



Here is a basic example:

In [None]:
# Sample LaTeX text exercising every option: math mode delimiters,
# equation-like environments, LaTeX-style quotes and a comment.
text = r'''I want to talk about \(\mathbb{Z}[i]\). It is the ring whose elements are of the form $a+bi$ where $a,b \in \mathhbb{Z}$.
It has a multiplication structure:
\[ (a+bi) (c+di) = (ac-bd) + (ad+bc)i.\]

Here is an equation:
\begin{equation}
5+7 = 12
\end{equation}

Here is another:
\begin{equation*}
5+6 = 11
\end{equation*}

Here is an align:
\begin{align}
5+7 = 12
\end{align}

Here is another:
\begin{align*}
5+6 = 11
\end{align*}

\begin{eqnarray}
asdf
\end{eqnarray}

\begin{displaymath}
asdf
\end{displaymath}

asdf ``hello''

``a multiline
quotation'' % Sneak comment!

'''
# All options are applied by default.
sample_output = adjust_common_syntax_to_markdown(text)
print(sample_output)
# `\( \)` and `\[ \]` are gone.
assert r'\(' not in sample_output
assert r'\)' not in sample_output
assert r'\[' not in sample_output
assert r'\]' not in sample_output
# Equation-like environments are wrapped in `$$`.
assert r'$$\begin{align}' in sample_output
assert r'\end{align}$$' in sample_output
assert r'$$\begin{equation}' in sample_output
assert r'\end{equation}$$' in sample_output
assert r'$$\begin{align*}' in sample_output
assert r'\end{align*}$$' in sample_output
assert r'$$\begin{equation*}' in sample_output
assert r'\end{equation*}$$' in sample_output
assert r'$$\begin{eqnarray}' in sample_output
assert r'\end{eqnarray}$$' in sample_output
assert r'$$\begin{displaymath}' in sample_output
assert r'\end{displaymath}$$' in sample_output
# LaTeX-style quotes are replaced, and the multi-line quotation is merged into one line.
assert r"``hello''" not in sample_output
assert r'"hello"' in sample_output
assert '"a multiline quotation"' in sample_output
assert ' "a multiline quotation"' not in sample_output
assert '``' not in sample_output
# The comment is removed.
assert r'%' not in sample_output
assert r'Sneak comment' not in sample_output

I want to talk about $\mathbb{Z}[i]$. It is the ring whose elements are of the form $a+bi$ where $a,b \in \mathhbb{Z}$. It has a multiplication structure:

$$ (a+bi) (c+di) = (ac-bd) + (ad+bc)i.$$

Here is an equation:

$$\begin{equation}
5+7 = 12
\end{equation}$$

Here is another:

$$\begin{equation*}
5+6 = 11
\end{equation*}$$

Here is an align:

$$\begin{align}
5+7 = 12
\end{align}$$

Here is another:

$$\begin{align*}
5+6 = 11
\end{align*}$$

$$\begin{eqnarray}
asdf
\end{eqnarray}$$

$$\begin{displaymath}
asdf
\end{displaymath}$$

asdf "hello"

"a multiline quotation" 




## Formatting irregularities

Since writers use their own formatting for LaTeX documents, there will be formatting practices that the functions in this module do not account for or do not fully account for. The following parses the LaTeX document to warn the user of formatting choices that `trouver` does not account for. 

In [None]:
def warn_for_formatting_irregularities(
        document: str
        ) -> None:
    """Placeholder for warning the user about formatting choices in
    `document` that `trouver` does not account for.

    Not implemented yet: `document` is ignored and `None` is returned.
    The TODO notes below list the checks that are planned.
    """
    # TODO: Go through assumptions made by various functions
    # TODO: custom command for section
    # TODO: custom command is invoked, but does not match explicit formatting, e.g. {\tilde M}
    # TODO: warn about \documentclass; the most common are `amsart` and `article`, but some writers will use other classes, such as `standalone, svmult, icmart, compositio`
    # TODO: warn about \newcounter, cf https://www.overleaf.com/learn/latex/Counters#LaTeX_commands_for_working_with_counters.
    # TODO: warn about newcommand and DeclareMathOperator being declared as custom commands of their own. cf. arinkin_gaitsgory_sscsglc
    # TODO: warn about section/subsection/subsubsection being wrapped around some custom commands.
    return

## Correct syntax errors in autogenerated math mode strings