# latex.divide

> Divide LaTeX files into parts

In [None]:
#| default_exp latex.divide

In [None]:
#| export
from itertools import product
import re
from typing import Optional, Union

from pylatexenc.latexwalker import (
    LatexWalker, LatexEnvironmentNode, LatexMacroNode, LatexNode
)

from pylatexenc.latex2text import (
    LatexNodes2Text
)
import regex

from trouver.helper.files_and_folders import text_from_file

from trouver.latex.comments import remove_comments
from trouver.latex.formatting import replace_commands_in_latex_document
from trouver.latex.preamble import divide_preamble


In [None]:
from fastcore.test import ExceptionExpected, test_eq

from trouver.helper.tests import _test_directory# , non_utf8_chars_in_file


In [None]:
#| export
# matches `\newtheorem{theorem}{Theorem}`, `\newtheorem{proposition}[theorem]{Proposition}`
# does not match `\newtheorem{theorem}{Theorem}[Section]`

# SECOND_PARAMETER_PATTERN = re.compile(
#     # r'\\newtheorem\s*\{\s*(\w+)\s*\}\s*(\[\s*(\w+)\s*\])?\s*\{\s*(.*)\s*\}')
#     r'\\newtheorem\s*\{\s*(\w+)\s*\}\s*(\[\s*(\w+)\s*\])?\s*\{\s*(.*)\s*\}(?!\s*\[\s*(\w+)\s*\])')
# Capture groups of the two patterns below:
#   1 - the environment name (content of the first balanced `{...}`)
#   2 - the whole optional `[counter]` argument, if present
#   3 - the counter name inside that optional argument, if present
#   4 - the display name (content of the second balanced `{...}`)
#   5 - sits inside the negative lookahead, so it never captures anything
# The balanced-brace sub-patterns must recurse into THEMSELVES: `(?1)` for the
# environment name and `(?4)` for the display name. Recursing into group 2
# (the `[counter]` group) would make names with nested braces, such as
# `{Theorem \textbf{A}}`, fail to match.
SECOND_PARAMETER_PATTERN = regex.compile(
    r'\\newtheorem\s*'
    r'\{((?>[^{}]+|\{(?1)\})*)\}'
    r'\s*(\[\s*(\w+)\s*\])?\s*'
    r'\{((?>[^{}]+|\{(?4)\})*)\}'
    r'(?!\s*\[\s*(\w+)\s*\])',
    regex.MULTILINE)

# Same as `SECOND_PARAMETER_PATTERN`, but also accepts the starred form
# `\newtheorem*`.
SECOND_PARAMETER_PATTERN_WITH_OPTIONAL_STAR = regex.compile(
    r'\\newtheorem\*?\s*'
    r'\{((?>[^{}]+|\{(?1)\})*)\}'
    r'\s*(\[\s*(\w+)\s*\])?\s*'
    r'\{((?>[^{}]+|\{(?4)\})*)\}'
    r'(?!\s*\[\s*(\w+)\s*\])',
    regex.MULTILINE)

# matches `\newtheorem{theorem}{Theorem}`, `\newtheorem{theorem}{Theorem}[Section]`,
# does not match `\newtheorem{proposition}[theorem]{Proposition}`
# THIRD_PARAMETER_PATTERN = re.compile(
#     r'\\newtheorem\s*\{\s*(\w+)\s*\}\s*\{\s*(.*)\s*\}\s*(\[\s*(\w+)\s*\])?')
# Capture groups of the two patterns below:
#   1 - the environment name (content of the first balanced `{...}`)
#   2 - the display name (content of the second balanced `{...}`)
#   3 - the whole trailing `[counter]` argument, if present
#   4 - the counter name inside that trailing argument, if present
# `(?2)` recurses into group 2. The first brace pair uses it as well, which
# works because groups 1 and 2 are spelled with the same balanced-brace
# sub-pattern.
THIRD_PARAMETER_PATTERN = regex.compile(
    r'\\newtheorem\s*'
    r'\{((?>[^{}]+|\{(?2)\})*)\}'
    r'\s*'
    r'\{((?>[^{}]+|\{(?2)\})*)\}'
    r'\s*'
    r'(\[\s*(\w+)\s*\])?',
    regex.MULTILINE)

# Same as `THIRD_PARAMETER_PATTERN`, but also accepts the starred form
# `\newtheorem*`.
THIRD_PARAMETER_PATTERN_WITH_OPTIONAL_STAR = regex.compile(
    r'\\newtheorem\*?\s*'
    r'\{((?>[^{}]+|\{(?2)\})*)\}'
    r'\s*'
    r'\{((?>[^{}]+|\{(?2)\})*)\}'
    r'\s*'
    r'(\[\s*(\w+)\s*\])?',
    regex.MULTILINE)

# Matches `\section{title}`, `\subsection{title}`, `\subsubsection{title}`, and
# their starred forms such as `\section*{title}`. An optional bracketed
# argument directly after the command name is also accepted.
# Group 1 is the `*` if present; group 2 is the title, which may contain
# nested balanced braces (`(?2)` recurses into group 2).
SECTION_LIKE_PATTERN = regex.compile(
        r'\\(?:section|subsection|subsubsection)\s*(?:\[.*\])?(\*)?\s*'
        r'\{((?>[^{}]+|\{(?2)\})*)\}',
        regex.MULTILINE)
    

# Matches the opening of an environment, e.g. `\begin{theorem}`.
# Group 1 is the environment name, which may contain nested balanced braces
# (`(?1)` recurses into group 1).
ENVIRONMENT_PATTERN = regex.compile(
        r'\\begin\s*'
        r'\{((?>[^{}]+|\{(?1)\})*)\}',
        regex.MULTILINE)


## Divide LaTeX file into parts

To make Obsidian notes from a LaTeX file, I use sections/subsections, and environments as places to make new notes.

Things to think about:

- Sections/subsections
- Environments, including theorems, corollaries, propositions, lemmas, definitions, notations
- Citations
- Macros defined in the preamble?

LatexMacroNodes include: sections/subsections, citations, references, and labels, e.g.

```latex
> \section{Introduction}
\cite{ellenberg2nilpotent}
\subsection{The section conjecture}
\'e
\ref{fundamental-exact-sequence}
\cite{stix2010period}
\ref{fundamental-exact-sequence}
\cite{stix2012rational}
\cite[Appendix C]{stix2010period}
\subsection{The tropical section conjecture}
\label{subsec:tropical-section-conjecture}
```

#### Get the Document Node

In [None]:
#| export
class NoDocumentNodeError(Exception):
    """Signals that a LaTeX string was expected to contain a
    `LatexEnvironmentNode` for the document environment, but none was found.

    **Attributes**
    - text - str
        - The text that was searched for the document environment.
    """

    def __init__(self, text):
        header = "The following text does not contain a document environment:"
        super().__init__(f"{header}\n{text}")
        # Keep the offending text available to whoever catches the error.
        self.text = text



In [None]:
#| export
def find_document_node(
        text: str, # LaTeX str
        document_environment_name: str = "document" # The name of the document environment.
        ) -> LatexEnvironmentNode:
    """Return the top-level environment node of `text` whose name is
    `document_environment_name`.

    Only the top-level nodes parsed from `text` are examined, and the first
    matching environment node is returned.

    **Raises**
    - NoDocumentNodeError
        - If no top-level node is an environment with that name.
    """
    top_level_nodes, _, _ = LatexWalker(text).get_latex_nodes(pos=0)
    matching_nodes = (
        candidate for candidate in top_level_nodes
        if candidate.isNodeType(LatexEnvironmentNode)
        and candidate.environmentname == document_environment_name)
    document_node = next(matching_nodes, None)
    if document_node is None:
        raise NoDocumentNodeError(text)
    return document_node

The main content of virtually all LaTeX math articles belongs to a document environment, which pylatexenc can often detect. The `find_document_node` function returns this `LatexEnvironmentNode` object:

In [None]:
# Read a sample LaTeX file from the test directory and locate its
# document environment node.
latex_file_path = _test_directory() / 'latex_examples' / 'latex_example_1' / 'main.tex'
text = text_from_file(latex_file_path)
document_node = find_document_node(text)

If the LaTeX file has no `document` environment, then a `NoDocumentNodeError` is raised:

In [None]:
# This LaTeX document has its `document` environment commented out,
# so looking for the document node must raise `NoDocumentNodeError`.
latex_file_path = _test_directory() / 'latex_examples' / 'latex_example_2' / 'main.tex'
text = text_from_file(latex_file_path)
with ExceptionExpected(NoDocumentNodeError):
    document_node = find_document_node(text)

At the time of this writing, a `NoDocumentNodeError` may be raised even if the LaTeX file has a proper `document` environment:

In [None]:
# `text` is reused by the next cell, which works around the failure shown here.
latex_file_path = _test_directory() / 'latex_examples' / 'example_with_a_command_with_begin.tex'
text = text_from_file(latex_file_path)

# Perhaps in the future, pylatexenc will be able to find the document node for this file.
# When that time comes, delete this example.
with ExceptionExpected(NoDocumentNodeError):
    find_document_node(text)



The `divide_preamble` function can be used to circumvent this problem:

In [None]:
# Split off the preamble first and search only the document part;
# the preamble itself is not used in this example.
preamble, document = divide_preamble(text)
document_node = find_document_node(document)
test_eq(document_node.environmentname, 'document')
assert document_node.isNodeType(LatexEnvironmentNode)

In [None]:
# hide
# Find no document node error causes

# latex_file_path = r'_tests\latex_full\litt_cfag\main.tex'
# text = text_from_file(latex_file_path)
# document_node = find_document_node(text)

### Detect environment names used in a file

In [None]:
#| export
def environment_names_used(
        text: str # LaTeX document
        ) -> set[str]: # The set of all environment names used in the main document.
    """Collect the names of the environments found in the main document
    of the latex code.

    Only the immediate child nodes of the document node are inspected.
    A `NoDocumentNodeError` raised by `find_document_node` propagates
    to the caller.
    """
    names = set()
    for child in find_document_node(text).nodelist:
        if child.isNodeType(LatexEnvironmentNode):
            names.add(child.environmentname)
    return names

Writers often use different environment names. For example, writers often use `theorem`, `thm`, or `theo` for theorem environments, and `lemma` or `lem` for lemma environments. The `environment_names_used` function returns the environment names actually used in the tex file.

In the example below, note that only the environments that are actually used are returned. For instance, the preamble of the document defines the theorem environments `problem` and `lemma` (among other things), but these are not actually used in the document itself.

In [None]:
# Only the environments that appear in the document body are expected here.
latex_file_path = _test_directory() / 'latex_examples' / 'has_fully_written_out_environment_names.tex'
sample_text_1 = text_from_file(latex_file_path)
sample_output_1 = environment_names_used(sample_text_1)
test_eq({'corollary', 'proof', 'maincorollary', 'abstract', 'proposition'}, sample_output_1)

The document in the example below uses shorter names for theorem environments:

In [None]:
# This sample uses abbreviated environment names such as `conj` and `defn`.
latex_file_path = _test_directory() / 'latex_examples' / 'has_shorter_environment_names.tex'
sample_text_2 = text_from_file(latex_file_path)
sample_output_2 = environment_names_used(sample_text_2)
test_eq({'conj', 'notation', 'corollary', 'defn'}, sample_output_2)

#### Identify the numbering convention of a LaTeX document

LaTeX documents have various numbering conventions. Here are some examples of papers on the arXiv and notes on their numbering schemes. Note that the source code of these articles is publicly available on the arXiv. 

- Ellenberg, Venkatesh, and Westerland, *[Homological stability for Hurwitz spaces and the Cohen-Lenstra conjecture over function fields](https://arxiv.org/abs/0912.0325)*, 
    - The subsections and theorem-like environments of each section share a numbering scheme, e.g. section 1 has subsection `1.1 The Cohen-Lenstra heuristics`, `1.2 Theorem`, `1.3 Hurwitz spaces`. This is accomplished by defining theorem-like environments using the `subsection` counter, e.g.

        ```latex
        \theoremstyle{plain}
        \newtheorem{thm}[subsection]{Theorem}
        \newtheorem{prop}[subsection]{Proposition}
        \newtheorem{cor}[subsection]{Corollary}
        \newtheorem{remark}{Remark}
        \newtheorem{conj}[subsection]{Conjecture}
        \newtheorem*{conj*}{Conjecture}
         ```

        defines the `thm`, `prop`, `cor`, and `conj` environments to be numbered using the `subsection` counter, the `remark` environment to be defined as a numbered environment with its own `remark` counter, and the `conj*` environment to be defined as an unnumbered environment with a different name than the `conj` environment.

    - The `\swapnumbers` command is included in the preamble to change the way that theorems are numbered in the document, e.g. the article has `1.2 Theorem` as opposed to `Theorem 1.2`.
    - The equations are numbered along the subsections - this is accomplished by the lines 

        ```latex
        \numberwithin{equation}{subsection}
        \renewcommand{\theequation}{\thesubsection.\arabic{equation}}
        ```

        in the preamble.
- Hoyois, *[A quadratic refinement of the Grothendieck-Lefschetz-Verdier Trace Formula](https://arxiv.org/abs/1309.6147)*
    - The theorem-like environments are numbered `Theorem 1.1, Theorem 1.3, Corollary 1.4, Theorem 1.5`, etc.
        - The theorem-like environments that are numbered are assigned the `equation` counter. In particular, the equation
        environments share their numberings with the theorem-like environments. For example, section 1 has Equation `(1.2)`
        - This equation counter is reset at the beginning of each section and the section number is included in the numbering via
        ```latex 
        \numberwithin{equation}{section}
        ```

In [None]:
# TODO: consider different arxiv articles to see how they are numbered

In [None]:
#| export
def _search_counters_by_pattern(
        preamble: str,
        newtheorem_regex: re.Pattern, # This is supposed to be a regex that detects and captures parameters of `\newtheorem` commands.
        counter_group: int # This depends on which `newtheorem_regex` is used, and is either 3 or 4. 
        ) -> dict[str, str]: # The 
    """
    Capture the newly defined theorem-like environment names as well as the
    counters that they belong to
    
    This is a helper function for `numbered_newtheorems_counters_in_preamble`.
    
    """
    counters = {}
    for match in newtheorem_regex.finditer(preamble):
        env_name = match.group(1)
        counter = match.group(counter_group)
        # If no counter was specified, use the environment name as the counter
        if counter is None:
            counter = env_name
        counters[env_name] = counter
    return counters

In [None]:
#| hide

# Test that the module-level `\newtheorem` patterns detect the environments
# defined in the sample preamble correctly.
# (Earlier drafts of these patterns used to be kept here as commented-out
# code; the module-level constants used below supersede them.)
text = text_from_file(_test_directory() / 'latex_examples' / 'newtheorem_example.tex') 
preamble, _ = divide_preamble(text)
second_results = _search_counters_by_pattern(preamble, SECOND_PARAMETER_PATTERN, 3)
third_results = _search_counters_by_pattern(preamble, THIRD_PARAMETER_PATTERN, 4)
# The sample file defines `remark` via `\newtheorem{remark}{Remark}[theorem]`,
# i.e. with a trailing optional argument, so only the third-parameter
# pattern should pick it up.
assert 'remark' not in second_results
assert 'remark' in third_results

In [None]:
#| hide
# Run both patterns on an inline preamble. None of these invocations has a
# trailing `[counter]` argument, and the starred `\newtheorem*` is not matched.
# (Earlier drafts of the patterns used to be kept here as commented-out
# code; the module-level constants used below supersede them.)
preamble = text = r"""
\theoremstyle{plain}
\newtheorem{thm}[subsection]{Theorem}
\newtheorem{prop}[subsection]{Proposition}
\newtheorem{cor}[subsection]{Corollary}
\newtheorem{remark}{Remark}
\newtheorem{conj}[subsection]{Conjecture}
\newtheorem*{conj*}{Conjecture}
"""

second_results = _search_counters_by_pattern(preamble, SECOND_PARAMETER_PATTERN, 3)
third_results = _search_counters_by_pattern(preamble, THIRD_PARAMETER_PATTERN, 4)

# Display the second-parameter results; swap the comment below to inspect
# the third-parameter results instead.
second_results
# third_results

{'thm': 'subsection',
 'prop': 'subsection',
 'cor': 'subsection',
 'remark': 'remark',
 'conj': 'subsection'}

In [None]:
#| export
def _article_is_amsart_or_article(
        preamble: str # The preamble with no comments
        ):
    """
    helper function of `numbered_newtheorems_counters_in_preamble`.
    """
    return bool(re.search(r'\\documentclass\s*(\[\s*(.*?)\s*\])?\s*\{\s*(amsart|article)\}', preamble))


In [None]:
#| hide
# `amsart` document classes, with and without optional arguments.
assert _article_is_amsart_or_article(r'\documentclass[12pt,letterpaper]{amsart}')
assert _article_is_amsart_or_article(r'\documentclass{  amsart}')
assert _article_is_amsart_or_article(r'\documentclass{amsart}')
# When I tried compiling a sample LaTeX document, I found that having spaces before, but not after, `amsart` is fine.
assert not _article_is_amsart_or_article(r'\documentclass{  amsart }')  
assert not _article_is_amsart_or_article(r'\documentclass[12pt,letterpaper]{art}')

# The `\documentclass` line may be followed by more text.
assert _article_is_amsart_or_article(
    r'''\documentclass{amsart}
    Lorem Ipsum''')
    
# The same checks for the `article` document class.
assert _article_is_amsart_or_article(r'\documentclass[12pt,letterpaper]{article}')
assert _article_is_amsart_or_article(r'\documentclass{  article}')
assert _article_is_amsart_or_article(r'\documentclass{article}')
# As with `amsart`, spaces before, but not after, `article` are accepted.
assert not _article_is_amsart_or_article(r'\documentclass{  article }')  
assert not _article_is_amsart_or_article(r'\documentclass[12pt,letterpaper]{art}')

assert _article_is_amsart_or_article(
    r'''\documentclass{article}
    Lorem Ipsum''')

In [None]:
#| export
def numbered_newtheorems_counters_in_preamble(
        document: str, # The LaTeX document
        add_equation_counter: Optional[bool] = None, # Determines whether or not the `equation` environment will have a counter added when a `newtheorem` command for the `equation` environment is not explicitly invoked in the preamble. If `None`, then the counter is added if the article is of class `amsart` or `article`. If `True`, then the counter is added. If `False`, then the counter is not added.
        ) -> dict[str, tuple[str, Union[str, None]]]: # The keys are the command names of the environments. The value of a key is a tuple `(<counter>, <reset_by_counter>)`, where `<counter>` is the counter that the environment belongs to, which can be custom defined or predefined in LaTeX, and `<reset_by_counter>` is a counter whose incrementation resets the counter of the environment, if available. 
    r"""Return the dict specifying the numbered `\newtheorem` command invocations

    Assumes that

    - invocations of the `\newtheorem` command are exclusively in the
    preamble of the LaTeX document.
    - theorem-like environments are defined using the `\newtheorem` command.
    - no environments of the same name are defined twice.

    Several `\newtheorem` invocations may share a line. Comments in the
    preamble are removed before it is searched, so commented-out
    invocations are ignored. Starred (unnumbered) `\newtheorem*` invocations
    are not included in the output.

    This function does not take into account `\numberwithin` being used.
    The `numberwithins_in_preamble` function accounts for invocations of
    the `\numberwithin` command instead.

    The `equation` environment (and other related environments, such as `eqnarray`)
    seems to be included in documents of
    the class `amsart` or `article` (i.e. documents which invoke
    `\documentclass{amsart}` or `\documentclass{article}`,
    possibly with some optional arguments).
    The `equation` environment (together with `eqnarray`) is accordingly
    included in the output of this function, each with the counter
    `equation`, if the preamble does not define `equation` itself and either
    `add_equation_counter` is `True`, or `add_equation_counter` is `None`
    and the document is of the class `amsart` or `article`.

    This function uses two separate regex patterns, one to detect the invocations
    of `\newtheorem` in which the optional parameter is the second parameter and
    one to detect those in which the optional parameter is the third parameter.


    """
    preamble, _ = divide_preamble(document)
    preamble = remove_comments(preamble)
    commands_and_counters = _combine_second_and_third_paramter_results(preamble)
    # `None` means "decide from the document class".
    if add_equation_counter is None:
        add_equation_counter = _article_is_amsart_or_article(preamble)
    if add_equation_counter and 'equation' not in commands_and_counters:
        commands_and_counters['equation'] = ('equation', None)
        # `eqnarray` shares the `equation` counter unless the preamble
        # defines an environment of that name itself.
        if 'eqnarray' not in commands_and_counters:
            commands_and_counters['eqnarray'] = ('equation', None)

    return commands_and_counters


def _combine_second_and_third_paramter_results(preamble):
    r"""
    Inspect invocations of the `\newtheorem` command in the preamble,
    separately dealing with invocations with a third optional parameter vs.
    a second optional parameter.

    Returns a dict mapping each environment name to a tuple
    `(<counter>, <reset_by_counter>)`:

    - environments matched by `SECOND_PARAMETER_PATTERN`
      (e.g. `\newtheorem{theorem}{Theorem}` or
      `\newtheorem{proposition}[theorem]{Proposition}`) get
      `(<counter>, None)`, where `<counter>` is the shared counter if one is
      given and the environment name otherwise.
    - environments matched only by `THIRD_PARAMETER_PATTERN`
      (e.g. `\newtheorem{theorem}{Theorem}[section]`) get
      `(<environment name>, <reset_by_counter>)`, where `<reset_by_counter>`
      is the trailing optional argument, or `None` if there is none.

    helper function of `numbered_newtheorems_counters_in_preamble`.
    """
    second_results = _search_counters_by_pattern(preamble, SECOND_PARAMETER_PATTERN, 3)
    to_return = {
        environment_name: (counter, None)
        for environment_name, counter in second_results.items()}
    # Read the trailing `[counter]` group directly instead of going through
    # `_search_counters_by_pattern`: that helper substitutes the environment
    # name for a missing counter, which would wrongly report an environment
    # as being reset by itself.
    for match in THIRD_PARAMETER_PATTERN.finditer(preamble):
        environment_name = match.group(1)
        if environment_name in second_results:
            continue
        to_return[environment_name] = (environment_name, match.group(4))
    return to_return

In [None]:
# Several `\newtheorem` invocations share a line here; the starred
# (unnumbered) environments do not appear in the displayed result.
text = r"""\theoremstyle{definition}                 \newtheorem{conj}{Conjecture}
\newtheorem*{example}{Example}            \newtheorem{defn}{Definition}
\newtheorem{remark}{Remark} \newtheorem*{notation}{Notation}
\begin{document}
\end{document}"""
numbered_newtheorems_counters_in_preamble(text)


{'conj': ('conj', None), 'defn': ('defn', None), 'remark': ('remark', None)}

The `numbered_newtheorems_counters_in_preamble` function parses the preamble of a LaTeX document for invocations of the `\newtheorem` command and returns the counter that each theorem-like environment belongs to.

In [None]:
# Print the sample file, then check the counters detected in its preamble.
text = text_from_file(_test_directory() / 'latex_examples' / 'newtheorem_example.tex') 
print(text)

# The sample is an `article`, so `equation` and `eqnarray` are included too.
counters = numbered_newtheorems_counters_in_preamble(text)
test_eq(counters,
   {'theorem': ('theorem', None), 'lemma': ('theorem', None), 'definition': ('theorem', None), 'corollary': ('corollary', None), 'remark': ('remark', 'theorem'), 'equation': ('equation', None), 'eqnarray': ('equation', None)}
)

\documentclass{article}
\usepackage{amsthm}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{definition}[theorem]{Definition} % Note that `theorem`, `lemma`, and `definition` all have `theorem` as their counter.
\newtheorem{corollary}{Corollary} % Note that `corollary` has its own counter.
\newtheorem{remark}{Remark}[theorem] % `remark` has `theorem` as its counter
\newtheorem*{conjecture*}{Conjecture} % `conjecture*` has no counter

\begin{document}

\section{Introduction}

\begin{theorem}
This is Theorem 1.
\end{theorem}

\begin{lemma}
This is Lemma 2.
\end{lemma}

\begin{definition}
This is Definition 3.
\end{definition}

\end{document}


In [None]:
# No `\documentclass` is given here, so no `equation` counter is added.
# The starred `conj*` environment is unnumbered and therefore omitted.
text = r"""
\theoremstyle{plain}
\newtheorem{thm}[subsection]{Theorem}
\newtheorem{prop}[subsection]{Proposition}
\newtheorem{cor}[subsection]{Corollary}
\newtheorem{remark}{Remark}
\newtheorem{conj}[subsection]{Conjecture}
\newtheorem*{conj*}{Conjecture}
\begin{document}
\end{document}
"""
counters = numbered_newtheorems_counters_in_preamble(text)
test_eq(
    counters,
    {'thm': ('subsection', None), 'prop': ('subsection', None), 'cor': ('subsection', None), 'remark': ('remark', None), 'conj': ('subsection', None)})

`numbered_newtheorems_counters_in_preamble` ignores commented out text:

In [None]:
# Neither the fully commented-out `remark` line nor the trailing commented
# `fakeenv` definition should show up in the result.
text = r"""
\theoremstyle{plain}
\newtheorem{thm}[subsection]{Theorem}
\newtheorem{prop}[subsection]{Proposition}
\newtheorem{cor}[subsection]{Corollary}
% \newtheorem{remark}{Remark}
\newtheorem{conj}[subsection]{Conjecture}
\newtheorem*{conj*}{Conjecture} %\newtheorem{fakeenv}{This won't be picked up!}
\begin{document}
\end{document}
"""
counters = numbered_newtheorems_counters_in_preamble(text)
test_eq(
    counters,
    {'thm': ('subsection', None), 'prop': ('subsection', None), 'cor': ('subsection', None), 'conj': ('subsection', None)})

`numbered_newtheorems_counters_in_preamble` does not account for `\numberwithin` command invocations. The `numberwithins_in_preamble` function accounts for invocations of `\numberwithin` instead.

In [None]:
text = text_from_file(_test_directory() / 'latex_examples' / 'numbering_example_3_theorem_like_environments_share_counter_with_equation_and_reset_at_each_section' / 'main.tex')
print(text)
# `numbered_newtheorems_counters_in_preamble` only reports that
# the theorem-like environments are counted by 'equation'.
# It does not report that the command `\numberwithin{equation}{section}`
# resets the equation counter
# every time the `section` counter is incremented.
test_eq(numbered_newtheorems_counters_in_preamble(text), 
       {'theorem': ('equation', None), 'proposition': ('equation', None), 'lemma': ('equation', None), 'corollary': ('equation', None), 'definition': ('equation', None), 'example': ('equation', None), 'remark': ('equation', None), 'equation': ('equation', None), 'eqnarray': ('equation', None)}
        )

\documentclass{amsart}
\usepackage[utf8]{inputenc}
\usepackage{amsmath, amsfonts, amssymb, amsthm, amsopn}

\numberwithin{equation}{section}

\theoremstyle{plain}
\newtheorem*{theorem*}{Theorem}
\newtheorem*{theoremA}{Theorem A}
\newtheorem*{theoremB}{Theorem B}
\newtheorem{theorem}[equation]{Theorem}
\newtheorem{proposition}[equation]{Proposition}
\newtheorem{lemma}[equation]{Lemma}
\newtheorem{corollary}[equation]{Corollary}

\theoremstyle{definition}
\newtheorem{definition}[equation]{Definition}
\newtheorem{example}[equation]{Example}
\newtheorem*{acknowledgements}{Acknowledgements}
\newtheorem*{conventions}{Conventions}

\theoremstyle{remark}
\newtheorem{remark}[equation]{Remark}

\begin{document}

\section{Introduction}

\begin{theorem}
This is Theorem 1.1. This is because the \verb|\numberwithin{equation}{section}| makes the section number included in the equation counter and because the \\
\verb|\newtheorem{theorem}[equation]{Theorem}| command makes the environment \verb|theorem

The `\newtheorem` command can be used to specify that the counter of the newly defined theorem-like environment is reset whenever another counter is incremented; for example, `\newtheorem{theorem}{Theorem}[section]` defines a new environment named `theorem` (with display text `Theorem`) whose counter is reset whenever the `section` counter is incremented.

In [None]:
text = text_from_file(_test_directory() / 'latex_examples' / 'numbering_example_7_newtheorem_command_restarts_counter_by_section' / 'main.tex') 
print(text)
# Here the trailing optional argument of `\newtheorem` is reported as the
# second tuple entry: `theorem` and `proposition` are reset by `section`,
# and `corollary` is reset by `theorem`. `lemma` shares the `theorem` counter.
# The document class is `amsart`, so `equation` and `eqnarray` are included too.

test_eq(numbered_newtheorems_counters_in_preamble(text), 
        {'lemma': ('theorem', None), 'theorem': ('theorem', 'section'), 'corollary': ('corollary', 'theorem'), 'proposition': ('proposition', 'section'), 'equation': ('equation', None), 'eqnarray': ('equation', None)}

        )


% Based on an example from https://www.overleaf.com/learn/latex/Theorems_and_proofs#Numbered_theorems.2C_definitions.2C_corollaries_and_lemmas

\documentclass[12 pt]{amsart}

\newtheorem{theorem}{Theorem}[section]
\newtheorem{corollary}{Corollary}[theorem]
\newtheorem{lemma}[theorem]{Lemma}
% Note that the below invocation of \newtheorem is invalid:
% \newtheorem{proposition}[theorem]{Proposition}[section]
\newtheorem{proposition}{Proposition}[section]

\begin{document}
\section{Introduction}
Theorems can easily be defined:

\begin{theorem}
Let \(f\) be a function whose derivative exists in every point, then \(f\) is 
a continuous function.
\end{theorem}

\begin{theorem}[Pythagorean theorem]
\label{pythagorean}
This is a theorem about right triangles and can be summarised in the next 
equation 
\[ x^2 + y^2 = z^2 \]
\end{theorem}

And a consequence of theorem \ref{pythagorean} is the statement in the next 
corollary.

\begin{corollary}
There's no right rectangle whose sides measure 3c

In [None]:
#| hide
# TODO
# I found a bug where the section numbering cannot handle the theorem-like environment defined like
# \newtheorem{theorem}{Theorem}[section], cf. https://arxiv.org/abs/2106.10586 and the example in



For the following test, we have multiple theorems defined in the same line:

In [None]:
# Each line holds two invocations; the starred (unnumbered) `example` and
# `notation` environments do not appear in the displayed result.
text = r"""\theoremstyle{definition}                 \newtheorem{conj}{Conjecture}
\newtheorem*{example}{Example}            \newtheorem{defn}{Definition}
\newtheorem{remark}{Remark} \newtheorem*{notation}{Notation}
\begin{document}
\end{document}"""
numbered_newtheorems_counters_in_preamble(text)


{'conj': ('conj', None), 'defn': ('defn', None), 'remark': ('remark', None)}

In [None]:
#| export
def numberwithins_in_preamble(
        document: str # The LaTeX document
    ) -> dict[str, str]: # The keys are the first arguments of `numberwithin` invocations and the values are the second arguments of `numberwithin` invocations.
    r"""Return the `dict` describing `\numberwithin` commands invoked
    in the preamble of `document`.

    For example, `\numberwithin{equation}{section}` contributes the entry
    `'equation': 'section'`. Surrounding whitespace inside the braces is
    dropped. Comments in the preamble are removed before it is searched.
    If the same first argument occurs more than once, the last invocation
    wins.

    Assumes that `\numberwithin` commands are invoked exclusively in the
    preamble.

    See also the `numbered_newtheorems_counters_in_preamble` function,
    which parses invocations of the `\newtheorem` command.
    """
    preamble, _ = divide_preamble(document)
    preamble = remove_comments(preamble)
    # The second argument is matched lazily and may not contain braces.
    # A greedy `.*` would run on to the last `}` of the line, swallowing any
    # command that follows on the same line, and would keep trailing spaces.
    pattern = regex.compile(
        r'\\numberwithin\s*\{\s*(\w+)\s*\}\s*\{\s*([^{}]*?)\s*\}')
    return {
        match.group(1): match.group(2)
        for match in pattern.finditer(preamble)}

The `numberwithins_in_preamble` function returns a `dict` describing invocations of the `\numberwithin` command. See also the `numbered_newtheorems_counters_in_preamble` function, which parses invocations of the `\newtheorem` command.

In the following example, there is an invocation of the `\numberwithin` command; for the LaTeX document in the example below, the equation counter is reset every time the `section` counter is incremented. 

The `numberwithins_in_preamble` function returns a `dict` that is used by the `divide_latex_text` function to account for this fact.

In [None]:
# The sample preamble contains `\numberwithin{equation}{section}`.
text = text_from_file(_test_directory() / 'latex_examples' / 'numbering_example_3_theorem_like_environments_share_counter_with_equation_and_reset_at_each_section' / 'main.tex')
print(text)
test_eq(numberwithins_in_preamble(text), {'equation': 'section'})

\documentclass{amsart}
\usepackage[utf8]{inputenc}
\usepackage{amsmath, amsfonts, amssymb, amsthm, amsopn}

\numberwithin{equation}{section}

\theoremstyle{plain}
\newtheorem*{theorem*}{Theorem}
\newtheorem*{theoremA}{Theorem A}
\newtheorem*{theoremB}{Theorem B}
\newtheorem{theorem}[equation]{Theorem}
\newtheorem{proposition}[equation]{Proposition}
\newtheorem{lemma}[equation]{Lemma}
\newtheorem{corollary}[equation]{Corollary}

\theoremstyle{definition}
\newtheorem{definition}[equation]{Definition}
\newtheorem{example}[equation]{Example}
\newtheorem*{acknowledgements}{Acknowledgements}
\newtheorem*{conventions}{Conventions}

\theoremstyle{remark}
\newtheorem{remark}[equation]{Remark}

\begin{document}

\section{Introduction}

\begin{theorem}
This is Theorem 1.1. This is because the \verb|\numberwithin{equation}{section}| makes the section number included in the equation counter and because the \\
\verb|\newtheorem{theorem}[equation]{Theorem}| command makes the environment \verb|theorem

#### Getting the display names of environment

For example, `\newtheorem{theorem}{Theorem}` defines a theorem-like environment called `theorem` whose display name is `Theorem`.

In [None]:
#| export
def display_names_of_environments(
        document: str # The LaTeX document
        ) -> dict[str, str]:  
    r"""Return a dict mapping each theorem-like environment name to its
    display name, e.g. `\newtheorem{theorem}{Theorem}` yields
    `{'theorem': 'Theorem'}`.

    Only the preamble of `document` (as returned by `divide_preamble`) is
    searched. Two regex patterns are applied in turn:

    - `SECOND_PARAMETER_PATTERN_WITH_OPTIONAL_STAR`, for invocations whose
      optional parameter (if any) is the second parameter, e.g.
      `\newtheorem{lemma}[theorem]{Lemma}`; the display name is group 4.
    - `THIRD_PARAMETER_PATTERN_WITH_OPTIONAL_STAR`, for invocations whose
      optional parameter (if any) is the third parameter, e.g.
      `\newtheorem{theorem}{Theorem}[section]`; the display name is group 2.

    If both patterns report the same environment, the result of the
    third-parameter pattern wins.

    Assumes that
    - invocations of the `\newtheorem` command are exclusively in the
    preamble of the LaTeX document.
    - theorem-like environments are defined using the `\newtheorem` command.
    - no environments of the same name are defined twice.

    """
    preamble, _ = divide_preamble(document)
    # Each entry pairs a pattern with the index of the capture group that
    # holds the display name for that pattern.
    patterns_with_display_groups = (
        (SECOND_PARAMETER_PATTERN_WITH_OPTIONAL_STAR, 4),
        (THIRD_PARAMETER_PATTERN_WITH_OPTIONAL_STAR, 2),
    )
    display_names = {}
    for pattern, display_group in patterns_with_display_groups:
        # Later patterns override earlier ones on duplicate environment names.
        display_names |= _search_display_names_by_pattern(
            preamble, pattern, display_group)
    return display_names
    

def _search_display_names_by_pattern(
        preamble: str,
        newtheorem_regex: re.Pattern,
        display_name_group: int # This depends on which `newtheorem_regex` is used, and is either 3 or 4. 
        ) -> dict[str, str]:
    """
    Capture the newly defined theorem-like environment names as well as the
    counters that they belong to"""
    display_names = {}
    for match in newtheorem_regex.finditer(preamble):
        env_name = match.group(1)
        display_name = match.group(display_name_group)
        display_names[env_name] = display_name
    return display_names

Basic examples:

In [None]:
text = text_from_file(_test_directory() / 'latex_examples' / 'newtheorem_example.tex') 
print(text)
display_names = display_names_of_environments(text)
test_eq(display_names,{'theorem': 'Theorem', 'lemma': 'Lemma', 'definition': 'Definition', 'corollary': 'Corollary', 'conjecture*': 'Conjecture', 'remark': 'Remark'})

\documentclass{article}
\usepackage{amsthm}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{definition}[theorem]{Definition} % Note that `theorem`, `lemma`, and `definition` all have `theorem` as their counter.
\newtheorem{corollary}{Corollary} % Note that `corollary` has its own counter.
\newtheorem{remark}{Remark}[theorem] % `remark` has `theorem` as its counter
\newtheorem*{conjecture*}{Conjecture} % `conjecture*` has no counter

\begin{document}

\section{Introduction}

\begin{theorem}
This is Theorem 1.
\end{theorem}

\begin{lemma}
This is Lemma 2.
\end{lemma}

\begin{definition}
This is Definition 3.
\end{definition}

\end{document}


In [None]:
# Load the example document explicitly; otherwise this cell would silently
# reuse whatever `text` was left over from the previous cell.
file = _test_directory() / 'latex_examples' / 'numbering_example_1_consecutive_numbering_scheme' / 'main.tex'
text = text_from_file(file)
print(text)
display_names = display_names_of_environments(text)
print(display_names)

\documentclass{article}
\usepackage{amsthm}

\newtheorem{theorem}{Theorem}
\newtheorem{lemma}[theorem]{Lemma}
\newtheorem{definition}[theorem]{Definition} % Note that `theorem`, `lemma`, and `definition` all have `theorem` as their counter.
\newtheorem{corollary}{Corollary} % Note that `corollary` has its own counter.
\newtheorem{remark}{Remark}[theorem] % `remark` has `theorem` as its counter
\newtheorem*{conjecture*}{Conjecture} % `conjecture*` has no counter

\begin{document}

\section{Introduction}

\begin{theorem}
This is Theorem 1.
\end{theorem}

\begin{lemma}
This is Lemma 2.
\end{lemma}

\begin{definition}
This is Definition 3.
\end{definition}

\end{document}
{'theorem': 'Theorem', 'lemma': 'Lemma', 'definition': 'Definition', 'corollary': 'Corollary', 'conjecture*': 'Conjecture', 'remark': 'Remark'}


In [None]:
text = text_from_file(_test_directory() / 'latex_examples' / 'numbering_example_7_newtheorem_command_restarts_counter_by_section' / 'main.tex') 
print(text)
display_names = display_names_of_environments(text)
test_eq(display_names,
{'theorem': 'Theorem',
 'corollary': 'Corollary',
 'lemma': 'Lemma',
 'proposition': 'Proposition',})


% Based on an example from https://www.overleaf.com/learn/latex/Theorems_and_proofs#Numbered_theorems.2C_definitions.2C_corollaries_and_lemmas

\documentclass[12 pt]{amsart}

\newtheorem{theorem}{Theorem}[section]
\newtheorem{corollary}{Corollary}[theorem]
\newtheorem{lemma}[theorem]{Lemma}
% Note that the below invocation of \newtheorem is invalid:
% \newtheorem{proposition}[theorem]{Proposition}[section]
\newtheorem{proposition}{Proposition}[section]

\begin{document}
\section{Introduction}
Theorems can easily be defined:

\begin{theorem}
Let \(f\) be a function whose derivative exists in every point, then \(f\) is 
a continuous function.
\end{theorem}

\begin{theorem}[Pythagorean theorem]
\label{pythagorean}
This is a theorem about right triangles and can be summarised in the next 
equation 
\[ x^2 + y^2 = z^2 \]
\end{theorem}

And a consequence of theorem \ref{pythagorean} is the statement in the next 
corollary.

\begin{corollary}
There's no right rectangle whose sides measure 3c

In the following example, multiple `\newtheorem` commands are invoked on a single line. 

In [None]:
text = r"""\theoremstyle{definition}                 \newtheorem{conj}{Conjecture}
\newtheorem*{example}{Example}            \newtheorem{defn}{Definition}
\newtheorem{remark}{Remark} \newtheorem*{notation}{Notation}
\begin{document}
\end{document}"""
test_eq(display_names_of_environments(text), {'conj': 'Conjecture', 'example': 'Example', 'defn': 'Definition', 'remark': 'Remark', 'notation': 'Notation'})

### Divide latex text into parts

In [None]:
#| export
def _setup_counters(
        numbertheorem_counters: dict[str, tuple[str, Union[str, None]]], # An output of `numbered_newtheorems_counters_in_preamble`
        ) -> dict[str, int]:
    r"""
    Return a dict whose keys are of counters in the LaTeX document and whose
    values are all `0`. These key-value pairs are used to keep track of
    the numberings of `parts`.

    One special key is the key of the empty string `''`, which counters the
    parts which do not get a numbering, i.e. for most text that lie outside
    of (numbered) environments

    """

    # cf. https://www.overleaf.com/learn/latex/Counters#Default_counters_in_LaTeX
    predefined_counters = [
        'part', # Incremented each time the `\part` command is used. It is not reset automatically and casn only be reset by the user
        'chapter', # Incremeneted each time the `\chapter` command is used.
        'section', # Incremented whenever a new `\section` command is encountered
        'subsection', # Incremented whenever a new `\subsection` command is encountered, reset whenever a new `\section` command is encountered
        'subsubsection', # Incremented whenever a new `\subsubsection` command is encounted, reset whenever a new `\subsection` or `\section` command is encountered
        'paragraph', # Incremeneted whenever a new paragraph is started. Reset whenever a new `\subsubsection`, `\subsection`, or `\section` command is encounted
        'subparagraph', # Incremented each time the `\subparagraph` command is used and reset at the beginning of a new
        'page', # Incremented each time a new page is started in the document
        'equation', # Incremeneted whenever the `\begin{equation}` environment is used. 
        'figure', # Incremented whenever a new `figure` environment is encountered
        'table', # Incremeneted whenever a new `taable` environment is encountered`
        'footnote', 
        'mpfootnote',
        'enumi',
        'enumii',
        'enumiii',
        'enumiv']

    counters = {counter: 0 for _, (counter, reset_counter) in numbertheorem_counters.items()}
    for counter in predefined_counters:
        counters[counter] = 0

    counters[''] = 0
    return counters

In [None]:
#| hide
sample_counters = _setup_counters(
    {'thm': ('subsection', None), 'prop': ('subsection', None), 'cor': ('subsection', None), 'remark': ('remark', None), 'conj': ('subsection', None)})
assert 'remark' in sample_counters
test_eq(sample_counters['remark'], 0)
assert 'thm' not in sample_counters  # 'thm' is an environment name, but not a counter.


In [None]:
#| export
def _setup_numberwithins(
        explicit_numberwithins: dict[str, str],
        numbertheorem_counters: dict[str, tuple[str, Union[str, None]]], # An output of `numbered_newtheorems_counters_in_preamble`.
        ) -> dict[str, str]: # The keys are counters and the values are all counters that the key is immediately numbered within.
    """
    Extracts information of counters that are reset when other counters are
    incremented.

    This is a helper function of `_setup_all_numberwithins` as well as
    `divide_latex_text`.
    """
    builtin_numberwithins = {
        'subsection': 'section',
        'subsubsection': 'subsection',
        'paragraph': 'subsubsection',
        'subparagraph': 'paragraph',
        'enumii': 'enumi',
        'enumiii': 'enumii',
        'enumiv': 'enumiii',
        'part': 'chapter',
        'appendix': 'chapter'
    }
    numberwithins = explicit_numberwithins | builtin_numberwithins

    for environmentname, (counter, reset_by_counter) in numbertheorem_counters.items():
        if reset_by_counter is None:
            continue
        numberwithins[environmentname] = reset_by_counter
    return numberwithins

    

def _setup_all_numberwithins(
        explicit_numberwithins: dict[str, str],
        numbertheorem_counters: dict[str, tuple[str, Union[str, None]]], # An output of `numbered_newtheorems_counters_in_preamble`.
        ) -> dict[str, list[str]]: # The keys are counters and the values are all counters that the key is numbered within.
    """
    Return, for every counter that appears in the output of
    `_setup_numberwithins` (as a key or as a value), the list of all counters
    it is numbered within, directly or transitively.

    Counters that are not numbered within anything map to an empty list.
    The order of the keys and of each list follows the iteration order of a
    `set` and should not be relied upon.

    This is a helper function of `divide_latex_text`.
    """
    numberwithins = _setup_numberwithins(explicit_numberwithins, numbertheorem_counters)
    all_counters = set()
    for child_counter, parent_counter in numberwithins.items():
        all_counters.add(child_counter)
        all_counters.add(parent_counter)
    return {
        inner: [outer for outer in all_counters
                if _is_numberedwithin(inner, outer, numberwithins)]
        for inner in all_counters
    }


def _is_numberedwithin(
        counter_1, counter_2, numberwithins: dict[str, str]
        ) -> bool:
    """Return `True` if `counter_1` is numbered within `counter_2""" 
    if counter_1 not in numberwithins:
        return False
    elif numberwithins[counter_1] == counter_2:
        return True
    return _is_numberedwithin(
        numberwithins[counter_1], counter_2, numberwithins)


In [None]:
#| hide
sample_output = _setup_all_numberwithins({'equation': 'section'}, {})
test_eq(sample_output['section'], [])
test_eq(sample_output['subsection'], ['section'])
test_eq(sample_output['equation'], ['section'])

sample_output = _setup_all_numberwithins({'theorem': 'section'}, {})
test_eq(sample_output['theorem'], ['section'])

# In case that there is a `\newtheorem` invocation that also numbers the
# theorem-like environment within some counter (e.g. `\newtheorem{theorem}{Theorem}[section]`),
# we need to make sure that it is being setup like a numberwithin:
sample_output = _setup_all_numberwithins({'equation': 'section'}, {'theorem': ('theorem', 'section')})
test_eq(sample_output['theorem'], ['section'])
test_eq(sample_output['equation'], ['section'])

# In this example, let's say that we have a `\newtheorem{theorem}{Theorem}` instead
sample_output = _setup_all_numberwithins({'equation': 'section'}, {'theorem': ('theorem', None)})
assert 'theorem' not in sample_output
test_eq(sample_output['equation'], ['section'])

In [None]:
#| export
def _unnumbered_environments(
        numbertheorem_counters: dict[str, tuple[str, Union[str, None]]], # An output of `numbered_newtheorems_counters_in_preamble`
        display_names: dict[str, str]) -> set[str]:
    r"""Return the set of unnumbered theorem-like environments defined by
    `\newtheorem`.

    This is a helper function of `divide_latex_text`.
    """
    return {environment for environment in display_names
            if environment not in numbertheorem_counters}

    

In [None]:
#| hide
sample_unnumbered_environments = _unnumbered_environments(
    {'theorem': ('theorem', None), 'lemma': ('theorem', None), 'definition': ('theorem', None), 'corollary': ('corollary', None), 'remark': ('theorem', None)},
    {'theorem': 'Theorem', 'lemma': 'Lemma', 'definition': 'Definition', 'corollary': 'Corollary', 'conjecture*': 'Conjecture', 'remark': 'Remark'} 
    )
test_eq(sample_unnumbered_environments, {'conjecture*'})

In [None]:
#| export
def _section_title(
        text: str
        ) -> tuple[bool, str]: # The bool is `True` if the section/subsection is numbered (i.e. is `section` or `subsection` as opposed to `section*` or `subsection*`). The `str` is the title of the section or subsection
    """Return whether the first section-like command found in `text` is
    numbered, together with its title.

    The command is located by searching `text` with `SECTION_LIKE_PATTERN`.
    Capture group 1 of that pattern being unmatched is taken to mean that
    the command is numbered (presumably group 1 captures the `*` of a
    starred command); capture group 2 is returned as the title.

    Note that the `section` command has the optional argument `toc-title`,
    which appears in the table of contents, cf.
    http://latexref.xyz/_005csection.html

    `text` is assumed to contain a section-like command; otherwise the
    search yields `None` and an `AttributeError` is raised.
    """
    found = regex.search(SECTION_LIKE_PATTERN, text)
    star, title = found.group(1, 2)
    return star is None, title


In [None]:
#| hide

# subsection, no extraneous spaces
sample_section = _section_title(r"\subsection{I am a subsection}")
test_eq(sample_section, (True, 'I am a subsection'))

# section, with extraneous spaces
sample_section = _section_title(r"\section {Generating series of special divisors}")
test_eq(sample_section, (True, 'Generating series of special divisors'))

# section, unnumbered
sample_section = _section_title(r"\section*{I am an unnumbered section}")
test_eq(sample_section, (False, 'I am an unnumbered section'))

# Subsection, unnumbered, extraneous spaces
sample_section = _section_title(r"\subsection*    {I am an unnumbered section and I have extraneous spaces}")
test_eq(sample_section, (False, 'I am an unnumbered section and I have extraneous spaces'))

# Multiline section
sample_section = _section_title(
    r"""\section*    {I am a section and I have span 
    multiple lines}""")
test_eq(sample_section, (False, 'I am a section and I have span \n    multiple lines'))

# Section with curly braces
sample_section = _section_title(
    r"""\section{ Can I talk about the finite field \mathcal{F}_p in this title?
        Can I also have multiple lines? Yes I can!}"""
)
test_eq(sample_section, (True, r""" Can I talk about the finite field \mathcal{F}_p in this title?
        Can I also have multiple lines? Yes I can!"""))

# Section with table of contents
sample_section = _section_title(
    r"\section [This is a Table of contents title] {This is the section title}"
)
test_eq(sample_section, (True, r"""This is the section title"""))

# # Section, also multiline
# sample_section = _section_title(
#     r"""\section{Exceptional maximal subgroups of 
# \texorpdfstring{\(\GSp_4(\ff_\ell)\)}{GSp4Fell}}"""
# )
# sample_section[1]

In [None]:
#| export
def _is_macro_node_named(node: LatexNode, macroname: str):
    """Return whether `node` is a `LatexMacroNode` whose macro name is
    `macroname`."""
    return (node.isNodeType(LatexMacroNode)
            and node.macroname == macroname)


def _is_section_node(node: LatexNode):
    """Return whether `node` is a macro node for the `section` command."""
    return _is_macro_node_named(node, 'section')


def _is_subsection_node(node: LatexNode):
    """Return whether `node` is a macro node for the `subsection` command."""
    return _is_macro_node_named(node, 'subsection')


def _is_subsubsection_node(node: LatexNode):
    """Return whether `node` is a macro node for the `subsubsection`
    command."""
    return _is_macro_node_named(node, 'subsubsection')


def _is_environment_node(node: LatexNode):
    """Return whether `node` is a `LatexEnvironmentNode`."""
    is_environment = node.isNodeType(LatexEnvironmentNode)
    return is_environment

def _text_is_of_section_like_node(text: str):
    """Return `True` if `text`, ignoring leading whitespace, starts with a
    section-like command as recognized by `SECTION_LIKE_PATTERN`.

    In principle, this function should act like
    `_is_section_node or _is_subsection_node or _is_subsubsection_node`
    except that it takes a `str` as its argument instead of a `LatexNode`.
    It is implemented with a regex pattern rather than by parsing `text`
    into a node in order to save time.
    """
    leading_stripped = text.lstrip()
    return regex.match(SECTION_LIKE_PATTERN, leading_stripped) is not None


def _text_is_of_environment_node(text: str):
    """Return `True` if `text`, ignoring leading whitespace, starts like an
    environment node as recognized by `ENVIRONMENT_PATTERN`.

    Only the start of `text` is inspected.

    In principle, this function should act like `_is_environment_node`
    except that it takes a `str` as its argument instead of a `LatexNode`.
    It is implemented with a regex pattern rather than by parsing `text`
    into a node in order to save time.
    """
    leading_stripped = text.lstrip()
    return regex.match(ENVIRONMENT_PATTERN, leading_stripped) is not None


def _environment_name_of_text(text: str):
    """Return the name of the environment that `text` starts with, i.e.
    capture group 1 of `ENVIRONMENT_PATTERN` matched against `text` with
    leading whitespace removed.

    Assumes that `_text_is_of_environment_node(text)` is `True`; otherwise
    there is no match and an `AttributeError` is raised.
    """
    leading_stripped = text.lstrip()
    return regex.match(ENVIRONMENT_PATTERN, leading_stripped).group(1)

In [None]:
#| hide
text = r"""
\documentclass{article}

\theoremstyle{plain}
\newtheorem{theorem}{Theorem}

\begin{document}

\section{This is section 1}

\subsection{This is subsection 1.1}

\begin{theorem}
\end{theorem}

\end{document}
"""
document_node = find_document_node(text)
assert _is_section_node(document_node.nodelist[1])
assert not _is_section_node(document_node.nodelist[3])
assert not _is_section_node(document_node.nodelist[5])

assert not _is_subsection_node(document_node.nodelist[1])
assert _is_subsection_node(document_node.nodelist[3])
assert not _is_subsection_node(document_node.nodelist[5])

assert not _is_environment_node(document_node.nodelist[1])
assert not _is_environment_node(document_node.nodelist[3])
assert _is_environment_node(document_node.nodelist[5])

# for node in document_node.nodelist:
#     print('\n')
#     print(node)
#     if node.isNodeType(LatexMacroNode):
#         print(node.macroname)
#     elif node.isNodeType(LatexEnvironmentNode):
#         print(node.environmentname)


In [None]:
#| hide
assert _text_is_of_section_like_node(r"\section {Generating series of special divisors}")
assert _text_is_of_section_like_node(r"   \section {Generating series of special divisors}")
assert _text_is_of_section_like_node(r"\subsection{I am a subsection}")
assert _text_is_of_section_like_node(r"\section*{I am an unnumbered section}")
assert _text_is_of_section_like_node(r"\subsection*    {I am an unnumbered section and I have extraneous spaces}")
assert _text_is_of_section_like_node(r"""\section*    {I am a section and I have span 
    multiple lines}""")
assert _text_is_of_section_like_node(r"""\section{ Can I talk about the finite field \mathcal{F}_p in this title?
        Can I also have multiple lines? Yes I can!}""")
assert _text_is_of_section_like_node(r"""\subsubsection{Hi}""")

assert not _text_is_of_section_like_node(r"""Something something""")
assert not _text_is_of_section_like_node(r"""\begin{theorem}""")
assert not _text_is_of_section_like_node(r"hi \section{title}")

assert _text_is_of_environment_node(r"""\begin{theorem} blah blah blah""")
assert _text_is_of_environment_node(r"""\begin{theorem} blah blah blah \end{theorem}""")
assert not _text_is_of_environment_node(r"""hi""")
_environment_name_of_text(r"""\begin{theorem} blah blah blah""")


'theorem'

In [None]:
#| export
def _is_numbered(
        node: LatexNode,
        numbertheorem_counters: dict[str, str]
        ) -> bool:
    """Return whether `node` receives a number.

    - A `section`/`subsection`/`subsubsection` macro node is numbered
      according to `_section_title` applied to the node's verbatim LaTeX.
    - An environment node is numbered if its environment name is a key of
      `numbertheorem_counters` (only the keys are consulted).
    - Any other node is unnumbered.
    """
    is_section_like = (
        _is_section_node(node)
        or _is_subsection_node(node)
        or _is_subsubsection_node(node))
    if is_section_like:
        return _section_title(node.latex_verbatim())[0]
    if _is_environment_node(node):
        return node.environmentname in numbertheorem_counters
    return False

In [None]:
#| hide
text = r"""
\documentclass{article}

\theoremstyle{plain}
\newtheorem{theorem}{Theorem}
\newtheorem*{theorem*}{Theorem}

\begin{document}
\begin{theorem}
\end{theorem}
\begin{theorem*}
\end{theorem*}
\end{document}
"""
document_node = find_document_node(text)
environments_to_counters = {'theorem': 'theorem'}

assert _is_numbered(document_node.nodelist[1], environments_to_counters)
assert not _is_numbered(document_node.nodelist[2], environments_to_counters)


# Example with numberwithin specified.
text = r"""
\documentclass{article}

\numberwithin{equation}{section}

\theoremstyle{plain}
\newtheorem{theorem}[equation]{Theorem}
\newtheorem*{theorem*}{Theorem}

\begin{document}
\begin{theorem}
\end{theorem}
\begin{theorem*}
\end{theorem*}
\end{document}
"""

document_node = find_document_node(text)
environments_to_counters = {'theorem': 'section'}

assert _is_numbered(document_node.nodelist[1], environments_to_counters)
assert not _is_numbered(document_node.nodelist[2], environments_to_counters)

# Example for sections and subsections
text = r"""
\begin{document}
\section{Section 1}
\subsection*{Unnumbered section}
\end{document}
"""
document_node = find_document_node(text)
environments_to_counters = {}

assert _is_numbered(document_node.nodelist[1], environments_to_counters)
assert not _is_numbered(document_node.nodelist[2], environments_to_counters)


In [None]:
#| export
def get_node_from_simple_text(
        text: str) -> LatexNode:
    """Return the (first) `LatexNode` object from a str.

    `text` is parsed from position 0 with a `LatexWalker`; any nodes after
    the first one are discarded. Raises `IndexError` if parsing yields no
    nodes.
    """
    walker = LatexWalker(text)
    parsed_nodes = walker.get_latex_nodes(pos=0)[0]
    return parsed_nodes[0]


def text_from_node(
        node: LatexNode) -> str:
    """Return the str representing `node`, i.e. the result of the node's
    `latex_verbatim` method."""
    verbatim = node.latex_verbatim()
    return verbatim


In [None]:
node = get_node_from_simple_text(
    r"""\begin{theorem}
lalalala
\begin{equation}
\end{equation}
\end{theorem}"""
)
text_from_node(node)

'\\begin{theorem}\nlalalala\n\\begin{equation}\n\\end{equation}\n\\end{theorem}'

In [None]:
text = r"""\begin{thm}This is a theorem. \end{thm}"""
node = get_node_from_simple_text(text)
assert isinstance(node, LatexEnvironmentNode)
test_eq(node.environmentname, 'thm')


text = r"""\begin{thm}This is a theorem. \end{thm} \begin{proof} This is a proof. It is not captured by the `get_node_from_simple_text` function \end{proof}"""
node = get_node_from_simple_text(text)
assert isinstance(node, LatexEnvironmentNode)
test_eq(node.environmentname, 'thm')

In [None]:
#| export
def _change_counters(
        node,
        counters,
        numbertheorem_counters: dict[str, tuple[str, Union[str, None]]],
        all_numberwithins: dict[str, list[str]]
        ):
    """Preliminarily update the counters for `node`, but not for
    any of its subnodes. This is mostly for
    theoremlike environments and for sectionlike environments.

    `counters` is modified in place:

    - If `node` is numbered (see `_is_numbered`), its counter is incremented
      and every counter that is numbered within it, according to
      `all_numberwithins`, is reset to `0`.
    - Otherwise nothing changes. In particular, an unnumbered section-like
      node such as `\\section*{...}` neither increments its counter nor
      resets the counters numbered within it, matching LaTeX, where a
      counter's dependents are reset only when the counter is stepped.

    The counter of an environment node is the first tuple entry of
    `numbertheorem_counters[node.environmentname]`; the counter of a
    `section`/`subsection`/`subsubsection` node is the macro name.

    Helper function to `_process_node`.
    """
    # Identify which counter (if any) belongs to `node`.
    if _is_environment_node(node):
        if node.environmentname in numbertheorem_counters:
            counter = numbertheorem_counters[node.environmentname][0]
        else:
            counter = None
    elif _is_section_node(node):
        counter = 'section'
    elif _is_subsection_node(node):
        counter = 'subsection'
    elif _is_subsubsection_node(node):
        counter = 'subsubsection'
    else:
        counter = None

    # Section counters only reset e.g. subsection counters when the section
    # is numbered, cf. `numbering_example_4...` and `numbering_example_5...`
    # in `nbs\_tests\latex_examples`.
    if counter is None or not _is_numbered(node, numbertheorem_counters):
        return

    counters[counter] += 1
    # e.g. `\numberwithin{equation}{section}` means that `equation` is
    # numbered within `section`, i.e. `equation` is reset whenever
    # `section` is incremented.
    for numbered_counter, within_counters in all_numberwithins.items():
        if counter in within_counters:
            counters[numbered_counter] = 0


def _change_counters_antecedently(
        node,
        counters,
        numbertheorem_counters: dict[str, str],
        all_numberwithins: dict[str, list[str]],
        ):
    """Update the counters to account for any environments contained within
    `node`, but not `node` itself.

    This is mostly for theorem-like environments which share a counter with
    something like the `equation` environment; sometimes a theorem-like
    environment can have an `equation` environment within it. Note that
    the `_process_node` function already invokes `_change_counters` on
    `node`, so the counter is already updated for `node` by the time
    `_change_counters_antecedently` is invoked.

    The descendants of `node` are visited with an explicit stack: for each
    node taken off the stack, the counters are updated for its immediate
    subnodes, and those subnodes are then pushed so that their own subnodes
    are handled later.

    Helper function to `_process_node`.
    """
    pending = [node]
    while len(pending) > 0:
        current = pending.pop()  # Takes the most recently added node.
        _update_counter_for_subsubnodes(
            current, counters, numbertheorem_counters, all_numberwithins)
        pending += _subsubnodes(current)


def _update_counter_for_subsubnodes(
        subnode,
        counters,
        numbertheorem_counters: dict[str, str],
        all_numberwithins: dict[str, list[str]],
        ) -> None: 
    """Iterate through the immediate subnodes of `subnode` to see if the
    counter needs to be updated for any
    
    Helper function to `_change_counters_antecedently`.
    """ 
    if not hasattr(subnode, 'nodelist'):
        return
    for subsubnode in subnode.nodelist:
       _change_counters(subsubnode, counters, numbertheorem_counters, all_numberwithins)
            

def _subsubnodes(
        subnode,
        ) -> list[LatexNode]: 
    """Return the immediate subnodes of `subnode` (its `nodelist`) to add to
    the queue, or an empty list if `subnode` has no `nodelist` attribute.

    Helper function to `_change_counters_antecedently`.
    """ 
    return getattr(subnode, 'nodelist', [])




In [None]:
#| hide

text = r"""\begin{thm}This is a theorem. \end{thm}"""
node = get_node_from_simple_text(text)
# Test a theoreem being counted by its own counter.
numbertheorem_counters = {'thm': ('thm', None)}
all_numberwithins = {}
counters = {'thm': 1}
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'thm': 2})
# Test a theorem being countered by the equation counter.
numbertheorem_counters = {'thm': ('equation', None)}
all_numberwithins = {}
counters = {'equation': 2}
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'equation': 3})

text = r"""\begin{corollary}This is a corollary. \end{orollary}"""
node = get_node_from_simple_text(text)
# Test a theorem-like environment being counted by the counter of
# another theorem-like environment
numbertheorem_counters = {'corollary': ('theorem', None), 'theorem': ('theorem', None)}
all_numberwithins = {}
counters = {'theorem': 0}
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'theorem': 1})


# Test a theorem-like environment whose counter is numbered within
# The section counter.
# First, see what happens when a theorem is called
text = r"""\begin{theorem}This is a theorem. \end{theorem}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'theorem': ('theorem', None)}
all_numberwithins = {'theorem': ['section']}
counters = {'section': 1, 'theorem': 0}
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'section': 1, 'theorem': 1})
# Next, see what happens when a new section is invoked:
text = r"""\section{New section! The theorem counter should be reset}"""
node = get_node_from_simple_text(text)
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'section': 2, 'theorem': 0})

# Test a theorem-like environment sharing a counter with equation
# and in turn equation is numbered within section.
text = r"""\begin{theorem}This is a theorem. \end{theorem}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'theorem': ('equation', None)}
all_numberwithins = {'equation': ['section']}
counters = {'section': 1, 'equation': 0}
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'section': 1, 'equation': 1})
# Next, see what happens when a new section is invoked:
text = r"""\section{New section! The theorem counter should be reset}"""
node = get_node_from_simple_text(text)
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'section': 2, 'equation': 0})

# Test an unnumbered theorem-like environment counter
text = r"""\begin{thm*}This is a theorem. \end{thm*}"""
node = get_node_from_simple_text(text)
# Test a theoreem being counted by its own counter.
numbertheorem_counters = {'thm': ('thm', None)}
all_numberwithins = {}
counters = {'thm': 1}
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'thm': 1})

# Test a theorem-like environment sharing a counter with equation
# and in turn equation is numbered within section, but the 
# environment is unnumbered.
text = r"""\begin{theorem*}This is a theorem. \end{theorem*}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'theorem': ('equation', None)}
all_numberwithins = {'equation': ['section']}
counters = {'section': 1, 'equation': 0}
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'section': 1, 'equation': 0})
# Next, see what happens when a unnumbered new section is invoked:
text = r"""\section*{New section! The theorem counter should be reset}"""
node = get_node_from_simple_text(text)
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'section': 1, 'equation': 0})

# Test the counter for text that does not belong to an environment
# In the current implementation of _change_counters, the '' counter
# is not actually changed.
text = r"""Just some text."""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'theorem': ('equation', None)}
all_numberwithins = {'equation': ['section']}
counters = {'section': 1, 'equation': 0, '': 0}
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'section': 1, 'equation': 0, '': 0})

# Test the counter for theorems the share a counter with subsubsection
text = r"""\begin{theorem}This is a theorem. \end{theorem}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'theorem': ('subsubsection', None)}
all_numberwithins = {}
counters = {'section': 1, 'subsubsection': 1}
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'section': 1, 'subsubsection': 2})
# Next, see what happens when a new subsection is invoked:
text = r"""\subsubsection{New subsubsection! The theorem counter should be reset}"""
node = get_node_from_simple_text(text)
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'section': 1, 'subsubsection': 3})

In [None]:
#| hide

# Here, we have that equations are numbered within sections and theorems share the equation's counter. 
# We also have a theorem environment that houses an equation environment.
text = r"""\begin{theorem}
This is theorem 1.1
\begin{equation}
asdf
\end{equation}
\end{theorem}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'theorem': ('equation', None), 'equation': ('equation', None)}
all_numberwithins = {'equation': ['section'], 'figure': ['section']}
counters = {'section': 1, 'equation': 0}
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'section': 1, 'equation': 1})
# Next, see what happens when `_change_counters_antecedently` is invoked to inspect the
# subnodes.
_change_counters_antecedently(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'section': 1, 'equation': 2})

# Everything in the below example is the same as in the previous example,
# except the theorem environment houses an enumerate environment, which in turn houses
# an equation environment.
text = r"""\begin{theorem}
This is theorem 1.1
\begin{enumerate}
\item \begin{equation}
asdf
\end{equation}
\end{enumerate}
\end{theorem}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'theorem': ('equation', None), 'equation': ('equation', None)}
all_numberwithins = {'equation': ['section'], 'figure': ['section']}
counters = {'section': 1, 'equation': 0}
_change_counters(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'section': 1, 'equation': 1})
# Next, see what happens when `_change_counters_antecedently` is invoked to inspect the
# subnodes.
_change_counters_antecedently(node, counters, numbertheorem_counters, all_numberwithins)
test_eq(counters, {'section': 1, 'equation': 2})

In [None]:
#| export
def _node_numbering(
        node: LatexNode,
        numbertheorem_counters: dict[str, tuple], # Maps an environment name to a tuple whose first entry is the name of the counter that numbers the environment.
        numberwithins: dict[str, str], # Maps a counter to the single counter that it is numbered within.
        counters: dict[str, int] # The current value of each counter.
        ) -> str: # Just the numbering of the node, no "section/subsection" or displayname
    """Return the dotted numbering (e.g. `'2.1'`) of `node`.

    The counter is `'section'`, `'subsection'`, or `'subsubsection'` for the
    corresponding sectioning nodes, and the first entry of
    `numbertheorem_counters[node.environmentname]` for an environment node.
    The numbering itself is assembled by `_numbering_helper`.

    **Raises**

    - KeyError
        - If `node` is an environment node whose name is not a key of
          `numbertheorem_counters`.
    - ValueError
        - If `node` is neither a section-like node nor an environment node,
          in which case there is no counter to number it with.
    """
    if _is_section_node(node):
        counter = 'section'
    elif _is_subsection_node(node):
        counter = 'subsection'
    elif _is_subsubsection_node(node):
        counter = 'subsubsection'
    elif _is_environment_node(node):
        # Only the counter name (first tuple entry) matters for the numbering.
        counter = numbertheorem_counters[node.environmentname][0]
    else:
        raise ValueError(
            'Expected a section, subsection, subsubsection, or environment '
            f'node to number, but got a node of type {type(node).__name__}.')
    return _numbering_helper('', counter, numberwithins, counters)


def _numbering_helper(
        trailing_numbering: str,
        counter: str,
        numberwithins: dict[str, str],
        counters: dict[str, int]
        ) -> str:
    """Recurisve helper function to `_node_numbering`."""
    if counter not in numberwithins and counter not in counters:
        return trailing_numbering
    if counter not in numberwithins and counter in counters and trailing_numbering:
        return f'{counters[counter]}.{trailing_numbering}'
    if counter not in numberwithins and counter in counters and not trailing_numbering:
        return f'{counters[counter]}'

    parent_counter = numberwithins[counter]
    current_count = counters[counter]
    if not trailing_numbering:
        to_pass_to_trailing_numbering = str(current_count)
    else:
        to_pass_to_trailing_numbering = f'{current_count}.{trailing_numbering}'

    return _numbering_helper(
        to_pass_to_trailing_numbering,
        parent_counter,
        numberwithins,
        counters)
    

In [None]:
text = r"""\begin{thm}This is a theorem. \end{thm}"""
node = get_node_from_simple_text(text)
# Test a theorem being counted by its own counter.
numbertheorem_counters = {'thm': ('thm', None)}
numberwithins = {}
counters = {'thm': 1}
sample_numbering = _node_numbering(
    node, numbertheorem_counters, numberwithins, counters)
test_eq(sample_numbering, '1')
# Test a theorem being counted by the equation counter.
numbertheorem_counters = {'thm': ('equation', None)}
numberwithins = {}
counters = {'equation': 2}
sample_numbering = _node_numbering(
    node, numbertheorem_counters, numberwithins, counters)
test_eq(sample_numbering, '2')
# Test a theorem-like environment being counted by the counter of
# another theorem-like environment
text = r"""\begin{corollary}This is a corollary. \end{corollary}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'corollary': ('theorem', None), 'theorem': ('theorem', None)}
numberwithins = {}
counters = {'theorem': 0}
sample_numbering = _node_numbering(
    node, numbertheorem_counters, numberwithins, counters)
test_eq(sample_numbering, '0')

# Test a theorem-like environment whose counter is numbered within
# The section counter.
# First, see what happens when a theorem is called
text = r"""\begin{theorem}This is a theorem. \end{theorem}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'theorem': ('theorem', None)}
numberwithins = {'theorem': 'section'}
counters = {'section': 1, 'theorem': 0}
sample_numbering = _node_numbering(
    node, numbertheorem_counters, numberwithins, counters)
test_eq(sample_numbering, '1.0')

# Next, see what happens when a new section is invoked:
text = r"""\section{New section! The theorem counter should be reset}"""
node = get_node_from_simple_text(text)
sample_numbering = _node_numbering(
    node, numbertheorem_counters, numberwithins, counters)
test_eq(sample_numbering, '1')

# Test a theorem-like environment sharing a counter with equation
# and in turn equation is numbered within section.
text = r"""\begin{theorem}This is a theorem. \end{theorem}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'theorem': ('equation', None)}
numberwithins = {'equation': 'section'}
counters = {'section': 1, 'equation': 0}
sample_numbering = _node_numbering(
    node, numbertheorem_counters, numberwithins, counters)
test_eq(sample_numbering, '1.0')
# Next, see what happens when a new section is invoked:
text = r"""\section{New section! The theorem counter should be reset}"""
node = get_node_from_simple_text(text)
sample_numbering = _node_numbering(
    node, numbertheorem_counters, numberwithins, counters)
test_eq(sample_numbering, '1')

In [None]:
#| export

def _title(
        node: LatexNode,
        numbertheorem_counters: dict[str, str],
        numberwithins: dict[str, str], # An output of _setup_numberwithins
        all_numberwithins: dict[str, list[str]], # An output of _setup_all_numberwithins. Not used by this function.
        display_names: dict[str, str],
        counters: dict[str, int],
        swap_numbers: bool):
    """Return the title of a node based on the count in
    `counters`.

    Section, subsection, and subsubsection nodes are titled by
    `_title_for_section_subsection_subsubsection_node`, and environment
    nodes by `_title_for_environment_node`. `None` is returned for any
    other kind of node.
    """
    if (_is_section_node(node) or _is_subsection_node(node)
            or _is_subsubsection_node(node)):
        # Pass the actual numbered-ness of the node. (Passing the builtin
        # `bool`, which is always truthy, would force every section-like
        # title to be numbered.)
        # NOTE(review): assumes `_is_numbered` reports `True` for ordinary
        # (unstarred) sectioning commands -- confirm against its definition.
        numbered = _is_numbered(node, numbertheorem_counters)
        return _title_for_section_subsection_subsubsection_node(
            node, counters, numbered)

    if _is_environment_node(node):
        return _title_for_environment_node(
            node, numbertheorem_counters, numberwithins,
            display_names, counters, swap_numbers)

    return None


def _title_for_section_subsection_subsubsection_node(
        node: LatexNode,
        counters: dict[str, int],
        numbered: bool
    ):
    """Return the title of a section, subsection, or subsubsection node.

    When `numbered` is falsy, only the heading text obtained from
    `_section_title` is returned. Otherwise the heading is prefixed with the
    period-separated values of the counters down to the node's level,
    e.g. `'2.3. Heading'` for a subsection. Any node that is neither a section
    nor a subsection node is treated as a subsubsection node.
    """
    _, heading = _section_title(node.latex_verbatim())
    if not numbered:
        return heading

    # The counters whose values make up the numbering, outermost first.
    if _is_section_node(node):
        levels = ('section',)
    elif _is_subsection_node(node):
        levels = ('section', 'subsection')
    else:
        levels = ('section', 'subsection', 'subsubsection')
    prefix = '.'.join(f'{counters[level]}' for level in levels)
    return f'{prefix}. {heading}'

    # if _is_section_node(node) and numbered:
    #     _, title = _section_title(node.latex_verbatim())
    #     return f"{counters['section']}. {title}"
    # if _is_section_node(node) and not numbered:
    #     _, title = _section_title(node.latex_verbatim())
    #     return title 

    # if _is_subsection_node(node) and numbered:
    #     _, title = _section_title(node.latex_verbatim())
    #     return f"{counters['section']}.{counters['subsection']}. {title}"
    # if _is_subsection_node(node) and not numbered:
    #     _, title = _section_title(node.latex_verbatim())
    #     return title


def _title_for_environment_node(
        node: LatexNode,
        numbertheorem_counters: dict[str, str],
        numberwithins: dict[str, list[str]],
        display_names: dict[str, str],
        counters: dict[str, int],
        swap_numbers: bool):
    """Return the title of an environment node.

    The display name is looked up in `display_names`, falling back to the
    bare environment name when the environment has no entry there.

    - If `_is_numbered` reports the node as unnumbered, the title is just the
      display name.
    - Otherwise the title is `'<numbering>. <display name>.'` when
      `swap_numbers` is truthy and `'<display name> <numbering>.'` when it
      is not, where the numbering comes from `_node_numbering`.
    """
    # TODO: see what happens when environments are numbered within
    # sections vs. subsections
    is_numbered = _is_numbered(node, numbertheorem_counters)
    numbering = (
        _node_numbering(node, numbertheorem_counters, numberwithins, counters)
        if is_numbered else None)

    environment_name = node.environmentname
    display_name = display_names.get(environment_name, environment_name)

    if not is_numbered:
        return display_name
    if swap_numbers:
        return f'{numbering}. {display_name}.'
    return f'{display_name} {numbering}.'
        

In [None]:
#| hide

# Theorem that is not numbered within anything
text = r"""\begin{thm}This is a theorem. \end{thm}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'thm': ('thm', None)}
numberwithins = {}
all_numberwithins = {}
display_names = {'thm': 'Theorem'}
counters = {'thm': 1}
swap_numbers = False
sample_title = _title(
    node, numbertheorem_counters, numberwithins, all_numberwithins,
    display_names, counters, swap_numbers)
test_eq(sample_title, 'Theorem 1.')

# Same theorem, but with the number placed before the display name.
swap_numbers = True
sample_title = _title(
    node, numbertheorem_counters, numberwithins, all_numberwithins,
    display_names, counters, swap_numbers)
test_eq(sample_title, '1. Theorem.')

# Theorem that is counted by equation, which in turn is numbered within
# section
text = r"""\begin{thm}This is a theorem. \end{thm}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'thm': ('equation', None)}
numberwithins = {'equation': 'section'}
all_numberwithins = {'equation': ['section']}
display_names = {'thm': 'Theorem'}
counters = {'equation': 1, 'section': 2}
swap_numbers = False
sample_title = _title(
    node, numbertheorem_counters, numberwithins, all_numberwithins,
    display_names, counters, swap_numbers)
test_eq(sample_title, 'Theorem 2.1.')

swap_numbers = True
sample_title = _title(
    node, numbertheorem_counters, numberwithins, all_numberwithins,
    display_names, counters, swap_numbers)
test_eq(sample_title, '2.1. Theorem.')

# Section
text = r"""\section{This is a section}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'thm': ('equation', None)}
numberwithins = {'equation': 'section'}
all_numberwithins = {'equation': ['section']}
display_names = {'thm': 'Theorem'}
counters = {'equation': 1, 'section': 2}
swap_numbers = False
sample_title = _title(
    node, numbertheorem_counters, numberwithins, all_numberwithins,
    display_names, counters, swap_numbers)
test_eq(sample_title, '2. This is a section')

swap_numbers = True
sample_title = _title(
    node, numbertheorem_counters, numberwithins, all_numberwithins,
    display_names, counters, swap_numbers)
test_eq(sample_title, '2. This is a section')

# Subsection
text = r"""\subsection{This is a subsection}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'thm': ('equation', None)}
numberwithins = {'equation': 'section', 'subsection': 'section'}
all_numberwithins = {'equation': ['section'], 'subsection': ['section']}
display_names = {'thm': 'Theorem'}
counters = {'equation': 1, 'section': 2, 'subsection': 3}
swap_numbers = False
sample_title = _title(
    node, numbertheorem_counters, numberwithins, all_numberwithins,
    display_names, counters, swap_numbers)
test_eq(sample_title, '2.3. This is a subsection')

swap_numbers = True
sample_title = _title(
    node, numbertheorem_counters, numberwithins, all_numberwithins,
    display_names, counters, swap_numbers)
test_eq(sample_title, '2.3. This is a subsection')

# In the case that an environment is not a theorem-like environment.
text = r"""\begin{abstract} This is an abstract \end{abstract}"""
node = get_node_from_simple_text(text)
numbertheorem_counters = {'thm': ('equation', None)}
numberwithins = {'equation': 'section', 'subsection': 'section'}
all_numberwithins = {'equation': ['section'], 'subsection': ['section']}
display_names = {'thm': 'Theorem'}
counters = {'equation': 1, 'section': 2, 'subsection': 3}
swap_numbers = False
sample_title = _title(
    node, numbertheorem_counters, numberwithins, all_numberwithins,
    display_names, counters, swap_numbers)
test_eq(sample_title, 'abstract')

# # In the case a section has multilines
# text = r"""\section{Exceptional maximal subgroups of 
# \texorpdfstring{\(\GSp_4(\ff_\ell)\)}{GSp4Fell}}"""
# node = get_node_from_simple_text(text)
# numbertheorem_counters = {'thm': ('equation', None)}
# numberwithins = {'equation': 'section', 'subsection': 'section'}
# all_numberwithins = {'equation': ['section'], 'subsection': ['section']}
# display_names = {'thm': 'Theorem'}
# counters = {'equation': 1, 'section': 2, 'subsection': 3}
# swap_numbers = False
# sample_title = _title(
#     node, numbertheorem_counters, numberwithins, all_numberwithins,
#     display_names, counters, swap_numbers)
# sample_title
# test_eq(sample_title, '2.3. This is a subsection')

In [None]:
#| export
def swap_numbers_invoked(
        preamble: str
        ) -> bool: # `True` if `\swapnumbers` occurs in `preamble` once comments are removed.
    r"""Return `True` if `\swapnumbers` is in the preamble.

    Comments are stripped from `preamble` with `remove_comments` before
    searching, so a commented-out `\swapnumbers` does not count.

    Assume that a mention of `\swapnumbers` is an actual invocation.
    """
    # Raw strings are used here and for the docstring because `\s` is not a
    # valid escape sequence in an ordinary string literal.
    preamble = remove_comments(preamble)
    return r'\swapnumbers' in preamble

In [None]:
# A bare invocation is detected.
assert swap_numbers_invoked(r'\swapnumbers')
# A `\swapnumbers` that only appears in a comment is not an invocation.
assert not swap_numbers_invoked(r'''
\documentclass{article}
\usepackage{amsthm}
%\usepackage{amsmath}

\newtheorem{theorem}{Theorem} % \swapnumbers
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{definition}[theorem]{Definition}
\newtheorem*{remark*}{Remark}''')

In [None]:
#| export
def _node_warrants_own_part(
        node, environments_to_not_divide_along: list[str],
        accumulation: str, parts: list[tuple[str, str]]) -> bool:
    """Return `True` if `node` warrants making a new part to be added in `parts`.

    This is a helper function for `_process_node`. When this function returns
    `True`, the `accumulation` should be considered for appending to `parts`
    and the node should also be appended to `parts`.

    - Section, subsection, and subsubsection nodes always warrant a new part.
    - Nodes that are neither section-like nor environments never do.
    - An environment node warrants a new part when nothing has been gathered
      so far (`parts` is empty and `accumulation` is blank), or when its name
      is not listed in `environments_to_not_divide_along`.
    """
    is_sectionlike = (
        _is_section_node(node)
        or _is_subsection_node(node)
        or _is_subsubsection_node(node))
    if is_sectionlike:
        return True
    if not _is_environment_node(node):
        return False

    # `node` is an environment node from here on.
    nothing_gathered_yet = len(parts) == 0 and accumulation.strip() == ''
    if nothing_gathered_yet:
        return True
    return node.environmentname not in environments_to_not_divide_along

In [None]:
#| hide

# These examples are based on `numbering_example_1_consecutive_numbering_scheme`
# in `\nbs\_tests\latex_examples`.

# Test the case of accumulating text at the very beginning before any section
node = get_node_from_simple_text('\nFor this document, the `theorem` counter is not reset whenever a new section begins.\n\nA similar numbering scheme can be accomplished by importing ')
environments_to_not_divide_along = ['equation', 'equation*', 'proof', 'align', 'align*']
accumulation = ''
parts = []
assert not _node_warrants_own_part(node, environments_to_not_divide_along, accumulation, parts)

node = get_node_from_simple_text('\\verb|amsmath|')
environments_to_not_divide_along = ['equation', 'equation*', 'proof', 'align', 'align*']
accumulation = '\nFor this document, the `theorem` counter is not reset whenever a new section begins.\n\nA similar numbering scheme can be accomplished by importing '
parts = []
assert not _node_warrants_own_part(node, environments_to_not_divide_along, accumulation, parts)

node = get_node_from_simple_text(' and invoking the code ')
environments_to_not_divide_along = ['equation', 'equation*', 'proof', 'align', 'align*']
accumulation = '\nFor this document, the `theorem` counter is not reset whenever a new section begins.\n\nA similar numbering scheme can be accomplished by importing \\verb|amsmath|'
parts = []
assert not _node_warrants_own_part(node, environments_to_not_divide_along, accumulation, parts)

# Now a new section comes in, which warrants a new part.
node = get_node_from_simple_text('\\section{Introduction}')
environments_to_not_divide_along = ['equation', 'equation*', 'proof', 'align', 'align*']
accumulation = '\nFor this document, the `theorem` counter is not reset whenever a new section begins.\n\nA similar numbering scheme can be accomplished by importing \\verb|amsmath| and invoking the code \\verb|\\numberwithin{theorem}{part}| in the preamble.\n\n' 
parts = []
assert _node_warrants_own_part(node, environments_to_not_divide_along, accumulation, parts)

# Now a new theorem comes in, which also warrants a new part.
node = get_node_from_simple_text('\\begin{theorem}\nThis is Theorem 1.\n\\end{theorem}')
environments_to_not_divide_along = ['equation', 'equation*', 'proof', 'align', 'align*']
accumulation = '\n\n' 
parts = [['1', 'For this document, the `theorem` counter is not reset whenever a new section begins.\n\nA similar numbering scheme can be accomplished by importing \\verb|amsmath| and invoking the code \\verb|\\numberwithin{theorem}{part}| in the preamble.'], ['1. Introduction', '\\section{Introduction}']] 
assert _node_warrants_own_part(node, environments_to_not_divide_along, accumulation, parts)

# Test the case where text that does not belong to an environment occurs at the
# very beginning, even before any sections, and an environment node makes an
# appearance.
# cf. divide_latex_example_2 in `nbs\_tests\latex_examples`.
node = get_node_from_simple_text(r"""\begin{abstract}
    This is an abstract
    \end{abstract}""")
environments_to_not_divide_along = ['equation', 'equation*', 'proof', 'align', 'align*']
accumulation = '\\maketitle\n'
parts = []
assert _node_warrants_own_part(node, environments_to_not_divide_along, accumulation, parts)

# Now test the same case except `abstract` is included in `environments_to_not_divide_along`
node = get_node_from_simple_text(r"""\begin{abstract}
    This is an abstract
    \end{abstract}""")
environments_to_not_divide_along = ['equation', 'equation*', 'proof', 'align', 'align*', 'abstract']
accumulation = '\\maketitle\n'
parts = []
assert not _node_warrants_own_part(node, environments_to_not_divide_along, accumulation, parts)

# Test the case at the beginning of a section with an enumerate node.
node = get_node_from_simple_text('\\begin{enumerate}\n  \\item Introduction 2\n\n  \\item Preliminaries $\\quad 7$\n\n\\end{enumerate}')
environments_to_not_divide_along = ['equation', 'equation*', 'proof', 'align', 'align*', 'enumerate', 'itemize']
accumulation = '\n'
parts = [['1. CONTENTS', '\\section{CONTENTS}']]
assert not _node_warrants_own_part(node, environments_to_not_divide_along, accumulation, parts)

# Test the case where a section is immediately followed by another section
node = get_node_from_simple_text('\\section{Section 2}')
environments_to_not_divide_along = ['equation', 'equation*', 'proof', 'align', 'align*', 'enumerate', 'itemize']
accumulation = '\n'
parts = [['1. Section 1', '\\section{Section 1}']]
assert _node_warrants_own_part(node, environments_to_not_divide_along, accumulation, parts)

# Test the case of a subsubsection
node = get_node_from_simple_text('\\subsubsection{section 1.1.1}')
environments_to_not_divide_along = ['equation', 'equation*', 'proof', 'align', 'align*', 'enumerate', 'itemize']
accumulation = '\n'
parts = [['1.1 Section 1.1', '\\subsection{Section 1.1}']]
assert _node_warrants_own_part(node, environments_to_not_divide_along, accumulation, parts)


In [None]:
#| export
def _node_is_proof_immediately_following_a_theorem_like_environment(
        node, accumulation, parts, display_names) -> bool:
    """Return `True` if `node` is that of a proof environment that immediately
    follows a theorem-like environment.

    "Immediately" means that `accumulation` is blank and that the text of the
    last entry of `parts` is that of an environment whose name is a key of
    `display_names`.

    This is a helper function for `_process_node`.
    """
    is_proof_node = (
        _is_environment_node(node) and node.environmentname == 'proof')
    if not is_proof_node:
        return False
    # There must be a previous part, with nothing but whitespace since it.
    if len(parts) == 0 or accumulation.strip() != '':
        return False
    previous_text = parts[-1][1]
    if not _text_is_of_environment_node(previous_text):
        return False
    return _environment_name_of_text(previous_text) in display_names
    # previous_node = get_node_from_simple_text(parts[-1][1])
    # if not _is_environment_node(previous_node):
    #     return False
    # return previous_node.environmentname in display_names


def _node_is_nonspecial_following_a_sectionlike_node(
        node, environments_to_not_divide_along, accumulation, parts) -> bool:
    """Return `True` if `node` is a "nonspecial" node that immediately follows
    a sectionlike (section, subsection, or subsubsection) node.

    A node is "nonspecial" if it is not sectionlike and is not an environment
    node, or is an environment node whose name is listed in
    `environments_to_not_divide_along`. "Immediately" means that
    `accumulation` is blank and that the text of the last entry of `parts`
    is that of a sectionlike node.

    This is a helper function for `_process_node`.
    """
    # Environments that get their own part are "special".
    if (_is_environment_node(node)
            and node.environmentname not in environments_to_not_divide_along):
        return False
    # So are sectionlike nodes.
    if (_is_section_node(node)
            or _is_subsection_node(node)
            or _is_subsubsection_node(node)):
        return False
    # There must be a previous part, with nothing but whitespace since it.
    if len(parts) == 0 or accumulation.strip() != '':
        return False
    return _text_is_of_section_like_node(parts[-1][1])
    

In [None]:
#| hide

# Test basic case where node is proof node following a theorem node.
node = get_node_from_simple_text(r'\begin{proof} This is a proof \end{proof}')
accumulation = '\n\n'
parts = [['1. Section', '\\section{Section}'], ['Theorem 1.', '\\begin{thm} This is a theorem \\end{thm}']]
display_names = {'thm': 1}
assert _node_is_proof_immediately_following_a_theorem_like_environment(node, accumulation, parts, display_names)

# Test basic case where node is not a proof node
node = get_node_from_simple_text('\\begin{thm} This is a theorem \\end{thm}')
accumulation = '\n\n'
parts = [['1. Section', '\\section{Section}']]
display_names = {'thm': 0}
assert not _node_is_proof_immediately_following_a_theorem_like_environment(node, accumulation, parts, display_names)

# Test when node is proof node at the very beginning of a document.
node = get_node_from_simple_text(r'\begin{proof} This is a proof \end{proof}')
accumulation = '\n\n'
parts = []
display_names = {'thm': 0}
assert not _node_is_proof_immediately_following_a_theorem_like_environment(node, accumulation, parts, display_names)

# Test when node is proof node at the beginning of a section.
node = get_node_from_simple_text(r'\begin{proof} This is a proof \end{proof}')
accumulation = '\n\n'
parts = [['1. Section', '\\section{Section}']]
display_names = {'thm': 0}
assert not _node_is_proof_immediately_following_a_theorem_like_environment(node, accumulation, parts, display_names)

# Test when node is proof node following a remark.
node = get_node_from_simple_text(r'\begin{proof} This is a proof \end{proof}')
accumulation = '\n\n'
parts = [['1. Section', '\\section{Section}'], ['Theorem 1.', '\\begin{thm} This is a theorem \\end{thm}'], ['Remark', '\\begin{rem} This is an unnumbered remark \\end{rem}']]
display_names = {'thm': 1}
assert not _node_is_proof_immediately_following_a_theorem_like_environment(node, accumulation, parts, display_names)

# Test when node is proof node following some nonempty text
node = get_node_from_simple_text(r'\begin{proof} This is a proof \end{proof}')
accumulation = '\n\nSome things are being said before the proof but after the theorem.'
parts = [['1. Section', '\\section{Section}'], ['Theorem 1.', '\\begin{thm} This is a theorem \\end{thm}']]
display_names = {'thm': 1}
assert not _node_is_proof_immediately_following_a_theorem_like_environment(node, accumulation, parts, display_names)

# Test when node is some "normal" node following some section-like node
node = get_node_from_simple_text(r'I am just some text. Not an environment, not a section')
accumulation = ''
parts = [['1. Section', '\\section{Section}'], ]
environments_to_not_divide_along = [
    'displaymath', 'displaymath*', 'equation', 'equation*', 'gather', 'gather*', 'multiline', 'multiline*',
    'proof', 'align', 'align*', 'enumerate', 'itemize', 'label', 'eqnarray', 'quote', 'tabular', 'table']

assert _node_is_nonspecial_following_a_sectionlike_node(node, environments_to_not_divide_along, accumulation, parts)

# Test when node is some "normal" node following some other "normal" text following a section-like node
node = get_node_from_simple_text(r'I am just some text. Not an environment, not a section')
accumulation = 'But there is some other preceding text between the start of the section and the text, so False should be returned'
parts = [['1. Section', '\\section{Section}'], ]
environments_to_not_divide_along = [
    'displaymath', 'displaymath*', 'equation', 'equation*', 'gather', 'gather*', 'multiline', 'multiline*',
    'proof', 'align', 'align*', 'enumerate', 'itemize', 'label', 'eqnarray', 'quote', 'tabular', 'table']

assert not _node_is_nonspecial_following_a_sectionlike_node(node, environments_to_not_divide_along, accumulation, parts)

# Test when node is a sectionlike node following a section-like node:
node = get_node_from_simple_text(r'\section{Next section}')
accumulation = ''
parts = [['1. Section', '\\section{Section}'], ]
environments_to_not_divide_along = [
    'displaymath', 'displaymath*', 'equation', 'equation*', 'gather', 'gather*', 'multiline', 'multiline*',
    'proof', 'align', 'align*', 'enumerate', 'itemize', 'label', 'eqnarray', 'quote', 'tabular', 'table']
assert not _node_is_nonspecial_following_a_sectionlike_node(node, environments_to_not_divide_along, accumulation, parts)


In [None]:
#| export
# Names of the environments that `divide_latex_text` does not, by default,
# split off into parts of their own.
DEFAULT_ENVIRONMENTS_TO_NOT_DIVIDE_ALONG = [
    'align', 'align*',
    'diagram',
    'displaymath', 'displaymath*',
    'enumerate',
    'eqnarray', 'eqnarray*',
    'equation', 'equation*',
    'gather', 'gather*',
    'itemize',
    'label',
    'multiline', 'multiline*',
    'multline', 'multline*',
    'proof',
    'quote',
    'tabular',
    'table',
]
def divide_latex_text(
        document: str, 
        environments_to_not_divide_along: list[str] = DEFAULT_ENVIRONMENTS_TO_NOT_DIVIDE_ALONG, # A list of the names of the environments along which to not make a new note, unless the environment is the very first content of the document.
        replace_commands_in_document_first: bool = True,  # If `True`, invoke `replace_commands_in_latex_document` on `document` to first replace custom commands (in the document minus the preamble) before starting to divide the document.
        repeat_replacing_commands: int = -1,  # If `replace_commands_in_document_first` is `True`, then this is passed as the `repeat` argument into the invocation of `replace_commands_in_latex_document`.
        ) -> list[list[str]]: # Each entry is a two-element list `[note_title, text]`, where `note_title` often encapsulates the note type (i.e. section/subsection/display text of a theorem-like environment) along with the numbering and `text` is the text of the part. Sometimes `note_title` is just a number, which means that `text` is not of a `\section` or `\subsection` command and not of a theorem-like environment.
    r"""Divide LaTeX text to convert into Obsidian.md notes.

    Assumes that the counters in the LaTeX document are either the
    predefined ones or specified by the `\newtheorem` command.

    Proof environments are assigned to the same parts as their preceding
    theorem-like environments, if available.

    The top-level nodes of the document are visited in order. Each one is
    handed to `_process_node`, which either starts a new part, extends the
    last part, or adds the node's text to an accumulation of loose text. Any
    loose text remaining after the last node becomes a final part.

    TODO: Implement counters specified by `\newcounter`, cf. 
    https://www.overleaf.com/learn/latex/Counters#LaTeX_commands_for_working_with_counters.
    """
    # Numbering information, read from the full document.
    numbertheorem_counters = numbered_newtheorems_counters_in_preamble(document)
    explicit_numberwithins = numberwithins_in_preamble(document)
    numberwithins = _setup_numberwithins(explicit_numberwithins, numbertheorem_counters)
    all_numberwithins = _setup_all_numberwithins(explicit_numberwithins, numbertheorem_counters)
    display_names = display_names_of_environments(document)
    counters = _setup_counters(numbertheorem_counters)

    preamble, main_document = divide_preamble(document)
    if replace_commands_in_document_first:
        # NOTE(review): the full `document` (preamble included) is passed on
        # purpose here, presumably because the custom commands are defined in
        # the preamble -- confirm against `replace_commands_in_latex_document`.
        main_document = replace_commands_in_latex_document(document, repeat_replacing_commands)
    document_node = find_document_node(main_document)
    swap_numbers = swap_numbers_invoked(preamble)

    # Eventually gets returned
    parts = []
    # "Accumulates" a "part" until text that should comprise a new part is encountered
    accumulation = '' 
    for node in document_node.nodelist:
        accumulation = _process_node(
            node, environments_to_not_divide_along, accumulation,
            numbertheorem_counters,
            numberwithins, all_numberwithins, counters,
            display_names, swap_numbers, parts)
    # Flush whatever loose text is left after the last node.
    _append_non_environment_accumulation_to_parts_if_non_empty(
        accumulation, counters, parts)
    return parts


def _process_node(
        node, environments_to_not_divide_along, accumulation,
        numbertheorem_counters,
        numberwithins, all_numberwithins, counters,
        display_names, swap_numbers, parts) -> str:
    """
    Update `accumulation`, `counters`, and `parts` based on the contents of `node`.

    Also return `accumulation` so that the caller can update it.

    Exactly one of the following happens to the text of `node`:

    - It is appended to the text of the last entry of `parts`, if `node` is a
      proof immediately following a theorem-like environment or a
      "nonspecial" node immediately following a sectionlike node.
    - It becomes a new entry of `parts` with its own title, if
      `_node_warrants_own_part` says so. Any nonblank `accumulation` is
      first flushed into `parts` as an entry of its own.
    - It is appended to `accumulation` otherwise.

    `counters` and `parts` are modified in place.

    This is a helper function for `divide_latex_text`.

    """
    # Advance the counters for `node` itself before any title is computed, so
    # that the title reflects the updated count.
    # NOTE(review): the unit tests for `_change_counters` pass it
    # `all_numberwithins`, whereas `numberwithins` is passed here -- confirm
    # which of the two is intended.
    _change_counters(
        node, counters, numbertheorem_counters, numberwithins)
    # If node is a proof immediately following a theorem-like environment
    # (or plain content right after a sectionlike node), then add it to the
    # part that was appended last.
    if (_node_is_proof_immediately_following_a_theorem_like_environment(
            node, accumulation, parts, display_names)
        or _node_is_nonspecial_following_a_sectionlike_node(
            node, environments_to_not_divide_along, accumulation, parts)):
        parts[-1][1] += node.latex_verbatim()
    elif _node_warrants_own_part(
            node, environments_to_not_divide_along, accumulation, parts):
        # Flush the loose text gathered so far; the helper returns `''`.
        accumulation =  _append_non_environment_accumulation_to_parts_if_non_empty(
            accumulation, counters, parts)
        
        title = _title(
            node, numbertheorem_counters, numberwithins, all_numberwithins,
            display_names, counters, swap_numbers).strip()
        # Titles are kept on a single line.
        title = title.replace('\n', '') 
        parts.append([title, node.latex_verbatim()])
    else:
        accumulation += node.latex_verbatim()
        # NOTE(review): an earlier comment here said that `_change_counters`
        # increments the '' counter by default and that this branch offsets
        # it, but no offsetting code is present -- confirm whether one is needed.
    # Runs for every node, whichever branch was taken. In the tests for
    # `_change_counters_antecedently`, it advances the counter for a numbered
    # environment nested inside `node`.
    _change_counters_antecedently(node, counters, numbertheorem_counters, all_numberwithins)
    return accumulation


def _append_non_environment_accumulation_to_parts_if_non_empty(
        accumulation: str, counters, parts):
    """Append accumulation to `parts` if `accumulation` is nonempty
    and return the updated `accumulation` """
    if accumulation.strip() != '':
        counters[''] += 1
        parts.append([str(counters['']).strip(), accumulation.strip()])
        return ''
    else:
        return accumulation.strip()






In [None]:
file = _test_directory() / 'latex_examples' / 'divide_latex_example_text_with_gather_environment' / 'main.tex'
sample_latex_text = text_from_file(file)
# print(sample_latex_text)
preamble, document = divide_preamble(sample_latex_text)
parts = divide_latex_text(sample_latex_text)
print(parts)
# The displayed equation must stay inside the section's part instead of
# starting a part of its own.
test_eq(len(parts), 1)
assert '\\begin{align*}' in parts[0][1]

[['1. Introduction', '\\section{Introduction}\n\nThere is an equation\n\\begin{align*}\nasdf\n\\end{align*}\nbut this equation should not get to start its own part.\n\n']]


#### Examples for the `divide_latex_text` function

In [None]:
#| hide
file = _test_directory() / 'latex_examples' / 'numbering_example_6' / 'main.tex'
sample_latex_text = text_from_file(file)
# print(sample_latex_text)
preamble, document = divide_preamble(sample_latex_text)
parts = divide_latex_text(sample_latex_text)
print(parts)

[['1. Isotypicity and non-unitarity', "\\section{Isotypicity and non-unitarity}\\label{section:bilinear-pairing}\nThe main result of this section is \\autoref{theorem:isotypic}, which states that\ncounterexamples to Putman-Wieland in genus $\\geq 3$ cannot be isotypic, i.e.,\nthere exists an element of \n$H^1(\\Sigma_{g'}, \\mathbb C)^\\rho$ with infinite orbit under the action of a finite\nindex subgroup of the mapping class group. We show more, namely \\autoref{theorem:non-unitary}: if $X\\to Y$ is an $H$-cover, where $Y$ has genus at least $3$, the virtual action of the mapping class group of $Y$ on an $H$-isotypic component of the cohomology of $X$ is non-unitary.\n\nIn \\autoref{corollary:boggi-looijenga} we use this to show how a\n result from the retracted paper of Boggi-Looijenga \\cite{boggiL:curves-with-prescribed-symmetry} would imply the Putman-Wieland conjecture.\n\nOur main tool for proving this is a natural bilinear pairing, which we\nnext introduce.\nLet $C$ be a smooth

In the following example, we take a basic LaTeX file and divide it into parts:

In [None]:
file = _test_directory() / 'latex_examples' / 'divide_latex_example_proof_preceded_by_theorem' / 'main.tex'
sample_latex_text = text_from_file(file)
print(sample_latex_text)


\documentclass[10pt]{article}

\theoremstyle{plain}
\newtheorem*{theorem*}{Theorem}
\newtheorem*{theoremA}{Theorem A}
\newtheorem*{theoremB}{Theorem B}
\newtheorem{theorem}[equation]{Theorem}
\newtheorem{proposition}[equation]{Proposition}
\newtheorem{lemma}[equation]{Lemma}
\newtheorem{corollary}[equation]{Corollary}

\theoremstyle{definition}
\newtheorem{definition}[equation]{Definition}
\newtheorem{example}[equation]{Example}
\newtheorem*{acknowledgements}{Acknowledgements}
\newtheorem*{conventions}{Conventions}

\theoremstyle{remark}
\newtheorem{remark}[equation]{Remark}



\begin{document}
\section{Some section}

\begin{theorem}
This is a theorem.
\end{theorem}

\begin{proof}
This is a proof
\end{proof}

\end{document}


The `divide_preamble` function recognizes where the preamble ends and where the document begins.

In [None]:
preamble, document = divide_preamble(sample_latex_text)

In [None]:
print(preamble)


\documentclass[10pt]{article}

\theoremstyle{plain}
\newtheorem*{theorem*}{Theorem}
\newtheorem*{theoremA}{Theorem A}
\newtheorem*{theoremB}{Theorem B}
\newtheorem{theorem}[equation]{Theorem}
\newtheorem{proposition}[equation]{Proposition}
\newtheorem{lemma}[equation]{Lemma}
\newtheorem{corollary}[equation]{Corollary}

\theoremstyle{definition}
\newtheorem{definition}[equation]{Definition}
\newtheorem{example}[equation]{Example}
\newtheorem*{acknowledgements}{Acknowledgements}
\newtheorem*{conventions}{Conventions}

\theoremstyle{remark}
\newtheorem{remark}[equation]{Remark}






In [None]:
print(document)

\begin{document}
\section{Some section}

\begin{theorem}
This is a theorem.
\end{theorem}

\begin{proof}
This is a proof
\end{proof}

\end{document}


The `divide_latex_text` function divides the LaTeX document into parts, generally based on sections and theorem-like environments:

In [None]:

parts = divide_latex_text(sample_latex_text)
print(parts)
test_eq(len(parts), 2)

[['1. Some section', '\\section{Some section}\n\n'], ['Theorem 1.', '\\begin{theorem}\nThis is a theorem.\n\\end{theorem}\\begin{proof}\nThis is a proof\n\\end{proof}']]


In the next example, we have some `enumerate` environments to list out some things. The `divide_latex_text` function does not create a new part for an `enumerate` environment.

In [None]:
file = _test_directory() / 'latex_examples' / 'divide_latex_example_text_preceded_by_undivided_environment' / 'main.tex'
sample_latex_text = text_from_file(file)
print(sample_latex_text)

% In this example, there are enumerate environments, which should not get their
% own `part`, cf. `divide_latex_text` in `16_latex.convert.ipynb`.
\documentclass[10pt]{article}
\usepackage{amsmath}
\usepackage{amsfonts}
\begin{document}

\section{Introduction}

Blahblahblah, this document has some lists.
The `divide_latex_text` should not create a separate part for the below `enumerate`
environment; after all, it seems better to include the list in the same file/note
as the text that provides context for the list.

\begin{enumerate}
  \item Rings
  \item Fields
\end{enumerate}

And here is another list, perhaps a grocery list:

\begin{enumerate}
  \setcounter{enumi}{3}
  \item apples
  \item bananas
  \item milk
\end{enumerate}

Lalalala

\end{document}


In [None]:
preamble, document = divide_preamble(sample_latex_text)
parts = divide_latex_text(document)
print(parts)
test_eq(len(parts), 1)

[['1. Introduction', '\\section{Introduction}\n\nBlahblahblah, this document has some lists.\nThe `divide_latex_text` should not create a separate part for the below `enumerate`\nenvironment; after all, it seems better to include the list in the same file/note\nas the text that provides context for the list.\n\n\\begin{enumerate}\n  \\item Rings\n  \\item Fields\n\\end{enumerate}\n\nAnd here is another list, perhaps a grocery list:\n\n\\begin{enumerate}\n  \\setcounter{enumi}{3}\n  \\item apples\n  \\item bananas\n  \\item milk\n\\end{enumerate}\n\nLalalala\n\n']]


In [None]:
file = _test_directory() / 'latex_examples' / 'divide_latex_example_2' / 'main.tex'
sample_latex_text = text_from_file(file)
preamble, document = divide_preamble(sample_latex_text)
parts = divide_latex_text(document)
print(parts)

[['1', '\\maketitle'], ['abstract', '\\begin{abstract}\nThis is an abstract\n\\end{abstract}']]


The `divide_latex_text` function by default divides along a LaTeX environment (something which is invoked by `\begin{...} \end{...}`). One can use the optional `environments_to_not_divide_along` parameter in the function to specify which environments to not divide along. By default, this list is set as follows:

In [None]:
DEFAULT_ENVIRONMENTS_TO_NOT_DIVIDE_ALONG

['align',
 'align*',
 'diagram',
 'displaymath',
 'displaymath*',
 'enumerate',
 'eqnarray',
 'eqnarray*',
 'equation',
 'equation*',
 'gather',
 'gather*',
 'itemize',
 'label',
 'multiline',
 'multiline*',
 'multline',
 'multline*',
 'proof',
 'quote',
 'tabular',
 'table']

In the following example, the `theorem`, `corollary`, and `definition` environments share a counter, which is not reset even when a new section begins. 

In [None]:
file = _test_directory() / 'latex_examples' / 'numbering_example_1_consecutive_numbering_scheme' / 'main.tex'
text = text_from_file(file)
print(text)

\documentclass{article}
\usepackage{amsthm}
%\usepackage{amsmath}

\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{definition}[theorem]{Definition}
\newtheorem*{remark*}{Remark}

%\numberwithin{theorem}{part}

\begin{document}
For this document, the `theorem` counter is not reset whenever a new section begins.

A similar numbering scheme can be accomplished by importing \verb|amsmath| and invoking the code \verb|\numberwithin{theorem}{part}| in the preamble.

\section{Introduction}

\begin{theorem}
This is Theorem 1.
\end{theorem}

\begin{corollary}
This is Corollary 2.
\end{corollary}

\begin{remark*}
This is a remark. It is unnumbered and it does not affect the numberings of other environments.
\end{remark*}

\begin{definition}
This is Definition 3.
\end{definition}



\section{Another Section}

\begin{theorem}
This is Theorem 4.
\end{theorem}

And we might get a corollary!

\begin{corollary}
This is Corollary 5.
\end{corollary}

\begin{definition

In [None]:
sample_output = divide_latex_text(text)
sample_output


[['1',
  'For this document, the `theorem` counter is not reset whenever a new section begins.\n\nA similar numbering scheme can be accomplished by importing \\verb|amsmath| and invoking the code \\verb|\\numberwithin{theorem}{part}| in the preamble.'],
 ['1. Introduction', '\\section{Introduction}\n\n'],
 ['Theorem 1.', '\\begin{theorem}\nThis is Theorem 1.\n\\end{theorem}'],
 ['Corollary 2.',
  '\\begin{corollary}\nThis is Corollary 2.\n\\end{corollary}'],
 ['Remark',
  '\\begin{remark*}\nThis is a remark. It is unnumbered and it does not affect the numberings of other environments.\n\\end{remark*}'],
 ['Definition 3.',
  '\\begin{definition}\nThis is Definition 3.\n\\end{definition}'],
 ['2. Another Section', '\\section{Another Section}\n\n'],
 ['Theorem 4.', '\\begin{theorem}\nThis is Theorem 4.\n\\end{theorem}'],
 ['2', 'And we might get a corollary!'],
 ['Corollary 5.',
  '\\begin{corollary}\nThis is Corollary 5.\n\\end{corollary}'],
 ['Definition 6.',
  '\\begin{definition}\nThi

In [None]:
# Compare the titles of the leading parts in one go. The entries after
# '2. Another Section' are the ones that show that the shared counter is not
# reset when a new section begins.
test_eq(
    [part[0] for part in sample_output[:11]],
    ['1',
     '1. Introduction',
     'Theorem 1.',
     'Corollary 2.',
     'Remark',
     'Definition 3.',
     '2. Another Section',
     'Theorem 4.',
     '2',
     'Corollary 5.',
     'Definition 6.'])

In the following example, the `\numberwithin` command is used to make the theorem-like environments numbered within sections. These environments are first numbered `1.1`, `1.2`, `1.3`, etc., and then numbered `2.1`, `2.2`, `2.3`, etc. once a new section starts.

In [None]:

file = _test_directory() / 'latex_examples' / 'numbering_example_2_numbering_scheme_reset_at_each_section' / 'main.tex'
text = text_from_file(file)
print(text)

% This is an example of a LaTeX document whose theorem-like environments are numbered with sections.

\documentclass{article}
\usepackage{amsthm}
\usepackage{amsmath}

\newtheorem{theorem}{Theorem}
\newtheorem{corollary}[theorem]{Corollary}
\newtheorem{definition}[theorem]{Definition}
\newtheorem*{remark*}{Remark}

\numberwithin{theorem}{section}

\begin{document}

This document resets its `theorem` counter whenever a new section begins.

\section{Introduction}

\begin{theorem}
This is Theorem 1.1.
\end{theorem}

\begin{corollary}
This is Corollary 1.2.
\end{corollary}

\begin{remark*}
This is a remark. It is unnumbered and it does not affect the numberings of other environments.
\end{remark*}


\begin{definition}
This is Definition 1.3.
\end{definition}



\section{Another Section}

\begin{theorem}
This is Theorem 2.1.
\end{theorem}

\begin{corollary}
This is Corollary 2.2.
\end{corollary}

\begin{definition}
This is Definition 2.3.
\end{definition}

\end{document}



In [None]:
divide_latex_text(text)

[['1',
  'This document resets its `theorem` counter whenever a new section begins.'],
 ['1. Introduction', '\\section{Introduction}\n\n'],
 ['Theorem 1.1.', '\\begin{theorem}\nThis is Theorem 1.1.\n\\end{theorem}'],
 ['Corollary 1.2.',
  '\\begin{corollary}\nThis is Corollary 1.2.\n\\end{corollary}'],
 ['Remark',
  '\\begin{remark*}\nThis is a remark. It is unnumbered and it does not affect the numberings of other environments.\n\\end{remark*}'],
 ['Definition 1.3.',
  '\\begin{definition}\nThis is Definition 1.3.\n\\end{definition}'],
 ['2. Another Section', '\\section{Another Section}\n\n'],
 ['Theorem 2.1.', '\\begin{theorem}\nThis is Theorem 2.1.\n\\end{theorem}'],
 ['Corollary 2.2.',
  '\\begin{corollary}\nThis is Corollary 2.2.\n\\end{corollary}'],
 ['Definition 2.3.',
  '\\begin{definition}\nThis is Definition 2.3.\n\\end{definition}']]

In this example, the various theorem-like environments share a counter with `equation` environments and this counter is reset at the start of each new section.

In [None]:
file = _test_directory() / 'latex_examples' / 'numbering_example_3_theorem_like_environments_share_counter_with_equation_and_reset_at_each_section' / 'main.tex'
text = text_from_file(file)
print(text)

\documentclass{amsart}
\usepackage[utf8]{inputenc}
\usepackage{amsmath, amsfonts, amssymb, amsthm, amsopn}

\numberwithin{equation}{section}

\theoremstyle{plain}
\newtheorem*{theorem*}{Theorem}
\newtheorem*{theoremA}{Theorem A}
\newtheorem*{theoremB}{Theorem B}
\newtheorem{theorem}[equation]{Theorem}
\newtheorem{proposition}[equation]{Proposition}
\newtheorem{lemma}[equation]{Lemma}
\newtheorem{corollary}[equation]{Corollary}

\theoremstyle{definition}
\newtheorem{definition}[equation]{Definition}
\newtheorem{example}[equation]{Example}
\newtheorem*{acknowledgements}{Acknowledgements}
\newtheorem*{conventions}{Conventions}

\theoremstyle{remark}
\newtheorem{remark}[equation]{Remark}

\begin{document}

\section{Introduction}

\begin{theorem}
This is Theorem 1.1. This is because the \verb|\numberwithin{equation}{section}| makes the section number included in the equation counter and because the \\
\verb|\newtheorem{theorem}[equation]{Theorem}| command makes the environment \verb|theorem

In [None]:
file = _test_directory() / 'latex_examples' / 'numbering_example_3_theorem_like_environments_share_counter_with_equation_and_reset_at_each_section' / 'main.tex'
text = text_from_file(file)
divide_latex_text(text)

[['1. Introduction', '\\section{Introduction}\n\n'],
 ['Theorem 1.1.',
  '\\begin{theorem}\nThis is Theorem 1.1. This is because the \\verb|\\numberwithin{equation}{section}| makes the section number included in the equation counter and because the \\\\\n\\verb|\\newtheorem{theorem}[equation]{Theorem}| command makes the environment \\verb|theorem| be counted by the equation counter.\n\\end{theorem}'],
 ['1',
  'The following makes an equation labeled 1.2; \n\\begin{equation}\n5 + 7 = 12\n\\end{equation}'],
 ['Theorem', '\\begin{theorem*}\nThis Theorem is unnumbered\n\\end{theorem*}'],
 ['Corollary 1.3.',
  '\\begin{corollary}\nThis is Corollary 1.3.\n\\end{corollary}'],
 ['2. Another section', '\\section{Another section}\n'],
 ['Theorem 2.1.', '\\begin{theorem}\nThis is theorem 2.1\n\\end{theorem}'],
 ['2',
  'The following is labeled 2.2:\n\\begin{equation}\n3+5 = 8.\n\\end{equation}']]

In [None]:
file = _test_directory() / 'latex_examples' / 'numbering_example_4_unnumbered_section' / 'main.tex'
text = text_from_file(file)
print(divide_latex_text(text))

[['1. This is section 1', '\\section{This is section 1}\n\n'], ['Theorem 1.1.', '\\begin{theorem}\nThis is Theorem 1.1.\n\\end{theorem}'], ['1.1. This is a subsection 1.1', '\\subsection{This is a subsection 1.1}\n\nThe following makes an equation labeled 1; \n\\begin{equation}\n5 + 7 = 12\n\\end{equation}\n\n'], ['Theorem', '\\begin{theorem*}\nThis Theorem is unnumbered\n\\end{theorem*}'], ['1.2. This is subsection 1.2', '\\subsection{This is subsection 1.2}\n\n'], ['Corollary 1.2.', '\\begin{corollary}\nThis is Corollary 1.2.\n\\end{corollary}'], ['1. Unnumbered section', '\\section*{Unnumbered section}\n\n'], ['1.1. This is subsection 1.3', '\\subsection{This is subsection 1.3}\n'], ['1.1.1. This is subsubsection 1.3.1', '\\subsubsection{This is subsubsection 1.3.1}\n\n'], ['Theorem 1.1.', '\\begin{theorem}\nThis is Theorem 1.3.\n\\end{theorem}'], ['2. Another section', '\\section{Another section}\n\n'], ['2.1. This is subsection 2.1', '\\subsection{This is subsection 2.1}\n\n'], ['

In [None]:
file = _test_directory() / 'latex_examples' / 'numbering_example_5_subsections_and_theorem_like_environments_share_counter' / 'main.tex'
text = text_from_file(file)
sample_output = divide_latex_text(text)
# Print the parts computed above rather than dividing the text a second time.
print(sample_output)
# A numbered remark is titled with its number first, whereas the starred
# `rem*` environment gets a title without any number.
test_eq(sample_output[4][0], '1. Remark.')
test_eq(sample_output[5][0], 'Remark')


[['1. This is section 1', '\\section{This is section 1}\n\n'], ['1.1. Theorem.', '\\begin{thm}\nThis is 1.1. Theorem. Note that the \\verb|\\swapnumbers| command is invoked in the preamble.\n\\end{thm}'], ['1.2. This is 1.2. subsection.', '\\subsection{This is 1.2. subsection.}\n\nNote that the equation counter is numbered within the subsection counter and that the theorem-like environments are numbered with the equation counter.\n\n'], ['1.2.1. This is 1.2.1. Subsubsection', '\\subsubsection{This is 1.2.1. Subsubsection}\n\n'], ['1. Remark.', '\\begin{remark}\nThis is an 1. Remark. Note that \\verb|\\remark| has a counter separate from those of many of the other theorem-like environments.\n\\end{remark}'], ['Remark', '\\begin{rem*}\nThis is an unnumbered Remark.\n\\end{rem*}'], ['1.3. Proposition.', '\\begin{prop}\nThis is 1.3. Proposition.\n\\end{prop}'], ['1. Unnumbered section', '\\section*{Unnumbered section}\n\n'], ['1.1. Theorem.', '\\begin{thm}\nThis is 1.4. Theorem.\n\\end{thm

In the below example, the `theorem` counter is specified to reset at every new section and the `corollary` counter is specified to reset at every new theorem.

In particular, note that there is a Theorem 1.2 and a subsequent Corollary 1.2.1 in the example:

In [None]:
file = _test_directory() / 'latex_examples' / 'numbering_example_7_newtheorem_command_restarts_counter_by_section' / 'main.tex'
text = text_from_file(file)
print(text)
sample_output = divide_latex_text(text)
# Print the parts computed above rather than dividing the text a second time.
print(sample_output)
# The corollary is numbered within the theorem that precedes it.
test_eq(sample_output[4][0], 'Corollary 1.2.1.')


% Based on an example from https://www.overleaf.com/learn/latex/Theorems_and_proofs#Numbered_theorems.2C_definitions.2C_corollaries_and_lemmas

\documentclass[12 pt]{amsart}

\newtheorem{theorem}{Theorem}[section]
\newtheorem{corollary}{Corollary}[theorem]
\newtheorem{lemma}[theorem]{Lemma}
% Note that the below invocation of \newtheorem is invalid:
% \newtheorem{proposition}[theorem]{Proposition}[section]
\newtheorem{proposition}{Proposition}[section]

\begin{document}
\section{Introduction}
Theorems can easily be defined:

\begin{theorem}
Let \(f\) be a function whose derivative exists in every point, then \(f\) is 
a continuous function.
\end{theorem}

\begin{theorem}[Pythagorean theorem]
\label{pythagorean}
This is a theorem about right triangles and can be summarised in the next 
equation 
\[ x^2 + y^2 = z^2 \]
\end{theorem}

And a consequence of theorem \ref{pythagorean} is the statement in the next 
corollary.

\begin{corollary}
There's no right rectangle whose sides measure 3c

Note that part titles are stripped and are single-lined:

In [None]:
# TODO: fill in the following example
# part = parts[...]
# assert part[0].strip() == part[0]

In the following example, the subsubsections and the theorem-like environments share a counter:


In [None]:
file = _test_directory() / 'latex_examples' / 'numbering_example_8_subsubsections_and_theorems_share_counter' / 'main.tex'
text = text_from_file(file) 
print(text)
sample_output = divide_latex_text(text)
print(sample_output)
test_eq(sample_output[-1][0], 'Theorem 1.1.2.')
test_eq(sample_output[-2][0], '1.1.1. section 1.1.1')


% Based on an example from https://www.overleaf.com/learn/latex/Theorems_and_proofs#Numbered_theorems.2C_definitions.2C_corollaries_and_lemmas

\documentclass[12 pt]{amsart}

\newtheorem{cor}[subsubsection]{Corollary}
\newtheorem{lem}[subsubsection]{Lemma}
\newtheorem{prop}[subsubsection]{Proposition}
\newtheorem{propconstr}[subsubsection]{Proposition-Construction}
\newtheorem{lemconstr}[subsubsection]{Lemma-Construction}
\newtheorem{ax}[subsubsection]{Axiom}
\newtheorem{conj}[subsubsection]{Conjecture}
\newtheorem{thm}[subsubsection]{Theorem}
\newtheorem{qthm}[subsubsection]{Quasi-Theorem}
\newtheorem{qlem}[subsubsection]{Quasi-Lemma}
\newtheorem{defn}[subsubsection]{Definition}
\newtheorem{quest}[subsubsection]{Question}
\newtheorem{claim}[subsubsection]{Claim}

\begin{document}
\section{Introduction}

\subsection{section 1.1}

\subsubsection{section 1.1.1}

\begin{thm}
This is theorem 1.1.2
\end{thm}

\end{document}


[['1. Introduction', '\\section{Introduction}\n\n'], ['1.1. sect

In the below example, note that there is some text immediately following the subsubsection; this text is included in the same "part" as the start of the subsubsection:

In [None]:
file = _test_directory() / 'latex_examples' / 'divide_latex_example_text_after_subsubsection' / 'main.tex'
sample_latex_text = text_from_file(file)
print(sample_latex_text)
preamble, document = divide_preamble(sample_latex_text)
parts = divide_latex_text(sample_latex_text)
print(parts)
test_eq(parts[2], ['1.1.1. section 1.1.1', '\\subsubsection{section 1.1.1}\nSome text beneath subsubsection\n'])


% Based on an example from https://www.overleaf.com/learn/latex/Theorems_and_proofs#Numbered_theorems.2C_definitions.2C_corollaries_and_lemmas

\documentclass[12 pt]{amsart}

\newtheorem{cor}[subsubsection]{Corollary}
\newtheorem{lem}[subsubsection]{Lemma}
\newtheorem{prop}[subsubsection]{Proposition}
\newtheorem{propconstr}[subsubsection]{Proposition-Construction}
\newtheorem{lemconstr}[subsubsection]{Lemma-Construction}
\newtheorem{ax}[subsubsection]{Axiom}
\newtheorem{conj}[subsubsection]{Conjecture}
\newtheorem{thm}[subsubsection]{Theorem}
\newtheorem{qthm}[subsubsection]{Quasi-Theorem}
\newtheorem{qlem}[subsubsection]{Quasi-Lemma}
\newtheorem{defn}[subsubsection]{Definition}
\newtheorem{quest}[subsubsection]{Question}
\newtheorem{claim}[subsubsection]{Claim}

\begin{document}
\section{Introduction}

\subsection{section 1.1}

\subsubsection{section 1.1.1}
Some text beneath subsubsection
\begin{thm}
This is theorem 1.1.2
\end{thm}

\end{document}


[['1. Introduction', '\\section{I

In the below example, theorem-like environments and equation environments share a counter and there is an equation within a theorem:

In [None]:
file = _test_directory() / 'latex_examples' / 'divide_latex_example_theorems_and_equations_share_counter_and_equation_in_theorem' / 'main.tex'
sample_latex_text = text_from_file(file)
print(sample_latex_text)
preamble, document = divide_preamble(sample_latex_text)
parts = divide_latex_text(sample_latex_text)
print(parts)
test_eq(parts[2], ['Corollary 1.3.', '\\begin{cor}\nThis is Corollary 1.3\n\\end{cor}'])


\documentclass[12pt]{amsart}
\usepackage{amsmath}
\usepackage{amsfonts}


\numberwithin{equation}{section}
\numberwithin{figure}{section}

\newtheorem{lemma}[equation]{Lemma}
\newtheorem{theorem}[equation]{Theorem}
\newtheorem{conjecture}[equation]{Conjecture}
\newtheorem{cor}[equation]{Corollary}
\newtheorem{prop}[equation]{Proposition}

\begin{document}

\section{Introduction}

\begin{theorem}
\begin{equation}
asdf
\end{equation}
\end{theorem}

\begin{cor}
This is Corollary 1.3
\end{cor}

\end{document}
[['1. Introduction', '\\section{Introduction}\n\n'], ['Theorem 1.1.', '\\begin{theorem}\n\\begin{equation}\nasdf\n\\end{equation}\n\\end{theorem}'], ['Corollary 1.3.', '\\begin{cor}\nThis is Corollary 1.3\n\\end{cor}']]


In the below example, theorem-like environments and equation environments share a counter and there is an equation within the proof of a proposition:

In [None]:
file = _test_directory() / 'latex_examples' / 'divide_latex_example_equation_in_proof' / 'main.tex'
sample_latex_text = text_from_file(file)
print(sample_latex_text)
preamble, document = divide_preamble(sample_latex_text)
parts = divide_latex_text(sample_latex_text)
print(parts)
test_eq(parts[2], ['Corollary 1.3.', '\\begin{cor}\nThis is Corollary 1.3\n\\end{cor}'])


\documentclass[12pt]{amsart}
\usepackage{amsmath}
\usepackage{amsfonts}


\numberwithin{equation}{section}
\numberwithin{figure}{section}

\newtheorem{lemma}[equation]{Lemma}
\newtheorem{theorem}[equation]{Theorem}
\newtheorem{conjecture}[equation]{Conjecture}
\newtheorem{cor}[equation]{Corollary}
\newtheorem{prop}[equation]{Proposition}

\begin{document}

\section{Introduction}

\begin{prop}
This is Proposition 1.1
\end{prop}
\begin{proof}
\begin{equation}
\end{equation}
\end{proof}

\begin{cor}
This is Corollary 1.3
\end{cor}

\end{document}
[['1. Introduction', '\\section{Introduction}\n\n'], ['Proposition 1.1.', '\\begin{prop}\nThis is Proposition 1.1\n\\end{prop}\\begin{proof}\n\\begin{equation}\n\\end{equation}\n\\end{proof}'], ['Corollary 1.3.', '\\begin{cor}\nThis is Corollary 1.3\n\\end{cor}']]


In the below example, theorem-like environments and equation environments share a counter and there is an `eqnarray` after the start of a section:

In [None]:
file = _test_directory() / 'latex_examples' / 'divide_latex_example_eqnarray_after_start_of_section' / 'main.tex'
sample_latex_text = text_from_file(file)
print(sample_latex_text)
preamble, document = divide_preamble(sample_latex_text)
parts = divide_latex_text(sample_latex_text)
print(parts)
test_eq(parts[1], ['Proposition 1.2.', '\\begin{prop}\nThis is Proposition 1.2\n\\end{prop}'])


\documentclass[12pt]{amsart}
\usepackage{amsmath}
\usepackage{amsfonts}


\numberwithin{equation}{section}
\numberwithin{figure}{section}

\newtheorem{lemma}[equation]{Lemma}
\newtheorem{theorem}[equation]{Theorem}
\newtheorem{conjecture}[equation]{Conjecture}
\newtheorem{cor}[equation]{Corollary}
\newtheorem{prop}[equation]{Proposition}

\begin{document}

\section{Introduction}
lalalala some stuff $5$

Hello I am saying stuff
\begin{eqnarray}
\end{eqnarray}
\begin{prop}
This is Proposition 1.2
\end{prop}

\end{document}
[['1. Introduction', '\\section{Introduction}\nlalalala some stuff $5$\n\nHello I am saying stuff\n\\begin{eqnarray}\n\\end{eqnarray}\n'], ['Proposition 1.2.', '\\begin{prop}\nThis is Proposition 1.2\n\\end{prop}']]


In the below example, there are many custom commands defined using the `\def` command. Note the use of `\u` in the LaTeX file, which causes problems for `pylatexenc`.

The `divide_latex_text` function had difficulties parsing through the latex document for the below example, specifically because this `\u` command made `pylatexenc` unable to find the `\section` following the `\u`. Now that `divide_latex_text` provides the option to replace custom commands from the LaTeX document (with their underlying "meaning/definitions" via the `replace_commands_in_latex_document` function) before parsing through the LaTeX document, this is no longer a problem.

In [None]:
file = _test_directory() / 'latex_examples' / 'divide_latex_example_unknown_section_division_problem' / 'main.tex'
sample_latex_text = text_from_file(file)
# print(sample_latex_text)
preamble, document = divide_preamble(sample_latex_text)
parts = divide_latex_text(sample_latex_text)
# Two sections and one subsection are expected; before custom commands were
# replaced ahead of parsing, the `\section` following `\u` was not found.
test_eq(len(parts), 3)
print(parts)

[['1. Background and Notation', '\\section{Background and Notation}\n\n'], ['1.1. Unitary groups', '\\subsection{Unitary groups}\n\\label{subsecunitary}\n\nwhere $\\bm{{\\rm R}}_{{\\mathcal O}_E/{\\mathbb Z}}$ is the restriction of scalars functor.\nThen $SU$ is the derived group of $GU$ and of ${\\rm U}$,\n\n'], ['2. Mumford-Tate groups and endomorphism rings', '\\section{Mumford-Tate groups and endomorphism rings}\n\n\\label{secmt}\n\nCarlson and Toledo have \n\n\\bibliographystyle{hamsplain}\n\\bibliography{jda}\n\n']]


In [None]:
# TODO: example with a multilined section title forced to single-lined:
# e.g. `\section{Exceptional maximal subgroups of 
# \texorpdfstring{\(\GSp_4(\ff_\ell)\)}{GSp4Fell}}`


In [None]:
# TODO: Find a list of environment names commonly used.

In [None]:
# TODO: examples with different numbering convention and different numbered environments

In [None]:
# Here are some latex files with different conventions:
# - Different environment types have different counts and the counts do not show the section number.
#   - vankataramana_imbrd https://arxiv.org/abs/1205.6543: 
#       - e.g. section 1 has Theorem 1, Remark 1, Remark 2, Remark 3, subsection 1.1.3 has Remark 4, Subsection 2.2 has Definition 1