# latex.folders

> Create and manage folders to house `Obsidian.md` notes corresponding to parts divided by the `latex.divide` module.

In [None]:
#| default_exp latex.folders

In [None]:
#| export
from trouver.latex.divide import divide_latex_text, DividedLatexPart

In [None]:
import glob
import shutil
import tempfile

from fastcore.test import ExceptionExpected, test_eq
from pathvalidate import validate_filename

from trouver.helper.files_and_folders import text_from_file
from trouver.helper.tests import _test_directory

from trouver.personal_vault.reference import (
    delete_reference_folder
)


## Identify sections and subsections to make folders for a reference.

In [None]:
#| export
def _part_starts_section(
        part: DividedLatexPart):
    """
    Return `True` if `part` starts a section (explicitly),
    cf. `divide_latex_text`.
    """
    return part['text'].startswith(r'\section')
    # node = get_node_from_simple_text(part[1])
    # return _is_section_node(node)


def _part_starts_subsection(
        part: DividedLatexPart):
    """Return `True` if `part` starts a subsection, cf. `divide_latex_text`."""
    return part['text'].startswith(r'\subsection')
    # node = get_node_from_simple_text(part[1])
    # return _is_subsection_node(node)

In [None]:
#| hide
# Each case is (note title, LaTeX text, starts a section?, starts a subsection?).
for note_title, latex_text, starts_section, starts_subsection in (
    ('1. This is section 1',
     '\\section{This is section 1}',
     True, False),
    ('1.2. This is 1.2 subsection.',
     '\\subsection{This is 1.2 subsection.}',
     False, True),
    # A subsubsection that does not even open the text counts as neither.
    ('1',
     'Note that the equation counter is numbered within the subsection counter and that the theorem-like environments are numbered with the equation counter.\n\n\\subsubsection{This is 1.2.1 Subsubsection}',
     False, False),
    ('1. Remark.',
     '\\begin{remark}\nThis is an unnumbered remark.\n\\end{remark}',
     False, False),
    ('Remark',
     '\\begin{rem*}\nThis is an unnumbered Remark.\n\\end{rem*}',
     False, False),
):
    part = DividedLatexPart(note_title=note_title, text=latex_text)
    assert bool(_part_starts_section(part)) is starts_section
    assert bool(_part_starts_subsection(part)) is starts_subsection

In [None]:
#| export
# Title of the implicit section holding whatever precedes the first
# explicitly declared section.
UNTITLED_SECTION_TITLE = 'Untitled Section'


def section_and_subsection_titles_from_latex_parts(
        parts: list[DividedLatexPart], # An output of `divide_latex_text`
        ) -> list[list[str]]: # Each list corresponds to a section. The first entry of the list is the title of the section and the other entries are the titles of the subsections.
    """
    Collect the section and subsection titles found in `parts`, grouped by section.

    Unnumbered sections get a list of their own, and unnumbered subsections
    are listed under their section like any other subsection.
    Every title is stripped of leading and trailing whitespace.
    """
    titles_by_section: list[list[str]] = []
    for latex_part in parts:
        # The helper appends to `titles_by_section` in place when
        # `latex_part` opens a section or a subsection.
        _consider_part_to_add(latex_part, titles_by_section)
    return titles_by_section


def _consider_part_to_add(
        part: DividedLatexPart,
        sections_and_subsections: list[list[str]]) -> None:
    """Add the title of `part` to `sections_and_subsections`
    if `part` starts a section or subsection.

    `sections_and_subsections` is modified in place:

    - If `part` starts a section, a new list holding the stripped title
      of `part` is appended.
    - If `part` starts a subsection, the stripped title of `part` is
      appended to the list of the most recent section.
    - If nothing has been recorded yet and `part` does not start a section,
      a list holding `UNTITLED_SECTION_TITLE` is appended first. This also
      covers a document whose first part is a subsection, which would
      otherwise have no section list to be appended to.
    """
    starts_section = _part_starts_section(part)
    if not sections_and_subsections and not starts_section:
        # The very first `part` does not explicitly start a section, so
        # everything up to the first explicit section lives in an untitled
        # section.
        sections_and_subsections.append([UNTITLED_SECTION_TITLE])

    title = part['note_title'].strip()
    if starts_section:
        sections_and_subsections.append([title])
    elif _part_starts_subsection(part):
        # A section list is guaranteed to exist here thanks to the guard above.
        sections_and_subsections[-1].append(title)
        


In the following example, the environments are numbered Theorem 1, Corollary 2, Definition 3, etc.
Also note that there is some content before the very first (explicitly defined) section, so the output begins with a section whose title is given by the `UNTITLED_SECTION_TITLE` constant.

In [None]:
# The fixture is written as (note title, LaTeX text) pairs.
parts = [
    DividedLatexPart(note_title=note_title, text=latex_text)
    for note_title, latex_text in (
        ('1', 'For this document, the `theorem` counter is not reset whenever a new section begins.\n\nA similar numbering scheme can be accomplished by importing \\verb|amsmath| and invoking the code \\verb|\\numberwithin{theorem}{part}| in the preamble.'),
        ('1. Introduction', '\\section{Introduction}'),
        ('Theorem 1.', '\\begin{theorem}\nThis is Theorem 1.\n\\end{theorem}'),
        ('Corollary 2.', '\\begin{corollary}\nThis is Corollary 2.\n\\end{corollary}'),
        ('Remark', '\\begin{remark*}\nThis is a remark. It is unnumbered and it does not affect the numberings of other environments.\n\\end{remark*}'),
        ('Definition 3.', '\\begin{definition}\nThis is Definition 3.\n\\end{definition}'),
        ('2. Another Section', '\\section{Another Section}'),
        ('Theorem 4.', '\\begin{theorem}\nThis is Theorem 4.\n\\end{theorem}'),
        ('2', 'And we might get a corollary!'),
        ('Corollary 5.', '\\begin{corollary}\nThis is Corollary 5.\n\\end{corollary}'),
        ('Definition 6.', '\\begin{definition}\nThis is Definition 6.\n\\end{definition}'),
    )
]
sample_output = section_and_subsection_titles_from_latex_parts(parts)
# The leading non-section part yields the untitled section; theorem-like
# environments contribute nothing.
test_eq(
    sample_output,
    [
        [UNTITLED_SECTION_TITLE],
        ['1. Introduction'],
        ['2. Another Section'],
    ])

In contrast, the following example has environments numbered by sections:

In [None]:
parts = [
    DividedLatexPart(note_title='1', text='This document resets its `theorem` counter whenever a new section begins.'),
    DividedLatexPart(note_title='1. Introduction', text='\\section{Introduction}'),
    DividedLatexPart(note_title='Theorem 1.', text='\\begin{theorem}\nThis is Theorem 1.1.\n\\end{theorem}'),
    DividedLatexPart(note_title='Corollary 2.', text='\\begin{corollary}\nThis is Corollary 1.2.\n\\end{corollary}'),
    DividedLatexPart(note_title='Remark', text='\\begin{remark*}\nThis is a remark. It is unnumbered and it does not affect the numberings of other environments.\n\\end{remark*}'),
    DividedLatexPart(note_title='Definition 3.', text='\\begin{definition}\nThis is Definition 1.3.\n\\end{definition}'),
    DividedLatexPart(note_title='2. Another Section', text='\\section{Another Section}'),
    DividedLatexPart(note_title='Theorem 4.', text='\\begin{theorem}\nThis is Theorem 2.1.\n\\end{theorem}'),
    DividedLatexPart(note_title='Corollary 5.', text='\\begin{corollary}\nThis is Corollary 2.2.\n\\end{corollary}'),
    DividedLatexPart(note_title='Definition 6.', text='\\begin{definition}\nThis is Definition 2.3.\n\\end{definition}')]
# Recompute the output from the `parts` of THIS cell; without this line the
# assertion below would only re-check the stale `sample_output` left over
# from the previous example.
sample_output = section_and_subsection_titles_from_latex_parts(parts)
test_eq(sample_output, [[UNTITLED_SECTION_TITLE], ['1. Introduction'], ['2. Another Section']])

The below example is derived from a LaTeX document in which significant content is present before any particular sections. See the `nbs\_tests\latex_examples\latex_example_with_content_before_sections` folder. Also see https://arxiv.org/abs/1111.3607 for an example of a paper with significant content prior to any explicitly defined sections.

In [None]:
# The fixture is written as (note title, LaTeX text) pairs.
parts = [
    DividedLatexPart(note_title=note_title, text=latex_text)
    for note_title, latex_text in (
        ('abstract', "\\begin{abstract}\nI'm an abstract\n\\end{abstract}"),
        ('1', '\\maketitle\n\nI want to talk about things but notice that this part does not belong to a section!'),
        ('Theorem 1.', "\\begin{theorem}\\label{th:some_theorem}\nI'm a theorem.\n\\end{theorem}"),
        ('2', 'Blah blah blah'),
        ('Theorem 2.', '\\begin{theorem}\\label{th:some_other_theorem}\nImpart me with mathematical knowledge!\n\\end{theorem}'),
        ('3', 'Maybe a corollary'),
        ('Corollary 3.', '\\begin{corollary}\\label{cor:a_corollary}\nI immediately follow from the above theorem.\n\\end{corollary}'),
        ('4', 'More stuff!'),
        ('Corollary 4.', '\\begin{corollary}\\label{cor:another_corollary}\nMore delicious mathematical knowledge.\n\\end{corollary}'),
        ('5', 'Maybe you could describe how we demonstrate this corollary.'),
        ('1. Proof of Theorem~\\ref{th:main}', '\\section{Proof of Theorem~\\ref{th:main}}'),
        ('6', 'Now this is finally in a section.'),
        ('Lemma 5.', '\\begin{lemma}\nSome lemma\n\\end{lemma}\\begin{proof}\nMaximum effort!\n\\end{proof}'),
        ('7', 'Blah blah blah.'),
        ('1.1. This is a subsection', '\\subsection{This is a subsection}'),
        ('8', "I'm about one thing."),
        ('1.2. This is another subsection', '\\subsection{This is another subsection}'),
        ('9', "I'm about another thing."),
    )
]
sample_output = section_and_subsection_titles_from_latex_parts(parts)
# Everything before the first `\section` is gathered under the untitled section.
test_eq(
    sample_output,
    [
        [UNTITLED_SECTION_TITLE],
        [
            '1. Proof of Theorem~\\ref{th:main}',
            '1.1. This is a subsection',
            '1.2. This is another subsection',
        ],
    ])

The below example is derived from a LaTeX document with a `\numberwithin{equation}{subsection}` in which the theorem-like environments are numbered with the `equation` counter. In particular, theorem-like environments and subsections are counted together.

Also, note that the below example starts with an explicitly defined section, so there is no section given by the `UNTITLED_SECTION_TITLE` constant.

In [None]:
# The fixture is written as (note title, LaTeX text) pairs.
parts = [
    DividedLatexPart(note_title=note_title, text=latex_text)
    for note_title, latex_text in (
        ('1. This is section 1', '\\section{This is section 1}'),
        ('1.1. Theorem.', '\\begin{thm}\nThis is 1.1. Theorem. Note that the \\verb|\\swapnumbers| command is invoked in the preamble.\n\\end{thm}'),
        ('1.2. This is 1.2. subsection.', '\\subsection{This is 1.2. subsection.}'),
        ('1', 'Note that the equation counter is numbered within the subsection counter and that the theorem-like environments are numbered with the equation counter.\n\n\\subsubsection{This is 1.2.1. Subsubsection}'),
        ('1. Remark.', '\\begin{remark}\nThis is an 1. Remark. Note that \\verb|\\remark| has a counter separate from those of many of the other theorem-like environments.\n\\end{remark}'),
        ('Remark', '\\begin{rem*}\nThis is an unnumbered Remark.\n\\end{rem*}'),
        ('1.3. Proposition.', '\\begin{prop}\nThis is 1.3. Proposition.\n\\end{prop}'),
        ('Unnumbered section', '\\section*{Unnumbered section}'),
        ('1.1. Theorem.', '\\begin{thm}\nThis is 1.4. Theorem.\n\\end{thm}'),
        ('2. This is Section 2', '\\section{This is Section 2}'),
        ('2.1. Theorem.', '\\begin{thm}\nThis is 2.1. Theorem\n\\end{thm}'),
    )
]

sample_output = section_and_subsection_titles_from_latex_parts(parts)
# The first part is an explicit section, so no untitled section shows up;
# the starred `\section*` gets a list of its own.
test_eq(
    sample_output,
    [
        ['1. This is section 1', '1.2. This is 1.2. subsection.'],
        ['Unnumbered section'],
        ['2. This is Section 2'],
    ])


The titles of the sections and subsections are stripped of their leading and trailing whitespace (if any).

In [None]:
# Titles padded with whitespace must come back stripped, including a
# subsection title that consists of nothing but its number.
parts = [
    DividedLatexPart(note_title=note_title, text=latex_text)
    for note_title, latex_text in (
        ('   1. Section with an unnumbered subsection   ', '\\section{Section with an unnumbered subsection}'),
        ('1', 'This is a section with an unnumbered subsection'),
        ('1.1. ', '\\subsection{}'),
    )
]
sample_output = section_and_subsection_titles_from_latex_parts(parts)
test_eq(
    sample_output,
    [['1. Section with an unnumbered subsection', '1.1.']])



In [None]:
# End-to-end check against a LaTeX example read from the test directory.
# NOTE(review): `dir` shadows the builtin of the same name; it is left as is
# in case other cells rely on this binding.
dir = (
    _test_directory()
    / 'latex_examples'
    / 'latex_example_with_plenty_of_sections_and_subsections'
)
file = dir / 'main.tex'
text = text_from_file(file)
parts = divide_latex_text(text, dir)
print(parts)
sample_output = section_and_subsection_titles_from_latex_parts(parts)
test_eq(
    sample_output,
    [
        [
            '1. This is section 1',
            '1.1. This is section 1.1',
            '1.2. This is section 1.2',
        ],
        [
            '2. This is section 2',
        ],
        [
            '3. This is section 3',
            '3.1. This is section 3.1',
            '3.2. This is section 3.2',
            '3.3. This is section 3.3',
            '3.4. This is section 3.4',
        ],
    ])

[{'note_title': '1. This is section 1', 'text': '\\section{This is section 1}\n\n'}, {'note_title': '1.1. This is section 1.1', 'text': '\\subsection{This is section 1.1}\n\n'}, {'note_title': '1.2. This is section 1.2', 'text': '\\subsection{This is section 1.2}\n\n\n'}, {'note_title': '2. This is section 2', 'text': '\\section{This is section 2}\n\n\n'}, {'note_title': '3. This is section 3', 'text': '\\section{This is section 3}\n\n'}, {'note_title': '3.1. This is section 3.1', 'text': '\\subsection{This is section 3.1}\n\n'}, {'note_title': '3.2. This is section 3.2', 'text': '\\subsection{This is section 3.2}\n\n'}, {'note_title': '3.3. This is section 3.3', 'text': '\\subsection{This is section 3.3}\n\n'}, {'note_title': '3.4. This is section 3.4', 'text': '\\subsection{This is section 3.4}\n\n'}]
