In [1]:
#| default_exp markdown.obsidian.personal.notation.__init__

# markdown.obsidian.personal.notation
> Functions for making and managing notation cards

Mathematical texts are difficult to read not only because they introduce readers to new concepts that they are unfamiliar with but also because any given excerpt requires the reader to be familiar with the notation that the writer chooses to use.

The first-time reader of a mathematical text thus must go on a wild goose chase to find where the unfamiliar notations are defined. Unfortunately, the definitions may further introduce unfamiliar notations.

Notation notes try to ease this problem to an extent by providing
1. links to quickly find where notations are introduced
2. the contexts under which the notations are defined, and
3. miscellaneous descriptions of the notations as necessary. 

In [2]:
#| export
from os import PathLike
from pathlib import Path
from typing import Optional, Union
import warnings

from multiset import Multiset
from pylatexenc.latexwalker import LatexNode, LatexMacroNode, LatexWalker, LatexGroupNode, LatexCharsNode

from trouver.helper.path_accepted_string import latex_to_path_accepted_string
from trouver.markdown.markdown.file import MarkdownFile
from trouver.markdown.obsidian.links import (
    LinkType, ObsidianLink
)
from trouver.markdown.obsidian.personal.notation.parse import notation_in_note
from trouver.markdown.obsidian.personal.notation.in_standard_information_note import (
    notat_str_from_doub_asts_in_std_info_note, notations_and_main_notes,
    add_notation_note_to_see_also, notation_note_is_linked_in_see_also_section,
    notat_str_from_html_tags
)
from trouver.markdown.obsidian.personal.note_type import (
    PersonalNoteTypeEnum, note_is_of_type
)
from trouver.markdown.obsidian.vault import VaultNote

In [3]:
import os
import shutil
import tempfile

from fastcore.test import *

from trouver.helper.tests import _test_directory

# Add notation notes to Notation index note

In [4]:
#| export
def notations_to_add_in_index(
        vault: PathLike, # Path to the vault directory.
        notation_index_note: VaultNote, # The notation index note in the vault where the notations should be added to.
        subdirectory: Optional[PathLike] = None, # Path to the subdirectory, relative to `vault`, to find the notation notes. Searches for all notation notes here and in subdirectories of this subdirectory. If `None`, then the `note` parameter is used to determine the subdirectory. If `subdirectory` is the empty str, then all notation notes in the vault are searched. Defaults to `None`.
        note: Optional[VaultNote] = None # The directory that this note is in determines the argument to `subdirectory` parameter if it is `None`. Defaults to `None`, in which case `subdirectory` must be specified.
        ) -> list[tuple[str, ObsidianLink]]: # Each tuple in the list consists of the notation str of the notation note (including surrounding dollar signs `$`) and the embedded ObsidianLink object for a link to the notation note.
    """Return notations and links of notation notes that ought to be
    added to the corresponding notation index, i.e. notes that are in the
    reference folder but are not linked by the notation index note.

    If a notation note is not properly formatted, e.g. does not have a
    notation, then the notation and link for the notation note are not
    included and a warning naming the note is issued.

    **Raises**
    - ValueError
        - If `subdirectory` and `note` are both `None`.

    """
    vault = vault if vault is not None else ''
    mains_dict = notations_and_main_notes(vault, subdirectory, note)
    index_text = str(MarkdownFile.from_file(notation_index_note.path()))
    notations_and_links = []
    # The keys of `mains_dict` are the names of the notation notes.
    for notation_note_name in mains_dict:
        link_object = ObsidianLink(
            is_embedded=True, file_name=notation_note_name, anchor=0,
            custom_text=0, link_type=LinkType.WIKILINK)
        link = link_object.to_string()
        try:
            notation_str = notation_in_note(notation_note_name, vault)
        except AttributeError:  # When a notation note is incomplete.
            warnings.warn(
                f"Skipping the notation note `{notation_note_name}`: "
                f"could not find a notation in it; the note may be "
                f"incomplete.")
            continue
        # A note counts as indexed when its embedded link text already
        # occurs anywhere in the index note.
        if link not in index_text:
            notations_and_links.append((notation_str, link_object))
    return notations_and_links



We can identify notation notes which exist in a reference folder but are not linked in the notation index note for the reference:

In [5]:
# TODO: make test
# VaultNote.clear_cache()
# Example: list the notation notes of a reference that are not yet linked in
# its notation index note.
vault = _test_directory() / 'test_vault_6'
reference = 'number_theory_reference_1'
# `note` only serves to locate the reference folder (its directory is searched).
note = VaultNote(vault, name=f'_index_{reference}')
notation_index_note = VaultNote(vault, name=f'_notation_{reference}')
# NOTE: `sample_output` is reused by the next example cell.
sample_output = notations_to_add_in_index(vault, notation_index_note, note=note)
for notation_str, link in sample_output:
    print(notation_str, link.to_string())

$\mathbb{Z}/n\mathbb{Z}$ ![[number_theory_reference_1_notation_Z_nZ_ring_of_integers_modulo_n]]


In [6]:
#| export
def index_notation_note_formatted_entry(
        notation_str: str, # The str of the notation, including the surrounding dollar signs `$`.
        link: ObsidianLink # The embedded link to the notation note.
        ) -> str:
    """Build the Markdown snippet for one entry of an index notation note.

    The entry is a level-3 heading holding the notation, followed by a
    single bullet holding the link.

    The tuples returned by `notations_to_add_in_index` are the intended
    arguments for this function.
    """
    heading_line = f'### {notation_str}'
    bullet_line = f'- {link.to_string()}'
    return '\n'.join((heading_line, bullet_line))

The `index_notation_note_formatted_entry` function returns a formatted str to add in the index notation note:

In [7]:
# Uses `sample_output` from the `notations_to_add_in_index` example cell above.
print(index_notation_note_formatted_entry(sample_output[0][0], sample_output[0][1]))

### $\mathbb{Z}/n\mathbb{Z}$
- ![[number_theory_reference_1_notation_Z_nZ_ring_of_integers_modulo_n]]


# Making a notation note

In [8]:
#| export
def make_a_notation_note(
        main_note: VaultNote, # The note from which the notation originates.
        vault: PathLike, # Path to the vault directory.
        notation: str, # The notation typed in latex. May or may not be surrounded by dollar signs
        description: str, # The rest of the text describing notation.
        notation_note_name: str, # The name of the new notation note to be created.
        destination: Optional[PathLike] = None, # The directory, relative to `vault`, to create the new notation note in.  If `None`, then creates the new notation note in the same directory as `main_note`.
        overwrite: bool = False, # If `True`, overwrite file of the same path as the new notation file to be written, if such a file exists.  Otherwise, does nothing. Even if a link to the old notation note exists in `main_note`, a new link will still be added.  Defaults to `False`.
        add_to_main: bool = True, # If `True`, adds a link to the notation note in the `See Also` section of the main note.
        latex_in_original: str = '' # The full math mode string in `main_note` which introduces the notation. Defaults to the blank string `''`, in which case `notation` plays the role of `latex_in_original`
        ) -> Union[VaultNote, None]: # The newly created notation note. If no note is created, then returns `None`.
    """Make a new notation note, optionally add a link to it in the
    `See Also` section of its main note, and return it.

    The notation note is created in `destination`, or in the same
    directory as the main note if `destination` is `None`.
    The meta of the notation note has a `latex_in_original` section which
    lists the contents of the latex string in the main note from which the
    notation note comes from. This is so that the
    `make_notation_notes_from_double_asts` method can distinguish between
    notations for which a note has been created and for which a note has
    not been created.

    If a note of the same path already exists and `overwrite` is `False`,
    nothing is written, no link is added, and `None` is returned.
    """
    if destination is None:
        destination = main_note.directory(relative=True)
    # Wrapping in `Path` lets `destination` be given as a plain str too;
    # `str / str` would otherwise raise a TypeError.
    notation_note = VaultNote(
        vault, rel_path=Path(destination) / f'{notation_note_name}.md')
    if notation_note.exists():
        if not overwrite:
            return None
    else:
        notation_note.create()
    if not latex_in_original:
        latex_in_original = notation
    to_print = _full_notation_string(
        main_note, notation, description, latex_in_original)
    # TODO: change this to use VaultNote method
    with open(notation_note.path(), 'w', encoding='utf8') as notation_file:
        notation_file.write(to_print)
    if add_to_main:
        add_notation_note_to_see_also(notation_note, main_note)
    return notation_note
    

def _full_notation_string(
        main_note: VaultNote,
        notation: str,
        description: str,
        latex_in_original: str) -> str:
    r"""Return the full text of a notation note: YAML frontmatter followed
    by the "statement" of the notation.

    The statement says something like
    "<notation> denotes <description of notation>", e.g.
    "$\dim V$ denotes the dimension of the vector space $V$", where the word
    "denotes" is a link to `main_note`.

    **Parameters**
    - main_note - VaultNote
        - The note that the "denotes" link points to.
    - notation - str
        - Notation written in LaTeX, with or without surrounding dollar
        signs.
    - description - str
        - The full description of the notation.
    - latex_in_original - str
        - The LaTeX string recorded in the `latex_in_original` metadata
        entry. It is used as given; no fallback to `notation` happens here.

    **Returns**
    - str
    """
    raw_notation = _raw_notation(notation)
    denote_link = ObsidianLink(False, main_note.name, 0, 'denotes')
    # The value is written inside a double-quoted YAML scalar, so
    # backslashes (first) and double quotes (second) must be escaped.
    meta_latex_in_original = (
        _raw_notation(latex_in_original)
        .replace('\\', '\\\\')
        .replace('"', '\\"'))
    before_meta = _notation_string_no_metadata(
        raw_notation, denote_link, description)
    return (f'---\ndetect_regex: []\n'
            f'latex_in_original: ["{meta_latex_in_original}"]\n'
            f'tags: []'
            f'\n---\n{before_meta}')


def _raw_notation(notation: str):
    """
    """
    notation = notation.strip()
    notation = notation.strip('$')
    notation = notation.replace('\n', '')
    notation = notation.strip()
    return notation


def _notation_string_no_metadata(
        raw_notation: str,
        denote_link: ObsidianLink,
        description: str) -> str:
    """
    This is a helper function to `_full_notation_string`.
    """
    return f'${raw_notation}$ {str(denote_link)} {description}'



In [9]:
#| hide
# The notation is wrapped in `$`, followed by the link text and the description.
test_eq(
    _notation_string_no_metadata(r'\operatorname{Gal}(L/K)', ObsidianLink.from_text('[[note_name|denotes]]'), 'the Galois group of a Galois extension $L/K$ of fields'),
    '$\\operatorname{Gal}(L/K)$ [[note_name|denotes]] the Galois group of a Galois extension $L/K$ of fields')


We can make a notation note with the `make_a_notation_note` method.

In [10]:
# Work on a throwaway copy of the fixture vault so the fixture is not modified.
with tempfile.TemporaryDirectory(prefix='tmp_dir_', dir=os.getcwd()) as tmp_dir:
    tmp_dir = Path(tmp_dir)
    temp_vault = tmp_dir / 'test_vault_7'
    shutil.copytree(_test_directory() / 'test_vault_7', temp_vault)

    note = VaultNote(temp_vault, name='galois_group')
    # `notation` is given without dollar signs; `description` is left empty.
    notation_note = make_a_notation_note(
        note, temp_vault, r'\operatorname{Gal}(L/K)', '', notation_note_name='some_reference_name_notation_Gal_L_K_galois_group')
    mf = MarkdownFile.from_vault_note(notation_note)
    # The new note carries frontmatter recording the originating LaTeX.
    assert mf.has_metadata()
    meta = mf.metadata()
    assert 'detect_regex' in meta
    assert 'latex_in_original' in meta
    print(meta, '\n')
    assert '\\operatorname{Gal}(L/K)' in meta['latex_in_original']
    print(mf, '\n')

    main_mf = MarkdownFile.from_vault_note(note)
    assert notation_note.name in str(main_mf)  # A link has been created

{'detect_regex': [], 'latex_in_original': ['\\operatorname{Gal}(L/K)'], 'tags': []} 

---
detect_regex: []
latex_in_original: ["\\operatorname{Gal}(L/K)"]
tags: []
---
$\operatorname{Gal}(L/K)$ [[galois_group|denotes]]  



Note that the surrounding dollar signs for LaTeX math mode can be included in the argument for `notation`:

In [11]:
# Same scenario as the previous example, but `notation` includes the
# surrounding dollar signs; the resulting note is expected to be the same.
with tempfile.TemporaryDirectory(prefix='tmp_dir_', dir=os.getcwd()) as tmp_dir:
    tmp_dir = Path(tmp_dir)
    temp_vault = tmp_dir / 'test_vault_7'
    shutil.copytree(_test_directory() / 'test_vault_7', temp_vault)

    note = VaultNote(temp_vault, name='galois_group')
    notation_note = make_a_notation_note(
        note, temp_vault, r'$\operatorname{Gal}(L/K)$', '', notation_note_name='some_reference_name_notation_Gal_L_K_galois_group')
    mf = MarkdownFile.from_vault_note(notation_note)
    assert mf.has_metadata()
    meta = mf.metadata()
    assert 'detect_regex' in meta
    assert 'latex_in_original' in meta
    print(meta, '\n')
    # The dollar signs are not stored in the metadata.
    assert '\\operatorname{Gal}(L/K)' in meta['latex_in_original']
    print(mf, '\n')

    main_mf = MarkdownFile.from_vault_note(note)
    assert notation_note.name in str(main_mf)  # A link has been created

{'detect_regex': [], 'latex_in_original': ['\\operatorname{Gal}(L/K)'], 'tags': []} 

---
detect_regex: []
latex_in_original: ["\\operatorname{Gal}(L/K)"]
tags: []
---
$\operatorname{Gal}(L/K)$ [[galois_group|denotes]]  



Setting `add_to_main=False` only creates the notation note, but does not add a link to the notation note in the main note:

In [12]:
# With `add_to_main=False` the notation note is written but the main note is
# left without a link to it.
with tempfile.TemporaryDirectory(prefix='tmp_dir_', dir=os.getcwd()) as tmp_dir:
    tmp_dir = Path(tmp_dir)
    temp_vault = tmp_dir / 'test_vault_7'
    shutil.copytree(_test_directory() / 'test_vault_7', temp_vault)

    note = VaultNote(temp_vault, name='galois_group')
    notation_note = make_a_notation_note(
        note, temp_vault, r'\operatorname{Gal}(L/K)', '', notation_note_name='_reference_notation_Gal_L_K_galois_group',
        add_to_main=False)

    mf = MarkdownFile.from_vault_note(notation_note)
    assert mf.has_metadata()
    meta = mf.metadata()
    assert 'detect_regex' in meta
    assert 'latex_in_original' in meta
    print(meta, '\n')
    assert '\\operatorname{Gal}(L/K)' in meta['latex_in_original']
    print(mf, '\n')

    main_mf = MarkdownFile.from_vault_note(note)
    assert notation_note.name not in str(main_mf)  # No link has been created

{'detect_regex': [], 'latex_in_original': ['\\operatorname{Gal}(L/K)'], 'tags': []} 

---
detect_regex: []
latex_in_original: ["\\operatorname{Gal}(L/K)"]
tags: []
---
$\operatorname{Gal}(L/K)$ [[galois_group|denotes]]  



If the notation note of the specified name (`notation_note_name`) already exists, then by default no note is created and no link is added in the main note. 

In [13]:
# The fixture vault is expected to already contain a note named
# `some_reference_name_notation_Spec_A`, so with the default `overwrite=False`
# nothing is created.
with tempfile.TemporaryDirectory(prefix='tmp_dir_', dir=os.getcwd()) as tmp_dir:
    tmp_dir = Path(tmp_dir)
    temp_vault = tmp_dir / 'test_vault_7'
    shutil.copytree(_test_directory() / 'test_vault_7', temp_vault)

    note = VaultNote(temp_vault, name='spectrum_of_a_ring')
    notation_note = make_a_notation_note(
        note, temp_vault, r'\operatorname{Spec} A', '', notation_note_name='some_reference_name_notation_Spec_A')

    # `None` signals that no note was written.
    assert notation_note is None

    main_mf = MarkdownFile.from_vault_note(note)
    assert main_mf.get_headings_and_text()['# See Also'].strip() == ''  # No link has been added

Setting `overwrite=True`, however, will overwrite the existing note. The method will also add a link to the (overwritten) notation note.

In [14]:
# Same pre-existing note as in the previous example, but `overwrite=True`
# replaces its contents and a link is added to the main note.
with tempfile.TemporaryDirectory(prefix='tmp_dir_', dir=os.getcwd()) as tmp_dir:
    tmp_dir = Path(tmp_dir)
    temp_vault = tmp_dir / 'test_vault_7'
    shutil.copytree(_test_directory() / 'test_vault_7', temp_vault)

    note = VaultNote(temp_vault, name='spectrum_of_a_ring')
    notation_note = make_a_notation_note(
        note, temp_vault, r'\operatorname{Spec} A', '', notation_note_name='some_reference_name_notation_Spec_A',
        overwrite=True)

    assert notation_note is not None

    main_mf = MarkdownFile.from_vault_note(note)
    assert notation_note.name in str(main_mf)
    notation_mf = MarkdownFile.from_vault_note(notation_note) 
    # notation_mf has been overwritten
    print(notation_mf)

---
detect_regex: []
latex_in_original: ["\\operatorname{Spec} A"]
tags: []
---
$\operatorname{Spec} A$ [[spectrum_of_a_ring|denotes]] 


In [15]:
#| hide
# Reads the fixture vault directly; `_full_notation_string` only builds a str.
vault = _test_directory() / 'test_vault_7'
# NOTE: `note` is reused by a later hidden test cell.
note = VaultNote(vault, name='galois_group')
# An empty `latex_in_original` is written out as-is (`[""]`); the fallback to
# `notation` happens in `make_a_notation_note`, not here.
output = _full_notation_string(note, notation=r'\operatorname{Gal}(L/K)', description='', latex_in_original='')
print(output)
assert r'$\operatorname{Gal}(L/K)$' in output
assert '[[galois_group|denotes]]' in output

---
detect_regex: []
latex_in_original: [""]
tags: []
---
$\operatorname{Gal}(L/K)$ [[galois_group|denotes]] 


In [16]:
#| hide
# Dollar signs are stripped even when unbalanced, surrounding whitespace is
# trimmed, and newlines inside the LaTeX are removed.
assert _raw_notation(r'$$$\mathscr{O}_X$$') == r'\mathscr{O}_X'
assert _raw_notation(r'$$ \operatorname{Spec} A  $$') == r'\operatorname{Spec} A'
assert _raw_notation(r'$f_\mathfrak{p}$') == r'f_\mathfrak{p}'
assert _raw_notation(r'$ \mathscr{O}_X') == r'\mathscr{O}_X'
assert _raw_notation(r'\operatorname{Gal}(L/K)') == r'\operatorname{Gal}(L/K)'
test_eq(_raw_notation('\\operatorname\n{Gal}(L/K)'), r'\operatorname{Gal}(L/K)')

In [17]:
#| hide
# NOTE: relies on `note` defined in the earlier hidden `_full_notation_string`
# test cell.
# All four dollar-sign/whitespace variants of the same LaTeX must produce the
# same notation note text.
latex_string_1 = r'$$$\mathscr{O}_X$$'
output_1 = _full_notation_string(note, notation=latex_string_1, description='', latex_in_original=latex_string_1)
latex_string_2 = r'$$ \mathscr{O}_X $$'
output_2 = _full_notation_string(note, notation=latex_string_2, description='', latex_in_original=latex_string_2)
latex_string_3 = r'$\mathscr{O}_X$'
output_3 = _full_notation_string(note, notation=latex_string_3, description='', latex_in_original=latex_string_3)
latex_string_4 = r'$ \mathscr{O}_X'
output_4 = _full_notation_string(note, notation=latex_string_4, description='', latex_in_original=latex_string_4)
assert output_1 == output_2
assert output_2 == output_3
assert output_3 == output_4

In [18]:
#| export
MAX_NOTE_NAME_LENGTH = 80
def _make_notat_notes_from_sifted_notats(
        main_note: VaultNote, vault: PathLike, reference_name: str,
        notations: list[tuple[str, str]], destination: Optional[PathLike],
        overwrite: bool, add_to_main: bool) -> list[VaultNote]:
    """
    Create notation notes for the notations found in `main_note`
    (marked either by double asterisks `**` or by an HTML tag).

    Each entry of `notations` is a pair of the full LaTeX string and the
    notation proper; when the latter is empty, the full string is used as
    the notation. The pairs are processed in reverse order.

    This is a helper function to `make_notation_notes_from_double_asts`
    and `make_notation_notes_from_HTML_tags`.
    """
    # TODO: test that note names aren't too long.
    created_notes = []
    for original_latex, specified_notation in reversed(notations):
        notation = specified_notation if specified_notation else original_latex
        sanitized = latex_to_path_accepted_string(notation)
        candidate_name = f'{reference_name}_notation_{sanitized}'
        # Slicing leaves names already within the limit untouched.
        candidate_name = candidate_name[:MAX_NOTE_NAME_LENGTH]
        unique_note_name = VaultNote.unique_name(candidate_name, vault)
        created = make_a_notation_note(
            main_note, vault, notation, '', unique_note_name,
            destination, overwrite, add_to_main,
            latex_in_original=original_latex)
        if created:
            created_notes.append(created)
    return created_notes
    

In [19]:
#| hide

#### Make notation notes from double asterisks surrounding notations

In [20]:
#| export
def make_notation_notes_from_double_asts(
        main_note: VaultNote, # The standard information note from which the notations are marked with double asterisks
        vault: PathLike, # Path to the vault directory.
        reference_name: str, # The name of the reference; the notation note's name will start with `{reference_name}_notation_`.
        destination: Optional[PathLike] = None, # The directory to create the new notation notes in.  If `None`, then creates the new notation notes in the same directory as `main_note`.
        overwrite: bool = False, # If `True`, overwrite file of the same path as the new notation file to be written, if such a file exists.  Otherwise, does nothing. Defaults to `False`.
        add_to_main: bool = True # If `True`, adds links to the notation note in the `See Also` section of the main note.
        ) -> list[VaultNote]: # The list of VaultNotes that are newly created/modified.
    """Make notation notes based on double asterisks surrounding LaTeX text in
    a standard information note.

    Notations are deemed to be the LaTeX texts in the info note that are
    surrounded by double asterisks. If such a LaTeX text (without its
    surrounding dollar signs `$` or `$$`) is already listed in the
    `latex_in_original` metadata section of a notation note that is in the
    same directory as the info note and has the info note as its main note,
    then no new notation note is created for it. The comparison counts
    multiplicities: notation notes are created until the number of times a
    LaTeX text appears in the info note is no more than the number of times
    it appears in the `latex_in_original` sections of those notation notes.

    For example, if the info note marks the notation `A` four times and the
    notation `B` once, and the existing notation notes list `A` twice in
    `latex_in_original`, then three notation notes are created: two with
    `A` listed in their `latex_in_original` sections, and one with `B`.

    **Warns**

    - UserWarning
        - If there are notation notes whose main note is determined to
        be `main_note` and whose notations "excessively cover" those
        in `main_note`, i.e. the notation notes have more notations than
        `main_note` introduces. The main note and the excessive
        notations are reported; the notations are reported instead of the
        notation notes because the same notation may span either multiple
        or single notation notes.
    """
    # Find notations
    notations = [
        _raw_notation(notation)
        for notation in notat_str_from_doub_asts_in_std_info_note(main_note)]
    marked_notations = Multiset(notations)
    # Get only the notations not already made into notes based on
    # latex_in_original
    all_latex_in_original = _latex_in_original_from_notat_notes_to_main_note(
        vault, main_note)
    notations_to_create = [
        (notat, "")
        for notat in marked_notations.difference(all_latex_in_original)]
    # Alert of existing notations that should not be there
    excess_notations = list(
        all_latex_in_original.difference(marked_notations))
    if excess_notations:
        # Metadata entries come from YAML and need not be strs, so convert
        # before joining.
        excess_text = ', '.join(str(notat) for notat in excess_notations)
        warnings.warn(
            f"The following note has the following excess notations: "
            f"{main_note.name}, {excess_text}")
    # Make notation notes
    return _make_notat_notes_from_sifted_notats(
        main_note, vault, reference_name, notations_to_create,
        destination, overwrite, add_to_main)
    

def _latex_in_original_in_notat(
        notation_note: VaultNote
        ) -> list[str]:
    """Return the `latex_in_original` metadata section of the notation note.

    If the note has no metadata or no `latex_in_original` metadata section,
    then returns the list consisting of the notation in the notation note,
    with surrounding dollar signs `$` stripped.
    """
    # TODO: test in the case that `latex_in_original` section does not exist
    mf = MarkdownFile.from_vault_note(notation_note)
    metadata = mf.metadata()
    if metadata is not None and 'latex_in_original' in metadata:
        return metadata['latex_in_original']
    # Parse the note body only when it is actually needed, so that a note
    # with a `latex_in_original` section but no parseable notation does not
    # fail here. Dollar signs are stripped in every fallback case so the
    # result is comparable with the other `latex_in_original` entries.
    return [notation_in_note(notation_note).strip('$')]

    
def _latex_in_original_from_notat_notes_to_main_note(
        vault: PathLike,
        main_note: VaultNote # The info note
        ) -> Multiset:
    """Return a Multiset of all `latex_in_original` entries of the notation
    notes that are in the same directory as an info note and have that info
    note as their main note.
    """
    main_note_names_by_notat_note = notations_and_main_notes(
        vault, note=main_note)
    collected_latex = Multiset()
    for notat_note_name, info_note_name in main_note_names_by_notat_note.items():
        # Skip notation notes that belong to some other info note.
        if info_note_name != main_note.name:
            continue
        notat_note = VaultNote(vault, name=notat_note_name)
        collected_latex.update(_latex_in_original_in_notat(notat_note))
    return collected_latex





As described in `markdown.obsidian.personal.machine_learning.notation_identification`, we surround a LaTeX math mode string with double asterisks `**` to indicate that the string introduces a notation.

The `make_notation_notes_from_double_asts` method parses LaTeX surrounded by double asterisks `**` in a standard information note and automatically creates notation notes for said LaTeX.

In [21]:
# Work on a throwaway copy of the fixture vault so the fixture is not modified.
with tempfile.TemporaryDirectory(prefix='tmp_dir_', dir=os.getcwd()) as tmp_dir:
    tmp_dir = Path(tmp_dir)
    temp_vault = tmp_dir / 'test_vault_6'
    shutil.copytree(_test_directory() / 'test_vault_6', temp_vault)

    info_note = VaultNote(temp_vault, name='reference_for_notation_notes_introducing_some_notations')
    new_notes = make_notation_notes_from_double_asts(info_note, temp_vault, 'reference_for_notation_notes')
    
    # The fixture info note is expected to yield three new notation notes.
    assert len(new_notes) == 3
    for new_note in new_notes:
        assert new_note.exists()
        assert notation_note_is_linked_in_see_also_section(new_note, info_note)
    # TODO: add more tests - overwrite=True, add_to_main=False

In the following example, we prompt `make_notation_notes_from_double_asts` to make notation notes for an info note with no notations - nothing is modified:

In [22]:
# An info note that marks no notations: nothing is created and the info note's
# text stays unchanged.
with tempfile.TemporaryDirectory(prefix='tmp_dir_', dir=os.getcwd()) as tmp_dir:
    tmp_dir = Path(tmp_dir)
    temp_vault = tmp_dir / 'test_vault_6'
    shutil.copytree(_test_directory() / 'test_vault_6', temp_vault)

    info_note = VaultNote(temp_vault, name='reference_for_notation_notes_no_notations_introduced_here')
    info_note_content_before = info_note.text()
    new_notes = make_notation_notes_from_double_asts(info_note, temp_vault, 'reference_for_notation_notes')
    info_note_content_after = info_note.text()
    
    assert len(new_notes) == 0
    assert info_note_content_before == info_note_content_after

In the following example, we prompt `make_notation_notes_from_double_asts` on the same info note twice - no new notation notes are created the second time.

In [23]:
# Running the function twice: the second run finds every notation already
# recorded in `latex_in_original` and creates nothing.
with tempfile.TemporaryDirectory(prefix='tmp_dir_', dir=os.getcwd()) as tmp_dir:
    tmp_dir = Path(tmp_dir)
    temp_vault = tmp_dir / 'test_vault_6'
    shutil.copytree(_test_directory() / 'test_vault_6', temp_vault)

    info_note = VaultNote(temp_vault, name='reference_for_notation_notes_introducing_some_notations')
    make_notation_notes_from_double_asts(info_note, temp_vault, 'milne_av')
    new_notes = make_notation_notes_from_double_asts(info_note, temp_vault, 'milne_av')
    assert len(new_notes) == 0
    

In the following example, an info note contains two of the same notation. One notation note for each of these notations is created, but with different names:

In [24]:
# The same notation marked twice yields two notes whose names are made
# distinct (see the printed names).
with tempfile.TemporaryDirectory(prefix='tmp_dir_', dir=os.getcwd()) as tmp_dir:
    tmp_dir = Path(tmp_dir)
    temp_vault = tmp_dir / 'test_vault_7'
    shutil.copytree(_test_directory() / 'test_vault_7', temp_vault)

    info_note = VaultNote(temp_vault, name='note_with_repeated_notation')
    new_notes = make_notation_notes_from_double_asts(info_note, temp_vault, 'some_reference_name')
    assert len(new_notes) == 2
    print(new_notes[0].name)
    print(new_notes[1].name)

some_reference_name_notation_Cl_K
some_reference_name_notation_Cl_K_1


In the following example, there are notation notes with the info note as their main note, but some of the notations in these notation notes cover those in the info note "excessively" - in this case, notation notes are created only for the uncovered notations, and a warning is issued to indicate which notations are covered excessively.

In [25]:
# Existing notation notes list more notations than the info note marks; only
# the one uncovered notation gets a new note.
# NOTE(review): the accompanying text says a warning is issued here, but this
# cell does not assert it - consider checking with `warnings.catch_warnings`.
with tempfile.TemporaryDirectory(prefix='tmp_dir_', dir=os.getcwd()) as tmp_dir:
    tmp_dir = Path(tmp_dir)
    temp_vault = tmp_dir / 'test_vault_7'
    shutil.copytree(_test_directory() / 'test_vault_7', temp_vault)

    info_note = VaultNote(temp_vault, name='note_with_some_excessive_notation_notes')
    new_notes = make_notation_notes_from_double_asts(info_note, temp_vault, 'some_reference_name')
    assert len(new_notes) == 1



In [26]:
#| hide
# Regression-style check on the `foag_1.3.5` fixture note: no new notation
# notes are expected.
with tempfile.TemporaryDirectory(prefix='tmp_dir_', dir=os.getcwd()) as tmp_dir:
    tmp_dir = Path(tmp_dir)
    temp_vault = tmp_dir / 'test_vault_7'
    shutil.copytree(_test_directory() / 'test_vault_7', temp_vault)

    info_note = VaultNote(temp_vault, name='foag_1.3.5')
    new_notes = make_notation_notes_from_double_asts(info_note, temp_vault, 'foag')
    assert len(new_notes) == 0

In [27]:
# TODO: move notation notes to directory of main notes

#### Make notation notes from HTML tags surrounding notations

In [28]:
#| export
def make_notation_notes_from_HTML_tags(
        main_note: VaultNote, # The standard information note in which the notations are marked with HTML tags carrying a `notation` attribute.
        vault: PathLike, # The vault that `main_note` belongs to.
        reference_name: str, # The name of the reference; the notation note's name will start with `{reference_name}_notation_`.
        destination: Optional[PathLike] = None, # The directory to create the new notation notes in.  If `None`, then creates the new notation notes in the same place as `main_note`.
        overwrite: bool = False, # If `True`, overwrite file of the same path as the new notation file to be written, if such a file exists.  Otherwise, does nothing. Defaults to `False`.
        add_to_main: bool = True # If `True`, adds links to the notation note in the `See Also` section of the main note.
        ) -> list[VaultNote]: # The list of VaultNotes that are newly created/modified.
    r"""Make notation notes based on HTML tags surrounding LaTeX text
    in a standard information note.

    Notations are determined by HTML tags with the `notation`
    attribute in them. For instance, `<span notation="">$H^i$</span>`
    is deemed to be a notation and the text of the notation note starts
    with "$H^i$ [[main_note.name|denotes]]".

    The `notation` attribute can also be used to specify the notation
    more precisely, e.g. the tag
    `<span notation="h^i">$h^i := \dim_k H^i$</span>`
    begets a notation note that starts with "$h^i$ [[main_note.name|denotes]]".

    The `notation` attribute can also be used to beget multiple notation notes
    from a single HTML tag by separation by double semicolons `;;`. e.g. the
    tag

    `<span notation="IJ;;I+J">$$IJ = \langle ab: a \in I, b \in J \rangle, \quad I+J = \{a+b: a \in I, b \in J \}$$</span>`

    begets two notation notes, one which starts with
    "$IJ$ [[main_note.name|denotes]]" and the other which starts with
    "$I+J$ [[main_note.name|denotes]]"

    **Raises**

    - Warning

        - If there are notation notes whose main note is determined to
        be `main_note` and whose notations "excessively cover" those
        in `main_note`, i.e. the notation notes have more notations than
        `main_note` introduces. The main note and the excessive
        notations are printed; the notations are printed instead of the
        notation notes because the same notation may span either multiple
        or single notation notes.

        - If there is a notation HTML tag surrounding text that is not a
        pure latex string.
    """
    # Find the notations marked in the main note. Each pair unpacks as
    # `(full, actual)`; presumably `full` is the LaTeX enclosed by the tag
    # and `actual` the notation the note is to be made for (TODO confirm
    # against `notat_str_from_html_tags`).
    notat_pairs = [
        (_raw_notation(full), actual)
        for full, actual in notat_str_from_html_tags(main_note)]
    full_latex = [full for full, _ in notat_pairs]

    # Keep only the notations which have not already been made into
    # notation notes, judging by the `latex_in_original` of existing notes.
    existing_latex = _latex_in_original_from_notat_notes_to_main_note(
        vault, main_note)
    # NOTE(review): this is a plain membership test, so multiplicities are
    # ignored: if `main_note` marks the same notation several times and at
    # least one notation note for it exists, none of the occurrences gets a
    # new note. Confirm that this is intended.
    notations_to_create = [
        (full, actual) for full, actual in notat_pairs
        if full not in existing_latex]

    # Alert of existing notations that should not be there.
    excess_notations = list(existing_latex.difference(Multiset(full_latex)))
    if excess_notations:
        warnings.warn(
            f"The following note has the following excess notations: "
            f"{main_note.name}, {', '.join(excess_notations)}")

    # Make notation notes
    return _make_notat_notes_from_sifted_notats(
        main_note, vault, reference_name, notations_to_create,
        destination, overwrite, add_to_main)

In [29]:
# TODO: test

# Decomposition of notation

In [30]:
# #| export
# def decompose_notation_as_sequence(
#         notation # latex styled. Assumed to not be surrounded with `'$'`.
#         ) -> list:
#     """
#     **Parameters**
#     - notation - str
    
#     **Returns**
#     - list of str
#     """
#     str_index = 0
#     decomposition = []
#     while str_index < len(notation):
#         if notation[str_index] == '\\':
#             j = 1
#             while (str_index + j < len(notation)
#                     and notation[str_index + j] not in [' ', '\\', '{', '(']):
#                 j += 1
#             latex_command = notation[str_index:str_index + j]
#             decomposition.append(latex_command)
#             str_index += j
#         if str_index < len(notation):
#             decomposition.append(notation[str_index])
#         str_index += 1
#     return decomposition


# def compare_notations_for_sorting(
#         notation1, # latex styled. Assumed to not be surrounded with `'$'`.
#         notation2, # latex styled. Assumed to not be surrounded with `'$'`.
#         character_ordering_list
#         ) -> int:
#     """
#     **Parameters**
#     - notation - str
#         - latex styled. Assumed to not be covered with `'$'`.
    
#     **Returns**
#     - int
#         - 1 if `notation2` is considered to come "earlier", -1 if
#         `notation1` is considered to come "earlier", and 0 otherwise.
#     """
#     decomposition1 = decompose_notation_as_sequence(notation1)
#     decomposition2 = decompose_notation_as_sequence(notation2)
#     index1, index2 = 0, 0
#     while index1 < len(decomposition1) and index2 < len(decomposition2):
#         return  #TODO
#     if index1 < len(decomposition1):
#         return 1
#     if index2 < len(decomposition2):
#         return -1
#     return 0


# def _find_next_effective_character(decomposition, index):
#     while (index < len(decomposition)
#             and decomposition[index] in ['{', '}', '(', ')', '[', ']', r'\tilde',
#                                          r'\hat', r'\bar', 'r\overline']):
#         return

In [31]:
# decompose_notation_as_sequence(r'\tilde{K} (X)')
# decompose_notation_as_sequence(r'P(X;*,A)')
# decompose_notation_as_sequence(r'\operatorname{Spec} A')

## Detect notations being used in reference

### Regex from latex

In [32]:
#| export
# Characters that get a backslash put in front of them when they are turned
# into regex (they are special characters in regex).
SPECIAL_CHARACTERS = list('.+*?^$()[]{}|\\')
# Each inner list is a group of macro names that are considered
# interreplaceable with one another.
replaceable_groups = [
    ['mathrm', 'operatorname', 'rm', 'text'],
    ['mathbf', 'bf'],
    ['mathit', 'it'],
]


def _build_replacables_from_groups(
        replaceable_groups: list[list[str]]) -> dict[str, set[str]]:
    total_dict = {}
    for listy in replaceable_groups:
        set_for_group = set(listy)
        for macro in listy:
            total_dict[macro] = set_for_group
    return total_dict


# Lookup from a macro name to the set of macro names in its group of
# `replaceable_groups`; used as the default `replaceables` of
# `regex_from_latex`.
REPLACEABLES = _build_replacables_from_groups(replaceable_groups)
    

def regex_from_latex(
        latex: str, replaceables: dict[str, set[str]] = REPLACEABLES,
        special_characters: list[str] = SPECIAL_CHARACTERS) -> str:
    r"""Returns regex to match latex math mode string which is essentially
    equivalent to a specified latex math mode string.

    This is a heuristic: the outputs of this function may not work
    correctly. The regex pattern does not have to fully match an equivalent
    string.

    **Parameters**

    - latex - str
        - The latex math mode string. Does not include math mode delimiters
        such as `$`, `$$`, `\[ \]` (although the characters `'['` and `']'`
        can still be part of the string, e.g. for optional arguments of a
        macro/operator). Can include "placeholders" `r'\1'`, `r'\2'`, `r'\3'`,
        etc. to indicate substitutable/generics; the placeholders can be
        substituted with any string.
    - replaceables - dict[str, set[str]]
        - latex strings/commands which are considered "interreplaceable".
        A falsy value means that no macros are interreplaceable.
    - special_characters - list[str]
        - characters to add a backslash `'\'` in front of for regex.
        Defaults to a list consisting of special characters in regex.

    **Returns**

    - str
        - The regex pattern.
    """
    if not replaceables:
        replaceables = {}
    walker = LatexWalker(latex)
    nodelist, _, _ = walker.get_latex_nodes(pos=0)
    regex_parts: list[str] = []
    for node in nodelist:
        _look_into_node(node, regex_parts,
                        replaceables, special_characters)
    # Tolerate trailing spaces and curly braces after the notation.
    regex_parts.append('(?:[ \\{\\}]*)')
    return ''.join(regex_parts)
    
def _escape_chars(text: str, special_characters: list[str]) -> list[str]:
    """Return the characters of `text`, each one prefixed with a backslash
    if it is in `special_characters`."""
    return [f'\\{char}' if char in special_characters else char
            for char in text]


def _look_into_node(
        node: LatexNode, regex_parts: list[str],
        replaceables: dict[str, set[str]],
        special_characters: list[str]) -> None:
    r"""Append to `regex_parts` the regex pieces corresponding to `node`.

    - A macro node whose name is a placeholder (e.g. `\1`) contributes a
    match-anything piece; any other macro node contributes an alternation
    of the macro and the macros it is interreplaceable with, per
    `replaceables`. In either case, the parsed arguments of the macro are
    then processed recursively.
    - A group node contributes its delimiters, with its contents processed
    recursively in between.
    - A chars node contributes its characters, allowing spaces and curly
    braces between and around them.

    Nodes of any other type contribute nothing.

    **Parameters**

    - node - LatexNode
    - regex_parts - list[str]
        - The list to append to; it is modified in place.
    - replaceables - dict[str, set[str]]
        - Maps a macro name to the macro names interreplaceable with it.
    - special_characters - list[str]
        - Characters to add a backslash in front of for regex.
    """
    if isinstance(node, LatexMacroNode):
        macroname = node.macroname
        if _macro_is_actually_placeholder(macroname):
            regex_parts.append('(?:.*)')
        else:
            # Sort so that the pattern does not depend on set iteration
            # order, and escape the names because a macro name can be a
            # single non-letter character (e.g. `\|`, `\{`, `\\`) which
            # would otherwise be read as regex syntax.
            candidates = sorted(replaceables.get(macroname, [macroname]))
            options_str = '|'.join(
                ''.join(_escape_chars(name, special_characters))
                for name in candidates)
            regex_parts.append(fr'(?: *?)\\(?:{options_str})(?: *?)')
        # `nodeargd` may be `None` for some macros, and `argnlist` may
        # contain `None` entries for arguments which were not supplied.
        arguments = getattr(node.nodeargd, 'argnlist', None) or []
        for argument in arguments:
            if argument is not None:
                _look_into_node(argument, regex_parts,
                                replaceables, special_characters)
    elif isinstance(node, LatexGroupNode):
        opening, closing = node.delimiters[0], node.delimiters[1]
        regex_parts.append(f'\\{opening}(?: *?)')
        for child in node.nodelist:
            _look_into_node(child, regex_parts,
                            replaceables, special_characters)
        regex_parts.append(f'(?: *?)\\{closing}')
    elif isinstance(node, LatexCharsNode):
        escaped = _escape_chars(node.chars.strip(), special_characters)
        # The empty strings at both ends make the optional spaces/brackets
        # separator also appear before the first and after the last char.
        regex_parts.append('(?:[ \\{\\}]*?)'.join(['', *escaped, '']))
        
def _macro_is_actually_placeholder(macro: str) -> bool:
    return macro.isnumeric()

In [33]:
# Smallest possible example: a notation consisting of a single character.
text = 'e'
print(regex_from_latex(text, replaceables=REPLACEABLES))

(?:[ \{\}]*?)e(?:[ \{\}]*?)(?:[ \{\}]*)


### Get regex from notation note

So far, I have just made notation notes in the form `'$math_mode_string$ denotes ...'`. I want to add frontmatter metadata in notation notes to indicate regex to detect the notation with placeholders.

In [34]:
#| export
def regex_from_notation_note(vault: PathLike, note: VaultNote) -> str:
    r"""Returns a regex str to detect the notation of the notation note.

    The regex detection strings should be in a list labeled `detect_regex` in
    the yaml frontmatter. If multiple strings are in the list, then the regex
    will detect latex math mode strings roughly corresponding to any of them.
    If multiple strings are in the list, then they must be ordered
    "by priority", with the higher priority regexes coming first. It is good
    to have these strings in quotes `""` to make sure that yaml can load them
    safely. When doing so, make sure to escape characters, e.g. backslash
    should be typed as `\\`, etc.

    A `detect_regex` that is a single string rather than a list is treated
    as a list with that one string. If `detect_regex` is absent or empty,
    then the regex is made from the notation that the note itself
    introduces.

    The strings in `detect_regex` can include placeholders, cf.
    ``regex_from_latex``.

    **Parameters**
    - vault - PathLike
    - note - VaultNote

    **Returns**
    - str
        - Of the regex used to detect the notation. The regex does not need to
        fully match instances of the notation.

    **Raises**
    - AssertionError
        - If `note` is not a notation note.
    """
    assert note_is_of_type(note, PersonalNoteTypeEnum.NOTATION_NOTE)
    metadata = MarkdownFile.from_vault_note(note).metadata()
    detects = None
    if metadata and 'detect_regex' in metadata:
        detects = metadata['detect_regex']
    if isinstance(detects, str):
        # A scalar yaml value; iterating it would go character by character.
        detects = [detects]
    if detects:
        return '|'.join(regex_from_latex(detect) for detect in detects)
    notation = notation_in_note(note, vault)
    # Get rid of `'$'`; assumes the notation is delimited by single `$`
    # on both sides -- TODO confirm against `notation_in_note`.
    return regex_from_latex(notation[1:-1])

In [35]:
# TODO: test