# markdown.obsidian.personal.index_notes
> Functions for managing index notes in one's Obsidian.md math vault.

In an Obsidian math vault, it is convenient to keep index notes, which list links to other index notes or to standard information notes.

The methods in this module
- create (standard information) notes in appropriate folders,
- set up the notes,
- add links to the notes in the appropriate index notes,
- indicate, in both the index note and the standard information note, where in the original text the content of the information note originates from.

In [None]:
#| default_exp markdown.obsidian.personal.index_notes

In [None]:
#| export
import glob
import os
from os import PathLike
from pathlib import Path
import re
from typing import Union

from natsort import natsorted

from trouver.markdown.markdown.file import (
    MarkdownFile, MarkdownLineEnum
)
from trouver.markdown.markdown.heading import heading_title
from trouver.markdown.obsidian.links import (
    find_links_in_markdown_text, ObsidianLink, links_from_text
)
from trouver.markdown.obsidian.vault import (
    VaultNote, note_name_unique, note_path_by_name
)

In [None]:
import tempfile
from unittest import mock
from fastcore.test import *
from trouver.helper import path_name_no_ext

## Automatically filling in some notes

- I need to identify the names of subsections/subchapters inside a chapter, and identify the correspondence of subdirectories in the directory with headings in the index note.
- I need to use regex to find Theorems/Corollaries/Propositions/Lemmas/Definitions/Remarks/Examples (I'll refer to these as Numberings) in LaTeX code that has been OCR'd.
- For each subsection/subchapter, I need to collect these Numberings, create an information note for each one, and add the number/page number.

In [None]:
#| export
def subsections_listed_in_index_note(
        index_note: Union[VaultNote, str], # The index note, or the name of the index note in `vault`.
        vault: PathLike # The path to the vault; used to look the note up when `index_note` is given as a name.
        ) -> dict[Union[int, str], Union[dict, str]]: # The keys are 1. line numbers and 2. `'title'`. The values are dict and str (the blank str if root node), respectively.
    """
    Return subsections/subchapters as listed in the index note.

    The returned value is whatever `MarkdownFile.get_headings_tree`
    produces for the file underlying the index note.

    **See Also**

    - The `get_headings_tree` function of the `MarkdownFile` class.
    """
    vault = Path(vault)
    if isinstance(index_note, str):
        # A bare name was given; resolve it to a note inside the vault.
        index_note = VaultNote(vault, name=index_note)
    mf_file = MarkdownFile.from_vault_note(index_note)
    return mf_file.get_headings_tree()

In [None]:
# Sample index note content: five level-1 headings, some followed by a list of links.
text = r"""# 1. Some section title
- [[some_note]], Page 1
- [[some_note_2]], Page 2

# 2. Some other section title
- [[some_note_3]], Page 2
- [[some_note_4]], Page 3

# 3. Section 3
- [[some_note_5|an alias]], Page 3

# 4. Section 4
# 5. Section 5
"""

# Patch `open` as seen from the `MarkdownFile` module so that reading a file yields `text`.
with mock.patch("trouver.markdown.markdown.file.open", mock.mock_open(read_data=text)):
    fake_vn = VaultNote(rel_path='fake_note.md', vault='')  # Think of this as a VaultNote object whose underlying file has `text` as its content.
    subsections_in_text = subsections_listed_in_index_note(fake_vn, vault='')
    # Each int key is the 0-based line number of a heading within `text`;
    # the root node carries the blank str as its title.
    expected_output = {
        'title': '',
        0: {'title': '# 1. Some section title'},
        4: {'title': '# 2. Some other section title'},
        8: {'title': '# 3. Section 3'},
        11: {'title': '# 4. Section 4'},
        12: {'title': '# 5. Section 5'}}
    test_eq(subsections_in_text, expected_output)

In [None]:
#| export
def subsection_folders(
        index_note: Union[VaultNote, str], # The index note, or the name of the index note in `vault`.
        vault: PathLike, # The path to the vault.
        output_type: str, # `'absolute_path'`, `'relative_path'`, or `'name'`
        ) -> list[str]: # List of immediate subdirectories in the directory containing the index note.
    """
    Return subdirectories corresponding to subsections/subchapters, i.e.
    the folders in the same directory as the index note.

    The folders are arranged in the order specified by `natsorted`.

    Depending on `output_type`, each folder is reported as

    - `'absolute_path'`: the path exactly as returned by `glob.glob`
      (i.e. prefixed by `vault`),
    - `'relative_path'`: the path relative to `vault`,
    - `'name'`: the name of the folder only.

    **Raises**

    - ValueError
        - If `output_type` is not one of the values listed above.
    """
    if output_type not in ('absolute_path', 'relative_path', 'name'):
        # Fail loudly instead of silently returning `None`.
        raise ValueError(
            "Expected `output_type` to be 'absolute_path', 'relative_path',"
            f" or 'name', but got {output_type!r}.")
    vault = Path(vault)
    if isinstance(index_note, str):
        index_note = VaultNote(vault, name=index_note)
    parent_directory = (vault / index_note.rel_path).parent
    # Escape the directory path so that glob metacharacters (`[`, `*`, `?`)
    # occurring in folder names are matched literally. The trailing
    # separator in the pattern restricts the matches to directories, and
    # `**` behaves like `*` here because `recursive` is not set.
    pattern = glob.escape(str(parent_directory)) + '/**/'
    folder_paths = natsorted(glob.glob(pattern))
    if output_type == 'absolute_path':
        return folder_paths
    if output_type == 'relative_path':
        return [str(Path(folder).relative_to(vault))
                for folder in folder_paths]
    return [Path(folder).name for folder in folder_paths]

In [None]:
# A made-up vault and chapter directory; nothing is read from disk because
# `glob.glob` is patched below.
mock_vault = Path('mock_absolute_path')
mock_path =  mock_vault / Path('mock_reference_folder') / Path('mock_chapter')
folders = [  #glob.glob would return the folders in this order, at least on Windows:
    '1 section',
    '10 section',
    '11 section',
    '2 section',
    '3 section',
    '4 section',
    '5 section',
    '6 section',
    '7 section',
    '8 section',
    '9 section']

mock_glob_return_value = [str(mock_path / folder) for folder in folders]

# Make `glob.glob` return the folders above, in that (non-natural) order.
with mock.patch("__main__.glob.glob", return_value=mock_glob_return_value):
    mock_index_note = VaultNote(rel_path='_index_mock_chapter.md', vault= mock_vault)
    
    # For each output type: the output is a reordering of the glob result
    # (`test_shuffled`), and the order is the natural-sort order (`test_eq`).
    sample_output_absolute_path = subsection_folders(mock_index_note, mock_vault, output_type='absolute_path')
    test_shuffled(sample_output_absolute_path, mock_glob_return_value)
    test_eq(sample_output_absolute_path, natsorted(mock_glob_return_value))

    sample_output_relative_path = subsection_folders(mock_index_note, mock_vault, output_type='relative_path')
    expected_output_for_relative_paths = [os.path.relpath(folder, mock_vault) for folder in mock_glob_return_value]
    test_shuffled(sample_output_relative_path, expected_output_for_relative_paths)
    test_eq(sample_output_relative_path, natsorted(expected_output_for_relative_paths))

    # test_eq(sample_output_absolute_path, )
    sample_output_name = subsection_folders(mock_index_note, mock_vault, output_type='name')
    test_shuffled(sample_output_name, folders)
    test_eq(sample_output_name, natsorted(folders))

## Corresponding headings in index notes and subfolders

In [None]:
#| export 
def get_alphanumeric(
        title: str, # The title of either a folder or a heading. Must start with an alphanumeric.
        title_type: str # Either `folder` or `heading`.
        ) -> str: # An alphabet or a numeric (arabic or roman)
    """
    Extract the leading alphanumeric from the title of either a folder
    or a heading in an index note.

    Folders are expected to be titled `'{alphanumeric}_{folder_title}'`
    and headings `'{alphanumeric}. {heading_title}'`. A title that does
    not contain the expected separator is returned unchanged.
    """
    assert title_type in ['folder', 'heading']
    # Folders separate the alphanumeric from the rest of the title with
    # `_`, whereas headings use `. `.
    prefix_pattern = (
        r'(.*?)_.*' if title_type == 'folder' else r'(.*?)\. .*')
    return re.sub(prefix_pattern, r'\1', title)
    


In [None]:
# Headings separate the alphanumeric from the title with `. `.
test_eq(get_alphanumeric('1. Higher direct images', 'heading'), '1')
# Folders separate the alphanumeric from the title with the first `_`.
test_eq(get_alphanumeric('1_higher_direct_images', 'folder'), '1')
test_eq(get_alphanumeric('12_higher_direct_images_the_leray_spectral_sequence', 'folder'), '12')
# Roman numerals and letters work as well.
test_eq(get_alphanumeric('VII_elliptic_curves_over_local_fields', 'folder'), 'VII')
test_eq(get_alphanumeric('A_properties_of_morphisms', 'folder'), 'A')

In [None]:
#| export 
def correspond_headings_with_folder(
        index_note: VaultNote, # The index note whose level 1 headings are matched with folders.
        vault: PathLike, # The path to the vault.
        include_non_heading: bool = True # If `True`, and if there is text before any heading, then treat such text as being under a "blank" heading.
        ) -> dict[str, tuple[str, str]]: # Each key is the alphanumeric of a heading. Each value is the tuple `(heading_title, folder_name)`.
    """
    Return a dict pairing the level 1 headings of an index note
    with the names of the folders next to the index note.

    Assumes that each folder is titled
    `'{alphanumeric}_{folder_title}'` and each heading is titled
    `'{alphanumeric}. {heading_title}'`

    The headings (in the order in which `get_headings` returns them) and
    the folders (in the order returned by `subsection_folders`) are
    paired by position; if there are more of one than of the other,
    the surplus items are left out.

    **Returns**
    - dict[str, tuple[str, str]]
        - Each key is a str indexing the headings and folders. The keys
        are usually alphanumerics (arabic or roman), depending on the
        numbering system of chapters/sections of the reference/text.
        The values are tuples `(heading_title, folder_name)`, both of
        which still include the alphanumeric, e.g.
        `'18.1': ('18.1. Some title', '181_some_title')`.
        For the blank heading, the key/index, the heading title,
        and the folder name are all the empty str.
    """
    index = MarkdownFile.from_vault_note(index_note)
    headings = [heading_title(heading)
                for heading in index.get_headings(levels=1)]
    folders = subsection_folders(index_note, vault, output_type='name')
    correspond_dict = {get_alphanumeric(heading, 'heading'): (heading, folder)
                       for heading, folder in zip(headings, folders)}
    # TODO do a better job at the conditional below; 
    # for example, consider the start of the text blank if it's just empty lines with spaces.
    if (include_non_heading and index.parts
            and index.parts[0]['type'] != MarkdownLineEnum.HEADING):
        # The note starts with something other than a heading: register
        # the "blank" heading for the text preceding the first heading.
        correspond_dict[''] = ('', '')
    return correspond_dict
    

In [None]:
# A made-up chapter directory with three section folders; the folder names
# drop the dot of the section number (e.g. `181` for section `18.1`).
mock_vault = Path('mock_absolute_path')
mock_path =  mock_vault / Path('algebraic_geometry') / Path('some_reference') / Path('chapter_18_some_chapter')
folders = ['181_some_title',
    '182_some_other_title',
    '183_yet_another_title']
mock_glob_return_value = [str(mock_path / folder) for folder in folders]

# Content of the index note: one level-1 heading per section folder.
text = r"""# 18.1. Some title 
- [ ] [[some_reference 18.1|some_reference_some_alias]], 18.1, Page 300
# 18.2. Some other title 
- [ ] [[some_reference 18.2]], 18.2, Page 305
# 18.3. Yet another title 
- [ ] [[some_reference 18.3|]], 18.3, Page 308
"""
mock_index_file = MarkdownFile.from_string(text)

# Patch both the folder lookup (`glob.glob`) and the reading of the index note,
# so that no file system access is needed.
with (mock.patch("__main__.glob.glob", return_value=mock_glob_return_value),
      mock.patch("trouver.markdown.markdown.file.MarkdownFile.from_vault_note", return_value=mock_index_file)):

    mock_index_note = VaultNote(rel_path = '_index_mock.md', vault=mock_vault)
    # subsections_listed_in_index_note(mock_index_note, vault=mock_vault)
    sample_output = correspond_headings_with_folder(mock_index_note, mock_vault)
    print(sample_output)
    test_eq(len(sample_output), 3)
    for key, value in sample_output.items():
        # value[0] is the heading title and value[1] is the folder name.
        assert value[0].startswith(key)
        assert value[1].startswith(key.replace('.', ''))

{'18.1': ('18.1. Some title', '181_some_title'), '18.2': ('18.2. Some other title', '182_some_other_title'), '18.3': ('18.3. Yet another title', '183_yet_another_title')}


## Move information notes to their appropriate folders.
Sometimes, I end up creating information notes in the wrong folders. It would be nice to detect which ones are in the wrong folders and to move them appropriately.

In [None]:
#| export
def information_notes_linked_in_index_note(
        index_note: VaultNote, # The note indexing the information notes.
        vault: PathLike,
        hints: list[PathLike] = None # Hints on where the information notes are likely to be found at.  Each path is relative to `vault` and points to a directory. Defaults to `None`.
        ) -> dict[str, list[VaultNote]]: # Each key is the index for the heading (usually either an alphanumerical or a roman numerical). Each value is a list of the information notes linked in the index note.
    """Collect the information notes linked in `index_note`, grouped by heading.

    Only level 1 headings are considered. The headings are those paired
    with folders by `correspond_headings_with_folder`, and the notes are
    looked up by name, so all notes in the vault are assumed to have
    unique names.

    This function is used in `move_information_notes_to_correct_folder`.
    """
    index_directory = os.path.dirname(index_note.rel_path)
    pairing = correspond_headings_with_folder(index_note, vault)
    index_file = MarkdownFile.from_vault_note(index_note)
    sections = index_file.get_headings_and_text(levels=1, include_start=True)
    text_by_title = {heading_title(raw_heading): body
                     for raw_heading, body in sections.items()}

    # First gather, for each heading index, the names of the notes linked
    # in the text under that heading. No note object is created yet.
    linked_names = {}
    for heading_index, (title, _) in pairing.items():
        links = links_from_text(text_by_title[title])
        linked_names[heading_index] = [link.file_name for link in links]

    # The folder paired with a heading is passed as an extra hint: if the
    # note already is where it belongs, it is found there quickly.
    heading_directories = {
        heading_index: Path(vault) / index_directory / folder
        for heading_index, (_, folder) in pairing.items()}
    if not hints:
        hints = []

    notes_by_headings = {}
    for heading_index, names in linked_names.items():
        search_hints = hints + [heading_directories[heading_index]]
        notes_by_headings[heading_index] = [
            VaultNote(vault, name=name, hints=search_hints)
            for name in names]
    return notes_by_headings
    

In [None]:
# NOTE(review): presumably resets note lookups cached by earlier cells — confirm.
VaultNote.clear_cache()

mock_vault = Path('mock_absolute_path')
mock_path =  mock_vault / Path('algebraic_geometry') / Path('some_reference') / Path('chapter_18_some_chapter')
folders = ['181_some_title',
    '182_some_other_title',
    '183_yet_another_title']
mock_glob_return_value = [str(mock_path / folder) for folder in folders]

# Stand-in for the heading/folder pairing, so that this test does not
# depend on `correspond_headings_with_folder`.
mock_correspond_headings_with_folder_return_value = {
  '18.1': ('18.1. Some title', '181_some_title'),
  '18.2': ('18.2. Some other title', '182_some_other_title'),
  '18.3': ('18.3. Yet another title', '183_yet_another_title')}

# Content of the index note: 3, 2, and 2 links under the three headings.
text = r"""# 18.1. Some title 
- [ ] [[some_reference 18.1|some_reference_some_alias]], 18.1, Page 300
- [ ] [[some_reference 18.1.1|another_alias]], 18.1.1, Page 300
- [ ] [[some_reference 18.1.2]], 18.1.2, Page 301
# 18.2. Some other title 
- [ ] [[some_reference 18.2]], 18.2, Page 305
- [ ] [[some_reference 18.2.1]], 18.2.1, Page 306
# 18.3. Yet another title 
- [ ] [[some_reference 18.3]], 18.3, Page 308
- [ ] [[some_reference 18.3.1]], 18.3.1, Page 308
"""

mock_index_file = MarkdownFile.from_string(text)

mock_index_note = VaultNote(rel_path = mock_path / '_index_18_some_index_note.md', vault=mock_vault)

# Patch the reading of the index note and the heading/folder pairing.
with (mock.patch("trouver.markdown.markdown.file.MarkdownFile.from_vault_note", return_value=mock_index_file),
      mock.patch("__main__.correspond_headings_with_folder", return_value=mock_correspond_headings_with_folder_return_value),
      # mock.patch("__main__.VaultNote", side_effect=[None, None, None, None, None, None, None])
      ):
    sample_output = information_notes_linked_in_index_note(mock_index_note, mock_vault)
    # One entry per heading, each listing the notes linked under that heading
    # (identified by the link target, not by the alias).
    test_eq(len(sample_output), 3)
    test_eq(len(sample_output['18.1']), 3)
    test_eq(len(sample_output['18.2']), 2)
    test_eq(sample_output['18.1'][0].name, 'some_reference 18.1')
    test_eq(sample_output['18.2'][1].name, 'some_reference 18.2.1')


In [None]:
#| export    
def move_information_notes_to_correct_folder(
        index_note: VaultNote,
        vault: PathLike,
        hints: list[PathLike] = None # Hints on where the information notes are likely to be found at.  Each path is relative to `vault` and points to a directory. Defaults to `None`.
        ) -> None:
    """Move every information note indexed by `index_note` to its correct folder.

    For a note linked under a given heading of `index_note`, the
    "correct folder" is the folder, located in the same directory as
    `index_note`, that is paired with that heading.
    Only level 1 headings are taken into account at the moment.
    """
    index_directory = os.path.dirname(index_note.path(relative=True))
    notes_by_heading = information_notes_linked_in_index_note(
        index_note, vault, hints)
    folders_by_heading = correspond_headings_with_folder(index_note, vault)
    # Handle the headings one at a time; the helper relocates the notes.
    for heading_index in notes_by_heading:
        _move_notes_under_heading(
            heading_index, notes_by_heading[heading_index],
            index_directory, folders_by_heading)


def _move_notes_under_heading(
        heading_index, notes: list[VaultNote], parent_folder, headings_folders):
    """Move each note in `notes` into the folder paired with `heading_index`.

    **Parameters**
    - heading_index
        - The key of `headings_folders` identifying the heading under
        which `notes` are linked.
    - notes
        - The notes linked under the heading.
    - parent_folder
        - The directory containing the index note, relative to the vault.
    - headings_folders
        - Maps each heading index to a tuple whose second entry is the
        name of the folder paired with the heading.

    Notes that already are in the destination folder are left alone.
    """
    destination = Path(parent_folder) / headings_folders[heading_index][1]
    for note in notes:
        # Compare full folder paths (relative to the vault): comparing the
        # bare destination folder name with the note's directory would
        # practically never match, so no note would ever be skipped.
        if Path(os.path.dirname(note.rel_path)) == destination:
            continue
        note.move_to_folder(destination)


The following example concerns the following vault: 


```
.
├── folder_1
│   ├── reference_1
│   │   ├── 1_chapter
│   │   │   ├── 1_section
│   │   │   │   ├── note_11.md
│   │   │   │   ├── note_12.md
│   │   │   │   ├── note_13.md
│   │   │   │   └── note_21.md
│   │   │   ├── 2_section
│   │   │   │   └── note_22.md
│   │   │   ├── 3_section
│   │   │   │   ├── note_31.md
│   │   │   │   ├── note_32.md
│   │   │   │   ├── note_41.md
│   │   │   │   └── note_42.md
│   │   │   ├── 4_section
│   │   │   │   └── a_note_belonging_in_reference_2_chapter_1.md
│   │   │   └── _index_1_chapter.md
│   │   ├── 2_chapter
│   │   │   └── _index_2_chapter.md
│   │   ├── 3_chapter
│   │   │   └── _index_3_chapter.md
│   │   └── _index_reference_1.md
│   ├── reference_2
│   │   ├── 1_chapter_reference_2
│   │   │   ├── 1_section_1_chapter_reference_2
│   │   │   └── _index_1_chapter_reference_2.md
│   │   ├── 2_chapter_reference_2
│   │   └── _index_reference_2.md
│   └── _index_folder_1.md
├── folder_2
│   └── _index_folder_2.md
└── _index.md
```


Say that the contents of `_index_1_chapter.md` and `_index_1_chapter_reference_2.md` are as follows:

In [None]:
# Content that `make_example_vault` writes to `_index_1_chapter.md`.
index_chapter_1_text = r"""
# 1. Section
- [[note_11]]
- [[note_12]]
- [[note_13]]

# 2. Section
- [[note_21]]
- [[note_22]]

# 3. Section
- [[note_31]]
- [[note_32]]

# 4. Section
- [[note_41]]
- [[note_42]]
"""

# Content that `make_example_vault` writes to `_index_1_chapter_reference_2.md`.
index_1_chapter_reference_2_text = r"""
# 1. Section 1_chapter_reference_2
- [[a_note_belonging_in_reference_2_chapter_1]]
"""

In particular, the following notes are in the "wrong" folders:

- `note_21.md` is in the folder `1_section`, but it should be in the folder `2_section`.
- `note_41.md` is in the folder `3_section`, but it should be in the folder `4_section`.
- `note_42.md` is in the folder `3_section`, but it should be in the folder `4_section`.
- `a_note_belonging_in_reference_2_chapter_1.md` is in the folder `4_section`, but it should be in the folder `1_section_1_chapter_reference_2`.



In [None]:
def make_example_vault(temp_dir: PathLike):
    """Create the example vault inside the existing directory `temp_dir`.

    All notes are created as empty files, except for
    `_index_1_chapter.md` and `_index_1_chapter_reference_2.md`, whose
    contents are `index_chapter_1_text` and
    `index_1_chapter_reference_2_text`, respectively.
    Some of the notes are deliberately placed in the "wrong" folders.
    """
    temp_dir = Path(temp_dir)
    # Listed so that every directory comes after its parent.
    directories = [
        'folder_1',
        'folder_1/reference_1',
        'folder_1/reference_1/1_chapter',
        'folder_1/reference_1/1_chapter/1_section',
        'folder_1/reference_1/1_chapter/2_section',
        'folder_1/reference_1/1_chapter/3_section',
        'folder_1/reference_1/1_chapter/4_section',
        'folder_1/reference_1/2_chapter',
        'folder_1/reference_1/3_chapter',
        'folder_1/reference_2',
        'folder_1/reference_2/1_chapter_reference_2',
        'folder_1/reference_2/1_chapter_reference_2/1_section_1_chapter_reference_2',
        'folder_1/reference_2/2_chapter_reference_2',
        'folder_2',
    ]
    empty_notes = [
        '_index.md',
        'folder_1/_index_folder_1.md',
        'folder_1/reference_1/_index_reference_1.md',
        'folder_1/reference_1/1_chapter/1_section/note_11.md',
        'folder_1/reference_1/1_chapter/1_section/note_12.md',
        'folder_1/reference_1/1_chapter/1_section/note_13.md',
        'folder_1/reference_1/1_chapter/1_section/note_21.md',
        'folder_1/reference_1/1_chapter/2_section/note_22.md',
        'folder_1/reference_1/1_chapter/3_section/note_31.md',
        'folder_1/reference_1/1_chapter/3_section/note_32.md',
        'folder_1/reference_1/1_chapter/3_section/note_41.md',
        'folder_1/reference_1/1_chapter/3_section/note_42.md',
        'folder_1/reference_1/1_chapter/4_section/a_note_belonging_in_reference_2_chapter_1.md',
        'folder_1/reference_1/2_chapter/_index_2_chapter.md',
        'folder_1/reference_1/3_chapter/_index_3_chapter.md',
        'folder_1/reference_2/_index_reference_2.md',
        # The `.md` extension was previously missing from this note.
        'folder_2/_index_folder_2.md',
    ]
    notes_with_content = {
        'folder_1/reference_1/1_chapter/_index_1_chapter.md':
            index_chapter_1_text,
        'folder_1/reference_2/1_chapter_reference_2/_index_1_chapter_reference_2.md':
            index_1_chapter_reference_2_text,
    }

    for directory in directories:
        (temp_dir / directory).mkdir()
    for note in empty_notes:
        (temp_dir / note).touch()
    for note, content in notes_with_content.items():
        with open(temp_dir / note, 'w') as writer:
            writer.write(content)

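The `move_information_notes_to_correct_folder` method, when first applied to `_index_1_chapter.md`, moves `note_21.md`, `note_41.md`, and `note_42.md`, but not `a_note_belonging_in_reference_2_chapter_1.md`, to their respective correct locations. Applying it afterwards to `_index_1_chapter_reference_2.md` moves `a_note_belonging_in_reference_2_chapter_1.md` as well.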

In [None]:
# Build the example vault in a temporary directory, which is removed afterwards.
with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
    make_example_vault(temp_dir)

    one_chapter_index_note = VaultNote(temp_dir, name='_index_1_chapter')
    one_chapter_reference_2_index_note = VaultNote(temp_dir, name='_index_1_chapter_reference_2')

    # First pass: the notes linked in `_index_1_chapter.md` end up in the
    # folders paired with the headings under which they are linked.
    move_information_notes_to_correct_folder(one_chapter_index_note, temp_dir)
    note_21 = VaultNote(temp_dir, name='note_21')
    test_eq(path_name_no_ext(note_21.path().parent), '2_section')
    note_41 = VaultNote(temp_dir, name='note_41')
    test_eq(path_name_no_ext(note_41.path().parent), '4_section')
    note_42 = VaultNote(temp_dir, name='note_42')
    test_eq(path_name_no_ext(note_42.path().parent), '4_section')

    # Second pass: the note linked in `_index_1_chapter_reference_2.md` is
    # moved out of `reference_1` into its folder within `reference_2`.
    move_information_notes_to_correct_folder(one_chapter_reference_2_index_note, temp_dir)
    note = VaultNote(temp_dir, name='a_note_belonging_in_reference_2_chapter_1')
    test_eq(path_name_no_ext(note.path().parent), '1_section_1_chapter_reference_2')
    