# markdown.markdown.file

> Functions for parsing markdown files

In [None]:
#| default_exp markdown.markdown.file

In [None]:
#| export
from __future__ import annotations
import copy
from enum import Enum
from itertools import product
from os import PathLike
import re
import string
from typing import Iterator, Union, Optional
import warnings
import yaml

from trouver.helper.html import remove_html_tags_in_text
from trouver.helper.regex import find_regex_in_text
from trouver.markdown.markdown.heading import (
    heading_level, heading_title
)
from trouver.markdown.obsidian.links import (
    ObsidianLink, 
    remove_links_from_text, EMBEDDED_PATTERN
)
from trouver.markdown.obsidian.vault import VaultNote, NoteDoesNotExistError, NotePathIsNotIdentifiedError
from trouver.markdown.obsidian.tags import (
    strip_auto_from_tag, tag_is_auto_tag
)

In [None]:
from collections import OrderedDict
import os
from pathlib import Path 
import tempfile
import shutil

from fastcore.test import *
from nbdev.showdoc import show_doc

## Frontmatter meta

In [None]:
#| export
def find_front_matter_meta_in_markdown_text(
        text: str # The text of a markdown document.
        ) -> Union[tuple[int, int], None]: # `(start, end)` such that `text[start:end]` is the front matter meta, including both `---` delimiters. `None` if `text` does not start with front matter meta.
    """
    Return the range in the markdown text string where front matter meta occurs.

    Front matter meta is only recognized at the very start of `text`.
    A `---`-delimited block that appears later in the document (e.g. a pair
    of horizontal rules in the body) is not front matter, so `None` is
    returned for it.

    The front matter meta is of the following YAML format.

    ```markdown
    ---
    key: entry
    ---
    ```
    """
    # `re.match` anchors the pattern at index 0, which enforces that the
    # front matter comes first. The conditional `(?(1)\n|)` demands a new line
    # before the closing `---` only if at least one character was consumed
    # between the delimiters, so the empty front matter `'---\n---'` matches
    # as well.
    match = re.match(r'---\n([\s\S])*?(?(1)\n|)---', text)
    return match.span() if match else None


The frontmatter meta in an Obsidian Markdown note is surrounded by `---` and must be at the top. We can identify the frontmatter meta in some text:

In [None]:
sample_text = r'''---
cssclass: clean-embeds
aliases: []
tags: [_meta/TODO/change_title, _meta/definition, _meta/literature_note]
---
# Topic[^1]

This is some text. Lalalalala

# See Also

# Meta
## References
![[_reference_foag]]

## Citations and Footnotes
[^1]: Some citation'''

start, end = find_front_matter_meta_in_markdown_text(sample_text)
just_frontmatter = sample_text[start:end]
print(just_frontmatter)
assert just_frontmatter.startswith('---')
assert just_frontmatter.endswith('---')
assert 'cssclass:' in just_frontmatter
assert 'aliases:' in just_frontmatter
assert 'tags:' in just_frontmatter

---
cssclass: clean-embeds
aliases: []
tags: [_meta/TODO/change_title, _meta/definition, _meta/literature_note]
---


If there is no frontmatter meta, then `find_front_matter_meta_in_markdown_text` returns `None`. 

In [None]:
sample_text = r'''
There is no frontmatter meta here.
'''

assert find_front_matter_meta_in_markdown_text(sample_text) is None


sample_text = r'''
---
aliases: []
--
Notice that the front matter meta is incorrectly formatted!
'''

assert find_front_matter_meta_in_markdown_text(sample_text) is None


In [None]:
#| hide
# Some additional tests
sample_text_1 = ''
assert find_front_matter_meta_in_markdown_text(sample_text_1) is None
sample_text_2 = "hello I know a song that gets on everybody\'s nose"
assert find_front_matter_meta_in_markdown_text(sample_text_2) is None
sample_text_3 = '---\n---'
start, end = find_front_matter_meta_in_markdown_text(sample_text_3)
assert sample_text_3[start:end] == sample_text_3
sample_text_4 = '---\n---\n---'
start, end = find_front_matter_meta_in_markdown_text(sample_text_4)
assert sample_text_4[start:end] == '---\n---'

In [None]:
#| export
def _enquote(str):
    return f'"{str}"'

In [None]:
#| hide
# TODO: tests
test_eq(_enquote('hi'), '"hi"')

In [None]:
#| export
def dict_to_metadata_lines(
        data: dict[str, Union[str, list[str]]], # The keys are str of the labels/names of the metadata. The values are the metadata, which are usually str or list.
        enquote_entries_in_fields: Optional[list[str]] = None # A list of str of fields in the YAML metadata whose entries need to be enquoted. If there is a string that is not a key of `data`, then that string is essentially ignored (in particular, no errors are raised). `None` (the default) means that no field is enquoted.
        ) -> list[str]: # Each str entry is the line for the yaml frontmatter metadata of an Obsidian Markdown note.
    """
    Convert a dict to a list of str of yaml frontmatter metadata
    that Obsidian recognizes.

    One line is produced per key of `data`, in the iteration order of `data`.

    This function is used in `MarkdownFile.replace_metadata`.
    """
    fields_to_enquote = enquote_entries_in_fields or []
    return [
        _line_str(key, value, fields_to_enquote)
        for key, value in data.items()]


def _line_str(
        key: str, # The label/name of the metadata field.
        value: Union[str, list[str]], # The entry (or list of entries) of the field. Entries that are not str are converted with `str`.
        enquote_entries_in_fields: Optional[list[str]] = None # The fields whose entries need to be enquoted; `None` means no field.
        ) -> str: # The line `key: value` or, for a list, `key: [entry, entry, ...]`.
    """This is a helper function for `dict_to_metadata_lines`."""
    enquote = key in (enquote_entries_in_fields or [])
    entries = value if isinstance(value, list) else [value]
    rendered_entries = [
        # Backslashes are doubled before enquoting because a backslash
        # starts an escape sequence inside a double-quoted YAML scalar.
        _enquote(str(entry).replace('\\', '\\\\')) if enquote else str(entry)
        for entry in entries]
    if isinstance(value, list):
        return f"{key}: [{', '.join(rendered_entries)}]"
    # NOTE(review): a `None` value is rendered as the text `None`, which YAML
    # reads back as a string rather than as null - confirm whether intended.
    return f"{key}: {rendered_entries[0]}"

# def _add_line_for_list(
#         lines: list[str], key: str, list_value: list, enquote: bool):
#     """This is a helper function for `dict_to_metadata_lines`."""
#     escaped_strings_in_list_value = [
#         _enquote(single_value.replace('\\', '\\\\'))
#         if _list_string_for_yaml_metadata_needs_quotes(single_value) 
#         else single_value
#         for single_value in list_value]
#     lines.append(f"{key}: [{', '.join(escaped_strings_in_list_value)}]")

# def _list_string_for_yaml_metadata_needs_quotes(single_value):
#     return isinstance(single_value, str) and bool(re.compile(r'[\\\{\[\|\*]').match(single_value))

In [None]:
#| hide
# assert not _list_string_for_yaml_metadata_needs_quotes('hi')
# assert _list_string_for_yaml_metadata_needs_quotes('[T]')
# assert _list_string_for_yaml_metadata_needs_quotes(r'\\begin{align*}')

`dict_to_metadata_lines` takes a dictionary and converts it to a list of strings, one per line, usable as Obsidian Markdown frontmatter meta.

In [None]:
sample_dict = OrderedDict([
            ('cssclass', 'clean-embeds'),
            ('aliases', []),
            ('tags', ['_meta/literature_note', '_meta/research', '_meta/self_written'])])
sample_output = dict_to_metadata_lines(sample_dict, [])
test_eq(sample_output,
    ['cssclass: clean-embeds', 'aliases: []', 'tags: [_meta/literature_note, _meta/research, _meta/self_written]'])

# sample_lines = dict_to_metadata_lines(sample_dict)
# sample_output = 
# print('\n'.join(sample_lines))
# assert sample_lines == sample_output


We can specify entries of fields to be enquoted. This might be necessary if a string needs to be escaped (because it has a character such as the backslash `\`, the vertical bar `|`, a square bracket `[`, etc.). For example, if a field is expected to have LaTeX strings, then it is good practice to specify such strings to be enquoted:

In [None]:
sample_dict = OrderedDict([
    ('latex_in_original', [r'\\mathscr{O}_{\\text {Proj } S_{*}}(n)'])
])
sample_output = dict_to_metadata_lines(sample_dict, enquote_entries_in_fields=['latex_in_original'])
test_eq(sample_output, ['latex_in_original: ["\\\\\\\\mathscr{O}_{\\\\\\\\text {Proj } S_{*}}(n)"]'])

# An example of an entry that is not a string
sample_dict = OrderedDict([
    ('latex_in_original', [1])
])
sample_output = dict_to_metadata_lines(sample_dict, enquote_entries_in_fields=['latex_in_original'])
test_eq(sample_output, ['latex_in_original: ["1"]'])

In [None]:
#| hide
# Make sure that quotation marks don't just disappear for no good reason in strings
sample_dict = OrderedDict([
    ('detect_regex', []),
    ('latex_in_original', ['[T]']),
    ('tags', ['_auto/notation_summary'])
])
sample_output = dict_to_metadata_lines(sample_dict, enquote_entries_in_fields=['latex_in_original'])
assert 'latex_in_original: ["[T]"]' in sample_output

sample_dict = OrderedDict([
    ('detect_regex', []),
    ('latex_in_original', ['|G|']),
    ('tags', ['_auto/notation_summary'])
])
sample_output = dict_to_metadata_lines(sample_dict, enquote_entries_in_fields=['latex_in_original'])
assert 'latex_in_original: ["|G|"]' in sample_output

sample_dict = OrderedDict([
    ('detect_regex', []),
    ('latex_in_original', ['*']),
    ('tags', ['_auto/notation_summary'])
])
sample_output = dict_to_metadata_lines(sample_dict, enquote_entries_in_fields=['latex_in_original'])
assert 'latex_in_original: ["*"]' in sample_output

sample_dict = OrderedDict([
    ('detect_regex', []),
    ('latex_in_original', ['[ Y(S)]']),
    ('tags', ['_auto/notation_summary'])
])
sample_output = dict_to_metadata_lines(sample_dict, enquote_entries_in_fields=['latex_in_original'])
assert 'latex_in_original: ["[ Y(S)]"]' in sample_output

In [None]:
#| export
# TODO: apply this function to the MarkdownFile.metadata function and the MarkdownFile.write function
def parse_metadata_string(
        metadata_str: str, # The string for YAML frontmatter metadata of an Obsidian Markdown note
        raise_error: bool = True, # If `True`, then raise an Error.
        raise_warning: bool = True # If `raise_error` is false and `raise_warning` is `True`, then raise a warning message.
        ) -> Union[dict[str], None]: # The keys are `str` of the labels/names of the metadata. The values are the metadata, which are usually `str` or `list`. If the YAML metadata string cannot be parsed, then this return value is `None`.
    """
    Attempt to parse the string for YAML frontmatter metadata of an
    Obsidian Markdown note.

    The string is handed to `yaml.safe_load` and whatever that call returns
    is returned unchanged.

    **Raises**

    - ValueError
        - If `raise_error` is `True` and a `yaml.YAMLError` (e.g. a
        `yaml.parser.ParserError`, `yaml.scanner.ScannerError`, or
        `yaml.reader.ReaderError`) is raised when reading (i.e. parsing or
        scanning) the YAML metadata. The message contains `metadata_str`
        and the original `yaml.YAMLError` is chained as the cause.
    - Warning
        - If `raise_error` is `False` and `raise_warning` is `True` and a
        `yaml.YAMLError` is raised when reading. The warning message also
        contains `metadata_str`.
    """
    try:
        parsed = yaml.safe_load(metadata_str)
    except yaml.YAMLError as yaml_error:
        message = (
            "The following YAML frontmatter metadata string cannot be parsed:\n"
            f"{metadata_str}\n\n")
        if raise_error:
            raise ValueError(message) from yaml_error
        if raise_warning:
            warnings.warn(message)
        # Neither raising nor (necessarily) warning: signal failure with `None`.
        return None
    return parsed

The `parse_metadata_string` function attempts to parse the string for YAML frontmatter metadata of an Obsidian Markdown note. If the argument can be parsed via the `yaml.safe_load` function, then the output of the `yaml.safe_load` invocation is returned:

In [None]:
good_metadata_str = r"""
field: [hi, bye]
fieldy: ooh
"""

metadata_output = parse_metadata_string(good_metadata_str)
assert type(metadata_output) == dict
test_eq(metadata_output, {'field': ['hi', 'bye'], 'fieldy': 'ooh'})

If the YAML frontmatter metadata string cannot be parsed by the `yaml.safe_load` function, then an error or a warning can optionally be raised. If `raise_error` is `False`, then `parse_metadata_string` returns `None`.

In [None]:
bad_metadata_str = "some_metadata_field: [\badly_formatted_string]"  # ReaderError
with (ExceptionExpected(ValueError)):
    metadata_output = parse_metadata_string(bad_metadata_str, raise_error=True)
# bad_metadata_str = "field: field2:" # ScannerError

test_warns(lambda: parse_metadata_string(bad_metadata_str, raise_error=False, raise_warning=True))

metadata_output = parse_metadata_string(bad_metadata_str, raise_error=False, raise_warning=False)
test_eq(metadata_output, None)

In [None]:
yaml_string = 'latex_from_original: ["[t]", "hi"]'
data = yaml.safe_load(yaml_string)
yaml.dump(data, default_flow_style=True)

"{latex_from_original: ['[t]', hi]}\n"

In [None]:
yaml.dump(r'\begin{align*}  \end{align*}')


'\\begin{align*}  \\end{align*}\n...\n'

In [None]:
yaml.dump('[t]')

"'[t]'\n"

## Replace embedded links with text

In [None]:
#| export
def replace_embedded_links_with_text(
        text: str, vault: PathLike) -> str:
    """
    Return `text` with every embedded link substituted by the text of
    the note that the link points to.

    Each match of `EMBEDDED_PATTERN` is parsed into an `ObsidianLink` and
    the note is looked up in `vault` by the link's file name. The text
    obtained from the note is inserted as is.

    Assumes that the notes of the links exist in the vault and
    have unique names in the vault. When the lookup raises
    `NoteDoesNotExistError` (e.g. the link points to something that is
    not an existing note), the embedded link is replaced with blank text.
    """
    link_ranges = find_regex_in_text(text, EMBEDDED_PATTERN)
    # Substitute from the last link to the first so that the ranges of the
    # links that are still to be processed stay valid.
    for start, end in reversed(link_ranges):
        link_object = ObsidianLink.from_text(text[start:end])
        try:
            note = VaultNote(
                vault, name=link_object.file_name, update_cache=False)
            replacement = note.text()
        except NoteDoesNotExistError:
            replacement = ''
        text = text[:start] + replacement + text[end:]
    return text

## MarkdownFile class

In [None]:
#| export
class MarkdownLineEnum(Enum):
    """
    The types with which a line (part) of a markdown file can be labeled.

    A member of this enum is the value under the `'type'` key of each
    dict in `MarkdownFile.parts`.
    """
    # See https://www.markdownguide.org/basic-syntax/
    DEFAULT = 0
    HEADING = 1  # Lines that `MarkdownFile` treats as headings
    CODE_BLOCK = 2
    META = 3  # Lines that `MarkdownFile` treats as frontmatter YAML metadata
    ORDERED_LIST = 4
    UNORDERED_LIST = 5
    BLOCKQUOTE = 6
    HORIZONTAL_RULE = 7
    COMMENT = 8  # presumably Obsidian comments surrounded by `%%` - TODO confirm
    BLANK_LINE = 9
    UNKNOWN = 10
    FOOTNOTE_DESCRIPTION = 11
    DISPLAY_LATEX_SINGLE = 12  # A single-line DISPLAY latex
    DISPLAY_LATEX_START = 13  # Start line of DISPLAY latex
    DISPLAY_LATEX_END = 14  # End line of DISPLAY latex
    DISPLAY_LATEX = 15  # All other DISPLAY latex lines

The following are the members of the `MarkdownLineEnum` class:

In [None]:
for line_type in MarkdownLineEnum:
    print(line_type.name)

DEFAULT
HEADING
CODE_BLOCK
META
ORDERED_LIST
UNORDERED_LIST
BLOCKQUOTE
HORIZONTAL_RULE
COMMENT
BLANK_LINE
UNKNOWN
FOOTNOTE_DESCRIPTION
DISPLAY_LATEX_SINGLE
DISPLAY_LATEX_START
DISPLAY_LATEX_END
DISPLAY_LATEX


In [None]:
#| export
class MarkdownFile:
    r"""
    Parses and represents the contents of an Obsidian styled Markdown
    file.

    The Markdown file must be formatted in certain ways. In general, 
    text components of different types (see `MarkdownLineEnum`) must
    be on different lines - no text components of different types may
    occupy the same line. In particular,

    - Comments (surrounded by `%%`) must not be on the same line as
    non-comments.
    - Display math mode LaTeX (surrounded by `$$`) must not be on the same line
    as anything that is not display math mode LaTeX.

    **Attributes**

    - parts - list[dict[str, Union[MarkdownLineEnum, str]]]
        - Represents the lines of the markdown file.
        Each dict has two keys, `'type'` and `'line'`, which respectively
        hold a `MarkdownLineEnum` and a `str` as values. While a value
        of `'line'` may have new line characters `'\n'`, it does not
        need to --- more specifically, the `__str__` method (and the
        `text_of_lines` method) adds new line characters `'\n'` in
        between lines.
    """
    
    def __init__(self, parts: list[dict[str, Union[MarkdownLineEnum, str]]]):
        """Store `parts` (the list itself, not a copy) as `self.parts`."""
        # There is no `text` attribute; use `str(self)` to obtain the text.
        self.parts = parts

    def __str__(self):
        """Return the text representation of the `MarkdownFile` object"""
        return self.text_of_lines(0, len(self.parts))

    def text_of_lines(
            self,
            start: int,
            end: int
            ) -> str:
        """Return the text of `self.parts[start:end]`,
        adding new line characters `'\n'` in between. """
        return '\n'.join([
            line_dict['line'] for line_dict in self.parts[start:end]])

    def get_headings(
            self,
            levels: Union[int, Iterator[int], None] = None, # The levels of the headings to search for. Each int is between 1 and 6 inclusive, as each heading can be of levels 1 to 6. Defaults to `None`, in which case all heading-levels are searched.
            include_start: bool = True # If `True` and if this object contains text that is not under a heading (i.e. the text does not start with a heading), then include `-1` as a key with the empty str as value.
            ) -> list[str]: # Each str is the heading, including leading sharps `'#'`.
        """
        Return a list of heading titles in the markdown file.
        """
        line_dict = self.get_headings_by_line_number(
            levels, include_start=include_start)
        return list(line_dict.values())

    def get_headings_by_line_number(
            self,
            levels: Union[Iterator[int], int, None] = None, # The levels of the headings to search for. Each int is between 1 and 6 inclusive, as each heading can be of levels 1 to 6. If `None` then all heading-levels are searched.
            include_start: bool = True # If `True` and if this object contains text that is not under a heading (i.e. the text does not start with a heading), then include `-1` as a key with the empty str as value.
            ) -> dict[int, str]: # The keys are line numbers and each value is str is the heading string, including the leading sharps `'#'`, but without any leading or trailing whitespace characters.
        """
        Return a dict of heading titles in the markdown file.
        """
        if not levels:
            levels = []
        elif type(levels) is int:
            levels = (levels,)
        line_numbers = [i for i, line_dict in enumerate(self.parts)
                        if line_dict['type'] is MarkdownLineEnum.HEADING]
        extract = {line_number: self.parts[line_number] 
                   for line_number in line_numbers}
        headings = {line_number: line_dict['line'].strip()
                    for line_number, line_dict in extract.items()
                    if (not(levels) 
                        or heading_level(line_dict['line']) in levels)}
        if include_start and 0 not in headings:
            headings[-1] = ''
        return headings
    
    def get_headings_and_text(
            self,
            levels: Union[Iterator[int], int, None] = None, # The levels of the headings to search for. Each int is between 1 and 6 inclusive, as each heading can be of levels 1 to 6. If `None`, then all heading-levels are searched.
            include_start: bool = True # If `True` and if this object contains text that is not under a heading (i.e. the text does not start with a heading), then include `-1` as a key with the empty str as value.        
            ) -> dict[str, str]: # Each key is the entire str of the heading, including the leading sharps `'#'`, but not including leading or trailing whitespace characters Each value is the str under that heading until the next heading, including at trailing next line characters `\n`.  If `include_start` is `True`, then one of the keys is the empty str and the corresponding value is the start of the text that is not under any heading.
        # TODO I think there is a bug, e.g. when all headers are level 1 and 
        # level = 2, include_start = True, everything is treated like the start.
        """
        Return a list of headings and the text under each heading.
        
        The text under each heading does not include the text of
        subheadings.

        """
        headings_by_lines = self.get_headings_by_line_number(levels, include_start)
        line_numbers = [num for num in headings_by_lines]
        line_numbers.append(len(self.parts))
        line_numbers.sort()
        heading_dict = {}
        # previous_header_line_num = -1
        previous_header_line_num = -1 if include_start else line_numbers[0]
        for i, header_line_num in enumerate(line_numbers):
            lines = self.parts[previous_header_line_num+1:header_line_num]
            lines = [line_dict['line'] for line_dict in lines]
            heading = headings_by_lines.get(previous_header_line_num, '')
            previous_header_line_num = header_line_num
            heading_dict[heading] = '\n'.join(lines)
        return heading_dict
    
    def get_headings_tree(
            self
            ) -> dict[Union[str, int], Union[str, dict]]: # The keys are 1. line numbers or 2. the str `'title'`.  The values are dict or str (the blank str if root node) respectively. The dicts in themselves recursively represent trees and the str are headings, including the leading sharps.
        """Return a dict representing the tree of headings in the markdown file.

        **Returns**

        - dict[Union[str, int], Union[str, dict]]
            - The keys are 1. line numbers or 2. the str `'title'`.  The values
            are dict or str (the blank str if root node) respectively. The
            dicts in themselves recursively represent trees and the str are
            headings, including the leading sharps. In particular, the root
            level dict also has the blank string `''` associated to the key
            `'title'`.
        """
        headings_dict = self.get_headings_by_line_number(include_start=False)
        root_dict = {'title': ''}
        dict_stack = [root_dict]
        current_level = 0
        # Go through each heading, and figure out where it should
        # be added.
        for line_number, heading in headings_dict.items():
            new_dict = {'title': heading}
            while (heading_level(dict_stack[-1]['title'])
                   >= heading_level(heading)):
                dict_stack.pop()
            dict_stack[-1][line_number] = new_dict
            dict_stack.append(new_dict)
            current_level = heading_level(heading)
        return root_dict
                
    
    def get_line_number_of_heading(
            self,
            title: Union[str, None] = None, # Title of the heading. Does not include the leading sharps (`'#'`). If `None`, then return the line number of any heading after the specified line number.
            from_line: int = 0, # The line number to start searching for the heading with `title` from.
            levels: Union[Iterator[int], int, None] = None # The levels of the heading to search for. Each int is between 1 and 6 inclusive, as each heading can be of levels 1 to 6. If `None`, then all heading-levels are searched.
            ) -> int: # An index in `self.parts`. If no index/line number of the matching heading exists, then return -1.
        """
        Return the line number of the heading with the specified
        title after the specified line number.
        """
        if type(levels) is int:
            levels = (levels,)
        heading_satisfies = lambda x: (
            x['type'] is MarkdownLineEnum.HEADING
            and (levels is None or heading_level(x['line']) in levels)
            and (title is None or heading_title(x['line']) == title))
        return next((i for i in range(from_line, len(self.parts))
                     if heading_satisfies(self.parts[i])), -1)
    
    def get_line_numbers_under_heading(
            self,
            title: Union[str, None] = None, # Title of the heading. Does not include the leading sharps (`'#'`). If `None`, then return the line number of any heading after the specified line number.
            from_line: int = 0, # The line number to start searching for the heading with `title` from.
            levels: Union[Iterator[int], int, None] = None, # The levels of the heading to search for. Each int is between 1 and 6 inclusive, as each heading can be of levels 1 to 6. If `None`, then all heading-levels are searched.
            include_subheadings: bool = True # If `True`, then include the subheadings.
            ) -> Union[tuple[int], int]: # `(start, end)` where `self.parts[start:end]` represents the parts under the heading, including the start of the heading.  If the heading of the specified title does not exist, then returns -1.
        """
        Return the line numbers belonging to the heading.
        """
        # TODO: implement from_line, levels and then test
        start_line = self.get_line_number_of_heading(title)
        if start_line == -1:
            return -1
        level = heading_level(self.parts[start_line]['line'])
        if include_subheadings:
            levels_to_search = range(1, level+1)
        else:
            levels_to_search = None
        headings_and_lines = self.get_headings_by_line_number(
            levels=levels_to_search)
        lines_with_headings = list(headings_and_lines)
        lines_with_headings.sort()
        index_of_start = lines_with_headings.index(start_line)
        if len(lines_with_headings) - 1 >= index_of_start + 1:
            end_line = lines_with_headings[index_of_start + 1]
        else:
            end_line = len(self.parts)
        return (start_line, end_line)

    def write(
        self,
        vn: VaultNote, # Represents the file.
        mode: str = 'w', # The specific mode to write the file with.
        # enquote_entries_in_metadata_fields: list[str] = [] # A list of str of fields in the YAML metadata whose entries need to be enquoted. If there is a string that is not a key of `new_metadata`, then that string is essentially ignored (in particular, no errors are raised).
        ) -> None:
        """
        Write to the file specified by a `VaultNote` object.

        If the file that the `VaultNote` object represents does not exist,
        then this method creates it.
        """
        if not vn.exists():
            vn.create()
        # if enquote_entries_in_metadata_fields:
        #     self.replace_metadata(
        #         self.metadata(), enquote_entries_in_metadata_fields)
        with open(vn.path(), mode, encoding='utf-8') as file:
            file.write(str(self))
            file.close()
            
    def insert_line(
            self,
            index: int, # The index at which to add `line_dict` into `self.parts`.
            line_dict: dict[str, Union[MarkdownLineEnum, str]] # See `self.parts`.
            ) -> None:
        """Add a line at the specified index/line number to `self.parts`."""
        self.parts.insert(index, line_dict)
        
    def remove_line(
            self,
            index: int = -1 # The index of the line to remove from `self.parts`.
            ) -> None:
        """Remove a line from `self.parts`."""
        del self.parts[index]
        
    def remove_lines(
            self,
            start: int, # The index of the first line to remove from `self.parts`.
            end: int # The end index to remove; the line of index `end` is not removed, but the line of index `end - ` is.
            ) -> None:
        """Remove lines from `self.parts`."""
        del self.parts[start:end]
        
    def pop_line(
            self,
            index: int = -1 # The index of the line to pop from `self.parts`.
            ) -> dict[str, Union[MarkdownLineEnum, str]]: # The popped line
        """Remove a line from `self.parts` and get its value."""
        return self.parts.pop(index)
        
    def add_line_to_end(
            self,
            line_dict: dict[str, Union[MarkdownLineEnum, str]] # See `self.parts`.
            ) -> None:
        """Add a line to the end of `self.parts`."""
        self.parts.append(line_dict)
        
    def add_blank_line_to_end(self) -> None:
        """Add a blank line to the end of `self.parts`."""
        # TODO: Should the blank line be '\n' or ''?
        self.add_line_to_end(
            {'type': MarkdownLineEnum.BLANK_LINE, 'line': '\n'})
        
    def add_line_in_section(
            self,
            title: str, # Title of the heading (without the leading sharps `'#'`)
            line_dict: dict[str, Union[MarkdownLineEnum, str]], # The line to add
            start: bool = True # If `True`, add to the start of the section. If `False`, add to the end of the section.
            ) -> None:
        # TODO start=False is not implemented. 
        # TODO Be able to tell what the inserted line's type is.
        # TODO This seems to work incorrectly if there are no lines
        # after a header.
        """Add a line in section specified by its title."""
        line_number = self.get_line_number_of_heading(title=title)
        self.insert_line(line_number + 1, line_dict)
     
    def remove_section(
            self,
            title: str # The title of the section to remove (without the starting `'#'`'s)
            ) -> None:
        """
        Remove the section with the specified title, including subsections,
        if the section exists.
        """
        section_line = self.get_line_number_of_heading(title=title)
        if section_line == -1:
            return
        level = heading_level(self.parts[section_line]['line'])
        big_level_lines = self.get_headings_by_line_number(range(1, level+1))
        line_numbers = [line_num for line_num in big_level_lines]
        line_numbers.sort()
        i = line_numbers.index(section_line) + 1
        next_section_line = (
            line_numbers[i] if i < len(line_numbers) else len(self.parts))
        self.remove_lines(section_line, next_section_line)
        
    
    def clear_section(
            self,
            title: str, # Title of the section (Without the leading sharps `'#'`)
            leave_blank_line: bool = True, # If `True`, leaves a blank line at the end of the section.
            clear_subsections: Optional[str] = None # `'clear'`, `'delete'`, or `None`. If `'clear'`, then just clears the contents of subsections, but does not affect the headers. If `'delete'`, then clears the contents of the subsections and deletes the headers. If `None`, then does not affect either. 
            ) -> None:
        # TODO: implement clear_subsections
        """
        Clear the section with the specified title, if it exists.
        
        Does not clear subsections.
        """
        section_line = self.get_line_number_of_heading(title=title)
        if section_line == -1:
            return
        next_section_line = self.get_line_number_of_heading(
            from_line=section_line+1)
        if next_section_line == -1:
            next_section_line = len(self.parts)
        self.parts = self.parts[:section_line+1] + self.parts[next_section_line:]
        if leave_blank_line:
            self.insert_line(
                section_line + 1,
                {'type': MarkdownLineEnum.BLANK_LINE, 'line': ''})

    def clear_all_sections(
            self,
            leave_blank_lines: bool = True
            # If True, leaves a blank line in each section
            ) -> None:
        """
        Clear all sections.
        
        Does not clear frontmatter metadata. Leaves all headers intact.
        """
        part_indices_to_remove = [
            i for (i, part) in enumerate(self.parts)
            if (part['type'] not in [MarkdownLineEnum.META,
                                     MarkdownLineEnum.HEADING])]
        for index_to_remove in reversed(part_indices_to_remove):
            self.remove_line(index_to_remove)
        
    def metadata_parts(
            self
            ) -> list[str]: # Each str is a 'line' from `self.parts` from the frontmatter YAML metadata.
        # TODO: test
        """
        Return the sublist from `self.parts` consisting of the parts
        that are of the frontmatter yaml metadata.
        """
        return [part['line'] for part in self.parts 
                if part['type'] == MarkdownLineEnum.META]
    
    def metadata(
            self
            ) -> Union[dict[str], None]: # The keys are `str` of the labels/names of the metadata. The values are the metadata, which are usually `str` or `list`. If there is not frontmatter YAML metadata, then this return value is `None`.
        # TODO: change the Exception to some kind of yaml format exception.
        """
        Return the frontmatter metadata as a dict.

        The metadata lines are taken from `self.metadata_parts()`; the first
        and last of them (the `---` delimiters) are dropped and the rest are
        joined with `'\\n'` and handed to `parse_metadata_string`.

        Writing the metadata read with this function can be faulty if
        the metadata attempts to escape characters that cannot be read by
        YAML; the escapes are not preserved upon writing.

        **Raises**

        - Whatever `parse_metadata_string` raises propagates to the caller.
          NOTE(review): earlier documentation stated that a `ValueError` is
          raised for invalid YAML; confirm against `parse_metadata_string`.
        """
        meta_lines = self.metadata_parts()
        yaml_text = '\n'.join(meta_lines[1:-1])
        return parse_metadata_string(yaml_text)
            
    def has_metadata(self) -> bool:
        """
        Return `True` if this `MarkdownFile` object has fronmatter
        YAML metadata.

        If the `MarkdownFile` object has any frontmatter YAML metadata, then
        it is expected to be at the very start; in particular, it must not
        be preceded by any whitespace characters.
        """
        return (self.parts 
                and self.parts[0]['type'] == MarkdownLineEnum.META)

    def add_metadata_section(
            self,
            check_exists: bool = True # If `True`, Check if there is already a metadata section at the beginning and do not add a metadata section if it exists.
            ) -> None:
        """
        Prepend an empty frontmatter YAML metadata section (two `---`
        delimiter parts) to `self.parts`.

        Nothing is added when `check_exists` is `True` and
        `self.has_metadata()` reports existing metadata.

        NOTE(review): the delimiter lines added here end with `'\\n'`,
        whereas `replace_metadata` writes bare `'---'` lines; confirm that
        the trailing newline is intended.
        """
        already_present = check_exists and self.has_metadata()
        if already_present:
            return
        delimiters = [
            {'type': MarkdownLineEnum.META, 'line': '---\n'}
            for _ in range(2)]
        self.parts = delimiters + self.parts

    def metadata_lines(
            self
            ) -> tuple[int]: # The tuple consists of 2 ints, `a` and `b`, where `self.parts[a:b+1]` represent the metadata lines, including the `'---'` before and after.
        # TODO: change the index conventions of the output so that
        # [a:b] is the parts
        """
        Return the indices in `self.parts` which are metadata.
        
        Assumes that `self.parts` is nonempty. 

        If the MarkdownFile object has any frontmatter YAML metadata, then
        it is expected to be at the very start; in particular, it must not
        be preceded by any whitespace characters.
        
        **Returns**
         
        - tuple
            - The tuple consists of 2 ints, `a` and `b`, where `self.parts[a:b+1]`
            represent the metadata lines, including the `'---'` before and after.
        """
        for i, part in enumerate(self.parts):
            if part['type'] == MarkdownLineEnum.META:
                start = i
                break
        for j in range(i+1, len(self.parts)):
            if self.parts[j]['type'] != MarkdownLineEnum.META:
                end = j-1
                break
        return (start, end)
        
    def replace_metadata(
            self,
            new_metadata: dict[str], # The dictionary representing the new metadata. The keys are the names of fields. The values are the field values, usually expected to be a single string or a list of strings
            enquote_entries_in_fields: list[str] = [] # A list of str of fields in the YAML metadata whose entries need to be enquoted. If there is a string that is not a key of `new_metadata`, then that string is essentially ignored (in particular, no errors are raised).
        ) -> None:
        """
        Replace the frontmatter metadata of this MarkdownFile object.

        The new metadata lines are produced by `dict_to_metadata_lines` and
        wrapped in `'---'` delimiter lines. They take the place of the parts
        in the range reported by `self.metadata_lines()`; every part before
        and after that range is kept.

        Optionally also enquotes string entries in fields specified by
        `enquote_entries_in_fields`.

        **Warning**
        - This method is only tested when the values of `new_metadata` are either `str` or
        `list[str]`.
        """
        lines = dict_to_metadata_lines(new_metadata, enquote_entries_in_fields)
        new_metadata_parts = [{'type': MarkdownLineEnum.META, 'line': '---'}]
        new_metadata_parts.extend(
            {'type': MarkdownLineEnum.META, 'line': line} for line in lines)
        new_metadata_parts.append({'type': MarkdownLineEnum.META, 'line': '---'})
        start, end = self.metadata_lines()
        # Keep everything strictly before the old frontmatter; slicing at
        # `start - 1` would silently drop the part that precedes it.
        self.parts = (
            self.parts[:start] + new_metadata_parts + self.parts[end+1:])
    
    def remove_metadata(self) -> None:
        """Remove the frontmatter metadata of this MarkdownFile object."""
        self.parts = [part for part in self.parts 
                      if part['type'] != MarkdownLineEnum.META]
    
    def has_tag(
            self,
            tag: str # The tag. Does not start with the hashtag `'#'`.
            ) -> bool:
        """
        Return `True` if the Markdown file has the specified tag in its
        YAML frontmatter metadata.

        More specifically, return `True` if the `MarkdownFile` objeect

        1. has YAML frontmatter metadata,
        2. the metadata has a `'tags'` section,, and
        3. the `'tags'` section is a list with the specified tag.

        Note that `tag` should not start with the hashtag `#` charater.
        """
        if not self.has_metadata():
            return False
        metadata = self.metadata()
        return (bool(metadata) and 'tags' in metadata
                and tag in metadata['tags'])
    
    def add_tags(
            self,
            tags: Union[str, list[str]], # The str representing the tags. May or may not start with `'#'`, e.g. `'#_meta/definition'` or `'_meta/definition'`.
            skip_repeats: bool = True, # If `True`, then this MarkdownFile will just have unique tags; merges pre-existing repeated tags if necessary. Also, the order of the tags may be changed.
            skip_repeated_auto: bool = True, # If `True`, then only add tags starting with '_auto/' if the corresponding non-auto tag does not exist, e.g.  '_auto/_meta/definition' is not added if the note already has '_meta/definition'.
            enquote_entries_in_metadata_fields: list[str] = [] # A list of str of fields in the YAML metadata whose entries need to be enquoted. If there is a string that is not a key of `new_metadata`, then that string is essentially ignored (in particular, no errors are raised).
            ) -> None:
        """
        Add tags to the frontmatter metadata.

        A frontmatter section is added first if the file has none. A single
        leading `'#'` is stripped from each given tag. Existing tags keep
        their relative order and the new tags are appended after them; with
        `skip_repeats`, repeated tags are merged, keeping the first
        occurrence.

        An empty `tags` field (parsed as `None`) is treated as having no
        tags, and a `tags` field holding a single string is treated as one
        tag.

        Ultimately the `replace_metadata` method is used to modify the YAML metadata.
        Use the `enquote_entries_in_metadata_fields` parameter to ensure that the
        `replace_metadata` invocation preserves enquoted metadata values.
        """
        if isinstance(tags, str):
            tags = [tags]
        self.add_metadata_section(check_exists=True)
        metadata = self.metadata()
        if not metadata:
            metadata = {}

        existing = metadata['tags'] if 'tags' in metadata else None
        if existing is None:
            existing = []
        elif isinstance(existing, str):
            # Iterating a bare string would split it into characters.
            existing = [existing]
        else:
            existing = list(existing)

        tags = [tag[1:] if tag.startswith('#') else tag for tag in tags]
        if skip_repeated_auto:
            tags = [tag for tag in tags
                    if (not tag_is_auto_tag(tag)
                        or strip_auto_from_tag(
                            tag, with_hash_tag=False) not in existing)]
        combined = existing + tags
        if skip_repeats:
            # `dict.fromkeys` removes duplicates while keeping the order of
            # first occurrence, so the written tags are deterministic.
            combined = list(dict.fromkeys(combined))
        metadata['tags'] = combined
        self.replace_metadata(metadata, enquote_entries_in_metadata_fields)
        
    def remove_tags(
            self,
            tags: list[str], # The str representing the tags. May or may not start with `'#'`, e.g. `'#_meta/definition'` or `'_meta/definition'`.
            enquote_entries_in_metadata_fields: list[str] = [] # A list of str of fields in the YAML metadata whose entries need to be enquoted. If there is a string that is not a key of `new_metadata`, then that string is essentially ignored (in particular, no errors are raised).
            ) -> None:
        """
        Remove the specified tags from the frontmatter metadata, if the
        frontmatter metadata has them.

        If the `MarkdownFile` object does not have a frontmatter, if the
        frontmatter does not include a `tags` field, or if that field is
        empty, then the `MarkdownFile` object is not modified.

        A single leading `'#'` is stripped from each given tag; a single
        `str` is accepted and treated as one tag. The remaining tags keep
        their relative order. Any repeated tags are either merged into one
        (if the tag is not in `tags`) or are removed (if the tag is in
        `tags`).

        Ultimately the `replace_metadata` method is used to modify the YAML metadata.
        Use the `enquote_entries_in_metadata_fields` parameter to ensure that the
        `replace_metadata` invocation preserves enquoted metadata values.
        """
        if isinstance(tags, str):
            # Otherwise the string would be iterated character by character.
            tags = [tags]
        tags_to_remove = {
            tag[1:] if tag.startswith('#') else tag for tag in tags}
        metadata = self.metadata()
        if metadata is None or 'tags' not in metadata:
            return
        existing = metadata['tags']
        if existing is None:
            # `tags:` with no value; there is nothing to remove.
            return
        if isinstance(existing, str):
            existing = [existing]
        # `dict.fromkeys` merges repeats while keeping first-occurrence order.
        metadata['tags'] = [
            tag for tag in dict.fromkeys(existing)
            if tag not in tags_to_remove]
        self.replace_metadata(metadata, enquote_entries_in_metadata_fields)
    
    def replace_auto_tags_with_regular_tags(
            self,
            exclude: Optional[list[str]] = None, # The tags whose `_auto/` tags should not be converted. The str should not start with `'#'` and should not start with `'_auto/'`.
            enquote_entries_in_metadata_fields: list[str] = [] # A list of str of fields in the YAML metadata whose entries need to be enquoted. If there is a string that is not a key of `new_metadata`, then that string is essentially ignored (in particular, no errors are raised).
            ) -> None:
        """
        Replace tags in the frontmatter metadata starting with `_auto/`
        with tags without the `_auto/`.

        Auto tags whose stripped form is listed in `exclude` are left as
        they are. If the `MarkdownFile` object has no frontmatter metadata,
        or the metadata has no `tags` field (or an empty one parsed as
        `None`), the object is not modified.

        Ultimately the `replace_metadata` method is used to modify the YAML metadata.
        Use the `enquote_entries_in_metadata_fields` parameter to ensure that the
        `replace_metadata` invocation preserves enquoted metadata values.
        """
        if not exclude:
            exclude = []
        metadata = self.metadata()
        if (not metadata or 'tags' not in metadata
                or metadata['tags'] is None):
            # Nothing to convert; avoid indexing into missing metadata.
            return
        old_tags = metadata['tags']
        if isinstance(old_tags, str):
            # Iterating a bare string would split it into characters.
            old_tags = [old_tags]
        new_tags = []
        for old in old_tags:
            no_auto = strip_auto_from_tag(old, with_hash_tag=False)
            if tag_is_auto_tag(old) and no_auto not in exclude:
                new_tags.append(no_auto)
            else:
                new_tags.append(old)
        metadata['tags'] = new_tags
        self.replace_metadata(metadata, enquote_entries_in_metadata_fields)
    
    def remove_in_line_tags(self) -> None:
        """Remove lines starting with in line tags."""
        part_indices_to_remove = []
        for i, part in enumerate(self.parts):
            if (part['type'] == MarkdownLineEnum.DEFAULT
                    and part['line'].strip().startswith('#')):
                part_indices_to_remove.append(i)
        for i in reversed(part_indices_to_remove):
            self.remove_line(i)
            
    def replace_links_with_display_text(
            self,
            remove_embedded_note_links: bool = False # If `True`, remove links to embedded notes as well. If `False`, does not modify embedded notes.`
            ) -> None:
        """Remove nonembedded links and replaces them with their display text.
        """
        for part in self.parts:
            if part['type'] == MarkdownLineEnum.META:
                continue
            part['line'] = remove_links_from_text(
                part['line'],
                remove_embedded_note_links=remove_embedded_note_links)
    
    def parts_of_id(
            self,
            par_id: str # Must begin with `'^'`.
            ) -> Union[tuple[int], None]: # `(start,end)` where `self.parts[start:end]` consists of the lines of the specified id. If the specified id does not exist for the note, then `None` is returned.
        """
        Return the indices of the lines within the Markdown file
        belonging to the specified text id.

        This id can be used as an anchor for a link in Obsidian. For example,
        `[[note#^65809f]]` is a link to a note named `note` to the text with id
        `65809f`. Such a text is marked with a trailing `^65809f`.
        """
        pattern = re.compile(fr'(?:^|\s){re.escape(par_id)}\b(?:\s*?)$')
        for i, part in enumerate(reversed(self.parts)):
            match = re.search(pattern, part['line'])
            if match:
                break

        end_of_text = len(self.parts) - 1 - i
        i = end_of_text
        has_any = _remove_text_id(self.parts[i]['line']).strip() != ''
        cont = True
        while i > 0 and cont:
            cont, has_any = self._include_previous_line_as_id_text(i, has_any)
            if cont:
                i -= 1
        return i, end_of_text+1
        # self.parts[i:end_of_text+1]

    def _include_previous_line_as_id_text(self, i: int, has_any: bool):
        """Used in `parts_of_id`.

        Decide whether the part before index `i` still belongs to the text
        of an id whose text currently starts at index `i`.

        **Parameters**
        - i - int
            - Greater than 0.
        - has_any - bool
            - `True` if any text beyond the id has been gathered.
            `False` otherwise.

        **Return**
        - bool, bool
            - The first bool is `True` if the previous line should be
            included in the text of the id. The second bool is `True` if
            any content has been included in the search.
        """
        current_type = self.parts[i]['type']
        # A heading is never extended backwards.
        if current_type == MarkdownLineEnum.HEADING:
            return False, True
        previous_part = self.parts[i-1]
        # A preceding heading is never pulled into the text.
        if previous_part['type'] == MarkdownLineEnum.HEADING:
            return False, has_any
        # Consecutive display-LaTeX lines belong together.
        if (current_type == MarkdownLineEnum.DISPLAY_LATEX
                and previous_part['type'] == MarkdownLineEnum.DISPLAY_LATEX):
            return True, True
        if previous_part['line'].strip() == '':
            # A blank line ends the text once some content was gathered;
            # before that it is skipped over.
            return (False, True) if has_any else (True, False)
        return True, True

    def replace_embedded_links_with_text(
            self,
            vault: PathLike,
            recursive: bool = True, # If `True`, then recursively replaces embedded links in the text of the embedded links.
            remove_paragraph_id: bool = True # If `True`, then removes the paragraph id's in the text of the embedded links. Leaves the paragraph id's of the origianl text in tact.
            ) -> None:
        """
        Remove embedded links and replaces them with their underlying text,
        as found in notes in the vault.
        
        Assumes that the embedded links do not loop infinitely.

        For embedded links to notes that do not exist in the vault,
        the embedded links are replaced with blank str.

        No new entries are added to `self.parts` even if the embedded links
        have multiple lines.
        """
        # TODO: implement error raise upon infinite loop.
        for part in self.parts:
            part['line'] = self._replace_embedded_links_one_line(
                part['line'], vault, recursive, remove_paragraph_id)
    
    def _replace_embedded_links_one_line(
            self, text: str, vault: PathLike, recursive: bool,
            remove_paragraph_id: bool) -> str:
        # TODO: test what happens when `link_note` is actually an image.
        # TODO: deal with possibility that a vaultnote gets an image
        # file name passed.
        """Used in `replace_embedded_links_with_text`.

        Return `text` with every match of `EMBEDDED_PATTERN` replaced by the
        text that the embedded link points to.

        - A link with a file name is resolved through `VaultNote` in `vault`;
          if that raises `NoteDoesNotExistError` or
          `NotePathIsNotIdentifiedError`, the link is replaced by nothing.
        - A link without a file name refers to this `MarkdownFile` itself.
        - An anchor starting with `'^'` embeds the text of that id, any other
          non-empty anchor embeds the section of that heading, and otherwise
          the whole file text is embedded.
        """
        embedded_links = find_regex_in_text(text, EMBEDDED_PATTERN)
        # Work from the last match to the first so that the (start, end)
        # ranges of the earlier matches stay valid while `text` is rewritten.
        for start, end in reversed(embedded_links):
            link_object = ObsidianLink.from_text(text[start:end])
            if link_object.file_name:
                try:
                    link_note = VaultNote(
                        vault, name=link_object.file_name,
                        update_cache=False)
                    link_file = MarkdownFile.from_vault_note(link_note)
                except (NoteDoesNotExistError, NotePathIsNotIdentifiedError):
                    # The embedded note cannot be found: drop the link.
                    text = text[:start] + text[end:]
                    continue
            else:
                # No file name: the link points into this very file.
                link_file = self
            # NOTE(review): `anchor == 0` presumably means "no anchor";
            # confirm against `ObsidianLink`. This check must stay first,
            # because the next branch calls `.startswith` on the anchor.
            if link_object.anchor == 0:
                replace = str(link_file)
            elif link_object.anchor.startswith('^'):
                replace = self._text_of_embedded_link_of_id(
                    link_file, link_object.anchor, vault, recursive, remove_paragraph_id)
            elif link_object.anchor:
                replace = self._text_of_embedded_link_of_section(
                    link_file, link_object.anchor, vault, recursive, remove_paragraph_id)
            else:
                # Empty anchor: embed the whole file.
                replace = str(link_file)
            text = text[:start] + replace + text[end:]
        return text

    def _text_of_embedded_link_of_id(
            self, link_file: MarkdownFile, par_id: str, vault: PathLike,
            recursive: bool, remove_paragraph_id: bool) -> str:
        """Used in `_replace_embedded_links_one_line"""
        start, end = link_file.parts_of_id(par_id)
        embedded_text = self._text_of_lines_of_embedded_links(
            link_file, vault, start, end, recursive,
            remove_paragraph_id)
        if remove_paragraph_id:
            embedded_text = _remove_text_id(embedded_text)
        return embedded_text

    def _text_of_embedded_link_of_section(
            self, link_file: MarkdownFile, heading: str, vault: PathLike,
            recursive: bool, remove_paragraph_id: bool) -> str:
        """Used in `_replace_embedded_links_one_line"""
        start, end = link_file.get_line_numbers_under_heading(
            title=heading, include_subheadings=True)
        return self._text_of_lines_of_embedded_links(
            link_file, vault, start, end, recursive,
            remove_paragraph_id)

    def _text_of_lines_of_embedded_links(
            self, link_file: MarkdownFile, vault: PathLike,
            start: int, end: int, recursive: bool,
            remove_paragraph_id: bool) -> str:
        """Used in `_text_of_embedded_link_of_id` and 
        `_text_of_embedded_link_of_section`
        """
        if not recursive:
            return link_file.text_of_lines(start, end)
        new_mf = MarkdownFile(
            copy.deepcopy(link_file.parts[start:end]))
        new_mf.replace_embedded_links_with_text(
            vault, recursive, remove_paragraph_id)
        return str(new_mf)

    def remove_footnotes_to_embedded_links(
            self,
            remove_footnote_mentions: bool = True # If `True`, removes the mentions to the footnote to the embedded links in the text.
            ) -> None:
        """
        Remove footnotes to embedded links.

        These are footnotes whose only content are embedded links, e.g.
        `[^1]: ![[embedded_note]]`

        The footnote description lines are removed first. If
        `remove_footnote_mentions` is `True`, every `[^label]` mention of a
        removed footnote is then deleted from the remaining lines.
        """
        doomed_footnotes = []
        for index, part in enumerate(self.parts):
            if part['type'] != MarkdownLineEnum.FOOTNOTE_DESCRIPTION:
                continue
            if re.fullmatch(
                    fr'\[\^\w+\]: ?{EMBEDDED_PATTERN}', part['line'].strip()):
                doomed_footnotes.append((index, part))
        # Remove from the back so that the remaining indices stay valid.
        for index, _ in doomed_footnotes[::-1]:
            self.remove_line(index)
        if not remove_footnote_mentions:
            return
        doomed_labels = []
        for _, part in doomed_footnotes:
            label_match = re.fullmatch(
                fr'\[\^(\w+)\]: ?{EMBEDDED_PATTERN}', part['line'].strip())
            doomed_labels.append(label_match.group(1))
        for part in self.parts:
            for label in doomed_labels:
                part['line'] = part['line'].replace(f'[^{label}]', '')
            
    def remove_headers(self) -> None:
        """
        Remove every heading line reported by
        `self.get_headings_by_line_number(include_start=False)`.
        """
        heading_lines = self.get_headings_by_line_number(include_start=False)
        # Highest line number first, so that earlier indices stay valid.
        for line_number in sorted(heading_lines, reverse=True):
            self.remove_line(line_number)
        
    def remove_double_blank_lines(self) -> None:
        """
        Remove blank lines so that there are no consecutive blank lines.

        Of each run of consecutive `MarkdownLineEnum.BLANK_LINE` parts, only
        the first is kept. The first part of the file has no predecessor and
        is therefore never removed.
        """
        blank = MarkdownLineEnum.BLANK_LINE
        # Start at 1: index 0 has no predecessor, and `self.parts[-1]` would
        # wrongly compare the first part against the last one.
        parts_to_remove = [
            index for index in range(1, len(self.parts))
            if self.parts[index]['type'] == blank
            and self.parts[index - 1]['type'] == blank]
        # Remove from the back so that the remaining indices stay valid.
        for index in reversed(parts_to_remove):
            self.remove_line(index)

    def remove_html_tags(self) -> None:
        """Remove HTML tags that are typeset in single lines.

        HTML tags that span multiple lines are ignored.
        """
        # TODO: test
        for _, part in enumerate(self.parts):
            part['line'], _ = remove_html_tags_in_text(part['line'])
            # self.parts[i-1]['']
    
    def merge_display_math_mode(self) -> None:
        """
        Merge each chunk of display math mode latex lines into a single line.

        Every part of type `MarkdownLineEnum.DISPLAY_LATEX_START` triggers
        `_merge_one_display_math_mode_latex_chunk` at its index.
        """
        chunk_opener = MarkdownLineEnum.DISPLAY_LATEX_START
        position = 0
        # `self.parts` shrinks while merging, so its length is re-read on
        # every iteration.
        while position < len(self.parts):
            starts_chunk = self.parts[position]['type'] == chunk_opener
            if starts_chunk:
                self._merge_one_display_math_mode_latex_chunk(position)
            position += 1
        
    def _merge_one_display_math_mode_latex_chunk(self, start: int) -> None:
        """
        Merge one chunk of display math mode latex lines in `self.parts`.

        The chunk runs from index `start` up to and including the first
        later part of type `MarkdownLineEnum.DISPLAY_LATEX_END` (or to the
        end of `self.parts` if there is none). Its lines are joined with
        single spaces into one `DISPLAY_LATEX_SINGLE` part.
        """
        chunk_closer = MarkdownLineEnum.DISPLAY_LATEX_END
        total = len(self.parts)
        closer_index = next(
            (index for index in range(start + 1, total)
             if self.parts[index]['type'] is chunk_closer),
            max(start + 1, total))
        stop = closer_index + 1
        merged_line = ' '.join(
            part['line'] for part in self.parts[start:stop])
        self.remove_lines(start, stop)
        merged_part = {'type': MarkdownLineEnum.DISPLAY_LATEX_SINGLE,
                       'line': merged_line}
        self.insert_line(start, merged_part)

    def merge_display_math_mode_into_preceding_text(
            self,
            separator: str = '\n' # The str with which to join the latex lines into the text lines. Note that the display math mode latex lines are not joined with this str.
            ) -> None:
        """
        Merge chunks of display math mode latex lines into single lines
        (see `merge_display_math_mode`) and then merge each of those single
        lines into the text that precedes it (see `_merge_latex_into_text`).
        """
        self.merge_display_math_mode()
        single_latex = MarkdownLineEnum.DISPLAY_LATEX_SINGLE
        position = 0
        # `self.parts` shrinks while merging, so its length is re-read on
        # every iteration.
        while position < len(self.parts):
            if self.parts[position]['type'] == single_latex:
                # Continue right after the part that received the merge.
                position = self._merge_latex_into_text(position, separator)
            position += 1

    def _merge_latex_into_text(self, index: int, separator: str) -> int:
        # TODO: test
        """Used in `merge_display_math_mode_into_preceding_text`"""
        j = index-1
        if j == -1:
            return index
        while j >= 0 and self.parts[j]['line'].strip() == '':
            j -= 1
        merged = separator.join([self.parts[i]['line'] for i in range(j, index+1)])
        self.remove_lines(start=j+1, end=index+1)
        self.parts[j]['line'] = merged
        return j

    @classmethod
    def from_file(
            cls,
            file_path: PathLike
            ) -> MarkdownFile:
        """
        Return a `MarkdownFile` object built from the contents of a file.

        The file is read as UTF-8, split on `'\\n'` and passed to
        `from_list`.

        **Raises**
        - FileNotFoundError
            - If `file_path` points to a file which does not exist.
        """
        with open(file_path, 'r', encoding='utf-8') as file:
            lines = file.read().split('\n')
        return cls.from_list(lines)
            
    @classmethod
    def from_list(
            cls,
            list_of_lines: list[str]
            ) -> MarkdownFile:
        """
        Return a `MarkdownFile` object from a list of lines.
        
        This may not work correctly if the markdown text is not
        sufficiently well-formatted. These formattings include:
        - comments must start the line with `'%%'`.
        - comments must end with `'%%'` followed by whitespaces
          and nothing else.
        - indents should be done with tabs?
        """
        parts = []
        parts.extend(cls._look_at_start_of_file(list_of_lines))
        for line in list_of_lines:
            parts.append(cls._line_dict(line, parts))
        return cls(parts)
    
    @classmethod
    def from_vault_note(cls, vn: VaultNote) -> MarkdownFile:
        """
        Return a `MarkdownFile` object from a `VaultNote` object, by reading
        the file at `vn.path()` with `from_file`.

        **Raises**
        - FileNotFoundError
            - If `vn` represents a note file which does not exist.
        """
        note_path = vn.path()
        return cls.from_file(note_path)
    
    @classmethod
    def from_string(cls, text: str) -> MarkdownFile:
        """
        Return a `MarkdownFile` object from a str.
        """
        return cls.from_list(text.splitlines(keepends=False))
    
    def copy(self, deep: bool) -> MarkdownFile:
        # TODO: test
        list_to_copy_with = self.parts
        if deep:
            list_to_copy_with = copy.deepcopy(list_to_copy_with)
        return self.__class__(list_to_copy_with)

    @classmethod
    def _look_at_start_of_file(cls, list_of_lines: list[str])\
            -> list[dict[str, Union[MarkdownLineEnum, str]]]:
        """
        Inspect start of file for blank lines and Front matter Meta.
        
        Only blank lines may preceed front matter in markdown.
        May remove leading entries in `list_of_lines`.
        
        **Parameters**
        - list_of_lines - list of str

        **Returns**
        - list[dict[str, Union[MarkdownLineEnum, str]]]
            - Each dict has two keys, 'type' and 'line', which respectively
            hold a `MarkdownLineEnum` and a str as values.
            
        **Notes**
        - First tries to find the opening of the front matter given by
        `'---'`, then tries to find the closing given by `'---'`.
        """
        # Find opening of front matter
        i = 0
        parts = []
        while list_of_lines and list_of_lines[0].isspace():
            parts.append({'type': MarkdownLineEnum.BLANK_LINE,
                      'line': list_of_lines[0]})
            del list_of_lines[0]
        
        if not list_of_lines or list_of_lines[i].strip() != '---':
            return parts

        # Find close of front matter
        found_close_of_front_matter = False
        for i, line in enumerate(list_of_lines):
            if i == 0:
                continue
            if line.strip() == '---':
                found_close_of_front_matter = True
                break
        if not found_close_of_front_matter:
            return parts
        for j in range(0,i+1):
            parts.append({'type': MarkdownLineEnum.META,
                          'line': list_of_lines[0]})
            del list_of_lines[0]
        return parts
        
    @classmethod
    def _line_dict(cls, line: str, parts: list[dict])\
            -> dict[str, Union[MarkdownLineEnum, str]]:
        """
        Return a dictionary giving some information about `line`.

        The type is decided from the line itself and from the previously
        classified part (`parts[-1]`), in this order:

        1. continuation of an unfinished comment or code block,
        2. continuation or end of a display math mode latex chunk,
        3. the line's own content (blank, latex, comment, heading, ...).

        **Parameters**
        - `line` - str
        - parts - list[dict[str, Union[MarkdownLineEnum, str]]]
            - Each dict is the dictionary of a previous line.

        **Returns**
        - dict[str, Union[MarkdownLineEnum, str]]
            - Has a key `'type'`, which holds a MarkdownLineEnum
            as its value.
            - Has a key `'line'`, which holds `line`.
        """
        line_dict = {'line': line}
        # The previous part is a comment that has not been closed with `%%`:
        # this line continues the comment.
        if parts and parts[-1]['type'] is MarkdownLineEnum.COMMENT:
            if not parts[-1]['line'].rstrip().endswith('%%'):
                line_dict['type'] = MarkdownLineEnum.COMMENT
                return line_dict
        elif line.startswith('%%'):
            # NOTE(review): this assignment does not return; every later
            # path assigns `'type'` again, so it is always overwritten.
            line_dict['type'] = MarkdownLineEnum.COMMENT
        # The previous part is a code block line other than a bare fence:
        # this line continues the code block.
        if parts and parts[-1]['type'] is MarkdownLineEnum.CODE_BLOCK:
            if parts[-1]['line'].strip() != '```':
                line_dict['type'] = MarkdownLineEnum.CODE_BLOCK
                return line_dict
        elif line.strip().startswith('```'):
            # NOTE(review): a fence line is marked COMMENT here, and the
            # value is overwritten below in any case. No path in this
            # function assigns CODE_BLOCK unless the previous part already
            # is CODE_BLOCK - confirm whether CODE_BLOCK was intended.
            line_dict['type'] = MarkdownLineEnum.COMMENT
        ###
        # Inside a display math mode latex chunk: the chunk ends on a line
        # that ends with or contains `$$`, otherwise it continues.
        if parts and parts[-1]['type'] in [
                MarkdownLineEnum.DISPLAY_LATEX,
                MarkdownLineEnum.DISPLAY_LATEX_START]:
            if _line_end_in_line_latex(line) or '$$' in line:
                line_dict['type'] = MarkdownLineEnum.DISPLAY_LATEX_END
            else:
                line_dict['type'] = MarkdownLineEnum.DISPLAY_LATEX
            return line_dict


        # Classify by the line's own content; the first matching test wins,
        # and every branch assigns `'type'`.
        if line.lstrip() == '':
            line_dict['type'] = MarkdownLineEnum.BLANK_LINE
        elif _line_start_and_end_in_line_latex(line):
            line_dict['type'] = MarkdownLineEnum.DISPLAY_LATEX_SINGLE
        elif _line_start_in_line_latex(line):
            line_dict['type'] = MarkdownLineEnum.DISPLAY_LATEX_START
        elif line.lstrip().startswith('%%'):
            line_dict['type'] = MarkdownLineEnum.COMMENT
        elif re.match(r'^#{1,6} ', line):
            line_dict['type'] = MarkdownLineEnum.HEADING
        elif line == '---':
            line_dict['type'] = MarkdownLineEnum.HORIZONTAL_RULE
        elif line.lstrip().startswith(('-', '*', '+')):
            line_dict['type'] = MarkdownLineEnum.UNORDERED_LIST
        elif re.match(r'^\d+\.', line.lstrip()):
            line_dict['type'] = MarkdownLineEnum.ORDERED_LIST
        elif line.lstrip().startswith('>'):
            line_dict['type'] = MarkdownLineEnum.BLOCKQUOTE
        elif re.match(r'^\[\^.+?\]:', line):
            line_dict['type'] = MarkdownLineEnum.FOOTNOTE_DESCRIPTION
        else:
            # Indented lines that look like a heading are UNKNOWN rather
            # than DEFAULT.
            stripped_line = line.strip()
            if stripped_line.startswith(('---', '>', '# ', '##')):
                line_dict['type'] = MarkdownLineEnum.UNKNOWN
            else:
                line_dict['type'] = MarkdownLineEnum.DEFAULT
        return line_dict


def _remove_text_id(line: str) -> str:
    """"""
    # ID either starts at the start of line or is preceded by a whitespace
    # ID consists of \w\d.
    # ID ends with word boundary and might be followed by whitespaces.
    return re.subn(r'((?:^|\s)\^[\w\d]+)\b(?:\s*?)$', r'', line)[0]

def _line_start_in_line_latex(line: str) -> bool:
    """Return `True` if the line starts an display math mode latex string.
    
    This is not fully accurate outside of context - e.g. 
    It will return `True` on `$$`, regardless of whether this
    actually starts or ends the display math mode latex.
    """
    return line.lstrip(string.whitespace + '*').startswith('$$') 

def _line_end_in_line_latex(line: str) -> bool:
    """Return `True` if the line ends with `$$`.

    A trailing text id (see `_remove_text_id`) and trailing whitespace and
    `*` characters are ignored.
    """
    without_id = _remove_text_id(line)
    ignorable_suffix = string.whitespace + '*'
    return without_id.rstrip(ignorable_suffix).endswith('$$')

def _line_start_and_end_in_line_latex(line: str) -> bool:
    """Return `True` if the line both opens and closes display math mode LaTeX.

    A text id at the end of the line (see `_remove_text_id`) is dropped
    first, and surrounding whitespace and asterisks `*` are ignored. What
    remains must begin with `$$` and contain a later, separate `$$`, so a
    bare `$$` does not qualify.
    """
    core = _remove_text_id(line).strip(string.whitespace + '*')
    # `re.match` only anchors at the start: an opening `$$` followed
    # somewhere later on the line by a closing `$$`.
    opened_and_closed = re.match(r'\$\$.*\$\$', core)
    return opened_and_closed is not None

In [None]:
show_doc(MarkdownFile)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L193){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile

>      MarkdownFile
>                    (parts:list[dict[str,typing.Union[__main__.MarkdownLineEnum
>                    ,str]]])

Parses and represents the contents of an Obsidian styled Markdown
file.

The Markdown file must be formatted in certain ways. In general, 
text components of different types (see `MarkdownLineEnum`) must
be on different lines - no text components of different types may
occupy the same line. In particular,

- Comments (surrounded by `%%`) must not be on the same line as
non-comments.
- Display math mode LaTeX (surrounded by `$$`) must not be on the same line
as text that is not display math mode LaTeX.

**Attributes**

- text - str
- parts - list[dict[str, Union[MarkdownLineEnum, str]]]
    - Represents the lines of the markdown file.
    Each dict has two keys, `'type'` and `'line'`, which respectively
    hold a `MarkdownLineEnum` and a `str` as values. While a value
    of `'line'` may have new line characters `'\n'`, it does not
    need to --- more specifically, the `__str__` method (and the
    `text_of_lines` method) adds new line characters `'\n'` in
    between lines.

## Example text and example vault used in the rest of the page

The examples demonstrated for the `MarkdownFile` class here will be based upon the following text:

In [None]:
template_text = """---
cssclass: clean-embeds
aliases: []
tags: [_meta/literature_note]
---
# Topic[^1]

# See Also

# Meta
## References

## Citations and Footnotes
[^1]: Citation"""


text_1 = """
# Section 1
some text 

asdfasdf

## Subsection a
Didididi
Dododododo
# Section 2
"""

text_2 = """---
tags: [_meta/definition, _meta/concept, _auto/_meta/notation, this_tag_will_be_removed]
---
# Topic
This is some note with some stuff.
"""

text_3 = """
# Section 1
Some stuff

#_meta/question Will `remove_in_line_tags` remove this line? Yes it will!

# Hello

 #tag Will `remove_in_line_tags` remove this line? Yes it will!
"""

text_4 = """
# Some thing

I have a [[this is a note#this is an anchor in the note|link]]

## Another topic
This is a link without a specified display text: [[some_kind_of_note]].

This is a link to an anchor without a specified display text: [[another_note#another anchor]].
"""
    
text_5 = """# A header
![[This note is embedded]].

The link above should will not be replaced by `replace_links_with_display_text`,
unless `remove_embedded_note_links` is set to `True`."""
    
text_6 = """
# Header

I want to link to some embedded note[^1]

[^1]: ![[link_to_embedded_note_1]]

You can also let the footnote mention be alphanumeric[^1][^note]

[^note]: ![[link_to_embedded_note_2]]
"""
    
text_7 = r"""---
cssclass: clean-embeds
aliases: []
tags: [_meta/concept, _meta/literature_note]
---
# Grothendieck-Witt ring elements of a finite field are given by rank and discriminant up to squares[^1]

$\operatorname{GW}(\mathbb{F}_q)\cong \mathbb{Z}\times \mathbb{F}_q^{\times}/(\mathbb{F}_q^{\times})^2$[^2] where the isomorphism is given by the rank and discriminant.


[^2]: ![[notation_GW_k_Grothendieck_Witt_ring_of_a_field]]


# See Also
- [[grothendieck_witt_ring_of_a_polynomial_ring_over_a_field_is_isomorphic_to_that_of_the_field]]

# Meta
## References
![[_reference_pauli_wickelgren_aa1]]


## Citations and Footnotes
[^1]: Pauli, Wickelgren, Example 3.7, Page 4"""


text_8 = r"""---
cssclass: clean-embeds
---
# Topic[^1]
Here is a LaTeX Equation:

$$ 5 \neq 7$$
Hey
Okay, now here is another one:
$$\begin{align*}
\sum_{k=1}^n k = \frac{n(n+1)}{2}
\end{align*}$$

%%This is a comment. 
The comment is not visible.
This is the end of the comment %%

This is the end of this note. This is visible.
"""

text_9 = r"""
This LaTeX Equation has `**` surrounding it:
**$$\mathcal{O}_X$$**

This LaTeX Equation has an id:
$$5 \neq 7$$ ^221b51

This LaTeX Equation also has an id:
$$5 \neq 7
$$ ^221b51
This is the end
"""

text_10 = r"""This is a single line display math mode LaTeX equation:

$$\mathcal{O}_X$$

This is a single multi-line display math mode LaTeX equation:

$$
5 + 2 = 7
$$

These are multiple consecutive display math mode LaTeX equations:

$$1+1 = 2
$$
$$5 + 7 = 14$$
$$

8 + 4 = 12

$$
"""

text_11 = r"""$$asdf$$
$$asdf$$
$$asdf$$
After text."""

text_12 = r"""

"""

We also use multiple example vaults.

The following vault will be used mainly for some basic file interactions of the `MarkdownFile` class.

```
.
└── algebraic_geometry
    ├── a1_homotopy_theory
    │   ├── pauli_wickelgren_aa1
    │   │   ├── 3_the_grothendieck_witt_ring_of_k
    │   │   │   └── pauli_wickelgren_aa1_example 3.7.md
    │   │   └── _index_pauli_wickelgren_aa1.md
    |   └── _index_a1_homotopy_theory.md
    └── _index_algebraic_geometry.md
```

`pauli_wickelgren_aa1_example 3.7.md` will contain the contents of `text_7`.

In [None]:
def make_example_vault(temp_dir: PathLike):
    """Create the first example vault inside the directory `temp_dir`.

    The nested `algebraic_geometry` folders are created, an empty index
    note is placed in each of the first three levels, and the example note
    `pauli_wickelgren_aa1_example 3.7.md` is written with the contents of
    `text_7`.

    The folders are made with `os.mkdir`, so `temp_dir` must already exist
    and must not yet contain an `algebraic_geometry` folder.
    """
    vault_root = Path(temp_dir)
    subject_dir = vault_root / 'algebraic_geometry'
    topic_dir = subject_dir / 'a1_homotopy_theory'
    reference_dir = topic_dir / 'pauli_wickelgren_aa1'
    chapter_dir = reference_dir / '3_the_grothendieck_witt_ring_of_k'

    # `os.mkdir` does not create missing parents, so go from the outermost
    # folder inward.
    for folder in (subject_dir, topic_dir, reference_dir, chapter_dir):
        os.mkdir(folder)

    example_note = chapter_dir / 'pauli_wickelgren_aa1_example 3.7.md'
    notes_to_touch = (
        subject_dir / '_index_algebraic_geometry.md',
        topic_dir / '_index_a1_homotopy_theory.md',
        reference_dir / '_index_pauli_wickelgren_aa1.md',
        example_note,
    )
    for note in notes_to_touch:
        note.touch()

    with open(example_note, 'w') as writer:
        writer.write(text_7)

# with tempfile.TemporaryDirectory(prefix='temp_dir', dir=os.getcwd()) as temp_dir:
#     make_example_vault(temp_dir)
#     os.startfile(os.getcwd())
#     input()

We use the following example vault to demonstrate examples of embedded notes and other functionalities:

```
.
├── note_which_becomes_entirely_embedded_1.md
├── note_with_embedded_links_1.md
└── note_with_paragraphs_that_are_embedded_1.md
```

In [None]:
def make_example_vault_2(temp_dir: PathLike):
    """Create the second example vault inside the directory `temp_dir`.

    Three notes are written directly into `temp_dir`:

    - `note_which_becomes_entirely_embedded_1.md` - a short note ending
      in a multi-line `%%` comment.
    - `note_with_embedded_links_1.md` - a note embedding the first note
      in full as well as two text ids and a section of the third note.
    - `note_with_paragraphs_that_are_embedded_1.md` - a note with text
      ids, display math mode LaTeX and headings.
    """
    vault_root = Path(temp_dir)

    entirely_embedded_text = r"""Hello, this is a note which becomes entirely embedded.
%%This is a comment. 
The comment is not visible.
This is the end of the comment %%"""

    embedded_links_text = r"""This is a note.

There are some embedded text here:

![[note_which_becomes_entirely_embedded_1]]


![[note_with_paragraphs_that_are_embedded_1#^65809f]]

![[note_with_paragraphs_that_are_embedded_1#^221b51]]

![[note_with_paragraphs_that_are_embedded_1#Section]]"""

    embedded_paragraphs_text = r"""This paragraph becomes embedded.

# Thank you for watching 
cheese 
bandit
$$asdf$$
asdf 
^65809f



This paragraph has not id.

$$5 \neq 7
$$ 
^221b51

# This section has an id ^123456

# This section has no id 
^fff123

$$\mathcal{O}_X$$  ^latexthing

# Section
Some kind of section?

Lalalala
## Subsection
argonaut
# Section
Maybe?
"""

    # Insertion order of the dict fixes the order in which files are made.
    text_by_note = {
        vault_root / 'note_which_becomes_entirely_embedded_1.md': entirely_embedded_text,
        vault_root / 'note_with_embedded_links_1.md': embedded_links_text,
        vault_root / 'note_with_paragraphs_that_are_embedded_1.md': embedded_paragraphs_text,
    }

    for note_path in text_by_note:
        note_path.touch()

    for note_path, note_text in text_by_note.items():
        with open(note_path, 'w') as writer:
            writer.write(note_text)

## Constructing a `MarkdownFile` object

In [None]:
show_doc(MarkdownFile.from_vault_note)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L1101){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.from_vault_note

>      MarkdownFile.from_vault_note
>                                    (vn:trouver.markdown.obsidian.vault.VaultNo
>                                    te)

Return a `MarkdownFile` object from a `VaultNote` object.

**Raises**
- FileNotFoundError
    - If `vn` represents a note file which does not exist.

In [None]:
show_doc(MarkdownFile.from_file)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L1063){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.from_file

>      MarkdownFile.from_file (file_path:os.PathLike)

Return a `MarkdownFile` object from a specified file.

**Raises**
- FileNotFoundError
    - If `file_path` points to a file which does not exist.

In [None]:
show_doc(MarkdownFile.from_list)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L1080){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.from_list

>      MarkdownFile.from_list (list_of_lines:list[str])

Return a `MarkdownFile` object from a list of lines.

This may not work correctly if the markdown text is not
sufficiently well-formatted. These formattings include:
- comments must start the line with `'%%'`.
- comments must end with `'%%'` followed by whitespaces
  and nothing else.
- indents should be done with tabs?

In [None]:
show_doc(MarkdownFile.from_string)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L1112){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.from_string

>      MarkdownFile.from_string (text:str)

Return a `MarkdownFile` object from a str.

The most convenient way to construct a `MarkdownFile` object is by the `MarkdownFile.from_vault_note` factory method.

In [None]:
with tempfile.TemporaryDirectory(prefix='temp_dir_', dir=os.getcwd()) as temp_dir:
    # Build the example vault inside a throwaway directory.
    make_example_vault(temp_dir)

    # `make_example_vault` writes `text_7` into this note, so converting the
    # parsed note back to a str should reproduce `text_7`.
    vault_note = VaultNote(temp_dir, name='pauli_wickelgren_aa1_example 3.7')
    mf = MarkdownFile.from_vault_note(vault_note)
    test_eq(str(mf), text_7)

    # A `VaultNote` whose file does not exist cannot be turned into a
    # `MarkdownFile`; a `FileNotFoundError` is expected instead.
    vault_note = VaultNote(temp_dir, rel_path='does_not_exist.md')
    assert not vault_note.exists()
    with ExceptionExpected(ex=FileNotFoundError):
        mf = MarkdownFile.from_vault_note(vault_note)


We can similarly construct a `MarkdownFile` by the `MarkdownFile.from_file` factory method without having to use a `VaultNote` object. 

In [None]:
with tempfile.TemporaryDirectory(prefix='temp_dir_', dir=os.getcwd()) as temp_dir:
    # Build the example vault inside a throwaway directory.
    make_example_vault(temp_dir)

    # `temp_dir` is rebound from a str to a `Path` so that the note's path
    # can be built with the `/` operator.
    temp_dir = Path(temp_dir)
    mf = MarkdownFile.from_file(temp_dir / 'algebraic_geometry' / 'a1_homotopy_theory' / 'pauli_wickelgren_aa1' / '3_the_grothendieck_witt_ring_of_k' / 'pauli_wickelgren_aa1_example 3.7.md')

    # A path to a file which does not exist raises a `FileNotFoundError`.
    assert not os.path.exists(temp_dir / 'does_not_exist.md')
    with ExceptionExpected(ex=FileNotFoundError):
        mf = MarkdownFile.from_file(temp_dir / 'does_not_exist.md')

If the list of lines of the Markdown file are available, then the `MarkdownFile.from_list` factory method can be used.

Similarly, if the entire string of the Markdown file is available, then the `MarkdownFile.from_string` factory method can be used.

In [None]:
list_of_lines = template_text.splitlines()
template_mf_1 = MarkdownFile.from_list(list_of_lines)
template_mf_2 = MarkdownFile.from_string(template_text)

print(str(template_mf_1))
test_eq(str(template_mf_1), str(template_mf_2))

---
cssclass: clean-embeds
aliases: []
tags: [_meta/literature_note]
---
# Topic[^1]

# See Also

# Meta
## References

## Citations and Footnotes
[^1]: Citation


In [None]:
#| hide
# Check that `MarkdownFile.from_string` assigns the display math mode LaTeX
# types (DISPLAY_LATEX_SINGLE, DISPLAY_LATEX_START, DISPLAY_LATEX,
# DISPLAY_LATEX_END) and the COMMENT type to the expected parts.
# The indices below are indices into `mf.parts` for the respective text.

# `text_8`: a single-line equation, a multi-line equation and a multi-line
# comment.
mf = MarkdownFile.from_string(text_8)
assert mf.parts[6]['type'] == MarkdownLineEnum.DISPLAY_LATEX_SINGLE
assert mf.parts[9]['type'] == MarkdownLineEnum.DISPLAY_LATEX_START
assert mf.parts[10]['type'] == MarkdownLineEnum.DISPLAY_LATEX
assert mf.parts[11]['type'] == MarkdownLineEnum.DISPLAY_LATEX_END
assert mf.parts[13]['type'] == MarkdownLineEnum.COMMENT
assert mf.parts[14]['type'] == MarkdownLineEnum.COMMENT
assert mf.parts[15]['type'] == MarkdownLineEnum.COMMENT

# `text_9`: equations surrounded by `**` and equations followed by a text id.
# The pattern is a raw string so that `\$` is not read as an (invalid) str
# escape sequence.
assert re.match(r'\$\$.*\$\$', r'$$\\mathcal{O}_X$$')
mf = MarkdownFile.from_string(text_9)
assert mf.parts[2]['type'] == MarkdownLineEnum.DISPLAY_LATEX_SINGLE
assert mf.parts[5]['type'] == MarkdownLineEnum.DISPLAY_LATEX_SINGLE
assert mf.parts[8]['type'] == MarkdownLineEnum.DISPLAY_LATEX_START
assert mf.parts[9]['type'] == MarkdownLineEnum.DISPLAY_LATEX_END

# `text_10`: single-line, multi-line and consecutive equations, including an
# equation with blank lines between its `$$` delimiters.
assert _line_start_and_end_in_line_latex(r'$$hi$$')
mf = MarkdownFile.from_string(text_10)
assert mf.parts[2]['type'] == MarkdownLineEnum.DISPLAY_LATEX_SINGLE
assert mf.parts[6]['type'] == MarkdownLineEnum.DISPLAY_LATEX_START
assert mf.parts[7]['type'] == MarkdownLineEnum.DISPLAY_LATEX
assert mf.parts[8]['type'] == MarkdownLineEnum.DISPLAY_LATEX_END
assert mf.parts[12]['type'] == MarkdownLineEnum.DISPLAY_LATEX_START
assert mf.parts[13]['type'] == MarkdownLineEnum.DISPLAY_LATEX_END
assert mf.parts[14]['type'] == MarkdownLineEnum.DISPLAY_LATEX_SINGLE
assert mf.parts[15]['type'] == MarkdownLineEnum.DISPLAY_LATEX_START
assert mf.parts[16]['type'] == MarkdownLineEnum.DISPLAY_LATEX
assert mf.parts[17]['type'] == MarkdownLineEnum.DISPLAY_LATEX
assert mf.parts[18]['type'] == MarkdownLineEnum.DISPLAY_LATEX
assert mf.parts[19]['type'] == MarkdownLineEnum.DISPLAY_LATEX_END

In [None]:
#| hide
# Regression test for the types of parts around display math mode LaTeX,
# particularly the non-LaTeX parts that come right after it.
# The line `$$482` used to be mistaken for a DISPLAY_LATEX part as opposed to
# a DISPLAY_LATEX_END part.
text = r"""---
cssclass: clean-embeds
aliases: []
tags: [_meta/TODO/change_title, _meta/literature_note, _reference/some_reference]
---
# Topic[^1]

Some text
$$
a_{00} x_{0}^{2}+a_{01} x_{0} x_{1}+\cdots+a_{22} x_{2}^{2}=0
$$482
This is a title in a page

# See Also

# Meta
## References
![[_reference_some_reference]]

## Citations and Footnotes
[^1]: reference"""
mf = MarkdownFile.from_string(text)
print(mf.parts)

# Pin the part types instead of relying on inspecting the printed output.
assert mf.parts[8]['type'] == MarkdownLineEnum.DISPLAY_LATEX_START
assert mf.parts[9]['type'] == MarkdownLineEnum.DISPLAY_LATEX
assert mf.parts[10]['line'] == '$$482'
assert mf.parts[10]['type'] == MarkdownLineEnum.DISPLAY_LATEX_END
# The line right after the equation is plain text again.
assert mf.parts[11]['type'] == MarkdownLineEnum.DEFAULT

[{'type': <MarkdownLineEnum.META: 3>, 'line': '---'}, {'type': <MarkdownLineEnum.META: 3>, 'line': 'cssclass: clean-embeds'}, {'type': <MarkdownLineEnum.META: 3>, 'line': 'aliases: []'}, {'type': <MarkdownLineEnum.META: 3>, 'line': 'tags: [_meta/TODO/change_title, _meta/literature_note, _reference/some_reference]'}, {'type': <MarkdownLineEnum.META: 3>, 'line': '---'}, {'line': '# Topic[^1]', 'type': <MarkdownLineEnum.HEADING: 1>}, {'line': '', 'type': <MarkdownLineEnum.BLANK_LINE: 9>}, {'line': 'Some text', 'type': <MarkdownLineEnum.DEFAULT: 0>}, {'line': '$$', 'type': <MarkdownLineEnum.DISPLAY_LATEX_START: 13>}, {'line': 'a_{00} x_{0}^{2}+a_{01} x_{0} x_{1}+\\cdots+a_{22} x_{2}^{2}=0', 'type': <MarkdownLineEnum.DISPLAY_LATEX: 15>}, {'line': '$$482', 'type': <MarkdownLineEnum.DISPLAY_LATEX_END: 14>}, {'line': 'This is a title in a page', 'type': <MarkdownLineEnum.DEFAULT: 0>}, {'line': '', 'type': <MarkdownLineEnum.BLANK_LINE: 9>}, {'line': '# See Also', 'type': <MarkdownLineEnum.HEADI

## Getting headings of a `MarkdownFile` object

In a Markdown file, one can set headings. In fact, you can consider the text here as text rendered with Markdown! More generally, you can type Markdown in Jupyter notebooks.

For example, typing `# Title` on its own line renders a level-1 heading, and typing `## Subtitle` renders a level-2 heading.


There are multiple methods in the `MarkdownFile` class which retrieve the headings of a Markdown file and their locations.


In [None]:
template_mf = MarkdownFile.from_string(template_text)

In [None]:
show_doc(MarkdownFile.get_headings)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L239){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.get_headings

>      MarkdownFile.get_headings (levels:Union[int,Iterator[int],NoneType]=None,
>                                 include_start:bool=True)

Return a list of heading titles in the markdown file.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| levels | Union[int, Iterator[int], None] | None | The levels of the headings to search for. Each int is between 1 and 6 inclusive, as each heading can be of levels 1 to 6. Defaults to `None`, in which case all heading-levels are searched. |
| include_start | bool | True | If `True` and if this object contains text that is not under a heading (i.e. the text does not start with a heading), then include `-1` as a key with the empty str as value. |
| **Returns** | **list[str]** |  | **Each str is the heading, including leading sharps `'#'`.** |


The `get_headings` function returns only a list of headings.

In [None]:
print(template_mf.get_headings())
assert template_mf.get_headings() == [
    '# Topic[^1]', '# See Also', '# Meta', '## References', '## Citations and Footnotes', '']

assert template_mf.get_headings((3,4,6), include_start=True) == ['']



['# Topic[^1]', '# See Also', '# Meta', '## References', '## Citations and Footnotes', '']


The empty heading `''` is returned when `include_start=True` and there is text belonging to no heading - this can only happen at the start of the Markdown file before any headings are specified. Any YAML frontmatter meta is considered as "text belonging to no heading".

Setting `include_start=False` excludes  the empty heading altogether.

In [None]:
assert template_mf.get_headings(include_start=False) == [
    '# Topic[^1]', '# See Also', '# Meta', '## References', '## Citations and Footnotes']

The parameter `levels` specifies which level headings to return. The argument passed to `levels` does not affect whether or not the empty heading `''` is included.

In [None]:
assert template_mf.get_headings(levels=1, include_start=True) == [
    '# Topic[^1]', '# See Also', '# Meta', '']
assert template_mf.get_headings(levels=1, include_start=False) == [
    '# Topic[^1]', '# See Also', '# Meta']
# Since list(range(2,6)) == [2, 3, 4, 5], the below returns all headings of levels 2, 3, 4, 5.
assert template_mf.get_headings(levels=range(2,6), include_start=True) == [
    '## References', '## Citations and Footnotes', '']

In [None]:
show_doc(MarkdownFile.get_headings_by_line_number)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L251){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.get_headings_by_line_number

>      MarkdownFile.get_headings_by_line_number
>                                                (levels:Union[Iterator[int],int
>                                                ,NoneType]=None,
>                                                include_start:bool=True)

Return a dict of heading titles in the markdown file.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| levels | Union[Iterator[int], int, None] | None | The levels of the headings to search for. Each int is between 1 and 6 inclusive, as each heading can be of levels 1 to 6. If `None` then all heading-levels are searched. |
| include_start | bool | True | If `True` and if this object contains text that is not under a heading (i.e. the text does not start with a heading), then include `-1` as a key with the empty str as value. |
| **Returns** | **dict[int, str]** |  | **The keys are line numbers and each value is str is the heading string, including the leading sharps `'#'`, but without any leading or trailing whitespace characters.** |

The `get_headings_by_line_number` function returns a dict whose keys are line numbers to headers and whose corresponding values are the full header str. 

As with `get_headings`, setting `include_start=True` includes the empty header; the corresponding line number is always `-1`.

In [None]:
line_numbers_and_headings = template_mf.get_headings_by_line_number()
print(line_numbers_and_headings)
assert line_numbers_and_headings == {
5: '# Topic[^1]',
 7: '# See Also',
 9: '# Meta',
 10: '## References',
 12: '## Citations and Footnotes',
 -1: ''}

assert template_mf.get_headings_by_line_number(include_start=False) == {
5: '# Topic[^1]',
 7: '# See Also',
 9: '# Meta',
 10: '## References',
 12: '## Citations and Footnotes', }

{5: '# Topic[^1]', 7: '# See Also', 9: '# Meta', 10: '## References', 12: '## Citations and Footnotes', -1: ''}


In [None]:
# TODO: add more examples

In [None]:
show_doc(MarkdownFile.get_headings_and_text)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L275){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.get_headings_and_text

>      MarkdownFile.get_headings_and_text
>                                          (levels:Union[Iterator[int],int,NoneT
>                                          ype]=None, include_start:bool=True)

Return a list of headings and the text under each heading.

The text under each heading does not include the text of
subheadings.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| levels | Union[Iterator[int], int, None] | None | The levels of the headings to search for. Each int is between 1 and 6 inclusive, as each heading can be of levels 1 to 6. If `None`, then all heading-levels are searched. |
| include_start | bool | True | If `True` and if this object contains text that is not under a heading (i.e. the text does not start with a heading), then include `-1` as a key with the empty str as value. |
| **Returns** | **dict[str, str]** |  | **Each key is the entire str of the heading, including the leading sharps `'#'`, but not including leading or trailing whitespace characters Each value is the str under that heading until the next heading, including at trailing next line characters `\n`.  If `include_start` is `True`, then one of the keys is the empty str and the corresponding value is the start of the text that is not under any heading.** |

The `get_headings_and_text` function returns a dict whose keys are full headers and whose values are text under the headers.

In [None]:
headings_and_text = template_mf.get_headings_and_text()
print(headings_and_text)
assert headings_and_text == { '': '---\ncssclass: clean-embeds\naliases: []\ntags: [_meta/literature_note]\n---',
 '# Topic[^1]': '',
 '# See Also': '',
 '# Meta': '',
 '## References': '',
 '## Citations and Footnotes': '[^1]: Citation' }

print(template_mf.get_headings_and_text(None, False))
assert template_mf.get_headings_and_text(None, False) == {
 '# Topic[^1]': '',
 '# See Also': '',
 '# Meta': '',
 '## References': '',
 '## Citations and Footnotes': '[^1]: Citation' }


{'': '---\ncssclass: clean-embeds\naliases: []\ntags: [_meta/literature_note]\n---', '# Topic[^1]': '', '# See Also': '', '# Meta': '', '## References': '', '## Citations and Footnotes': '[^1]: Citation'}
{'# Topic[^1]': '', '# See Also': '', '# Meta': '', '## References': '', '## Citations and Footnotes': '[^1]: Citation'}


In [None]:
# TODO: add more examples

In [None]:
show_doc(MarkdownFile.get_headings_tree)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L304){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.get_headings_tree

>      MarkdownFile.get_headings_tree ()

Return a dict representing the tree of headings in the markdown file.

**Returns**

- dict[Union[str, int], Union[str, dict]]
    - The keys are 1. line numbers or 2. the str `'title'`.  The values
    are dict or str (the blank str if root node) respectively. The
    dicts in themselves recursively represent trees and the str are
    headings, including the leading sharps. In particular, the root
    level dict also has the blank string `''` associated to the key
    `'title'`.

In [None]:
headings_tree = template_mf.get_headings_tree()
print(headings_tree)
assert headings_tree == {
    'title': '',
    5: {'title': '# Topic[^1]'},
    7: {'title': '# See Also'},
    9: {'title': '# Meta',
        10: {'title': '## References'},
        12: {'title': '## Citations and Footnotes'}
        }
}

{'title': '', 5: {'title': '# Topic[^1]'}, 7: {'title': '# See Also'}, 9: {'title': '# Meta', 10: {'title': '## References'}, 12: {'title': '## Citations and Footnotes'}}}


In [None]:
show_doc(MarkdownFile.get_line_number_of_heading)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L336){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.get_line_number_of_heading

>      MarkdownFile.get_line_number_of_heading (title:Optional[str]=None,
>                                               from_line:int=0, levels:Union[It
>                                               erator[int],int,NoneType]=None)

Return the line number of the heading with the specified
title after the specified line number.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| title | Union[str, None] | None | Title of the heading. Does not include the leading sharps (`'#'`). If `None`, then return the line number of any heading after the specified line number. |
| from_line | int | 0 | The line number to start searching for the heading with `title` from. |
| levels | Union[Iterator[int], int, None] | None | The levels of the heading to search for. Each int is between 1 and 6 inclusive, as each heading can be of levels 1 to 6. If `None`, then all heading-levels are searched. |
| **Returns** | **int** |  | **An index in `self.parts`. If no index/line number of the matching heading exists, then return -1.** |

Note that the argument to `title` does not include the starting hashtags `#`.

In [None]:
line_number = template_mf.get_line_number_of_heading(title='See Also')
assert line_number == 7

If the heading of the specified title does not exist, then `-1` is returned.

In [None]:
assert template_mf.get_line_number_of_heading(title='Nonexistent title') == -1

We can search for headers of specified titles from specified lines onward:

In [None]:
assert template_mf.get_line_number_of_heading(title='Topic[^1]', from_line=3) == 5
assert template_mf.get_line_number_of_heading(title='Topic[^1]', from_line=6) == -1

We can also specify the levels that the header must be: 

In [None]:
assert template_mf.get_line_number_of_heading(title='Topic[^1]', levels=(1,2,6)) == 5
assert template_mf.get_line_number_of_heading(title='Topic[^1]', levels=(3, 5)) == -1

In [None]:
show_doc(MarkdownFile.get_line_numbers_under_heading)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L355){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.get_line_numbers_under_heading

>      MarkdownFile.get_line_numbers_under_heading (title:Optional[str]=None,
>                                                   from_line:int=0, levels:Unio
>                                                   n[Iterator[int],int,NoneType
>                                                   ]=None, include_subheadings:
>                                                   bool=True)

Return the line numbers belonging to the heading.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| title | Union[str, None] | None | Title of the heading. Does not include the leading sharps (`'#'`). If `None`, then return the line number of any heading after the specified line number. |
| from_line | int | 0 | The line number to start searching for the heading with `title` from. |
| levels | Union[Iterator[int], int, None] | None | The levels of the heading to search for. Each int is between 1 and 6 inclusive, as each heading can be of levels 1 to 6. If `None`, then all heading-levels are searched. |
| include_subheadings | bool | True | If `True`, then include the subheadings. |
| **Returns** | **Union[tuple[int], int]** |  | **`(start, end)` where `self.parts[start:end]` represents the parts under the heading, including the start of the heading.  If the heading of the specified title does not exist, then returns -1.** |

In [None]:
print(template_text)
template_mf = MarkdownFile.from_string(template_text)
assert template_mf.get_line_numbers_under_heading(title='Topic[^1]') == (5,7)
assert template_mf.get_line_numbers_under_heading(title='See Also') == (7,9)
assert template_mf.get_line_numbers_under_heading(title='Meta') == (9,14)
assert template_mf.get_line_numbers_under_heading(title='References') == (10,12)
assert template_mf.get_line_numbers_under_heading(title='Citations and Footnotes') == (12,14)

---
cssclass: clean-embeds
aliases: []
tags: [_meta/literature_note]
---
# Topic[^1]

# See Also

# Meta
## References

## Citations and Footnotes
[^1]: Citation


If `include_subheadings=False`, then the line numbers for only the section without any subsections is returned.

In [None]:
assert template_mf.get_line_numbers_under_heading(title='Topic[^1]', include_subheadings=False) == (5,7)
assert template_mf.get_line_numbers_under_heading(title='See Also', include_subheadings=False) == (7,9)
assert template_mf.get_line_numbers_under_heading(title='Meta', include_subheadings=False) == (9,10)
assert template_mf.get_line_numbers_under_heading(title='References', include_subheadings=False) == (10,12)
assert template_mf.get_line_numbers_under_heading(title='Citations and Footnotes', include_subheadings=False) == (12,14)

## Adding/removing lines in a `MarkdownFile` object

In [None]:
# TODO examples of insert_line, remove_line, pop_line, add_line_to_end, add_blank_line_to_end, add_line_in_section

In [None]:
show_doc(MarkdownFile.insert_line)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L406){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.insert_line

>      MarkdownFile.insert_line (index:int,
>                                line_dict:dict[str,typing.Union[__main__.Markdo
>                                wnLineEnum,str]])

Add a line at the specified index/line number to `self.parts`.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| index | int | The index at which to add `line_dict` into `self.parts`. |
| line_dict | dict[str, Union[MarkdownLineEnum, str]] | See `self.parts`. |
| **Returns** | **None** |  |

In [None]:
show_doc(MarkdownFile.remove_line)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L414){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.remove_line

>      MarkdownFile.remove_line (index:int=-1)

Remove a line from `self.parts`.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| index | int | -1 | The index of the line to remove from `self.parts`. |
| **Returns** | **None** |  |  |

In [None]:
show_doc(MarkdownFile.remove_lines)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L421){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.remove_lines

>      MarkdownFile.remove_lines (start:int, end:int)

Remove lines from `self.parts`.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| start | int | The index of the first line to remove from `self.parts`. |
| end | int | The end index to remove; the line of index `end` is not removed, but the line of index `end - 1` is. |
| **Returns** | **None** |  |

In [None]:
show_doc(MarkdownFile.pop_line)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L429){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.pop_line

>      MarkdownFile.pop_line (index:int=-1)

Remove a line from `self.parts` and get its value.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| index | int | -1 | The index of the line to pop from `self.parts`. |
| **Returns** | **dict[str, Union[MarkdownLineEnum, str]]** |  | **The popped line** |

In [None]:
show_doc(MarkdownFile.add_line_to_end)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L436){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.add_line_to_end

>      MarkdownFile.add_line_to_end
>                                    (line_dict:dict[str,typing.Union[__main__.M
>                                    arkdownLineEnum,str]])

Add a line to the end of `self.parts`.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| line_dict | dict[str, Union[MarkdownLineEnum, str]] | See `self.parts`. |
| **Returns** | **None** |  |

In [None]:
show_doc(MarkdownFile.add_blank_line_to_end)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L443){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.add_blank_line_to_end

>      MarkdownFile.add_blank_line_to_end ()

Add a blank line to the end of `self.parts`.

In [None]:
show_doc(MarkdownFile.add_line_in_section)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L449){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.add_line_in_section

>      MarkdownFile.add_line_in_section (title:str,
>                                        line_dict:dict[str,typing.Union[__main_
>                                        _.MarkdownLineEnum,str]],
>                                        start:bool=True)

Add a line in section specified by its title.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| title | str |  | Title of the heading (without the leading sharps `'#'`) |
| line_dict | dict[str, Union[MarkdownLineEnum, str]] |  | The line to add |
| start | bool | True | If `True`, add to the start of the section. If `False`, add to the end of the section. |
| **Returns** | **None** |  |  |

## Removing or clearing sections in a `MarkdownFile` object

In [None]:
show_doc(MarkdownFile.remove_section)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L463){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.remove_section

>      MarkdownFile.remove_section (title:str)

Remove the section with the specified title, including subsections,
if the section exists.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| title | str | The title of the section to remove (without the starting `'#'`'s) |
| **Returns** | **None** |  |

The `remove_section` method removes all lines belonging to a section, including subsections.

In [None]:
# Removing the 2-line 'Topic[^1]' section from the 14-line template leaves 12 lines.
template_mf = MarkdownFile.from_string(template_text)
template_mf.remove_section('Topic[^1]')
assert len(template_mf.parts) == 12
template_mf.remove_section('Meta')  # This removes subsections too!
assert str(template_mf) == """---
cssclass: clean-embeds
aliases: []
tags: [_meta/literature_note]
---
# See Also
"""

Attempting to remove a non-existent section does nothing.

In [None]:
mf_2 = MarkdownFile.from_string(template_text)
mf_2.remove_section('Non existing section')
# Compare with `==`: `assert x, y` only checks that `x` is truthy and uses `y`
# as the failure message, so it would pass even if the text had changed.
assert str(mf_2) == template_text

In [None]:
show_doc(MarkdownFile.clear_section)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L484){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.clear_section

>      MarkdownFile.clear_section (title:str, leave_blank_line:bool=True,
>                                  clear_subsections:Optional[str]=None)

Clear the section with the specified title, if it exists.

Does not clear subsections.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| title | str |  | Title of the section (Without the leading sharps `'#'`) |
| leave_blank_line | bool | True | If `True`, leaves a blank line at the end of the section. |
| clear_subsections | Optional[str] | None | `'clear'`, `'delete'`, or `None`. If `'clear'`, then just clears the contents of subsections, but does not affect the headers. If `'delete'`, then clears the contents of the subsections and deletes the headers. If `None`, then does not affect either. |
| **Returns** | **None** |  |  |

In [None]:
mf = MarkdownFile.from_string(text_1)
mf.clear_section('Section 1', leave_blank_line=True)
headings_and_text = mf.get_headings_and_text()
assert headings_and_text['# Section 1'] == ''
assert mf.get_line_number_of_heading('Subsection a') == 3
assert mf.parts[4]['line'] == 'Didididi'
print(mf)


# Section 1

## Subsection a
Didididi
Dododododo
# Section 2


Setting `leave_blank_line=False` leaves no blank line between the section and the next:

In [None]:
mf = MarkdownFile.from_string(text_1)
mf.clear_section('Section 1', leave_blank_line=False)
headings_and_text = mf.get_headings_and_text()
assert headings_and_text['# Section 1'] == ''
assert mf.get_line_number_of_heading('Subsection a') == 2
assert mf.parts[3]['line'] == 'Didididi'
print(mf)


# Section 1
## Subsection a
Didididi
Dododododo
# Section 2


In [None]:
show_doc(MarkdownFile.clear_all_sections)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L509){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.clear_all_sections

>      MarkdownFile.clear_all_sections (leave_blank_lines:bool=True)

Clear all sections.

Does not clear frontmatter metadata. Leaves all headers intact.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| leave_blank_lines | bool | True | If `True`, leaves a blank line in each section. |
| **Returns** | **None** |  |  |

In [None]:
mf = MarkdownFile.from_string(text_1)
mf.clear_all_sections(leave_blank_lines=True)
# Only the three heading lines remain after clearing.
# NOTE(review): no blank lines are left even though `leave_blank_lines=True`
# — confirm that this is the intended behavior.
assert len(mf.parts) == 3
print(mf)

# Section 1
## Subsection a
# Section 2


## Metadata in a `MarkdownFile` object

Here are some things that we can do with a `MarkdownFile` object with frontmatter YAML metadata:

In [None]:
template_mf = MarkdownFile.from_string(template_text)
print (template_mf.metadata(), '\n')
assert template_mf.metadata() == {'cssclass': 'clean-embeds', 'aliases': [], 'tags': ['_meta/literature_note']}
assert template_mf.has_metadata()
assert template_mf.metadata_lines() == (0, 4)


new_metadata = {'aliases': ['an_awesome_note', 'no_more_cssclass', 'no_more_tags']}
template_mf.replace_metadata(new_metadata)
print('The following is the MarkdownFile with new frontmatter YAML metadata:\n')
print(template_mf, '\n')
assert str(template_mf) == """---
aliases: [an_awesome_note, no_more_cssclass, no_more_tags]
---
# Topic[^1]

# See Also

# Meta
## References

## Citations and Footnotes
[^1]: Citation"""


template_mf.remove_metadata()
print('The following is the MarkdownFile with frontmatter YAML metadata removed:\n')
print(template_mf)
assert str(template_mf) == """# Topic[^1]

# See Also

# Meta
## References

## Citations and Footnotes
[^1]: Citation"""

{'cssclass': 'clean-embeds', 'aliases': [], 'tags': ['_meta/literature_note']} 

The following is the MarkdownFile with new frontmatter YAML metadata:

---
aliases: [an_awesome_note, no_more_cssclass, no_more_tags]
---
# Topic[^1]

# See Also

# Meta
## References

## Citations and Footnotes
[^1]: Citation 

The following is the MarkdownFile with frontmatter YAML metadata removed:

# Topic[^1]

# See Also

# Meta
## References

## Citations and Footnotes
[^1]: Citation


Note that if the `MarkdownFile` does not have any YAML frontmatter metadata, then the `metadata` method returns `None`:

In [None]:
mf_1 = MarkdownFile.from_string(text_1)
assert mf_1.metadata() is None

If the `MarkdownFile`'s YAML frontmatter metadata has formatting issues, then `metadata` raises a `ValueError`. The error message also reports the underlying [yaml.YAMLError](https://github.com/yaml/pyyaml/blob/8cdff2c80573b8be8e8ad28929264a913a63aa33/lib/yaml/error.py) from the [PyYAML](https://github.com/yaml/pyyaml) library, e.g. [`yaml.parser.ParserError`](https://github.com/yaml/pyyaml/blob/master/lib/yaml/parser.py), [`yaml.scanner.ScannerError`](https://github.com/yaml/pyyaml/blob/master/lib/yaml/scanner.py), or [`yaml.reader.ReaderError`](https://github.com/yaml/pyyaml/blob/master/lib/yaml/reader.py).

In [None]:
# Case 1: `\b` in this non-raw string literal is the backspace control
# character (not a backslash followed by `b`); presumably that unprintable
# character is what the YAML reader rejects.
text_with_bad_yaml = "---\nsome_metadata_field: [\badly_formatted_string]\n---\nThe rest of the note contents..."
mf = MarkdownFile.from_string(text_with_bad_yaml)
with ExceptionExpected(ValueError): # By virtue of giving a ReaderError
    mf.metadata()


# Case 2: two `key:` indicators on a single line.
text_with_bad_yaml = "---\nfield: field2: \n---\nThe rest of the note contents..."
mf = MarkdownFile.from_string(text_with_bad_yaml)
with ExceptionExpected(ValueError): # By virtue of giving a ScannerError
    mf.metadata()


# Case 3: a sequence entry (`- ...`) directly after a mapping entry.
text_with_bad_yaml = "---\nfield: John\n- field2: Mary \n---\nThe rest of the note contents..."
mf = MarkdownFile.from_string(text_with_bad_yaml)
with ExceptionExpected(ValueError): # By virtue of giving a ParserError
    mf.metadata()



In [None]:
#| hide
# Test escaping characters

#     latex_in_original: ["\\mathscr{O}_{\\text {Proj } S_{*}}(n)"]

In [None]:
show_doc(MarkdownFile.has_metadata)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L568){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.has_metadata

>      MarkdownFile.has_metadata ()

Return `True` if this `MarkdownFile` object has frontmatter
YAML metadata.

If the `MarkdownFile` object has any frontmatter YAML metadata, then
it is expected to be at the very start; in particular, it must not
be preceded by any whitespace characters.

In [None]:
show_doc(MarkdownFile.metadata_lines)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L591){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.metadata_lines

>      MarkdownFile.metadata_lines ()

Return the indices in `self.parts` which are metadata.

Assumes that `self.parts` is nonempty. 

If the MarkdownFile object has any frontmatter YAML metadata, then
it is expected to be at the very start; in particular, it must not
be preceded by any whitespace characters.

**Returns**

- tuple
    - The tuple consists of 2 ints, `a` and `b`, where `self.parts[a:b+1]`
    represent the metadata lines, including the `'---'` before and after.

In [None]:
show_doc(MarkdownFile.replace_metadata)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L621){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.replace_metadata

>      MarkdownFile.replace_metadata (new_metadata:dict[str],
>                                     enquote_entries_in_fields:list[str]=[])

Replace the frontmatter metadata of this MarkdownFile object.

Optionally also enquotes string entries in fields specified by
`enquote_entries_in_fields`.

**Warning**
- This method is only tested when the values of `new_metadata` are either `str` or
`list[str]`.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| new_metadata | dict[str] |  | The dictionary representing the new metadata. The keys are the names of fields. The values are the field values, usually expected to be a single string or a list of strings |
| enquote_entries_in_fields | list[str] | [] | A list of str of fields in the YAML metadata whose entries need to be enquoted. If there is a string that is not a key of `new_metadata`, then that string is essentially ignored (in particular, no errors are raised). |
| **Returns** | **None** |  |  |

In [None]:
# TODO: basic example

If the metadata contains strings that need to be escaped (because they have backslashes), the `enquote_entries_in_fields` parameter of the `MarkdownFile.replace_metadata` method can be specified to enquote and escape such strings; cf. `dict_to_metadata_lines`.

In particular, the `MarkdownFile.replace_metadata` method with the `MarkdownFile` object's own `.metadata()` passed in the following example should ideally not modify the string of the `MarkdownFile` object. This feature needs to be tested with more examples, however.

In [None]:
# Round-trip check: replacing the metadata with itself (with the
# `latex_in_original` entries enquoted) should change neither the parsed
# metadata nor the text of the file.
mf = MarkdownFile.from_string(
    r'''---
latex_in_original: ["\\mathscr{O}_{\\text {Proj } S_{*}}(n)"]
---

''')

# Snapshot the state before the round trip.
original_metadata = mf.metadata()
original_str = str(mf).strip()

# NOTE(review): the return value of this call is discarded, so it only checks
# that the conversion does not raise — confirm whether it is still needed.
dict_to_metadata_lines(mf.metadata(), enquote_entries_in_fields=['latex_in_original'])
mf.replace_metadata(mf.metadata(), enquote_entries_in_fields=['latex_in_original'])

test_eq(mf.metadata(), original_metadata)
test_eq(str(mf).strip(), original_str)

Multiple methods in the `MarkdownFile` class, including `MarkdownFile.add_tags`, `MarkdownFile.remove_tags`, and `MarkdownFile.replace_auto_tags_with_regular_tags`, depend on the `MarkdownFile.replace_metadata` method. Arguments for the `enquote_entries_in_metadata_fields` parameter must be specified appropriately when using these methods.

In [None]:
show_doc(MarkdownFile.remove_metadata)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L645){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.remove_metadata

>      MarkdownFile.remove_metadata ()

Remove the frontmatter metadata of this MarkdownFile object.

In [None]:
show_doc(MarkdownFile.add_metadata_section)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L580){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.add_metadata_section

>      MarkdownFile.add_metadata_section (check_exists:bool=True)

Add a frontmatter YAML metadata at the very beginning.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| check_exists | bool | True | If `True`, Check if there is already a metadata section at the beginning and do not add a metadata section if it exists. |
| **Returns** | **None** |  |  |

If the `MarkdownFile` has no frontmatter YAML metadata, then we can use the `add_metadata_section` method to add blank frontmatter YAML metadata:

In [None]:
mf = MarkdownFile.from_string(text_1)
assert not mf.has_metadata()
mf.add_metadata_section()
print(mf)

---

---


# Section 1
some text 

asdfasdf

## Subsection a
Didididi
Dododododo
# Section 2


If the `MarkdownFile` object already has frontmatter YAML metadata, then the `add_metadata_section` method does nothing.

In [None]:
template_mf = MarkdownFile.from_string(template_text)
# `template_text` already starts with frontmatter metadata, so this call must
# leave the file unchanged. It is made on `template_mf`, the object under test,
# not on the unrelated `mf` from the previous cell.
template_mf.add_metadata_section()
assert str(template_mf) == template_text

## Tags in `MarkdownFile` objects


#### YAML metadata tags

In Obsidian, one can add tags to notes both within text and in the frontmatter YAML metadata.

In [None]:
show_doc(MarkdownFile.has_tag)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L650){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.has_tag

>      MarkdownFile.has_tag (tag:str)

Return `True` if the Markdown file has the specified tag in its
YAML frontmatter metadata.

More specifically, return `True` if the `MarkdownFile` object

1. has YAML frontmatter metadata,
2. the metadata has a `'tags'` section, and
3. the `'tags'` section is a list with the specified tag.

Note that `tag` should not start with the hashtag `#` character.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| tag | str | The tag. Does not start with the hashtag `'#'`. |
| **Returns** | **bool** |  |

In [None]:
show_doc(MarkdownFile.add_tags)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L672){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.add_tags

>      MarkdownFile.add_tags (tags:Union[str,list[str]], skip_repeats:bool=True,
>                             skip_repeated_auto:bool=True,
>                             enquote_entries_in_metadata_fields:list[str]=[])

Add tags to the frontmatter metadata.

The order of the tags may be changed.

Ultimately the `replace_metadata` method is used to modify the YAML metadata.
Use the `enquote_entries_in_metadata_fields` parameter to ensure that the
`replace_metadata` invocation preserves enquoted metadata values.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| tags | Union[str, list[str]] |  | The str representing the tags. May or may not start with `'#'`, e.g. `'#_meta/definition'` or `'_meta/definition'`. |
| skip_repeats | bool | True | If `True`, then this MarkdownFile will just have unique tags; merges pre-existing repeated tags if necessary. Also, the order of the tags may be changed. |
| skip_repeated_auto | bool | True | If `True`, then only add tags starting with '_auto/' if the corresponding non-auto tag does not exist, e.g.  '_auto/_meta/definition' is not added if the note already has '_meta/definition'. |
| enquote_entries_in_metadata_fields | list[str] | [] | A list of str of fields in the YAML metadata whose entries need to be enquoted. If there is a string that is not a key of `new_metadata`, then that string is essentially ignored (in particular, no errors are raised). |
| **Returns** | **None** |  |  |

In [None]:
show_doc(MarkdownFile.remove_tags)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L711){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.remove_tags

>      MarkdownFile.remove_tags (tags:list[str],
>                                enquote_entries_in_metadata_fields:list[str]=[]
>                                )

Remove specified tags from the frontmatter metadata, if
the frontmatter metadata and the specified tags exist.

If the `MarkdownFile` object does not have a frontmatter or
if the frontmatter does not include a `tags` line, then
the `MarkdownFile` object is not modified.

Assumes that this MarkdownFile object has a frontmatter and
that the frontmatter includes a tags line.

Any repeated tags are either merged into one (if the tag is 
not in `tags`) or are removed (if the tag is in `tags`).

Ultimately the `replace_metadata` method is used to modify the YAML metadata.
Use the `enquote_entries_in_metadata_fields` parameter to ensure that the
`replace_metadata` invocation preserves enquoted metadata values.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| tags | list[str] |  | The str representing the tags. May or may not start with `'#'`, e.g. `'#_meta/definition'` or `'_meta/definition'`. |
| enquote_entries_in_metadata_fields | list[str] | [] | A list of str of fields in the YAML metadata whose entries need to be enquoted. If there is a string that is not a key of `new_metadata`, then that string is essentially ignored (in particular, no errors are raised). |
| **Returns** | **None** |  |  |

In [None]:
mf = MarkdownFile.from_string(text_2)
assert mf.has_tag('_meta/definition')

mf.add_tags(['new_tag'])  #This may change the order of the tags.
assert mf.has_tag('new_tag')

assert mf.has_tag('this_tag_will_be_removed')
mf.remove_tags(['this_tag_will_be_removed'])
assert not mf.has_tag('this_tag_will_be_removed')

assert not mf.has_tag('no_tag')
mf.remove_tags(['no_tag']) # Does nothing
assert not mf.has_tag('no_tag')

print(mf)

---
tags: [new_tag, _meta/definition, _meta/concept, _auto/_meta/notation]
---
# Topic
This is some note with some stuff.


If the `MarkdownFile` object does not have frontmatter YAML metadata or if its frontmatter YAML metadata does not have a `tags` section, then no changes are made to the `MarkdownFile` object:

In [None]:
# Example of a MarkdownFile object without frontmatter YAML metadata:
mf = MarkdownFile.from_string(text_1)

mf_str_before = str(mf)

assert mf.metadata() is None
mf.remove_tags(['_meta/definition'])
assert mf.metadata() is None

test_eq(str(mf), mf_str_before)

# Example of a MarkdownFile object with frontmatter YAML metadata but without a tags section.
mf = MarkdownFile.from_string(text_8)

mf_str_before = str(mf)

assert mf.metadata() is not None
# The metadata field is named 'tags'; checking for 'tag' would pass vacuously
# even if a tags section were present.
assert 'tags' not in mf.metadata()
mf.remove_tags(['_meta/definition'])

test_eq(str(mf), mf_str_before)


The `add_tags` method has a `skip_repeats` parameter.

In [None]:
mf.add_tags(['new_tag'], skip_repeats=True)  # Only one `new_tag` will be present after this.
print(mf)
mf.add_tags(['new_tag'], skip_repeats=False)  # After this, mf will have 2 `new_tag`'s
print(mf)
mf.add_tags(['new_tag'], skip_repeats=True)  # Only one `new_tag` will be present after this, even though there were multiple `new_tag`'s before this.
print(mf)

---
cssclass: clean-embeds
tags: [new_tag]
---
# Topic[^1]
Here is a LaTeX Equation:

$$ 5 \neq 7$$
Hey
Okay, now here is another one:
$$\begin{align*}
\sum_{k=1}^n k = \frac{n(n+1)}{2}
\end{align*}$$

%%This is a comment. 
The comment is not visible.
This is the end of the comment %%

This is the end of this note. This is visible.
---
cssclass: clean-embeds
tags: [new_tag, new_tag]
---
# Topic[^1]
Here is a LaTeX Equation:

$$ 5 \neq 7$$
Hey
Okay, now here is another one:
$$\begin{align*}
\sum_{k=1}^n k = \frac{n(n+1)}{2}
\end{align*}$$

%%This is a comment. 
The comment is not visible.
This is the end of the comment %%

This is the end of this note. This is visible.
---
cssclass: clean-embeds
tags: [new_tag]
---
# Topic[^1]
Here is a LaTeX Equation:

$$ 5 \neq 7$$
Hey
Okay, now here is another one:
$$\begin{align*}
\sum_{k=1}^n k = \frac{n(n+1)}{2}
\end{align*}$$

%%This is a comment. 
The comment is not visible.
This is the end of the comment %%

This is the end of this note. This is visible.

In [None]:
show_doc(MarkdownFile.replace_auto_tags_with_regular_tags)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L743){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.replace_auto_tags_with_regular_tags

>      MarkdownFile.replace_auto_tags_with_regular_tags (exclude:list[str]=None,
>                                                        enquote_entries_in_meta
>                                                        data_fields:list[str]=[
>                                                        ])

Replace tags in the frontmatter metadata starting with `_auto/`
with tags without the `_auto/`.

Ultimately the `replace_metadata` method is used to modify the YAML metadata.
Use the `enquote_entries_in_metadata_fields` parameter to ensure that the
`replace_metadata` invocation preserves enquoted metadata values.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| exclude | list[str] | None | The tags whose `_auto/` tags should not be converted. The str should not start with `'#'` and should not start with `'_auto/'`. |
| enquote_entries_in_metadata_fields | list[str] | [] | A list of str of fields in the YAML metadata whose entries need to be enquoted. If there is a string that is not a key of `new_metadata`, then that string is essentially ignored (in particular, no errors are raised). |
| **Returns** | **None** |  |  |

One can use `/` characters to create "subtags". `trouver` recognizes `_auto` tags, which are tags that `trouver` adds to mark notes that it has processed in certain ways. 

For example, `trouver.markdown.obsidian.personal.machine_learning.tag_categorization` can label the "types" of the content of information notes. For instance, if this labelling process determines an information note to be a definition note, then it will add a `_auto/_meta/definition` tag, as opposed to a `_meta/definition` tag. This way, one can recognize that the tag was added automatically via a machine learning model and may be prone to error.

The `replace_auto_tags_with_regular_tags` replaces `_auto` tags with regular tags. This can be useful, for example, once tags for a multitude of notes have been verified.

In [None]:
mf = MarkdownFile.from_string(text_2)
assert mf.has_tag('_auto/_meta/notation')
mf.replace_auto_tags_with_regular_tags()
assert mf.has_tag('_meta/notation')
assert not mf.has_tag("_auto/_meta/notation")
print(mf)

---
tags: [_meta/definition, _meta/concept, _meta/notation, this_tag_will_be_removed]
---
# Topic
This is some note with some stuff.


#### In-line tags

The difference between headers and in-line tags is that the former have a space between the hashtags and the text and the latter do not. For example, `# Header` is a header and `#tag` is a tag.

In [None]:
show_doc(MarkdownFile.remove_in_line_tags)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L768){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.remove_in_line_tags

>      MarkdownFile.remove_in_line_tags ()

Remove lines starting with in line tags.

In [None]:
mf = MarkdownFile.from_string(text_3)
mf.remove_in_line_tags()
print(str(mf))
print(mf.parts)
assert len(mf.parts) == 7
assert str(mf) == """
# Section 1
Some stuff


# Hello
"""


# Section 1
Some stuff


# Hello

[{'line': '', 'type': <MarkdownLineEnum.BLANK_LINE: 9>}, {'line': '# Section 1', 'type': <MarkdownLineEnum.HEADING: 1>}, {'line': 'Some stuff', 'type': <MarkdownLineEnum.DEFAULT: 0>}, {'line': '', 'type': <MarkdownLineEnum.BLANK_LINE: 9>}, {'line': '', 'type': <MarkdownLineEnum.BLANK_LINE: 9>}, {'line': '# Hello', 'type': <MarkdownLineEnum.HEADING: 1>}, {'line': '', 'type': <MarkdownLineEnum.BLANK_LINE: 9>}]


In [None]:
# Removing in-line tags is expected to leave `template_text` unchanged.
template_mf = MarkdownFile.from_string(template_text)
template_mf.remove_in_line_tags()
assert str(template_mf) == template_text

## Extract raw content from a `MarkdownFile` object

One can add a multitude of meta-data to Obsidian Markdown notes - frontmatter metadata, headers/footers, links, embedded links, tags, etc.

We can extract raw content from notes by removing a combination of these meta-data.

See also `remove_in_line_tags`.

In [None]:
show_doc(MarkdownFile.replace_links_with_display_text)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L778){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.replace_links_with_display_text

>      MarkdownFile.replace_links_with_display_text
>                                                    (remove_embedded_note_links
>                                                    :bool=False)

Remove nonembedded links and replaces them with their display text.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| remove_embedded_note_links | bool | False | If `True`, remove links to embedded notes as well. If `False`, does not modify embedded notes. |
| **Returns** | **None** |  |  |

In [None]:
# With the default arguments, non-embedded links are replaced by their display text.
mf = MarkdownFile.from_string(text_4)
mf.replace_links_with_display_text()
print(str(mf))
# Links without an explicit display text fall back to the note name
# (and `note > anchor` for links to anchors), as the expected text shows.
assert str(mf) == """
# Some thing

I have a link

## Another topic
This is a link without a specified display text: some_kind_of_note.

This is a link to an anchor without a specified display text: another_note > another anchor."""


# Some thing

I have a link

## Another topic
This is a link without a specified display text: some_kind_of_note.

This is a link to an anchor without a specified display text: another_note > another anchor.


If `remove_embedded_note_links=True`, then embedded links will be replaced with their "display text" as a link; they will not be replaced with the underlying embedded text.

In [None]:
mf = MarkdownFile.from_string(text_5)
# Default call: `text_5` is expected to come back unchanged.
mf.replace_links_with_display_text()
assert str(mf) == text_5

# Opting in replaces the embedded link with its display text as well.
mf.replace_links_with_display_text(remove_embedded_note_links=True)
print(str(mf))
assert str(mf) == """# A header
This note is embedded.

The link above should will not be replaced by `replace_links_with_display_text`,
unless `remove_embedded_note_links` is set to `True`."""

# A header
This note is embedded.

The link above should will not be replaced by `replace_links_with_display_text`,
unless `remove_embedded_note_links` is set to `True`.


In [None]:
show_doc(MarkdownFile.remove_footnotes_to_embedded_links)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L954){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.remove_footnotes_to_embedded_links

>      MarkdownFile.remove_footnotes_to_embedded_links
>                                                       (remove_footnote_mention
>                                                       s:bool=True)

Remove footnotes to embedded links.

These are footnotes whose only content are embedded links, e.g.
`[^1]: ![[embedded_note]]`

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| remove_footnote_mentions | bool | True | If `True`, removes the mentions to the footnote to the embedded links in the text. |
| **Returns** | **None** |  |  |

I very often use footnotes with only embedded links. We can remove such footnotes.

In [None]:
# Default `remove_footnote_mentions=True`: the expected text contains neither
# the footnote definitions nor any `[^...]` mentions.
mf = MarkdownFile.from_string(text_6)
mf.remove_footnotes_to_embedded_links()
assert str(mf) == """
# Header

I want to link to some embedded note


You can also let the footnote mention be alphanumeric
"""

Setting `remove_footnote_mentions=False` removes the content of the footnotes themselves, but leaves the mentions intact:

In [None]:
# Start again from a fresh parse of `text_6`; the mentions (`[^1]`, `[^note]`)
# must survive while the footnote definitions are removed.
mf = MarkdownFile.from_string(text_6)
mf.remove_footnotes_to_embedded_links(remove_footnote_mentions=False)
assert str(mf) == """
# Header

I want to link to some embedded note[^1]


You can also let the footnote mention be alphanumeric[^1][^note]
"""

In [None]:
#| hide
# Regression check kept out of the rendered docs: after removing footnotes to
# embedded links (and their mentions), no `[^2]` mention may remain in `text_7`.
mf = MarkdownFile.from_string(text_7)
mf.remove_footnotes_to_embedded_links(remove_footnote_mentions=True)
assert '[^2]' not in str(mf)

In [None]:
show_doc(MarkdownFile.remove_headers)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L980){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.remove_headers

>      MarkdownFile.remove_headers ()

Remove all headers.

We can remove all of the headers and leave the rest of the text intact:

In [None]:
# Case 1: a note with front matter. The expected text keeps the front matter,
# the blank lines, and the footnote; only the heading lines are gone.
mf = MarkdownFile.from_string(template_text)
mf.remove_headers()
assert str(mf) == """---
cssclass: clean-embeds
aliases: []
tags: [_meta/literature_note]
---



[^1]: Citation"""

# Case 2: a note without front matter (`text_1`).
mf = MarkdownFile.from_string(text_1)
mf.remove_headers()
assert str(mf) == """
some text 

asdfasdf

Didididi
Dododododo"""

In [None]:
show_doc(MarkdownFile.remove_double_blank_lines)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L988){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.remove_double_blank_lines

>      MarkdownFile.remove_double_blank_lines ()

Remove blank lines so that there are no consecutive blank lines

When removing some of the "metadata", the content of the note can be left with a lot of consecutive blank lines. To ensure that machine-learning models will not develop some kind of blank line bias, we can remove such consecutive blank lines. 

In [None]:
mf = MarkdownFile.from_string(template_text)
# Removing headers first leaves a run of consecutive blank lines behind ...
mf.remove_headers()
# ... which this call collapses so that no two blank lines are adjacent.
mf.remove_double_blank_lines()
assert str(mf) == """---
cssclass: clean-embeds
aliases: []
tags: [_meta/literature_note]
---

[^1]: Citation"""

In [None]:
show_doc(MarkdownFile.replace_embedded_links_with_text)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L851){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.replace_embedded_links_with_text

>      MarkdownFile.replace_embedded_links_with_text (vault:os.PathLike,
>                                                     recursive:bool=True, remov
>                                                     e_paragraph_id:bool=True)

Remove embedded links and replaces them with their underlying text,
as found in notes in the vault.

Assumes that the embedded links do not loop infinitely.

For embedded links to notes that do not exist in the vault,
the embedded links are replaced with blank str.

No new entries are added to `self.parts` even if the embedded links
have multiple lines.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| vault | PathLike |  |  |
| recursive | bool | True | If `True`, then recursively replaces embedded links in the text of the embedded links. |
| remove_paragraph_id | bool | True | If `True`, then removes the paragraph id's in the text of the embedded links. Leaves the paragraph id's of the original text intact. |
| **Returns** | **None** |  |  |

In [None]:
# TODO: test recursive
# Work inside a temporary directory (removed when the `with` block exits);
# `make_example_vault_2` presumably populates it with the example notes used below.
with tempfile.TemporaryDirectory(prefix='temp_dir_', dir=os.getcwd()) as temp_dir:
    make_example_vault_2(temp_dir)
    vault = Path(temp_dir)
    vn = VaultNote(vault, name='note_with_embedded_links_1')
    mf = MarkdownFile.from_vault_note(vn)
    # Default arguments are used here, i.e. `recursive=True` and `remove_paragraph_id=True`.
    mf.replace_embedded_links_with_text(vault)
    # The expected text therefore contains no `^...` paragraph ids.
    assert str(mf) == r"""This is a note.

There are some embedded text here:

Hello, this is a note which becomes entirely embedded.
%%This is a comment. 
The comment is not visible.
This is the end of the comment %%


cheese 
bandit
$$asdf$$
asdf 

$$5 \neq 7
$$ 

# Section
Some kind of section?

Lalalala
## Subsection
argonaut"""

Setting `remove_paragraph_id=False` keeps the paragraph id's in the embedded text.

In [None]:

# Same scenario as the default-argument test, but keeping the paragraph ids of
# the embedded text. The temporary directory is removed when the block exits.
with tempfile.TemporaryDirectory(prefix='temp_dir_', dir=os.getcwd()) as temp_dir:
    make_example_vault_2(temp_dir)
    vault = Path(temp_dir)
    vn = VaultNote(vault, name='note_with_embedded_links_1')
    # Parse the note once; the original cell repeated this assignment verbatim.
    mf = MarkdownFile.from_vault_note(vn)
    mf.replace_embedded_links_with_text(vault, remove_paragraph_id=False)
    print(str(mf))
    # The `^65809f` and `^221b51` ids from the embedded notes must still be present.
    assert str(mf) == r"""This is a note.

There are some embedded text here:

Hello, this is a note which becomes entirely embedded.
%%This is a comment. 
The comment is not visible.
This is the end of the comment %%


cheese 
bandit
$$asdf$$
asdf 
^65809f

$$5 \neq 7
$$ 
^221b51

# Section
Some kind of section?

Lalalala
## Subsection
argonaut"""

This is a note.

There are some embedded text here:

Hello, this is a note which becomes entirely embedded.
%%This is a comment. 
The comment is not visible.
This is the end of the comment %%


cheese 
bandit
$$asdf$$
asdf 
^65809f

$$5 \neq 7
$$ 
^221b51

# Section
Some kind of section?

Lalalala
## Subsection
argonaut


In [None]:
show_doc(MarkdownFile.remove_html_tags)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L998){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.remove_html_tags

>      MarkdownFile.remove_html_tags ()

Remove HTML tags that are typeset in single lines.

HTML tags that span multiple lines are ignored.

In [None]:
# TODO: test

In [None]:
show_doc(MarkdownFile.merge_display_math_mode)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L1008){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.merge_display_math_mode

>      MarkdownFile.merge_display_math_mode ()

Merge chunks of display_math_mode latex lines into single lines

In [None]:
# `text_10` is presumably defined earlier in the notebook.
mf = MarkdownFile.from_string(text_10)
mf.merge_display_math_mode()
print(mf)
# After merging, the file is expected to consist of exactly 13 parts.
assert len(mf.parts) == 13

This is a single line display math mode LaTeX equation:

$$\mathcal{O}_X$$

This is a single multi-line display math mode LaTeX equation:

$$ 5 + 2 = 7 $$

These are multiple consecutive display math mode LaTeX equations:

$$1+1 = 2 $$
$$5 + 7 = 14$$
$$  8 + 4 = 12  $$


In [None]:
show_doc(MarkdownFile.merge_display_math_mode_into_preceding_text)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L1033){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.merge_display_math_mode_into_preceding_text

>      MarkdownFile.merge_display_math_mode_into_preceding_text
>                                                                (separator:str=
>                                                                '\n')

Merge chunks of display math mode latex lines into single lines and merge
those single lines into preceding text lines.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| separator | str | <br> | The str with which to join the latex lines into the text lines. Note that the display math mode latex lines are not joined with this str. |
| **Returns** | **None** |  |  |

In [None]:
mf = MarkdownFile.from_string(text_10)
# Join each text line and the display math that follows it with a single space.
mf.merge_display_math_mode_into_preceding_text(separator=' ')
print(mf)
# The merge is expected to leave exactly 5 parts.
assert len(mf.parts) == 5


This is a single line display math mode LaTeX equation:  $$\mathcal{O}_X$$

This is a single multi-line display math mode LaTeX equation:  $$ 5 + 2 = 7 $$

These are multiple consecutive display math mode LaTeX equations:  $$1+1 = 2 $$ $$5 + 7 = 14$$ $$  8 + 4 = 12  $$


We can set `separator` to its default value `\n`.

In [None]:
mf = MarkdownFile.from_string(text_10)
# `'\n'` is the documented default separator; it is passed explicitly here.
mf.merge_display_math_mode_into_preceding_text(separator='\n')
print(mf)
assert len(mf.parts) == 5  # Some of the parts have 'line' as multi-line str i.e. as str with `\n` characters.
# The first part holds a merged multi-line str; the second part is a single line.
assert '\n' in mf.parts[0]['line']
assert '\n' not in mf.parts[1]['line']

This is a single line display math mode LaTeX equation:

$$\mathcal{O}_X$$

This is a single multi-line display math mode LaTeX equation:

$$ 5 + 2 = 7 $$

These are multiple consecutive display math mode LaTeX equations:

$$1+1 = 2 $$
$$5 + 7 = 14$$
$$  8 + 4 = 12  $$


If the text starts with display math mode LaTeX, then those leading display math mode lines are merged into a single part.

In [None]:
# `text_11` is presumably defined earlier in the notebook.
mf = MarkdownFile.from_string(text_11)
mf.merge_display_math_mode_into_preceding_text(separator=' ')
print(mf)
# Two parts are expected after the merge.
assert len(mf.parts) == 2

$$asdf$$ $$asdf$$ $$asdf$$
After text.


## Writing a `MarkdownFile` object to a file

We can write the contents of a `MarkdownFile` object to the file represented by a `VaultNote` object.

In [None]:
show_doc(MarkdownFile.parts_of_id)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L791){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.parts_of_id

>      MarkdownFile.parts_of_id (par_id:str)

Return the indices of the lines within the Markdown file
belonging to the specified text id.

This id can be used as an anchor for a link in Obsidian. For example,
`[[note#^65809f]]` is a link to a note named `note` to the text with id
`65809f`. Such a text is marked with a trailing `^65809f`.

|    | **Type** | **Details** |
| -- | -------- | ----------- |
| par_id | str | Must begin with `'\^'`. |
| **Returns** | **Union[tuple[int], None]** | **`(start,end)` where `self.parts[start:end]` consists of the lines of the specified id. If the specified id does not exist for the note, then `None` is returned.** |

Links in Obsidian can be anchored at "paragraphs" of text. As Wikilinks, such links have the format `[[<note_name>#^<id_of_paragraph>]]`. Note that the id begins with a caret `^`.

We can get the parts of the Markdown file to which the id refers:

In [None]:
# Look up each paragraph id of the example note and check the `(start, end)`
# range of `mf.parts` reported for it, printing the matching text each time.
with tempfile.TemporaryDirectory(prefix='temp_dir_', dir=os.getcwd()) as temp_dir:
    make_example_vault_2(temp_dir)
    vault = Path(temp_dir)
    vn = VaultNote(vault, name='note_with_paragraphs_that_are_embedded_1')
    mf = MarkdownFile.from_vault_note(vn)

    # (paragraph id, expected start index, expected end index)
    expected_ranges = [
        ('^65809f', 3, 8),
        ('^221b51', 13, 16),
        ('^123456', 17, 18),
        ('^fff123', 20, 21),
        ('^latexthing', 22, 23),
    ]
    for par_id, expected_start, expected_end in expected_ranges:
        start, end = mf.parts_of_id(par_id)
        assert start == expected_start and end == expected_end
        print(mf.text_of_lines(start, end), '\n')

cheese 
bandit
$$asdf$$
asdf 
^65809f 

$$5 \neq 7
$$ 
^221b51 

# This section has an id ^123456 

^fff123 

$$\mathcal{O}_X$$  ^latexthing 



#### Hidden tests

In [None]:
#| hide
# TODO _look_at_start_of_file, _line_dict

## Misc TODO

In [None]:
show_doc(MarkdownFile.text_of_lines)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L229){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.text_of_lines

>      MarkdownFile.text_of_lines (start:int, end:int)

Return the text of `self.parts[start:end]`,
adding new line characters `'\n'` in between.

In [None]:
show_doc(MarkdownFile.write)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L385){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.write

>      MarkdownFile.write (vn:trouver.markdown.obsidian.vault.VaultNote,
>                          mode:str='w')

Write to the file specified by a `VaultNote` object.

If the file that the `VaultNote` object represents does not exist,
then this method creates it.

|    | **Type** | **Default** | **Details** |
| -- | -------- | ----------- | ----------- |
| vn | VaultNote |  | Represents the file. |
| mode | str | w | The specific mode to write the file with. |
| **Returns** | **None** |  | **enquote_entries_in_metadata_fields: list[str] = [] # A list of str of fields in the YAML metadata whose entries need to be enquoted. If there is a string that is not a key of `new_metadata`, then that string is essentially ignored (in particular, no errors are raised).** |

In [None]:
show_doc(MarkdownFile.copy)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/markdown/file.py#L1118){target="_blank" style="float:right; font-size:smaller"}

### MarkdownFile.copy

>      MarkdownFile.copy (deep:bool)