In [None]:
#| default_exp markdown.obsidian.personal.note_processing

In [None]:
#| export
from deprecated import deprecated
from os import PathLike
from pathlib import Path, PureWindowsPath, WindowsPath
import re
from typing import Optional, Union

from trouver.markdown.markdown.file import MarkdownFile, MarkdownLineEnum, replace_embedded_links_with_text
from trouver.markdown.obsidian.footnotes import remove_footnote_mentions_in_markdown_text
from trouver.markdown.obsidian.links import EMBEDDED_PATTERN, remove_links_from_text
from trouver.markdown.obsidian.vault import VaultNote

In [None]:
from trouver.helper.tests import _test_directory
from fastcore.test import *

# markdown.obsidian.personal.note_processing
> Process notes to extract basic information about them 

`trouver` assumes that the notes in the `Obsidian.md` math vaults are roughly of the following format: 

In [None]:
# Print the text of the `_template_common` note found in the `test_vault_5`
# test vault, to show the note format described above.
vault = _test_directory() / 'test_vault_5'
template_note = VaultNote(vault, name='_template_common')
print(template_note.text())

---
cssclass: clean-embeds
aliases: []
tags: [_meta/literature_note]
---
# Topic[^1]

# See Also

# Meta
## References

## Citations and Footnotes
[^1]: Citation


The functions in this module extract information about such notes. For example, they extract the main text (without the metadata, links, footnotes, etc.) to obtain the "raw" text of the note.

In [None]:
#| export
def remove_double_asterisks_in_markdown_file(
        markdown_file: MarkdownFile
        ) -> None:
    """Strip every `**` from the lines of a `MarkdownFile`, in place.

    This function deliberately lives outside of the `MarkdownFile` class
    because its use is narrow: double asterisks mark definitions and
    notations introduced in LaTeX text, and this function drops those
    markers.

    The `'line'` value of each entry of `markdown_file.parts` is
    overwritten; nothing is returned.
    """
    # TODO Don't remove asterisks in math mode.
    marker = '**'
    for entry in markdown_file.parts:
        stripped_line = entry['line'].replace(marker, '')
        entry['line'] = stripped_line

In [None]:
mf = MarkdownFile.from_string('Hi. Here are some double asterisks: **')

remove_double_asterisks_in_markdown_file(mf)
test_eq(str(mf), 'Hi. Here are some double asterisks: ')

# The literals containing LaTeX are raw strings: `\o` is not a valid escape
# sequence, so a plain literal triggers an invalid-escape warning.
mf = MarkdownFile.from_string(
    'I want to use double asterisks to surround definitions and notations:'
    r' the **Galois group** **$\operatorname{Gal}(L/K)$** of a Galois extension'
    ' $L/K$ is...')
remove_double_asterisks_in_markdown_file(mf)
test_eq(str(mf), r'I want to use double asterisks to surround definitions and notations: the Galois group $\operatorname{Gal}(L/K)$ of a Galois extension $L/K$ is...')

In [None]:
#| export
# TODO: add functionality to remove footnotes altogether.
def process_standard_information_note(
        markdown_file: Union[MarkdownFile, str],
        vault: PathLike,
        remove_frontmatter_meta: bool = True, # If `True`, removes the frontmatter meta. Defaults to `True`.
        remove_see_also_section: bool = True, # If `True`, removes the `# See Also` section. Defaults to `True`.
        remove_meta_section: bool = True, # If `True`, removes the `# Meta` section. Defaults to `True`.
        remove_references_section: bool = True, # If `True`, removes the `## References` section. Defaults to `True`.
        remove_double_asterisks: bool = True, # If `True`, removes double asterisks. Defaults to `True`.
        remove_html_tags: bool = True, # If `True`, removes HTML tags. Defaults to `True`.
        remove_links: bool = True, # If `True`, removes nonembedded links and replaces them with their display text. Defaults to `True`.
        remove_in_line_tags: bool = True, # If `True`, removes in-line tags (the lines that start with a tag).  Defaults to `True`.
        remove_footnotes_to_embedded: bool = True, # If `True`, removes footnotes to embedded notes. Defaults to `True`.
        remove_headers: bool = True, # If `True`, removes headers. Defaults to `True`.
        remove_citation_footnotes: bool = True, # If `True`, removes the citation footnote. Defaults to `True`. Not implemented yet: this argument currently has no effect.
        replace_embedded_links_with_content: bool = True, # If `True`, replaces embedded links with their content.  Defaults to `True`.
        merge_display_math_mode: bool = True, # If `True`, merge each group of display math mode latex lines into single lines.  Defaults to `True`.
        merge_display_math_mode_into_text: Optional[str] = None, # If not `None`, merge each group of display math mode latex lines into single lines and merge those groups into the text that precedes them with the specified str. Defaults to `None`.  The blank character ` ` and the new-line character `\n` are recommended as arguments.
        no_double_blank_lines: bool = True # If `True`, removes escape characters `'\n'` to make it so that there are no double blank lines. Defaults to `True`.
        ) -> MarkdownFile: # If `markdown_file` is a `MarkdownFile` object, then the output is `markdown_file` itself (not a copy) with modifications. If `markdown_file` is a `str`, then the output is a `MarkdownFile` object with the modifications.
    """Process/modify a str/MarkdownFile of a standard information note.

    Each flag switches one processing step on or off.  The enabled steps
    are applied in a fixed order: frontmatter, footnotes to embedded
    links, embedded-link replacement, section removal (`See Also`,
    `Meta`, `References`), double asterisks, HTML tags, links, in-line
    tags, headers, display math merging, and finally double blank lines.

    A `str` input is first parsed with `MarkdownFile.from_string`; a
    `MarkdownFile` input is modified directly and returned (no copy is
    made).

    `vault` is only used when `replace_embedded_links_with_content` is
    `True`; it is passed to `MarkdownFile.replace_embedded_links_with_text`.

    TODO: implement `remove_citation_footnotes` properly; the argument is
    accepted but not used at the moment.

    Even if `remove_html_tags` is set to `True`, only the HTML tags
    which are written within a single line of text are removed.
    """
    if isinstance(markdown_file, str):
        markdown_file = MarkdownFile.from_string(markdown_file)

    if remove_frontmatter_meta:
        markdown_file.remove_metadata()
    if remove_footnotes_to_embedded:
        markdown_file.remove_footnotes_to_embedded_links()
    if replace_embedded_links_with_content:
        markdown_file.replace_embedded_links_with_text(vault)

    if remove_see_also_section:
        markdown_file.remove_section('See Also')
    if remove_meta_section:
        markdown_file.remove_section('Meta')
    if remove_references_section:
        markdown_file.remove_section('References')

    if remove_double_asterisks:
        remove_double_asterisks_in_markdown_file(markdown_file)
    if remove_html_tags:
        markdown_file.remove_html_tags()
    if remove_links:
        markdown_file.replace_links_with_display_text()
    if remove_in_line_tags:
        markdown_file.remove_in_line_tags()
    if remove_headers:
        markdown_file.remove_headers()

    if merge_display_math_mode:
        markdown_file.merge_display_math_mode()
    # Compare against `None` rather than testing truthiness: the parameter is
    # documented as "if not `None`", so an empty-string separator must still
    # trigger the merge.
    if merge_display_math_mode_into_text is not None:
        markdown_file.merge_display_math_mode_into_preceding_text(
            separator=merge_display_math_mode_into_text)

    if no_double_blank_lines:
        markdown_file.remove_double_blank_lines()
    return markdown_file


In [None]:
# TODO: add examples