In [None]:
#| default_exp markdown.obsidian.personal.machine_learning.notation

# markdown.obsidian.personal.machine_learning.notation
> Helper functions for handling data for ML models that make predictions about notation notes

In [None]:
#| export
from pathlib import Path
from typing import Optional, TypedDict, Union

from trouver.markdown.obsidian.personal.notation.parse import NotationNoteParsed, _notat_str
from trouver.markdown.obsidian.personal.note_processing import process_standard_information_note




In [None]:
from fastcore.test import *

## Describe Notation Note Data

In [None]:

#| export
class NotationNoteData(TypedDict):
    """
    Per-note record that goes into a single notation-linking datapoint.

    Each record describes one notation note, which plays the role of either
    the `origin_notation_note` (the note that could contain a link to the
    other one) or the `relied_notation_note` within the datapoint.
    """
    notation_note_name: str  # Name of the notation note being described.
    main_info_note: str  # Name of the main note of the notation note.
    processed_content: str  # Content of the notation note after processing.
    main_note_content: str  # Content of the main note of the notation note.
    latex_in_original_or_summarized: str  # First entry of the `latex_in_original` field in the note's YAML frontmatter metadata; when that is unavailable, the notation summarized by the note.
    summarized: str  # The notation that the notation note summarizes.
    reference: Optional[str]  # Name of the reference that the notation note comes from, or `None`.


In [None]:
#| export
def notat_note_data_from_parsed_and_main_note_processed(
        notat_note_name: str,  # Stored as `notation_note_name` in the result.
        notat_note_parsed: NotationNoteParsed,  # The parsed notation note; its main note name, main content, YAML frontmatter metadata and notation string are read.
        main_note_processed: str,  # Stored unchanged as `main_note_content` in the result.
        vault: Path,  # Forwarded to `process_standard_information_note` (presumably the Obsidian vault holding the note -- confirm against that function).
        reference: Optional[str] = None,  # Stored unchanged as `reference` in the result.
        ) -> NotationNoteData:
    """
    Assemble a `NotationNoteData` from an already parsed notation note and
    the already processed content of its main note.

    The notation note's main content is run through
    `process_standard_information_note` and converted to `str` to obtain
    `processed_content`. The `latex_in_original_or_summarized` entry is
    whatever `_notat_str` returns for the note's YAML frontmatter metadata
    and notation string.
    """
    main_note_name = notat_note_parsed.name_of_main_note

    # Process the body of the notation note and keep its string form.
    processed_note = process_standard_information_note(
        notat_note_parsed.main_content_markdown_file, vault)
    processed_text = str(processed_note)

    latex_or_summarized = _notat_str(
        notat_note_parsed.yaml_frontmatter_meta,
        notat_note_parsed.notation_str)
    summarized_notation = notat_note_parsed.notation_str

    return NotationNoteData(
        notation_note_name=notat_note_name,
        main_info_note=main_note_name,
        processed_content=processed_text,
        main_note_content=main_note_processed,
        latex_in_original_or_summarized=latex_or_summarized,
        summarized=summarized_notation,
        reference=reference,
    )
    

## Describe Notation Linking Data

In [None]:
#| export
class NotationLinkingDataPoint(TypedDict):
    """
    A `TypedDict` wrapping a single data point representing a pair of notation notes.

    The `*_origin*` fields describe `origin_notation_note` (the note that
    could link to the other one) and the `*_relied*` fields describe
    `relied_notation_note`. Each group mirrors the fields of
    `NotationNoteData`, so the reference fields accept `None` just like
    `NotationNoteData['reference']` does.
    """
    origin_notation_note_name: str # The name of `origin_notation_note`.
    main_of_origin_notation_note_name: str # The name of the main information note of `origin_notation_note`.
    origin_notation_note_content: str # The (processed) content of `origin_notation_note`.
    processed_main_of_origin_content: str # The content of the main information note of `origin_notation_note`.
    latex_in_original_or_summarized_in_origin: str # The first entry in the `latex_in_original` field in the YAML frontmatter metadata or, if unavailable, the notation that is summarized in `origin_notation_note`.
    summarized_in_origin: str # The notation that is summarized in `origin_notation_note`.
    reference_of_origin: Union[str, None] # The name of the reference from which the origin notation note and main information note come, or `None`.


    relied_notation_note_name: str # The name of `relied_notation_note`.
    main_of_relied_notation_note_name: str # The name of the main information note of `relied_notation_note`.
    processed_main_of_relied_content: str # The content of the main information note of `relied_notation_note`.
    relied_notation_note_content: str # The (processed) content of `relied_notation_note`.
    latex_in_original_or_summarized_in_relied: str # The first entry in the `latex_in_original` field in the YAML frontmatter metadata or, if unavailable, the notation that is summarized in `relied_notation_note`.
    summarized_in_relied: str # The notation that is summarized in `relied_notation_note`.
    reference_of_relied: Union[str, None] # The name of the reference from which the relied notation note and main information note come, or `None`.

    origin_links_to_relied: Union[bool, None] # `True` if `origin_notation_note` links to `relied_notation_note`, `False` otherwise. `None` is also allowed.



In [None]:
#| export
def data_point_to_notation_note_data_pair(
        data_point: NotationLinkingDataPoint
        ) -> tuple[NotationNoteData, NotationNoteData]:
    """
    Split a linking data point into its two per-note records.

    Returns `(origin, relied)`, where each entry is a `NotationNoteData`
    filled from the corresponding `*_origin*` / `*_relied*` fields of
    `data_point`. The `origin_links_to_relied` field is not carried over.
    """
    origin_side: NotationNoteData = {
        'notation_note_name': data_point['origin_notation_note_name'],
        'main_info_note': data_point['main_of_origin_notation_note_name'],
        'processed_content': data_point['origin_notation_note_content'],
        'main_note_content': data_point['processed_main_of_origin_content'],
        'latex_in_original_or_summarized': data_point['latex_in_original_or_summarized_in_origin'],
        'summarized': data_point['summarized_in_origin'],
        'reference': data_point['reference_of_origin'],
    }
    relied_side: NotationNoteData = {
        'notation_note_name': data_point['relied_notation_note_name'],
        'main_info_note': data_point['main_of_relied_notation_note_name'],
        'processed_content': data_point['relied_notation_note_content'],
        'main_note_content': data_point['processed_main_of_relied_content'],
        'latex_in_original_or_summarized': data_point['latex_in_original_or_summarized_in_relied'],
        'summarized': data_point['summarized_in_relied'],
        'reference': data_point['reference_of_relied'],
    }
    return origin_side, relied_side

In [None]:

#| export
def notation_note_data_pair_to_data_point(
        origin_notation_data: NotationNoteData,
        relied_notation_data: NotationNoteData,
        origin_links_to_relied: Optional[bool]=None
        ) -> NotationLinkingDataPoint:
    """
    Merge two per-note records into one linking data point.

    The fields of `origin_notation_data` populate the `*_origin*` entries and
    those of `relied_notation_data` populate the `*_relied*` entries.
    `origin_links_to_relied` is stored as given and defaults to `None`.
    """
    origin, relied = origin_notation_data, relied_notation_data
    data_point: NotationLinkingDataPoint = {
        # Fields describing the origin notation note.
        'origin_notation_note_name': origin['notation_note_name'],
        'main_of_origin_notation_note_name': origin['main_info_note'],
        'origin_notation_note_content': origin['processed_content'],
        'processed_main_of_origin_content': origin['main_note_content'],
        'latex_in_original_or_summarized_in_origin': origin['latex_in_original_or_summarized'],
        'summarized_in_origin': origin['summarized'],
        'reference_of_origin': origin['reference'],
        # Fields describing the relied notation note.
        'relied_notation_note_name': relied['notation_note_name'],
        'main_of_relied_notation_note_name': relied['main_info_note'],
        'relied_notation_note_content': relied['processed_content'],
        'processed_main_of_relied_content': relied['main_note_content'],
        'latex_in_original_or_summarized_in_relied': relied['latex_in_original_or_summarized'],
        'summarized_in_relied': relied['summarized'],
        'reference_of_relied': relied['reference'],
        # Label for the pair.
        'origin_links_to_relied': origin_links_to_relied,
    }
    return data_point

In [None]:
# Two hand-written per-note records to pair up.
origin_data = NotationNoteData(
    notation_note_name='origin_notation_note',
    main_info_note='main_of_origin_note',
    processed_content='Some content describing what the origin notation note means',
    main_note_content='Content of the main note of the origin notation note',
    latex_in_original_or_summarized=r'\operatorname{Gal}(L/K) = \operatorname{Aut}(L/K)',
    summarized=r'\operatorname{Gal}(L/K)',
    reference='Some reference',
)

relied_data = NotationNoteData(
    notation_note_name='relied_notation_note',
    main_info_note='main_of_relied_note',
    processed_content='Some content describing what the relied notation note means',
    main_note_content='Content of the main note of the relied notation note',
    latex_in_original_or_summarized=r'\operatorname{Top}(X)',
    summarized=r'\operatorname{Top}(X)',
    reference='Some reference',
)

# No link label is passed, so `origin_links_to_relied` is expected to be `None`.
pair_data = notation_note_data_pair_to_data_point(origin_data, relied_data)
test_eq(
    pair_data,
    {
        'origin_notation_note_name': 'origin_notation_note',
        'main_of_origin_notation_note_name': 'main_of_origin_note',
        'origin_notation_note_content': 'Some content describing what the origin notation note means',
        'processed_main_of_origin_content': 'Content of the main note of the origin notation note',
        'latex_in_original_or_summarized_in_origin': r'\operatorname{Gal}(L/K) = \operatorname{Aut}(L/K)',
        'summarized_in_origin': r'\operatorname{Gal}(L/K)',
        'reference_of_origin': 'Some reference',
        'relied_notation_note_name': 'relied_notation_note',
        'main_of_relied_notation_note_name': 'main_of_relied_note',
        'relied_notation_note_content': 'Some content describing what the relied notation note means',
        'processed_main_of_relied_content': 'Content of the main note of the relied notation note',
        'latex_in_original_or_summarized_in_relied': r'\operatorname{Top}(X)',
        'summarized_in_relied': r'\operatorname{Top}(X)',
        'reference_of_relied': 'Some reference',
        'origin_links_to_relied': None,
    },
)

# Splitting the data point again should give back both per-note records.
origin_recovered, relied_recovered = data_point_to_notation_note_data_pair(pair_data)
test_eq(
    (origin_recovered, relied_recovered),
    (
        {
            'notation_note_name': 'origin_notation_note',
            'main_info_note': 'main_of_origin_note',
            'processed_content': 'Some content describing what the origin notation note means',
            'main_note_content': 'Content of the main note of the origin notation note',
            'latex_in_original_or_summarized': r'\operatorname{Gal}(L/K) = \operatorname{Aut}(L/K)',
            'summarized': r'\operatorname{Gal}(L/K)',
            'reference': 'Some reference',
        },
        {
            'notation_note_name': 'relied_notation_note',
            'main_info_note': 'main_of_relied_note',
            'processed_content': 'Some content describing what the relied notation note means',
            'main_note_content': 'Content of the main note of the relied notation note',
            'latex_in_original_or_summarized': r'\operatorname{Top}(X)',
            'summarized': r'\operatorname{Top}(X)',
            'reference': 'Some reference',
        },
    ),
)