# markdown.obsidian.links
> Functions for parsing internal links in [Obsidian.md](https://obsidian.md/) style markdown.

Obsidian uses both Markdown style links and Wikilinks as [internal links](https://help.obsidian.md/How+to/Internal+link). Markdown style links are of the form `[text_shown](link)` whereas Wikilinks are of the form `[[link_to_markdown#possible_anchor_to_header|text_shown]]`. They have an exclamation mark `!` if they are embedded.

Within Obsidian, it is often more convenient to use Wikilinks for Vault-internal links for several reasons:

- Obsidian automatically searches for matching links and aliases for auto-completion when constructing a Wikilink.
![Obsidian_link_autocomplete_example.gif](/images/markdown_obsidian_links_Obsidian_link_autocomplete_example.gif)
- Wikilinks allow for the empty space character ` ` (whereas Markdown style links require empty space characters ` ` to be replaced with `%20`).

Nevertheless, Markdown style links have the following functions which Wikilinks lack:

- Markdown style links can contain external links (whether the links point to other Obsidian vaults or to a URL)
- Markdown style links can render LaTeX text.


In [None]:
#| default_exp markdown.obsidian.links

In [None]:
#| export
from __future__ import annotations
from enum import Enum
import re

from trouver.helper.regex import find_regex_in_text, replace_string_by_indices
from typing import Union


In [None]:
#| export
# TODO Make it so that these patterns don't capture latex code
# WIKILINK_PATTERN = r'!?\[\[.*?\]\]' 
# Matches a whole Wikilink, embedded (`![[...]]`) or not (`[[...]]`).
WIKILINK_PATTERN = r'!?\[\[[^\]]+\]\]'
# Matches only embedded Wikilinks.
EMBEDDED_WIKILINK_PATTERN = r'!\[\[[^\]]+\]\]'
# Groups: 1 = file name (possibly empty), 3 = anchor, 5 = custom text.
WIKILINK_CAPTURE_PATTERN = r'!?\[\[([^#\|]*?)(#(.*?))?(\|(.*?))?\]\]'

# Note that MARKDOWNLINK_PATTERN captures whitespace characters in its link, even though Obsidian
# does not. This is implemented to find any misformats in the Obsidian Markdown files.
MARKDOWNLINK_PATTERN = r'!?\[[^\]]+\]\([^)]+\)'  
# Matches only embedded Markdown-style links.
EMBEDDED_MARKDOWNLINK_PATERN = r'!\[[^\]]+\]\([^)]+\)'
# Correctly spelled name; the misspelled name above is kept for backward compatibility.
EMBEDDED_MARKDOWNLINK_PATTERN = EMBEDDED_MARKDOWNLINK_PATERN
# Groups: 1 = custom text, 2 = file name (possibly empty), 4 = anchor.
# The file name may be empty so that same-note anchor links such as
# `[text](#anchor)` (which MARKDOWNLINK_PATTERN finds) can also be parsed;
# the lookahead `(?!\))` still rejects a completely empty destination `[text]()`.
MARKDOWNLINK_CAPTURE_PATTERN = r'!?\[([^\]]*)\]\((?!\))([^)#]*)(#([^)]+))?\)'


# Matches any embedded link, Wikilink or Markdown-style.
EMBEDDED_PATTERN = f'{EMBEDDED_WIKILINK_PATTERN}|{EMBEDDED_MARKDOWNLINK_PATERN}'
# MARKDOWNLINK_CAPTURE = r'!?\[([^\]]+)\]\(([^)#])+(#[^)]+)?\)'

In [None]:
from os import PathLike

from fastcore.test import *
from nbdev.showdoc import show_doc

## Finding links in text via indices

In [None]:
#| export
def find_links_in_markdown_text(
        text: str
        ) -> list[tuple]: # Each tuple is of the form `(a,b)` where `text[a:b]` is an obsidian internal link.
    """Locate the internal links in a markdown text string.

    Both Wikilinks and Markdown-style links are searched for; the result
    is whatever `find_regex_in_text` reports for the combined pattern.

    **See Also**

    - `links_from_text`
    """
    # TODO: rename this function, say to link_ranges_in_text, 
    # because it is confusing when there is a links_from_text function below.
    either_link_pattern = '|'.join((WIKILINK_PATTERN, MARKDOWNLINK_PATTERN))
    return find_regex_in_text(text, pattern=either_link_pattern)


`find_links_in_markdown_text` returns a list of index ranges `(start, end)` such that `text[start:end]` is a link in the string.

In [None]:
# TODO: add markdown links to example

In [None]:
tutorial_text = r'''
This is an Obsidian note. It has some [[this_is_the_note_to_which_the_link_points|links]]!
Links are pretty neat. They can [[this_text_is_not_actually_shown|connect notes]] for you.
The following will create a link to the note `some_note`; the displayed text is `some_note`: [[some_note]]
You can also embed the contents of one note into another note. ![[note_being_embedded]].
The contents of `note_being_embedded` will be displayed when you view the note in Obsidian's view mode.
You can make anchors in links. For example [[note#This is a header title]] is a link to the note named
`note` and more specifically to the theader with title `This is a header title`.

The above links are all Wikilinks. Obsidian also supports Markdownlinks, e.g. [This is the text shown](This is the link.)

If the note of a link does not exist in an Obsidian vault, then Obsidian will create the note.
Even if the note does not have a header with title specified by the anchor of a link, Obsidian
will still open the note; it will not go to any particular header, however.
'''

ranges = find_links_in_markdown_text(tutorial_text)
match_strs = [tutorial_text[start:end] for start, end in ranges]
test_eq(match_strs, [
    '[[this_is_the_note_to_which_the_link_points|links]]', 
    '[[this_text_is_not_actually_shown|connect notes]]',
    '[[some_note]]',
    '![[note_being_embedded]]',
    '[[note#This is a header title]]',
    '[This is the text shown](This is the link.)'])

## `ObsidianLink` class

In [None]:
#| export
class LinkFormatError(Exception):
    """Raised when a string cannot be parsed into an `ObsidianLink`.

    **Attribute**

    - `text` - `str`
        - The offending string that failed to parse.
    """
    def __init__(self, text):
        message = f'Obsidian Markdown link is not formatted properly: {text}'
        super().__init__(message)
        # Keep the raw input around so callers can inspect what failed.
        self.text = text

In [None]:
#| export
class LinkType(Enum):
    """Enumeration of the two syntaxes an `ObsidianLink` can be written in.

    The members are `LinkType.WIKILINK` and `LinkType.MARKDOWN`.
    """
    # For the Markdown syntax, see https://www.markdownguide.org/basic-syntax/
    #
    # Markdown links encode spaces in the link as %20, e.g.
    #   [asdf](localization_of_a_module#Localization%20of%20a%20module%201)
    # links to the header "Localization of a module 1" in the file
    # localization_of_a_module.
    WIKILINK = 0
    MARKDOWN = 1



In [None]:
#| export
class ObsidianLink:
    """Object representing an Obsidian link.
    
    **Attributes**

    - `is_embedded` - `bool`
        - Whether or not the link is embedded.
    - `file_name` - `str`, or `-1`
        - The destination of the link. It is either 
        
          1. The Obsidian-vault-recognized name of the file that the link
          points to. It can be a path relative to the Obsidian vault path 
          without the file extension (.md), 
          2. an external link, such as a URL, or
          3. -1, in which case the object represents a generic link pointing
          to any file (this is for generating regex).
          
          Note that if `file_name` is the empty string, then the link is a
          link to the same file

    - `anchor` - `str`, `0`, or `-1`
        - The title of the header of the anchor in the destination that the
        link points to or the ID to the markdown block link (preceded by a
        carat `^`). If 0, then the `ObsidianLink` object represents a link
        without an anchor. If -1, then the object represents a generic link
        with or without an anchor (this is for generating regex).
    - `custom_text` - `str`, `0`, or `-1`
        - The custom text of the link.
        If 0, then the `ObsidianLink` object represents an internal link
        without custom text. If -1, then the object represents a generic
        internal link of any custom text (this is for generating regex).
    - `link_type` - `LinkType`
        - If `LinkType.WIKILINK`, then the str should be of the format
        `'[[<Obsidian-vault-recognized-name>(#anchor)?(|custom_text)]]'` 
        (The question marks here indicate optional components). Otherwise,
        the str should be a more standard Markdown link. Defaults to
        `LinkType.WIKILINK`.
    
    **Parameters**

    - is_embedded - bool
    - file_name - str, `-1`, or `None`
        - If `None`, set `self.file_name` to `-1`.
    - anchor - str, `0`, or `-1`
    - custom_text - str, `0`, or `-1`
    - link_type - `LinkType`
    """
    
    def __init__(
            self, is_embedded: bool, file_name: Union[str, int, None],
            anchor: Union[str, int], custom_text: Union[str, int],
            link_type: LinkType = LinkType.WIKILINK):
        self.is_embedded = is_embedded
        # `None` is documented as shorthand for "any file"; normalize it to
        # the -1 sentinel so that `is_abstract`/`to_string` treat it as such.
        self.file_name = -1 if file_name is None else file_name
        self.anchor = anchor
        self.custom_text = custom_text
        self.link_type = link_type


    @staticmethod
    def from_text(text: str) -> ObsidianLink:
        """Return an ObsidianLink object from text.

        `text` is first tried as a Wikilink and then as a Markdown-style
        link. For Markdown-style links, `%20` in the file name and anchor is
        decoded to a space. The patterns are matched at the start of `text`
        only, so trailing characters after the link are ignored.
                
        **Raises**

        - LinkFormatError
            - If `text` is not properly formatted as an Obsidian internal link.
        """
        is_embedded = text.startswith("!")
        matches = re.match(WIKILINK_CAPTURE_PATTERN, text)
        if matches:
            file_name = matches.group(1)
            anchor = matches.group(3)
            custom_text = matches.group(5)
            link_type = LinkType.WIKILINK
        else:
            matches = re.match(MARKDOWNLINK_CAPTURE_PATTERN, text)
            if not matches:
                raise LinkFormatError(text)
            file_name = matches.group(2).replace('%20', ' ')
            anchor = matches.group(4)
            if anchor:
                anchor = anchor.replace('%20', ' ')
            custom_text = matches.group(1)
            link_type = LinkType.MARKDOWN
        # An optional group that did not participate in the match is `None`;
        # the class represents "absent" with 0 instead.
        if anchor is None:
            anchor = 0
        if custom_text is None:
            custom_text = 0
        return ObsidianLink(is_embedded, file_name, anchor, custom_text, link_type)

    @staticmethod
    def _parse_text_as_wikilink(text: str):
        """
        Return details about the link `text` if `text` is a Wikilink or `None` if
        `text` is not a Wikilink.

        This is a helper method for `from_text`.

        Not implemented yet: currently always returns `None`.
        """
        # TODO
        return

    def to_regex(self
            )-> str: # Represents a regex.
        """Return the regex that this `ObsidianLink` object represents.

        Assumes that `self.file_name`, `self.anchor`, and `self.custom_text` are
        regex-formatted strings, e.g. if `self.custom_text` is `denotes?`, then the
        outputted regex-pattern matches links whose custom text is either `denote`
        or `denotes`.

        If neither `self.file_name`, `self.anchor` nor `self.custom_text` is `-1`,
        then the regex will in fact be a concrete string.
        """
        embedding = '!' if self.is_embedded else ''

        if isinstance(self.file_name, str):
            filing = self.file_name
        else:  # self.file_name == -1
            filing = r'([^#\|]*)?'
        
        if isinstance(self.anchor, str):
            anchoring = f'#{self.anchor}'
        elif self.anchor == 0:
            anchoring = ''
        else:  # self.anchor == -1
            anchoring = '(#(.*?))?'
          
        if isinstance(self.custom_text, str) and self.link_type == LinkType.WIKILINK:
            # In a Wikilink the custom text follows a literal pipe.
            customing = fr'\|{self.custom_text}'
        elif isinstance(self.custom_text, str) and self.link_type == LinkType.MARKDOWN:
            customing = self.custom_text
        elif self.custom_text == 0:
            customing = ''
        elif self.link_type == LinkType.MARKDOWN:  # self.custom_text == -1
            customing = r'(.*?)?'
        else:  # self.custom_text == -1
            customing = r'(\|(.*?))?'

        if self.link_type == LinkType.WIKILINK:
            return fr'{embedding}\[\[{filing}{anchoring}{customing}\]\]'
        else:
            # Markdown links format whitespace with '%20'
            filing = filing.replace(' ' , '%20')  
            anchoring = anchoring.replace(' ', '%20')
            return fr'{embedding}\[{customing}\]\({filing}{anchoring}\)'
    
    def __str__(self) -> str:
        # TODO: Choose what to do about | vs. \|.
        return self.to_string()

    def __hash__(self) -> int:
        # Objects that compare equal under `__eq__` produce the same regex,
        # so hashing the regex keeps `__hash__` consistent with `__eq__`.
        return hash(self.to_regex())

    def to_string(self
            ) -> str: # The string for the link
        """
        Return the string for the link if it is concrete.
 
        **Raises**

        - ValueError
            - If `self.file_name`, `self.anchor` or `self.custom_text`
            is -1, i.e. ambiguously represents a file, anchor or custom text.
        """
        if self.is_abstract():
            raise ValueError(
                'The ObsidianLink object is abstract.'
            )
        embedding = '!' if self.is_embedded else ''

        if isinstance(self.anchor, str):
            anchoring = f'#{self.anchor}'
        else:  # self.anchor == 0
            anchoring = ''
          
        if isinstance(self.custom_text, str):
            if self.link_type == LinkType.WIKILINK:
                customing = f'|{self.custom_text}'
            else:
                customing = self.custom_text
        else:  # self.custom_text == 0:
            customing = ''
        
        if self.link_type == LinkType.WIKILINK:
            return f'{embedding}[[{self.file_name}{anchoring}{customing}]]'
        else:
            # Markdown links format whitespace with '%20'
            file_name = self.file_name.replace(' ' , '%20')  
            anchoring = anchoring.replace(' ', '%20')
            return f'{embedding}[{customing}]({file_name}{anchoring})'
    
    def convert_link_type(
            self,
            link_type: LinkType
            ) -> ObsidianLink:
        """
        Return an equivalent Link object which has the specified
        `LinkType`.

        Not implemented yet: currently always returns `None`.
        """
        # TODO
        return
    
    def displayed_text(self
            ) -> str: # The displayed text
        # TODO: implement error if any of the attributes is -1
        """Returns the displayed text of this link.

        The custom text is used when there is one; otherwise the file name,
        followed by ` > <anchor>` when the link has an anchor.
        
        `self.file_name`, `self.custom_text` and `self.anchor` are
        assumed to be not `-1`.
        """
        if self.custom_text:
            return self.custom_text
        if not self.anchor:
            return self.file_name
        return f'{self.file_name} > {self.anchor}'

    def is_abstract(self) -> bool:
        """
        Return `True` if self is abstract, i.e. file_name, anchor,
        or custom_text is `-1`.
        """
        return (self.file_name == -1
                or self.anchor == -1
                or self.custom_text == -1)


    def __copy__(self):
        """Return a new instance of the same class with the same attributes."""
        new_instance = self.__class__(
            self.is_embedded,
            self.file_name,
            self.anchor,
            self.custom_text, 
            self.link_type)
        return new_instance

    def __eq__(self, other):
        """Return `True` if `other` has the same class and the same
        attributes as `self`."""
        if not isinstance(other, self.__class__) or not isinstance(self, other.__class__):
            return False
        return (
            self.is_embedded == other.is_embedded
            and self.file_name == other.file_name
            and self.anchor == other.anchor
            and self.custom_text == other.custom_text
            and self.link_type == other.link_type)
    

In [None]:
show_doc(ObsidianLink.from_text)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/obsidian/links.py#L149){target="_blank" style="float:right; font-size:smaller"}

### ObsidianLink.from_text

>      ObsidianLink.from_text (text:str)

*Return an ObsidianLink object from text.

**Raises**

- InteralLinkFormatError
    - If `text` is not properly formatted as an Obsidian internal link.*

In [None]:
show_doc(ObsidianLink.to_regex)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/obsidian/links.py#L192){target="_blank" style="float:right; font-size:smaller"}

### ObsidianLink.to_regex

>      ObsidianLink.to_regex ()

*Return the regex for that this `ObsidianLink` object represents.

Assumes that `self.file_name`, `self.anchor`, and `self.custom_text` are
regex-formatted strings, e.g. if `self.custom_text` is `denotes?`, then the
outputted regex-pattern matches links whose custom text is either `denote`
or `denotes`.

If neither `self.file_name`, `self.anchor` nor `self.custom_text` is `-1`,
then the regex will in fact be a concrete string.*

In [None]:
show_doc(ObsidianLink.__str__)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/obsidian/links.py#L238){target="_blank" style="float:right; font-size:smaller"}

### ObsidianLink.__str__

>      ObsidianLink.__str__ ()

*Return str(self).*

In [None]:
show_doc(ObsidianLink.to_string)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/obsidian/links.py#L242){target="_blank" style="float:right; font-size:smaller"}

### ObsidianLink.to_string

>      ObsidianLink.to_string ()

*Return the string for the link if it is concrete.

**Raises**

- ValueError
    - If `self.file_name`, `self.anchor` or `self.custom_text`
    is -1, i.e. ambiguously represents an anchor or custom text.*

In [None]:
show_doc(ObsidianLink.convert_link_type)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/obsidian/links.py#L282){target="_blank" style="float:right; font-size:smaller"}

### ObsidianLink.convert_link_type

>      ObsidianLink.convert_link_type (link_type:__main__.LinkType)

*Return an equivalent Link object which has the specified
`LinkType`.*

In [None]:
show_doc(ObsidianLink.displayed_text)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/obsidian/links.py#L293){target="_blank" style="float:right; font-size:smaller"}

### ObsidianLink.displayed_text

>      ObsidianLink.displayed_text ()

*Returns the displayed text of this link.

`self.file_name`, `self.custom_text` and `self.anchor` are
assumed to be not `-1`.*

In [None]:
show_doc(ObsidianLink.is_abstract)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/obsidian/links.py#L309){target="_blank" style="float:right; font-size:smaller"}

### ObsidianLink.is_abstract

>      ObsidianLink.is_abstract ()

*Return `True` if self is abstract, i.e. file_name, anchor,
or custom_text is `-1`.*

#### Parsing an ObsidianLink class from a string

The following example parses a Wikilink with an anchor and a custom display text:

In [None]:
internal_link = ObsidianLink.from_text("[[smooth_covering_map#Smooth covering map 1|smooth covering map]]")
assert internal_link.file_name == "smooth_covering_map"
assert internal_link.anchor == "Smooth covering map 1"
assert internal_link.custom_text == "smooth covering map"

The following example parses a Wikilink with an anchor, but not with a custom display text:

In [None]:
internal_link = ObsidianLink.from_text("[[sample_thing#anchor]]")
assert internal_link.file_name == "sample_thing"
assert internal_link.anchor == "anchor"
assert internal_link.custom_text == 0

The following example parses a Wikilink without an anchor, but with custom display text:

In [None]:
internal_link = ObsidianLink.from_text("[[another_file|this_is_the_text_shown]]")
assert internal_link.file_name == "another_file"
assert internal_link.anchor == 0
assert internal_link.custom_text == "this_is_the_text_shown"

The following example parses a Wikilink with neither an anchor nor custom display text:

In [None]:
internal_link = ObsidianLink.from_text("[[notation_I_S_ideal_of_vanishing_on_a_subset_of_the_spectrum_of_a_ring]]")
assert internal_link.file_name == "notation_I_S_ideal_of_vanishing_on_a_subset_of_the_spectrum_of_a_ring"
assert internal_link.anchor == 0
assert internal_link.custom_text == 0

The following example parses an embedded (Wiki)link

In [None]:
internal_link = ObsidianLink.from_text("![[_reference_some_reference]]")
assert internal_link.file_name == "_reference_some_reference"
assert internal_link.anchor == 0
assert internal_link.custom_text == 0

The following example parses a Markdownlink with an anchor:

In [None]:
internal_link = ObsidianLink.from_text("[asdf](localization_of_a_module#Localization of a module 1)")
assert internal_link.file_name == "localization_of_a_module"
assert internal_link.anchor == "Localization of a module 1"
assert internal_link.custom_text == "asdf"

The following example parses a Markdownlink with some spaces formatted as `%20`:

In [None]:
internal_link = ObsidianLink.from_text(r"[do do](Some%20note#Topic%201)")
assert internal_link.file_name == "Some note"
assert internal_link.anchor == "Topic 1"
assert internal_link.custom_text == "do do"

The following example parses a Markdownlink in which the displaytext has LaTeX in it:

In [None]:
internal_link = ObsidianLink.from_text("[$\\mathscr{O}(n)$](some_reference_notation_O_n_on_projective_scheme)")
assert internal_link.file_name == "some_reference_notation_O_n_on_projective_scheme"
assert internal_link.anchor == 0
assert internal_link.custom_text == "$\\mathscr{O}(n)$"
# TODO test links with file_name = -1

In [None]:
show_doc(ObsidianLink.__copy__)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/obsidian/links.py#L317){target="_blank" style="float:right; font-size:smaller"}

### ObsidianLink.__copy__

>      ObsidianLink.__copy__ ()

In [None]:

link = ObsidianLink(
        is_embedded=False,
        file_name="test_file",
        anchor="test_anchor",
        custom_text="test_custom",
        link_type=LinkType.WIKILINK
    )
link_copy = link.__copy__()

test_eq(link_copy, link)
test_eq(link_copy.is_embedded, link.is_embedded)
test_eq(link_copy.file_name, link.file_name)
test_eq(link_copy.anchor, link.anchor)
test_eq(link_copy.custom_text, link.custom_text)
test_eq(link_copy.link_type, link.link_type)

In [None]:
show_doc(ObsidianLink.__eq__)

---

[source](https://github.com/hyunjongkimmath/trouver/blob/main/trouver/markdown/obsidian/links.py#L326){target="_blank" style="float:right; font-size:smaller"}

### ObsidianLink.__eq__

>      ObsidianLink.__eq__ (other)

*Return self==value.*

In [None]:
link1 = ObsidianLink(
    is_embedded=False,
    file_name="test_file",
    anchor="test_anchor",
    custom_text="test_custom",
    link_type=LinkType.WIKILINK
)
link2 = ObsidianLink(
    is_embedded=True,
    file_name="another_file",
    anchor=0,
    custom_text=0,
    link_type=LinkType.MARKDOWN
)
link1_copy = link1.__copy__()

test_eq(link1, link1_copy)
test_ne(link1, link2)
test_ne(link1, "Not an ObsidianLink")

In [None]:
link1 = ObsidianLink(
    is_embedded=False,
    file_name="test_file",
    anchor="test_anchor",
    custom_text="test_custom",
    link_type=LinkType.WIKILINK
)

link1_different_embedded = ObsidianLink(
    is_embedded=not link1.is_embedded,
    file_name=link1.file_name,
    anchor=link1.anchor,
    custom_text=link1.custom_text,
    link_type=link1.link_type
)
test_ne(link1, link1_different_embedded)

link1_different_file_name = ObsidianLink(
    is_embedded=link1.is_embedded,
    file_name="different_file",
    anchor=link1.anchor,
    custom_text=link1.custom_text,
    link_type=link1.link_type
)
test_ne(link1, link1_different_file_name)

#### Constructing abstract ObsidianLink objects

We might want to detect links in Obsidian Markdown notes of certain formats. Constructing `ObsidianLink` objects via the constructor can help with this.

Setting `file_name`, `anchor`, or `custom_text` to `-1` yields an `ObsidianLink` object representing an abstract link. Use the `to_regex` method of the `ObsidianLink` object to get a regex str which detects links of the specified format.

In the following example, the anchor and custom text of the `ObsidianLink` object are both abstract - the regex pattern returned by `.to_regex` will detect any link of the specified type (by default, `LinkType.WIKILINK`) with the specified file name:

In [None]:
internal_link_object = ObsidianLink(is_embedded=False, file_name = 'hi', anchor=-1, custom_text=-1)
regex_pattern = internal_link_object.to_regex()
print(f'regex_pattern: {regex_pattern}')
assert re.match(regex_pattern, '[[hi#this is some anchor|this is some display text]]')
assert re.match(regex_pattern, '[[hi|some display text, but no anchor!]]')
assert re.match(regex_pattern, '[[hi#some anchor, but no custom text]]')
assert re.match(regex_pattern, '[[hi]]')  #No anchor and no custom text
assert not re.match(regex_pattern, '[[note_with_wrong_name]]')

regex_pattern: \[\[hi(#(.*?))?(\|(.*?))?\]\]


The following is an example where the custom_text is specified, but the file_name and anchor are both abstract:

In [None]:
# Abstract file name and anchor, concrete custom text: the regex should accept
# any Wikilink whose custom text is exactly the given text.
internal_link_object = ObsidianLink(is_embedded=False, file_name = -1, anchor=-1, custom_text='must match this text!')
regex_pattern = internal_link_object.to_regex()
print(f'regex_pattern: {regex_pattern}')
assert re.match(regex_pattern, '[[some note#some anchor|must match this text!]]')
# TODO: matching the below is not currently implemented.
# assert re.match(regex_pattern, '[[must match this text!]]')
assert re.match(regex_pattern, '[[some note|must match this text!]]')
assert re.match(regex_pattern, '[[#anchor in note|must match this text!]]')  # Empty file name: a link to an anchor in the same note
assert not re.match(regex_pattern, '[[some note#some anchor|Wrong text]]')

regex_pattern: \[\[([^#\|]*)?(#(.*?))?\|must match this text!\]\]


In the following example, the link type is specified to be `LinkType.MARKDOWN`:

In [None]:
internal_link_object = ObsidianLink(is_embedded=False, file_name = 'hi', anchor=-1, custom_text=-1, link_type=LinkType.MARKDOWN)
regex_pattern = internal_link_object.to_regex()
print(f'regex_pattern: {regex_pattern}')
assert re.match(regex_pattern, '[Some custom text](hi#Some anchor)')
assert re.match(regex_pattern, '[Some custom text, no anchor](hi)')
assert not re.match(regex_pattern, '[Some custom text, link an anchor in the same note](#anchor)')

regex_pattern: \[(.*?)?\]\(hi(#(.*?))?\)


### String of an ObsidianLink object

A concrete `ObsidianLink` object has a `to_string` function:

In [None]:
internal_link_object = ObsidianLink(is_embedded=True, file_name = 'compactly_generated_product_hom_adjunction', anchor='Title', custom_text='compactly generated', link_type=LinkType.WIKILINK)
print(internal_link_object.to_string())
internal_link_object = ObsidianLink(is_embedded=True, file_name = 'compactly_generated_product_hom_adjunction', anchor='Title 1', custom_text='compactly generated', link_type=LinkType.MARKDOWN)
print(internal_link_object.to_string())

![[compactly_generated_product_hom_adjunction#Title|compactly generated]]
![compactly generated](compactly_generated_product_hom_adjunction#Title%201)


## Find the links in text as ObsidianLink objects

In [None]:
#| export
def links_from_text(
        text: str
        ) -> list[ObsidianLink]: # The `ObsidianLink` objects are ordered by appearance.
    """
    Parse every link found in `text` into an `ObsidianLink` object.

    The link locations come from `find_links_in_markdown_text`; each
    located substring is parsed with `ObsidianLink.from_text`.
    """
    link_strs = []
    for begin, stop in find_links_in_markdown_text(text):
        link_strs.append(text[begin:stop])
    return list(map(ObsidianLink.from_text, link_strs))

Unlike `find_links_in_markdown_text`, the `links_from_text` function returns a list of `ObsidianLink` objects instead.

In [None]:
links = links_from_text(tutorial_text)
assert links[0].displayed_text() == 'links'  # The displayed text of [[this_is_the_note_to_which_the_link_points|links]] is `links`.
assert links[3].is_embedded  # ![[note_being_embedded]] is an embedded link
assert links[4].anchor == 'This is a header title'  # [[note#This is a header title]] has anchor `This is a header title`.

## Removing links from text

In [None]:
#| export
def remove_links_from_text(
        text: str,
        exclude: list[ObsidianLink] = None, # A list of `ObsidianLink` objects of links to not be removed.
        remove_embedded_note_links: bool = False # If `True`, remove links to embedded notes as well. Note that embedded links are replaced by their "display" text in the same manner as non-embedded links and are not replaced the content of the embedding. If `False`, does not modify embedded note links.
        ) -> str:
    """
    Return a copy of `text` in which each Obsidian link is replaced by
    its display text.

    Links that fully match the regex of any link in `exclude` are kept,
    and embedded links are kept unless `remove_embedded_note_links` is set.
    """
    kept_patterns = [
        re.compile(kept_link.to_regex()) for kept_link in (exclude or [])]
    result = text
    # Walk the links back to front so that the ranges of the links not yet
    # processed remain valid indices into `result`.
    for start, end in reversed(find_links_in_markdown_text(text)):
        link_str = text[start:end]
        if _do_not_remove_link(link_str, kept_patterns):
            continue
        link = ObsidianLink.from_text(link_str)
        if remove_embedded_note_links or not link.is_embedded:
            result = result[:start] + link.displayed_text() + result[end:]
    return result


def _do_not_remove_link(text: str, exclude_patterns: list[re.Pattern]) -> bool:
    """
    Return `True` if text fully matches any of `re.Pattern`'s in `exclude_patterns`.

    This is a helper method for `remove_links_from_text`. 
    """
    for exclude_pattern in exclude_patterns:
        if exclude_pattern.fullmatch(text):
            return True
    return False

We can remove all links from text and preserve the underlying display text of each link:

In [None]:
text = 'I have a [[this is a note#this is an anchor in the note|link]]'
links_removed = remove_links_from_text(text)
print(links_removed)
assert links_removed == 'I have a link'

text = 'Something about a [[some_reference_conormal_sheaf_of_a_locally_closed_embedding#For a locally closed embedding 2 4|conormal sheaf]]'
links_removed = remove_links_from_text(text)
print(links_removed)
assert links_removed == 'Something about a conormal sheaf'

text = 'This is a link without a specified display text: [[some_kind_of_note]].'
links_removed = remove_links_from_text(text)
print(links_removed)
assert links_removed == 'This is a link without a specified display text: some_kind_of_note.'

text = 'This is a link to an anchor without a specified display text: [[another_note#another anchor]].'
links_removed = remove_links_from_text(text)
print(links_removed)
assert links_removed == 'This is a link to an anchor without a specified display text: another_note > another anchor.'


I have a link
Something about a conormal sheaf
This is a link without a specified display text: some_kind_of_note.
This is a link to an anchor without a specified display text: another_note > another anchor.


We can specify links to not remove in this process:

In [None]:
text = 'This thing [[some note|denotes]] something'
do_not_remove_link = ObsidianLink(False, None, -1, 'denote(s)?')
links_removed = remove_links_from_text(text, exclude=[do_not_remove_link])
assert text == links_removed

This function does not remove embedded links by default.

In [None]:
text = '![[embedded note]]'
links_removed = remove_links_from_text(text)
print(links_removed)
assert links_removed == text

![[embedded note]]


In [None]:
text = '![[embedded note]]'
links_removed = remove_links_from_text(text, remove_embedded_note_links=True)
print(links_removed)
assert links_removed == 'embedded note'

embedded note


## Replacing links in text

In [None]:
#| export
def replace_links_in_text(
        text: str,
        links_to_replace: ObsidianLink,
        new_link_name: str
        # link_to_replace_with 
        ) -> str:
    """
    Point every link in `text` that matches `links_to_replace` at
    `new_link_name` instead.

    Only the file name of each matching link is changed; the rest of the
    link is re-rendered from what `ObsidianLink.from_text` parsed.
    """
    def _retargeted(link_str: str) -> str:
        # Parse the concrete link, swap its destination, and render it back.
        link = ObsidianLink.from_text(link_str)
        link.file_name = new_link_name
        return str(link)

    spans = find_regex_in_text(text, links_to_replace.to_regex())
    new_link_strs = [_retargeted(text[begin:stop]) for begin, stop in spans]
    return replace_string_by_indices(text, spans, new_link_strs)

In [None]:
# Test case 1: Replace a single wikilink
text1 = "This is a [[test link]] in the text."
link_to_replace1 = ObsidianLink(False, "test link", 0, 0, LinkType.WIKILINK)
new_link_name1 = "new link"
expected1 = "This is a [[new link]] in the text."
test_eq(replace_links_in_text(text1, link_to_replace1, new_link_name1), expected1)

# Test case 2: Replace multiple wikilinks
text2 = "[[link1]] and [[link1]] are the same."
link_to_replace2 = ObsidianLink(False, "link1", 0, 0, LinkType.WIKILINK)
new_link_name2 = "updated_link"
expected2 = "[[updated_link]] and [[updated_link]] are the same."
test_eq(replace_links_in_text(text2, link_to_replace2, new_link_name2), expected2)

# Test case 3: Replace markdown links
text3 = "This is a [markdown link](test_file.md) in the text."
link_to_replace3 = ObsidianLink(False, "test_file.md", 0, "markdown link", LinkType.MARKDOWN)
new_link_name3 = "new_file.md"
expected3 = "This is a [markdown link](new_file.md) in the text."
test_eq(replace_links_in_text(text3, link_to_replace3, new_link_name3), expected3)

# Test case 4: Replace links with anchors
text4 = "See [[file#section]] for more info."
link_to_replace4 = ObsidianLink(False, "file", "section", 0, LinkType.WIKILINK)
new_link_name4 = "new_file"
expected4 = "See [[new_file#section]] for more info."
test_eq(replace_links_in_text(text4, link_to_replace4, new_link_name4), expected4)

# Test case 5: Replace embedded links
text5 = "An embedded image: ![[image.png]]"
link_to_replace5 = ObsidianLink(True, "image.png", 0, 0, LinkType.WIKILINK)
new_link_name5 = "new_image.png"
expected5 = "An embedded image: ![[new_image.png]]"
test_eq(replace_links_in_text(text5, link_to_replace5, new_link_name5), expected5)

# Test case 6: No matches in text
text6 = "This text has no links to replace."
link_to_replace6 = ObsidianLink(False, "nonexistent", 0, 0, LinkType.WIKILINK)
new_link_name6 = "anything"
expected6 = "This text has no links to replace."
test_eq(replace_links_in_text(text6, link_to_replace6, new_link_name6), expected6)

# Test case 7: Replace links with custom text
text7 = "See [[file|Custom Text]] for details."
link_to_replace7 = ObsidianLink(False, "file", 0, "Custom Text", LinkType.WIKILINK)
new_link_name7 = "new_file"
expected7 = "See [[new_file|Custom Text]] for details."
test_eq(replace_links_in_text(text7, link_to_replace7, new_link_name7), expected7)