# helper

> Helper functions 

In [None]:
#| default_exp helper

In [None]:
#| export
from __future__ import annotations
from collections import OrderedDict
from collections.abc import Iterable
import datetime
from datetime import timezone
import errno
import glob
from graphlib import TopologicalSorter
from itertools import product
import os
from os import PathLike
from pathlib import Path
import platform
import re
from typing import Callable, Optional, Pattern, Sequence, Union

from natsort import natsorted

In [None]:
#| hide
from fastcore.test import *

## regex

In [None]:
#| export
def find_regex_in_text(
        text: str, # Text in which to find regex patter
        pattern: str | Pattern[str] # The regex pattern
        ) -> list[tuple[int]]: # Each tuple is of the form `(a,b)` where `text[a:b]` is the regex match.
    # TODO: rename into regex_indices_in_text
    # TODO: swap parameters.
    """Return ranges in `text` where `pattern` occurs.
    """
    matches = re.finditer(pattern, text)
    return [match.span() for match in matches]

The following example finds the occurrence of a Markdown footnote marker:

In [None]:
# Pattern for a footnote marker: a literal `[^`, a single digit, then a literal `]`.
regex_pattern = r'\[\^\d\]'
text = '[^1]: asdf'

output = find_regex_in_text(text, regex_pattern)
# Exactly one match is expected, covering the first four characters of `text`.
test_eq(output, [(0,4)])

# Slicing `text` with the returned range recovers the matched substring.
start, end = output[0]
test_eq(text[start:end], '[^1]')

If there are multiple matches for the regex pattern, then all of them are included in the returned list.

In [None]:
regex_pattern = r'\d+'  # Searches for one or more consecutive digits
text = '9000 is a big number. But you know what is bigger? 9001.'

output = find_regex_in_text(text, regex_pattern)
# Both runs of digits are reported.
test_eq(len(output), 2)

# The first range corresponds to the first run of digits in `text`...
start, end = output[0]
test_eq(text[start:end], '9000')

# ...and the second range corresponds to the second run.
start, end = output[1]
test_eq(text[start:end], '9001')

The following example detects YAML frontmatter text as used in Obsidian. This regex pattern is also used in `markdown.markdown.file.find_front_matter_meta_in_markdown_text`.

> TODO: see if a link to the above backticked text is rendered.

The regex pattern used is able to detect the frontmatter even when it is empty.

In [None]:
# Group 1 optionally (and lazily) captures the frontmatter body. The
# conditional `(?(1)\n|)` demands a newline before the closing `---` only
# when group 1 took part in the match, and demands nothing otherwise.
sample_regex = r'---\n([\S\s]*?)?(?(1)\n|)---'
# Empty frontmatter: the closing `---` immediately follows the opening line.
sample_str = '---\n---'
sample_output = find_regex_in_text(sample_str, sample_regex)
assert sample_output == [(0,7)]

# Non-empty frontmatter.
sample_str = '---\naliases: [this_is_an_aliases_for_the_Obsidian_note]\n---'
sample_output = find_regex_in_text(sample_str, sample_regex)
assert sample_output == [(0, len(sample_str))]  # The entire sample_str is detected.


Contrast the regex pattern above with the pattern `---\n[\S\s]*?\n---`, which does not detect empty YAML frontmatter text.

In [None]:
# Use a raw string so that `\S` and `\s` reach the regex engine untouched
# instead of being parsed as (invalid) string escape sequences. The regex
# engine itself interprets `\n` as a newline, so the pattern is unchanged.
sample_regex = r'---\n[\S\s]*?\n---'
sample_str = '---\n---'
sample_output = find_regex_in_text(sample_str, sample_regex)
# This pattern needs two newlines between the `---` lines, so empty
# frontmatter (which has only one) is not matched.
assert not sample_output

In [None]:
#| export
def replace_string_by_indices(
        string: str, # String in which to make replacemenets 
        replace_ranges: Sequence[Sequence[int] | int], # A list of lists/tuples of int's or a single list/tuple of int's. Each 
        replace_with: Sequence[str] | str # The str(s) which will replace the substrings at `replace_ranges` in `string`. `replace_with` must be a str exactly when `replace_ranges` is a Sequence of a single Sequence of int.
        ) -> str:  # The str obtained by replacing the substrings at `replace_range` in `string` by the strs specified by `replace_with`.
    """Replace parts of ``string`` at the specified locations"

    Use this with `find_regex_in_text`.

    **Parameters**

    - ``string`` - `str`
    - ``replace_ranges`` - `Sequence[Sequence[int] | int]`
        - Each list or tuple is of one or two int's. In particular,
        ``[a,b]`` or ``(a,b)`` means that ``string[a:b]`` is to be replaced.
        ``[a]`` means that ``string[a:]`` is to be replaced. The ranges should
        not overlap and should be arranged in chronological order.
    - ``replace_with`` - `Sequence[str] | str`
        - The str's which will replace the parts represented by 
        ``replace_ranges``. ``replace_ranges`` and ``replace_with`` must be
        both lists or both not lists. If they are lists, they must be of 
        the same length.

    **Returns**
    - str
    """
    if isinstance(replace_with, str):
        replace_ranges = [replace_ranges]
        replace_with = [replace_with]
    assert len(replace_ranges) == len(replace_with)
    if len(replace_ranges) == 0:
        return string
    str_parts = []
    for i, replace_string in enumerate(replace_ranges):
        replace_string = replace_with[i]
        if i > 0 and len(replace_ranges[i-1]) == 1:
            unreplaced_start_index = len(string)
        elif i > 0 and len(replace_ranges[i-1]) != 1:
            unreplaced_start_index = replace_ranges[i-1][1]
        else:
            unreplaced_start_index = 0
        unreplaced_end_index = replace_ranges[i][0]
        str_parts.append(string[unreplaced_start_index:unreplaced_end_index])
        str_parts.append(replace_string)

    # Add the last (unreplaced) part to str_parts.
    if len(replace_ranges[-1]) == 1:
        unreplaced_start_index = len(string)
    else:
        unreplaced_start_index = replace_ranges[-1][1]
    str_parts.append(string[unreplaced_start_index:])
    return "".join(str_parts)

The following are basic examples:

In [None]:
# A single range paired with a single replacement str.
assert replace_string_by_indices('hello world', replace_ranges=(0,5), replace_with='hi') == 'hi world'
# Several ranges paired with equally many replacement strs; replacing with '' deletes the ranges.
assert replace_string_by_indices('hello somebody', replace_ranges=[(0,1), (6,10)], replace_with=['', '']) == 'ello body'

### Definitions and notations

I surround definitions and notations with double asterisks `**`. The following methods distinguish between the two.