In [19]:
#| default_exp tags

In [20]:
#| export
from __future__ import annotations

# tags

> Tag discovery and substitution for template processing

<!-- # Prologue -->

In [21]:
#| export
import re
from pathlib import Path


In [22]:
#| hide
import os
import tempfile

from fastcore.test import *
from nbdev.showdoc import *
from pote.display import RenderJSON


# Tag Discovery

In [23]:
#| export
# Tag pattern: matches `<Tag-Name>`; capture group 1 is the name without the angle brackets.
# A name is one or more ASCII letters, digits, hyphens, or underscores (so `<>` and `<a b>` do not match).
TAG_PATTERN = re.compile(r'<([A-Za-z0-9_-]+)>')


In [24]:
#| export
def discover_tags(text: str) -> set[str]:
    "Return the distinct tag names (without angle brackets) that appear as `<Tag>` in `text`"
    return {match.group(1) for match in TAG_PATTERN.finditer(text)}

In [25]:
# Two distinct tags in one string are both found, returned without the angle brackets
text = "Hello <User-Name>, welcome to <Project-Name>!"
tags = discover_tags(text)
test_eq(tags, {'User-Name', 'Project-Name'})

# Text without any tag yields an empty set
test_eq(discover_tags("No tags here"), set())

# A repeated tag is reported once, because the result is a set
text_dup = "<Tag1> and <Tag2> and <Tag1> again"
test_eq(discover_tags(text_dup), {'Tag1', 'Tag2'})

In [26]:
#| export
def discover_tags_in_file(path: str | Path) -> set[str]:
    "Find all tags in a file, return empty set on error"
    try:
        with open(Path(path), 'r', encoding='utf-8') as f:
            return discover_tags(f.read())
    except Exception:
        return set()

In [27]:
# Check against a real template from the repo, if present.
# NOTE: the assertions are skipped entirely when the file does not exist at this relative path.
template_file = Path('../templates/Cursor/project.mdc')
if template_file.exists():
    tags_in_file = discover_tags_in_file(template_file)
    test_is('Project-Name' in tags_in_file, True)
    test_is('Project-Objective' in tags_in_file, True)

In [28]:
#| export
def discover_tags_in_templates(template_dir: str | Path, extensions: list[str] | None = None) -> dict[str, set[str]]:
    """
    Discover all tags in template files
    Returns dict mapping file paths to sets of tags found in each file
    """
    template_dir = Path(template_dir)
    if not template_dir.exists(): return {}
    # Default to common text extensions
    if extensions is None: extensions = ['.md', '.txt', '.mdc']
    result = {}
    for ext in extensions:
        for file_path in template_dir.rglob(f'*{ext}'):
            if file_path.is_file():
                try:
                    tags = discover_tags_in_file(file_path)
                    if tags:  # Only include files with tags
                        result[str(file_path.relative_to(template_dir))] = tags
                except Exception: pass
    return result

In [29]:
# Test discover_tags_in_templates against the repo's templates directory.
# NOTE: the assertions are skipped entirely when '../templates' does not exist at this relative path.
templates_dir = Path('../templates')
if templates_dir.exists():
    all_tags = discover_tags_in_templates(templates_dir)
    test_is(len(all_tags) > 0, True)  # Should find some files with tags
    test_is(isinstance(all_tags, dict), True)

# Tag Substitution


In [30]:
#| export
def substitute_tags(
    text: str,            # Text containing tags to substitute
    tags: dict[str, str], # Dictionary mapping tag names to values
    max_depth: int = 10,  # Maximum number of substitution passes (bounds nested/circular tags)
    warn_missing: bool = False # If True, print warnings for missing tags
) -> str:
    """
    Recursively substitute tags in text

    Each pass replaces every `<Tag>` currently in the text whose name is a key of
    `tags` with `str(tags[name])`. Passes repeat, up to `max_depth` times, so that
    values which themselves contain tags are expanded. Tags with no entry in `tags`
    are left as-is. A warning is printed if `max_depth` passes ran and tags that
    could still be substituted remain (e.g. circular definitions).
    """
    result = text
    depth = 0
    warned = set()  # missing tag names already reported, so each is reported only once

    def _expand(match):
        # Swap in the tag's value, or keep the original `<Tag>` text when the name is unknown
        name = match.group(1)
        return str(tags[name]) if name in tags else match.group(0)

    while depth < max_depth:
        found_tags = discover_tags(result)
        if not found_tags: break

        if warn_missing:
            missing = found_tags.difference(tags, warned)
            if missing:
                print(f"Warning: Missing tags: {missing}")
                warned |= missing

        # Nothing left that we know how to replace
        if found_tags.isdisjoint(tags): break

        # A single regex pass replaces only the tags present at the start of the pass.
        # Sequential str.replace over an unordered set could also expand tags introduced
        # by an earlier replacement in the same pass, making the outcome order-dependent.
        result = TAG_PATTERN.sub(_expand, result)
        depth += 1

    # Hit max depth - warn only when something substitutable is still left;
    # tags that are merely unknown were not cut short by the depth limit
    if depth >= max_depth:
        remaining = discover_tags(result)
        if not remaining.isdisjoint(tags):
            print(f"Warning: Max recursion depth ({max_depth}) reached. Unresolved tags: {remaining}")

    return result

In [31]:
# Simple substitution: every tag in the text has a value
text = "Hello <User-Name>, welcome to <Project-Name>!"
tags = {'User-Name': 'Vic', 'Project-Name': 'Nooprompter'}
result = substitute_tags(text, tags)
test_eq(result, "Hello Vic, welcome to Nooprompter!")

# Nested substitution: values may themselves contain tags, resolved over successive passes
tags_nested = {
    'Name': 'Vic',
    'Greeting': 'Hello <Name>',
    'Message': '<Greeting>, welcome!'
}
result = substitute_tags('<Message>', tags_nested)
test_eq(result, 'Hello Vic, welcome!')

# A tag with no entry in the mapping is left untouched
text = "Hello <Missing-Tag>"
result = substitute_tags(text, {})
test_eq(result, "Hello <Missing-Tag>")  # Should leave as-is

In [32]:
# Test recursion limit protection
# <A> and <B> expand into each other forever. With max_depth=5 exactly five passes run,
# each adding one "Value with ", and the still-unresolved <B> stays in the output.
# substitute_tags also prints its max-depth warning for this input.
circular_tags = {
    'A': 'Value with <B>',
    'B': 'Value with <A>'
}
test_eq(substitute_tags('<A>', circular_tags, max_depth=5), 'Value with Value with Value with Value with Value with <B>')



In [33]:
#| export
def substitute_tags_in_file(
    src: str | Path,          # Source file path
    dst: str | Path,          # Destination file path
    tags: dict[str, str],     # Dictionary mapping tag names to values
    **options                 # Additional options for substitute_tags (strict, max_depth, warn_missing)
) -> None:
    "Process file with tag substitution and write to destination"
    src, dst = Path(src), Path(dst)
    if not src.exists(): raise FileNotFoundError(f"Source file not found: {src}")
    with open(src, 'r', encoding='utf-8') as f: text = f.read()
    result = substitute_tags(text, tags, **options)
    dst.parent.mkdir(parents=True, exist_ok=True)
    with open(dst, 'w', encoding='utf-8') as f: f.write(result)

In [34]:
# Round-trip through real files inside a throwaway directory.
# TemporaryDirectory removes everything on exit, even when an assertion fails, so no
# manual cleanup is needed and no file handle is held open while the function writes.
with tempfile.TemporaryDirectory() as tmp_dir:
    tmp_src_path = Path(tmp_dir) / 'src.txt'
    # Destination sits in a not-yet-existing subdirectory to exercise parent creation
    tmp_dst_path = Path(tmp_dir) / 'out' / 'dst.txt'
    tmp_src_path.write_text("Project: <Project-Name>\nUser: <User-Name>", encoding='utf-8')

    tags = {'Project-Name': 'TestProject', 'User-Name': 'TestUser'}
    substitute_tags_in_file(tmp_src_path, tmp_dst_path, tags)
    test_eq(tmp_dst_path.read_text(encoding='utf-8'), "Project: TestProject\nUser: TestUser")

----
<!-- # Colophon -->

In [35]:
#|hide
#|eval: false

import fastcore.all as FC
import nbdev
from nbdev.clean import nbdev_clean

In [36]:
#|hide
#|eval: false

# Export this notebook only when fastcore reports IN_NOTEBOOK as true
# (presumably: running inside a Jupyter kernel — confirm against fastcore docs).
if FC.IN_NOTEBOOK:
    nb_path = '02_tags.ipynb'  # relative path of the notebook passed to the export call
    # nbdev_clean(nb_path)
    nbdev.nbdev_export(nb_path)