In [1]:
import pandas as pd
import numpy as np

import glob
import re

In [None]:
matches = glob.glob("./docs.derivative.ca/*")


matches = [match for match in matches if '.html' in match]

drop_keywords = ['Palette', 'Experimental', 'Introduction', '\\File_', 'POP']
keep_keywords = ['COMP', 'TOP', 'CHOP', 'SOP', 'MAT', 'DAT', 'Class']
matches_filtered = []
for match in matches:
    drop = False
    for keyword in drop_keywords:
        if keyword in match:
            drop = True
            break
    if drop:
        continue
    res = re.findall(pattern="[0-9]", string=match)
    if len(res) > 0:
        continue
    keep = False
    for keyword in keep_keywords:
        if keyword in match:
            matches_filtered.append(match)
            keep = True
            break
    if keep:
        continue

print(len(matches_filtered))
# matches_filtered

675


In [3]:
from pathlib import Path
import shutil

# --- customise these ---------------------------------------------------------
SRC_DIR  = Path("./docs.derivative.ca")       # folder all files live in
DEST_DIR = Path("./docs-trimmed")  # target folder you want to refresh
FILES = matches_filtered
# -----------------------------------------------------------------------------

# make sure destination exists, then empty it
def recreate_dir(dir):
    dir.mkdir(parents=True, exist_ok=True)
    for item in dir.iterdir():
        (shutil.rmtree if item.is_dir() else item.unlink)()

recreate_dir(DEST_DIR)

for p in FILES:
    src = Path(p)
    shutil.copy2(src, DEST_DIR / src.name)


In [4]:
from pathlib import Path

# ► EDIT THESE  ◄
HTML_DIR   = Path("./docs-trimmed")
IDS_TO_KILL = {"mw-navigation", "footer"}      # any number of id strings
MAKE_BACKUP = False                                 # set False to skip .bak copies

In [5]:
import re, shutil, textwrap
from bs4 import BeautifulSoup           # pip install beautifulsoup4 html5lib
from tqdm.auto import tqdm              # pip install tqdm (nice progress bar)

def strip_ids(html_path: Path, ids: set[str], backup: bool = True) -> int:
    """
    Remove every element whose id is in *ids* from *html_path*.
    Returns the number of elements deleted.
    """
    original = html_path.read_text(encoding="utf-8", errors="ignore")
    soup     = BeautifulSoup(original, "html5lib")

    targets  = soup.find_all(id=lambda _id: _id in ids)
    count    = len(targets)

    for tag in targets:
        tag.decompose()

    if backup:
        bak = html_path.with_suffix(html_path.suffix + ".bak")
        if not bak.exists():
            bak.write_text(original, encoding="utf-8")

    html_path.write_text(str(soup), encoding="utf-8")
    return count


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
html_files = sorted(
    p for p in HTML_DIR.iterdir()
    if p.suffix.lower() in {".html", ".htm"}
)

if not html_files:
    raise FileNotFoundError("No .html / .htm files in that folder 🤔")

summary = []                      # (filename, removed) tuples

for fp in tqdm(html_files, unit="file"):
    removed = strip_ids(fp, IDS_TO_KILL, backup=MAKE_BACKUP)
    summary.append((fp.name, removed))

print("\nDone!\n")

# Pretty print a quick report
width = max(len(name) for name, _ in summary)
for name, n in summary:
    print(f"{name:<{width}}  →  {n:>3} element(s) removed")

total = sum(n for _, n in summary)
print(f"\n• Processed {len(summary)} file(s)")
print(f"• {total} total element(s) removed")
print("• Backups written" if MAKE_BACKUP else "• No backups made")


100%|██████████| 675/675 [01:08<00:00,  9.82file/s]


Done!

Ableton_Link_CHOP.html             →    2 element(s) removed
AbsTime_Class.html                 →    2 element(s) removed
Actor_COMP.html                    →    2 element(s) removed
ActorCOMP_Class.html               →    2 element(s) removed
Actors_Class.html                  →    2 element(s) removed
Add_SOP.html                       →    2 element(s) removed
Add_TOP.html                       →    2 element(s) removed
Alembic_SOP.html                   →    2 element(s) removed
Align_SOP.html                     →    2 element(s) removed
Ambient_Light_COMP.html            →    2 element(s) removed
Analyze_CHOP.html                  →    2 element(s) removed
Analyze_TOP.html                   →    2 element(s) removed
Anatomy_of_a_CHOP.html             →    2 element(s) removed
Angle_CHOP.html                    →    2 element(s) removed
Animation_COMP.html                →    2 element(s) removed
Annotate_COMP.html                 →    2 element(s) removed
AnnotateCOMP_Cla




In [7]:
sections_to_keep = [
    # video / texture sharing
    "RenderStream In TOP",
    "RenderStream Out TOP",
    "NDI In TOP",
    "NDI Out TOP",
    "Syphon Spout In TOP",
    "Syphon Spout Out TOP",
    "Shared Mem In TOP",
    "Shared Mem Out TOP",

    # data sharing
    "Shared Mem In CHOP",
    "Shared Mem Out CHOP",
    "OSC In CHOP",
    "OSC Out CHOP",
    "OSC In DAT",
    "OSC Out DAT",
    "TCPIP DAT",
    "WebSocket DAT",

    # cue-/control-level I/O that often bridges TD↔UE
    "DMX In CHOP",
    "DMX Out CHOP",
    "MIDI In CHOP",
    "MIDI Out CHOP",

    # TouchDesigner → Unreal plug-ins
    "Engine COMP",          # TouchEngine / Engine COMP for Unreal
    "TouchEngine",          # generic TouchEngine docs & classes

    # convenience
    "TOP to CHOP",          # moves GPU values to CHOP for OSC / TCP, etc.
    "TOP to DAT",           # text/JSON workflows
]

In [9]:
from markdownify import markdownify as md

DEST_DIR = Path('./docs-trimmed-md')
recreate_dir(DEST_DIR)

html_files = sorted(
    p for p in HTML_DIR.iterdir()
    if p.suffix.lower() in {".html", ".htm"}
)

full_markdown = "# The Full TouchDesigner Documentation in one Markdown File:\n\n"

for html_path in tqdm(html_files, unit="file"):
    new_path = (DEST_DIR / html_path.with_suffix(".md").name)
    name = new_path.name.replace('.md', '')

    with open(html_path, "r", encoding="utf-8") as f:
        html = f.read()

    markdown = md(html, heading_style="ATX", strip=['span'])
    markdown = markdown.strip()
    markdown = f"\n\n" + markdown
    full_markdown += markdown

    with open(new_path, "w", encoding="utf-8") as f:
        f.write(markdown)


100%|██████████| 675/675 [01:59<00:00,  5.67file/s]


In [10]:
import re, hashlib, itertools

print(len(full_markdown))

heading_re = re.compile(r'^(#{1,6})\s', re.M)  # matches Markdown headings

def iter_sections(md):
    """Yield (heading_line, body_text) tuples."""
    spans = list(heading_re.finditer(md))
    for match, next_match in itertools.zip_longest(spans, spans[1:]):
        start = match.start()
        end   = next_match.start() if next_match else len(md)
        yield md[start:end]

def dedup_sections(md: str) -> str:
    seen = set()
    unique_sections = []
    for section in iter_sections(md):
        digest = hashlib.sha1(section.encode()).hexdigest()
        if digest not in seen:
            seen.add(digest)
            unique_sections.append(section.rstrip())
    return '\n\n'.join(unique_sections) + '\n'

full_markdown = dedup_sections(full_markdown)

print(len(full_markdown))

13629831
9271236


In [11]:
import hashlib

def dedup_paragraphs(md: str) -> str:
    seen = set()
    uniq_parts = []
    for part in md.split('\n\n'):
        part_stripped = part.strip()           # ignore trailing whitespace
        digest = hashlib.sha1(part_stripped.encode()).hexdigest()
        if digest not in seen:
            seen.add(digest)
            uniq_parts.append(part_stripped)
    return '\n\n'.join(uniq_parts) + '\n'       # final trailing newline

print(len(full_markdown))
full_markdown = dedup_paragraphs(full_markdown)
print(len(full_markdown))


9271236
6572113


In [None]:
# pip install rapidfuzz
from rapidfuzz import fuzz, process

def dedup_fuzzy(paragraphs, threshold=98):
    unique = []
    for para in tqdm(paragraphs, unit="paragraph"):
        if not any(fuzz.ratio(para, u) >= threshold for u in unique):
            unique.append(para)
    return unique

print(len(full_markdown))
paras = full_markdown.split('\n\n')
full_markdown = '\n\n'.join(dedup_fuzzy(paras))
print(len(full_markdown))


6572113


 54%|█████▍    | 10379/19064 [01:58<02:10, 66.74paragraph/s]

In [None]:
with open('full_td_markdown.md', 'w', encoding='utf-8') as f:
    f.write(full_markdown)
print(full_markdown)

# The Full TouchDesigner Documentation in one Markdown File:



Ableton Link CHOP - TouchDesigner Documentation

# Ableton Link CHOP

From Derivative

[Jump to navigation](#mw-head)
[Jump to search](#searchInput)

## Summary[[edit](https://docs.derivative.ca/index.php?title=Template:Summary&action=edit&section=T-1 "Edit section: Summary")]

The Ableton Link CHOP retrieves timing information from an Ableton Link supported network.
For more information see: <http://www.ableton.com/en/link/>

The full support of the Ableton Live system is [TDAbleton](TDAbleton.html "TDAbleton"), a group of components that give you access to Ableton Songs, Tracks, Chains, Parameters and MIDI.

Ableton's Link FAQ is very helpful for issues on the Ableton end: <https://help.ableton.com/hc/en-us/articles/209776125-Link-FAQs>.

One common problem is that Ableton Link doesn't work with all sound drivers, including DirectX. The free application [ASIO4All](http://www.asio4all.com/) is an easy replacement that act