In [1]:
import glob
import os
import re

import numpy as np
import pandas as pd

In [2]:
# Pages whose file name contains any of these substrings are discarded ...
drop_keywords = ['Palette', 'Experimental']
# ... and of the remainder, only names containing one of these are kept.
keep_keywords = ['COMP', 'TOP', 'CHOP', 'SOP', 'MAT', 'DAT', 'Class']

# Compiled once instead of re-parsing the pattern for every file.
_HAS_DIGIT = re.compile("[0-9]")


def is_wanted_page(path):
    """
    Decide whether the documentation page at *path* should be kept.

    Only the file name (last path component) is inspected, so digits or
    keywords that happen to appear in a parent directory cannot change the
    selection.  A page is kept when its name

    * contains ``.html``,
    * contains none of ``drop_keywords``,
    * contains no decimal digit, and
    * contains at least one of ``keep_keywords``.

    Parameters
    ----------
    path : str
        Path (or bare file name) of the candidate page.

    Returns
    -------
    bool
        True if the page passes every rule above, False otherwise.
    """
    name = os.path.basename(path)
    if '.html' not in name:
        return False
    if any(keyword in name for keyword in drop_keywords):
        return False
    if _HAS_DIGIT.search(name):
        return False
    return any(keyword in name for keyword in keep_keywords)


# Every entry of the docs folder that looks like an HTML page.
matches = [
    match for match in glob.glob("./docs.derivative.ca/*")
    if '.html' in match
]

# Subset of `matches` that survives the keyword / digit rules.
matches_filtered = [match for match in matches if is_wanted_page(match)]

print(len(matches_filtered))

704


In [3]:
from pathlib import Path
import shutil

# --- customise these ---------------------------------------------------------
SRC_DIR  = Path("./docs.derivative.ca")       # folder all files live in
DEST_DIR = Path("./docs-trimmed")  # target folder you want to refresh
FILES = matches_filtered           # paths of the pages to copy into DEST_DIR
# -----------------------------------------------------------------------------

# Make sure the destination exists, then empty it.
# NOTE: this is destructive - everything already inside DEST_DIR is deleted.
DEST_DIR.mkdir(parents=True, exist_ok=True)
for item in DEST_DIR.iterdir():
    # shutil.rmtree needs the path as an argument and refuses symlinks, so
    # only real directories go through it; files and links are unlinked.
    if item.is_dir() and not item.is_symlink():
        shutil.rmtree(item)
    else:
        item.unlink()

# Copy each selected page (with its metadata) flat into DEST_DIR, keeping
# only the file name from the source path.
for p in FILES:
    src = Path(p)
    shutil.copy2(src, DEST_DIR / src.name)


In [4]:
from pathlib import Path

# ► EDIT THESE  ◄
# Folder whose .html / .htm pages are rewritten in place.
HTML_DIR = Path("./docs-trimmed")
# Element ids to delete from every page (add as many as needed).
IDS_TO_KILL = {"footer", "mw-navigation"}
# True  -> strip_ids keeps a "<name>.bak" copy of each original (written once).
# False -> no backup files are created.
MAKE_BACKUP = False

In [5]:
import re, shutil, textwrap
from bs4 import BeautifulSoup           # pip install beautifulsoup4 html5lib
from tqdm.auto import tqdm              # pip install tqdm (nice progress bar)

def strip_ids(html_path: Path, ids: set[str], backup: bool = True) -> int:
    """
    Delete all elements of an HTML file whose ``id`` attribute is in *ids*.

    The file is read as UTF-8 (undecodable bytes are ignored), parsed with
    the ``html5lib`` parser, and always written back in place as the
    serialised parse tree - even when nothing matched.

    Parameters
    ----------
    html_path : Path
        The HTML file to rewrite.
    ids : set[str]
        ``id`` values marking the elements to remove.
    backup : bool, default True
        If true, the unmodified text is saved next to the file as
        ``<name>.bak`` unless such a file already exists.

    Returns
    -------
    int
        How many elements matched and were removed.
    """
    source_text = html_path.read_text(encoding="utf-8", errors="ignore")
    document = BeautifulSoup(source_text, "html5lib")

    def has_doomed_id(element_id):
        # element_id is None for tags without an id attribute.
        return element_id in ids

    doomed = document.find_all(id=has_doomed_id)
    removed = len(doomed)
    for element in doomed:
        element.decompose()

    if backup:
        backup_path = html_path.with_suffix(html_path.suffix + ".bak")
        # Never overwrite an earlier backup: it holds the true original.
        if not backup_path.exists():
            backup_path.write_text(source_text, encoding="utf-8")

    html_path.write_text(str(document), encoding="utf-8")
    return removed


  from .autonotebook import tqdm as notebook_tqdm


In [6]:
# Collect every .html / .htm file directly inside HTML_DIR, in sorted order.
HTML_SUFFIXES = {".html", ".htm"}
html_files = sorted(
    candidate
    for candidate in HTML_DIR.iterdir()
    if candidate.suffix.lower() in HTML_SUFFIXES
)

if len(html_files) == 0:
    raise FileNotFoundError("No .html / .htm files in that folder 🤔")

# One (filename, removed) tuple per processed file.
summary = []
for page in tqdm(html_files, unit="file"):
    summary.append((page.name, strip_ids(page, IDS_TO_KILL, backup=MAKE_BACKUP)))

print("\nDone!\n")

# Per-file report, file names padded to the longest one so counts line up.
width = max(len(filename) for filename, _ in summary)
for filename, count in summary:
    print(f"{filename:<{width}}  →  {count:>3} element(s) removed")

# Overall totals.
total = sum(count for _, count in summary)
print(f"\n• Processed {len(summary)} file(s)")
print(f"• {total} total element(s) removed")
if MAKE_BACKUP:
    print("• Backups written")
else:
    print("• No backups made")


100%|██████████| 704/704 [01:09<00:00, 10.10file/s]


Done!

Ableton_Link_CHOP.html             →    2 element(s) removed
AbsTime_Class.html                 →    2 element(s) removed
Actor_COMP.html                    →    2 element(s) removed
ActorCOMP_Class.html               →    2 element(s) removed
Actors_Class.html                  →    2 element(s) removed
Add_SOP.html                       →    2 element(s) removed
Add_TOP.html                       →    2 element(s) removed
Alembic_SOP.html                   →    2 element(s) removed
Align_SOP.html                     →    2 element(s) removed
Ambient_Light_COMP.html            →    2 element(s) removed
Analyze_CHOP.html                  →    2 element(s) removed
Analyze_TOP.html                   →    2 element(s) removed
Anatomy_of_a_CHOP.html             →    2 element(s) removed
Angle_CHOP.html                    →    2 element(s) removed
Animation_COMP.html                →    2 element(s) removed
Annotate_COMP.html                 →    2 element(s) removed
AnnotateCOMP_Cla


