# Utils

Cells in this notebook can be used to perform scriptable tasks like generating pages based on data structures. Normally these would be implemented as Jekyll plugins, but GitHub Pages does not allow custom plugins. There's probably a more clever way to run these tasks in GitHub workflows or something, but this works fine.

## Generate Tag Pages

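The following cell reads `_config.yml` and all collection files, extracts their tags, and writes one `.md` file per tag into `_tags/` as `_tags/<slug>.md`, where the slug is the lowercased tag with spaces replaced by hyphens and any other non-word characters removed.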

Run this cell whenever new tags are added.


In [5]:
import os
import re
import glob
import yaml

# Load Jekyll configuration. An empty _config.yml parses to None, so fall back
# to an empty mapping instead of crashing on `.get` below.
with open('_config.yml', 'r', encoding='utf-8') as f:
    cfg = yaml.safe_load(f) or {}

# Determine collections (including posts). `collections` may be a mapping of
# name -> settings, a plain list of names, or empty/null; iterating a mapping
# or a list both yield the collection names.
collections = set(cfg.get('collections') or ())
collections.add('posts')

# Gather tags from all collection files
tags = set()
for coll in collections:
    # Every collection lives in "_<name>"; this also maps 'posts' -> '_posts'.
    dir_name = f"_{coll}"
    if not os.path.isdir(dir_name):
        continue
    for path in glob.glob(os.path.join(dir_name, '*.md')):
        with open(path, 'r', encoding='utf-8') as f:
            content = f.read()
        fm = re.match(r'^---\s*(.*?)\s*---', content, re.DOTALL)
        if not fm:
            continue
        try:
            data = yaml.safe_load(fm.group(1))
        except yaml.YAMLError as err:
            # One malformed file should not abort the whole run.
            print(f"Skipping {path}: could not parse front matter ({err})")
            continue
        # Empty front matter parses to None; anything but a mapping has no tags.
        if not isinstance(data, dict):
            continue
        raw_tags = data.get('tags') or []
        if isinstance(raw_tags, str):
            # Jekyll also accepts tags as a space-separated string. Iterating
            # the string directly would register every single character as a tag.
            raw_tags = raw_tags.split()
        elif not isinstance(raw_tags, (list, tuple)):
            raw_tags = []
        for t in raw_tags:
            if isinstance(t, str):
                t = t.strip()
                if t:
                    tags.add(t)

# Create tags directory and write tag pages
os.makedirs('_tags', exist_ok=True)
written_slugs = set()
for tag in sorted(tags):
    slug = re.sub(r'[^\w-]', '', tag.lower().replace(' ', '-'))
    if not slug:
        # e.g. a tag made only of punctuation would otherwise become '_tags/.md'
        print(f"Skipping tag {tag!r}: no characters usable in a file name.")
        continue
    if slug in written_slugs:
        print(f"Note: tag {tag!r} shares the slug '{slug}' with another tag; overwriting that page.")
    # Let PyYAML serialise the values so tags containing YAML syntax (':', '#',
    # quotes, 'yes', digits-only strings, ...) are quoted when necessary.
    # Plain tags produce exactly "layout: tag\ntag: <tag>\ntitle: <tag>\n".
    front_matter = yaml.safe_dump(
        {'layout': 'tag', 'tag': tag, 'title': tag},
        sort_keys=False,
        allow_unicode=True,
        default_flow_style=False,
    )
    header = f"---\n{front_matter}---"
    with open(os.path.join('_tags', f'{slug}.md'), 'w', encoding='utf-8') as f:
        f.write(header + '\n')
    written_slugs.add(slug)
# Report the number of files actually present after this run, not the raw tag count.
print(f"Generated {len(written_slugs)} tag pages in '_tags/' directory.")

Generated 93 tag pages in '_tags/' directory.


In [None]:
#%pip install pyyaml

Collecting pyyaml
  Downloading pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl.metadata (2.4 kB)
Downloading pyyaml-6.0.3-cp311-cp311-macosx_11_0_arm64.whl (175 kB)
Installing collected packages: pyyaml
Successfully installed pyyaml-6.0.3

[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m A new release of pip is available: [0m[31;49m25.1.1[0m[39;49m -> [0m[32;49m25.3[0m
[1m[[0m[34;49mnotice[0m[1;39;49m][0m[39;49m To update, run: [0m[32;49m/opt/homebrew/opt/python@3.11/bin/python3.11 -m pip install --upgrade pip[0m
Note: you may need to restart the kernel to use updated packages.


## Generate Markdown File for Each Image File

This cell generates a corresponding markdown file for each image in a source directory (`drawings/`, `stereoscopic-images/`, etc.) and puts it in the matching Jekyll collection directory (`_drawings/`, `_stereoscopic_images/`). Only images named `YYYY-MM-DD_<name>.jpg`, `.png`, or `.gif` are picked up. If the markdown file already exists, it will _not_ be overwritten.

In [9]:
import os
import re
import sys
from pathlib import Path

def generate_md_for_images(directory, dest_dir, layout, extensions=('jpg', 'png', 'gif')):
    """Create a front-matter-only markdown page for each dated image in a folder.

    Files directly inside ``directory`` whose names look like
    ``YYYY-MM-DD_<name>.<ext>`` (extension matched case-insensitively) each get
    a ``YYYY-MM-DD_<name>.md`` file in ``dest_dir``. Pages that already exist
    are left untouched, so running this repeatedly never overwrites edits.

    Args:
        directory: Folder containing the images (str or Path).
        dest_dir: Folder receiving the markdown files; created if missing.
        layout: Value written to the ``layout`` front-matter key.
        extensions: Image extensions to accept, with or without a leading dot.

    Returns:
        None. Progress is reported with ``print``.
    """
    ext_alternation = '|'.join(
        re.escape(str(ext).lstrip('.')) for ext in extensions if str(ext).lstrip('.')
    )
    if not ext_alternation:
        # An empty alternation would match names ending in a bare '.'.
        return
    img_pattern = re.compile(
        r'^(?P<date>\d{4}-\d{2}-\d{2})_(?P<name>.+)\.(?:' + ext_alternation + r')$',
        re.IGNORECASE,
    )
    directory = Path(directory)
    # is_dir() rather than exists(): a path pointing at a regular file would
    # otherwise pass the check and then fail inside iterdir().
    if not directory.is_dir():
        print(f"Source directory {directory} does not exist.")
        return
    dest_dir = Path(dest_dir)
    dest_dir.mkdir(parents=True, exist_ok=True)
    # Sorted so the "Created ..." log is stable from run to run.
    for img_path in sorted(directory.iterdir()):
        if not img_path.is_file():
            continue
        m = img_pattern.match(img_path.name)
        if not m:
            continue
        date = m.group('date')
        name = m.group('name')
        filename = img_path.name
        pagename = f"{date}_{name}"
        md_path = dest_dir / f"{pagename}.md"
        if md_path.exists():
            continue
        front_matter = "\n".join([
            "---",
            f"layout: {layout}",
            f"filename: {filename}",
            f"pagename: {pagename}",
            f"date: {date}",
            "tags:",
            "---",
            "",
        ])
        md_path.write_text(front_matter, encoding='utf-8')
        print(f"Created {md_path}")

# One row per image collection:
#   (folder holding the images, Jekyll collection folder, layout for the pages)
for directory, dest_dir, layout in [
    ('drawings/', '_drawings/', 'drawing'),
    ('stereoscopic-images/', '_stereoscopic_images/', 'stereoscopic_image'),
]:
    generate_md_for_images(directory, dest_dir, layout)


Created _drawings/2025-11-11_aurora-spotting.md
Created _drawings/2025-11-09_eyes.md
Created _drawings/2025-11-12_frequencies.md
Created _drawings/2025-11-10_timeout.md


In [8]:
# find images with the name pattern YYYY-MM-DD.jpg and rename them to YYYY-MM-DD_1.jpg, YYYY-MM-DD_2.jpg, etc.
def rename_images(directory):
    """Give bare ``YYYY-MM-DD.jpg`` files a numeric name suffix.

    Each file directly inside ``directory`` named ``YYYY-MM-DD.jpg``
    (extension matched case-insensitively) is renamed to
    ``YYYY-MM-DD_<n>.jpg``, where ``<n>`` is the lowest positive integer whose
    target name is not taken yet. Existing files are never overwritten.

    Args:
        directory: Folder to scan (str or Path). A missing folder is reported
            and skipped rather than raising.

    Returns:
        None. Each rename is reported with ``print``.
    """
    # pattern is YYYY-MM-DD.jpg
    img_pattern = re.compile(r'^(?P<date>\d{4}-\d{2}-\d{2})\.jpg$', re.IGNORECASE)
    directory = Path(directory)
    if not directory.is_dir():
        print(f"Source directory {directory} does not exist.")
        return
    # sorted() materialises the listing first, so the directory is not being
    # modified while it is still being iterated.
    for img_path in sorted(directory.iterdir()):
        if not img_path.is_file():
            continue
        m = img_pattern.match(img_path.name)
        if not m:
            continue
        date = m.group('date')
        # Find the first free suffix: _1, then _2, ... Previously only _1 was
        # tried and the file was silently left un-renamed when it was taken.
        index = 1
        new_path = directory / f"{date}_{index}.jpg"
        while new_path.exists():
            index += 1
            new_path = directory / f"{date}_{index}.jpg"
        img_path.rename(new_path)
        print(f"Renamed {img_path} to {new_path}")

# Apply the renaming to every image folder, in the same order as before.
for _image_dir in ('stereoscopic-images/', 'drawings/'):
    rename_images(_image_dir)

## Convert Images (JPG/PNG/TIF) to SVGs

In [10]:
# Batch JPG/PNG/TIF → SVG vectorization for black/white line art
# Tools required: ImageMagick and Potrace
#   macOS:    brew install imagemagick potrace
#   Ubuntu:   sudo apt-get update && sudo apt-get install -y imagemagick potrace
#   Windows:  winget install ImageMagick.ImageMagick ; winget install potrace.potrace

from pathlib import Path
import subprocess, os, tempfile, concurrent.futures
from typing import List
from dataclasses import dataclass
from tqdm import tqdm  # pip install tqdm

@dataclass
class Config:
    """Settings for one batch vectorization run.

    ``input_dir`` and ``output_dir`` may be given as strings; they are turned
    into ``Path`` objects after construction, and ``output_dir`` is created
    right away if it does not exist.
    """

    # Folder scanned for source images / folder receiving the SVG files.
    input_dir: Path
    output_dir: Path
    # Extensions to pick up; None selects ["jpg","jpeg","png","tif","tiff","bmp"].
    extensions: List[str] = None
    # Black/white cut-off in percent (0-100): raise to clean paper, lower if lines break.
    threshold: int = 88
    # Minimum area (pixels) for the ImageMagick pre-despeckle step; 0 disables it.
    despck: int = 0
    # Potrace speckle filter (area in px).
    turdsize: int = 2
    # Potrace curve smoothness (lower = sharper).
    alphamax: float = 1.0
    # Potrace curve optimization tolerance.
    opttol: float = 0.2
    # One of: black | white | left | right | minority | majority.
    turnpolicy: str = "minority"
    # When False, images whose SVG already exists are skipped.
    overwrite: bool = False
    # Worker thread count: the CPU count, but never fewer than 4.
    jobs: int = max(4, os.cpu_count() or 4)

    def __post_init__(self):
        # Accept plain strings for both folders.
        self.input_dir, self.output_dir = Path(self.input_dir), Path(self.output_dir)
        if self.extensions is None:
            self.extensions = ["jpg","jpeg","png","tif","tiff","bmp"]
        # Make sure the target folder is there before any worker writes to it.
        self.output_dir.mkdir(parents=True, exist_ok=True)


In [13]:
def _run(cmd: list):
    p = subprocess.run(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True)
    if p.returncode != 0:
        raise RuntimeError(f"Command failed: {' '.join(cmd)}\n{p.stderr}")
    return p

def _find_magick() -> str:
    # Prefer "magick" to avoid conflicts with other "convert" binaries
    for candidate in ["magick", "convert"]:
        try:
            subprocess.run([candidate, "-version"], stdout=subprocess.DEVNULL, stderr=subprocess.DEVNULL)
            return candidate
        except Exception:
            continue
    raise RuntimeError("ImageMagick not found. Install it and ensure 'magick' or 'convert' is on PATH.")

def _ensure_tools():
    """Fail early with a RuntimeError when ImageMagick or Potrace is unavailable."""
    # Raises on its own if no ImageMagick command is found; the name is not needed here.
    _find_magick()
    potrace_probe = ["potrace", "--version"]
    try:
        subprocess.run(
            potrace_probe,
            check=True,
            stdout=subprocess.DEVNULL,
            stderr=subprocess.DEVNULL,
        )
    except Exception:
        raise RuntimeError("Potrace not found. Install it and ensure 'potrace' is on PATH.")

def _to_pbm(src: Path, pbm_path: Path, cfg: Config):
    """Run ImageMagick to turn ``src`` into a thresholded bilevel file at ``pbm_path``.

    The command line is assembled in three parts: grayscale + threshold,
    an optional connected-components despeckle (only when ``cfg.despck`` is
    positive), and the output options.
    """
    binarize_args = [
        _find_magick(), str(src),
        "-colorspace", "Gray",
        "-filter", "point",      # no antialias
        "-resize", "100%",
        "-threshold", f"{cfg.threshold}%",
    ]
    despeckle_args = []
    if cfg.despck > 0:
        despeckle_args = [
            "-define", f"connected-components:area-threshold={cfg.despck}",
            "-define", "connected-components:mean-color=true",
            "-connected-components", "4",
            "-auto-level"
        ]
    output_args = ["-type", "bilevel", "-compress", "none", str(pbm_path)]
    _run(binarize_args + despeckle_args + output_args)

def _potrace_to_svg(pbm_path: Path, svg_path: Path, cfg: Config):
    """Trace the bitmap at ``pbm_path`` with Potrace and write the SVG to ``svg_path``.

    The tuning values (``turdsize``, ``alphamax``, ``opttol``, ``turnpolicy``)
    are taken from ``cfg``.
    """
    # Options that carry a value, kept in the order they appear on the command line.
    valued_options = {
        "--output": svg_path,
        "--turdsize": cfg.turdsize,
        "--alphamax": cfg.alphamax,
        "--opttolerance": cfg.opttol,
    }
    cmd = ["potrace", str(pbm_path), "--svg"]
    for flag, value in valued_options.items():
        cmd += [flag, str(value)]
    # "--tight" is left out on purpose: it crops the image to the smallest
    # size, which messes up the image dimensions.
    cmd += ["--flat", "--longcurve", "--group"]
    cmd += ["--turnpolicy", cfg.turnpolicy]
    _run(cmd)

def vectorize_one(src: Path, dst_dir: Path, cfg: Config) -> str:
    """Convert one image to ``<stem>.svg`` inside ``dst_dir``.

    Returns a short status line: ``"skip  <name>"`` when the SVG is already
    there and ``cfg.overwrite`` is off, otherwise ``"done  <name> → <svg>"``.
    The intermediate bitmap lives in a temporary directory that is removed
    when the conversion finishes.
    """
    svg_path = dst_dir / f"{src.stem}.svg"
    already_there = svg_path.exists()
    if already_there and not cfg.overwrite:
        return f"skip  {src.name}"
    with tempfile.TemporaryDirectory() as scratch:
        bitmap = Path(scratch) / f"{src.stem}.pbm"
        _to_pbm(src, bitmap, cfg)
        _potrace_to_svg(bitmap, svg_path, cfg)
    return f"done  {src.name} → {svg_path.name}"

def discover_inputs(root: Path, extensions: List[str]) -> List[Path]:
    """Recursively collect the image files under ``root``.

    Args:
        root: Folder to search, including all sub-folders.
        extensions: Accepted extensions, with or without a leading dot.
            Matching ignores case entirely, so ``.jpg``, ``.JPG`` and ``.Jpg``
            are all found regardless of the filesystem.

    Returns:
        Matching regular files in sorted order. Directories are never
        returned, and paths that resolve to the same file appear once.
    """
    suffixes = tuple(
        "." + ext
        for ext in sorted({e.lower().lstrip(".") for e in extensions})
        if ext
    )
    if not suffixes:
        return []
    seen = set()
    out = []
    # A single walk with a case-folded suffix test replaces the two glob
    # passes per extension (lower + upper), which missed mixed-case names.
    for candidate in sorted(root.rglob("*")):
        if not candidate.name.lower().endswith(suffixes):
            continue
        if not candidate.is_file():
            # e.g. a folder that happens to be called "scans.jpg"
            continue
        resolved = candidate.resolve()
        if resolved in seen:
            continue
        seen.add(resolved)
        out.append(candidate)
    return out

def batch_vectorize(cfg: Config):
    """Vectorize every image found under ``cfg.input_dir`` using a thread pool.

    Checks that the external tools are available, discovers the inputs, then
    runs ``vectorize_one`` for each file on ``cfg.jobs`` worker threads while
    showing a progress bar. A failure on one image is printed as a ``FAIL``
    line and does not stop the remaining conversions.
    """
    _ensure_tools()
    sources = discover_inputs(cfg.input_dir, cfg.extensions)
    if len(sources) == 0:
        print("No input files found.")
        return
    print(f"Found {len(sources)} images. Writing SVGs to: {cfg.output_dir}")
    with concurrent.futures.ThreadPoolExecutor(max_workers=cfg.jobs) as pool:
        # Remember which source each future belongs to, for error reporting.
        pending = {}
        for source in sources:
            pending[pool.submit(vectorize_one, source, cfg.output_dir, cfg)] = source
        progress = tqdm(concurrent.futures.as_completed(pending), total=len(pending))
        for finished in progress:
            try:
                # Only called to surface an exception; the status text is not used.
                finished.result()
            except Exception as err:
                print(f"FAIL  {pending[finished].name}: {err}")


In [None]:
# Adjust the folders and tuning values below, then execute this cell.
# Note: this rebinds the notebook-level name `cfg`, which the tag-page cell
# also assigns (there it holds the parsed _config.yml).
cfg = Config(
    # --- where to read and write ---
    input_dir="drawings",       # source folder with the JPG/PNG/TIF files
    output_dir="svgs",          # destination folder for the SVG files
    extensions=["jpg","jpeg","png","tif","tiff","bmp"],
    # --- bitmap clean-up (ImageMagick) ---
    threshold=88,               # higher cleans up paper; lower keeps faint lines intact
    despck=0,                   # 16 or 32 helps against dust/specks
    # --- tracing (Potrace) ---
    turdsize=2,                 # speck filter
    alphamax=1.0,               # use 0.6–0.8 for crisper corners
    opttol=0.2,
    turnpolicy="minority",
    # --- run behaviour ---
    overwrite=False,
    jobs=max(4, os.cpu_count() or 4),
)

# # Preset: stricter cleanup (uncomment to use)
# cfg.threshold = 90
# cfg.despck = 16
# cfg.alphamax = 0.8
# cfg.turdsize = 4

# # Preset: preserve thin lines (uncomment to use)
# cfg.threshold = 82
# cfg.despck = 0
# cfg.alphamax = 1.0
# cfg.turdsize = 2

batch_vectorize(cfg)


Found 676 images. Writing SVGs to: svgs


100%|██████████| 676/676 [01:40<00:00,  6.74it/s]
