# Making a Mega File of Data 88E Materials

Concatenate the markdown files for the lecture slides, lecture notebooks, and textbook chapters into three large markdown files (one per category), and write the course-level summary to a fourth, standalone file.

In [None]:
import os
import shutil
import tempfile
import urllib.request
import zipfile
from pathlib import Path

In [None]:
# ── Configuration ──────────────────────────────────────────────────────────
# GitHub repository that hosts the course material, and the branch to fetch
REPO_URL = "https://github.com/data-88e/88e_training_material"
BRANCH = "main"

# Where to read the training material from:
#   - a path to your local clone of 88e_training_material,
#     e.g. Path("../88e_training_material") if both repos are side-by-side
#   - None, in which case the notebook downloads the repo automatically
TRAINING_MATERIAL_REPO_PATH = None

# Destination folder for the generated mega files (relative to this notebook)
OUTPUT_DIR = Path("./mega_files")

In [None]:
# ── Resolve source repo path ────────────────────────────────────────────────
# After this section:
#   repo_root – directory that contains the training material
#   _tmp_dir  – temp directory holding the download, or None for a local clone
if TRAINING_MATERIAL_REPO_PATH is not None:
    repo_root = Path(TRAINING_MATERIAL_REPO_PATH).expanduser().resolve()
    _tmp_dir  = None
    # Fail fast: a mistyped path would otherwise produce empty mega files
    # further down without any error.
    if not repo_root.is_dir():
        raise FileNotFoundError(
            f"TRAINING_MATERIAL_REPO_PATH is not an existing directory: {repo_root}"
        )
    print(f"Using local repo: {repo_root}")
else:
    print(f"Downloading {REPO_URL} @ {BRANCH} ...")
    _tmp_dir = tempfile.mkdtemp()
    try:
        zip_url  = f"{REPO_URL}/archive/refs/heads/{BRANCH}.zip"
        zip_path = Path(_tmp_dir) / "repo.zip"
        urllib.request.urlretrieve(zip_url, zip_path)
        with zipfile.ZipFile(zip_path) as zf:
            # Names of the top-level entries inside the archive (zip member
            # names always use "/" as separator).
            top_level = {
                name.split("/", 1)[0] for name in zf.namelist() if name.strip("/")
            }
            zf.extractall(_tmp_dir)
        if len(top_level) == 1:
            # Take the root folder name from the archive itself rather than
            # assuming it; the assumed name only holds for this exact repo
            # name and for archives whose root is "<repo>-<BRANCH>".
            repo_root = Path(_tmp_dir) / top_level.pop()
        else:
            # Unexpected layout: fall back to the previously assumed name
            repo_root = Path(_tmp_dir) / f"88e_training_material-{BRANCH}"
        if not repo_root.is_dir():
            raise FileNotFoundError(
                f"Extracted archive does not contain the expected folder: {repo_root}"
            )
        print(f"Downloaded to temp dir: {repo_root}")
    except Exception as e:
        # Do not leave a half-populated temp dir behind when anything fails
        shutil.rmtree(_tmp_dir, ignore_errors=True)
        raise RuntimeError(
            f"Failed to download {REPO_URL} (branch '{BRANCH}'). "
            f"Check your network connection or set TRAINING_MATERIAL_REPO_PATH "
            f"to a local clone.\nOriginal error: {e}"
        ) from e

# ── Define folders and their summary files ──────────────────────────────────
# Each row: (section label, folder inside the repo, summary file in that folder)
folders = {
    section: {
        "path":    repo_root / dir_name,
        "summary": repo_root / dir_name / summary_name,
    }
    for section, dir_name, summary_name in (
        ("slides",   "F24LS_md",       "summary.yaml"),
        ("lectures", "F24Lec_MD",      "LecNB_summary.yaml"),
        ("textbook", "F24Textbook_MD", "summary.yaml"),
    )
}

# Optional course-wide summary that gives global context
course_summary = repo_root / "course_summary.yaml"

# Make sure the destination for the mega files exists before anything is written
output_dir = OUTPUT_DIR
output_dir.mkdir(parents=True, exist_ok=True)
print(f"Output directory: {output_dir.resolve()}")

In [None]:


def make_section_mega_file(label: str, folder: Path, summary_file: Path):
    """Write ``<label>_mega.md``: the section summary plus all markdown files.

    The file is created inside the notebook-level ``output_dir``. The summary
    text (when ``summary_file`` exists) comes first, followed by every
    ``*.md`` file found recursively under ``folder`` in sorted path order.
    Each file is wrapped in START/END marker lines that carry its path
    relative to ``folder``.

    A missing summary, a missing folder, or a folder without markdown files
    is reported with a warning rather than silently producing an empty or
    near-empty mega file. The output file is still written in every case.

    Args:
        label: Section name; used in the output file name and, upper-cased,
            in the summary banner.
        folder: Directory searched recursively for ``*.md`` files.
        summary_file: File whose text is inserted at the top of the output.
    """
    out_path = output_dir / f"{label}_mega.md"
    with open(out_path, "w", encoding="utf-8") as outfile:
        # Insert section summary at top
        if summary_file.exists():
            outfile.write(f"\n\n--- SUMMARY for {label.upper()} ---\n\n")
            outfile.write(summary_file.read_text(encoding="utf-8"))
            outfile.write("\n\n")
        else:
            print(f"⚠️ No summary found for {label}: {summary_file}")

        # is_dir() guard: a missing folder is treated as "no files"
        md_files = sorted(folder.rglob("*.md")) if folder.is_dir() else []
        if not md_files:
            print(f"⚠️ No markdown files found for {label} in {folder}")

        # Insert all markdown files
        for md_file in md_files:
            rel_path = md_file.relative_to(folder)
            outfile.write(f"\n\n--- START {rel_path} ---\n\n")
            outfile.write(md_file.read_text(encoding="utf-8"))
            outfile.write(f"\n\n--- END {rel_path} ---\n\n")

    print(f"✅ Created {out_path}")

def make_course_summary_file():
    """Write ``course_mega.md`` containing the course-level summary.

    Copies the text of the notebook-level ``course_summary`` file, preceded
    by a banner line, into ``output_dir``. When that summary file does not
    exist, nothing is written and a warning is printed instead.
    """
    # Guard clause: without a course summary there is nothing to write
    if not course_summary.exists():
        print("⚠️ No course summary found")
        return

    target = output_dir / "course_mega.md"
    with target.open("w", encoding="utf-8") as sink:
        sink.write("--- COURSE SUMMARY ---\n\n")
        sink.write(course_summary.read_text(encoding="utf-8"))
    print(f"✅ Created {target}")



In [None]:
# One mega file per material section, in the order the sections are declared
for section_name in folders:
    section = folders[section_name]
    make_section_mega_file(section_name, section["path"], section["summary"])

# The course-level summary goes into its own standalone file
make_course_summary_file()

In [None]:
# ── Cleanup temp download (if applicable) ───────────────────────────────────
# _tmp_dir is None when a local clone was used; then there is nothing to delete.
if _tmp_dir is not None:
    try:
        shutil.rmtree(_tmp_dir)
    except Exception as cleanup_error:
        # Best effort only: a leftover temp dir must not fail the notebook
        print(f"⚠️  Could not clean up temp dir {_tmp_dir}: {cleanup_error}")
    else:
        print("Cleaned up temp download.")
print(f"\n✅ All mega files written to: {output_dir.resolve()}")