# How to Make Flashcards of Your Ward

1. Click the "Run All" button above.

2. When the File Explorer pops up, find the PDF of your ward directory (see [this link](https://github.com/code-for-neighborhood-needs/make_name_flashcards) for instructions) and click "Open".

3. When prompted, type the name you would like your [Anki](https://apps.ankiweb.net/) flashcard deck to have when it is imported into your [Anki](https://apps.ankiweb.net/) software.

In [None]:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
End-to-end pipeline that:
1. Extracts head-and-shoulder photos and names from a Ward Directory PDF
2. Builds an Anki-/Quizlet-style TSV (tab-separated) file
3. Renders a study-card PDF (one page per person: image on the left, first and last name on the right)
4. Generates an Anki .apkg deck from the TSV + images, asking for a custom deck name
5. Packages TSV, PDF, images, and .apkg into a single .zip archive

Usage:
    python flashcards_pipeline_with_anki.py [-o OUTPUT] [-n DECK_NAME]
    (A file selection dialog will open for PDF input.)

Optional flags:
    -o / --output     Base name for generated files (default: flashcards)
    -n / --deck-name  Title for the Anki flashcard deck (if omitted you are prompted; a blank answer uses "Flashcards Deck")
"""

import argparse
import csv
import os
import re
import random
import zipfile

import fitz  # PyMuPDF
import pandas as pd
import matplotlib.pyplot as plt
from matplotlib.backends.backend_pdf import PdfPages
from PIL import Image
import genanki

# For GUI file selection
import tkinter as tk
from tkinter import filedialog


def extract_cards(pdf_path: str, img_dir: str):
    """Extract one record per person (name, preferred name, photo) from a PDF.

    Text handling, per page:
      * Blank lines, lines containing "Ward Directory", and lines that are a
        single upper-case character are ignored.
      * A line shaped like "Last, First" starts a new record.
      * Any other line made only of letters, apostrophes, hyphens, and spaces
        (optionally prefixed with "Preferred:") is stored as the preferred
        name of the most recent record on that page; a later such line
        overwrites an earlier one.

    Image handling, per page: each distinct image is saved into ``img_dir`` as
    ``<page>_<index>.png``. It is then assigned to the not-yet-matched name on
    the same page whose line is vertically closest to the image (horizontal
    position is not considered). Images that cannot be matched stay on disk
    but are not referenced by any record.

    Args:
        pdf_path: Path of the PDF to read.
        img_dir: Directory that receives the extracted PNG files; created if
            it does not exist.

    Returns:
        A list of dicts in the order the names were encountered, each with the
        keys ``full_name`` ("Last, First"), ``pref_name`` (str or None), and
        ``img_file`` (path of the matched image, or "" when none matched).
    """
    os.makedirs(img_dir, exist_ok=True)

    NAME_RE = re.compile(r"^([A-Za-z][A-Za-z'\- ]+,\s+[A-Za-z][A-Za-z'\- ]+)$")
    PREF_RE = re.compile(r"^(?:Preferred:\s*)?([A-Za-z][A-Za-z'\- ]+)$")

    rows = []
    doc = fitz.open(pdf_path)
    try:
        for page in doc:
            page_dict = page.get_text("dict")
            # Per-page bookkeeping: vertical position of each name line and
            # the index of its record in `rows`, used for image matching.
            names = []

            for block in page_dict.get("blocks", []):
                for line in block.get("lines", []):
                    text = "".join(
                        span.get("text", "") for span in line.get("spans", [])
                    ).strip()
                    if not text or "Ward Directory" in text:
                        continue
                    if len(text) == 1 and text.isupper():
                        continue

                    # A full "Last, First" match takes precedence over the
                    # looser preferred-name pattern.
                    m_full = NAME_RE.match(text)
                    if m_full:
                        names.append({
                            "y": line["bbox"][1],
                            "matched": False,
                            "row_idx": len(rows),
                        })
                        rows.append({
                            "full_name": m_full.group(1),
                            "pref_name": None,
                            "img_file": "",
                        })
                        continue

                    m_pref = PREF_RE.match(text)
                    if m_pref and names:
                        rows[names[-1]["row_idx"]]["pref_name"] = m_pref.group(1)

            seen_xrefs = set()
            for img_idx, img in enumerate(page.get_images(full=True), start=1):
                xref = img[0]
                if xref in seen_xrefs:
                    continue
                seen_xrefs.add(xref)

                pix = fitz.Pixmap(doc, xref)
                # PNG output only supports gray/RGB data, so CMYK pixmaps
                # (with or without alpha) are converted to RGB first instead
                # of being dropped or failing in save().
                if pix.n - pix.alpha >= 4:
                    pix = fitz.Pixmap(fitz.csRGB, pix)
                img_name = f"{page.number+1:03d}_{img_idx}.png"
                img_path = os.path.join(img_dir, img_name)
                pix.save(img_path)
                pix = None  # release the pixmap memory promptly

                rects = page.get_image_rects(xref)
                if not rects:
                    continue
                y_img = rects[0].y0
                candidates = [n for n in names if not n["matched"]]
                if not candidates:
                    continue
                closest = min(candidates, key=lambda n: abs(n["y"] - y_img))
                closest["matched"] = True
                rows[closest["row_idx"]]["img_file"] = img_path
    finally:
        # Close the document even when extraction fails part-way through.
        doc.close()
    return rows


def write_tsv(rows, tsv_path):
    """Write a two-column TSV: image tag on the front, name on the back.

    Args:
        rows: Iterable of dicts with the keys ``full_name`` ("Last, First"),
            ``pref_name`` (str or None), and ``img_file`` (path or "").
        tsv_path: Destination file; overwritten if it exists.

    The back of each card is "<first> <last>", where the preferred name
    replaces the first name when one is present. A row without an image gets
    an empty front.
    """
    with open(tsv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.writer(f, delimiter="\t")
        writer.writerow(["Front", "Back"])
        for r in rows:
            front = f'<img src="{r["img_file"]}">' if r["img_file"] else ""
            # Split on the comma alone and strip afterwards: the amount and
            # kind of whitespace after the comma may vary (several spaces, a
            # tab), and a name without a comma must not raise.
            last, _, first = r["full_name"].partition(",")
            first = r["pref_name"] or first
            back = f"{first.strip()} {last.strip()}".strip()
            writer.writerow([front, back])


def make_pdf(tsv_path, pdf_path):
    """Render a PDF with one page per card: image on the left, name on the right.

    Rows are skipped when the ``Front`` column holds no ``<img src="...">``
    tag, when the ``Back`` column is empty, or when the referenced image file
    does not exist.

    Args:
        tsv_path: Tab-separated file with ``Front`` and ``Back`` columns.
        pdf_path: Destination of the rendered PDF.
    """
    # Read every cell as text and switch off NA parsing. Otherwise a Front
    # column made only of empty cells is parsed as float NaN (which breaks the
    # .str accessor) and NA-looking names would be turned into NaN.
    df = pd.read_csv(tsv_path, sep="\t", dtype=str, keep_default_na=False)
    image_paths = df["Front"].str.extract(r'<img src="([^\"]+)"', expand=False)
    full_names = df["Back"].fillna("")
    usable = image_paths.notna() & (full_names != "")

    with PdfPages(pdf_path) as pdf:
        for img_path, name in zip(image_paths[usable], full_names[usable]):
            if not os.path.isfile(img_path):
                continue

            fig, axes = plt.subplots(1, 2, figsize=(8.5, 5.5))
            try:
                for ax in axes:
                    ax.axis("off")
                # Close the image file as soon as it has been drawn.
                with Image.open(img_path) as photo:
                    axes[0].imshow(photo)
                axes[1].text(
                    0.5, 0.5, name,
                    ha="center", va="center",
                    fontsize=24, weight="bold",
                    transform=axes[1].transAxes
                )
                pdf.savefig(fig, bbox_inches="tight")
            finally:
                # Always release the figure, even if rendering fails.
                plt.close(fig)


def strip_img_prefix(html):
    """
    Reduce the ``src`` of every ``<img src="...">`` tag to its file name.

    Example: <img src="path/foo.png"> becomes <img src="foo.png">, which is
    the form Anki uses to look up media. Text outside such tags is returned
    unchanged.
    """
    img_tag = re.compile(r'<img\s+src="([^\"]+)"\s*>')
    return img_tag.sub(
        lambda found: f'<img src="{os.path.basename(found.group(1))}">',
        html,
    )


def build_apkg(tsv_path, image_dir, apkg_path, deck_name):
    """Generate an Anki .apkg deck from a TSV file and a directory of images.

    Args:
        tsv_path: Tab-separated file whose first row is a header and whose
            remaining rows hold a front and a back field.
        image_dir: Directory whose image files (.png, .jpg, .jpeg, .gif, .bmp)
            are all added to the package as media; ignored if it is empty or
            not a directory.
        apkg_path: Destination of the generated package.
        deck_name: Title given to the deck.

    Raises:
        ValueError: If a non-blank row does not have exactly two fields.
    """
    deck_id = random.getrandbits(63)
    model_id = random.getrandbits(63)
    deck = genanki.Deck(deck_id, deck_name)
    model = genanki.Model(
        model_id,
        "Simple Model",
        fields=[{"name": "Front"}, {"name": "Back"}],
        templates=[{
            "name": "Card 1",
            "qfmt": "{{Front}}",
            "afmt": '{{Front}}<hr id="answer">{{Back}}',
        }],
    )

    # newline="" lets the csv module handle line endings itself.
    with open(tsv_path, newline="", encoding="utf-8") as f:
        reader = csv.reader(f, delimiter="\t")
        next(reader, None)  # skip the header row
        for record in reader:
            if not record:
                # csv.reader yields [] for a blank line (e.g. a trailing one);
                # that is not a card and must not break the unpacking below.
                continue
            front_raw, back_raw = record
            # Media is referenced by bare file name inside the package.
            front = strip_img_prefix(front_raw)
            back = strip_img_prefix(back_raw)
            deck.add_note(genanki.Note(
                model=model,
                fields=[front, back],
            ))

    pkg = genanki.Package(deck)
    if image_dir and os.path.isdir(image_dir):
        pkg.media_files = [
            os.path.join(image_dir, fn)
            for fn in os.listdir(image_dir)
            if fn.lower().endswith((".png", ".jpg", ".jpeg", ".gif", ".bmp"))
        ]
    pkg.write_to_file(apkg_path)


def bundle_all(zip_path, tsv_path, pdf_path, img_dir, apkg_path):
    """Package the TSV, PDF, APKG, and image directory into one zip archive.

    Args:
        zip_path: Destination archive; overwritten if it exists.
        tsv_path, pdf_path, apkg_path: Files stored at the archive root under
            their base names.
        img_dir: Directory whose files are stored (recursively) under the
            directory's own name inside the archive.
    """
    with zipfile.ZipFile(zip_path, "w", zipfile.ZIP_DEFLATED) as zf:
        for single_file in (tsv_path, pdf_path, apkg_path):
            zf.write(single_file, os.path.basename(single_file))

        # abspath() drops a trailing separator, so "imgs/" and "imgs" both
        # resolve to the same parent and the images always keep their folder
        # name inside the archive.
        archive_base = os.path.dirname(os.path.abspath(img_dir))
        for root, _, files in os.walk(img_dir):
            for fn in files:
                path = os.path.join(root, fn)
                zf.write(path, os.path.relpath(path, start=archive_base))


def main():
    """Run the whole pipeline: pick a PDF, then write TSV, PDF, APKG, and ZIP.

    The optional flags ``-o/--output`` and ``-n/--deck-name`` set the base
    name of the generated files and the deck title. When no deck name is
    given, the user is prompted for one and a blank answer selects
    "Flashcards Deck". Returns without producing anything if no PDF is chosen.
    """
    # Parse output base name and deck name
    ap = argparse.ArgumentParser(
        description="Generate flashcards (TSV, PDF, APKG, ZIP) from a Ward Directory PDF"
    )
    ap.add_argument(
        "-o", "--output", default="flashcards",
        help="Base name for generated files (default: flashcards)"
    )
    ap.add_argument(
        "-n", "--deck-name", default=None,
        help="Title for the Anki flashcard deck (default: 'Flashcards Deck')"
    )
    # Parse before opening any dialog so that --help exits straight away.
    # Unknown arguments are ignored rather than treated as an error, because
    # a notebook kernel is started with its own flags in sys.argv (such as
    # "-f <connection file>") and parse_args() would abort on them.
    args, _ignored = ap.parse_known_args()

    # Prompt the user to select a Ward Directory PDF
    try:
        root = tk.Tk()
    except tk.TclError:
        # No display is available for a file dialog; ask for the path instead.
        pdf_file = input("Path to Ward Directory PDF: ").strip()
    else:
        root.withdraw()
        try:
            pdf_file = filedialog.askopenfilename(
                title="Select Ward Directory PDF",
                filetypes=[("PDF files", "*.pdf")]
            )
        finally:
            # Release the hidden root window once the dialog is done.
            root.destroy()
    if not pdf_file:
        print("No PDF selected. Exiting.")
        return

    base = args.output
    deck_name = args.deck_name
    if not deck_name:
        deck_name = input("Enter Anki deck name (default 'Flashcards Deck'): ").strip() or "Flashcards Deck"

    img_dir = f"{base}_images"
    tsv_path = f"{base}.tsv"
    pdf_path = f"{base}.pdf"
    apkg_path = f"{base}.apkg"
    zip_path = f"{base}_bundle.zip"

    # 1) Extract cards and images
    rows = extract_cards(pdf_file, img_dir)
    # 2) Write TSV
    write_tsv(rows, tsv_path)
    print(f"✔ TSV: {tsv_path} ({len(rows)} cards)")
    # 3) Render PDF
    make_pdf(tsv_path, pdf_path)
    print(f"✔ PDF: {pdf_path}")
    # 4) Build Anki deck with custom name
    build_apkg(tsv_path, img_dir, apkg_path, deck_name)
    print(f"✔ APKG: {apkg_path} (Deck: {deck_name})")
    # 5) Bundle everything
    bundle_all(zip_path, tsv_path, pdf_path, img_dir, apkg_path)
    print(f"✔ ZIP: {zip_path}\nAll done!")



In [None]:
# Run the pipeline: select the PDF, then generate the TSV, PDF, APKG, and ZIP.
main()

In [None]:
import os
from pathlib import Path
from IPython.display import display, FileLink

def download_zip(zip_path: Path):
    """
    Hand an existing file to the user for download.

    When ``google.colab`` can be imported, its download helper triggers a
    browser download. If that raises ImportError, a clickable link is
    displayed in the notebook instead.

    Raises FileNotFoundError when ``zip_path`` does not exist.
    """
    archive = Path(zip_path)
    if not archive.exists():
        raise FileNotFoundError(f"{archive} does not exist.")

    location = str(archive)
    try:
        # Download utility that ships with Colab
        from google.colab import files as colab_files
        colab_files.download(location)
    except ImportError:  # also covers its subclass ModuleNotFoundError
        # Plain Jupyter: fall back to a clickable link
        link = FileLink(location, result_html_prefix="Click here to download: ")
        display(link)

In [None]:
# Offer the bundle for download. "flashcards_bundle.zip" is the archive name
# main() writes when the default output base name ("flashcards") is used.
download_zip(Path("flashcards_bundle.zip"))