**Description**

This script transforms individual pages from the main entries of the *1476 Andong Kwŏn Genealogy* into high-resolution (300-dpi) JPEG images to facilitate the verification of the critical digital edition. Front and back matter, such as prefaces or postscripts, is excluded by specifying a start page and, where needed, a final leaf number. Volume one starts at page 13, and volumes two and three begin on page 3; volume three additionally stops after leaf 71. Because each leaf in the woodblock-printed volume contains two sides, the script labels the output accordingly: `01a`, `01b`, `12a`, `12b`, and so forth. Filenames are prefixed with the volume identifier (e.g., `songwabo_vol2_`) to keep them distinct from other volumes in the series. The final images are saved in a designated `jpg/` directory.

In [1]:
#!uv pip install --upgrade pymupdf

In [2]:
import fitz  # fitz is PyMuPDF
import os

In [3]:
# Export the main entries of volume one as 300-dpi JPEGs named by leaf and side.
pdf_path = "안동권씨성화보 AKS JE A 55020 001.pdf"
output_dir = "jpg"
start_page = 12  # Zero-based index: start from page 13.
start_number = 1  # Leaf number given to the first exported page.

# Create the directory jpg/ if it doesn't exist.
os.makedirs(output_dir, exist_ok=True)

saved = 0
# Open the PDF as a context manager so the file handle is released
# even if rendering or saving a page raises.
with fitz.open(pdf_path) as doc:
    for offset, page_index in enumerate(range(start_page, len(doc))):
        # Two consecutive PDF pages make up one leaf: side "a", then side "b".
        number = start_number + (offset // 2)
        side = "a" if offset % 2 == 0 else "b"
        filename = f"songwabo_vol1_{number:02d}{side}.jpg"
        filepath = os.path.join(output_dir, filename)

        page = doc[page_index]
        pix = page.get_pixmap(dpi=300)
        pix.save(filepath)
        saved += 1

# Report the pages actually written rather than an arithmetic estimate,
# which would go negative if the PDF had fewer pages than start_page.
print(f"Saved {saved} pages to '{output_dir}/'.")

Saved 114 pages to 'jpg/'.


In [4]:
# Export the main entries of volume two as 300-dpi JPEGs named by leaf and side.
pdf_path = "안동권씨성화보 AKS JE A 55020 002.pdf"
output_dir = "jpg"
start_page = 2  # Zero-based index: start from page 3.
start_number = 1  # Leaf number given to the first exported page.

# Create the directory jpg/ if it doesn't exist.
os.makedirs(output_dir, exist_ok=True)

saved = 0
# Open the PDF as a context manager so the file handle is released
# even if rendering or saving a page raises.
with fitz.open(pdf_path) as doc:
    for offset, page_index in enumerate(range(start_page, len(doc))):
        # Two consecutive PDF pages make up one leaf: side "a", then side "b".
        number = start_number + (offset // 2)
        side = "a" if offset % 2 == 0 else "b"
        filename = f"songwabo_vol2_{number:02d}{side}.jpg"
        filepath = os.path.join(output_dir, filename)

        page = doc[page_index]
        pix = page.get_pixmap(dpi=300)
        pix.save(filepath)
        saved += 1

# Report the pages actually written rather than an arithmetic estimate,
# which would go negative if the PDF had fewer pages than start_page.
print(f"Saved {saved} pages to '{output_dir}/'.")

Saved 108 pages to 'jpg/'.


In [5]:
# Export the main entries of volume three as 300-dpi JPEGs named by leaf and
# side, stopping once the configured final leaf has been written.
pdf_path = "안동권씨성화보 AKS JE A 55020 003.pdf"
output_dir = "jpg"
start_page = 2  # Zero-based index: start from page 3.
start_number = 1  # Leaf number given to the first exported page.
end_number = 71  # Last leaf to export; later pages in the PDF are skipped.

# Create the directory jpg/ if it doesn't exist.
os.makedirs(output_dir, exist_ok=True)

# Open the PDF as a context manager so the file handle is released on the
# early break below and also if rendering or saving a page raises.
with fitz.open(pdf_path) as doc:
    for offset, page_index in enumerate(range(start_page, len(doc))):
        # Two consecutive PDF pages make up one leaf: side "a", then side "b".
        number = start_number + (offset // 2)
        if number > end_number:
            break

        side = "a" if offset % 2 == 0 else "b"
        filename = f"songwabo_vol3_{number:02d}{side}.jpg"
        filepath = os.path.join(output_dir, filename)

        page = doc[page_index]
        pix = page.get_pixmap(dpi=300)
        pix.save(filepath)

print(f"Saved up to leaf number {end_number} into '{output_dir}/'.")

Saved up to leaf number 71 into 'jpg/'.
