In [1]:
# ⬇️ Put everything in one notebook cell ⬇️
from pathlib import Path
import tifffile as tiff
import numpy as np

def chunk_bsd_tf_images(
    input_root: str | Path,
    output_root: str | Path,
    chunk_size: int = 10_000,
    compression: str | None = "zlib",   # lossless; set to None for no compression
) -> None:
    """
    Recursively walk `input_root`, find images whose filenames end with `_bsd.tf`,
    split each into square `chunk_size` patches, and save them under `output_root`.

    Filenames are written as:
        {orig_stem}_{y0}_{x0}{orig_suffix}

    where (x0, y0) are the *upper-left* pixel indices of the patch in the
    original image.  The original sub-directory structure is mirrored beneath
    `output_root`.

    Parameters
    ----------
    input_root : str or Path
        Directory tree to search.
    output_root : str or Path
        Destination directory tree.
    chunk_size : int, default 10_000
        Pixel width/height of each square patch.
    compression : str | None, default "zlib"
        tifffile lossless compression to use when writing.  `None` disables it.
    """
    input_root  = Path(input_root).expanduser().resolve()
    output_root = Path(output_root).expanduser().resolve()
    output_root.mkdir(parents=True, exist_ok=True)

    for tiff_path in input_root.rglob("*.tif"):
        last_part = tiff_path.name.split("/")[-1]
        if last_part.startswith("."):
            continue
        print(f"Processing {tiff_path.name.split("/")[-1]}")
        # Map TIFF lazily so huge files do not fill memory
        img = tiff.imread(tiff_path)
        height, width = img.shape[:2]

        # Where to recreate the sub-folder tree
        rel_parent = tiff_path.relative_to(input_root).parent
        out_parent = output_root / rel_parent
        out_parent.mkdir(parents=True, exist_ok=True)

        # Iterate over chunk origins
        for y0 in range(0, height, chunk_size):
            for x0 in range(0, width, chunk_size):
                y1, x1 = y0 + chunk_size, x0 + chunk_size
                patch = img[y0:y1, x0:x1]

                out_name = f"{tiff_path.stem}_{y0}_{x0}{tiff_path.suffix}"
                out_path = out_parent / out_name

                # Preserve dtype; write with optional lossless compression
                tiff.imwrite(out_path, patch, compression=compression)

        # Explicitly close memmap to release file handle
        del img

    print("✅  All matching files have been chunked and saved.")

# Example call: chunk every matching TIFF under `input_folder` and write the
# patches to the sibling directory `output_folder`.
# NOTE(review): these are absolute paths on an external volume; the cell only
# works while that volume is mounted. Running the cell starts the chunking
# immediately, because the call below is executed at cell level.
input_folder = "/Volumes/Chris_SSD/file_shuttle/20250630-DZX-pt1/EM"
output_folder = "/Volumes/Chris_SSD/file_shuttle/20250630-DZX-pt1/EM-chunked"
chunk_bsd_tf_images(input_folder, output_folder)


✅  All matching files have been chunked and saved.


In [1]:
import re
from pathlib import Path
import numpy as np
import tifffile

def img_unchunker(input_folder: str, output_folder: str) -> None:
    """
    Scan `input_folder` for tiles named like
      processed_<base>_x{X}_y{Y}.tif
    Group by <base>, stitch all tiles into one big mosaic according to their (X,Y) offsets,
    and save each mosaic in `output_folder` as <base>.tif (dropping the "processed_" prefix).

    X is the column offset and Y the row offset of the tile's upper-left corner
    in the mosaic. Pixels not covered by any tile are left at zero. Tiles may be
    2-D (h, w) or carry trailing axes such as channels (h, w, c); all tiles of
    one mosaic must share the same trailing axes. If tiles of one mosaic differ
    in dtype, the mosaic uses the promoted dtype that can hold all of them.

    Each tile is read twice (once to measure it, once to copy it) so that only
    one tile plus the mosaic is held in memory at a time.

    Args:
        input_folder: path to folder containing the processed_*.tif tiles
        output_folder: path to folder where full mosaics will be written

    Raises:
        ValueError: if tiles belonging to the same mosaic have different
            trailing (non-spatial) dimensions.
    """
    input_path = Path(input_folder)
    output_path = Path(output_folder)
    output_path.mkdir(parents=True, exist_ok=True)

    # regex to extract base, x, and y
    pattern = re.compile(r'^processed_(?P<base>.+?)_x(?P<x>\d+)_y(?P<y>\d+)\.tif$')

    # collect tiles; sorted so processing order (and therefore the winner among
    # overlapping tiles) does not depend on filesystem enumeration order
    groups = {}
    for tif in sorted(input_path.glob('processed_*.tif')):
        m = pattern.match(tif.name)
        if not m:
            continue
        base = m.group('base')
        x = int(m.group('x'))
        y = int(m.group('y'))
        groups.setdefault(base, []).append((tif, x, y))

    # process each group
    for base, tiles in groups.items():
        # first pass: determine output mosaic size, dtype and trailing axes
        max_x_extent = 0
        max_y_extent = 0
        dtype = None
        extra_dims = None

        for fp, x, y in tiles:
            arr = tifffile.imread(str(fp))
            h, w = arr.shape[:2]
            if dtype is None:
                dtype = arr.dtype
                extra_dims = arr.shape[2:]
            else:
                if arr.shape[2:] != extra_dims:
                    raise ValueError(
                        f"Tile {fp.name} has trailing dimensions {arr.shape[2:]}, "
                        f"expected {extra_dims} like the other tiles of '{base}'"
                    )
                # widen rather than silently truncating when dtypes differ
                dtype = np.promote_types(dtype, arr.dtype)
            max_x_extent = max(max_x_extent, x + w)
            max_y_extent = max(max_y_extent, y + h)

        # allocate mosaic (zero-filled; keeps any channel axes of the tiles)
        mosaic = np.zeros((max_y_extent, max_x_extent) + tuple(extra_dims), dtype=dtype)

        # second pass: copy each tile into the mosaic
        for fp, x, y in tiles:
            arr = tifffile.imread(str(fp))
            h, w = arr.shape[:2]
            mosaic[y:y+h, x:x+w] = arr

        # write out
        out_name = f"{base}.tif"
        tifffile.imwrite(str(output_path / out_name), mosaic)
        print(f"Written mosaic: {out_name} ({max_x_extent}×{max_y_extent})")

# Source folder with the processed_*.tif tiles and destination folder for the
# stitched mosaics. These rebind the same `input_folder` / `output_folder`
# names used by the chunking example earlier in the notebook.
# NOTE(review): absolute paths on an external volume — must be mounted to run.
input_folder = "/Volumes/Chris_SSD/lds_20250625"
output_folder = "/Volumes/Chris_SSD/lds_20250625_8bit"

# The call is commented out, so running this cell only defines the function and
# the two paths; uncomment to actually stitch the tiles.
# NOTE(review): the "Written mosaic" output stored with this cell was presumably
# produced by an earlier run with the call enabled — confirm.
#img_unchunker(input_folder, output_folder)

Written mosaic: 25-0073_5nm_Region12_bsd.tif (64640×32629)
Written mosaic: 25-0075_5nm_Region11_bsd.tif (33565×35375)
Written mosaic: 25-0077_5nm_R11_bsd.tif (21810×54124)
Written mosaic: 25-0079_5nm_R10_bsd.tif (24079×67298)
Written mosaic: 25-0081_5nm_R13_bsd.tif (32515×26702)
Written mosaic: 25-0081_5nm_R7_bsd.tif (23199×34038)
Written mosaic: 25-0083_5nm_R14_bsd.tif (49972×42486)
Written mosaic: 25-0085_5nm_Region13_bsd.tif (47416×31866)
Written mosaic: 25-0087_5nm_Region17_bsd.tif (38633×72488)
Written mosaic: 25-0089_5nm_Region15_bsd.tif (39248×53185)
