In [1]:
import numpy as np
from PIL import Image
from pathlib import Path
from tqdm.notebook import tqdm
import h5py

In [2]:
# Directory of each co-registered image stack, keyed by a short label.
# The label is reused later as the output file name.
PATHS_TO_STACKS = {
    label: Path(location)
    for label, location in (
        ("Gills", "../box/coregistered/Gills/"),
        ("Tektites", "../box/coregistered/Tektites/"),
        ("Rock", "../box/coregistered/Rock/"),
    )
}

In [5]:
def stack_to_dataset(path_to_glob: Path, ext: str = "tif", progress: bool = True) -> np.ndarray:
    """
    Read in a stack of images and convert to a 3D numpy array.

    Every file matching ``*.<ext>`` directly inside ``path_to_glob`` (the
    search is not recursive) is read in sorted path order, and the per-image
    arrays are stacked along a new leading axis.

    Parameters
    ----------
    path_to_glob : Path
        Directory that contains the image files.
    ext : str
        File extension to match. Surrounding whitespace and a single leading
        dot are ignored, so ``"tif"``, ``".tif"`` and ``" tif "`` are equivalent.
    progress : bool
        If True, wrap the file loop in a tqdm progress bar.

    Returns
    -------
    np.ndarray
        Array whose first axis indexes the images. The result is 3D when each
        image decodes to a 2D array (presumably single-channel slices of the
        same size -- confirm against the data).

    Raises
    ------
    ValueError
        If no file matches the pattern, or (from ``np.stack``) if the images
        do not all have the same shape.
    """
    ext = ext.strip()
    if ext.startswith("."):
        ext = ext[1:]

    # NOTE: ordering is lexicographic on the path, so slice numbers in the
    # file names must be zero-padded for the stack to come out in order.
    img_paths = sorted(Path(path_to_glob).glob(f"*.{ext}"))
    if not img_paths:
        # np.stack([]) would fail with an opaque "need at least one array"
        # message; say which directory/pattern was empty instead.
        raise ValueError(f"No '*.{ext}' files found in {path_to_glob}")

    def _read(img_path: Path) -> np.ndarray:
        # The context manager closes the file handle as soon as the pixel
        # data has been copied into the array, instead of leaving it to GC.
        with Image.open(img_path) as img:
            return np.array(img)

    _prog = tqdm if progress else lambda x: x
    return np.stack([_read(img_path) for img_path in _prog(img_paths)])


In [6]:
# Load every configured stack into memory, keeping the labels used in
# PATHS_TO_STACKS as the keys.
datasets = dict(
    (label, stack_to_dataset(stack_dir))
    for label, stack_dir in PATHS_TO_STACKS.items()
)

  0%|          | 0/1700 [00:00<?, ?it/s]

  0%|          | 0/1700 [00:00<?, ?it/s]

  0%|          | 0/1700 [00:00<?, ?it/s]

In [7]:
# Save each dataset to its own HDF5 file, ../data/<name>.hdf5, under the key "data".
# The output directory is created with pathlib rather than a shell escape so the
# cell behaves the same on every platform and is safe to re-run.
Path("../data").mkdir(parents=True, exist_ok=True)
for name, dataset in datasets.items():
    with h5py.File(f"../data/{name}.hdf5", "w") as f:
        # Store a boolean mask rather than raw intensities: True wherever a
        # value is strictly greater than the stack's global minimum.
        f.create_dataset("data", data=(dataset > dataset.min()), dtype=bool, compression="gzip")

In [7]:
# Read every *.jp2 file in the scan directory into a single stacked array.
images = stack_to_dataset(
    path_to_glob=Path("../box/HA-900_13.67_Paddle-fish-Tanis_colA-2_pag-0.35_2.14_jp2-10_bin2_"),
    ext="jp2",
)

  0%|          | 0/1700 [00:00<?, ?it/s]

In [8]:
# Dimensions of the loaded stack; the first axis counts the images read.
np.shape(images)

(1700, 1915, 1915)

In [9]:
# Write the unthresholded stack to ../data/images.hdf5 as a gzip-compressed
# dataset named "data". The ../data directory must already exist.
with h5py.File("../data/images.hdf5", mode="w") as f:
    f.create_dataset(
        "data",
        data=images,
        compression="gzip",
    )