-
Notifications
You must be signed in to change notification settings - Fork 17
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #110 from ilastik/faster-hdf5
Speed up HDF5 processing
- Loading branch information
Showing
18 changed files
with
1,535 additions
and
944 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,80 @@ | ||
#!/usr/bin/env python | ||
|
||
"""Generate HDF5 dataset of the specified size filled with zeros.""" | ||
|
||
import argparse
import json
import math

import h5py
import numpy
|
||
|
||
def parse_shape(s: str) -> tuple[int, ...]:
    """Convert a comma-separated, column-major shape string to a row-major tuple."""
    dims = [int(part) for part in s.split(",")]
    return tuple(dims[::-1])
|
||
|
||
def show_shape(shape: tuple[int, ...]) -> str:
    """Render a row-major shape tuple as a comma-separated, column-major string."""
    return ",".join(str(dim) for dim in shape[::-1])
|
||
|
||
def human_size(shape: tuple[int, ...], itemsize: int) -> str:
    """Return the total byte size of a dataset as a human-readable string.

    :param shape: dataset dimensions (any order; only the product matters).
    :param itemsize: size of one element in bytes.
    :return: e.g. ``"7.50 MiB"``; sizes of 1024 PiB or more stay in PiB.
    """
    # math.prod keeps exact Python-int arithmetic; numpy.prod coerced the
    # shape to int64 and could silently overflow for very large shapes.
    n = math.prod(shape) * itemsize
    suffixes = "bytes", "KiB", "MiB", "GiB", "TiB", "PiB"
    # After the loop, `suffix` holds the last unit tried; the sequence is
    # non-empty, so the loop variable is always bound.
    for suffix in suffixes:
        if n < 1024:
            break
        n /= 1024
    return f"{n:.2f} {suffix}"
|
||
|
||
def main():
    """Parse CLI options, print a JSON report of the dataset parameters,
    then create the zero-filled HDF5 dataset on disk."""
    ap = argparse.ArgumentParser(description=__doc__)
    ap.add_argument(
        "-p",
        "--path",
        help="path to HDF5 file (default: test-dataset.h5)",
        default="test-dataset.h5",
    )
    ap.add_argument(
        "-d", "--dataset", help="dataset name (default: /data)", default="/data"
    )
    ap.add_argument("-t", "--dtype", help="data type (default: uint8)", default="uint8")
    ap.add_argument(
        "-s",
        "--shape",
        help="comma-separated column-major shape (default: 64,64,3,64,10)",
        default="64,64,3,64,10",
    )
    ap.add_argument(
        "-c",
        "--chunk",
        help="comma-separated column-major chunk shape (default: no chunking)",
    )
    args = ap.parse_args()

    path = args.path
    # Normalize to exactly one leading slash.  lstrip removes *all* leading
    # slashes (e.g. "//data"), which removeprefix("/") did not.
    dataset = "/" + args.dataset.lstrip("/")
    dtype = numpy.dtype(args.dtype)
    shape = parse_shape(args.shape)
    chunk = parse_shape(args.chunk) if args.chunk is not None else None

    # Echo the effective parameters so the caller can confirm them.
    report = {
        "path": path,
        "dataset": dataset,
        "dtype": str(dtype),
        "shape": show_shape(shape),
        "shape_size": human_size(shape, dtype.itemsize),
    }
    if chunk is not None:
        report["chunk"] = show_shape(chunk)
        report["chunk_size"] = human_size(chunk, dtype.itemsize)
    print(json.dumps(report, indent=2))

    with h5py.File(path, "w") as f:
        ds = f.create_dataset(dataset, shape=shape, dtype=dtype, chunks=chunk)
        # write_direct forces the zeros to be physically written; without it,
        # HDF5 would leave the (implicitly zero-filled) dataset unallocated.
        ds.write_direct(numpy.zeros(shape, dtype=dtype))


if __name__ == "__main__":
    main()
120 changes: 120 additions & 0 deletions
120
src/main/java/org/ilastik/ilastik4ij/hdf5/DatasetDescription.java
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,120 @@ | ||
package org.ilastik.ilastik4ij.hdf5; | ||
|
||
import ch.systemsx.cisd.hdf5.HDF5DataSetInformation; | ||
import ch.systemsx.cisd.hdf5.IHDF5Reader; | ||
import hdf.hdf5lib.exceptions.HDF5AttributeException; | ||
import net.imagej.axis.AxisType; | ||
import org.ilastik.ilastik4ij.util.ImgUtils; | ||
import org.json.JSONException; | ||
|
||
import java.util.ArrayList; | ||
import java.util.Arrays; | ||
import java.util.List; | ||
import java.util.Objects; | ||
import java.util.Optional; | ||
|
||
import static org.ilastik.ilastik4ij.util.ImgUtils.guessAxes; | ||
import static org.ilastik.ilastik4ij.util.ImgUtils.parseAxes; | ||
|
||
/** | ||
* Metadata for HDF5 dataset. | ||
*/ | ||
public final class DatasetDescription { | ||
/** | ||
* Internal dataset path in a file. | ||
*/ | ||
public final String path; | ||
|
||
/** | ||
* Type of the dataset. | ||
*/ | ||
public final DatasetType type; | ||
|
||
/** | ||
* Dataset dimensions in the <em>column-major</em> order. | ||
*/ | ||
public final long[] dims; | ||
|
||
/** | ||
* Dimension axes. | ||
*/ | ||
public final List<AxisType> axes; | ||
|
||
/** | ||
* Whether {@link #axes} are read by {@link ImgUtils#parseAxes} | ||
* or inferred with {@link ImgUtils#guessAxes}. | ||
*/ | ||
public boolean axesGuessed; | ||
|
||
/** | ||
* Try to get dataset description for HDF5 dataset. | ||
*/ | ||
static Optional<DatasetDescription> ofHdf5(IHDF5Reader reader, String path) { | ||
Objects.requireNonNull(reader); | ||
Objects.requireNonNull(path); | ||
|
||
HDF5DataSetInformation info = reader.object().getDataSetInformation(path); | ||
|
||
Optional<DatasetType> type = DatasetType.ofHdf5(info.getTypeInformation()); | ||
if (!type.isPresent()) { | ||
return Optional.empty(); | ||
} | ||
|
||
long[] dims = ImgUtils.reversed(info.getDimensions()); | ||
if (!(2 <= dims.length && dims.length <= 5)) { | ||
return Optional.empty(); | ||
} | ||
|
||
List<AxisType> axes; | ||
boolean axesGuessed; | ||
try { | ||
axes = parseAxes(reader.string().getAttr(path, "axistags")); | ||
axesGuessed = false; | ||
} catch (HDF5AttributeException | JSONException ignored) { | ||
axes = guessAxes(dims); | ||
axesGuessed = true; | ||
} | ||
|
||
path = "/" + path.replaceFirst("/+", ""); | ||
return Optional.of(new DatasetDescription(path, type.get(), dims, axes, axesGuessed)); | ||
} | ||
|
||
public DatasetDescription( | ||
String path, DatasetType type, long[] dims, List<AxisType> axes, boolean axesGuessed) { | ||
this.path = Objects.requireNonNull(path); | ||
this.type = Objects.requireNonNull(type); | ||
this.dims = Objects.requireNonNull(dims).clone(); | ||
this.axes = new ArrayList<>(Objects.requireNonNull(axes)); | ||
this.axesGuessed = axesGuessed; | ||
} | ||
|
||
@Override | ||
public boolean equals(Object o) { | ||
if (this == o) return true; | ||
if (o == null || getClass() != o.getClass()) return false; | ||
DatasetDescription that = (DatasetDescription) o; | ||
return Objects.equals(path, that.path) && | ||
type == that.type && | ||
Arrays.equals(dims, that.dims) && | ||
Objects.equals(axes, that.axes) && | ||
axesGuessed == that.axesGuessed; | ||
} | ||
|
||
@Override | ||
public int hashCode() { | ||
int result = Objects.hash(path, type, axes, axesGuessed); | ||
result = 31 * result + Arrays.hashCode(dims); | ||
return result; | ||
} | ||
|
||
@Override | ||
public String toString() { | ||
return String.format( | ||
"DatasetDescription{path='%s', type=%s, dims=%s, axes=%s, axesGuessed=%s}", | ||
path, | ||
type, | ||
Arrays.toString(dims), | ||
axes, | ||
axesGuessed); | ||
} | ||
} |
Oops, something went wrong.