Skip to content

Commit

Permalink
hdf5: add zlib dataset compression support
Browse files Browse the repository at this point in the history
* Support dataset compression.

* Remove SZIP support for legal reasons in v1.8.x

* Fix according to comments

* Fix according to comments

* Change copyright year for new files

* Change compression level to type int

* Remove unnecessary check

* Resolve unsafe type conversion for C.hsize_t

* Remove unused if conditions

* Add api GetChunk
  • Loading branch information
yuanqj8191 authored and sbinet committed May 13, 2019
1 parent 100ef04 commit c8645ba
Show file tree
Hide file tree
Showing 2 changed files with 228 additions and 3 deletions.
61 changes: 58 additions & 3 deletions h5p_proplist.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,18 +7,31 @@ package hdf5
// #include "hdf5.h"
// #include <stdlib.h>
// #include <string.h>
// inline static
// hid_t _go_hdf5_H5P_DEFAULT() { return H5P_DEFAULT; }
// static inline hid_t _go_hdf5_H5P_DEFAULT() { return H5P_DEFAULT; }
// static inline hid_t _go_hdf5_H5P_DATASET_CREATE() { return H5P_DATASET_CREATE; }
import "C"

import (
"compress/zlib"
"fmt"
)

// Compression levels that can be passed to (*PropList).SetDeflate.
// Each constant is an alias of the compress/zlib constant with the same name.
const (
// NoCompression aliases zlib.NoCompression.
NoCompression = zlib.NoCompression
// BestSpeed aliases zlib.BestSpeed.
BestSpeed = zlib.BestSpeed
// BestCompression aliases zlib.BestCompression.
BestCompression = zlib.BestCompression
// DefaultCompression aliases zlib.DefaultCompression; SetDeflate replaces it
// with level 6 before calling into HDF5.
DefaultCompression = zlib.DefaultCompression
)

// PropType is an HDF5 identifier (C.hid_t) naming a property list class,
// such as P_DATASET_CREATE.
type PropType C.hid_t

// PropList is a handle to an HDF5 property list. It embeds Identifier, whose
// id is passed to the H5P* C functions by the methods of this type.
type PropList struct {
Identifier
}

var (
P_DEFAULT *PropList = newPropList(C._go_hdf5_H5P_DEFAULT())
P_DEFAULT *PropList = newPropList(C._go_hdf5_H5P_DEFAULT())
P_DATASET_CREATE PropType = PropType(C._go_hdf5_H5P_DATASET_CREATE()) // Properties for dataset creation
)

func newPropList(id C.hid_t) *PropList {
Expand All @@ -40,6 +53,48 @@ func (p *PropList) Close() error {
return p.closeWith(h5pclose)
}

// SetChunk configures the chunk shape used when a dataset created with this
// property list is stored with a chunked layout. dims holds one chunk extent
// per dataset dimension and must not be empty.
// https://support.hdfgroup.org/HDF5/doc/RM/RM_H5P.html#Property-SetChunk
func (p *PropList) SetChunk(dims []uint) error {
	rank := len(dims)
	if rank == 0 {
		return fmt.Errorf("number of dimensions must be same size as the rank of the dataset, but zero received")
	}

	// Convert the Go extents to the C element type expected by H5Pset_chunk.
	chunk := make([]C.hsize_t, 0, rank)
	for _, extent := range dims {
		chunk = append(chunk, C.hsize_t(extent))
	}

	status := C.H5Pset_chunk(C.hid_t(p.id), C.int(rank), &chunk[0])
	return h5err(status)
}

// GetChunk returns the chunk extents stored in this property list.
// ndims is the number of extents to read and must be positive; it is used
// both as the size of the buffer handed to H5Pget_chunk and as the length of
// the returned slice.
// https://support.hdfgroup.org/HDF5/doc/RM/RM_H5P.html#Property-GetChunk
func (p *PropList) GetChunk(ndims int) (dims []uint, err error) {
	if ndims < 1 {
		return nil, fmt.Errorf("number of dimensions must be same size as the rank of the dataset, but nonpositive value received")
	}

	raw := make([]C.hsize_t, ndims)
	status := C.H5Pget_chunk(C.hid_t(p.id), C.int(ndims), &raw[0])
	if err := h5err(status); err != nil {
		return nil, err
	}

	// Convert the C extents back to Go values.
	out := make([]uint, 0, ndims)
	for _, extent := range raw {
		out = append(out, uint(extent))
	}
	return out, nil
}

// SetDeflate sets deflate (GNU gzip) compression method and compression level.
// If level is set as DefaultCompression, 6 will be used. Any other level must
// lie in the range 0 (no compression) to 9 (best compression); values outside
// that range are rejected with an error before HDF5 is called.
// https://support.hdfgroup.org/HDF5/doc/RM/RM_H5P.html#Property-SetDeflate
func (p *PropList) SetDeflate(level int) error {
	const (
		minLevel     = 0
		maxLevel     = 9
		defaultLevel = 6
	)
	if level == DefaultCompression {
		level = defaultLevel
	}
	// Reject out-of-range levels explicitly: converting a negative int to
	// C.uint would silently wrap around to a very large unsigned value.
	if level < minLevel || level > maxLevel {
		return fmt.Errorf("hdf5: invalid deflate compression level %d (must be in [%d, %d])", level, minLevel, maxLevel)
	}
	return h5err(C.H5Pset_deflate(C.hid_t(p.id), C.uint(level)))
}

// h5pclose calls H5Pclose on id and returns the resulting HDF5 status code.
// PropList.Close passes this function to closeWith.
func h5pclose(id C.hid_t) C.herr_t {
return C.H5Pclose(id)
}
Expand Down
170 changes: 170 additions & 0 deletions h5p_proplist_test.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,170 @@
// Copyright ©2019 The Gonum Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package hdf5

import (
"fmt"
"math"
"os"
"testing"
)

/**
* These test cases are based on the h5_cmprss.c by The HDF Group.
* https://support.hdfgroup.org/HDF5/examples/intro.html#c
*/

// TestChunk checks that chunk extents set on a dataset-creation property list
// can be read back unchanged, and that a dataset written with that property
// list round-trips through a file.
func TestChunk(t *testing.T) {
	DisplayErrors(true)
	defer DisplayErrors(false)

	const (
		fn  = "test_chunk.h5"
		dsn = "dset_chunk"
	)
	dims := []uint{1000, 1000}
	cdims := []uint{100, 100}
	defer os.Remove(fn)

	dclp, err := NewPropList(P_DATASET_CREATE)
	if err != nil {
		t.Fatal(err)
	}
	defer dclp.Close()

	if err = dclp.SetChunk(cdims); err != nil {
		t.Fatal(err)
	}

	// The extents read back must match what was just set.
	got, err := dclp.GetChunk(len(cdims))
	if err != nil {
		t.Fatal(err)
	}
	for i := range got {
		if want := cdims[i]; got[i] != want {
			t.Fatalf("chunked dimensions mismatch: %d != %d", want, got[i])
		}
	}

	written, err := save(fn, dsn, dims, dclp)
	if err != nil {
		t.Fatal(err)
	}

	read, err := load(fn, dsn)
	if err != nil {
		t.Fatal(err)
	}

	if err := compare(written, read); err != nil {
		t.Fatal(err)
	}
}

// TestDeflate writes a chunked dataset with deflate compression at the
// default level, reads it back and checks the values are unchanged.
func TestDeflate(t *testing.T) {
	DisplayErrors(true)
	defer DisplayErrors(false)

	const (
		fn  = "test_cmprss_deflate.h5"
		dsn = "dset_cmpress"
	)
	dims := []uint{1000, 1000}
	cdims := []uint{100, 100}
	defer os.Remove(fn)

	dclp, err := NewPropList(P_DATASET_CREATE)
	if err != nil {
		t.Fatal(err)
	}
	defer dclp.Close()

	// The chunk shape is configured before the deflate filter is enabled.
	if err = dclp.SetChunk(cdims); err != nil {
		t.Fatal(err)
	}
	if err = dclp.SetDeflate(DefaultCompression); err != nil {
		t.Fatal(err)
	}

	written, err := save(fn, dsn, dims, dclp)
	if err != nil {
		t.Fatal(err)
	}

	read, err := load(fn, dsn)
	if err != nil {
		t.Fatal(err)
	}

	if err := compare(written, read); err != nil {
		t.Fatal(err)
	}
}

// save creates (or truncates) the file fn, creates a T_NATIVE_DOUBLE dataset
// named dsn with extents dims using the creation property list dclp, fills it
// with a deterministic sequence and returns the values that were written.
// dims must contain at least two entries; only the first two are used to size
// the data.
func save(fn, dsn string, dims []uint, dclp *PropList) ([]float64, error) {
	file, err := CreateFile(fn, F_ACC_TRUNC)
	if err != nil {
		return nil, err
	}
	defer file.Close()

	space, err := CreateSimpleDataspace(dims, dims)
	if err != nil {
		return nil, err
	}

	dataset, err := file.CreateDatasetWith(dsn, T_NATIVE_DOUBLE, space, dclp)
	if err != nil {
		return nil, err
	}
	defer dataset.Close()

	values := make([]float64, dims[0]*dims[1])
	for idx := range values {
		values[idx] = float64((idx*idx*idx + 13) % 8191)
	}

	if err := dataset.Write(&values[0]); err != nil {
		return nil, err
	}
	return values, nil
}

// load opens the file fn read-only, reads the whole dataset named dsn as
// float64 values and returns them as a flat slice whose length is the product
// of the dataset's extents.
//
// Errors from opening the file, opening the dataset, querying its extents and
// reading its contents are all returned to the caller. A dataset with zero
// elements yields an empty slice and no error.
func load(fn, dsn string) ([]float64, error) {
	f, err := OpenFile(fn, F_ACC_RDONLY)
	if err != nil {
		return nil, err
	}
	defer f.Close()

	// The error must be captured here: discarding it would leave the check
	// below testing the stale error from OpenFile.
	dset, err := f.OpenDataset(dsn)
	if err != nil {
		return nil, err
	}
	defer dset.Close()

	dims, _, err := dset.Space().SimpleExtentDims()
	if err != nil {
		return nil, err
	}

	// Size the buffer from every extent so it always matches the dataset,
	// whatever its rank.
	n := 1
	for _, d := range dims {
		n *= int(d)
	}
	data := make([]float64, n)
	if n == 0 {
		// Nothing to read, and &data[0] would panic on an empty slice.
		return data, nil
	}

	if err := dset.Read(&data[0]); err != nil {
		return nil, err
	}
	return data, nil
}

// compare returns nil when ds0 and ds1 have the same length and every pair of
// corresponding elements differs by at most 1e-7 in absolute value. Otherwise
// it returns an error describing the length mismatch or the first differing
// index.
func compare(ds0, ds1 []float64) error {
	if len(ds0) != len(ds1) {
		return fmt.Errorf("dimensions mismatch: %d != %d", len(ds0), len(ds1))
	}

	const tolerance = 1e-7
	for i, want := range ds0 {
		got := ds1[i]
		if math.Abs(want-got) > tolerance {
			return fmt.Errorf("values at index %d differ: %f != %f", i, want, got)
		}
	}
	return nil
}

0 comments on commit c8645ba

Please sign in to comment.