Skip to content

Commit

Permalink
Allow specifying the compression level
Browse files Browse the repository at this point in the history
  • Loading branch information
eschnett committed Jan 24, 2019
1 parent d83aede commit 818ee95
Show file tree
Hide file tree
Showing 8 changed files with 51 additions and 20 deletions.
2 changes: 1 addition & 1 deletion CMakeLists.txt
Expand Up @@ -6,7 +6,7 @@ cmake_policy(SET CMP0048 NEW)
set(CMAKE_DISABLE_IN_SOURCE_BUILD ON)
set(CMAKE_DISABLE_SOURCE_CHANGES ON)

project(asdf-cxx VERSION 6.3.0 LANGUAGES CXX)
project(asdf-cxx VERSION 6.4.0 LANGUAGES CXX)
set(PROJECT_DESCRIPTION
"asdf-cxx (Advanced Scientific Data Format), C++ implementation")
set(PROJECT_URL "https://github.com/eschnett/asdf-cxx")
Expand Down
2 changes: 2 additions & 0 deletions asdf_io.hpp
Expand Up @@ -59,6 +59,8 @@ struct copy_state {
block_format_t block_format;
bool set_compression;
compression_t compression;
bool set_compression_level;
int compression_level;
};

class writer {
Expand Down
9 changes: 5 additions & 4 deletions asdf_ndarray.hpp
Expand Up @@ -100,6 +100,7 @@ class ndarray {
memoized<block_t> mdata;
block_format_t block_format;
compression_t compression; // TODO: move to block_t
int compression_level; // TODO: move to block_t
vector<bool> mask;
shared_ptr<datatype_t> datatype;
byteorder_t byteorder; // TODO: move to block_t
Expand All @@ -119,12 +120,12 @@ class ndarray {
ndarray &operator=(ndarray &&) = default;

ndarray(memoized<block_t> mdata1, block_format_t block_format,
compression_t compression, vector<bool> mask1,
compression_t compression, int compression_level, vector<bool> mask1,
shared_ptr<datatype_t> datatype1, byteorder_t byteorder,
vector<int64_t> shape1, int64_t offset = 0,
vector<int64_t> strides1 = {})
: mdata(move(mdata1)), block_format(block_format),
compression(compression), mask(move(mask1)), datatype(move(datatype1)),
compression(compression), compression_level(compression_level),mask(move(mask1)), datatype(move(datatype1)),
byteorder(byteorder), shape(move(shape1)), offset(offset),
strides(move(strides1)) {
// Check shape
Expand Down Expand Up @@ -157,11 +158,11 @@ class ndarray {

template <typename T>
ndarray(vector<T> data1, block_format_t block_format,
compression_t compression, vector<bool> mask1, vector<int64_t> shape1,
compression_t compression,int compression_level, vector<bool> mask1, vector<int64_t> shape1,
int64_t offset = 0, vector<int64_t> strides1 = {})
: ndarray(make_constant_memoized(shared_ptr<block_t>(
make_shared<typed_block_t<T>>(move(data1)))),
block_format, compression, move(mask1),
block_format, compression, compression_level,move(mask1),
make_shared<datatype_t>(get_scalar_type_id<T>::value),
host_byteorder(), move(shape1), offset, move(strides1)) {}

Expand Down
33 changes: 29 additions & 4 deletions copy.cpp
Expand Up @@ -18,13 +18,14 @@ int main(int argc, char **argv) {
if (cond)
return;
cerr << msg << "Syntax: " << argv[0]
<< " [--array=(blockinline)] [--compression=(none|bzip2|zlib)] <input "
"file> <output file>\n"
<< " [--array=(blockinline)] [--compression=(none|bzip2|zlib)] "
"[--compression-level=[0-9]] <input file> <output file>\n"
<< "Aborting.\n";
exit(1);
};
block_format_t block_format = block_format_t::undefined;
compression_t compression = compression_t::undefined;
int compression_level = -1;
vector<string> args;
for (int argi = 1; argi < argc; ++argi)
args.push_back(argv[argi]);
Expand All @@ -50,6 +51,26 @@ int main(int argc, char **argv) {
check(compression == compression_t::undefined,
"Compression type already set\n");
compression = compression_t::zlib;
} else if (opt == "--compression-level=0") { // Don't judge me for this
compression_level = 0;
} else if (opt == "--compression-level=1") {
compression_level = 1;
} else if (opt == "--compression-level=2") {
compression_level = 2;
} else if (opt == "--compression-level=3") {
compression_level = 3;
} else if (opt == "--compression-level=4") {
compression_level = 4;
} else if (opt == "--compression-level=5") {
compression_level = 5;
} else if (opt == "--compression-level=6") {
compression_level = 6;
} else if (opt == "--compression-level=7") {
compression_level = 7;
} else if (opt == "--compression-level=8") {
compression_level = 8;
} else if (opt == "--compression-level=9") {
compression_level = 9;
} else {
assert(0);
}
Expand All @@ -65,8 +86,12 @@ int main(int argc, char **argv) {
auto project = asdf(inputfilename);

// Copy project
const copy_state cs{block_format != block_format_t::undefined, block_format,
compression != compression_t::undefined, compression};
const copy_state cs{block_format != block_format_t::undefined,
block_format,
compression != compression_t::undefined,
compression,
compression_level != -1,
compression_level};
auto project2 = project.copy(cs);

// Write project
Expand Down
2 changes: 1 addition & 1 deletion demo-external.cpp
Expand Up @@ -15,7 +15,7 @@ void write_external() {
// The actual dataset
auto alpha = make_shared<ndarray>(
vector<int64_t>{1, 2, 3}, block_format_t::inline_array,
compression_t::none, vector<bool>(), vector<int64_t>{3});
compression_t::none, 0, vector<bool>(), vector<int64_t>{3});
// A local reference
auto beta =
make_shared<reference>("", vector<string>{"group", "alpha", "data"});
Expand Down
2 changes: 1 addition & 1 deletion demo-large.cpp
Expand Up @@ -34,7 +34,7 @@ int main(int argc, char **argv) {

cout << " creating project..." << flush;
auto array3d = make_shared<ndarray>(move(rho), block_format_t::block,
compression_t::zlib, vector<bool>(),
compression_t::zlib, 9, vector<bool>(),
vector<int64_t>{ni, nj, nk});
assert(rho.empty());
auto ent = make_shared<entry>("rho", array3d, string());
Expand Down
10 changes: 5 additions & 5 deletions demo.cpp
Expand Up @@ -16,25 +16,25 @@ int main(int argc, char **argv) {
cout << "asdf-demo: Create a simple ASDF file\n";

auto array0d = make_shared<ndarray>(
vector<int64_t>{42}, block_format_t::inline_array, compression_t::none,
vector<int64_t>{42}, block_format_t::inline_array, compression_t::none, 0,
vector<bool>(), vector<int64_t>{});
auto ent0 = make_shared<entry>("alpha", array0d, string());
auto array1d = make_shared<ndarray>(
vector<int64_t>{1, 2, 3}, block_format_t::block, compression_t::none,
vector<int64_t>{1, 2, 3}, block_format_t::block, compression_t::none, 0,
vector<bool>(), vector<int64_t>{3});
auto ent1 = make_shared<entry>("beta", array1d, string());
auto array2d =
make_shared<ndarray>(vector<float64_t>{1.0, 2.0, 3.0, 4.0, 5.0, 6.0},
block_format_t::inline_array, compression_t::none,
block_format_t::inline_array, compression_t::none, 0,
vector<bool>(), vector<int64_t>{2, 3});
auto ent2 = make_shared<entry>("gamma", array2d, string());
auto array3d =
make_shared<ndarray>(vector<complex128_t>{1, -2, 3i, -4i, 5 + 1i, 6 - 1i},
block_format_t::block, compression_t::bzip2,
block_format_t::block, compression_t::bzip2, 9,
vector<bool>(), vector<int64_t>{1, 2, 3});
auto ent3 = make_shared<entry>("delta", array3d, string());
auto array8d = make_shared<ndarray>(
vector<bool8_t>{true}, block_format_t::block, compression_t::zlib,
vector<bool8_t>{true}, block_format_t::block, compression_t::zlib, 9,
vector<bool>(), vector<int64_t>{1, 1, 1, 1, 1, 1, 1, 1});
auto ent8 = make_shared<entry>("epsilon", array8d, string());
auto seq = make_shared<sequence>(vector<shared_ptr<entry>>{ent0, ent1, ent2});
Expand Down
11 changes: 7 additions & 4 deletions ndarray.cpp
Expand Up @@ -365,7 +365,7 @@ void ndarray::write_block(ostream &os) const {
// Allocate 600 bytes plus 1% more
outdata = make_shared<typed_block_t<unsigned char>>(vector<unsigned char>(
600 + get_data()->nbytes() + (get_data()->nbytes() + 99) / 100));
const int level = 9;
const int level = compression_level;
bz_stream strm;
strm.bzalloc = NULL;
strm.bzfree = NULL;
Expand Down Expand Up @@ -413,7 +413,7 @@ void ndarray::write_block(ostream &os) const {
outdata = make_shared<typed_block_t<unsigned char>>(
vector<unsigned char>((6 + get_data()->nbytes() +
(get_data()->nbytes() + 16383) / 16384 * 5)));
const int level = 9;
const int level = compression_level;
z_stream strm;
strm.zalloc = Z_NULL;
strm.zfree = Z_NULL;
Expand Down Expand Up @@ -507,8 +507,8 @@ void ndarray::write_block(ostream &os) const {

ndarray::ndarray(const shared_ptr<reader_state> &rs, const YAML::Node &node)
: block_format(block_format_t::undefined),
compression(compression_t::undefined), byteorder(byteorder_t::undefined),
offset(-1) {
compression(compression_t::undefined), compression_level(-1),
byteorder(byteorder_t::undefined), offset(-1) {
assert(node.Tag() == "tag:stsci.edu:asdf/core/ndarray-1.0.0");
if (node["source"].IsDefined())
block_format = block_format_t::block;
Expand All @@ -524,6 +524,7 @@ ndarray::ndarray(const shared_ptr<reader_state> &rs, const YAML::Node &node)
yaml_decode(node["source"], source);
// TODO: This is just a default choice
compression = compression_t::zlib;
compression_level = 9;
datatype = make_shared<datatype_t>(rs, node["datatype"]);
yaml_decode(node["byteorder"], byteorder);
yaml_decode(node["shape"], shape);
Expand Down Expand Up @@ -580,6 +581,8 @@ ndarray::ndarray(const copy_state &cs, const ndarray &arr) : ndarray(arr) {
block_format = cs.block_format;
if (cs.set_compression)
compression = cs.compression;
if (cs.set_compression_level)
compression_level = cs.compression_level;
}

writer &ndarray::to_yaml(writer &w) const {
Expand Down

0 comments on commit 818ee95

Please sign in to comment.