From 071877e87f13e39fa6667b27ab09420dfcf67bdc Mon Sep 17 00:00:00 2001 From: Eugene Kliuchnikov Date: Fri, 18 Oct 2019 16:09:45 +0200 Subject: [PATCH] Update (#48) Update * encoder: refactor bit-writing * common: turn dcheck to be conditional check * add experimaental code for "groups" encoding --- .gitmodules | 3 + BUILD | 45 +++- WORKSPACE | 13 + brunsli.cmake | 1 + c/common/platform.h | 16 +- c/enc/ans_encode.cc | 10 +- c/enc/ans_encode.h | 10 +- c/enc/brunsli_encode.cc | 243 +++++++++--------- c/enc/context_map_encode.cc | 232 ++++++++---------- c/enc/context_map_encode.h | 10 +- c/enc/histogram_encode.cc | 49 ++-- c/enc/histogram_encode.h | 17 +- c/enc/state.h | 13 +- c/enc/write_bits.cc | 29 +++ c/enc/write_bits.h | 58 +++-- c/experimental/groups.cc | 476 ++++++++++++++++++++++++++++++++++++ c/experimental/groups.h | 32 +++ c/tools/cbrunsli.cc | 17 ++ c/tools/dbrunsli.cc | 25 +- third_party/highwayhash | 1 + 20 files changed, 953 insertions(+), 347 deletions(-) create mode 100644 c/enc/write_bits.cc create mode 100644 c/experimental/groups.cc create mode 100644 c/experimental/groups.h create mode 160000 third_party/highwayhash diff --git a/.gitmodules b/.gitmodules index 129a6f59..01572aa6 100644 --- a/.gitmodules +++ b/.gitmodules @@ -4,3 +4,6 @@ [submodule "third_party/googletest"] path = third_party/googletest url = https://github.com/google/googletest +[submodule "third_party/highwayhash"] + path = third_party/highwayhash + url = https://github.com/google/highwayhash.git diff --git a/BUILD b/BUILD index b0e7b819..ccb1cc24 100644 --- a/BUILD +++ b/BUILD @@ -157,24 +157,65 @@ cc_library( ], ) +config_setting( + name = "experimental", + define_values = { + "brunsli_groups": "experimental", + }, +) + +EXPERIMENTAL_DEPS = select({ + ":experimental": [ + ":groups", + "@highwayhash//:highwayhash_inc", + ], + "//conditions:default": [], +}) + +EXPERIMENTAL_DEFINES = select({ + ":experimental": ["BRUNSLI_EXPERIMENTAL_GROUPS"], + "//conditions:default": [], +}) + +EXPERIMENTAL_LINKOPTS = select({ + ":experimental": ["-pthread"], + "//conditions:default": [], +}) + +cc_library( + name = "groups", + srcs = ["c/experimental/groups.cc"], + hdrs = ["c/experimental/groups.h"], + copts = STRICT_C_OPTIONS, + deps = [ + ":brunslicommon", + ":brunslidec", + ":brunslienc", + ], +) + cc_binary( name = "cbrunsli", srcs = ["c/tools/cbrunsli.cc"], copts = STRICT_C_OPTIONS, + defines = EXPERIMENTAL_DEFINES, + linkopts = EXPERIMENTAL_LINKOPTS, deps = [ ":brunslicommon", ":brunslienc", - ], + ] + EXPERIMENTAL_DEPS, ) cc_binary( name = "dbrunsli", srcs = ["c/tools/dbrunsli.cc"], copts = STRICT_C_OPTIONS, + defines = EXPERIMENTAL_DEFINES, + linkopts = EXPERIMENTAL_LINKOPTS, deps = [ ":brunslicommon", ":brunslidec", - ], + ] + EXPERIMENTAL_DEPS, ) cc_test( diff --git a/WORKSPACE b/WORKSPACE index 868e25a6..e65ab3b7 100644 --- a/WORKSPACE +++ b/WORKSPACE @@ -15,3 +15,16 @@ new_local_repository( path = "third_party/googletest", build_file = "third_party/googletest/BUILD.bazel", ) + +new_local_repository( + name = "highwayhash", + path = "third_party/highwayhash", + build_file_content = """ +package(default_visibility = ["//visibility:public"]) +cc_library( + name = "highwayhash_inc", + hdrs = glob(["highwayhash/*.h"]), + strip_include_prefix = "", +) + """, +) diff --git a/brunsli.cmake b/brunsli.cmake index a76bb941..e9c48ca1 100644 --- a/brunsli.cmake +++ b/brunsli.cmake @@ -37,6 +37,7 @@ set(BRUNSLI_ENC_SOURCES c/enc/histogram_encode.cc c/enc/jpeg_data_reader.cc c/enc/jpeg_huffman_decode.cc + 
c/enc/write_bits.cc ) # TODO(eustas): split public/private headers. diff --git a/c/common/platform.h b/c/common/platform.h index 23cee869..1a83bfae 100644 --- a/c/common/platform.h +++ b/c/common/platform.h @@ -46,10 +46,6 @@ #define BRUNSLI_X_BIG_ENDIAN BIG_ENDIAN #endif -#if defined(BRUNSLI_DEBUG) -#include -#endif - /* The following macros were borrowed from https://github.com/nemequ/hedley * with permission of original author - Evan Nemerson */ @@ -492,12 +488,6 @@ static BRUNSLI_INLINE void BRUNSLI_UNALIGNED_STORE64LE(void* p, uint64_t v) { #define BRUNSLI_LOG_ERROR() BRUNSLI_LOG_(ERROR) #endif // defined(BRUNSLI_DISABLE_LOG) -#if defined(BRUNSLI_DEBUG) -#define BRUNSLI_DCHECK(V) assert(V) -#else -#define BRUNSLI_DCHECK(V) -#endif - namespace brunsli { void BrunsliDumpAndAbort(const char* f, int l, const char* fn); } // namespace brunsli @@ -510,6 +500,12 @@ void BrunsliDumpAndAbort(const char* f, int l, const char* fn); while (true) ; \ } +#if defined(BRUNSLI_DEBUG) +#define BRUNSLI_DCHECK(V) BRUNSLI_CHECK(V) +#else +#define BRUNSLI_DCHECK(V) +#endif + // TODO: Pick up upgrade after https://github.com/google/brotli/pull/636 // is landed and merged. inline int Log2FloorNonZero(uint32_t n) { diff --git a/c/enc/ans_encode.cc b/c/enc/ans_encode.cc index 52de6290..df1eebe1 100644 --- a/c/enc/ans_encode.cc +++ b/c/enc/ans_encode.cc @@ -10,6 +10,7 @@ #include #include "./histogram_encode.h" +#include "./write_bits.h" namespace brunsli { @@ -38,17 +39,16 @@ void ANSBuildInfoTable(const int* counts, int alphabet_size, } // namespace -void BuildAndStoreANSEncodingData(const int* histogram, - ANSTable* table, - size_t* storage_ix, uint8_t* storage) { +void BuildAndStoreANSEncodingData(const int* histogram, ANSTable* table, + Storage* storage) { int num_symbols; - int symbols[kMaxNumSymbolsForSmallCode] = { 0 }; + int symbols[kMaxNumSymbolsForSmallCode] = {0}; std::vector counts(histogram, histogram + ANS_MAX_SYMBOLS); int omit_pos; NormalizeCounts(&counts[0], &omit_pos, ANS_MAX_SYMBOLS, ANS_LOG_TAB_SIZE, &num_symbols, symbols); ANSBuildInfoTable(&counts[0], ANS_MAX_SYMBOLS, table->info_); - EncodeCounts(&counts[0], omit_pos, num_symbols, symbols, storage_ix, storage); + EncodeCounts(&counts[0], omit_pos, num_symbols, symbols, storage); } } // namespace brunsli diff --git a/c/enc/ans_encode.h b/c/enc/ans_encode.h index c83ac3f5..b74078bd 100644 --- a/c/enc/ans_encode.h +++ b/c/enc/ans_encode.h @@ -12,6 +12,7 @@ #include "../common/ans_params.h" #include +#include "./write_bits.h" namespace brunsli { @@ -52,8 +53,8 @@ class ANSCoder { const uint32_t offset = state_ - v * t.freq_ + t.start_; state_ = (v << ANS_LOG_TAB_SIZE) + offset; #else - state_ = ((state_ / t.freq_) << ANS_LOG_TAB_SIZE) - + (state_ % t.freq_) + t.start_; + state_ = ((state_ / t.freq_) << ANS_LOG_TAB_SIZE) + (state_ % t.freq_) + + t.start_; #endif return bits; } @@ -64,9 +65,8 @@ class ANSCoder { uint32_t state_; }; -void BuildAndStoreANSEncodingData(const int* histogram, - ANSTable* table, - size_t* storage_ix, uint8_t* storage); +void BuildAndStoreANSEncodingData(const int* histogram, ANSTable* table, + Storage* storage); } // namespace brunsli diff --git a/c/enc/brunsli_encode.cc b/c/enc/brunsli_encode.cc index 10083df2..52dc5d5b 100644 --- a/c/enc/brunsli_encode.cc +++ b/c/enc/brunsli_encode.cc @@ -37,12 +37,12 @@ static const int kBrotliQuality = 6; static const int kBrotliWindowBits = 18; using ::brunsli::internal::enc::BlockI32; -using ::brunsli::internal::enc::State; using ::brunsli::internal::enc::ComponentMeta; using 
::brunsli::internal::enc::DataStream; using ::brunsli::internal::enc::EntropyCodes; using ::brunsli::internal::enc::EntropySource; using ::brunsli::internal::enc::Histogram; +using ::brunsli::internal::enc::State; using ::brunsli::internal::enc::SelectContextBits; @@ -84,11 +84,13 @@ size_t Base128Size(size_t val) { return size; } -void EncodeBase128(size_t val, uint8_t* data, size_t* pos) { +size_t EncodeBase128(size_t val, uint8_t* data) { + size_t len = 0; do { - data[(*pos)++] = (val & 0x7f) | (val >= 128 ? 0x80 : 0); + data[len++] = (val & 0x7f) | (val >= 128 ? 0x80 : 0); val >>= 7; } while (val > 0); + return len; } void EncodeBase128Fix(size_t val, size_t len, uint8_t* data) { @@ -203,51 +205,50 @@ int GetQuantTableId(const JPEGQuantTable& q, bool is_chroma, return kNumStockQuantTables + FindBestMatrix(&q.values[0], is_chroma, dst); } -void EncodeVarint(int n, int max_bits, size_t* storage_ix, uint8_t* storage) { +void EncodeVarint(int n, int max_bits, Storage* storage) { int b; BRUNSLI_DCHECK(n < (1 << max_bits)); for (b = 0; n != 0 && b < max_bits; ++b) { if (b + 1 != max_bits) { - WriteBits(1, 1, storage_ix, storage); + WriteBits(1, 1, storage); } - WriteBits(1, n & 1, storage_ix, storage); + WriteBits(1, n & 1, storage); n >>= 1; } if (b < max_bits) { - WriteBits(1, 0, storage_ix, storage); + WriteBits(1, 0, storage); } } // encodes an integer with packets of 'nbits' bits, limited to 'max_symbols' // emitted symbols. void EncodeLimitedVarint(size_t bits, int nbits, int max_symbols, - size_t* storage_ix, uint8_t* storage) { + Storage* storage) { const size_t mask = (static_cast(1) << nbits) - 1; for (int b = 0; b < max_symbols; ++b) { - WriteBits(1, bits != 0, storage_ix, storage); + WriteBits(1, bits != 0, storage); if (bits == 0) break; - WriteBits(nbits, bits & mask, storage_ix, storage); + WriteBits(nbits, bits & mask, storage); bits >>= nbits; } } -bool EncodeQuantTables(const JPEGData& jpg, size_t* storage_ix, - uint8_t* storage) { +bool EncodeQuantTables(const JPEGData& jpg, Storage* storage) { if (jpg.quant.empty() || jpg.quant.size() > 4) { // If ReadJpeg() succeeded with JPEG_READ_ALL mode, this should not happen. return false; } - WriteBits(2, jpg.quant.size() - 1, storage_ix, storage); + WriteBits(2, jpg.quant.size() - 1, storage); for (size_t i = 0; i < jpg.quant.size(); ++i) { const JPEGQuantTable& q = jpg.quant[i]; uint8_t predictor[kDCTBlockSize]; const int code = GetQuantTableId(q, i > 0, predictor); - WriteBits(1, (code >= kNumStockQuantTables), storage_ix, storage); + WriteBits(1, (code >= kNumStockQuantTables), storage); if (code < kNumStockQuantTables) { - WriteBits(3, code, storage_ix, storage); + WriteBits(3, code, storage); } else { BRUNSLI_DCHECK(code - kNumStockQuantTables < (1 << 6)); - WriteBits(6, code - kNumStockQuantTables, storage_ix, storage); + WriteBits(6, code - kNumStockQuantTables, storage); int last_diff = 0; // difference predictor for (int k = 0; k < kDCTBlockSize; ++k) { const int j = kJPEGNaturalOrder[k]; @@ -258,31 +259,31 @@ bool EncodeQuantTables(const JPEGData& jpg, size_t* storage_ix, const int new_diff = q.values[j] - predictor[j]; int diff = new_diff - last_diff; last_diff = new_diff; - WriteBits(1, diff != 0, storage_ix, storage); + WriteBits(1, diff != 0, storage); if (diff) { - WriteBits(1, diff < 0, storage_ix, storage); + WriteBits(1, diff < 0, storage); if (diff < 0) diff = -diff; diff -= 1; // This only happens on 16-bit precision with crazy values, // e.g. [..., 65535, 1, 65535,...] 
if (diff > 65535) return false; - EncodeVarint(diff, 16, storage_ix, storage); + EncodeVarint(diff, 16, storage); } } } } for (size_t i = 0; i < jpg.components.size(); ++i) { - WriteBits(2, jpg.components[i].quant_idx, storage_ix, storage); + WriteBits(2, jpg.components[i].quant_idx, storage); } return true; } bool EncodeHuffmanCode(const JPEGHuffmanCode& huff, bool is_known_last, - size_t* storage_ix, uint8_t* storage) { - WriteBits(2, huff.slot_id & 0xf, storage_ix, storage); - WriteBits(1, huff.slot_id >> 4, storage_ix, storage); + Storage* storage) { + WriteBits(2, huff.slot_id & 0xf, storage); + WriteBits(1, huff.slot_id >> 4, storage); if (!is_known_last) { - WriteBits(1, huff.is_last, storage_ix, storage); + WriteBits(1, huff.is_last, storage); } else if (!huff.is_last) { return false; } @@ -314,16 +315,16 @@ bool EncodeHuffmanCode(const JPEGHuffmanCode& huff, bool is_known_last, } } } - WriteBits(1, found_match, storage_ix, storage); + WriteBits(1, found_match, storage); if (found_match) { - WriteBits(1, stock_table_idx, storage_ix, storage); + WriteBits(1, stock_table_idx, storage); return true; } while (max_len > 0 && huff.counts[max_len] == 0) --max_len; if (huff.counts[0] != 0 || max_len == 0) { return false; } - WriteBits(4, max_len - 1, storage_ix, storage); + WriteBits(4, max_len - 1, storage); space -= (1 << (kJpegHuffmanMaxBitLength - max_len)); for (int i = 1; i <= max_len; ++i) { int count = huff.counts[i] - (i == max_len ? 1 : 0); @@ -337,7 +338,7 @@ bool EncodeHuffmanCode(const JPEGHuffmanCode& huff, bool is_known_last, } if (count_limit > 0) { int nbits = Log2FloorNonZero(count_limit) + 1; - WriteBits(nbits, count, storage_ix, storage); + WriteBits(nbits, count, storage); total_count += count; space -= count * (1 << (kJpegHuffmanMaxBitLength - i)); } @@ -356,34 +357,33 @@ bool EncodeHuffmanCode(const JPEGHuffmanCode& huff, bool is_known_last, if (!p.RemoveValue(val, &code, &nbits)) { return false; } - EncodeLimitedVarint(code, 2, (nbits + 1) >> 1, storage_ix, storage); + EncodeLimitedVarint(code, 2, (nbits + 1) >> 1, storage); } return true; } -bool EncodeScanInfo(const JPEGScanInfo& si, size_t* storage_ix, - uint8_t* storage) { - WriteBits(6, si.Ss, storage_ix, storage); - WriteBits(6, si.Se, storage_ix, storage); - WriteBits(4, si.Ah, storage_ix, storage); - WriteBits(4, si.Al, storage_ix, storage); - WriteBits(2, si.components.size() - 1, storage_ix, storage); +bool EncodeScanInfo(const JPEGScanInfo& si, Storage* storage) { + WriteBits(6, si.Ss, storage); + WriteBits(6, si.Se, storage); + WriteBits(4, si.Ah, storage); + WriteBits(4, si.Al, storage); + WriteBits(2, si.components.size() - 1, storage); for (size_t i = 0; i < si.components.size(); ++i) { const JPEGComponentScanInfo& csi = si.components[i]; - WriteBits(2, csi.comp_idx, storage_ix, storage); - WriteBits(2, csi.dc_tbl_idx, storage_ix, storage); - WriteBits(2, csi.ac_tbl_idx, storage_ix, storage); + WriteBits(2, csi.comp_idx, storage); + WriteBits(2, csi.dc_tbl_idx, storage); + WriteBits(2, csi.ac_tbl_idx, storage); } int last_block_idx = -1; for (std::set::const_iterator it = si.reset_points.begin(); it != si.reset_points.end(); ++it) { int block_idx = *it; - WriteBits(1, 1, storage_ix, storage); + WriteBits(1, 1, storage); BRUNSLI_DCHECK(block_idx >= last_block_idx + 1); - EncodeVarint(block_idx - last_block_idx - 1, 28, storage_ix, storage); + EncodeVarint(block_idx - last_block_idx - 1, 28, storage); last_block_idx = block_idx; } - WriteBits(1, 0, storage_ix, storage); + WriteBits(1, 0, storage); 
last_block_idx = 0; for (size_t i = 0; i < si.extra_zero_runs.size(); ++i) { @@ -391,12 +391,12 @@ bool EncodeScanInfo(const JPEGScanInfo& si, size_t* storage_ix, int num = si.extra_zero_runs[i].num_extra_zero_runs; BRUNSLI_DCHECK(block_idx >= last_block_idx); for (int j = 0; j < num; ++j) { - WriteBits(1, 1, storage_ix, storage); - EncodeVarint(block_idx - last_block_idx, 28, storage_ix, storage); + WriteBits(1, 1, storage); + EncodeVarint(block_idx - last_block_idx, 28, storage); last_block_idx = block_idx; } } - WriteBits(1, 0, storage_ix, storage); + WriteBits(1, 0, storage); return true; } @@ -415,14 +415,14 @@ int MatchComponentIds(const std::vector& comps) { return kComponentIdsCustom; } -void JumpToByteBoundary(size_t* storage_ix, uint8_t* storage) { - int nbits = *storage_ix & 7; +void JumpToByteBoundary(Storage* storage) { + int nbits = storage->pos & 7; if (nbits > 0) { - WriteBits(8 - nbits, 0, storage_ix, storage); + WriteBits(8 - nbits, 0, storage); } } -bool EncodeAuxData(const JPEGData& jpg, size_t* storage_ix, uint8_t* storage) { +bool EncodeAuxData(const JPEGData& jpg, Storage* storage) { if (jpg.marker_order.empty() || jpg.marker_order.back() != 0xd9) { return false; } @@ -433,20 +433,19 @@ bool EncodeAuxData(const JPEGData& jpg, size_t* storage_ix, uint8_t* storage) { if (marker < 0xc0) { return false; } - WriteBits(6, marker - 0xc0, storage_ix, storage); + WriteBits(6, marker - 0xc0, storage); if (marker == 0xdd) have_dri = true; if (marker == 0xda) ++num_scans; } if (have_dri) { - WriteBits(16, jpg.restart_interval, storage_ix, storage); + WriteBits(16, jpg.restart_interval, storage); } BRUNSLI_DCHECK(jpg.huffman_code.size() < kMaxDHTMarkers); for (size_t i = 0; i < jpg.huffman_code.size(); ++i) { const bool is_known_last = ((i + 1) == jpg.huffman_code.size()); - WriteBits(1, is_known_last, storage_ix, storage); - if (!EncodeHuffmanCode(jpg.huffman_code[i], is_known_last, storage_ix, - storage)) { + WriteBits(1, is_known_last, storage); + if (!EncodeHuffmanCode(jpg.huffman_code[i], is_known_last, storage)) { return false; } } @@ -455,45 +454,44 @@ bool EncodeAuxData(const JPEGData& jpg, size_t* storage_ix, uint8_t* storage) { return false; } for (size_t i = 0; i < jpg.scan_info.size(); ++i) { - if (!EncodeScanInfo(jpg.scan_info[i], storage_ix, storage)) { + if (!EncodeScanInfo(jpg.scan_info[i], storage)) { return false; } } - WriteBits(2, jpg.quant.size() - 1, storage_ix, storage); + WriteBits(2, jpg.quant.size() - 1, storage); for (size_t i = 0; i < jpg.quant.size(); ++i) { - WriteBits(2, jpg.quant[i].index, storage_ix, storage); + WriteBits(2, jpg.quant[i].index, storage); if (i != jpg.quant.size() - 1) { - WriteBits(1, jpg.quant[i].is_last, storage_ix, storage); + WriteBits(1, jpg.quant[i].is_last, storage); } else if (!jpg.quant[i].is_last) { return false; } - WriteBits(4, jpg.quant[i].precision, storage_ix, storage); + WriteBits(4, jpg.quant[i].precision, storage); } int comp_ids = MatchComponentIds(jpg.components); - WriteBits(2, comp_ids, storage_ix, storage); + WriteBits(2, comp_ids, storage); if (comp_ids == kComponentIdsCustom) { for (size_t i = 0; i < jpg.components.size(); ++i) { - WriteBits(8, jpg.components[i].id, storage_ix, storage); + WriteBits(8, jpg.components[i].id, storage); } } size_t nsize = jpg.has_zero_padding_bit ? 
jpg.padding_bits.size() : 0; if (nsize > PaddingBitsLimit(jpg)) return false; // we limit to 32b for nsize - EncodeLimitedVarint(nsize, 8, 4, storage_ix, storage); + EncodeLimitedVarint(nsize, 8, 4, storage); if (nsize > 0) { for (size_t i = 0; i < nsize; ++i) { - WriteBits(1, jpg.padding_bits[i], storage_ix, storage); + WriteBits(1, jpg.padding_bits[i], storage); } } - JumpToByteBoundary(storage_ix, storage); - size_t pos = *storage_ix >> 3; + JumpToByteBoundary(storage); for (size_t i = 0; i < jpg.inter_marker_data.size(); ++i) { const std::string& s = jpg.inter_marker_data[i]; - EncodeBase128(s.size(), storage, &pos); - memcpy(&storage[pos], reinterpret_cast(s.data()), s.size()); - pos += s.size(); + uint8_t buffer[(sizeof(size_t) * 8 + 6) / 7]; + size_t len = EncodeBase128(s.size(), buffer); + storage->AppendBytes(buffer, len); + storage->AppendBytes(reinterpret_cast(s.data()), s.size()); } - *storage_ix = pos << 3; return true; } @@ -568,18 +566,15 @@ EntropyCodes::EntropyCodes(const std::vector& histograms, &context_map_); } -void EntropyCodes::EncodeContextMap(size_t* storage_ix, - uint8_t* storage) const { - brunsli::EncodeContextMap(context_map_, clustered_.size(), storage_ix, - storage); +void EntropyCodes::EncodeContextMap(Storage* storage) const { + brunsli::EncodeContextMap(context_map_, clustered_.size(), storage); } -void EntropyCodes::BuildAndStoreEntropyCodes(size_t* storage_ix, - uint8_t* storage) { +void EntropyCodes::BuildAndStoreEntropyCodes(Storage* storage) { ans_tables_.resize(clustered_.size()); for (size_t i = 0; i < clustered_.size(); ++i) { BuildAndStoreANSEncodingData(&clustered_[i].data_[0], &ans_tables_[i], - storage_ix, storage); + storage); } } @@ -678,8 +673,7 @@ void DataStream::AddBit(Prob* const p, int bit) { } } -void DataStream::EncodeCodeWords(EntropyCodes* s, size_t* storage_ix, - uint8_t* storage) { +void DataStream::EncodeCodeWords(EntropyCodes* s, Storage* storage) { FlushBitWriter(); FlushArithmeticCoder(); ANSCoder ans; @@ -692,7 +686,8 @@ void DataStream::EncodeCodeWords(EntropyCodes* s, size_t* storage_ix, } } const uint32_t state = ans.GetState(); - uint16_t* out = reinterpret_cast(storage); + // TODO: what about alignment and endianness? 
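+  // Note: the cast below stores the ANS state and code words as native
+  // 16-bit writes, so it assumes storage->data is suitably aligned and that
+  // the host byte order matches what the decoder expects; a portable
+  // fallback would emit each word through WriteBits or AppendBytes instead.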
+ uint16_t* out = reinterpret_cast(storage->data); const uint16_t* out_start = out; *(out++) = (state >> 16) & 0xffff; *(out++) = (state >> 0) & 0xffff; @@ -702,7 +697,7 @@ void DataStream::EncodeCodeWords(EntropyCodes* s, size_t* storage_ix, *(out++) = word.value; } } - *storage_ix += (out - out_start) * 16; + storage->pos += (out - out_start) * 16; } void EncodeNumNonzeros(int val, Prob* p, DataStream* data_stream) { @@ -784,11 +779,10 @@ bool EncodeSignature(size_t len, uint8_t* data, size_t* pos) { static void EncodeValue(uint8_t tag, size_t value, uint8_t* data, size_t* pos) { data[(*pos)++] = ValueMarker(tag); - EncodeBase128(value, data, pos); + *pos += EncodeBase128(value, data + *pos); } -bool EncodeHeader(const JPEGData& jpg, State* s, uint8_t* data, - size_t* len) { +bool EncodeHeader(const JPEGData& jpg, State* s, uint8_t* data, size_t* len) { if ((jpg.version != 1 && (jpg.width == 0 || jpg.height == 0)) || jpg.components.empty() || jpg.components.size() > kMaxComponents) { return false; @@ -808,8 +802,7 @@ bool EncodeHeader(const JPEGData& jpg, State* s, uint8_t* data, return true; } -bool EncodeMetaData(const JPEGData& jpg, State* s, uint8_t* data, - size_t* len) { +bool EncodeMetaData(const JPEGData& jpg, State* s, uint8_t* data, size_t* len) { // Concatenate all the (possibly transformed) metadata pieces into one string. std::string metadata; size_t transformed_marker_count = 0; @@ -840,8 +833,7 @@ bool EncodeMetaData(const JPEGData& jpg, State* s, uint8_t* data, } // Write base-128 encoding of the original metadata size. - size_t pos = 0; - EncodeBase128(metadata.size(), data, &pos); + size_t pos = EncodeBase128(metadata.size(), data); // Write the compressed metadata directly to the output. size_t compressed_size = *len - pos; @@ -861,59 +853,61 @@ bool EncodeMetaData(const JPEGData& jpg, State* s, uint8_t* data, return true; } -bool EncodeJPEGInternals(const JPEGData& jpg, State* s, - uint8_t* data, size_t* len) { - size_t storage_ix = 0; - WriteBitsPrepareStorage(storage_ix, data); - if (!EncodeAuxData(jpg, &storage_ix, data)) { +bool EncodeJPEGInternals(const JPEGData& jpg, State* s, uint8_t* data, + size_t* len) { + Storage storage(data, *len); + + if (!EncodeAuxData(jpg, &storage)) { return false; } - *len = (storage_ix + 7) >> 3; + + *len = storage.GetBytesUsed(); return true; } bool EncodeQuantData(const JPEGData& jpg, State* s, uint8_t* data, size_t* len) { - size_t storage_ix = 0; - WriteBitsPrepareStorage(storage_ix, data); - if (!EncodeQuantTables(jpg, &storage_ix, data)) { + Storage storage(data, *len); + + if (!EncodeQuantTables(jpg, &storage)) { return false; } - *len = (storage_ix + 7) >> 3; + + *len = storage.GetBytesUsed(); return true; } -bool EncodeHistogramData(const JPEGData& jpg, State* state, - uint8_t* data, size_t* len) { - size_t storage_ix = 0; - WriteBitsPrepareStorage(storage_ix, data); +bool EncodeHistogramData(const JPEGData& jpg, State* state, uint8_t* data, + size_t* len) { + Storage storage(data, *len); for (size_t i = 0; i < jpg.components.size(); ++i) { - WriteBits(3, state->meta[i].context_bits, &storage_ix, data); + WriteBits(3, state->meta[i].context_bits, &storage); } - state->entropy_codes->EncodeContextMap(&storage_ix, data); + state->entropy_codes->EncodeContextMap(&storage); + + state->entropy_codes->BuildAndStoreEntropyCodes(&storage); - state->entropy_codes->BuildAndStoreEntropyCodes(&storage_ix, data); - *len = (storage_ix + 7) >> 3; + *len = storage.GetBytesUsed(); return true; } -bool EncodeDCData(const JPEGData& jpg, 
State* s, uint8_t* data, - size_t* len) { - size_t storage_ix = 0; - WriteBitsPrepareStorage(storage_ix, data); - s->data_stream_dc.EncodeCodeWords(s->entropy_codes, &storage_ix, data); - *len = (storage_ix + 7) >> 3; +bool EncodeDCData(const JPEGData& jpg, State* s, uint8_t* data, size_t* len) { + Storage storage(data, *len); + + s->data_stream_dc.EncodeCodeWords(s->entropy_codes, &storage); + + *len = storage.GetBytesUsed(); return true; } -bool EncodeACData(const JPEGData& jpg, State* s, uint8_t* data, - size_t* len) { - size_t storage_ix = 0; - WriteBitsPrepareStorage(storage_ix, data); - s->data_stream_ac.EncodeCodeWords(s->entropy_codes, &storage_ix, data); - *len = (storage_ix + 7) >> 3; +bool EncodeACData(const JPEGData& jpg, State* s, uint8_t* data, size_t* len) { + Storage storage(data, *len); + + s->data_stream_ac.EncodeCodeWords(s->entropy_codes, &storage); + + *len = storage.GetBytesUsed(); return true; } @@ -1311,8 +1305,8 @@ EntropyCodes PrepareEntropyCodes(State* state) { return state->entropy_source.Finish(group_context_offsets); } -bool BrunsliSerialize(State* state, const JPEGData& jpg, - uint32_t skip_sections, uint8_t* data, size_t* len) { +bool BrunsliSerialize(State* state, const JPEGData& jpg, uint32_t skip_sections, + uint8_t* data, size_t* len) { size_t pos = 0; // TODO: refactor to remove repetitive params. @@ -1348,8 +1342,9 @@ bool BrunsliSerialize(State* state, const JPEGData& jpg, } if (!(skip_sections & (1u << kBrunsliHistogramDataTag))) { - ok = EncodeSection(jpg, state, kBrunsliHistogramDataTag, - EncodeHistogramData, Base128Size(*len - pos), *len, data, &pos); + ok = + EncodeSection(jpg, state, kBrunsliHistogramDataTag, EncodeHistogramData, + Base128Size(*len - pos), *len, data, &pos); if (!ok) return false; } @@ -1436,8 +1431,8 @@ size_t GetBrunsliBypassSize(size_t jpg_size) { return jpg_size + kBrunsliSignatureSize + kMaxBypassHeaderSize; } -bool EncodeOriginalJpg(const JPEGData& jpg, State* s, - uint8_t* data, size_t* len) { +bool EncodeOriginalJpg(const JPEGData& jpg, State* s, uint8_t* data, + size_t* len) { if (jpg.original_jpg == NULL || jpg.original_jpg_size > *len) { return false; } @@ -1446,7 +1441,7 @@ bool EncodeOriginalJpg(const JPEGData& jpg, State* s, return true; } -bool BrunsliEncodeJpegBypass(const uint8_t* jpg_data, const size_t jpg_data_len, +bool BrunsliEncodeJpegBypass(const uint8_t* jpg_data, size_t jpg_data_len, uint8_t* data, size_t* len) { size_t pos = 0; if (!EncodeSignature(*len, data, &pos)) { diff --git a/c/enc/context_map_encode.cc b/c/enc/context_map_encode.cc index fff2aa76..c1b58102 100644 --- a/c/enc/context_map_encode.cc +++ b/c/enc/context_map_encode.cc @@ -9,7 +9,7 @@ #include "./context_map_encode.h" #include -#include /* for memset */ +#include /* for memset */ #include #include "../common/huffman_tree.h" @@ -27,25 +27,22 @@ static const int kCodeLengthCodes = 18; // size is 256 + 16 = 272. (We can have 256 clusters and 16 run length codes). 
static const size_t kMaxAlphabetSize = 272; -void StoreVarLenUint8(size_t n, size_t* storage_ix, uint8_t* storage) { +void StoreVarLenUint8(size_t n, Storage* storage) { if (n == 0) { - WriteBits(1, 0, storage_ix, storage); + WriteBits(1, 0, storage); } else { - WriteBits(1, 1, storage_ix, storage); + WriteBits(1, 1, storage); size_t nbits = Log2FloorNonZero(n); - WriteBits(3, nbits, storage_ix, storage); - WriteBits(nbits, n - (1 << nbits), storage_ix, storage); + WriteBits(3, nbits, storage); + WriteBits(nbits, n - (1 << nbits), storage); } } -void StoreHuffmanTreeOfHuffmanTreeToBitMask( - const int num_codes, - const uint8_t* code_length_bitdepth, - size_t* storage_ix, - uint8_t* storage) { +void StoreHuffmanTreeOfHuffmanTreeToBitMask(const int num_codes, + const uint8_t* code_length_bitdepth, + Storage* storage) { static const uint8_t kStorageOrder[kCodeLengthCodes] = { - 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15 - }; + 1, 2, 3, 4, 0, 5, 17, 6, 16, 7, 8, 9, 10, 11, 12, 13, 14, 15}; // The bit lengths of the Huffman code over the code length alphabet // are compressed with the following static Huffman code: // Symbol Code @@ -56,12 +53,10 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask( // 3 01 // 4 10 // 5 1111 - static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = { - 0, 7, 3, 2, 1, 15 - }; - static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = { - 2, 4, 3, 2, 2, 4 - }; + static const uint8_t kHuffmanBitLengthHuffmanCodeSymbols[6] = {0, 7, 3, + 2, 1, 15}; + static const uint8_t kHuffmanBitLengthHuffmanCodeBitLengths[6] = {2, 4, 3, + 2, 2, 4}; // Throw away trailing zeros: size_t codes_to_store = kCodeLengthCodes; @@ -80,46 +75,42 @@ void StoreHuffmanTreeOfHuffmanTreeToBitMask( skip_some = 3; // skips three. 
} } - WriteBits(2, skip_some, storage_ix, storage); + WriteBits(2, skip_some, storage); for (size_t i = skip_some; i < codes_to_store; ++i) { size_t l = code_length_bitdepth[kStorageOrder[i]]; WriteBits(kHuffmanBitLengthHuffmanCodeBitLengths[l], - kHuffmanBitLengthHuffmanCodeSymbols[l], storage_ix, storage); + kHuffmanBitLengthHuffmanCodeSymbols[l], storage); } } -void StoreHuffmanTreeToBitMask( - const size_t huffman_tree_size, - const uint8_t* huffman_tree, - const uint8_t* huffman_tree_extra_bits, - const uint8_t* code_length_bitdepth, - const uint16_t* code_length_bitdepth_symbols, - size_t * __restrict storage_ix, - uint8_t * __restrict storage) { +void StoreHuffmanTreeToBitMask(const size_t huffman_tree_size, + const uint8_t* huffman_tree, + const uint8_t* huffman_tree_extra_bits, + const uint8_t* code_length_bitdepth, + const uint16_t* code_length_bitdepth_symbols, + Storage* storage) { for (size_t i = 0; i < huffman_tree_size; ++i) { size_t ix = huffman_tree[i]; WriteBits(code_length_bitdepth[ix], code_length_bitdepth_symbols[ix], - storage_ix, storage); + storage); // Extra bits switch (ix) { case 16: - WriteBits(2, huffman_tree_extra_bits[i], storage_ix, storage); + WriteBits(2, huffman_tree_extra_bits[i], storage); break; case 17: - WriteBits(3, huffman_tree_extra_bits[i], storage_ix, storage); + WriteBits(3, huffman_tree_extra_bits[i], storage); break; } } } -void StoreSimpleHuffmanTree(const uint8_t* depths, - size_t symbols[4], - size_t num_symbols, - size_t max_bits, - size_t* storage_ix, uint8_t* storage) { +void StoreSimpleHuffmanTree(const uint8_t* depths, size_t symbols[4], + size_t num_symbols, size_t max_bits, + Storage* storage) { // value of 1 indicates a simple Huffman code - WriteBits(2, 1, storage_ix, storage); - WriteBits(2, num_symbols - 1, storage_ix, storage); // NSYM - 1 + WriteBits(2, 1, storage); + WriteBits(2, num_symbols - 1, storage); // NSYM - 1 // Sort for (size_t i = 0; i < num_symbols; i++) { @@ -131,26 +122,25 @@ void StoreSimpleHuffmanTree(const uint8_t* depths, } if (num_symbols == 2) { - WriteBits(max_bits, symbols[0], storage_ix, storage); - WriteBits(max_bits, symbols[1], storage_ix, storage); + WriteBits(max_bits, symbols[0], storage); + WriteBits(max_bits, symbols[1], storage); } else if (num_symbols == 3) { - WriteBits(max_bits, symbols[0], storage_ix, storage); - WriteBits(max_bits, symbols[1], storage_ix, storage); - WriteBits(max_bits, symbols[2], storage_ix, storage); + WriteBits(max_bits, symbols[0], storage); + WriteBits(max_bits, symbols[1], storage); + WriteBits(max_bits, symbols[2], storage); } else { - WriteBits(max_bits, symbols[0], storage_ix, storage); - WriteBits(max_bits, symbols[1], storage_ix, storage); - WriteBits(max_bits, symbols[2], storage_ix, storage); - WriteBits(max_bits, symbols[3], storage_ix, storage); + WriteBits(max_bits, symbols[0], storage); + WriteBits(max_bits, symbols[1], storage); + WriteBits(max_bits, symbols[2], storage); + WriteBits(max_bits, symbols[3], storage); // tree-select - WriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage_ix, storage); + WriteBits(1, depths[symbols[0]] == 1 ? 1 : 0, storage); } } // num = alphabet size // depths = symbol depths -void StoreHuffmanTree(const uint8_t* depths, size_t num, - size_t* storage_ix, uint8_t* storage) { +void StoreHuffmanTree(const uint8_t* depths, size_t num, Storage* storage) { // Write the Huffman tree into the compact representation. 
BRUNSLI_DCHECK(num <= kMaxAlphabetSize); uint8_t huffman_tree[kMaxAlphabetSize]; @@ -160,7 +150,7 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num, huffman_tree_extra_bits); // Calculate the statistics of the Huffman tree in the compact representation. - uint32_t huffman_tree_histogram[kCodeLengthCodes] = { 0 }; + uint32_t huffman_tree_histogram[kCodeLengthCodes] = {0}; for (size_t i = 0; i < huffman_tree_size; ++i) { ++huffman_tree_histogram[huffman_tree[i]]; } @@ -181,70 +171,25 @@ void StoreHuffmanTree(const uint8_t* depths, size_t num, // Calculate another Huffman tree to use for compressing both the // earlier Huffman tree with. - uint8_t code_length_bitdepth[kCodeLengthCodes] = { 0 }; - uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = { 0 }; - CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes, - 5, &code_length_bitdepth[0]); + uint8_t code_length_bitdepth[kCodeLengthCodes] = {0}; + uint16_t code_length_bitdepth_symbols[kCodeLengthCodes] = {0}; + CreateHuffmanTree(&huffman_tree_histogram[0], kCodeLengthCodes, 5, + &code_length_bitdepth[0]); ConvertBitDepthsToSymbols(code_length_bitdepth, kCodeLengthCodes, &code_length_bitdepth_symbols[0]); // Now, we have all the data, let's start storing it StoreHuffmanTreeOfHuffmanTreeToBitMask(num_codes, code_length_bitdepth, - storage_ix, storage); + storage); if (num_codes == 1) { code_length_bitdepth[code] = 0; } // Store the real huffman tree now. - StoreHuffmanTreeToBitMask(huffman_tree_size, - huffman_tree, - huffman_tree_extra_bits, - &code_length_bitdepth[0], - code_length_bitdepth_symbols, - storage_ix, storage); -} - -void BuildAndStoreHuffmanTree(const uint32_t* histogram, - const size_t length, - uint8_t* depth, - uint16_t* bits, - size_t* storage_ix, - uint8_t* storage) { - size_t count = 0; - size_t s4[4] = { 0 }; - for (size_t i = 0; i < length; i++) { - if (histogram[i]) { - if (count < 4) { - s4[count] = i; - } else if (count > 4) { - break; - } - count++; - } - } - - size_t max_bits_counter = length - 1; - size_t max_bits = 0; - while (max_bits_counter) { - max_bits_counter >>= 1; - ++max_bits; - } - - if (count <= 1) { - WriteBits(4, 1, storage_ix, storage); - WriteBits(max_bits, s4[0], storage_ix, storage); - return; - } - - CreateHuffmanTree(histogram, length, 15, depth); - ConvertBitDepthsToSymbols(depth, length, bits); - - if (count <= 4) { - StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage_ix, storage); - } else { - StoreHuffmanTree(depth, length, storage_ix, storage); - } + StoreHuffmanTreeToBitMask(huffman_tree_size, huffman_tree, + huffman_tree_extra_bits, &code_length_bitdepth[0], + code_length_bitdepth_symbols, storage); } size_t IndexOf(const std::vector& v, uint32_t value) { @@ -288,14 +233,12 @@ void RunLengthCodeZeros(const std::vector& v_in, uint32_t* max_run_length_prefix, std::vector* v_out, std::vector* extra_bits) { - uint32_t max_reps = 0; + size_t max_reps = 0; for (size_t i = 0; i < v_in.size();) { - for (; i < v_in.size() && v_in[i] != 0; ++i) ; - uint32_t reps = 0; - for (; i < v_in.size() && v_in[i] == 0; ++i) { - ++reps; - } - max_reps = std::max(reps, max_reps); + while (i < v_in.size() && v_in[i] != 0) ++i; + size_t i0 = i; + while (i < v_in.size() && v_in[i] == 0) ++i; + max_reps = std::max(i - i0, max_reps); } uint32_t max_prefix = max_reps > 0 ? 
Log2FloorNonZero(max_reps) : 0; max_prefix = std::min(max_prefix, *max_run_length_prefix); @@ -329,10 +272,48 @@ void RunLengthCodeZeros(const std::vector& v_in, } // namespace +void BuildAndStoreHuffmanTree(const uint32_t* histogram, const size_t length, + uint8_t* depth, uint16_t* bits, + Storage* storage) { + size_t count = 0; + size_t s4[4] = {0}; + for (size_t i = 0; i < length; i++) { + if (histogram[i]) { + if (count < 4) { + s4[count] = i; + } else if (count > 4) { + break; + } + count++; + } + } + + size_t max_bits_counter = length - 1; + size_t max_bits = 0; + while (max_bits_counter) { + max_bits_counter >>= 1; + ++max_bits; + } + + if (count <= 1) { + WriteBits(4, 1, storage); + WriteBits(max_bits, s4[0], storage); + return; + } + + CreateHuffmanTree(histogram, length, 15, depth); + ConvertBitDepthsToSymbols(depth, length, bits); + + if (count <= 4) { + StoreSimpleHuffmanTree(depth, s4, count, max_bits, storage); + } else { + StoreHuffmanTree(depth, length, storage); + } +} + void EncodeContextMap(const std::vector& context_map, - size_t num_clusters, - size_t* storage_ix, uint8_t* storage) { - StoreVarLenUint8(num_clusters - 1, storage_ix, storage); + size_t num_clusters, Storage* storage) { + StoreVarLenUint8(num_clusters - 1, storage); if (num_clusters == 1) { return; @@ -342,35 +323,32 @@ void EncodeContextMap(const std::vector& context_map, std::vector rle_symbols; std::vector extra_bits; uint32_t max_run_length_prefix = 6; - RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix, - &rle_symbols, &extra_bits); + RunLengthCodeZeros(transformed_symbols, &max_run_length_prefix, &rle_symbols, + &extra_bits); uint32_t symbol_histogram[kMaxAlphabetSize]; memset(symbol_histogram, 0, sizeof(symbol_histogram)); for (size_t i = 0; i < rle_symbols.size(); ++i) { ++symbol_histogram[rle_symbols[i]]; } bool use_rle = max_run_length_prefix > 0; - WriteBits(1, use_rle, storage_ix, storage); + WriteBits(1, use_rle, storage); if (use_rle) { - WriteBits(4, max_run_length_prefix - 1, storage_ix, storage); + WriteBits(4, max_run_length_prefix - 1, storage); } uint8_t bit_depths[kMaxAlphabetSize]; uint16_t bit_codes[kMaxAlphabetSize]; memset(bit_depths, 0, sizeof(bit_depths)); memset(bit_codes, 0, sizeof(bit_codes)); BuildAndStoreHuffmanTree(symbol_histogram, - num_clusters + max_run_length_prefix, - bit_depths, bit_codes, - storage_ix, storage); + num_clusters + max_run_length_prefix, bit_depths, + bit_codes, storage); for (size_t i = 0; i < rle_symbols.size(); ++i) { - WriteBits(bit_depths[rle_symbols[i]], - bit_codes[rle_symbols[i]], - storage_ix, storage); + WriteBits(bit_depths[rle_symbols[i]], bit_codes[rle_symbols[i]], storage); if (rle_symbols[i] > 0 && rle_symbols[i] <= max_run_length_prefix) { - WriteBits(rle_symbols[i], extra_bits[i], storage_ix, storage); + WriteBits(rle_symbols[i], extra_bits[i], storage); } } - WriteBits(1, 1, storage_ix, storage); // use move-to-front + WriteBits(1, 1, storage); // use move-to-front } } // namespace brunsli diff --git a/c/enc/context_map_encode.h b/c/enc/context_map_encode.h index 2cffe223..cc4a30d6 100644 --- a/c/enc/context_map_encode.h +++ b/c/enc/context_map_encode.h @@ -10,14 +10,20 @@ #include #include +#include "./write_bits.h" namespace brunsli { +// Builds a Huffman tree for the given histogram, and encodes it into storage +// in a format that can be read by HuffmanDecodingData::ReadFromBitstream. 
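+// The `depth` and `bits` arrays must have room for `length` entries; they
+// receive the per-symbol code lengths and canonical codes, so the caller can
+// subsequently emit symbol `s` with WriteBits(depth[s], bits[s], storage).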
+void BuildAndStoreHuffmanTree(const uint32_t* histogram, const size_t length, + uint8_t* depth, uint16_t* bits, + Storage* storage); + // Encodes the given context map to the bit stream. The number of different // histogram ids is given by num_clusters. void EncodeContextMap(const std::vector& context_map, - size_t num_clusters, - size_t* storage_ix, uint8_t* storage); + size_t num_clusters, Storage* storage); } // namespace brunsli diff --git a/c/enc/histogram_encode.cc b/c/enc/histogram_encode.cc index 98a47126..1ce1c2c3 100644 --- a/c/enc/histogram_encode.cc +++ b/c/enc/histogram_encode.cc @@ -19,18 +19,18 @@ namespace brunsli { // Static Huffman code for encoding histogram length. static const uint8_t kHistogramLengthBitLengths[ANS_MAX_SYMBOLS - 2] = { - 8, 8, 6, 6, 6, 5, 4, 3, 3, 3, 3, 3, 3, 4, 5, 7, + 8, 8, 6, 6, 6, 5, 4, 3, 3, 3, 3, 3, 3, 4, 5, 7, }; static const uint16_t kHistogramLengthSymbols[ANS_MAX_SYMBOLS - 2] = { - 127, 255, 15, 47, 31, 7, 3, 0, 4, 2, 6, 1, 5, 11, 23, 63, + 127, 255, 15, 47, 31, 7, 3, 0, 4, 2, 6, 1, 5, 11, 23, 63, }; // Static Huffman code for encoding logcounts. static const uint8_t kLogCountBitLengths[ANS_LOG_TAB_SIZE + 1] = { - 5, 4, 4, 4, 3, 3, 2, 3, 3, 6, 6, + 5, 4, 4, 4, 3, 3, 2, 3, 3, 6, 6, }; static const uint16_t kLogCountSymbols[ANS_LOG_TAB_SIZE + 1] = { - 15, 3, 11, 7, 2, 6, 0, 1, 5, 31, 63, + 15, 3, 11, 7, 2, 6, 0, 1, 5, 31, 63, }; // Returns the difference between largest count that can be represented and is @@ -42,9 +42,9 @@ static int SmallestIncrement(int count) { return (1 << drop_bits); } -template bool RebalanceHistogram( - const float* targets, int max_symbol, int table_size, int* omit_pos, - int* counts) { +template +bool RebalanceHistogram(const float* targets, int max_symbol, int table_size, + int* omit_pos, int* counts) { BRUNSLI_DCHECK(table_size >= 2); int sum = 0; float sum_nonrounded = 0.0; @@ -66,8 +66,8 @@ template bool RebalanceHistogram( counts[n] -= counts[n] & (inc - 1); const float target = minimize_error_of_sum ? (sum_nonrounded - sum) : targets[n]; - if (counts[n] == 0 || (target > counts[n] + inc / 2 && - counts[n] + inc < table_size)) { + if (counts[n] == 0 || + (target > counts[n] + inc / 2 && counts[n] + inc < table_size)) { counts[n] += inc; } sum += counts[n]; @@ -84,7 +84,6 @@ template bool RebalanceHistogram( return counts[remainder_pos] > 0; } - void NormalizeCounts(int* counts, int* omit_pos, const int length, const int precision_bits, int* num_symbols, int* symbols) { BRUNSLI_DCHECK(precision_bits > 0); @@ -128,33 +127,29 @@ void NormalizeCounts(int* counts, int* omit_pos, const int length, } } -void EncodeCounts(const int* counts, - const int omit_pos, - const int num_symbols, - const int* symbols, - size_t* storage_ix, - uint8_t* storage) { +void EncodeCounts(const int* counts, const int omit_pos, const int num_symbols, + const int* symbols, Storage* storage) { int max_bits = 5; // = 1 + Log2Floor(ANS_MAX_SYMBOLS - 1); if (num_symbols <= 2) { // Small tree marker to encode 1-2 symbols. 
- WriteBits(1, 1, storage_ix, storage); + WriteBits(1, 1, storage); if (num_symbols == 0) { - WriteBits(max_bits + 1, 0, storage_ix, storage); + WriteBits(max_bits + 1, 0, storage); } else { - WriteBits(1, num_symbols - 1, storage_ix, storage); + WriteBits(1, num_symbols - 1, storage); for (int i = 0; i < num_symbols; ++i) { - WriteBits(max_bits, symbols[i], storage_ix, storage); + WriteBits(max_bits, symbols[i], storage); } } if (num_symbols == 2) { - WriteBits(ANS_LOG_TAB_SIZE, counts[symbols[0]], storage_ix, storage); + WriteBits(ANS_LOG_TAB_SIZE, counts[symbols[0]], storage); } } else { // Mark non-small tree. - WriteBits(1, 0, storage_ix, storage); + WriteBits(1, 0, storage); int length = 0; - int logcounts[ANS_MAX_SYMBOLS] = { 0 }; + int logcounts[ANS_MAX_SYMBOLS] = {0}; int omit_log = 0; for (int i = 0; i < ANS_MAX_SYMBOLS; ++i) { BRUNSLI_DCHECK(counts[i] <= ANS_TAB_SIZE); @@ -175,14 +170,12 @@ void EncodeCounts(const int* counts, // Since num_symbols >= 3, we know that length >= 3, therefore we encode // length - 3 with a static Huffman code. WriteBits(kHistogramLengthBitLengths[length - 3], - kHistogramLengthSymbols[length - 3], - storage_ix, storage); + kHistogramLengthSymbols[length - 3], storage); // The logcount values are encoded with a static Huffman code. for (int i = 0; i < length; ++i) { WriteBits(kLogCountBitLengths[logcounts[i]], - kLogCountSymbols[logcounts[i]], - storage_ix, storage); + kLogCountSymbols[logcounts[i]], storage); } for (int i = 0; i < length; ++i) { if (logcounts[i] > 1 && i != omit_pos) { @@ -190,7 +183,7 @@ void EncodeCounts(const int* counts, int drop_bits = logcounts[i] - 1 - bitcount; BRUNSLI_CHECK((counts[i] & ((1 << drop_bits) - 1)) == 0); WriteBits(bitcount, (counts[i] >> drop_bits) - (1 << bitcount), - storage_ix, storage); + storage); } } } diff --git a/c/enc/histogram_encode.h b/c/enc/histogram_encode.h index 381dcb9e..57f81431 100644 --- a/c/enc/histogram_encode.h +++ b/c/enc/histogram_encode.h @@ -11,6 +11,7 @@ #include "../common/ans_params.h" #include +#include "./write_bits.h" namespace brunsli { @@ -25,12 +26,8 @@ static const int kMaxNumSymbolsForSmallCode = 4; // Each count will all be rounded to multiples of // 1 << GetPopulationCountPrecision(count), except possibly for one. The index // of that count will be stored in *omit_pos. -void NormalizeCounts(int* counts, - int* omit_pos, - const int length, - const int precision_bits, - int* num_symbols, - int* symbols); +void NormalizeCounts(int* counts, int* omit_pos, const int length, + const int precision_bits, int* num_symbols, int* symbols); // Stores a histogram in counts[0 .. ANS_MAX_SYMBOLS) to the bit-stream where // the sum of all population counts is ANS_TAB_SIZE and the number of symbols @@ -39,12 +36,8 @@ void NormalizeCounts(int* counts, // with non-zero population counts. // Each count must be rounded to a multiple of // 1 << GetPopulationCountPrecision(count), except possibly counts[omit_pos]. -void EncodeCounts(const int* counts, - const int omit_pos, - const int num_symbols, - const int* symbols, - size_t* storage_ix, - uint8_t* storage); +void EncodeCounts(const int* counts, const int omit_pos, const int num_symbols, + const int* symbols, Storage* storage); // Returns an estimate of the number of bits required to encode the given // histogram (header bits plus data bits). 
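// A minimal usage sketch of the refactored writers above (illustrative only,
// not part of the patch; the buffer size is arbitrary and counts/symbols are
// assumed to be already normalized with NormalizeCounts). Include paths are
// given relative to the repository root.
#include <cstddef>
#include <cstdint>

#include "c/enc/histogram_encode.h"
#include "c/enc/write_bits.h"

static size_t StoreCountsExample(const int* counts, int omit_pos,
                                 int num_symbols, const int* symbols,
                                 uint8_t* buf, size_t buf_size) {
  brunsli::Storage storage(buf, buf_size);  // wraps the buffer, tracks bits
  brunsli::EncodeCounts(counts, omit_pos, num_symbols, symbols, &storage);
  return storage.GetBytesUsed();            // == (storage.pos + 7) / 8
}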
diff --git a/c/enc/state.h b/c/enc/state.h index 7723acb0..113c5448 100644 --- a/c/enc/state.h +++ b/c/enc/state.h @@ -18,6 +18,7 @@ #include "../common/platform.h" #include #include "./ans_encode.h" +#include "./write_bits.h" namespace brunsli { namespace internal { @@ -60,10 +61,10 @@ class EntropyCodes { public: EntropyCodes(const std::vector& histograms, int num_bands, const std::vector& offsets); - // GCC is insane! + // GCC declares it won't apply RVO, even if it actually does. //EntropyCodes(const EntropyCodes&) = delete; - void EncodeContextMap(size_t* storage_ix, uint8_t* storage) const; - void BuildAndStoreEntropyCodes(size_t* storage_ix, uint8_t* storage); + void EncodeContextMap(Storage* storage) const; + void BuildAndStoreEntropyCodes(Storage* storage); const ANSTable* GetANSTable(int context) const; private: @@ -101,7 +102,7 @@ class DataStream { // Encodes the next bit to the bit stream, based on the 8-bit precision // probability, i.e. P(bit = 0) = prob / 256. Statistics are updated in 'p'. void AddBit(Prob* const p, int bit); - void EncodeCodeWords(EntropyCodes* s, size_t* storage_ix, uint8_t* storage); + void EncodeCodeWords(EntropyCodes* s, Storage* storage); private: struct CodeWord { @@ -145,8 +146,8 @@ bool PredictDCCoeffs(State* state); void EncodeDC(State* state); void EncodeAC(State* state); EntropyCodes PrepareEntropyCodes(State* state); -bool BrunsliSerialize(State* state, const JPEGData& jpg, - uint32_t skip_sections, uint8_t* data, size_t* len); +bool BrunsliSerialize(State* state, const JPEGData& jpg, uint32_t skip_sections, + uint8_t* data, size_t* len); } // namespace enc } // namespace internal diff --git a/c/enc/write_bits.cc b/c/enc/write_bits.cc new file mode 100644 index 00000000..1567fbd3 --- /dev/null +++ b/c/enc/write_bits.cc @@ -0,0 +1,29 @@ +// Copyright (c) Google LLC 2019 +// +// Use of this source code is governed by an MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +#include "../common/platform.h" +#include "./write_bits.h" + +namespace brunsli { + +Storage::Storage(uint8_t* data, size_t length) + : data(data), length(length), pos(0) { + BRUNSLI_CHECK(length > 0); + data[0] = 0; +} + +void Storage::AppendBytes(const uint8_t* data, size_t len) { + BRUNSLI_DCHECK((pos & 7) == 0); + BRUNSLI_DCHECK(GetBytesUsed() + len <= length); + memcpy(this->data + (pos >> 3), data, len); + pos += 8 * len; +} + +Storage::~Storage() { + BRUNSLI_CHECK(GetBytesUsed() <= length); +} + +} // namespace brunsli diff --git a/c/enc/write_bits.h b/c/enc/write_bits.h index 3459f507..2368b623 100644 --- a/c/enc/write_bits.h +++ b/c/enc/write_bits.h @@ -14,6 +14,28 @@ namespace brunsli { +class Storage { + public: + Storage(uint8_t* data, size_t length); + + /** + * Crashes in case of buffer overflow. + */ + ~Storage(); + + size_t GetBytesUsed() const { + return (pos + 7) >> 3; + } + + void AppendBytes(const uint8_t* data, size_t len); + + uint8_t* const data; + // Size of buffer in bytes. + const size_t length; + // Number of bits written. + size_t pos; +}; + /* This function writes bits into bytes in increasing addresses, and within a byte least-significant-bit first. @@ -29,51 +51,41 @@ namespace brunsli { For n bits, we take the last 5 bits, OR that with high bits in BYTE-0, and locate the rest in BYTE+1, BYTE+2, etc. 
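   For example: with storage->pos == 3, WriteBits(5, 0x16, storage) shifts
   the value left by three bits and ORs it into byte 0 as 0xB0, i.e. the
   five bits occupy bit positions 3..7, and pos advances to 8 so the next
   write starts on a byte boundary.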
*/ -BRUNSLI_INLINE void WriteBits(size_t n_bits, - uint64_t bits, - size_t* BRUNSLI_RESTRICT pos, - uint8_t* BRUNSLI_RESTRICT array) { +BRUNSLI_INLINE void WriteBits(size_t n_bits, uint64_t bits, Storage* storage) { BRUNSLI_LOG_DEBUG() << "WriteBits " << std::setw(2) << n_bits << " " << std::hex << std::setw(16) << bits << " " << std::dec - << std::setw(10) << *pos << BRUNSLI_ENDL(); + << std::setw(10) << storage->pos << BRUNSLI_ENDL(); BRUNSLI_DCHECK((bits >> n_bits) == 0); BRUNSLI_DCHECK(n_bits <= 56); #if defined(BRUNSLI_LITTLE_ENDIAN) + BRUNSLI_DCHECK(((storage->pos + n_bits) >> 3) + 7 < storage->length); /* This branch of the code can write up to 56 bits at a time, 7 bits are lost by being perhaps already in *p and at least 1 bit is needed to initialize the bit-stream ahead (i.e. if 7 bits are in *p and we write 57 bits, then the next write will access a byte that was never initialized). */ - uint8_t* p = &array[*pos >> 3]; - uint64_t v = static_cast(*p); /* Zero-extend 8 to 64 bits. */ - v |= bits << (*pos & 7); - BRUNSLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. */ - *pos += n_bits; + uint8_t* BRUNSLI_RESTRICT p = storage->data + (storage->pos >> 3); + uint64_t v = static_cast(*p); /* Zero-extend 8 to 64 bits. */ + v |= bits << (storage->pos & 7); + BRUNSLI_UNALIGNED_STORE64LE(p, v); /* Set some bits. */ + storage->pos += n_bits; #else /* implicit & 0xFF is assumed for uint8_t arithmetics */ - uint8_t* array_pos = &array[*pos >> 3]; - const size_t bits_reserved_in_first_byte = (*pos & 7); + BRUNSLI_DCHECK(((storage->pos + n_bits) >> 3) < storage->length); + uint8_t* BRUNSLI_RESTRICT array_pos = storage->data + (storage->pos >> 3); + const size_t bits_reserved_in_first_byte = (storage->pos & 7); bits <<= bits_reserved_in_first_byte; *(array_pos++) |= static_cast(bits); for (size_t bits_left_to_write = n_bits + bits_reserved_in_first_byte; - bits_left_to_write >= 9; - bits_left_to_write -= 8) { + bits_left_to_write >= 9; bits_left_to_write -= 8) { bits >>= 8; *(array_pos++) = static_cast(bits); } *array_pos = 0; - *pos += n_bits; + storage->pos += n_bits; #endif } -BRUNSLI_INLINE void WriteBitsPrepareStorage( - size_t pos, uint8_t* array) { - BRUNSLI_LOG_DEBUG() << "WriteBitsPrepareStorage " << std::setw(10) << pos - << BRUNSLI_ENDL(); - BRUNSLI_DCHECK((pos & 7) == 0); - array[pos >> 3] = 0; -} - } // namespace brunsli #endif // BRUNSLI_ENC_WRITE_BITS_H_ diff --git a/c/experimental/groups.cc b/c/experimental/groups.cc new file mode 100644 index 00000000..9e70d86b --- /dev/null +++ b/c/experimental/groups.cc @@ -0,0 +1,476 @@ +// Copyright (c) Google LLC 2019 +// +// Use of this source code is governed by an MIT-style +// license that can be found in the LICENSE file or at +// https://opensource.org/licenses/MIT. + +// Functions for writing encoding / decoding Brunsli in "groups" mode. 
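+//
+// Stream layout in this mode: the regular common sections come first
+// (signature, header, metadata, internals, quant and histogram data),
+// followed by one section per DC group and then one section per AC group,
+// so the groups can be encoded and decoded in parallel via an Executor.
+// Group dimensions must be powers of two, dc_group_dim must be a multiple
+// of ac_group_dim, and ac_group_dim a multiple of the maximal sampling
+// factors.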
+ +#include "./groups.h" + +#include +#include +#include + +#include "../common/constants.h" +#include "../common/context.h" +#include +#include +#include +#include +#include +#include "../dec/state.h" +#include +#include +#include "../enc/state.h" + +namespace brunsli { + +namespace { + +bool SkipSection(const uint8_t** data, size_t len) { + size_t section_len = 0; + uint64_t b = 0x80; + size_t off = 1; + for (size_t i = 0; (i < 9) && (b & 0x80u); ++i) { + if (off >= len) return false; + b = (*data)[off++]; + section_len |= (b & 0x7Fu) << (i * 7); + } + if ((b & 0x80u) != 0) return false; + off += section_len; + if (off > len) return false; + *data += off; + return true; +} + +} // namespace + +void SequentialExecutor(const Runnable& runnable, size_t num_tasks) { + for (size_t i = 0; i < num_tasks; ++i) runnable(i); +} + +bool EncodeGroups(const brunsli::JPEGData& jpg, uint8_t* data, size_t* len, + size_t ac_group_dim, size_t dc_group_dim, + Executor* executor) { + using ::brunsli::internal::enc::BlockI32; + using ::brunsli::internal::enc::ComponentMeta; + using ::brunsli::internal::enc::DataStream; + using ::brunsli::internal::enc::EntropyCodes; + using ::brunsli::internal::enc::EntropySource; + using ::brunsli::internal::enc::Histogram; + using ::brunsli::internal::enc::SelectContextBits; + using ::brunsli::internal::enc::State; + + if ((ac_group_dim & (ac_group_dim - 1)) != 0) return false; + if ((dc_group_dim & (dc_group_dim - 1)) != 0) return false; + if ((dc_group_dim % ac_group_dim) != 0) return false; + if ((ac_group_dim % jpg.max_h_samp_factor) != 0) return false; + if ((ac_group_dim % jpg.max_v_samp_factor) != 0) return false; + + size_t num_components = jpg.components.size(); + + std::vector approx_total_nonzeros(num_components); + + size_t width_in_blocks = jpg.MCU_cols * jpg.max_h_samp_factor; + size_t height_in_blocks = jpg.MCU_rows * jpg.max_v_samp_factor; + + size_t w_ac = (width_in_blocks + ac_group_dim - 1) / ac_group_dim; + size_t h_ac = (height_in_blocks + ac_group_dim - 1) / ac_group_dim; + + size_t w_dc = (width_in_blocks + dc_group_dim - 1) / dc_group_dim; + size_t h_dc = (height_in_blocks + dc_group_dim - 1) / dc_group_dim; + + std::vector> dc_prediction_errors( + num_components); + std::vector> block_state(num_components); + for (size_t i = 0; i < num_components; ++i) { + const JPEGComponent& c = jpg.components[i]; + dc_prediction_errors[i].resize(c.width_in_blocks * c.height_in_blocks); + block_state[i].resize(c.width_in_blocks * c.height_in_blocks); + } + + State state; + std::vector dc_state(w_dc * h_dc); + std::vector ac_state(w_ac * h_ac); + + if (!CalculateMeta(jpg, &state)) return false; + for (size_t c = 0; c < num_components; ++c) { + ComponentMeta& m = state.meta[c]; + m.dc_prediction_errors = dc_prediction_errors[c].data(); + m.block_state = block_state[c].data(); + } + + for (size_t y = 0; y < h_dc; ++y) { + for (size_t x = 0; x < w_dc; ++x) { + State& s = dc_state[x + y * w_dc]; + std::vector& meta = s.meta; + if (!CalculateMeta(jpg, &s)) return false; + for (size_t c = 0; c < num_components; ++c) { + ComponentMeta& m = meta[c]; + size_t h_group_dim = m.h_samp * dc_group_dim / jpg.max_h_samp_factor; + size_t first_x = x * h_group_dim; + size_t last_x = + std::min(first_x + h_group_dim, m.width_in_blocks); + size_t v_group_dim = m.v_samp * dc_group_dim / jpg.max_v_samp_factor; + size_t first_y = y * v_group_dim; + size_t last_y = + std::min(first_y + v_group_dim, m.height_in_blocks); + m.ac_coeffs += first_x * brunsli::kDCTBlockSize + first_y * 
m.ac_stride; + m.width_in_blocks = last_x - first_x; + m.height_in_blocks = last_y - first_y; + m.dc_prediction_errors = + dc_prediction_errors[c].data() + first_x + first_y * m.dc_stride; + m.block_state = block_state[c].data() + first_x + first_y * m.b_stride; + } + } + } + + for (size_t y = 0; y < h_ac; ++y) { + for (size_t x = 0; x < w_ac; ++x) { + State& s = ac_state[x + y * w_ac]; + std::vector& meta = s.meta; + if (!CalculateMeta(jpg, &s)) return false; + for (size_t c = 0; c < num_components; ++c) { + ComponentMeta& m = meta[c]; + size_t h_group_dim = m.h_samp * ac_group_dim / jpg.max_h_samp_factor; + size_t first_x = x * h_group_dim; + size_t last_x = + std::min(first_x + h_group_dim, m.width_in_blocks); + size_t v_group_dim = m.v_samp * ac_group_dim / jpg.max_v_samp_factor; + size_t first_y = y * v_group_dim; + size_t last_y = + std::min(first_y + v_group_dim, m.height_in_blocks); + m.ac_coeffs += first_x * brunsli::kDCTBlockSize + first_y * m.ac_stride; + m.width_in_blocks = last_x - first_x; + m.height_in_blocks = last_y - first_y; + m.dc_prediction_errors = + dc_prediction_errors[c].data() + first_x + first_y * m.dc_stride; + m.block_state = block_state[c].data() + first_x + first_y * m.b_stride; + } + } + } + + const auto sample_nonzeros = [num_components, &ac_state](size_t idx) { + for (size_t c = 0; c < num_components; ++c) { + ComponentMeta& m = ac_state[idx].meta[c]; + m.approx_total_nonzeros = SampleNumNonZeros(&m); + } + }; + (*executor)(sample_nonzeros, ac_state.size()); + + // Groups workflow: reduce approx_total_nonzeros. + for (size_t y = 0; y < h_ac; ++y) { + for (size_t x = 0; x < w_ac; ++x) { + for (size_t c = 0; c < num_components; ++c) { + approx_total_nonzeros[c] += + ac_state[x + y * w_ac].meta[c].approx_total_nonzeros; + } + } + } + + int32_t num_contexts = num_components; + for (size_t c = 0; c < num_components; ++c) { + ComponentMeta& m = state.meta[c]; + m.context_bits = SelectContextBits(approx_total_nonzeros[c] + 1); + m.context_offset = num_contexts; + num_contexts += brunsli::kNumNonzeroContextSkip[m.context_bits]; + } + state.num_contexts = num_contexts; + + // Groups workflow: distribute context_bits. + for (size_t y = 0; y < h_ac; ++y) { + for (size_t x = 0; x < w_ac; ++x) { + State& s = ac_state[x + y * w_ac]; + for (size_t c = 0; c < num_components; ++c) { + ComponentMeta& m = state.meta[c]; + s.meta[c].context_bits = m.context_bits; + s.meta[c].context_offset = m.context_offset; + } + s.num_contexts = state.num_contexts; + } + } + + for (size_t y = 0; y < h_dc; ++y) { + for (size_t x = 0; x < w_dc; ++x) { + State& s = dc_state[x + y * w_dc]; + for (size_t c = 0; c < num_components; ++c) { + ComponentMeta& m = state.meta[c]; + s.meta[c].context_bits = m.context_bits; + s.meta[c].context_offset = m.context_offset; + } + s.num_contexts = state.num_contexts; + } + } + + std::atomic failed{false}; + const auto encode_dc = [&failed, &dc_state](size_t idx) { + if (failed.load()) return; + if (!PredictDCCoeffs(&dc_state[idx])) failed.store(true); + if (failed.load()) return; + EncodeDC(&dc_state[idx]); + }; + (*executor)(encode_dc, dc_state.size()); + if (failed.load()) return false; + + const auto encode_ac = [&ac_state](size_t idx) { + EncodeAC(&ac_state[idx]); + }; + (*executor)(encode_ac, ac_state.size()); + + // Groups workflow: merge histograms. + // TODO: SIMDify. 
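+  // The merged entropy source yields one shared context map and one set of
+  // clustered histograms, which every DC / AC group section then references
+  // when it is serialized.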
+  state.entropy_source.Resize(num_contexts);
+  for (size_t y = 0; y < h_dc; ++y) {
+    for (size_t x = 0; x < w_dc; ++x) {
+      state.entropy_source.Merge(dc_state[x + y * w_dc].entropy_source);
+    }
+  }
+  for (size_t y = 0; y < h_ac; ++y) {
+    for (size_t x = 0; x < w_ac; ++x) {
+      state.entropy_source.Merge(ac_state[x + y * w_ac].entropy_source);
+    }
+  }
+
+  EntropyCodes entropy_codes = PrepareEntropyCodes(&state);
+
+  std::vector<std::vector<uint8_t>> output;
+  output.resize(1 + dc_state.size() + ac_state.size());
+
+  // TODO: pull entropy codes serialization "side effect".
+  {
+    std::vector<uint8_t>& part = output[0];
+    state.entropy_codes = &entropy_codes;
+    size_t part_size = 20480;
+    for (size_t i = 0; i < jpg.inter_marker_data.size(); ++i) {
+      part_size += 5 + jpg.inter_marker_data[i].size();
+    }
+    for (const std::string& data : jpg.app_data) part_size += data.size();
+    for (const std::string& data : jpg.com_data) part_size += data.size();
+    part_size += jpg.tail_data.size();
+    // TODO: take into account histograms.
+    part.resize(part_size);
+    uint32_t skip_flags =
+        (1u << brunsli::kBrunsliDCDataTag) | (1u << brunsli::kBrunsliACDataTag);
+    if (!BrunsliSerialize(&state, jpg, skip_flags, part.data(), &part_size)) {
+      return false;
+    }
+    part.resize(part_size);
+  }
+
+  const auto serialize = [&](size_t idx) {
+    if (failed.load()) return;
+    std::vector<uint8_t>& part = output[idx];
+    if (idx == 0) return;
+    idx--;
+    if (idx < dc_state.size()) {
+      State& s = dc_state[idx];
+      // TODO: reduce for subsampled
+      size_t part_size = 128 * (128 + 16) * jpg.components.size();
+      part.resize(part_size);
+      s.entropy_codes = &entropy_codes;
+      uint32_t skip_flags = ~(1u << brunsli::kBrunsliDCDataTag);
+      bool ok = BrunsliSerialize(&s, jpg, skip_flags, part.data(), &part_size);
+      if (ok) {
+        part.resize(part_size);
+      } else {
+        failed.store(true);
+      }
+      return;
+    }
+    idx -= dc_state.size();
+    if (idx < ac_state.size()) {
+      State& s = ac_state[idx];
+      // TODO: reduce for subsampled
+      size_t part_size = 32 * 32 * 63 * jpg.components.size();
+      part.resize(part_size);
+      s.entropy_codes = &entropy_codes;
+      uint32_t skip_flags = ~(1u << brunsli::kBrunsliACDataTag);
+      bool ok = BrunsliSerialize(&s, jpg, skip_flags, part.data(), &part_size);
+      if (ok) {
+        part.resize(part_size);
+      } else {
+        failed.store(true);
+      }
+      return;
+    }
+    failed.store(true);
+  };
+  (*executor)(serialize, output.size());
+  if (failed.load()) return false;
+
+  size_t capacity = *len;
+  size_t size = 0;
+  for (const std::vector<uint8_t>& part : output) {
+    if (size + part.size() > capacity) return false;
+    memcpy(data, part.data(), part.size());
+    size += part.size();
+    data += part.size();
+  }
+  *len = size;
+
+  return true;
+}
+
+bool DecodeGroups(const uint8_t* data, size_t len, brunsli::JPEGData* jpg,
+                  size_t ac_group_dim, size_t dc_group_dim,
+                  Executor* executor) {
+  using ::brunsli::BrunsliStatus;
+  using ::brunsli::internal::dec::BlockI32;
+  using ::brunsli::internal::dec::ComponentMeta;
+  using ::brunsli::internal::dec::PrepareMeta;
+  using ::brunsli::internal::dec::ProcessJpeg;
+  using ::brunsli::internal::dec::Stage;
+  using ::brunsli::internal::dec::State;
+  using ::brunsli::internal::dec::WarmupMeta;
+
+  if ((ac_group_dim & (ac_group_dim - 1)) != 0) return false;
+  if ((dc_group_dim & (dc_group_dim - 1)) != 0) return false;
+  if ((dc_group_dim % ac_group_dim) != 0) return false;
+
+  const uint8_t* data_end = data + len;
+  const uint8_t* chunk_end = data;
+  const uint8_t* chunk_start = chunk_end;
+  // Signature / Header / Meta / Internals / Quant / Histo.
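SkipSection above walks the framing shared by every section: one marker byte, then the payload length as a base-128 varint with seven payload bits per byte, the high bit meaning "more bytes follow", and at most nine length bytes. A self-contained sketch of that framing (the marker value and helper names are illustrative):

#include <cstddef>
#include <cstdint>
#include <cstdio>
#include <vector>

// Hypothetical helper: append `marker` and the varint-coded `payload_len`.
void AppendSectionHeader(uint8_t marker, size_t payload_len,
                         std::vector<uint8_t>* out) {
  out->push_back(marker);
  do {
    uint8_t b = payload_len & 0x7Fu;
    payload_len >>= 7;
    if (payload_len != 0) b |= 0x80u;
    out->push_back(b);
  } while (payload_len != 0);
}

// Mirror of the decoding loop in SkipSection: accumulate 7 bits per byte.
size_t ParseSectionLength(const uint8_t* p, size_t* off) {
  size_t value = 0;
  uint8_t b = 0x80;
  for (size_t i = 0; (i < 9) && (b & 0x80u); ++i) {
    b = p[(*off)++];
    value |= static_cast<size_t>(b & 0x7Fu) << (i * 7);
  }
  return value;
}

int main() {
  std::vector<uint8_t> buf;
  AppendSectionHeader(/*marker=*/0x08, /*payload_len=*/300, &buf);
  size_t off = 1;  // Skip the marker byte, as SkipSection does with off = 1.
  // Prints "header bytes: 3, decoded length: 300".
  std::printf("header bytes: %zu, decoded length: %zu\n", buf.size(),
              ParseSectionLength(buf.data(), &off));
  return 0;
}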
+  for (size_t i = 0; i < 6; ++i) {
+    if (!SkipSection(&chunk_end, data_end - chunk_end)) return false;
+  }
+
+  // Common sections.
+  State state;
+  state.data = chunk_start;
+  state.len = chunk_end - chunk_start;
+  chunk_start = chunk_end;
+
+  BrunsliStatus status = ProcessJpeg(&state, jpg);
+  if (status != BrunsliStatus::BRUNSLI_NOT_ENOUGH_DATA) return false;
+  WarmupMeta(jpg, &state);
+
+  if ((ac_group_dim % jpg->max_h_samp_factor) != 0) return false;
+  if ((ac_group_dim % jpg->max_v_samp_factor) != 0) return false;
+
+  size_t num_components = jpg->components.size();
+
+  size_t width_in_blocks = jpg->MCU_cols * jpg->max_h_samp_factor;
+  size_t height_in_blocks = jpg->MCU_rows * jpg->max_v_samp_factor;
+
+  size_t w_ac = (width_in_blocks + ac_group_dim - 1) / ac_group_dim;
+  size_t h_ac = (height_in_blocks + ac_group_dim - 1) / ac_group_dim;
+
+  size_t w_dc = (width_in_blocks + dc_group_dim - 1) / dc_group_dim;
+  size_t h_dc = (height_in_blocks + dc_group_dim - 1) / dc_group_dim;
+
+  std::vector<const uint8_t*> dc_section_start(h_dc * w_dc);
+  std::vector<size_t> dc_section_length(h_dc * w_dc);
+  for (size_t y = 0; y < h_dc; ++y) {
+    for (size_t x = 0; x < w_dc; ++x) {
+      if (!SkipSection(&chunk_end, data_end - chunk_end)) return false;
+      size_t idx = x + w_dc * y;
+      dc_section_start[idx] = chunk_start;
+      dc_section_length[idx] = chunk_end - chunk_start;
+      chunk_start = chunk_end;
+    }
+  }
+
+  std::vector<const uint8_t*> ac_section_start(h_ac * w_ac);
+  std::vector<size_t> ac_section_length(h_ac * w_ac);
+  for (size_t y = 0; y < h_ac; ++y) {
+    for (size_t x = 0; x < w_ac; ++x) {
+      if (!SkipSection(&chunk_end, data_end - chunk_end)) return false;
+      size_t idx = x + w_ac * y;
+      ac_section_start[idx] = chunk_start;
+      ac_section_length[idx] = chunk_end - chunk_start;
+      chunk_start = chunk_end;
+    }
+  }
+  if (chunk_end != data_end) return false;
+
+  std::atomic<bool> failed{false};
+  const auto decode_dc = [&](size_t idx) {
+    if (failed.load()) return;
+    size_t y = idx / w_dc;
+    size_t x = idx % w_dc;
+    State dc_state;
+    dc_state.stage = Stage::SECTION;
+    dc_state.tags_met = ~(1 << brunsli::kBrunsliDCDataTag);
+    dc_state.data = dc_section_start[idx];
+    dc_state.len = dc_section_length[idx];
+
+    dc_state.context_map = state.context_map;
+    dc_state.entropy_codes = state.entropy_codes;
+
+    std::vector<ComponentMeta>& meta = dc_state.meta;
+
+    PrepareMeta(jpg, &dc_state);
+    dc_state.is_storage_allocated = true;
+    WarmupMeta(jpg, &dc_state);
+    for (size_t c = 0; c < num_components; ++c) {
+      ComponentMeta& m = meta[c];
+      size_t h_group_dim = m.h_samp * dc_group_dim / jpg->max_h_samp_factor;
+      size_t first_x = x * h_group_dim;
+      size_t last_x =
+          std::min<size_t>(first_x + h_group_dim, m.width_in_blocks);
+      size_t v_group_dim = m.v_samp * dc_group_dim / jpg->max_v_samp_factor;
+      size_t first_y = y * v_group_dim;
+      size_t last_y =
+          std::min<size_t>(first_y + v_group_dim, m.height_in_blocks);
+      m.ac_coeffs += first_x * brunsli::kDCTBlockSize + first_y * m.ac_stride;
+      m.block_state =
+          state.block_state_[c].data() + first_x + first_y * m.b_stride;
+      m.width_in_blocks = last_x - first_x;
+      m.height_in_blocks = last_y - first_y;
+    }
+
+    status = ProcessJpeg(&dc_state, jpg);
+    if (status != BrunsliStatus::BRUNSLI_OK) failed.store(true);
+  };
+  (*executor)(decode_dc, dc_section_start.size());
+  if (failed.load()) return false;
+
+  const auto decode_ac = [&](size_t idx) {
+    if (failed.load()) return;
+    size_t y = idx / w_ac;
+    size_t x = idx % w_ac;
+    State ac_state;
+    ac_state.stage = Stage::SECTION;
+    ac_state.tags_met = ~(1 << brunsli::kBrunsliACDataTag);
+    ac_state.data = ac_section_start[idx];
+    ac_state.len = ac_section_length[idx];
+
+    ac_state.context_map = state.context_map;
+    ac_state.entropy_codes = state.entropy_codes;
+
+    std::vector<ComponentMeta>& meta = ac_state.meta;
+
+    PrepareMeta(jpg, &ac_state);
+    ac_state.is_storage_allocated = true;
+    WarmupMeta(jpg, &ac_state);
+    for (size_t c = 0; c < num_components; ++c) {
+      ComponentMeta& m = meta[c];
+      size_t h_group_dim = m.h_samp * ac_group_dim / jpg->max_h_samp_factor;
+      size_t first_x = x * h_group_dim;
+      size_t last_x =
+          std::min<size_t>(first_x + h_group_dim, m.width_in_blocks);
+      size_t v_group_dim = m.v_samp * ac_group_dim / jpg->max_v_samp_factor;
+      size_t first_y = y * v_group_dim;
+      size_t last_y =
+          std::min<size_t>(first_y + v_group_dim, m.height_in_blocks);
+      m.context_bits = state.meta[c].context_bits;
+      m.context_offset = state.meta[c].context_offset;
+      m.ac_coeffs += first_x * brunsli::kDCTBlockSize + first_y * m.ac_stride;
+      m.block_state =
+          state.block_state_[c].data() + first_x + first_y * m.b_stride;
+      m.width_in_blocks = last_x - first_x;
+      m.height_in_blocks = last_y - first_y;
+    }
+
+    status = ProcessJpeg(&ac_state, jpg);
+    if (status != BrunsliStatus::BRUNSLI_OK) failed.store(true);
+  };
+  (*executor)(decode_ac, ac_section_start.size());
+  if (failed.load()) return false;
+
+  return true;
+}
+
+} // namespace brunsli
diff --git a/c/experimental/groups.h b/c/experimental/groups.h
new file mode 100644
index 00000000..346985b6
--- /dev/null
+++ b/c/experimental/groups.h
@@ -0,0 +1,32 @@
+// Copyright (c) Google LLC 2019
+//
+// Use of this source code is governed by an MIT-style
+// license that can be found in the LICENSE file or at
+// https://opensource.org/licenses/MIT.
+
+// Functions for encoding / decoding Brunsli in "groups" mode.
+
+#ifndef BRUNSLI_EXPERIMENTAL_GROUPS_H_
+#define BRUNSLI_EXPERIMENTAL_GROUPS_H_
+
+#include
+
+#include
+#include
+
+namespace brunsli {
+
+typedef std::function<void(size_t)> Runnable;
+typedef std::function<void(const Runnable&, size_t)> Executor;
+
+void SequentialExecutor(const Runnable& runnable, size_t num_tasks);
+
+bool DecodeGroups(const uint8_t* data, size_t len, brunsli::JPEGData* jpg,
+                  size_t ac_group_dim, size_t dc_group_dim, Executor* executor);
+
+bool EncodeGroups(const brunsli::JPEGData& jpg, uint8_t* data, size_t* len,
+                  size_t ac_group_dim, size_t dc_group_dim, Executor* executor);
+
+} // namespace brunsli
+
+#endif // BRUNSLI_EXPERIMENTAL_GROUPS_H_
diff --git a/c/tools/cbrunsli.cc b/c/tools/cbrunsli.cc
index 87830295..1088a471 100644
--- a/c/tools/cbrunsli.cc
+++ b/c/tools/cbrunsli.cc
@@ -13,6 +13,11 @@
 #include
 #include
 
+#if defined(BRUNSLI_EXPERIMENTAL_GROUPS)
+#include "../experimental/groups.h"
+#include
+#endif
+
 bool ReadFileInternal(FILE* file, std::string* content) {
   if (fseek(file, 0, SEEK_END) != 0) {
     fprintf(stderr, "Failed to seek end of input file.\n");
@@ -110,7 +115,19 @@ bool ProcessFile(const std::string& file_name,
   output.resize(output_size);
   uint8_t* output_data = reinterpret_cast<uint8_t*>(&output[0]);
 
+#if defined(BRUNSLI_EXPERIMENTAL_GROUPS)
+  {
+    highwayhash::ThreadPool thread_pool(4);
+    brunsli::Executor executor = [&](const brunsli::Runnable& runnable,
+                                     size_t num_tasks) {
+      thread_pool.Run(0, num_tasks, runnable);
+    };
+    ok = brunsli::EncodeGroups(jpg, output_data, &output_size, 32, 128,
+                               &executor);
+  }
+#else
   ok = brunsli::BrunsliEncodeJpeg(jpg, output_data, &output_size);
+#endif
 
   if (!ok) {
     // TODO: use fallback?
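cbrunsli.cc above drives EncodeGroups through a four-thread highwayhash::ThreadPool, but any callable matching the Executor signature from groups.h works. A dependency-free sketch built on std::thread (the helper name and the shared-counter scheduling are illustrative, not what the patch ships):

#include <atomic>
#include <cstddef>
#include <thread>
#include <vector>

#include "../experimental/groups.h"

brunsli::Executor MakeThreadedExecutor(size_t num_threads) {
  return [num_threads](const brunsli::Runnable& runnable, size_t num_tasks) {
    std::atomic<size_t> next{0};
    std::vector<std::thread> workers;
    for (size_t t = 0; t < num_threads; ++t) {
      workers.emplace_back([&]() {
        // Each worker pulls task indices from the shared counter until the
        // tasks run out; EncodeGroups / DecodeGroups only require that every
        // index in [0, num_tasks) is executed exactly once.
        for (size_t i = next.fetch_add(1); i < num_tasks;
             i = next.fetch_add(1)) {
          runnable(i);
        }
      });
    }
    for (std::thread& w : workers) w.join();
  };
}

// Usage mirrors the tools: pass a pointer to the executor object, e.g.
//   brunsli::Executor executor = MakeThreadedExecutor(4);
//   ok = brunsli::EncodeGroups(jpg, out, &out_size, 32, 128, &executor);

SequentialExecutor from groups.h remains the simplest choice when threading is not wanted.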
diff --git a/c/tools/dbrunsli.cc b/c/tools/dbrunsli.cc
index 7741bd0a..55d9696e 100644
--- a/c/tools/dbrunsli.cc
+++ b/c/tools/dbrunsli.cc
@@ -14,6 +14,11 @@
 #include
 #include
 
+#if defined(BRUNSLI_EXPERIMENTAL_GROUPS)
+#include "../experimental/groups.h"
+#include
+#endif
+
 int StringWriter(void* data, const uint8_t* buf, size_t count) {
   std::string* output = reinterpret_cast<std::string*>(data);
   output->append(reinterpret_cast<const char*>(buf), count);
@@ -105,9 +110,21 @@ bool ProcessFile(const std::string& file_name,
   brunsli::JPEGData jpg;
   const uint8_t* input_data = reinterpret_cast<const uint8_t*>(input.data());
 
+#if defined(BRUNSLI_EXPERIMENTAL_GROUPS)
+  {
+    highwayhash::ThreadPool thread_pool(4);
+    brunsli::Executor executor = [&](const brunsli::Runnable& runnable,
+                                     size_t num_tasks) {
+      thread_pool.Run(0, num_tasks, runnable);
+    };
+    ok = brunsli::DecodeGroups(input_data, input.size(), &jpg, 32, 128,
+                               &executor);
+  }
+#else
   brunsli::BrunsliStatus status =
       brunsli::BrunsliDecodeJpeg(input_data, input.size(), &jpg);
   ok = (status == brunsli::BRUNSLI_OK);
+#endif
 
   input.clear();
   input.shrink_to_fit();
@@ -139,7 +156,9 @@ int main(int argc, char** argv) {
     fprintf(stderr, "Empty input file name.\n");
     return EXIT_FAILURE;
   }
-  const std::string outfile_name = argc == 2 ? file_name + ".jpg" :
-      std::string(argv[2]);
-  return ProcessFile(file_name, outfile_name) ? EXIT_SUCCESS : EXIT_FAILURE;
+  const std::string outfile_name =
+      argc == 2 ? file_name + ".jpg" : std::string(argv[2]);
+
+  bool ok = ProcessFile(file_name, outfile_name);
+  return ok ? EXIT_SUCCESS : EXIT_FAILURE;
 }
diff --git a/third_party/highwayhash b/third_party/highwayhash
new file mode 160000
index 00000000..0aaf66bb
--- /dev/null
+++ b/third_party/highwayhash
@@ -0,0 +1 @@
+Subproject commit 0aaf66bb8a1634ceee4b778df51a652bdf4e1f17
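Putting the pieces together, a minimal round-trip sketch of the new API, single-threaded, under two assumptions the patch does not guarantee: that JPEGData comes from brunsli's public jpeg_data.h header, and that an output buffer of roughly twice the source JPEG size is large enough (EncodeGroups simply fails when the buffer is too small and reports the bytes actually written through *len). Group dimensions 32 / 128 are copied from the tools above; the function name and buffer bound are illustrative.

#include <cstddef>
#include <cstdint>
#include <vector>

#include <brunsli/jpeg_data.h>
#include "../experimental/groups.h"

bool GroupsRoundTrip(const brunsli::JPEGData& jpg, size_t jpeg_size,
                     brunsli::JPEGData* out) {
  brunsli::Executor executor = brunsli::SequentialExecutor;
  // Assumed (not guaranteed) bound on the encoded size.
  std::vector<uint8_t> buffer(2 * jpeg_size + (1 << 20));
  size_t encoded_size = buffer.size();
  if (!brunsli::EncodeGroups(jpg, buffer.data(), &encoded_size, 32, 128,
                             &executor)) {
    return false;
  }
  // Decode with the same AC / DC group dimensions used for encoding.
  return brunsli::DecodeGroups(buffer.data(), encoded_size, out, 32, 128,
                               &executor);
}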