From 7e347a7c849db05acad20304f5e9b29071ecec7c Mon Sep 17 00:00:00 2001 From: Eugene Kliuchnikov Date: Thu, 22 Dec 2016 15:55:05 +0100 Subject: [PATCH] Update encoder (#492) * fix comment position in `context.h` * fix typo in internal quality constant name * deduplicate `BuildMetaBlockGreedy` code * simplify aggregation in `ChooseContextMap` --- enc/context.h | 2 +- enc/encode.c | 32 +++------ enc/metablock.c | 187 ++++++++++++++++++++++-------------------------- enc/metablock.h | 12 +--- enc/quality.h | 2 +- 5 files changed, 97 insertions(+), 138 deletions(-) diff --git a/enc/context.h b/enc/context.h index 9cde400b3..0e2e453a4 100644 --- a/enc/context.h +++ b/enc/context.h @@ -127,9 +127,9 @@ static const uint8_t kUTF8ContextLookup[512] = { 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, - 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, /* UTF8 lead byte range. */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, }; diff --git a/enc/encode.c b/enc/encode.c index d846513a0..c7a90040f 100644 --- a/enc/encode.c +++ b/enc/encode.c @@ -330,18 +330,13 @@ static void ChooseContextMap(int quality, uint32_t monogram_histo[3] = { 0 }; uint32_t two_prefix_histo[6] = { 0 }; - size_t total = 0; + size_t total; size_t i; size_t dummy; double entropy[4]; for (i = 0; i < 9; ++i) { - size_t j = i; - total += bigram_histo[i]; monogram_histo[i % 3] += bigram_histo[i]; - if (j >= 6) { - j -= 6; - } - two_prefix_histo[j] += bigram_histo[i]; + two_prefix_histo[i % 6] += bigram_histo[i]; } entropy[1] = ShannonEntropy(monogram_histo, 3, &dummy); entropy[2] = (ShannonEntropy(two_prefix_histo, 3, &dummy) + @@ -351,6 +346,7 @@ static void ChooseContextMap(int quality, entropy[3] += ShannonEntropy(bigram_histo + 3 * i, 3, &dummy); } + total = monogram_histo[0] + monogram_histo[1] + monogram_histo[2]; assert(total != 0); entropy[0] = 1.0 / (double)total; entropy[1] *= entropy[0]; @@ -480,7 +476,7 @@ static void WriteMetaBlockInternal(MemoryManager* m, num_direct_distance_codes, distance_postfix_bits); } - if (params->quality <= MAX_QUALITY_FOR_STATIC_ENRTOPY_CODES) { + if (params->quality <= MAX_QUALITY_FOR_STATIC_ENTROPY_CODES) { BrotliStoreMetaBlockFast(m, data, wrapped_last_flush_pos, bytes, mask, is_last, commands, num_commands, @@ -505,22 +501,10 @@ static void WriteMetaBlockInternal(MemoryManager* m, &literal_context_mode, &num_literal_contexts, &literal_context_map); - if (literal_context_map == NULL) { - BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask, - commands, num_commands, &mb); - if (BROTLI_IS_OOM(m)) return; - } else { - BrotliBuildMetaBlockGreedyWithContexts(m, data, - wrapped_last_flush_pos, - mask, - prev_byte, prev_byte2, - literal_context_mode, - num_literal_contexts, - literal_context_map, - commands, num_commands, - &mb); - if (BROTLI_IS_OOM(m)) return; - } + BrotliBuildMetaBlockGreedy(m, data, wrapped_last_flush_pos, mask, + prev_byte, prev_byte2, literal_context_mode, num_literal_contexts, + literal_context_map, commands, num_commands, &mb); + if (BROTLI_IS_OOM(m)) return; } else { if (!BrotliIsMostlyUTF8(data, wrapped_last_flush_pos, mask, bytes, kMinUTF8Ratio)) { diff --git a/enc/metablock.c b/enc/metablock.c index 402aef6c1..db99fbc16 100644 --- a/enc/metablock.c +++ b/enc/metablock.c @@ -136,53 +136,7 @@ void BrotliBuildMetaBlock(MemoryManager* m, #include "./metablock_inc.h" /* NOLINT(build/include) */ #undef FN -void BrotliBuildMetaBlockGreedy(MemoryManager* m, - const uint8_t* ringbuffer, - size_t pos, - size_t mask, - const Command *commands, - size_t n_commands, - MetaBlockSplit* mb) { - BlockSplitterLiteral lit_blocks; - BlockSplitterCommand cmd_blocks; - BlockSplitterDistance dist_blocks; - size_t num_literals = 0; - size_t i; - for (i = 0; i < n_commands; ++i) { - num_literals += commands[i].insert_len_; - } - - InitBlockSplitterLiteral(m, &lit_blocks, 256, 512, 400.0, num_literals, - &mb->literal_split, &mb->literal_histograms, - &mb->literal_histograms_size); - if (BROTLI_IS_OOM(m)) return; - InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024, - 500.0, n_commands, &mb->command_split, &mb->command_histograms, - &mb->command_histograms_size); - if (BROTLI_IS_OOM(m)) return; - InitBlockSplitterDistance(m, &dist_blocks, 64, 512, 100.0, n_commands, - &mb->distance_split, &mb->distance_histograms, - &mb->distance_histograms_size); - if (BROTLI_IS_OOM(m)) return; - - for (i = 0; i < n_commands; ++i) { - const Command cmd = commands[i]; - size_t j; - BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_); - for (j = cmd.insert_len_; j != 0; --j) { - BlockSplitterAddSymbolLiteral(&lit_blocks, ringbuffer[pos & mask]); - ++pos; - } - pos += CommandCopyLen(&cmd); - if (CommandCopyLen(&cmd) && cmd.cmd_prefix_ >= 128) { - BlockSplitterAddSymbolDistance(&dist_blocks, cmd.dist_prefix_); - } - } - - BlockSplitterFinishBlockLiteral(&lit_blocks, /* is_final = */ BROTLI_TRUE); - BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE); - BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE); -} +#define BROTLI_MAX_STATIC_CONTEXTS 3 /* Greedy block splitter for one block category (literal, command or distance). Gathers histograms for all context buckets. */ @@ -214,7 +168,7 @@ typedef struct ContextBlockSplitter { /* Offset of the histograms of the previous two block types. */ size_t last_histogram_ix_[2]; /* Entropy of the previous two block types. */ - double* last_entropy_; + double last_entropy_[2 * BROTLI_MAX_STATIC_CONTEXTS]; /* The number of times we merged the current block with the last one. */ size_t merge_last_count_; } ContextBlockSplitter; @@ -226,6 +180,7 @@ static void InitContextBlockSplitter( size_t* histograms_size) { size_t max_num_blocks = num_symbols / min_block_size + 1; size_t max_num_types; + assert(num_contexts <= BROTLI_MAX_STATIC_CONTEXTS); self->alphabet_size_ = alphabet_size; self->num_contexts_ = num_contexts; @@ -250,7 +205,6 @@ static void InitContextBlockSplitter( split->lengths, split->lengths_alloc_size, max_num_blocks); if (BROTLI_IS_OOM(m)) return; split->num_blocks = max_num_blocks; - self->last_entropy_ = BROTLI_ALLOC(m, double, 2 * num_contexts); if (BROTLI_IS_OOM(m)) return; assert(*histograms == 0); *histograms_size = max_num_types * num_contexts; @@ -262,17 +216,12 @@ static void InitContextBlockSplitter( self->last_histogram_ix_[0] = self->last_histogram_ix_[1] = 0; } -static void CleanupContextBlockSplitter( - MemoryManager* m, ContextBlockSplitter* self) { - BROTLI_FREE(m, self->last_entropy_); -} - /* Does either of three things: (1) emits the current block with a new block type; (2) emits the current block with the type of the second last block; (3) merges the current block with the last block. */ static void ContextBlockSplitterFinishBlock( - MemoryManager* m, ContextBlockSplitter* self, BROTLI_BOOL is_final) { + ContextBlockSplitter* self, BROTLI_BOOL is_final) { BlockSplit* split = self->split_; const size_t num_contexts = self->num_contexts_; double* last_entropy = self->last_entropy_; @@ -305,13 +254,11 @@ static void ContextBlockSplitterFinishBlock( respective set of histograms for the last and second last block types. Decide over the split based on the total reduction of entropy across all contexts. */ - double* entropy = BROTLI_ALLOC(m, double, num_contexts); - HistogramLiteral* combined_histo = - BROTLI_ALLOC(m, HistogramLiteral, 2 * num_contexts); - double* combined_entropy = BROTLI_ALLOC(m, double, 2 * num_contexts); + double entropy[BROTLI_MAX_STATIC_CONTEXTS]; + HistogramLiteral combined_histo[2 * BROTLI_MAX_STATIC_CONTEXTS]; /* 6KiB */ + double combined_entropy[2 * BROTLI_MAX_STATIC_CONTEXTS]; double diff[2] = { 0.0 }; size_t i; - if (BROTLI_IS_OOM(m)) return; for (i = 0; i < num_contexts; ++i) { size_t curr_histo_ix = self->curr_histogram_ix_ + i; size_t j; @@ -383,9 +330,6 @@ static void ContextBlockSplitterFinishBlock( self->target_block_size_ += self->min_block_size_; } } - BROTLI_FREE(m, combined_entropy); - BROTLI_FREE(m, combined_histo); - BROTLI_FREE(m, entropy); } if (is_final) { *self->histograms_size_ = split->num_types * num_contexts; @@ -395,30 +339,47 @@ static void ContextBlockSplitterFinishBlock( /* Adds the next symbol to the current block type and context. When the current block reaches the target size, decides on merging the block. */ -static void ContextBlockSplitterAddSymbol(MemoryManager* m, +static void ContextBlockSplitterAddSymbol( ContextBlockSplitter* self, size_t symbol, size_t context) { HistogramAddLiteral(&self->histograms_[self->curr_histogram_ix_ + context], symbol); ++self->block_size_; if (self->block_size_ == self->target_block_size_) { - ContextBlockSplitterFinishBlock(m, self, /* is_final = */ BROTLI_FALSE); - if (BROTLI_IS_OOM(m)) return; + ContextBlockSplitterFinishBlock(self, /* is_final = */ BROTLI_FALSE); } } -void BrotliBuildMetaBlockGreedyWithContexts(MemoryManager* m, - const uint8_t* ringbuffer, - size_t pos, - size_t mask, - uint8_t prev_byte, - uint8_t prev_byte2, - ContextType literal_context_mode, - size_t num_contexts, - const uint32_t* static_context_map, - const Command *commands, - size_t n_commands, - MetaBlockSplit* mb) { - ContextBlockSplitter lit_blocks; +static void MapStaticContexts(MemoryManager* m, + size_t num_contexts, + const uint32_t* static_context_map, + MetaBlockSplit* mb) { + size_t i; + assert(mb->literal_context_map == 0); + mb->literal_context_map_size = + mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS; + mb->literal_context_map = + BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size); + if (BROTLI_IS_OOM(m)) return; + + for (i = 0; i < mb->literal_split.num_types; ++i) { + uint32_t offset = (uint32_t)(i * num_contexts); + size_t j; + for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++j) { + mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] = + offset + static_context_map[j]; + } + } +} + +static BROTLI_INLINE void BrotliBuildMetaBlockGreedyInternal( + MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask, + uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode, + const size_t num_contexts, const uint32_t* static_context_map, + const Command *commands, size_t n_commands, MetaBlockSplit* mb) { + union { + BlockSplitterLiteral plain; + ContextBlockSplitter ctx; + } lit_blocks; BlockSplitterCommand cmd_blocks; BlockSplitterDistance dist_blocks; size_t num_literals = 0; @@ -427,9 +388,15 @@ void BrotliBuildMetaBlockGreedyWithContexts(MemoryManager* m, num_literals += commands[i].insert_len_; } - InitContextBlockSplitter(m, &lit_blocks, 256, num_contexts, 512, 400.0, - num_literals, &mb->literal_split, &mb->literal_histograms, - &mb->literal_histograms_size); + if (num_contexts == 1) { + InitBlockSplitterLiteral(m, &lit_blocks.plain, 256, 512, 400.0, + num_literals, &mb->literal_split, &mb->literal_histograms, + &mb->literal_histograms_size); + } else { + InitContextBlockSplitter(m, &lit_blocks.ctx, 256, num_contexts, 512, 400.0, + num_literals, &mb->literal_split, &mb->literal_histograms, + &mb->literal_histograms_size); + } if (BROTLI_IS_OOM(m)) return; InitBlockSplitterCommand(m, &cmd_blocks, BROTLI_NUM_COMMAND_SYMBOLS, 1024, 500.0, n_commands, &mb->command_split, &mb->command_histograms, @@ -445,12 +412,15 @@ void BrotliBuildMetaBlockGreedyWithContexts(MemoryManager* m, size_t j; BlockSplitterAddSymbolCommand(&cmd_blocks, cmd.cmd_prefix_); for (j = cmd.insert_len_; j != 0; --j) { - size_t context = Context(prev_byte, prev_byte2, literal_context_mode); uint8_t literal = ringbuffer[pos & mask]; - ContextBlockSplitterAddSymbol( - m, &lit_blocks, literal, static_context_map[context]); + if (num_contexts == 1) { + BlockSplitterAddSymbolLiteral(&lit_blocks.plain, literal); + } else { + size_t context = Context(prev_byte, prev_byte2, literal_context_mode); + ContextBlockSplitterAddSymbol(&lit_blocks.ctx, literal, + static_context_map[context]); + } prev_byte2 = prev_byte; - if (BROTLI_IS_OOM(m)) return; prev_byte = literal; ++pos; } @@ -464,25 +434,40 @@ void BrotliBuildMetaBlockGreedyWithContexts(MemoryManager* m, } } - ContextBlockSplitterFinishBlock(m, &lit_blocks, /* is_final = */ BROTLI_TRUE); - if (BROTLI_IS_OOM(m)) return; - CleanupContextBlockSplitter(m, &lit_blocks); + if (num_contexts == 1) { + BlockSplitterFinishBlockLiteral( + &lit_blocks.plain, /* is_final = */ BROTLI_TRUE); + } else { + ContextBlockSplitterFinishBlock( + &lit_blocks.ctx, /* is_final = */ BROTLI_TRUE); + } BlockSplitterFinishBlockCommand(&cmd_blocks, /* is_final = */ BROTLI_TRUE); BlockSplitterFinishBlockDistance(&dist_blocks, /* is_final = */ BROTLI_TRUE); - assert(mb->literal_context_map == 0); - mb->literal_context_map_size = - mb->literal_split.num_types << BROTLI_LITERAL_CONTEXT_BITS; - mb->literal_context_map = - BROTLI_ALLOC(m, uint32_t, mb->literal_context_map_size); - if (BROTLI_IS_OOM(m)) return; + if (num_contexts > 1) { + MapStaticContexts(m, num_contexts, static_context_map, mb); + } +} - for (i = 0; i < mb->literal_split.num_types; ++i) { - size_t j; - for (j = 0; j < (1u << BROTLI_LITERAL_CONTEXT_BITS); ++j) { - mb->literal_context_map[(i << BROTLI_LITERAL_CONTEXT_BITS) + j] = - (uint32_t)(i * num_contexts) + static_context_map[j]; - } +void BrotliBuildMetaBlockGreedy(MemoryManager* m, + const uint8_t* ringbuffer, + size_t pos, + size_t mask, + uint8_t prev_byte, + uint8_t prev_byte2, + ContextType literal_context_mode, + size_t num_contexts, + const uint32_t* static_context_map, + const Command* commands, + size_t n_commands, + MetaBlockSplit* mb) { + if (num_contexts == 1) { + BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte, + prev_byte2, literal_context_mode, 1, NULL, commands, n_commands, mb); + } else { + BrotliBuildMetaBlockGreedyInternal(m, ringbuffer, pos, mask, prev_byte, + prev_byte2, literal_context_mode, num_contexts, static_context_map, + commands, n_commands, mb); } } diff --git a/enc/metablock.h b/enc/metablock.h index da29d8135..cc523999c 100644 --- a/enc/metablock.h +++ b/enc/metablock.h @@ -80,20 +80,10 @@ BROTLI_INTERNAL void BrotliBuildMetaBlock(MemoryManager* m, ContextType literal_context_mode, MetaBlockSplit* mb); -/* Uses a fast greedy block splitter that tries to merge current block with the - last or the second last block and does not do any context modeling. */ -BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy(MemoryManager* m, - const uint8_t* ringbuffer, - size_t pos, - size_t mask, - const Command* commands, - size_t n_commands, - MetaBlockSplit* mb); - /* Uses a fast greedy block splitter that tries to merge current block with the last or the second last block and uses a static context clustering which is the same for all block types. */ -BROTLI_INTERNAL void BrotliBuildMetaBlockGreedyWithContexts( +BROTLI_INTERNAL void BrotliBuildMetaBlockGreedy( MemoryManager* m, const uint8_t* ringbuffer, size_t pos, size_t mask, uint8_t prev_byte, uint8_t prev_byte2, ContextType literal_context_mode, size_t num_contexts, const uint32_t* static_context_map, diff --git a/enc/quality.h b/enc/quality.h index 6dade0723..079027e3a 100755 --- a/enc/quality.h +++ b/enc/quality.h @@ -17,7 +17,7 @@ #define ZOPFLIFICATION_QUALITY 10 #define HQ_ZOPFLIFICATION_QUALITY 11 -#define MAX_QUALITY_FOR_STATIC_ENRTOPY_CODES 2 +#define MAX_QUALITY_FOR_STATIC_ENTROPY_CODES 2 #define MIN_QUALITY_FOR_BLOCK_SPLIT 4 #define MIN_QUALITY_FOR_OPTIMIZE_HISTOGRAMS 4 #define MIN_QUALITY_FOR_EXTENSIVE_REFERENCE_SEARCH 5