From 038a8a906b8bbf60491b2643febaf8f9d5a4139c Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 25 Feb 2024 17:33:41 -0800 Subject: [PATCH 1/8] targetCBlockSize: modified splitting strategy to generate blocks of more regular size notably avoiding to feature a larger first block --- lib/compress/zstd_compress_superblock.c | 166 +++++++++++++++++------- 1 file changed, 119 insertions(+), 47 deletions(-) diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index 824a2be6d5..a32616409e 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -390,7 +390,11 @@ static size_t ZSTD_estimateSubBlockSize_sequences(const BYTE* ofCodeTable, return cSeqSizeEstimate + sequencesSectionHeaderSize; } -static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, +typedef struct { + size_t estLitSize; + size_t estBlockSize; +} EstimatedBlockSize; +static EstimatedBlockSize ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, const BYTE* ofCodeTable, const BYTE* llCodeTable, const BYTE* mlCodeTable, @@ -398,15 +402,17 @@ static size_t ZSTD_estimateSubBlockSize(const BYTE* literals, size_t litSize, const ZSTD_entropyCTables_t* entropy, const ZSTD_entropyCTablesMetadata_t* entropyMetadata, void* workspace, size_t wkspSize, - int writeLitEntropy, int writeSeqEntropy) { - size_t cSizeEstimate = 0; - cSizeEstimate += ZSTD_estimateSubBlockSize_literal(literals, litSize, - &entropy->huf, &entropyMetadata->hufMetadata, - workspace, wkspSize, writeLitEntropy); - cSizeEstimate += ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, + int writeLitEntropy, int writeSeqEntropy) +{ + EstimatedBlockSize ebs; + ebs.estLitSize = ZSTD_estimateSubBlockSize_literal(literals, litSize, + &entropy->huf, &entropyMetadata->hufMetadata, + workspace, wkspSize, writeLitEntropy); + ebs.estBlockSize = ZSTD_estimateSubBlockSize_sequences(ofCodeTable, llCodeTable, mlCodeTable, nbSeq, &entropy->fse, &entropyMetadata->fseMetadata, workspace, wkspSize, writeSeqEntropy); - return cSizeEstimate + ZSTD_blockHeaderSize; + ebs.estBlockSize += ebs.estLitSize + ZSTD_blockHeaderSize; + return ebs; } static int ZSTD_needSequenceEntropyTables(ZSTD_fseCTablesMetadata_t const* fseMetadata) @@ -427,17 +433,43 @@ static size_t countLiterals(seqStore_t const* seqStore, const seqDef* sp, size_t for (n=0; n %zu bytes", seqCount, (const void*)sp, total); + DEBUGLOG(6, "countLiterals for %zu sequences from %p => %zu bytes", seqCount, (const void*)sp, total); return total; } +#define BYTESCALE 256 + +static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs, + size_t targetBudget, size_t avgLitCost, size_t avgSeqCost, + int firstSubBlock) +{ + size_t n, budget = 0; + /* entropy headers */ + if (firstSubBlock) { + budget += 120 * BYTESCALE; /* generous estimate */ + } + /* first sequence => at least one sequence*/ + budget += sp[0].litLength * avgLitCost + avgSeqCost;; + if (budget > targetBudget) return 1; + + /* loop over sequences */ + for (n=1; n targetBudget) break; + budget += currentCost; + } + return n; +} + +#define CBLOCK_TARGET_SIZE_MIN 1300 /* suitable to fit an ethernet / wifi / 4G transport frame */ + /** ZSTD_compressSubBlock_multi() : * Breaks super-block into multiple sub-blocks and compresses them. - * Entropy will be written to the first block. - * The following blocks will use repeat mode to compress. - * All sub-blocks are compressed blocks (no raw or rle blocks). 
- * @return : compressed size of the super block (which is multiple ZSTD blocks) - * Or 0 if it failed to compress. */ + * Entropy will be written into the first block. + * The following blocks use repeat_mode to compress. + * Sub-blocks are all compressed, except the last one when beneficial. + * @return : compressed size of the super block (which features multiple ZSTD blocks) + * or 0 if it failed to compress. */ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, const ZSTD_compressedBlockState_t* prevCBlock, ZSTD_compressedBlockState_t* nextCBlock, @@ -452,7 +484,6 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, const seqDef* const send = seqStorePtr->sequences; const seqDef* sp = sstart; /* tracks progresses within seqStorePtr->sequences */ size_t const nbSeqs = (size_t)(send - sstart); - size_t nbSeqsPerBlock = nbSeqs; const BYTE* const lstart = seqStorePtr->litStart; const BYTE* const lend = seqStorePtr->lit; const BYTE* lp = lstart; @@ -465,54 +496,96 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, const BYTE* llCodePtr = seqStorePtr->llCode; const BYTE* mlCodePtr = seqStorePtr->mlCode; const BYTE* ofCodePtr = seqStorePtr->ofCode; - size_t const minTarget = 1300; /* enforce minimum size, to reduce undesirable side effects */ + size_t const minTarget = CBLOCK_TARGET_SIZE_MIN; /* enforce minimum size, to reduce undesirable side effects */ size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize); int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed); int writeSeqEntropy = 1; size_t nbSubBlocks = 1; + size_t avgLitCost, avgSeqCost, avgBlockBudget; DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)", (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart)); + /* let's start by a general estimation for the full block */ if (nbSeqs == 0) { - /* special case : no sequence */ - nbSeqsPerBlock = 0; nbSubBlocks = 1; } else { - /* let's start by a general estimation for the full block */ - size_t const cBlockSizeEstimate = + EstimatedBlockSize const ebs = ZSTD_estimateSubBlockSize(lp, nbLiterals, ofCodePtr, llCodePtr, mlCodePtr, nbSeqs, &nextCBlock->entropy, entropyMetadata, workspace, wkspSize, writeLitEntropy, writeSeqEntropy); /* quick estimation */ - nbSubBlocks = (cBlockSizeEstimate + (targetCBlockSize-1)) / targetCBlockSize; - assert(nbSubBlocks > 0); - if (nbSeqs > nbSubBlocks) { - nbSeqsPerBlock = nbSeqs / nbSubBlocks; - } else { - nbSeqsPerBlock = 1; - nbSubBlocks = nbSeqs; - } - /* Note: this is very approximative. Obviously, some sub-blocks will be larger and others smaller. - * But the contract of this feature has always been approximative, so for now we'll leverage it for speed. - * It can be refined later, for closer-to-target compressed block size, if it ever matters. */ + avgLitCost = nbLiterals ? 
(ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE; + avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs; + nbSubBlocks = (ebs.estBlockSize + (targetCBlockSize-1)) / targetCBlockSize; + if (nbSubBlocks<1) nbSubBlocks=1; + avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks; + DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes", + (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE, + (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE); } - /* write sub-blocks */ + /* compress and write sub-blocks */ { size_t n; - size_t nbSeqsToProcess = 0; - for (n=0; n < nbSubBlocks; n++) { - int const lastSubBlock = (n==nbSubBlocks-1); - size_t const nbSeqsLastSubBlock = nbSeqs - (nbSubBlocks-1) * nbSeqsPerBlock; - size_t nbSeqsSubBlock = lastSubBlock ? nbSeqsLastSubBlock : nbSeqsPerBlock; - size_t seqCount = nbSeqsToProcess+nbSeqsSubBlock; - size_t litSize = lastSubBlock ? (size_t)(lend-lp) : countLiterals(seqStorePtr, sp, seqCount); - int litEntropyWritten = 0; + size_t blockBudgetSupp = 0; + for (n=0; n+1 < nbSubBlocks; n++) { + /* determine nb of sequences for current sub-block + nbLiterals from next sequence */ + size_t seqCount = sizeBlockSequences(sp, (size_t)(send-sp), avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0); + /* if reached last sequence : break to last sub-block (simplification) */ + assert(seqCount <= (size_t)(send-sp)); + if (sp + seqCount == send) break; + assert(seqCount > 0); + /* compress sub-block */ + { int litEntropyWritten = 0; + int seqEntropyWritten = 0; + size_t litSize = countLiterals(seqStorePtr, sp, seqCount); + const size_t decompressedSize = + ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 0); + size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, (size_t)(oend-op), + bmi2, writeLitEntropy, writeSeqEntropy, + &litEntropyWritten, &seqEntropyWritten, + 0); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + + /* check compressibility, update state components */ + if (cSize > 0 && cSize < decompressedSize) { + DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes", + (unsigned)decompressedSize, (unsigned)cSize); + assert(ip + decompressedSize <= iend); + ip += decompressedSize; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + /* Entropy only needs to be written once */ + if (litEntropyWritten) { + writeLitEntropy = 0; + } + if (seqEntropyWritten) { + writeSeqEntropy = 0; + } + sp += seqCount; + blockBudgetSupp = 0; + } } + /* otherwise : do not compress yet, coalesce current block with next one */ + } + + /* write last block */ + DEBUGLOG(2, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp)); + { int litEntropyWritten = 0; int seqEntropyWritten = 0; + size_t litSize = (size_t)(lend - lp); + size_t seqCount = (size_t)(send - sp); const size_t decompressedSize = - ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, lastSubBlock); + ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1); size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, sp, seqCount, lp, litSize, @@ -521,12 +594,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, op, (size_t)(oend-op), bmi2, 
writeLitEntropy, writeSeqEntropy, &litEntropyWritten, &seqEntropyWritten, - lastBlock && lastSubBlock); - nbSeqsToProcess = seqCount; + lastBlock); FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + /* update pointers, the nb of literals borrowed from next sequence must be preserved */ if (cSize > 0 && cSize < decompressedSize) { - DEBUGLOG(5, "Committed sub-block compressing %u bytes => %u bytes", + DEBUGLOG(2, "Last sub-block compressed %u bytes => %u bytes", (unsigned)decompressedSize, (unsigned)cSize); assert(ip + decompressedSize <= iend); ip += decompressedSize; @@ -543,9 +616,8 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, writeSeqEntropy = 0; } sp += seqCount; - nbSeqsToProcess = 0; + blockBudgetSupp = 0; } - /* otherwise : coalesce current block with next one */ } } @@ -565,7 +637,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, /* some data left : last part of the block sent uncompressed */ size_t const rSize = (size_t)((iend - ip)); size_t const cSize = ZSTD_noCompressBlock(op, (size_t)(oend - op), ip, rSize, lastBlock); - DEBUGLOG(5, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize)); + DEBUGLOG(2, "Generate last uncompressed sub-block of %u bytes", (unsigned)(rSize)); FORWARD_IF_ERROR(cSize, "ZSTD_noCompressBlock failed"); assert(cSize != 0); op += cSize; From 1fafd0c4ae56a524a92369c065d616a447a21a0f Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Sun, 25 Feb 2024 19:45:32 -0800 Subject: [PATCH 2/8] fix minor visual static analyzer warning it's a false positive, but change the code nonetheless to make it more obvious to the static analyzer. --- lib/compress/zstd_compress_superblock.c | 100 +++++++++++------------- 1 file changed, 47 insertions(+), 53 deletions(-) diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index a32616409e..5d0d23353c 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -500,16 +500,12 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, size_t const targetCBlockSize = MAX(minTarget, cctxParams->targetCBlockSize); int writeLitEntropy = (entropyMetadata->hufMetadata.hType == set_compressed); int writeSeqEntropy = 1; - size_t nbSubBlocks = 1; - size_t avgLitCost, avgSeqCost, avgBlockBudget; DEBUGLOG(5, "ZSTD_compressSubBlock_multi (srcSize=%u, litSize=%u, nbSeq=%u)", (unsigned)srcSize, (unsigned)(lend-lstart), (unsigned)(send-sstart)); /* let's start by a general estimation for the full block */ - if (nbSeqs == 0) { - nbSubBlocks = 1; - } else { + if (nbSeqs > 0) { EstimatedBlockSize const ebs = ZSTD_estimateSubBlockSize(lp, nbLiterals, ofCodePtr, llCodePtr, mlCodePtr, nbSeqs, @@ -517,19 +513,17 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, workspace, wkspSize, writeLitEntropy, writeSeqEntropy); /* quick estimation */ - avgLitCost = nbLiterals ? (ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE; - avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs; - nbSubBlocks = (ebs.estBlockSize + (targetCBlockSize-1)) / targetCBlockSize; + size_t const avgLitCost = nbLiterals ? 
(ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE; + size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs; + size_t nbSubBlocks = (ebs.estBlockSize + (targetCBlockSize-1)) / targetCBlockSize; + size_t n, avgBlockBudget, blockBudgetSupp=0; if (nbSubBlocks<1) nbSubBlocks=1; avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks; DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes", (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE, (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE); - } /* compress and write sub-blocks */ - { size_t n; - size_t blockBudgetSupp = 0; for (n=0; n+1 < nbSubBlocks; n++) { /* determine nb of sequences for current sub-block + nbLiterals from next sequence */ size_t seqCount = sizeBlockSequences(sp, (size_t)(send-sp), avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0); @@ -577,50 +571,50 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, } } /* otherwise : do not compress yet, coalesce current block with next one */ } - - /* write last block */ - DEBUGLOG(2, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp)); - { int litEntropyWritten = 0; - int seqEntropyWritten = 0; - size_t litSize = (size_t)(lend - lp); - size_t seqCount = (size_t)(send - sp); - const size_t decompressedSize = - ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1); - size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, - sp, seqCount, - lp, litSize, - llCodePtr, mlCodePtr, ofCodePtr, - cctxParams, - op, (size_t)(oend-op), - bmi2, writeLitEntropy, writeSeqEntropy, - &litEntropyWritten, &seqEntropyWritten, - lastBlock); - FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); - - /* update pointers, the nb of literals borrowed from next sequence must be preserved */ - if (cSize > 0 && cSize < decompressedSize) { - DEBUGLOG(2, "Last sub-block compressed %u bytes => %u bytes", - (unsigned)decompressedSize, (unsigned)cSize); - assert(ip + decompressedSize <= iend); - ip += decompressedSize; - lp += litSize; - op += cSize; - llCodePtr += seqCount; - mlCodePtr += seqCount; - ofCodePtr += seqCount; - /* Entropy only needs to be written once */ - if (litEntropyWritten) { - writeLitEntropy = 0; - } - if (seqEntropyWritten) { - writeSeqEntropy = 0; - } - sp += seqCount; - blockBudgetSupp = 0; + } /* if (nbSeqs > 0) */ + + /* write last block */ + DEBUGLOG(2, "Generate last sub-block: %u sequences remaining", (unsigned)(send - sp)); + { int litEntropyWritten = 0; + int seqEntropyWritten = 0; + size_t litSize = (size_t)(lend - lp); + size_t seqCount = (size_t)(send - sp); + const size_t decompressedSize = + ZSTD_seqDecompressedSize(seqStorePtr, sp, seqCount, litSize, 1); + size_t const cSize = ZSTD_compressSubBlock(&nextCBlock->entropy, entropyMetadata, + sp, seqCount, + lp, litSize, + llCodePtr, mlCodePtr, ofCodePtr, + cctxParams, + op, (size_t)(oend-op), + bmi2, writeLitEntropy, writeSeqEntropy, + &litEntropyWritten, &seqEntropyWritten, + lastBlock); + FORWARD_IF_ERROR(cSize, "ZSTD_compressSubBlock failed"); + + /* update pointers, the nb of literals borrowed from next sequence must be preserved */ + if (cSize > 0 && cSize < decompressedSize) { + DEBUGLOG(2, "Last sub-block compressed %u bytes => %u bytes", + (unsigned)decompressedSize, (unsigned)cSize); + assert(ip + decompressedSize <= iend); + 
ip += decompressedSize; + lp += litSize; + op += cSize; + llCodePtr += seqCount; + mlCodePtr += seqCount; + ofCodePtr += seqCount; + /* Entropy only needs to be written once */ + if (litEntropyWritten) { + writeLitEntropy = 0; } + if (seqEntropyWritten) { + writeSeqEntropy = 0; + } + sp += seqCount; } } + if (writeLitEntropy) { DEBUGLOG(5, "Literal entropy tables were never written"); ZSTD_memcpy(&nextCBlock->entropy.huf, &prevCBlock->entropy.huf, sizeof(prevCBlock->entropy.huf)); @@ -653,8 +647,8 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, } } - DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed %u subBlocks: total compressed size = %u", - (unsigned)nbSubBlocks, (unsigned)(op-ostart)); + DEBUGLOG(5, "ZSTD_compressSubBlock_multi compressed all subBlocks: total compressed size = %u", + (unsigned)(op-ostart)); return (size_t)(op-ostart); } From e0412c20625c7358d506c969a9c9861b70eb10ee Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 26 Feb 2024 12:26:54 -0800 Subject: [PATCH 3/8] fix extraneous semicolon ';' as reported by @terrelln --- lib/compress/zstd_compress_superblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index 5d0d23353c..32bea370f3 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -449,7 +449,7 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs, budget += 120 * BYTESCALE; /* generous estimate */ } /* first sequence => at least one sequence*/ - budget += sp[0].litLength * avgLitCost + avgSeqCost;; + budget += sp[0].litLength * avgLitCost + avgSeqCost; if (budget > targetBudget) return 1; /* loop over sequences */ From aa8592c532e1a2b30b08763140b9bd66bdce4f83 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 26 Feb 2024 13:21:14 -0800 Subject: [PATCH 4/8] minor: reformulate nbSubBlocks assignment --- lib/compress/zstd_compress_superblock.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index 32bea370f3..cf3cb43622 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -461,7 +461,7 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs, return n; } -#define CBLOCK_TARGET_SIZE_MIN 1300 /* suitable to fit an ethernet / wifi / 4G transport frame */ +#define CBLOCK_TARGET_SIZE_MIN 1340 /* suitable to fit into an ethernet / wifi / 4G transport frame */ /** ZSTD_compressSubBlock_multi() : * Breaks super-block into multiple sub-blocks and compresses them. @@ -515,9 +515,8 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, /* quick estimation */ size_t const avgLitCost = nbLiterals ? 
(ebs.estLitSize * BYTESCALE) / nbLiterals : BYTESCALE; size_t const avgSeqCost = ((ebs.estBlockSize - ebs.estLitSize) * BYTESCALE) / nbSeqs; - size_t nbSubBlocks = (ebs.estBlockSize + (targetCBlockSize-1)) / targetCBlockSize; + const size_t nbSubBlocks = MAX((ebs.estBlockSize + (targetCBlockSize/2)) / targetCBlockSize, 1); size_t n, avgBlockBudget, blockBudgetSupp=0; - if (nbSubBlocks<1) nbSubBlocks=1; avgBlockBudget = (ebs.estBlockSize * BYTESCALE) / nbSubBlocks; DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes", (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE, From ef82b214ad1023f6123c3d9c9a7dbce24130d9bd Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 26 Feb 2024 13:23:59 -0800 Subject: [PATCH 5/8] nit: comment indentation as reported by @terrelln --- lib/compress/zstd_compress_superblock.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index cf3cb43622..e9038c472c 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -522,7 +522,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE, (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE); - /* compress and write sub-blocks */ + /* compress and write sub-blocks */ for (n=0; n+1 < nbSubBlocks; n++) { /* determine nb of sequences for current sub-block + nbLiterals from next sequence */ size_t seqCount = sizeBlockSequences(sp, (size_t)(send-sp), avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0); From 86db60752d1f813642054d12d704663c7757d434 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 26 Feb 2024 13:27:59 -0800 Subject: [PATCH 6/8] optimization: bail out faster in presence of incompressible data --- lib/compress/zstd_compress_superblock.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c index e9038c472c..a9e9493bed 100644 --- a/lib/compress/zstd_compress_superblock.c +++ b/lib/compress/zstd_compress_superblock.c @@ -521,6 +521,9 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, DEBUGLOG(5, "estimated fullblock size=%u bytes ; avgLitCost=%.2f ; avgSeqCost=%.2f ; targetCBlockSize=%u, nbSubBlocks=%u ; avgBlockBudget=%.0f bytes", (unsigned)ebs.estBlockSize, (double)avgLitCost/BYTESCALE, (double)avgSeqCost/BYTESCALE, (unsigned)targetCBlockSize, (unsigned)nbSubBlocks, (double)avgBlockBudget/BYTESCALE); + /* simplification: if estimates states that the full superblock doesn't compress, just bail out immediately + * this will result in the production of a single uncompressed block covering @srcSize.*/ + if (ebs.estBlockSize > srcSize) return 0; /* compress and write sub-blocks */ for (n=0; n+1 < nbSubBlocks; n++) { @@ -568,7 +571,7 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr, sp += seqCount; blockBudgetSupp = 0; } } - /* otherwise : do not compress yet, coalesce current block with next one */ + /* otherwise : do not compress yet, coalesce current sub-block with following one */ } } /* if (nbSeqs > 0) */ From d23b95d21d5cb9c5378b3537271dbbff7cdb49b7 Mon Sep 17 00:00:00 2001 From: Yann Collet Date: Mon, 26 Feb 2024 14:06:34 -0800 Subject: 
[PATCH 7/8] minor refactor for clarity since we can ensure that nbSubBlocks>0

---
 lib/compress/zstd_compress_superblock.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index a9e9493bed..295ccf3049 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -526,9 +526,11 @@ static size_t ZSTD_compressSubBlock_multi(const seqStore_t* seqStorePtr,
         if (ebs.estBlockSize > srcSize) return 0;

         /* compress and write sub-blocks */
-        for (n=0; n+1 < nbSubBlocks; n++) {
+        assert(nbSubBlocks>0);
+        for (n=0; n < nbSubBlocks-1; n++) {
             /* determine nb of sequences for current sub-block + nbLiterals from next sequence */
-            size_t seqCount = sizeBlockSequences(sp, (size_t)(send-sp), avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
+            size_t const seqCount = sizeBlockSequences(sp, (size_t)(send-sp),
+                                avgBlockBudget + blockBudgetSupp, avgLitCost, avgSeqCost, n==0);
             /* if reached last sequence : break to last sub-block (simplification) */
             assert(seqCount <= (size_t)(send-sp));
             if (sp + seqCount == send) break;

From 8d31e8ec42a736bf7cc70f9f21e9c1afc920c148 Mon Sep 17 00:00:00 2001
From: Yann Collet
Date: Mon, 26 Feb 2024 14:31:12 -0800
Subject: [PATCH 8/8] sizeBlockSequences() also tracks uncompressed size and only defines a sub-block boundary when it believes that it is compressible. It's effectively an optimization, avoiding a compression cycle to reach the same conclusion.
---
 lib/compress/zstd_compress_superblock.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/lib/compress/zstd_compress_superblock.c b/lib/compress/zstd_compress_superblock.c
index 295ccf3049..f5430eccb0 100644
--- a/lib/compress/zstd_compress_superblock.c
+++ b/lib/compress/zstd_compress_superblock.c
@@ -443,21 +443,29 @@ static size_t sizeBlockSequences(const seqDef* sp, size_t nbSeqs,
                     size_t targetBudget, size_t avgLitCost, size_t avgSeqCost,
                     int firstSubBlock)
 {
-    size_t n, budget = 0;
+    size_t n, budget = 0, inSize=0;
     /* entropy headers */
-    if (firstSubBlock) {
-        budget += 120 * BYTESCALE; /* generous estimate */
-    }
+    size_t const headerSize = (size_t)firstSubBlock * 120 * BYTESCALE; /* generous estimate */
+    assert(firstSubBlock==0 || firstSubBlock==1);
+    budget += headerSize;
+
     /* first sequence => at least one sequence*/
     budget += sp[0].litLength * avgLitCost + avgSeqCost;
     if (budget > targetBudget) return 1;
+    inSize = sp[0].litLength + (sp[0].mlBase+MINMATCH);

     /* loop over sequences */
     for (n=1; n<nbSeqs; n++) {
         size_t currentCost = sp[n].litLength * avgLitCost + avgSeqCost;
-        if (budget + currentCost > targetBudget) break;
         budget += currentCost;
+        inSize += sp[n].litLength + (sp[n].mlBase+MINMATCH);
+        /* stop when sub-block budget is reached */
+        if ( (budget > targetBudget)
+          /* though continue to expand until the sub-block is deemed compressible */
+          && (budget < inSize * BYTESCALE) )
+            break;
     }
+
     return n;
 }
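
Editor's note: to make the splitting strategy of this series easier to follow outside the diff context, here is a small standalone sketch of the same idea. It is not part of the patches: per-literal and per-sequence costs are pre-computed as fixed-point averages (scaled by 256), and sequences are then packed greedily into sub-blocks of roughly equal budget, with an extra allowance for the entropy headers carried by the first sub-block, and a sub-block only closing once it looks compressible. The names (ToySeq, toySizeSubBlock) and the hard-coded costs in main are hypothetical; the sketch also uses the target size directly as the per-sub-block budget and omits the avgBlockBudget / blockBudgetSupp and last-block handling that the real ZSTD_compressSubBlock_multi() performs, where the averages come from ZSTD_estimateSubBlockSize().

#include <stdio.h>
#include <stddef.h>

#define BYTESCALE 256   /* fixed-point scale: costs expressed in 1/256th of a byte */
#define MINMATCH  3     /* minimum match length, as in zstd */

/* toy stand-in for zstd's seqDef: litLength literals, then a match of mlBase+MINMATCH bytes */
typedef struct { unsigned litLength; unsigned mlBase; } ToySeq;

/* Greedy packing mirroring sizeBlockSequences() at the end of this series:
 * - the first sub-block pays a generous allowance for its entropy headers,
 * - at least one sequence is always consumed,
 * - once the budget is exceeded, the sub-block keeps growing until its
 *   estimated cost falls below its uncompressed size (i.e. it looks compressible). */
static size_t toySizeSubBlock(const ToySeq* sp, size_t nbSeqs,
                              size_t targetBudget,
                              size_t avgLitCost, size_t avgSeqCost,
                              int firstSubBlock)
{
    size_t budget = (size_t)firstSubBlock * 120 * BYTESCALE;
    size_t inSize, n;

    budget += sp[0].litLength * avgLitCost + avgSeqCost;
    if (budget > targetBudget) return 1;
    inSize = sp[0].litLength + (sp[0].mlBase + MINMATCH);

    for (n = 1; n < nbSeqs; n++) {
        budget += sp[n].litLength * avgLitCost + avgSeqCost;
        inSize += sp[n].litLength + (sp[n].mlBase + MINMATCH);
        if ((budget > targetBudget) && (budget < inSize * BYTESCALE))
            break;
    }
    return n;
}

int main(void)
{
    /* hypothetical block: 200 sequences of 30 literals plus an 8-byte match each */
    enum { NBSEQS = 200 };
    ToySeq seqs[NBSEQS];
    size_t n;
    for (n = 0; n < NBSEQS; n++) { seqs[n].litLength = 30; seqs[n].mlBase = 8 - MINMATCH; }

    {   /* assumed averages: ~0.6 byte per literal, ~2 bytes per sequence (scaled by 256) */
        size_t const avgLitCost = 154, avgSeqCost = 512;
        size_t const targetBudget = 1340 * BYTESCALE;  /* CBLOCK_TARGET_SIZE_MIN */
        size_t consumed = 0;
        int first = 1;
        while (consumed < NBSEQS) {
            size_t const take = toySizeSubBlock(seqs + consumed, NBSEQS - consumed,
                                                targetBudget, avgLitCost, avgSeqCost, first);
            printf("sub-block: %zu sequences\n", take);
            consumed += take;
            first = 0;
        }
    }
    return 0;
}

The fixed-point BYTESCALE representation is what lets per-literal costs (typically well below one byte) be tracked with integer arithmetic only; in the patches, floating-point conversions appear solely inside DEBUGLOG traces.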