Skip to content

Commit 6a7ede3

Browse files
binhdvo and Binh Vo authored
Reduce size of dctx by reutilizing dst buffer (#2751)
* Reduce size of dctx by reutilizing dst buffer Co-authored-by: Binh Vo <binhvo@fb.com>
1 parent 0a794f5 commit 6a7ede3

File tree

7 files changed

+663
-103
lines changed

7 files changed

+663
-103
lines changed

lib/README.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -155,6 +155,12 @@ The file structure is designed to make this selection manually achievable for an
155155
- The build macro `ZSTD_NO_INTRINSICS` can be defined to disable all explicit intrinsics.
156156
Compiler builtins are still used.
157157

158+
- The build macro `ZSTD_LITBUFFEREXTRASIZE` can be set to control the amount of extra memory used
159+
during decompression to store literals. This defaults to 64kB. Reducing it can reduce the
160+
memory footprint required for decompression by increasing the portion of the literal buffer that
161+
is stored in the unwritten portion of the dst buffer, at the cost of a performance impact for
162+
decompression.
163+
158164

159165
#### Windows : using MinGW+MSYS to create DLL
160166

lib/common/zstd_internal.h

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -181,7 +181,7 @@ static void ZSTD_copy16(void* dst, const void* src) {
181181
#if defined(ZSTD_ARCH_ARM_NEON)
182182
vst1q_u8((uint8_t*)dst, vld1q_u8((const uint8_t*)src));
183183
#else
184-
ZSTD_memcpy(dst, src, 16);
184+
ZSTD_memmove(dst, src, 16);
185185
#endif
186186
}
187187
#define COPY16(d,s) { ZSTD_copy16(d,s); d+=16; s+=16; }
@@ -210,8 +210,6 @@ void ZSTD_wildcopy(void* dst, const void* src, ptrdiff_t length, ZSTD_overlap_e
210210
BYTE* op = (BYTE*)dst;
211211
BYTE* const oend = op + length;
212212

213-
assert(diff >= 8 || (ovtype == ZSTD_no_overlap && diff <= -WILDCOPY_VECLEN));
214-
215213
if (ovtype == ZSTD_overlap_src_before_dst && diff < WILDCOPY_VECLEN) {
216214
/* Handle short offset copies. */
217215
do {

lib/decompress/zstd_decompress.c

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -916,7 +916,7 @@ static size_t ZSTD_decompressFrame(ZSTD_DCtx* dctx,
916916
switch(blockProperties.blockType)
917917
{
918918
case bt_compressed:
919-
decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1);
919+
decodedSize = ZSTD_decompressBlock_internal(dctx, op, (size_t)(oend-op), ip, cBlockSize, /* frame */ 1, not_streaming);
920920
break;
921921
case bt_raw :
922922
decodedSize = ZSTD_copyRawBlock(op, (size_t)(oend-op), ip, cBlockSize);
@@ -1229,7 +1229,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
12291229
{
12301230
case bt_compressed:
12311231
DEBUGLOG(5, "ZSTD_decompressContinue: case bt_compressed");
1232-
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1);
1232+
rSize = ZSTD_decompressBlock_internal(dctx, dst, dstCapacity, src, srcSize, /* frame */ 1, is_streaming);
12331233
dctx->expected = 0; /* Streaming not supported */
12341234
break;
12351235
case bt_raw :
@@ -1824,7 +1824,8 @@ size_t ZSTD_sizeof_DStream(const ZSTD_DStream* dctx)
18241824
size_t ZSTD_decodingBufferSize_min(unsigned long long windowSize, unsigned long long frameContentSize)
18251825
{
18261826
size_t const blockSize = (size_t) MIN(windowSize, ZSTD_BLOCKSIZE_MAX);
1827-
unsigned long long const neededRBSize = windowSize + blockSize + (WILDCOPY_OVERLENGTH * 2);
1827+
/* space is needed to store the litbuffer after the output of a given block without stomping the extDict of a previous run, as well as to cover both windows against wildcopy */
1828+
unsigned long long const neededRBSize = windowSize + blockSize + ZSTD_BLOCKSIZE_MAX + (WILDCOPY_OVERLENGTH * 2);
18281829
unsigned long long const neededSize = MIN(frameContentSize, neededRBSize);
18291830
size_t const minRBSize = (size_t) neededSize;
18301831
RETURN_ERROR_IF((unsigned long long)minRBSize != neededSize,

lib/decompress/zstd_decompress_block.c

Lines changed: 628 additions & 92 deletions
Large diffs are not rendered by default.

lib/decompress/zstd_decompress_block.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,12 @@
3333
*/
3434

3535

36+
/* Streaming state is used to inform allocation of the literal buffer */
37+
typedef enum {
38+
not_streaming = 0,
39+
is_streaming = 1
40+
} streaming_operation;
41+
3642
/* ZSTD_decompressBlock_internal() :
3743
* decompress block, starting at `src`,
3844
* into destination buffer `dst`.
@@ -41,7 +47,7 @@
4147
*/
4248
size_t ZSTD_decompressBlock_internal(ZSTD_DCtx* dctx,
4349
void* dst, size_t dstCapacity,
44-
const void* src, size_t srcSize, const int frame);
50+
const void* src, size_t srcSize, const int frame, const streaming_operation streaming);
4551

4652
/* ZSTD_buildFSETable() :
4753
* generate FSE decoding table for one symbol (ll, ml or off)

lib/decompress/zstd_decompress_internal.h

Lines changed: 14 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,16 @@ typedef struct {
106106
size_t ddictPtrCount;
107107
} ZSTD_DDictHashSet;
108108

109+
#ifndef ZSTD_LITBUFFEREXTRASIZE
110+
#define ZSTD_LITBUFFEREXTRASIZE (1 << 16) /* extra buffer reduces amount of dst required to store litBuffer */
111+
#endif
112+
113+
typedef enum {
114+
ZSTD_not_in_dst = 0, /* Stored entirely within litExtraBuffer */
115+
ZSTD_in_dst = 1, /* Stored entirely within dst (in memory after current output write) */
116+
ZSTD_split = 2 /* Split between litExtraBuffer and dst */
117+
} ZSTD_litLocation_e;
118+
109119
struct ZSTD_DCtx_s
110120
{
111121
const ZSTD_seqSymbol* LLTptr;
@@ -171,7 +181,10 @@ struct ZSTD_DCtx_s
171181
ZSTD_outBuffer expectedOutBuffer;
172182

173183
/* workspace */
174-
BYTE litBuffer[ZSTD_BLOCKSIZE_MAX + WILDCOPY_OVERLENGTH];
184+
BYTE* litBuffer;
185+
const BYTE* litBufferEnd;
186+
ZSTD_litLocation_e litBufferLocation;
187+
BYTE litExtraBuffer[ZSTD_LITBUFFEREXTRASIZE + WILDCOPY_OVERLENGTH]; /* literal buffer can be split between storage within dst and within this scratch buffer */
175188
BYTE headerBuffer[ZSTD_FRAMEHEADERSIZE_MAX];
176189

177190
size_t oversizedDuration;

tests/fullbench.c

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -123,11 +123,11 @@ static size_t local_ZSTD_decompress(const void* src, size_t srcSize,
123123
static ZSTD_DCtx* g_zdc = NULL;
124124

125125
#ifndef ZSTD_DLL_IMPORT
126-
extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize);
126+
extern size_t ZSTD_decodeLiteralsBlock(ZSTD_DCtx* ctx, const void* src, size_t srcSize, void* dst, size_t dstCapacity);
127127
static size_t local_ZSTD_decodeLiteralsBlock(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2)
128128
{
129129
(void)src; (void)srcSize; (void)dst; (void)dstSize;
130-
return ZSTD_decodeLiteralsBlock(g_zdc, buff2, g_cSize);
130+
return ZSTD_decodeLiteralsBlock(g_zdc, buff2, g_cSize, dst, dstSize);
131131
}
132132

133133
static size_t local_ZSTD_decodeSeqHeaders(const void* src, size_t srcSize, void* dst, size_t dstSize, void* buff2)
@@ -577,7 +577,7 @@ static int benchMem(unsigned benchNb,
577577
ip += ZSTD_blockHeaderSize; /* skip block header */
578578
ZSTD_decompressBegin(g_zdc);
579579
CONTROL(iend > ip);
580-
ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, (size_t)(iend-ip)); /* skip literal segment */
580+
ip += ZSTD_decodeLiteralsBlock(g_zdc, ip, (size_t)(iend-ip), dstBuff, dstBuffSize); /* skip literal segment */
581581
g_cSize = (size_t)(iend-ip);
582582
memcpy(dstBuff2, ip, g_cSize); /* copy rest of block (it starts by SeqHeader) */
583583
srcSize = srcSize > 128 KB ? 128 KB : srcSize; /* speed relative to block */

0 commit comments

Comments
 (0)