Skip to content

Commit

Permalink
Fuzzing and bugfixes for magicless-format decoding (#3976)
Browse files Browse the repository at this point in the history
* fuzzing and bugfixes for magicless format

* reset dctx before each decompression

* do not memcmp empty buffers

* nit: decompressor errata
  • Loading branch information
embg committed Mar 20, 2024
1 parent 6a0052a commit 741b87b
Show file tree
Hide file tree
Showing 6 changed files with 163 additions and 4 deletions.
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ lib: accept dictionaries with partial literal tables, by @terrelln
lib: fix CCtx size estimation with external sequence producer, by @embg
lib: fix corner case decoder behaviors, by @Cyan4973 and @aimuz
lib: fix zdict prototype mismatch in static_only mode, by @ldv-alt
lib: fix several bugs in magicless-format decoding, by @embg
cli: add common compressed file types to `--exclude-compressed`` by @daniellerozenblit
cli: fix mixing `-c` and `-o` commands with `--rm`, by @Cyan4973
cli: fix erroneous exclusion of hidden files with `--output-dir-mirror` by @felixhandte
Expand Down
21 changes: 21 additions & 0 deletions doc/decompressor_errata.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,24 @@ The total `Block_Content` is `5` bytes, and `Last_Table_Offset` is `2`.
See the compressor workaround code:

https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L2667-L2682

Magicless format
----------------------

**Last affected version**: v1.5.5

**Affected decompressor component(s)**: Library

**Produced by the reference compressor**: Yes (example: https://gist.github.com/embg/9940726094f4cf2cef162cffe9319232)

**Example Frame**: `27 b5 2f fd 00 03 19 00 00 66 6f 6f 3f ba c4 59`

v1.5.6 fixes several bugs in which the magicless-format decoder rejects valid frames.
These include but are not limited to:
* Valid frames that happen to begin with a legacy magic number (little-endian)
* Valid frames that happen to begin with a skippable magic number (little-endian)

If you are affected by this issue and cannot update to v1.5.6 or later, there is a
workaround to recover affected data. Simply prepend the ZSTD magic number
`0xFD2FB528` (little-endian) to your data and decompress using the standard-format
decoder.
8 changes: 5 additions & 3 deletions lib/decompress/zstd_decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
while (srcSize >= ZSTD_startingInputLength(dctx->format)) {

#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) {
if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) {
size_t decodedSize;
size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
if (ZSTD_isError(frameSize)) return frameSize;
Expand Down Expand Up @@ -1115,7 +1115,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
}
#endif

if (srcSize >= 4) {
if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) {
U32 const magicNumber = MEM_readLE32(src);
DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
Expand Down Expand Up @@ -1412,6 +1412,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
case ZSTDds_decodeSkippableHeader:
assert(src != NULL);
assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
assert(dctx->format != ZSTD_f_zstd1_magicless);
ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
dctx->stage = ZSTDds_skipFrame;
Expand Down Expand Up @@ -2209,7 +2210,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
DEBUGLOG(4, "Consume header");
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");

if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
if (zds->format == ZSTD_f_zstd1
&& (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
zds->stage = ZSTDds_skipFrame;
} else {
Expand Down
6 changes: 5 additions & 1 deletion tests/fuzz/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ FUZZ_TARGETS := \
sequence_compression_api \
seekable_roundtrip \
huf_round_trip \
huf_decompress
huf_decompress \
decompress_cross_format

all: libregression.a $(FUZZ_TARGETS)

Expand Down Expand Up @@ -238,6 +239,9 @@ huf_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o

huf_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o $(LIB_FUZZING_ENGINE) -o $@

decompress_cross_format: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o $(LIB_FUZZING_ENGINE) -o $@

libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o
Expand Down
130 changes: 130 additions & 0 deletions tests/fuzz/decompress_cross_format.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

// This fuzz target validates decompression of magicless-format compressed data.

#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "fuzz_helpers.h"
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "fuzz_data_producer.h"

static ZSTD_DCtx *dctx = NULL;

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
// Give a random portion of src data to the producer, to use for parameter generation.
// The rest will be interpreted as magicless compressed data.
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
size_t magiclessSize = FUZZ_dataProducer_reserveDataPrefix(producer);
const void* const magiclessSrc = src;
size_t const dstSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
void* const standardDst = FUZZ_malloc(dstSize);
void* const magiclessDst = FUZZ_malloc(dstSize);

// Create standard-format src from magicless-format src
const uint32_t zstd_magic = ZSTD_MAGICNUMBER;
size_t standardSize = sizeof(zstd_magic) + magiclessSize;
void* const standardSrc = FUZZ_malloc(standardSize);
memcpy(standardSrc, &zstd_magic, sizeof(zstd_magic)); // assume fuzzing on little-endian machine
memcpy(standardSrc + sizeof(zstd_magic), magiclessSrc, magiclessSize);

// Truncate to a single frame
{
const size_t standardFrameCompressedSize = ZSTD_findFrameCompressedSize(standardSrc, standardSize);
if (ZSTD_isError(standardFrameCompressedSize)) {
goto cleanup_and_return;
}
standardSize = standardFrameCompressedSize;
magiclessSize = standardFrameCompressedSize - sizeof(zstd_magic);
}

// Create DCtx if needed
if (!dctx) {
dctx = ZSTD_createDCtx();
FUZZ_ASSERT(dctx);
}

// Test one-shot decompression
{
FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1));
const size_t standardRet = ZSTD_decompressDCtx(
dctx, standardDst, dstSize, standardSrc, standardSize);

FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless));
const size_t magiclessRet = ZSTD_decompressDCtx(
dctx, magiclessDst, dstSize, magiclessSrc, magiclessSize);

// Standard accepts => magicless should accept
if (!ZSTD_isError(standardRet)) FUZZ_ZASSERT(magiclessRet);

// Magicless accepts => standard should accept
// NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy.
if (!ZSTD_isError(magiclessRet)) FUZZ_ZASSERT(standardRet);

// If both accept, decompressed size and data should match
if (!ZSTD_isError(standardRet) && !ZSTD_isError(magiclessRet)) {
FUZZ_ASSERT(standardRet == magiclessRet);
if (standardRet > 0) {
FUZZ_ASSERT(
memcmp(standardDst, magiclessDst, standardRet) == 0
);
}
}
}

// Test streaming decompression
{
ZSTD_inBuffer standardIn = { standardSrc, standardSize, 0 };
ZSTD_inBuffer magiclessIn = { magiclessSrc, magiclessSize, 0 };
ZSTD_outBuffer standardOut = { standardDst, dstSize, 0 };
ZSTD_outBuffer magiclessOut = { magiclessDst, dstSize, 0 };

FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1));
const size_t standardRet = ZSTD_decompressStream(dctx, &standardOut, &standardIn);

FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless));
const size_t magiclessRet = ZSTD_decompressStream(dctx, &magiclessOut, &magiclessIn);

// Standard accepts => magicless should accept
if (standardRet == 0) FUZZ_ASSERT(magiclessRet == 0);

// Magicless accepts => standard should accept
// NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy.
if (magiclessRet == 0) FUZZ_ASSERT(standardRet == 0);

// If both accept, decompressed size and data should match
if (standardRet == 0 && magiclessRet == 0) {
FUZZ_ASSERT(standardOut.pos == magiclessOut.pos);
if (standardOut.pos > 0) {
FUZZ_ASSERT(
memcmp(standardOut.dst, magiclessOut.dst, standardOut.pos) == 0
);
}
}
}

cleanup_and_return:
#ifndef STATEFUL_FUZZING
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif
free(standardSrc);
free(standardDst);
free(magiclessDst);
FUZZ_dataProducer_free(producer);
return 0;
}
1 change: 1 addition & 0 deletions tests/fuzz/fuzz.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def __init__(self, input_type, frame_type=FrameType.ZSTD):
'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
'huf_round_trip': TargetInfo(InputType.RAW_DATA),
'huf_decompress': TargetInfo(InputType.RAW_DATA),
'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
}
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']
Expand Down

0 comments on commit 741b87b

Please sign in to comment.