Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Fuzzing and bugfixes for magicless-format decoding #3976

Merged
merged 4 commits into from
Mar 20, 2024
Merged
Show file tree
Hide file tree
Changes from 3 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
1 change: 1 addition & 0 deletions CHANGELOG
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@ lib: accept dictionaries with partial literal tables, by @terrelln
lib: fix CCtx size estimation with external sequence producer, by @embg
lib: fix corner case decoder behaviors, by @Cyan4973 and @aimuz
lib: fix zdict prototype mismatch in static_only mode, by @ldv-alt
lib: fix several bugs in magicless-format decoding, by @embg
cli: add common compressed file types to `--exclude-compressed`` by @daniellerozenblit
cli: fix mixing `-c` and `-o` commands with `--rm`, by @Cyan4973
cli: fix erroneous exclusion of hidden files with `--output-dir-mirror` by @felixhandte
Expand Down
21 changes: 21 additions & 0 deletions doc/decompressor_errata.md
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,24 @@ The total `Block_Content` is `5` bytes, and `Last_Table_Offset` is `2`.
See the compressor workaround code:

https://github.com/facebook/zstd/blob/8814aa5bfa74f05a86e55e9d508da177a893ceeb/lib/compress/zstd_compress.c#L2667-L2682

Magicless format
----------------------

**Last affected version**: v1.5.5

**Affected decompressor component(s)**: Library & CLI
embg marked this conversation as resolved.
Show resolved Hide resolved

**Produced by the reference compressor**: Yes (example: https://gist.github.com/embg/9940726094f4cf2cef162cffe9319232)

**Example Frame**: `27 b5 2f fd 00 03 19 00 00 66 6f 6f 3f ba c4 59`

v1.5.6 fixes several bugs in which the magicless-format decoder rejects valid frames.
These include but are not limited to:
* Valid frames that happen to begin with a legacy magic number (little-endian)
* Valid frames that happen to begin with a skippable magic number (little-endian)

If you are affected by this issue and cannot update to v1.5.6 or later, there is a
embg marked this conversation as resolved.
Show resolved Hide resolved
workaround to recover affected data. Simply prepend the ZSTD magic number
`0xFD2FB528` (little-endian) to your data and decompress using the standard-format
decoder.
8 changes: 5 additions & 3 deletions lib/decompress/zstd_decompress.c
Original file line number Diff line number Diff line change
Expand Up @@ -1085,7 +1085,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
while (srcSize >= ZSTD_startingInputLength(dctx->format)) {

#if defined(ZSTD_LEGACY_SUPPORT) && (ZSTD_LEGACY_SUPPORT >= 1)
if (ZSTD_isLegacy(src, srcSize)) {
if (dctx->format == ZSTD_f_zstd1 && ZSTD_isLegacy(src, srcSize)) {
size_t decodedSize;
size_t const frameSize = ZSTD_findFrameCompressedSizeLegacy(src, srcSize);
if (ZSTD_isError(frameSize)) return frameSize;
Expand Down Expand Up @@ -1115,7 +1115,7 @@ size_t ZSTD_decompressMultiFrame(ZSTD_DCtx* dctx,
}
#endif

if (srcSize >= 4) {
if (dctx->format == ZSTD_f_zstd1 && srcSize >= 4) {
U32 const magicNumber = MEM_readLE32(src);
DEBUGLOG(5, "reading magic number %08X", (unsigned)magicNumber);
if ((magicNumber & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) {
Expand Down Expand Up @@ -1412,6 +1412,7 @@ size_t ZSTD_decompressContinue(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, c
case ZSTDds_decodeSkippableHeader:
assert(src != NULL);
assert(srcSize <= ZSTD_SKIPPABLEHEADERSIZE);
assert(dctx->format != ZSTD_f_zstd1_magicless);
ZSTD_memcpy(dctx->headerBuffer + (ZSTD_SKIPPABLEHEADERSIZE - srcSize), src, srcSize); /* complete skippable header */
dctx->expected = MEM_readLE32(dctx->headerBuffer + ZSTD_FRAMEIDSIZE); /* note : dctx->expected can grow seriously large, beyond local buffer size */
dctx->stage = ZSTDds_skipFrame;
Expand Down Expand Up @@ -2209,7 +2210,8 @@ size_t ZSTD_decompressStream(ZSTD_DStream* zds, ZSTD_outBuffer* output, ZSTD_inB
DEBUGLOG(4, "Consume header");
FORWARD_IF_ERROR(ZSTD_decompressBegin_usingDDict(zds, ZSTD_getDDict(zds)), "");

if ((MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
if (zds->format == ZSTD_f_zstd1
&& (MEM_readLE32(zds->headerBuffer) & ZSTD_MAGIC_SKIPPABLE_MASK) == ZSTD_MAGIC_SKIPPABLE_START) { /* skippable frame */
zds->expected = MEM_readLE32(zds->headerBuffer + ZSTD_FRAMEIDSIZE);
zds->stage = ZSTDds_skipFrame;
} else {
Expand Down
6 changes: 5 additions & 1 deletion tests/fuzz/Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -124,7 +124,8 @@ FUZZ_TARGETS := \
sequence_compression_api \
seekable_roundtrip \
huf_round_trip \
huf_decompress
huf_decompress \
decompress_cross_format

all: libregression.a $(FUZZ_TARGETS)

Expand Down Expand Up @@ -238,6 +239,9 @@ huf_round_trip: $(FUZZ_HEADERS) $(FUZZ_ROUND_TRIP_OBJ) rt_fuzz_huf_round_trip.o

huf_decompress: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_huf_decompress.o $(LIB_FUZZING_ENGINE) -o $@

decompress_cross_format: $(FUZZ_HEADERS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o
$(CXX) $(FUZZ_TARGET_FLAGS) $(FUZZ_DECOMPRESS_OBJ) d_fuzz_decompress_cross_format.o $(LIB_FUZZING_ENGINE) -o $@

libregression.a: $(FUZZ_HEADERS) $(PRGDIR)/util.h $(PRGDIR)/util.c d_fuzz_regression_driver.o
$(AR) $(FUZZ_ARFLAGS) $@ d_fuzz_regression_driver.o
Expand Down
130 changes: 130 additions & 0 deletions tests/fuzz/decompress_cross_format.c
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/*
* Copyright (c) Meta Platforms, Inc. and affiliates.
* All rights reserved.
*
* This source code is licensed under both the BSD-style license (found in the
* LICENSE file in the root directory of this source tree) and the GPLv2 (found
* in the COPYING file in the root directory of this source tree).
* You may select, at your option, one of the above-listed licenses.
*/

// This fuzz target validates decompression of magicless-format compressed data.

#include <stddef.h>
#include <stdlib.h>
#include <stdio.h>
#include <string.h>
#include "fuzz_helpers.h"
#define ZSTD_STATIC_LINKING_ONLY
#include "zstd.h"
#include "fuzz_data_producer.h"

static ZSTD_DCtx *dctx = NULL;

int LLVMFuzzerTestOneInput(const uint8_t *src, size_t size)
{
// Give a random portion of src data to the producer, to use for parameter generation.
// The rest will be interpreted as magicless compressed data.
FUZZ_dataProducer_t *producer = FUZZ_dataProducer_create(src, size);
size_t magiclessSize = FUZZ_dataProducer_reserveDataPrefix(producer);
const void* const magiclessSrc = src;
size_t const dstSize = FUZZ_dataProducer_uint32Range(producer, 0, 10 * size);
void* const standardDst = FUZZ_malloc(dstSize);
void* const magiclessDst = FUZZ_malloc(dstSize);

// Create standard-format src from magicless-format src
const uint32_t zstd_magic = ZSTD_MAGICNUMBER;
size_t standardSize = sizeof(zstd_magic) + magiclessSize;
void* const standardSrc = FUZZ_malloc(standardSize);
memcpy(standardSrc, &zstd_magic, sizeof(zstd_magic)); // assume fuzzing on little-endian machine
memcpy(standardSrc + sizeof(zstd_magic), magiclessSrc, magiclessSize);

// Truncate to a single frame
{
const size_t standardFrameCompressedSize = ZSTD_findFrameCompressedSize(standardSrc, standardSize);
if (ZSTD_isError(standardFrameCompressedSize)) {
goto cleanup_and_return;
}
standardSize = standardFrameCompressedSize;
magiclessSize = standardFrameCompressedSize - sizeof(zstd_magic);
}

// Create DCtx if needed
if (!dctx) {
dctx = ZSTD_createDCtx();
FUZZ_ASSERT(dctx);
}

// Test one-shot decompression
{
FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1));
const size_t standardRet = ZSTD_decompressDCtx(
dctx, standardDst, dstSize, standardSrc, standardSize);

FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless));
const size_t magiclessRet = ZSTD_decompressDCtx(
dctx, magiclessDst, dstSize, magiclessSrc, magiclessSize);

// Standard accepts => magicless should accept
if (!ZSTD_isError(standardRet)) FUZZ_ZASSERT(magiclessRet);

// Magicless accepts => standard should accept
// NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy.
if (!ZSTD_isError(magiclessRet)) FUZZ_ZASSERT(standardRet);

// If both accept, decompressed size and data should match
if (!ZSTD_isError(standardRet) && !ZSTD_isError(magiclessRet)) {
FUZZ_ASSERT(standardRet == magiclessRet);
if (standardRet > 0) {
FUZZ_ASSERT(
memcmp(standardDst, magiclessDst, standardRet) == 0
);
}
}
}

// Test streaming decompression
{
ZSTD_inBuffer standardIn = { standardSrc, standardSize, 0 };
ZSTD_inBuffer magiclessIn = { magiclessSrc, magiclessSize, 0 };
ZSTD_outBuffer standardOut = { standardDst, dstSize, 0 };
ZSTD_outBuffer magiclessOut = { magiclessDst, dstSize, 0 };

FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1));
const size_t standardRet = ZSTD_decompressStream(dctx, &standardOut, &standardIn);

FUZZ_ZASSERT(ZSTD_DCtx_reset(dctx, ZSTD_reset_session_and_parameters));
FUZZ_ZASSERT(ZSTD_DCtx_setParameter(dctx, ZSTD_d_format, ZSTD_f_zstd1_magicless));
const size_t magiclessRet = ZSTD_decompressStream(dctx, &magiclessOut, &magiclessIn);

// Standard accepts => magicless should accept
if (standardRet == 0) FUZZ_ASSERT(magiclessRet == 0);

// Magicless accepts => standard should accept
// NOTE: this is nice-to-have, please disable this check if it is difficult to satisfy.
if (magiclessRet == 0) FUZZ_ASSERT(standardRet == 0);

// If both accept, decompressed size and data should match
if (standardRet == 0 && magiclessRet == 0) {
FUZZ_ASSERT(standardOut.pos == magiclessOut.pos);
if (standardOut.pos > 0) {
FUZZ_ASSERT(
memcmp(standardOut.dst, magiclessOut.dst, standardOut.pos) == 0
);
}
}
}

cleanup_and_return:
#ifndef STATEFUL_FUZZING
ZSTD_freeDCtx(dctx); dctx = NULL;
#endif
free(standardSrc);
free(standardDst);
free(magiclessDst);
FUZZ_dataProducer_free(producer);
return 0;
}
1 change: 1 addition & 0 deletions tests/fuzz/fuzz.py
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,7 @@ def __init__(self, input_type, frame_type=FrameType.ZSTD):
'seekable_roundtrip': TargetInfo(InputType.RAW_DATA),
'huf_round_trip': TargetInfo(InputType.RAW_DATA),
'huf_decompress': TargetInfo(InputType.RAW_DATA),
'decompress_cross_format': TargetInfo(InputType.RAW_DATA),
}
TARGETS = list(TARGET_INFO.keys())
ALL_TARGETS = TARGETS + ['all']
Expand Down