Struggling with ZSTD_decompressBlock #227

Closed
GregSlazinski opened this issue Jun 26, 2016 · 51 comments

@GregSlazinski commented Jun 26, 2016

Hi,

I'm having a problem when using the block-based methods.

If I use 'ZSTD_compressContinue' with 'ZSTD_decompressContinue' then my code works fine:

static Bool ZSTDCompress(File &src, File &dest, Int compression_level)
{
   Bool ok=false;
   if(ZSTD_CCtx *ctx=ZSTD_createCCtx_advanced(ZSTDMem))
   {
      ZSTD_parameters params; Zero(params);
      params.cParams=ZSTD_getCParams(Mid(compression_level, 1, ZSTD_maxCLevel()), src.left(), 0);
      if(!ZSTD_isError(ZSTD_compressBegin_advanced(ctx, null, 0, params, src.left())))
      {
         // sizes for 'window_size', 'block_size', 's', 'd' were taken from "zstd" tutorial, "zbuff_compress.c" file, "ZBUFF_compressInit_advanced" function
       C Int window_size=1<<params.cParams.windowLog, block_size=Min(window_size, ZSTD_BLOCKSIZE_MAX);
         Memt<Byte> s, d; s.setNum(window_size+block_size); d.setNum(ZSTDSize(block_size)+1); Int s_pos=0;
         for(; !src.end(); )
         {
            Int read=Min(ZSTD_BLOCKSIZE_MAX, Min(s.elms(), src.left())); // ZSTD_BLOCKSIZE_MAX taken from 'ZBUFF_recommendedCInSize' (without this, 'ZSTD_compressContinue' may fail with 'dest' too small error)
            if(s_pos>s.elms()-read)s_pos=0; // if reading will exceed buffer size
            read=src.getReturnSize(&s[s_pos], read); if(read<=0)goto error;
            auto size=ZSTD_compressContinue(ctx, d.data(), d.elms(), &s[s_pos], read); if(ZSTD_isError(size))goto error;
            if(!dest.put(d.data(), size))goto error;
            s_pos+=read;
         }
         auto size=ZSTD_compressEnd(ctx, d.data(), d.elms()); if(ZSTD_isError(size))goto error;
         if(dest.put(d.data(), size))ok=true;
      }
   error:
      ZSTD_freeCCtx(ctx);
   }
   return ok;
}
static Bool ZSTDDecompress(File &src, File &dest, Long compressed_size, Long decompressed_size)
{
   Bool ok=false;
   if(ZSTD_DCtx *ctx=ZSTD_createDCtx_advanced(ZSTDMem))
   {
      ZSTD_decompressBegin(ctx);
      Byte header[ZSTD_frameHeaderSize_max];
      Long pos=src.pos();
      Int read=src.getReturnSize(header, SIZE(header));
      src.pos(pos);
      ZSTD_frameParams frame; if(!ZSTD_getFrameParams(&frame, header, read))
      {
         Long start=dest.pos();
         // sizes for 'block_size', 's', 'd' were taken from "zstd" tutorial, "zbuff_decompress.c" file, "ZBUFF_decompressContinue" function
       C auto block_size=Min(frame.windowSize, ZSTD_BLOCKSIZE_MAX);
         Memt<Byte> s; s.setNum(block_size);
         for(;;)
         {
            auto size=ZSTD_nextSrcSizeToDecompress(ctx); if(!size){if(dest.pos()-start==decompressed_size)ok=true; break;} if(ZSTD_isError(size) || size>s.elms())break;
            if(!src.getFast(s.data(), size))break; // need exactly 'size' amount
            size=ZSTD_decompressContinue(ctx, dest.mem(), dest.left(), s.data(), size); if(ZSTD_isError(size))break;
            if(!MemWrote(dest, size))break;
         }
      }
      ZSTD_freeDCtx(ctx);
   }
   return ok;
}

But if I replace them with 'ZSTD_compressBlock' and 'ZSTD_decompressBlock' (including writing/reading the compressed buffer size before each buffer), then decompression fails:

static Bool ZSTDCompressRaw(File &src, File &dest, Int compression_level)
{
   Bool ok=false;
   if(ZSTD_CCtx *ctx=ZSTD_createCCtx_advanced(ZSTDMem))
   {
      ZSTD_parameters params; Zero(params);
      params.cParams=ZSTD_getCParams(Mid(compression_level, 1, ZSTD_maxCLevel()), src.left(), 0);
      if(!ZSTD_isError(ZSTD_compressBegin_advanced(ctx, null, 0, params, src.left())))
      {
         // sizes for 'window_size', 'block_size', 's', 'd' were taken from "zstd" tutorial, "zbuff_compress.c" file, "ZBUFF_compressInit_advanced" function
       C Int window_size=1<<params.cParams.windowLog, block_size=Min(window_size, ZSTD_BLOCKSIZE_MAX);
         Memt<Byte> s, d; s.setNum(window_size+block_size); d.setNum(ZSTDSize(block_size)+1); Int s_pos=0;
         dest.cmpUIntV(params.cParams.windowLog);
         for(; !src.end(); )
         {
            Int read=Min(ZSTD_BLOCKSIZE_MAX, Min(s.elms(), src.left())); // ZSTD_BLOCKSIZE_MAX taken from 'ZBUFF_recommendedCInSize' (without this, 'ZSTD_compressContinue' may fail with 'dest' too small error)
            if(s_pos>s.elms()-read)s_pos=0; // if reading will exceed buffer size
            read=src.getReturnSize(&s[s_pos], read); if(read<=0)goto error;
            auto size=ZSTD_compressBlock(ctx, d.data(), d.elms(), &s[s_pos], read); if(ZSTD_isError(size))goto error;
            if(  size>0) // compressed OK
            {
               dest.cmpIntV(size-1);
               if(!dest.put(d.data(), size))goto error;
            }else // failed to compress
            {
               dest.cmpIntV(-read);
               if(!dest.put(&s[s_pos], read))goto error;
            }
            s_pos+=read;
         }
         ok=true;
      }
   error:
      ZSTD_freeCCtx(ctx);
   }
   return ok;
}
static Bool ZSTDDecompressRaw(File &src, File &dest, Long compressed_size, Long decompressed_size)
{
   Bool ok=false;
   if(ZSTD_DCtx *ctx=ZSTD_createDCtx_advanced(ZSTDMem))
   {
      ZSTD_decompressBegin(ctx);
      // sizes for 'block_size', 's', 'd' were taken from "zstd" tutorial, "zbuff_decompress.c" file, "ZBUFF_decompressContinue" function
    C auto window_size=1<<src.decUIntV(), block_size=Min(window_size, ZSTD_BLOCKSIZE_MAX);
      Memt<Byte> s; s.setNum(block_size);
      for(; !src.end(); )
      {
         Int chunk; src.decIntV(chunk);
         if( chunk<0) // un-compressed
         {
            if(!src.copy(dest, -chunk))goto error;
         }else
         {
            chunk++; if(chunk>s.elms())goto error;
            if(!src.getFast(s.data(), chunk))goto error; // need exactly 'chunk' amount
            auto size=ZSTD_decompressBlock(ctx, dest.mem(), dest.left(), s.data(), chunk); if(ZSTD_isError(size))Exit(ZSTD_getErrorName(size)); // here the error occurs
            if(!MemWrote(dest, size))goto error; // this does: dest.mem+=size; and dest.left-=size;
         }
      }
      ok=true;
   error:
      ZSTD_freeDCtx(ctx);
   }
   return ok;
}

The error occurs at the second call to ZSTD_decompressBlock.
The first call succeeds:
chunk=96050
size=ZSTD_decompressBlock(ctx, dest.mem(), dest.left(), s.data(), chunk);
size=131072

The second call fails:
chunk=94707
size=ZSTD_decompressBlock(ctx, dest.mem(), dest.left(), s.data(), chunk);
size=18446744073709551605 ("Corrupted block detected")

Am I missing something obvious here?

When decompressing, the 'dest' File in this test is a contiguous memory buffer capable of storing the entire decompressed data.
With each decompression call, I advance 'dest.mem' to the next decompressed chunk position.

Thanks for any help

@Cyan4973 (Contributor) commented Jun 26, 2016

Thanks for the notification, @GregSlazinski.

Indeed, the construction you describe is expected to work.
It's unclear why it does not, although a bug is definitely possible.

I checked the test suite, and only the "single block" construction has been tested so far.
That's because ZSTD_compressBlock() is primarily used with very small blocks, where the size of headers becomes a sizable portion of the final compressed size, so it becomes tempting to remove them.

As multi-block usage is not part of the test suite yet, this construction may well be buggy.

Onto debugging now ....

@Cyan4973 (Contributor):

The latest update in the "dev" branch is an attempt at fixing this issue.
Tests have been extended to include multi-block compression + decompression.
It seems to work well now.

@luben (Contributor) commented Jun 27, 2016

What are the intended usage scenarios for ZSTD_compress and ZSTD_decompress versus ZSTD_compressBlock and ZSTD_decompressBlock?

My understanding is that both of the variants will not look back into the previously processed block, and both will not add frame headers. So what will be the difference, apart from the ..Block variants receiving a context?

@GregSlazinski (Author) commented Jun 27, 2016

Thanks a lot for the quick fix.
After that it started working, but only up to 29 MB of my original 800 MB file.
After processing several compressed and uncompressed chunks, it again returned "Corrupted block detected".

I've narrowed down the block that fails, and created a sample binary file of 1 MB.
https://www.dropbox.com/s/v8wgeeo1ax1syzd/binary?dl=0

Using the code above, you can see that it fails.
You need "compression_level=1", "ZSTD_compressBlock", and "ZSTD_decompressBlock" to reproduce the error.
(I'm using the latest source from the "dev" branch.)

From the decompressor I get:
step 1: compressed
chunk=106563
size=131072

step 2:
uncompressed of size=131072

step 3: compressed and fail
chunk=104839
size=18446744073709551605 ("Corrupted block detected")

@Cyan4973 (Contributor):

What are the intended usage scenarios for ZSTD_compress and ZSTD_decompress versus ZSTD_compressBlock and ZSTD_decompressBlock?

ZSTD_compress() generates a fully conforming frame, including the necessary headers and footer. It has no size limit, except the size_t limitation.

ZSTD_compressBlock() only generates one compressed block.
It generates neither frame nor block headers.
Input size is limited to the maximum block size, i.e. 128 KB.

ZSTD_compressBlock() is more comparable to ZSTD_compressContinue(), in the sense that it will remember previously compressed blocks and try to use them to compress the next block. The context must be reset with ZSTD_compressBegin() to avoid this effect.

But the intended usage for ZSTD_compressBlock() is compression of very small data blocks (< 1 KB), where it makes sense to remove headers since they may represent a significant portion of the compressed size (~15 bytes on average).
For larger elements, the savings are probably insignificant.
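
For illustration, a minimal single-block round trip with this API might look like the sketch below (assuming the block-level declarations of that era; error handling is trimmed, and the raw-copy fallback glosses over the history issue discussed later in this thread):

#include <stdlib.h>
#include <string.h>
#include "zstd.h"

/* Returns the decompressed size, or a zstd error code.
 * srcSize must be <= the maximum block size (128 KB). */
size_t block_roundtrip(const void* src, size_t srcSize, void* dst, size_t dstCapacity)
{
    ZSTD_CCtx* cctx = ZSTD_createCCtx();
    ZSTD_DCtx* dctx = ZSTD_createDCtx();
    void*      tmp  = malloc(ZSTD_compressBound(srcSize));
    size_t     result;

    ZSTD_compressBegin(cctx, 1);   /* fresh history, compression level 1 */
    size_t const cSize = ZSTD_compressBlock(cctx, tmp, ZSTD_compressBound(srcSize), src, srcSize);

    ZSTD_decompressBegin(dctx);
    if (ZSTD_isError(cSize)) {
        result = cSize;
    } else if (cSize == 0) {       /* not compressible: the caller must store the block raw */
        memcpy(dst, src, srcSize);
        result = srcSize;
    } else {
        result = ZSTD_decompressBlock(dctx, dst, dstCapacity, tmp, cSize);
    }
    free(tmp);
    ZSTD_freeCCtx(cctx);
    ZSTD_freeDCtx(dctx);
    return result;
}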

@luben (Contributor) commented Jun 27, 2016

I see. Thanks for the explanation.

@Cyan4973 (Contributor) commented Jun 27, 2016

OK. I guess I understand what's going on.

ZSTD_decompressBlock() doesn't manage uncompressed blocks.

Even if you properly handle uncompressed blocks directly in your application, which seems to be the case here, the problem is that ZSTD_decompressBlock() will not "see" such blocks. So its history context is blind to them.
Any back-reference into that region will fail.

This is what happens here.

Fixing this issue will be a bit more complex.
It will require adding some "weird" prototypes, and my feeling is that it will make the API more complex to use.

Maybe it's time to reconsider if supporting multi-blocks is really a good idea.

Btw @GregSlazinski, is there any property that made you select this mode rather than, for example, ZSTD_compressContinue() or ZBUFF_compressContinue()?

@GregSlazinski (Author) commented Jun 27, 2016

I want to compress to the absolute smallest possible size, without unnecessary headers.
I pack many small files, some 16 bytes, some 100 bytes; I have bigger ones too of course, but the most noticeable overhead comes from the many small files.

I've found the most efficient way to compress every file is to use:

UINT total_decompressed_size
UINT total_compressed_size
LOOP
{
    UINT compressed_block_size
    BYTE block_data[] <- Array
}

Additionally, in the above scheme I pack UInts using the following code:

   Byte data[5]; Int pos=0;
   data[pos++]=((u&127)|((u>=128)<<7));
   if(u>=128)
   {
      u>>=7; data[pos++]=((u&127)|((u>=128)<<7));
      if(u>=128)
      {
         u>>=7; data[pos++]=((u&127)|((u>=128)<<7));
         if(u>=128)
         {
            u>>=7; data[pos++]=((u&127)|((u>=128)<<7));
            if(u>=128)
            {
               u>>=7; data[pos++]=(u);
            }
         }
      }
   }

This allows small UInts to be encoded in just 1 byte.
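
For reference, a matching decoder for this encoding might look like the following sketch (it mirrors the encoder above; this is app-level code, not part of zstd):

// Hypothetical decoder for the encoding above: 7 payload bits per byte,
// high bit set means "another byte follows".
// Note: the encoder's 5th byte is written without a continuation flag, but
// for 32-bit values at most 4 bits remain by then, so its high bit is
// always clear and the loop terminates correctly.
unsigned decode_uintv(const unsigned char* data, int* pos)
{
    unsigned u = 0;
    int shift = 0;
    unsigned char b;
    do {
        b = data[(*pos)++];
        u |= (unsigned)(b & 127) << shift;
        shift += 7;
    } while (b & 128);
    return u;
}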

I think that surrounding blocks with bigger frame containers, rather than just the block size (using the method described above), is a waste of space.

In short: I just want to avoid the extra overhead, and pack as small as possible.

@Cyan4973 (Contributor):

I just want to avoid the extra overhead, and pack as small as possible.

I believe it's a very good use case which justifies ZSTD_compressBlock().
Especially the part regarding small sources.

I'm just wondering if, for sources > 128 KB, the header savings remain significant enough to justify ZSTD_compressBlock() rather than, for example, ZSTD_compress().

@GregSlazinski (Author):

I'd like to use ZSTD_compressBlock everywhere, rather than using 2 separate paths for small vs. large files.
This way the compress/decompress functions stay compatible with each other (for both small and large files).

@Cyan4973 (Contributor) commented Jul 4, 2016

OK, I've been thinking about this issue for a while, and believe I have a proposal.

First, if it were for my own usage, I would advise using ZSTD_compressBlock() for sources <= 128 KB, and switching to ZSTD_compress() for larger sources.
I know it's a branch in the code, but there already is such a branch, since compressing sources > 128 KB requires cutting the input into successive blocks, hence managing these intermediate blocks and their metadata.

My guess is that the code is effectively simplified by using frames for sources > 128 KB.

Anyway, I was so seduced by the concept that I was initially considering just applying it transparently from within ZSTD_compressBlock().

But there was a problem of definition: in such a construction, ZSTD_compressBlock() sometimes generates a block, sometimes a frame. Even with good comments around it, it's still strange behavior which invites unforeseen side effects.

So I've since switched sides.

I believe the issue comes from ZSTD_decompressBlock() : its history tracker is unable to take into consideration blocks it has not processed, such as uncompressed blocks, or RLE blocks.

There are 2 solutions to this :

  • Add a function which basically "inserts" data segments into the ZSTD_DCtx* context, so that history can be completed manually.
  • Make ZSTD_decompressBlock() compatible with uncompressed and RLE blocks. This way, it would process them, and be able to add them into its history tracker.

I believe the second solution is likely the most transparent one.

@GregSlazinski (Author):

Hi,

Thanks for the reply.

Whichever solution you choose is fine by me.
My only suggestion is that built-in frames can be avoided altogether, for both small and big files.

I have a lot of files, small and big; all of them are packed into one big PAK archive that stores every file inside as a separate entry.
The goal is to make the PAK file as small as possible.
And let's not forget that large sources > 128 KB would require multiple frames, making the overhead significant as well due to each frame's headers.
So I'd like to use the internal block-based stream compression methods everywhere.

Thanks for your help; this is the last issue stopping me from integrating this awesome library into my game engine.

@Cyan4973 (Contributor) commented Jul 5, 2016

There's a problem with my latest suggestion :

Make ZSTD_decompressBlock() compatible with uncompressed and RLE blocks.

This would require knowing the size to decompress.

Hence, the prototype requirement would change from "needs maximum possible decompressed size" (aka output buffer size) to "needs exact decompressed size".

Is that an issue ?

@GregSlazinski (Author) commented Jul 5, 2016

Hi,

Currently I store the uncompressed size only for the entire file (not per block; all the blocks that make up the file summed together).
However if it's necessary, then I can write the uncompressed size for each block as well.

I'm assuming that this information is stored in the ZSTD frame headers?

If it's required, then I can provide it.
It would of course be better if it's not required, since the overhead would be smaller, but if there's no other way then I think it's okay.

I'm curious how it compares to 'LZ4_decompress_safe_continue', which doesn't require the exact decompressed size.
Does 'LZ4_decompress_safe_continue' have the extra headers in each block?

Thank you

@Cyan4973 (Contributor) commented Jul 5, 2016

LZ4_decompress_safe_continue() has very much the same scope as ZSTD_decompressBlock(). Both decompress raw blocks, without frame metadata.

Hence, they share the same properties:
neither needs the exact decompressed size, just a bounded maximum.
And that's also why both are unable to deal with non-compressed blocks.

For LZ4 though, that might not be a problem, because LZ4_compress_continue() may generate compressed blocks which are larger than the original (it only guarantees a maximum overhead of 0.4%). Hence LZ4_decompress_safe_continue() will always decode fully formed LZ4 blocks.

In contrast, ZSTD_compressBlock() skips non-compressible blocks, expecting the higher-level calling process to take care of them. It's more efficient, but also requires more involvement from the upper layer. And such blocks cannot be forwarded to ZSTD_decompressBlock().

Currently I store only the uncompressed size for entire file

OK, so indeed having to manage the exact decompressed size of each block is an additional duty, which doesn't fit well.

Maybe then the previous idea of "inserting data segments within ZSTD_DCtx* history context" would prove a better choice.

It would need a new prototype though, something like :

size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* src, size_t srcSize);

Thinking harder about it:

it might be possible to refactor ZSTD_compressBlock() in a way which makes it generate ZSTD-compatible blocks even for non-compressible data. In that case, ZSTD_decompressBlock() would be able to handle them correctly, removing the need for special-case handling.
There would be some overhead though, but I'm guessing it's going to be limited, around +4 bytes per block.

Thoughts ?

@GregSlazinski (Author) commented Jul 5, 2016

Am I right in understanding that "ZSTD_insertBlock" would be called only for non-compressed blocks?

In that case I think that would be the best solution.

Because my decompressor currently works like this:

static Bool ZSTDDecompressRaw(File &src, File &dest, Long compressed_size, Long decompressed_size)
{
   Bool ok=false;
   if(ZSTD_DCtx *ctx=ZSTD_createDCtx_advanced(ZSTDMem))
   {
      ZSTD_decompressBegin(ctx);
      // sizes for 'block_size', 's', 'd' were taken from "zstd" tutorial, "zbuff_decompress.c" file, "ZBUFF_decompressContinue" function
    C auto window_size=1<<src.decUIntV(), block_size=Min(window_size, ZSTD_BLOCKSIZE_MAX);
      Memt<Byte> s; s.setNum(block_size);
      for(; !src.end(); )
      {
         Int chunk; src.decIntV(chunk);
         if( chunk<0) // un-compressed
         {
            if(!src.copy(dest, -chunk))goto error;
         }else
         {
            chunk++; if(chunk>s.elms())goto error;
            if(!src.getFast(s.data(), chunk))goto error; // need exactly 'chunk' amount
            auto size=ZSTD_decompressBlock(ctx, dest.mem(), dest.left(), s.data(), chunk); if(ZSTD_isError(size))Exit(ZSTD_getErrorName(size)); // here the error occurs
            if(!MemWrote(dest, size))goto error; // this does: dest.mem+=size; and dest.left-=size;
         }
      }
      ok=true;
   error:
      ZSTD_freeDCtx(ctx);
   }
   return ok;
}

LOOP
{
    UINT compressed_block_size
    BYTE block_data[] <- Array
}

In this approach, the size of blocks that are not compressed is already known.
So there would be no need to write any extra overhead.
I would just replace this call:

if( chunk<0) // un-compressed
{
   if(!src.copy(dest, -chunk))goto error;
}else

with this:

if( chunk<0) // un-compressed
{
   if(!src.copy(dest, -chunk))goto error;
   ZSTD_insertBlock(..here pass the uncompressed data that was just copied);
}else

Is that correct?
I think that would be the best option, instead of the other approach below:

There will be some overhead though, but I'm guessing it's going to be limited, something around +4 bytes per block.

I think this approach would be worse if it introduced a +4 byte overhead,
because right now I can just use a 1-bit flag in the "UINT compressed_block_size" (which I write before each block) to specify whether the block is compressed or not.

It's much better to have 1 bit of overhead in a UINT that's already needed for the compressed block size than to add 4 extra bytes.

Thank you
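
To make the 1-bit-flag idea concrete, the app-level framing could look like the following sketch (the Stream type and the write_uintv/read_uintv helpers are hypothetical stand-ins for the application's own varint routines, like the cmpUIntV/decUIntV above; nothing here is zstd API):

/* Fold the "is compressed?" flag into the low bit of the size field
 * that is written before each block anyway. */
void write_block_header(Stream* out, unsigned size, int is_compressed)
{
    write_uintv(out, (size << 1) | (unsigned)(is_compressed != 0));
}

unsigned read_block_header(Stream* in, int* is_compressed)
{
    unsigned v = read_uintv(in);
    *is_compressed = (int)(v & 1);
    return v >> 1;   /* size of the stored block, raw or compressed */
}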

@FrancescAlted:

FWIW I'd also prefer to avoid the 4-byte overhead if it adds to the ~15 bytes that are already there. If the 4-byte overhead is only for blocks > 128 KB, then I don't care too much. My use case is that I could use zstd from a higher-level compressor, so I can communicate exact input and output sizes to it.

@KrzysFR (Contributor) commented Jul 5, 2016

Here's a stupid idea: if ZSTD_compressBlock(..., sourceSize, ...) always considers anything that would compress to more than 99.x% of sourceSize as incompressible, and if ZSTD_decompressBlock() asks for the original uncompressed size, it can detect whether the data is uncompressed by applying the exact same 99.x% test, without having to store any header before the data to distinguish the two types of blocks, or to store the size.

  • Encoder side:
    • ZSTD_compressBlock(...) always returns > 0, even for incompressible blocks, which it will memcpy to the target buffer.
    • The application emits the block, as well as the original uncompressed size, to some stream or file using its own framing or metadata (which it would have to do anyway).
  • Decoder side:
    • The application reads the block as well as originalSize, using its own framing.
    • It calls ZSTD_decompressBlock(...compressedSize,..., originalSize), which compares originalSize and compressedSize to determine if the block is uncompressed (using the same 99.x% rule) by testing compressedSize==originalSize. If yes, it will memcpy it to the target buffer. If compressed, same as before.

This way, there is no custom logic to deal with compressed/uncompressed at the app level, and no need to have custom headers to distinguish between compressed/uncompressed block (stored out of bound, via the app's own framing).

The only downside is that once you decide on the threshold for considering a block incompressible, it will be baked into the library, and can never be changed. Unless you add it as a parameter to the encoder/decoder...
Actually, no problem here: if (compressedSize < originalSize) then DECOMPRESS(...) else MEMCPY(...) on the decoder's side, meaning the encoder can choose any threshold it wants!
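
A sketch of that decoder-side rule (app-level only; note it leaves aside the history problem discussed earlier, which the thread later addresses with ZSTD_insertBlock()):

#include <string.h>
#include "zstd.h"

/* Assumes the encoder stores a block raw whenever compression would not
 * shrink it, so compressedSize == originalSize unambiguously marks a raw block. */
size_t decode_block(ZSTD_DCtx* dctx, void* dst, size_t originalSize,
                    const void* src, size_t compressedSize)
{
    if (compressedSize == originalSize) {   /* stored raw by the encoder */
        memcpy(dst, src, originalSize);
        return originalSize;
    }
    /* genuinely compressed: originalSize doubles as the exact capacity */
    return ZSTD_decompressBlock(dctx, dst, originalSize, src, compressedSize);
}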

@KrzysFR (Contributor) commented Jul 5, 2016

After thinking about it more, I've edited the comment above with a simpler method.

@Cyan4973 (Contributor) commented Jul 5, 2016

@KrzysFR : that's indeed the core idea in the suggestion to make ZSTD_decompressBlock() compatible with uncompressed data.

The problem is, it requires changing the prototype from ZSTD_decompressBlock(..., dstCapacity, ...) to ZSTD_decompressBlock(..., originalSize, ...).
Which in turn means it becomes necessary to know, i.e. save and restore, originalSize,
which is a new element, currently unnecessary in @GregSlazinski's implementation.

Hence the discussion investigating other potential ideas.

@KrzysFR (Contributor) commented Jul 5, 2016

I guess it depends on whether you want to completely delegate the task of framing to the application itself or not?

With the current implementation, the application already needs to do it anyway, if only to store a (compressed_size, is_uncompressed?) header with each block. Adding originalSize to it would not be hard in most cases (you can use varint or equivalent encoding if you want small headers).

Plus, some applications either already store the original size anyway (metadata), OR they know it from the context (the size is constant, like the size of a raw video frame or a vector of N values, can be deduced from the file name, or comes from some other out-of-band information).

I feel that the ZSTD_compressBlock/ZSTD_decompressBlock API should be meant for people who want/need to do their own framing, and it should not add its own headers if they would be redundant anyway. If it needs some parameters to decode the block, it should ask the caller for them.

@Cyan4973 (Contributor) commented Jul 5, 2016

it depends if you want to completely delegate the task of framing to the application itself

That's indeed the purpose of the ZSTD_*Block() API.

With current implementation, application already needs to do it anyway, if only to store a (compressedSize, uncompressed) header with each block.

According to @GregSlazinski, he only stores the uncompressed size of the complete object, not of each block. And it would work fine, if not for the uncompressed blocks.

So the only question remaining is how to deal with uncompressed blocks, in a way which keeps the initial spirit of the API.

@KrzysFR (Contributor) commented Jul 5, 2016

So the only question remaining is how to deal with uncompressed blocks, in a way which keeps the initial spirit of the API.

Don't deal with it: treat it as any other block. That way they can be used to encode the next block as well.

My vote is for no special treatment of uncompressed blocks. ZSTD_compressBlock should not surface to the app that they are special, so that no else { ... } branch is required in the encoder or decoder code (which would simplify things a lot).

I'm not familiar with the File API from the code above (C++?), but I guess it could become:

//...
for(; !src.end(); )
{
   int chunk, size;
   src.decIntV(chunk); // read compressed size
   src.decIntV(size); // read original size
   // read exactly 'chunk' bytes from 'src' (not sure if that's what .getFast(...) does?)
   auto res=ZSTD_decompressBlock(ctx, dest.mem(), dest.left(), s.data(), chunk,  size);
   if(ZSTD_isError(res)) Exit(ZSTD_getErrorName(res)); // here the error occurs
   // assert that res == size
   if (!MemWrote(dest, size)) goto error; // this does: dest.mem+=size; and dest.left-=size;
}
//...

Of course this means changing the file format, which may or may not be a big deal.

@Cyan4973 (Contributor) commented Jul 5, 2016

My vote is for no special treatment of uncompressed blocks. ZSTD_compressBlock should not surface to the app that they are special

OK, so this seems to be a vote in favor of the proposition: ZSTD_compressBlock() generates valid blocks even for non-compressible data. In that case, such a block will be encoded in a way which costs a few more bytes on top of the raw uncompressed size (I'm guessing up to +4 bytes per block, depending on block size).

@GregSlazinski and @FrancescAlted consider these additional bytes a problem.

Note this proposition uses the current format; it only drops the rule that "a compressed block should always be smaller than its uncompressed size".

@KrzysFR (Contributor) commented Jul 5, 2016

Are these +4 bytes stored inside the block, and do they contain the raw size? If so, why store them in the block if the app can do it itself (probably in less than 4 bytes)?

I also often have the issue of having to know the original size of the block before calling ZSTD_decompressBlock, in order to allocate a new byte[..] with the exact size in .NET (until Span<T> lands in the CLR, this is the only way to avoid an extra copy when the size is known only after decompressing), so I have to store it myself.

If it was stored in the block itself, I would have to call a ZSTD_getSizeOfDecompressedBlock(...) before calling ZSTD_decompressBlock(...), which adds to the API, and doesn't allow me to optimize it away if it is known from the context.

Also, by 'change the file format', I was talking about the file format likely used by @GregSlazinski's code sample, which seems to use a negative value for uncompressed and a positive one for compressed. It would change to 2 positive integers and no if/else branch. If I'm not mistaken, src.decIntV() uses 7-bit varint encoding, which means that negative values take at least 5 bytes in the stream. We could easily fit 2 small positive values in the same 5 bytes, so there would be no extra cost in space.

@Cyan4973 (Contributor) commented Jul 5, 2016

Are these +4 bytes stored inside the block, and contain the raw size?

Sort of, but not exactly.
It's a consequence of using the current block format "as is".

A block contains 2 sections, one for literals and one for sequences.
For non-compressible data, the sequences section is empty, but it costs 1 byte to say so.
The literals section is flagged as "uncompressed", and its size is indeed provided (since the literals section is unaware that the sequences section is empty) using a variable-size integer, described here. For a full 128 KB raw block, that's roughly 3 bytes of literals header plus 1 byte for the empty sequences section, hence the ~+4 bytes estimate.

As stated, this is a consequence of using the compression format "as is".
It wasn't meant to store uncompressed data (indeed, it was even defined as out-of-scope !), and so has no special code to deal with it.

Currently, the proposal with the least undesired side-effects seems to be adding a new prototype ZSTD_insertBlock().

I was also considering adding a "mode" to ZSTD_decompressBlock(), which would automatically interpret srcSize == dstCapacity as uncompressed data, and just copy it.
But it feels strange: dstCapacity would sometimes mean buffer capacity and sometimes the actual target length, depending on context. Even if documented, it smells of confusion risks.

@GregSlazinski (Author):

ZSTD_insertBlock
This is indeed the best solution.
Since we need to store the compressed size of the block anyway, it costs virtually nothing to include in it a 1-bit flag stating whether the buffer is compressed or not.
Having to additionally store the decompressed size of the block would cost at least 3-4 extra bytes. Let's not forget we're talking about compressing data, so the goal is not to introduce any unnecessary overhead when it can be avoided another way.
Thank you

@KrzysFR (Contributor) commented Jul 5, 2016

I see what you mean: since for uncompressed blocks raw size == compressed size, you don't need to store the same value twice. Using the MSB for this flag (the sign bit) may not be the best fit with varint, but you could go the other way and store size << 1 | is_compressed, which would only use 3 bytes for size == 128 KB, instead of 5 bytes to store -128 KB.

But then how do you solve the issue of knowing in advance the raw size of an actually compressed block before calling ZSTD_decompressBlock (since you need to pre-allocate a buffer with the exact size)? In this case, raw size != compressed size, so you need to store it somewhere.

And this still leaves you having to if/else in both the encoder and the decoder to deal with two types of blocks.

@FrancescAlted:

FWIW, Blosc already uses the 1-bit approach for representing an uncompressed chunk in a specific place of the header. I'd prefer to go this route here too.

@GregSlazinski (Author):

@KrzysFR: for this I'm actually using a variant which I call cmpIntV - compress integer with a variable number of bytes. It works like the unsigned Int version but handles negative numbers in a similar manner to what you've mentioned.

As for storing the decompressed size: I store it at the start of the stream. However, I do it just once (the decompressed size of all blocks), not for each block separately. Optionally you can store it in some metadata.

As for having to do if/else: I don't see this as much of a problem.

@Cyan4973 (Contributor) commented Jul 6, 2016

One last detail about ZSTD_insertBlock().

The current proposed prototype is :
size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, const void* blockStart, size_t blockSize);

That is: it inserts the block into the dctx history context.

Now, there is an implied condition here:
the block must be referenced from its final destination position,
that is, after it has been copied there.
It must not be referenced from the "compressed" area.
Even if the content is by definition the same, the "contiguous" property is not.

With the above prototype, there is a risk that a user references the block directly from the "compressed" area, rather than the same block copied into its destination area.
Doing so would break the "contiguous" property, which is likely to result in problems (out-of-bound offsets, notably).

Anyway, the point is to avoid such confusion.
I can document it. But would it be enough ?

Another prototype proposition would be something like :

size_t ZSTD_insertBlock(ZSTD_DCtx* dctx, void* dst, size_t dstCapacity, const void* src, size_t srcSize);

The name looks the same, but the scope is different:
the function would both copy the non-compressed block into the destination buffer, and then reference the block from its final position.
It avoids confusion regarding which version to reference.
On the other hand, it forces the copy to be done from inside insertBlock(), which could be a limitation should the user have other usages / possibilities in mind.

I'm unsure if this is really better.

Thoughts ?
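
To illustrate the concern with the first prototype, a decode-loop fragment might look like this (a sketch; dst_pos is the current write position inside the final output buffer, src_buffer holds the raw block as read from the compressed stream):

/* WRONG: referencing the block from the compressed input area breaks
 * the "contiguous history" property: */
/* ZSTD_insertBlock(dctx, src_buffer, blockSize); */

/* RIGHT: copy the raw block to its final destination first, then
 * reference that destination copy: */
memcpy(dst_pos, src_buffer, blockSize);
ZSTD_insertBlock(dctx, dst_pos, blockSize);
dst_pos += blockSize;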

@GregSlazinski (Author):

Hi,

I think the first version should be chosen.
For example, when the source is a file, the decoding process can read directly from the file into the destination memory, so there is no memory copy at all.

The second version would force the copy even when it's not needed, making it slower.

There's no problem with the first version having to be called on the destination buffer, as long as it's mentioned in the header description.

Thanks

@KrzysFR (Contributor) commented Jul 7, 2016

If I summarize in pseudo C++ code (probably does not compile):

The current issue is that the decoder has no way to know the original_size of a compressed block to pass to ZSTD_decompressBlock(...).

note: fixed a few typos

Encoder:

// input: stream-like object with the plain text to encode
// output: stream-like object where to send the compressed output
const MAX_BLOCK_SIZE = 128 * 1024;
ZSTD_createCCtx(...)
ZSTD_compressBegin(..);
void* buf_in = malloc(MAX_BLOCK_SIZE);
void* buf_out = malloc(MAX_BLOCK_SIZE);
while(!input.eof())
{
    int size = input.read_buffer(buf_in, capacity: MAX_BLOCK_SIZE);
    // probably introduce a loop here to fill the buffer close to the limit,
    // for socket-like streams with partial reads (ex: Transfer-Encoding: chunked)
    int res = ZSTD_compressBlock(..., buf_out, MAX_BLOCK_SIZE, buf_in, size);
    if (ZSTD_isError(res)) { /*... handle error */ }
    if (res == 0)
    { // uncompressed block
        output.write_signed_int(-size);
        output.write_bytes(buf_in, size);
    }
    else
    { // compressed block
        assert(res < MAX_BLOCK_SIZE);
        output.write_signed_int(res);
        output.write_bytes(buf_out, res);
    }
}
// cleanup
free(buf_in);
free(buf_out);
ZSTD_freeCCtx(...)

Decoder:

// input: stream-like object with the compressed data to decode
// output: stream-like object where to send the decompressed output
const MAX_BLOCK_SIZE = 128 * 1024;
ZSTD_createDCtx(...);
ZSTD_decompressBegin(...);
void* buf_in = malloc(MAX_BLOCK_SIZE);
void* buf_out = malloc(MAX_BLOCK_SIZE);
while(!input.eof())
{
    int chunk_size = input.read_signed_int();
    int size = chunk_size < 0 ? -chunk_size : chunk_size;
    int read = input.read_exactly(buf_in, size);
    assert(read == size);
    if (chunk_size < 0)
    { // uncompressed block
        int res = ZSTD_insertBlock(..., buf_out, MAX_BLOCK_SIZE, buf_in, size);
        if (ZSTD_isError(res)) { /* handle error */ }
        output.write_bytes(buf_out, size);
    }
    else
    { // compressed block
        int original_size = /* BUGBUG: how do I know this? */
        int res = ZSTD_decompressBlock(...., buf_out, MAX_BLOCK_SIZE, buf_in, size, original_size);
        if (ZSTD_isError(res)) { /* handle error */ }
        assert(res == original_size);
        output.write_bytes(buf_out, res);
    }
}
// cleanup
free(buf_in);
free(buf_out);
ZSTD_freeDCtx(...);

@KrzysFR (Contributor) commented Jul 7, 2016

Now, if ZSTD_compressBlock() handled uncompressed blocks silently, it could be changed to the following:

The app creates its own framing format where each block is:

  • compressed_size: varint
  • original_size: varint. Stored as 0 if same as compressed_size
  • chunk_data: buffer of length compressed_size

For large blocks of max size 128 KB: the header size would be 4 bytes for uncompressed (131,072 needs 3 bytes in varint format, unfortunately). For a compressed block, depending on the compression ratio, it would need 4 to 6 bytes (most probably 5?).

For small blocks of up to 1KB, header size would be 2-3 bytes for uncompressed, and 3-4 bytes for compressed.

Bonus: decoder knows in advance the size of the decompressed block (useful with some languages or API).

Edit: again, this is a format that is decided by the application! Others could use a more compact encoding! The point being that the headers are not handled by ZSTD, but by the app.

Encoder:

// input: stream-like object with the plain text to encode
// output: stream-like object where to send the compressed output
const MAX_BLOCK_SIZE = 128 * 1024;
ZSTD_createCCtx(...)
ZSTD_compressBegin(..);
void* buf_in = malloc(MAX_BLOCK_SIZE);
void* buf_out = malloc(MAX_BLOCK_SIZE);
while(!input.eof())
{
    int size = input.read_buffer(buf_in, capacity: MAX_BLOCK_SIZE);
    // probably introduce a loop here to fill the buffer close to the limit,
    // for socket-like streams with partial reads (ex: Transfer-Encoding: chunked)
    int res = ZSTD_compressBlock(..., buf_out, MAX_BLOCK_SIZE, buf_in, size);
    if (ZSTD_isError(res)) { /*... handle error */ }
    assert(res <= MAX_BLOCK_SIZE);
    output.write_int(res);
    output.write_int(res == size ? 0 : size); // original size: 0 if same as compressed
    output.write_bytes(buf_out, res);
}
// cleanup
free(buf_in);
free(buf_out);
ZSTD_freeCCtx(...)

Decoder:

// input: stream-like object with the compressed data to decode
// output: stream-like object where to send the decompressed output
const MAX_BLOCK_SIZE = 128 * 1024;
ZSTD_createDCtx(...);
ZSTD_decompressBegin(...);
void* buf_in = malloc(MAX_BLOCK_SIZE);
void* buf_out = malloc(MAX_BLOCK_SIZE);
while(!input.eof())
{
    int chunk_size = input.read_int();
    int original_size = input.read_int();
    if (!original_size) original_size = chunk_size;
    int read = input.read_exactly(buf_in, chunk_size);
    assert(read == chunk_size);
    int res = ZSTD_decompressBlock(...., buf_out, MAX_BLOCK_SIZE, buf_in, chunk_size, original_size);
    if (ZSTD_isError(res)) { /* handle error */ }
    assert(res == original_size);
    output.write_bytes(buf_out, res);
}
// cleanup
free(buf_in);
free(buf_out);
ZSTD_freeDCtx(...);

It looks much simpler to me, but of course would need to change the existing block format internally...

@GregSlazinski (Author) commented Jul 7, 2016

You don't need to store both the uncompressed and the compressed size for each block; that's a waste of space.
Just store the compressed size for each block, and the uncompressed size of the entire data only once, at the start.
And in the decompression:

int total_size=file.readInt();
allocate data for total_size;
loop blocks
{
   int block_compressed_size=file.readInt();
   if(block_compressed_size<0)
   {
     // uncompressed
     int decompressed_size=-block_compressed_size;
    ..
   }else
   {
     // compressed
     int decompressed_size=ZSTD_decompress(..);
     ..
   }
}

(very simplified version)

@Cyan4973 (Contributor) commented Jul 7, 2016

There is a first tentative implementation of ZSTD_insertBlock() available in the latest "dev" branch update.
It seems to work fine, but has only received basic tests so far.

@KrzysFR (Contributor) commented Jul 7, 2016

I thought ZSTD_decompressBlock(..) was supposed to now require the original size as the last parameter?

Also, if you are compressing streams, you don't always know the total original size in advance, so you cannot use that as a header (a footer maybe).

@GregSlazinski (Author) commented Jul 7, 2016

@Cyan4973 awesome, thanks a lot!
I've just tested this, and it works great.
My entire 830 MB data file got compressed and decompressed successfully (decompressed data has the same hash as the original).

The low-level block compression is definitely the way to go, instead of the built-in frame API:
Original Size: 871197633 bytes
Compression Level = 9
Compressed Size using built-in frame API: 612984129 bytes
Compressed Size using blocks API: 612087709 bytes
Result: an 875 KB smaller file when using the blocks API.

The data file from the test comes from my game, and includes 3d mesh data, textures, sounds, etc.

In this test I've compressed the entire file data into one ZSTD compressed stream.
If compressing each file separately, then the savings would be even bigger.

Thanks a lot; as far as I'm concerned, the issue can be closed.

@KrzysFR:

I thought ZSTD_decompressBlock(..) was supposed to now require the original size as last parameter?

This was one of the solutions considered; however, it looks like it was dropped in favor of the better 'ZSTD_insertBlock' solution.

Thanks again Cyan!

@KrzysFR (Contributor) commented Jul 7, 2016

This was one of the solutions that was considered, however it looks like it was canceled in favor of the better 'ZSTD_insertBlock' solution.

Ah so this simplifies things a bit.... 👻

IF the compressed block DOES currently encode the original size, it would be great to have an API to access it (before calling ZSTD_decompressBlock).

But I get the feeling that there are possibly two (or more) use cases that want to deal with blocks, but not with the same constraints.

For completeness, imagine an API where no special handling is required for uncompressed blocks, but where framing is the responsibility of the app (see the updated code below).

Pros:

  • no need for ZSTD_insertBlock
  • for uncompressed blocks, original_size can be optimized with a flag "same as compressed".
  • for compressed blocks, it could be stored as (compressed_size, raw_size - compressed_size) to try to get small integers that play well with varint encoder
  • For apps where raw size is known from context, they don't need to be stored
    • This includes streaming that always compresses full 128 KB blocks, where the raw size of all blocks except the last is known (128 KB) and only the size of the last block must be stored.
    • Files using a single block where the size is already stored in metadata

Cons:

  • Current block format must be changed, or a new one added
  • Users of the block API must think carefully about their own framing.
  • A naked compressed block cannot be decompressed without knowledge of the original size

Notes:

  • using an extra bit (sign bit, or size << 1 | flag) combined with varint encoding is not "free": adding the extra bit makes values in the ranges 64..127, 8192..16383, 1048576..2097151 need one more byte in the stream (for a single bit); see the sketch after this list:
    ex: varint encoding comparison (flag stored in LSB)
  • 123 => b_0_1111011 => 7B (1 byte)
  • 123 << 1 | 1 => b_1_1110111 => F7 01 (2 bytes, +1)
    ex: another case with one more byte to store the flag
  • 10000 => b_1001110_0010000 => 90 4E (2 bytes)
  • 10000 << 1 | 1 => b_1_0011100_0100001 => A1 9C 01 (3 bytes, +1)
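
A tiny helper makes the byte counts above concrete (app-level sketch, not zstd API):

/* Number of bytes a value takes in 7-bit varint encoding. */
int varint_len(unsigned v)
{
    int n = 1;
    while (v >= 128) { v >>= 7; n++; }
    return n;
}
/* varint_len(123)            == 1
   varint_len(123 << 1 | 1)   == 2    -- 64..127 cross the 1-byte boundary
   varint_len(10000)          == 2
   varint_len(10000 << 1 | 1) == 3    -- 8192..16383 cross the 2-byte boundary */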

Regarding extra bytes due to custom framing:

  1. The format of the compressed block would be changed to remove the original raw size (note: if it is even present?)
  2. ZSTD_decompressBlock would be changed to ask the app for the original raw size.
  3. The app would have to store raw_size and compressed_size in the most compact way possible.

We would gain X bytes from 1), have to spend Y bytes for 2) and 3), and end up with a delta of Y-X bytes.

Encoder:

// input: stream-like object with the plain text to encode
// output: stream-like object where to send the compressed output
const MAX_BLOCK_SIZE = 128 * 1024;
ZSTD_createCCtx(...)
ZSTD_compressBegin(..);
void* buf_in = malloc(MAX_BLOCK_SIZE);
void* buf_out = malloc(MAX_BLOCK_SIZE);
while(!input.eof())
{
    int size = input.read_buffer(buf_in, capacity: MAX_BLOCK_SIZE);
    // probably introduce a loop here to fill the buffer close to the limit,
    // for socket-like streams with partial reads (ex: Transfer-Encoding: chunked)

    // compress block with no special handling here
    int res = ZSTD_compressBlock_no_framing(..., buf_out, MAX_BLOCK_SIZE, buf_in, size);
    if (ZSTD_isError(res)) { /*... handle error */ }
    assert(res <= size); // size cannot be larger than original!

    // write block header and compressed bytes
    output.write_int(res); // compressed_size
    output.write_int(res == size ? 0 : size - res); // raw_size: 0 if uncompressed; else delta
    output.write_bytes(buf_out, res);
}
// note: we could add a 0 marker here to mean "end of stream", and let the decoder know to stop there?
// or have it followed by a footer (with total size, metadata, signature, ...)
output.flush();
// cleanup
free(buf_in);
free(buf_out);
ZSTD_freeCCtx(...)

Decoder:

// input: stream-like object with the compressed data to decode
// output: stream-like object where to send the decompressed output
const MAX_BLOCK_SIZE = 128 * 1024;
ZSTD_createDCtx(...);
ZSTD_decompressBegin(...);
void* buf_in = malloc(MAX_BLOCK_SIZE);
void* buf_out = malloc(MAX_BLOCK_SIZE);
while(!input.eof())
{
    // read block header
    int compressed_size = input.read_int(); // compressed_size
    // note: possibly we could have 0 here mean "end of stream" ?
    int raw_size = input_read_int(); // raw_size: 0 or delta
    raw_size = raw_size == 0 ? compressed_size : raw_size + compressed_size;

    // read compressed bytes
    int read = input.read_exactly(buf_in, compressed_size);
    assert(read == compressed_size);

    // decompress block (must provide original size)
    int res = ZSTD_decompressBlock_no_framing(...., buf_out, MAX_BLOCK_SIZE, buf_in, compressed_size, raw_size);
    if (ZSTD_isError(res)) { /* handle error */ }

    // write decompressed bytes to output
    output.write_bytes(buf_out, res);
}
output.flush();

// cleanup
free(buf_in);
free(buf_out);
ZSTD_freeDCtx(...);

@KrzysFR (Contributor) commented Jul 7, 2016

An example of where both raw_size and compressed_size are known from context and can be omitted is if you store each block as an entry in a key/value store, using the following format (which supports sparse files, as well as random seek)

(file_id, file_offset, len) = [compressed_block_bytes]

You will know the size of the compressed bytes by reading the size of the value (which is stored in the k/v store's b-tree anyway). And you can get the original size from the key by reading len.

If the k/v store offers direct pointer access to a memory mapped file, then the encoder/decoder can read/write straight from/to the mmap with no copy to temporary buffers needed.

@Cyan4973 (Contributor) commented Jul 7, 2016

IF the compressed block DOES currently encode the original size,

No, it does not.

The frame does, and there is an API to consult it (ZSTD_getDecompressedSize()).

But the block does not store its own size, neither compressed nor uncompressed,
except by accident in one special case:
when there are no sequences, only literals, in which case sizeof(literals)==sizeof(block).
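
For comparison, querying the size recorded in a frame header is straightforward (a sketch using the frame-level API of that era; it returns 0 when the size was not recorded):

#include "zstd.h"

/* Frames may record the original content size in their header; blocks
 * have no equivalent field. */
unsigned long long query_original_size(const void* frame, size_t frameSize)
{
    return ZSTD_getDecompressedSize(frame, frameSize);   /* 0 == unknown */
}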

@KrzysFR (Contributor) commented Jul 7, 2016

IF the compressed block DOES currently encode the original size,
No, it does not.

Oh... 😢 🐼

But then how does the decoder know it is finished? It just decodes until it runs out of input, with no check other than the max dst capacity? That means corruption would only be caught after the fact, by comparing the result of ZSTD_decompressBlock with what is in the frame...

I see now why it wasn't making sense to me...

Since there is no way to flag a naked uncompressed block without adding at least one bit, I guess we can only rely on ZSTD_insertBlock(), plus extra logic to keep a running total of the decoded size so far, and fail if we run out of "total space" while still having blocks to decode.

Also, don't forget to check that the dstCapacity passed to ZSTD_decompressBlock does not overrun the end of the allocated total space, in case a previous block did not decode correctly and filled the dst buffer more than expected...

@KrzysFR (Contributor) commented Jul 7, 2016

So, trying to make it run with all the checks and alarms:

Version where it is possible to know in advance the total size of the source:

  • Starts with TOTAL size
  • For each block: CHUNK_SIZE, CHUNK_DATA
    • CHUNK_SIZE < 0 means uncompressed
    • CHUNK_SIZE > 0 means compressed
    • CHUNK_SIZE == 0 means ... end of stream?

disclaimer: I'm writing this late, and with a pizza in one hand, so probably buggy!

Encoder:

// input: stream-like object with the plain text to encode
// output: stream-like object where to send the compressed output
const MAX_BLOCK_SIZE = 128 * 1024; // or less
ZSTD_createCCtx(...)
ZSTD_compressBegin(..);
void* buf_in = malloc(MAX_BLOCK_SIZE);
void* buf_out = malloc(MAX_BLOCK_SIZE);

long total = input.magically_know_total_size(); // Can't do that with: sockets, generators, JSON serializer, ...
output.write_int_64(total);

long read = 0;
while(read < total) {
    // read next block from source
    int size = input.read_buffer(buf_in, MAX_BLOCK_SIZE);
    // probably introduce a loop here to fill the buffer close to the limit,
    // for socket-like streams with partial reads (ex: Transfer-Encoding: chunked)
    assert(size <= MAX_BLOCK_SIZE);
    assert(read + size <= total);
    read += size;

    // try compressing
    int res = ZSTD_compressBlock(..., buf_out, MAX_BLOCK_SIZE, buf_in, size);
    if (ZSTD_isError(res)) { /*... handle error */ }
    if (res == 0) {
        // uncompressed
        output.write_sint32(-size);
        output.write_bytes(buf_in, size);
    }
    else {
        // compressed
        assert(res < size, "must reduce size");
        output.write_sint32(res);
        output.write_bytes(buf_out, res);
    }
}
//optional: add checksum, signature, EOF marker...

cleanup:
free(buf_in);
free(buf_out);
ZSTD_freeCCtx(...)

Decoder:

// input: stream-like object with the compressed data to decode
// output: stream-like object where to send the decompressed output
const MAX_BLOCK_SIZE = 128 * 1024; // or less but at least as much as encoder
ZSTD_createDCtx(...);
ZSTD_decompressBegin(...);
void* buf_in = malloc(MAX_BLOCK_SIZE);
void* buf_out = malloc(MAX_BLOCK_SIZE);

long total = input.read_int64();
assert(total <= min(FREE_DISK_SPACE, SIZE_OF_THE_INTERNET)); // sanity check here

long decoded = 0;
while(!input.eof()) {
    int chunk_size = input.read_sint32();
    if (chunk_size == 0) { 
        // end of stream
        break;
    }
    int size = chunk_size < 0 ? -chunk_size : chunk_size;
    assert(size <= MAX_BLOCK_SIZE);
    int read = input.read_exactly(buf_in, size);
    if (read < size) { /* error truncated file */ }
    assert(read == size);

    int res;
    int capacity = min(total - decoded, MAX_BLOCK_SIZE);
    if (chunk_size < 0) {
        // uncompressed
        if (size > capacity) { /* error decoded too much probably corruption */ }
        res = ZSTD_insertBlock(..., buf_out, capacity, buf_in, size);
    }
    else {
        // compressed
        res = ZSTD_decompressBlock(...., buf_out, capacity, buf_in, size);
        //note: assumes that it fails if decoded size > capacity?
    }
    if (ZSTD_isError(res)) { /* handle error + potential overflow here */ }
    if (decoded + res > total) { /* error decoded too much probably corruption */ }
    output.write_bytes(buf_out, res);
    decoded += res;
}
if (decoded != total) { /* error decoded not enough */ }
// optional: checksum, signature, ... ?

cleanup:
free(buf_in);
free(buf_out);
ZSTD_freeDCtx(...);

the pizza is now cold 😢

@GregSlazinski (Author):

Today I was able to perform more tests, and all works great at the following compression levels:
1, 9, 16, 18, 19, 20

However, when I tried level 22, compression/decompression succeeded without errors, but the decompressed result file had a different hash than the original file.

I'm providing unoptimized compress/decompress functions that I've simplified in order to reproduce the bug.

The compressor reads the source file step by step into a contiguous memory buffer capable of storing the entire source.
The decompressor writes decompressed chunks step by step into a contiguous memory buffer capable of storing the entire uncompressed data, and uncompressed blocks are handled properly.

This should avoid any problems with buffer reuse; however, the problem persists.

static Bool ZSTDCompress(File &src, File &dest, Int compression_level)
{
   Bool ok=false;
   if(ZSTD_CCtx *ctx=ZSTD_createCCtx_advanced(ZSTDMem))
   {
      ZSTD_parameters params=ZSTD_getParams(Mid(compression_level, 1, ZSTD_maxCLevel()), src.left(), 0);
      if(!ZSTD_isError(ZSTD_compressBegin_advanced(ctx, null, 0, params, src.left())))
      {
       C Int window_size=1<<params.cParams.windowLog, block_size=Min(window_size, ZSTD_BLOCKSIZE_MAX);
         Memt<Byte> s, d; s.setNum(src.left()); d.setNum(ZSTDSize(block_size)+1); Int s_pos=0;
         dest.cmpUIntV(params.cParams.windowLog);
         for(; !src.end(); )
         {
            Int read=Min(ZSTD_BLOCKSIZE_MAX, Min(s.elms(), src.left()));
            if(s_pos>s.elms()-read)goto error; // if reading will exceed buffer size
            read=src.getReturnSize(&s[s_pos], read); if(read<=0)goto error;
            auto size=ZSTD_compressBlock(ctx, d.data(), d.elms(), &s[s_pos], read); if(ZSTD_isError(size))goto error; // ZSTD_compressBlock returns 0 if failed to compress
            dest.putBool(size!=0); // save if compressed
            if(size>0){dest.putUInt(size); if(!dest.put( d.data(), size))goto error;} // compressed
            else      {dest.putUInt(read); if(!dest.put(&s[s_pos], read))goto error;} // failed to compress
            s_pos+=read;
         }
         if(dest.ok())ok=true;
      }
   error:
      ZSTD_freeCCtx(ctx);
   }
   return ok;
}
static Bool ZSTDDecompress(File &src, File &dest, Long compressed_size, Long decompressed_size)
{
   Bool ok=false;
   if(ZSTD_DCtx *ctx=ZSTD_createDCtx_advanced(ZSTDMem))
   {
      ZSTD_decompressBegin(ctx);
    C auto window_size=1<<src.decUIntV(), block_size=Min(window_size, ZSTD_BLOCKSIZE_MAX);
      Memt<Byte> s, d; s.setNum(block_size); d.setNum(decompressed_size); Int d_pos=0;
      for(; !src.end(); )
      {
         if(src.getBool()) // compressed
         {
            UInt chunk=src.getUInt();
            if(chunk>s.elms())goto error;
            if(!src.getFast(s.data(), chunk))goto error; // need exactly 'chunk' amount
            auto size=ZSTD_decompressBlock(ctx, &d[d_pos], d.elms()-d_pos, s.data(), chunk); if(ZSTD_isError(size))goto error;
            d_pos+=size;
         }else // un-compressed
         {
            Int size=src.getUInt();
            if(d_pos>d.elms()-size)goto error; // if reading will exceed buffer size
            if(!src.getFast(&d[d_pos], size))goto error;
            if(ZSTD_isError(ZSTD_insertBlock(ctx, &d[d_pos], size)))goto error;
            d_pos+=size;
         }
      }
      if(d_pos==decompressed_size)
         ok=dest.put(&d[0], d_pos);
   error:
      ZSTD_freeDCtx(ctx);
   }
   return ok;
}

@Cyan4973: how can I provide the original file for testing?
It includes my game files, so I'd rather not share it publicly; can I send you a Dropbox link to the file privately?
I've noticed that the hash differs after processing a 132 MB file.

@Cyan4973 (Contributor) commented Jul 8, 2016

Hi @GregSlazinski

I'd rather not share it publicly,

Yes, you are totally right.
You can use http://www.emailmeform.com/builder/form/a70egol6fyVf15zd46jY88 to initiate a private exchange.

Knowing the exact position of the 1st wrong byte is likely to help.

@Cyan4973 (Contributor) commented Jul 8, 2016

I'm not sure I properly follow:

         }else // un-compressed
         {
            Int size=src.getUInt();
            if(d_pos>d.elms()-size)goto error; // if reading will exceed buffer size
            if(!src.getFast(&d[d_pos], size))goto error;
            if(ZSTD_isError(ZSTD_insertBlock(ctx, &d[d_pos], size)))goto error;
            d_pos+=size;
         }

What does it do ?

In particular :
if(!src.getFast(&d[d_pos], size))goto error;
Is it some kind of memcpy() ?

@Cyan4973 (Contributor) commented Jul 8, 2016

I made a "blind fix" (as I'm unable to reproduce the issue directly).
It is now uploaded on "dev" branch.
@GregSlazinski : mind testing it ?

@GregSlazinski (Author):

Hi,

if(!src.getFast(&d[d_pos], size))goto error;

this just reads from the 'src' file into the specified memory address for the given size; it returns false on failure and true on success.

I made a "blind fix" (as I'm unable to reproduce your problem directly).

Sure, I'll test this right away.

@GregSlazinski (Author):

The 132 MB file now works OK. I'll do the test on the full 800+ MB file now, and will let you know the results.

@GregSlazinski (Author):

Thanks a lot, the big file now works OK too.
I'll let you know if I encounter any other problems.

Thanks,

Greg

@Cyan4973 (Contributor) commented Jul 8, 2016

Fixed in v0.7.3

Cyan4973 closed this as completed Jul 8, 2016