Skip to content

Commit

Permalink
Merge branch 'dev' of github.com:facebook/zstd into dev
Browse files Browse the repository at this point in the history
  • Loading branch information
Cyan4973 committed Nov 13, 2018
2 parents 092c4ab + f28af02 commit 768a264
Show file tree
Hide file tree
Showing 5 changed files with 82 additions and 53 deletions.
59 changes: 38 additions & 21 deletions doc/zstd_manual.html
Original file line number Diff line number Diff line change
Expand Up @@ -798,6 +798,38 @@ <h3>Buffer-less streaming decompression functions</h3><pre></pre><b><pre>typedef
* Decoder cannot recognise automatically this format, requiring instructions. */
} ZSTD_format_e;
</b></pre><BR>
<pre><b>typedef enum {
</b>/* Note: this enum and the behavior it controls are effectively internal<b>
* implementation details of the compressor. They are expected to continue
* to evolve and should be considered only in the context of extremely
* advanced performance tuning.
*
* Zstd currently supports the use of a CDict in two ways:
*
* - The contents of the CDict can be copied into the working context. This
* means that the compression can search both the dictionary and input
* while operating on a single set of internal tables. This makes
* the compression faster per-byte of input. However, the initial copy of
* the CDict's tables incurs a fixed cost at the beginning of the
* compression. For small compressions (< 8 KB), that copy can dominate
* the cost of the compression.
*
* - The CDict's tables can be used in-place. In this model, compression is
* slower per input byte, because the compressor has to search two sets of
* tables. However, this model incurs no start-up cost (as long as the
* working context's tables can be reused). For small inputs, this can be
* faster than copying the CDict's tables.
*
* Zstd has a simple internal heuristic that selects which strategy to use
* at the beginning of a compression. However, if experimentation shows that
* Zstd is making poor choices, it is possible to override that choice with
* this enum.
*/
ZSTD_dictDefaultAttach = 0, </b>/* Use the default heuristic. */<b>
ZSTD_dictForceAttach = 1, </b>/* Never copy the dictionary. */<b>
ZSTD_dictForceCopy = 2, </b>/* Always copy the dictionary. */<b>
} ZSTD_dictAttachPref_e;
</b></pre><BR>
<pre><b>typedef enum {
</b>/* compression format */<b>
ZSTD_p_format = 10, </b>/* See ZSTD_format_e enum definition.<b>
Expand Down Expand Up @@ -911,29 +943,14 @@ <h3>Buffer-less streaming decompression functions</h3><pre></pre><b><pre>typedef

ZSTD_p_forceMaxWindow=1100, </b>/* Force back-reference distances to remain < windowSize,<b>
* even when referencing into Dictionary content (default:0) */
ZSTD_p_forceAttachDict, </b>/* ZSTD supports usage of a CDict in-place<b>
* (avoiding having to copy the compression tables
* from the CDict into the working context). Using
* a CDict in this way saves an initial setup step,
* but comes at the cost of more work per byte of
* input. ZSTD has a simple internal heuristic that
* guesses which strategy will be faster. You can
* use this flag to override that guess.
ZSTD_p_forceAttachDict, </b>/* Controls whether the contents of a CDict are<b>
* used in place, or whether they are copied into
* the working context.
*
* Note that the by-reference, in-place strategy is
* only used when reusing a compression context
* with compatible compression parameters. (If
* incompatible / uninitialized, the working
* context needs to be cleared anyways, which is
* about as expensive as overwriting it with the
* dictionary context, so there's no savings in
* using the CDict by-ref.)
*
* Values greater than 0 force attaching the dict.
* Values less than 0 force copying the dict.
* 0 selects the default heuristic-guided behavior.
* Accepts values from the ZSTD_dictAttachPref_e
* enum. See the comments on that enum for an
* explanation of the feature.
*/

} ZSTD_cParameter;
</b></pre><BR>
<pre><b>size_t ZSTD_CCtx_setParameter(ZSTD_CCtx* cctx, ZSTD_cParameter param, unsigned value);
Expand Down
9 changes: 5 additions & 4 deletions lib/compress/zstd_compress.c
Original file line number Diff line number Diff line change
Expand Up @@ -412,11 +412,12 @@ size_t ZSTD_CCtxParam_setParameter(
CCtxParams->forceWindow = (value > 0);
return CCtxParams->forceWindow;

case ZSTD_p_forceAttachDict :
CCtxParams->attachDictPref = value ?
(value > 0 ? ZSTD_dictForceAttach : ZSTD_dictForceCopy) :
ZSTD_dictDefaultAttach;
case ZSTD_p_forceAttachDict : {
const ZSTD_dictAttachPref_e pref = (ZSTD_dictAttachPref_e)value;
CLAMPCHECK(pref, ZSTD_dictDefaultAttach, ZSTD_dictForceCopy);
CCtxParams->attachDictPref = pref;
return CCtxParams->attachDictPref;
}

case ZSTD_p_nbWorkers :
#ifndef ZSTD_MULTITHREAD
Expand Down
6 changes: 0 additions & 6 deletions lib/compress/zstd_compress_internal.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,12 +48,6 @@ extern "C" {
typedef enum { ZSTDcs_created=0, ZSTDcs_init, ZSTDcs_ongoing, ZSTDcs_ending } ZSTD_compressionStage_e;
typedef enum { zcss_init=0, zcss_load, zcss_flush } ZSTD_cStreamStage;

typedef enum {
ZSTD_dictDefaultAttach = 0,
ZSTD_dictForceAttach = 1,
ZSTD_dictForceCopy = -1,
} ZSTD_dictAttachPref_e;

typedef struct ZSTD_prefixDict_s {
const void* dict;
size_t dictSize;
Expand Down
59 changes: 38 additions & 21 deletions lib/zstd.h
Original file line number Diff line number Diff line change
Expand Up @@ -996,6 +996,38 @@ typedef enum {
* Decoder cannot recognise automatically this format, requiring instructions. */
} ZSTD_format_e;

typedef enum {
/* Note: this enum and the behavior it controls are effectively internal
* implementation details of the compressor. They are expected to continue
* to evolve and should be considered only in the context of extremely
* advanced performance tuning.
*
* Zstd currently supports the use of a CDict in two ways:
*
* - The contents of the CDict can be copied into the working context. This
* means that the compression can search both the dictionary and input
* while operating on a single set of internal tables. This makes
* the compression faster per-byte of input. However, the initial copy of
* the CDict's tables incurs a fixed cost at the beginning of the
* compression. For small compressions (< 8 KB), that copy can dominate
* the cost of the compression.
*
* - The CDict's tables can be used in-place. In this model, compression is
* slower per input byte, because the compressor has to search two sets of
* tables. However, this model incurs no start-up cost (as long as the
* working context's tables can be reused). For small inputs, this can be
* faster than copying the CDict's tables.
*
* Zstd has a simple internal heuristic that selects which strategy to use
* at the beginning of a compression. However, if experimentation shows that
* Zstd is making poor choices, it is possible to override that choice with
* this enum.
*/
ZSTD_dictDefaultAttach = 0, /* Use the default heuristic. */
ZSTD_dictForceAttach = 1, /* Never copy the dictionary. */
ZSTD_dictForceCopy = 2, /* Always copy the dictionary. */
} ZSTD_dictAttachPref_e;

typedef enum {
/* compression format */
ZSTD_p_format = 10, /* See ZSTD_format_e enum definition.
Expand Down Expand Up @@ -1109,29 +1141,14 @@ typedef enum {

ZSTD_p_forceMaxWindow=1100, /* Force back-reference distances to remain < windowSize,
* even when referencing into Dictionary content (default:0) */
ZSTD_p_forceAttachDict, /* ZSTD supports usage of a CDict in-place
* (avoiding having to copy the compression tables
* from the CDict into the working context). Using
* a CDict in this way saves an initial setup step,
* but comes at the cost of more work per byte of
* input. ZSTD has a simple internal heuristic that
* guesses which strategy will be faster. You can
* use this flag to override that guess.
ZSTD_p_forceAttachDict, /* Controls whether the contents of a CDict are
* used in place, or whether they are copied into
* the working context.
*
* Note that the by-reference, in-place strategy is
* only used when reusing a compression context
* with compatible compression parameters. (If
* incompatible / uninitialized, the working
* context needs to be cleared anyways, which is
* about as expensive as overwriting it with the
* dictionary context, so there's no savings in
* using the CDict by-ref.)
*
* Values greater than 0 force attaching the dict.
* Values less than 0 force copying the dict.
* 0 selects the default heuristic-guided behavior.
* Accepts values from the ZSTD_dictAttachPref_e
* enum. See the comments on that enum for an
* explanation of the feature.
*/

} ZSTD_cParameter;


Expand Down
2 changes: 1 addition & 1 deletion tests/fuzz/zstd_helpers.c
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ void FUZZ_setRandomParameters(ZSTD_CCtx *cctx, size_t srcSize, uint32_t *state)
setRand(cctx, ZSTD_p_contentSizeFlag, 0, 1, state);
setRand(cctx, ZSTD_p_checksumFlag, 0, 1, state);
setRand(cctx, ZSTD_p_dictIDFlag, 0, 1, state);
setRand(cctx, ZSTD_p_forceAttachDict, -2, 2, state);
setRand(cctx, ZSTD_p_forceAttachDict, 0, 2, state);
/* Select long distance matching parameters */
setRand(cctx, ZSTD_p_enableLongDistanceMatching, 0, 1, state);
setRand(cctx, ZSTD_p_ldmHashLog, ZSTD_HASHLOG_MIN, 16, state);
Expand Down

0 comments on commit 768a264

Please sign in to comment.