Skip to content
This repository has been archived by the owner on Mar 24, 2022. It is now read-only.

Commit

Permalink
lib: zstd: Remove large inline functions from zstd_{double_,}fast.c
Browse files Browse the repository at this point in the history
Backport of upstream PR #2863 [0].

Large functions with excessive force inlining can cause trouble for
compilers, and can sometimes take excess stack space because the
compiler isn't able to fully analyze the function. This commit splits
functions that have multiple copies of the same body into multiple
smaller functions, which can help the compiler.

This was specifically causing issues on the parisc architecture [1].
In this configuration, especially with UBSAN enabled, these functions
stack usage could get quite large. This is because the compiler was
doing a poor job handling the extremely large function which had
multiple copies of the function body inlined into it. After this commit
we see:

[0] facebook/zstd#2863
[1] https://lkml.org/lkml/2021/11/15/710

Reported-by: Geert Uytterhoeven <geert@linux-m68k.org>
Signed-off-by: Nick Terrell <terrelln@fb.com>
  • Loading branch information
terrelln authored and buzzcut-s committed Nov 18, 2021
1 parent 0bbd879 commit 735db60
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 26 deletions.
61 changes: 48 additions & 13 deletions lib/zstd/compress/zstd_double_fast.c
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,26 @@ size_t ZSTD_compressBlock_doubleFast_generic(
return (size_t)(iend - anchor);
}

#define ZSTD_GEN_FN(dictMode, mls) \
static size_t ZSTD_compressBlock_doubleFast_##dictMode##_##mls( \
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \
{ \
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_##dictMode); \
}

ZSTD_GEN_FN(noDict, 4)
ZSTD_GEN_FN(noDict, 5)
ZSTD_GEN_FN(noDict, 6)
ZSTD_GEN_FN(noDict, 7)

ZSTD_GEN_FN(dictMatchState, 4)
ZSTD_GEN_FN(dictMatchState, 5)
ZSTD_GEN_FN(dictMatchState, 6)
ZSTD_GEN_FN(dictMatchState, 7)

#undef ZSTD_GEN_FN


size_t ZSTD_compressBlock_doubleFast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
Expand All @@ -323,13 +343,13 @@ size_t ZSTD_compressBlock_doubleFast(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast_noDict_7(ms, seqStore, rep, src, srcSize);
}
}

Expand All @@ -343,13 +363,13 @@ size_t ZSTD_compressBlock_doubleFast_dictMatchState(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 4, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 5, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 6, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, 7, ZSTD_dictMatchState);
return ZSTD_compressBlock_doubleFast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
}
}

Expand Down Expand Up @@ -385,7 +405,7 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(

/* if extDict is invalidated due to maxDistance, switch to "regular" variant */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_doubleFast_generic(ms, seqStore, rep, src, srcSize, mls, ZSTD_noDict);
return ZSTD_compressBlock_doubleFast(ms, seqStore, rep, src, srcSize);

/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
Expand Down Expand Up @@ -499,6 +519,21 @@ static size_t ZSTD_compressBlock_doubleFast_extDict_generic(
}


#define ZSTD_GEN_FN(mls) \
static size_t ZSTD_compressBlock_doubleFast_extDict_##mls( \
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \
{ \
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, mls); \
}

ZSTD_GEN_FN(4)
ZSTD_GEN_FN(5)
ZSTD_GEN_FN(6)
ZSTD_GEN_FN(7)

#undef ZSTD_GEN_FN

size_t ZSTD_compressBlock_doubleFast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
Expand All @@ -508,12 +543,12 @@ size_t ZSTD_compressBlock_doubleFast_extDict(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_doubleFast_extDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_doubleFast_extDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_doubleFast_extDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_doubleFast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_doubleFast_extDict_7(ms, seqStore, rep, src, srcSize);
}
}
69 changes: 56 additions & 13 deletions lib/zstd/compress/zstd_fast.c
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,20 @@ ZSTD_compressBlock_fast_generic(
return (size_t)(iend - anchor);
}

#define ZSTD_GEN_FN(mls) \
static size_t ZSTD_compressBlock_fast_##mls( \
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \
{ \
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls); \
}

ZSTD_GEN_FN(4)
ZSTD_GEN_FN(5)
ZSTD_GEN_FN(6)
ZSTD_GEN_FN(7)

#undef ZSTD_GEN_FN

size_t ZSTD_compressBlock_fast(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
Expand All @@ -193,13 +207,13 @@ size_t ZSTD_compressBlock_fast(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_fast_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_fast_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_fast_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_fast_7(ms, seqStore, rep, src, srcSize);
}
}

Expand Down Expand Up @@ -351,6 +365,21 @@ size_t ZSTD_compressBlock_fast_dictMatchState_generic(
return (size_t)(iend - anchor);
}

#define ZSTD_GEN_FN(mls) \
static size_t ZSTD_compressBlock_fast_dictMatchState_##mls( \
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \
{ \
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, mls); \
}

ZSTD_GEN_FN(4)
ZSTD_GEN_FN(5)
ZSTD_GEN_FN(6)
ZSTD_GEN_FN(7)

#undef ZSTD_GEN_FN

size_t ZSTD_compressBlock_fast_dictMatchState(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
void const* src, size_t srcSize)
Expand All @@ -361,13 +390,13 @@ size_t ZSTD_compressBlock_fast_dictMatchState(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_fast_dictMatchState_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_fast_dictMatchState_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_fast_dictMatchState_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_fast_dictMatchState_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_fast_dictMatchState_7(ms, seqStore, rep, src, srcSize);
}
}

Expand Down Expand Up @@ -402,7 +431,7 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(

/* switch to "regular" variant if extDict is invalidated due to maxDistance */
if (prefixStartIndex == dictStartIndex)
return ZSTD_compressBlock_fast_generic(ms, seqStore, rep, src, srcSize, mls);
return ZSTD_compressBlock_fast(ms, seqStore, rep, src, srcSize);

/* Search Loop */
while (ip < ilimit) { /* < instead of <=, because (ip+1) */
Expand Down Expand Up @@ -475,6 +504,20 @@ static size_t ZSTD_compressBlock_fast_extDict_generic(
return (size_t)(iend - anchor);
}

#define ZSTD_GEN_FN(mls) \
static size_t ZSTD_compressBlock_fast_extDict_##mls( \
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM], \
void const* src, size_t srcSize) \
{ \
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, mls); \
}

ZSTD_GEN_FN(4)
ZSTD_GEN_FN(5)
ZSTD_GEN_FN(6)
ZSTD_GEN_FN(7)

#undef ZSTD_GEN_FN

size_t ZSTD_compressBlock_fast_extDict(
ZSTD_matchState_t* ms, seqStore_t* seqStore, U32 rep[ZSTD_REP_NUM],
Expand All @@ -485,12 +528,12 @@ size_t ZSTD_compressBlock_fast_extDict(
{
default: /* includes case 3 */
case 4 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 4);
return ZSTD_compressBlock_fast_extDict_4(ms, seqStore, rep, src, srcSize);
case 5 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 5);
return ZSTD_compressBlock_fast_extDict_5(ms, seqStore, rep, src, srcSize);
case 6 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 6);
return ZSTD_compressBlock_fast_extDict_6(ms, seqStore, rep, src, srcSize);
case 7 :
return ZSTD_compressBlock_fast_extDict_generic(ms, seqStore, rep, src, srcSize, 7);
return ZSTD_compressBlock_fast_extDict_7(ms, seqStore, rep, src, srcSize);
}
}

0 comments on commit 735db60

Please sign in to comment.