Commit 2c5d12a

[klauspost/deflate-improve-comp] Remove hash7 and use const for long table bytes.
Change-Id: Ia141c7ec888bf51ceb6351d2a1c3f1501c2c4e12
1 parent 374779b commit 2c5d12a

5 files changed: +24 -28 lines changed

src/compress/flate/deflatefast.go

Lines changed: 1 addition & 6 deletions

@@ -35,6 +35,7 @@ func newFastEnc(level int) fastEnc {
 const (
     tableBits = 15 // Bits used in the table
     tableSize = 1 << tableBits // Size of the table
+    hashLongBytes = 7 // Bytes used for long table hash
     baseMatchOffset = 1 // The smallest match offset
     baseMatchLength = 3 // The smallest match length per the RFC section 3.2.5
     maxMatchOffset = 1 << 15 // The largest match offset
@@ -93,12 +94,6 @@ type tableEntryPrev struct {
     Prev tableEntry
 }

-// hash7 returns the hash of the lowest 7 bytes of u to fit in a hash table with h bits.
-// Preferably h should be a constant and should always be <64.
-func hash7(u uint64, h uint8) uint32 {
-    return uint32(((u << (64 - 56)) * prime7bytes) >> ((64 - h) & reg8SizeMask64))
-}
-
 // hashLen returns a hash of the lowest mls bytes of with length output bits.
 // mls must be >=3 and <=8. Any other value will return hash for 4 bytes.
 // length should always be < 32.
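
The removed hash7 folded the lowest 7 bytes of its argument into a table index with a multiplicative hash; after this commit the same call sites use the generic hashLen with the new hashLongBytes constant, so the byte count is no longer hard-coded at each call. Below is a minimal sketch of that byte-limited multiplicative hash, under the assumption that hashLen follows the same shift-multiply-shift shape as the removed function; the prime is illustrative, not the package's prime7bytes.

package main

import "fmt"

// illustrativePrime is an arbitrary large odd constant used only for this
// sketch; the real code multiplies by prime7bytes (not shown in this diff).
const illustrativePrime = 0x9E3779B185EBCA87

// hashBytesSketch hashes the lowest n bytes of u into h output bits:
// shift the unwanted high bytes out, multiply, keep the top h bits.
func hashBytesSketch(u uint64, h, n uint8) uint32 {
    u <<= 64 - 8*uint(n)
    return uint32((u * illustrativePrime) >> (64 - h))
}

func main() {
    const (
        tableBits     = 15
        hashLongBytes = 7 // the new constant: long-table hashes cover 7 bytes
    )
    v := uint64(0x0102030405060708)

    // With n == hashLongBytes this plays the role hash7(v, tableBits) played:
    // the result depends only on the lowest 7 bytes and fits in tableBits bits.
    a := hashBytesSketch(v, tableBits, hashLongBytes)
    b := hashBytesSketch(v|0xff<<56, tableBits, hashLongBytes) // top byte differs
    fmt.Println(a == b, a < 1<<tableBits)                      // true true
}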

src/compress/flate/huffman_bit_writer.go

Lines changed: 2 additions & 1 deletion

@@ -412,8 +412,9 @@ func (w *huffmanBitWriter) storedSize(in []byte) (int, bool) {
     return 0, false
 }

+// writeCode writes 'c' to the stream.
+// Inline manually when performance is critical.
 func (w *huffmanBitWriter) writeCode(c hcode) {
-    // The function does not get inlined if we "& 63" the shift.
     w.bits |= c.code64() << (w.nbits & reg8SizeMask64)
     w.nbits += c.len()
     if w.nbits >= 48 {
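
The new doc comment on writeCode points at the accumulate-and-flush scheme visible in the context lines: each code is ORed into a 64-bit bit buffer at the current bit offset, and whole bytes are emitted once at least 48 bits are pending. A self-contained sketch of that pattern follows; the flush and output buffering here are assumptions for illustration, not the package's actual output path.

package main

import "fmt"

// hcodeSketch stands in for the package's hcode: a code value plus its bit length.
type hcodeSketch struct {
    code uint64
    len  uint8
}

// bitWriterSketch mimics the shape of writeCode: OR the code into a 64-bit
// buffer at the current bit position, then flush 6 whole bytes once at least
// 48 bits are pending.
type bitWriterSketch struct {
    bits  uint64
    nbits uint8
    out   []byte
}

func (w *bitWriterSketch) writeCode(c hcodeSketch) {
    w.bits |= c.code << (w.nbits & 63)
    w.nbits += c.len
    if w.nbits >= 48 {
        // Emit the 6 completed low bytes and keep the remainder buffered.
        bits := w.bits
        w.bits >>= 48
        w.nbits -= 48
        w.out = append(w.out,
            byte(bits), byte(bits>>8), byte(bits>>16),
            byte(bits>>24), byte(bits>>32), byte(bits>>40))
    }
}

func main() {
    var w bitWriterSketch
    for i := 0; i < 10; i++ {
        w.writeCode(hcodeSketch{code: 0x2a, len: 7}) // ten 7-bit codes
    }
    fmt.Printf("flushed %d bytes, %d bits pending\n", len(w.out), w.nbits) // 6 bytes, 22 bits
}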

src/compress/flate/level4.go

Lines changed: 8 additions & 8 deletions

@@ -82,7 +82,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
     var t int32
     for {
         nextHashS := hashLen(cv, tableBits, hashShortBytes)
-        nextHashL := hash7(cv, tableBits)
+        nextHashL := hashLen(cv, tableBits, hashLongBytes)

         s = nextS
         nextS = s + doEvery + (s-nextEmit)>>skipLog
@@ -106,7 +106,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
         t = sCandidate.offset - e.cur
         if s-t < maxMatchOffset && uint32(cv) == loadLE32(src, t) {
             // Found a 4 match...
-            lCandidate = e.bTable[hash7(next, tableBits)]
+            lCandidate = e.bTable[hashLen(next, tableBits, hashLongBytes)]

             // If the next long is a candidate, check if we should use that instead...
             lOff := lCandidate.offset - e.cur
@@ -155,7 +155,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
             if int(s+8) < len(src) {
                 cv := loadLE64(src, s)
                 e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: s + e.cur}
-                e.bTable[hash7(cv, tableBits)] = tableEntry{offset: s + e.cur}
+                e.bTable[hashLen(cv, tableBits, hashLongBytes)] = tableEntry{offset: s + e.cur}
             }
             goto emitRemainder
         }
@@ -166,17 +166,17 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
             cv := loadLE64(src, i)
             t := tableEntry{offset: i + e.cur}
             t2 := tableEntry{offset: t.offset + 1}
-            e.bTable[hash7(cv, tableBits)] = t
-            e.bTable[hash7(cv>>8, tableBits)] = t2
+            e.bTable[hashLen(cv, tableBits, hashLongBytes)] = t
+            e.bTable[hashLen(cv>>8, tableBits, hashLongBytes)] = t2
             e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2

             i += 3
             for ; i < s-1; i += 3 {
                 cv := loadLE64(src, i)
                 t := tableEntry{offset: i + e.cur}
                 t2 := tableEntry{offset: t.offset + 1}
-                e.bTable[hash7(cv, tableBits)] = t
-                e.bTable[hash7(cv>>8, tableBits)] = t2
+                e.bTable[hashLen(cv, tableBits, hashLongBytes)] = t
+                e.bTable[hashLen(cv>>8, tableBits, hashLongBytes)] = t2
                 e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
             }
         }
@@ -186,7 +186,7 @@ func (e *fastEncL4) Encode(dst *tokens, src []byte) {
         x := loadLE64(src, s-1)
         o := e.cur + s - 1
         prevHashS := hashLen(x, tableBits, hashShortBytes)
-        prevHashL := hash7(x, tableBits)
+        prevHashL := hashLen(x, tableBits, hashLongBytes)
         e.table[prevHashS] = tableEntry{offset: o}
         e.bTable[prevHashL] = tableEntry{offset: o}
         cv = x >> 8
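
Every level 4 hunk follows the same indexing pattern: a position is stored in e.table under a short hash (hashShortBytes) and in e.bTable under the 7-byte hash, which now also goes through hashLen via hashLongBytes. A compact sketch of that dual insertion with stand-in state; the hash helper and the value of hashShortBytes are assumptions for illustration, not the package's code.

package main

import "fmt"

const (
    tableBits      = 15
    hashShortBytes = 4 // assumption: the short hash covers 4 bytes
    hashLongBytes  = 7 // the constant added by this commit
)

type tableEntry struct{ offset int32 }

// hashBytesSketch is the same illustrative multiplicative hash as in the
// earlier sketch, not the package's hashLen.
func hashBytesSketch(u uint64, h, n uint8) uint32 {
    u <<= 64 - 8*uint(n)
    return uint32((u * 0x9E3779B185EBCA87) >> (64 - h))
}

func main() {
    table := make([]tableEntry, 1<<tableBits)  // short-hash table (like e.table)
    bTable := make([]tableEntry, 1<<tableBits) // long-hash table (like e.bTable)

    // Index position s: one entry per table, both keyed by hashes of the
    // 8 bytes loaded at s (cv), as the skipped-position loops do.
    var cur, s int32 = 0, 42
    cv := uint64(0x0102030405060708)
    e := tableEntry{offset: s + cur}
    table[hashBytesSketch(cv, tableBits, hashShortBytes)] = e
    bTable[hashBytesSketch(cv, tableBits, hashLongBytes)] = e

    fmt.Println("stored offset", e.offset, "in both tables")
}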

src/compress/flate/level5.go

Lines changed: 7 additions & 7 deletions

@@ -92,7 +92,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
     var t int32
     for {
         nextHashS := hashLen(cv, tableBits, hashShortBytes)
-        nextHashL := hash7(cv, tableBits)
+        nextHashL := hashLen(cv, tableBits, hashLongBytes)

         s = nextS
         nextS = s + doEvery + (s-nextEmit)>>skipLog
@@ -109,7 +109,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
         eLong.Cur, eLong.Prev = entry, eLong.Cur

         nextHashS = hashLen(next, tableBits, hashShortBytes)
-        nextHashL = hash7(next, tableBits)
+        nextHashL = hashLen(next, tableBits, hashLongBytes)

         t = lCandidate.Cur.offset - e.cur
         if s-t < maxMatchOffset {
@@ -196,7 +196,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
             // The skipped bytes are tested in Extend backwards,
             // and still picked up as part of the match if they do.
             const skipBeginning = 2
-            eLong := e.bTable[hash7(loadLE64(src, sAt), tableBits)].Cur.offset
+            eLong := e.bTable[hashLen(loadLE64(src, sAt), tableBits, hashLongBytes)].Cur.offset
             t2 := eLong - e.cur - l + skipBeginning
             s2 := s + skipBeginning
             off := s2 - t2
@@ -241,13 +241,13 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
             cv := loadLE64(src, i)
             t := tableEntry{offset: i + e.cur}
             e.table[hashLen(cv, tableBits, hashShortBytes)] = t
-            eLong := &e.bTable[hash7(cv, tableBits)]
+            eLong := &e.bTable[hashLen(cv, tableBits, hashLongBytes)]
             eLong.Cur, eLong.Prev = t, eLong.Cur

             // Do an long at i+1
             cv >>= 8
             t = tableEntry{offset: t.offset + 1}
-            eLong = &e.bTable[hash7(cv, tableBits)]
+            eLong = &e.bTable[hashLen(cv, tableBits, hashLongBytes)]
             eLong.Cur, eLong.Prev = t, eLong.Cur

             // We only have enough bits for a short entry at i+2
@@ -261,7 +261,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
             cv := loadLE64(src, i)
             t := tableEntry{offset: i + e.cur}
             t2 := tableEntry{offset: t.offset + 1}
-            eLong := &e.bTable[hash7(cv, tableBits)]
+            eLong := &e.bTable[hashLen(cv, tableBits, hashLongBytes)]
             eLong.Cur, eLong.Prev = t, eLong.Cur
             e.table[hashLen(cv>>8, tableBits, hashShortBytes)] = t2
         }
@@ -272,7 +272,7 @@ func (e *fastEncL5) Encode(dst *tokens, src []byte) {
         x := loadLE64(src, s-1)
         o := e.cur + s - 1
         prevHashS := hashLen(x, tableBits, hashShortBytes)
-        prevHashL := hash7(x, tableBits)
+        prevHashL := hashLen(x, tableBits, hashLongBytes)
         e.table[prevHashS] = tableEntry{offset: o}
         eLong := &e.bTable[prevHashL]
         eLong.Cur, eLong.Prev = tableEntry{offset: o}, eLong.Cur
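
Levels 5 and 6 keep two candidates per long-table bucket (the tableEntryPrev type whose tail is visible in the deflatefast.go hunk above), and every update in these hunks uses the same rotation: eLong.Cur, eLong.Prev = t, eLong.Cur. A tiny sketch of what that parallel assignment does, with hypothetical offsets:

package main

import "fmt"

type tableEntry struct{ offset int32 }

// tableEntryPrev mirrors the two-deep bucket from deflatefast.go: the most
// recent entry plus the one it displaced.
type tableEntryPrev struct {
    Cur  tableEntry
    Prev tableEntry
}

func main() {
    var bucket tableEntryPrev

    // The parallel assignment used throughout level 5/6: the new entry becomes
    // Cur and the old Cur is kept as Prev, so a match can be tried against
    // both candidates.
    for _, off := range []int32{10, 20, 30} {
        t := tableEntry{offset: off}
        bucket.Cur, bucket.Prev = t, bucket.Cur
    }
    fmt.Println(bucket.Cur.offset, bucket.Prev.offset) // 30 20
}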

src/compress/flate/level6.go

Lines changed: 6 additions & 6 deletions

@@ -92,7 +92,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
     var t int32
     for {
         nextHashS := hashLen(cv, tableBits, hashShortBytes)
-        nextHashL := hash7(cv, tableBits)
+        nextHashL := hashLen(cv, tableBits, hashLongBytes)
         s = nextS
         nextS = s + doEvery + (s-nextEmit)>>skipLog
         if nextS > sLimit {
@@ -109,7 +109,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {

         // Calculate hashes of 'next'
         nextHashS = hashLen(next, tableBits, hashShortBytes)
-        nextHashL = hash7(next, tableBits)
+        nextHashL = hashLen(next, tableBits, hashLongBytes)

         t = lCandidate.Cur.offset - e.cur
         if s-t < maxMatchOffset {
@@ -216,7 +216,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
             // The skipped bytes are tested in extend backwards,
             // and still picked up as part of the match if they do.
             const skipBeginning = 2
-            eLong := &e.bTable[hash7(loadLE64(src, sAt), tableBits)]
+            eLong := &e.bTable[hashLen(loadLE64(src, sAt), tableBits, hashLongBytes)]
             // Test current
             t2 := eLong.Cur.offset - e.cur - l + skipBeginning
             s2 := s + skipBeginning
@@ -269,7 +269,7 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
            for i := nextS + 1; i < int32(len(src))-8; i += 2 {
                cv := loadLE64(src, i)
                e.table[hashLen(cv, tableBits, hashShortBytes)] = tableEntry{offset: i + e.cur}
-               eLong := &e.bTable[hash7(cv, tableBits)]
+               eLong := &e.bTable[hashLen(cv, tableBits, hashLongBytes)]
                eLong.Cur, eLong.Prev = tableEntry{offset: i + e.cur}, eLong.Cur
            }
            goto emitRemainder
@@ -280,8 +280,8 @@ func (e *fastEncL6) Encode(dst *tokens, src []byte) {
            cv := loadLE64(src, i)
            t := tableEntry{offset: i + e.cur}
            t2 := tableEntry{offset: t.offset + 1}
-           eLong := &e.bTable[hash7(cv, tableBits)]
-           eLong2 := &e.bTable[hash7(cv>>8, tableBits)]
+           eLong := &e.bTable[hashLen(cv, tableBits, hashLongBytes)]
+           eLong2 := &e.bTable[hashLen(cv>>8, tableBits, hashLongBytes)]
            e.table[hashLen(cv, tableBits, hashShortBytes)] = t
            eLong.Cur, eLong.Prev = t, eLong.Cur
            eLong2.Cur, eLong2.Prev = t2, eLong2.Cur
