Permalink
Browse files

bzip2: improve performance

Improve performance of move-to-front by using cache-friendly
copies instead of doubly-linked list. Simplify so that the
underlying slice is the object. Remove the n=0 special case,
      which was actually slower with the copy approach.

benchmark                 old ns/op     new ns/op     delta
BenchmarkDecodeDigits     26429714      23859699      -9.72%
BenchmarkDecodeTwain      76684510      67591946      -11.86%

benchmark                 old MB/s     new MB/s     speedup
BenchmarkDecodeDigits     1.63         1.81         1.11x
BenchmarkDecodeTwain      1.63         1.85         1.13x

Updates #6754.

LGTM=adg, agl, josharian
R=adg, agl, josharian
CC=golang-codereviews
https://golang.org/cl/131840043
  • Loading branch information...
1 parent 523aa93 commit 6d248cec56dd56f3ddb92bd587b5c4ac2f9919b1 @jeffallen jeffallen committed with agl Aug 18, 2014
Showing with 55 additions and 62 deletions.
  1. +38 −0 src/pkg/compress/bzip2/bzip2_test.go
  2. +17 −62 src/pkg/compress/bzip2/move_to_front.go
@@ -216,6 +216,44 @@ func TestOutOfRangeSelector(t *testing.T) {
ioutil.ReadAll(decompressor)
}
+func TestMTF(t *testing.T) {
+ mtf := newMTFDecoderWithRange(5)
+
+ // 0 1 2 3 4
+ expect := byte(1)
+ x := mtf.Decode(1)
+ if x != expect {
+ t.Errorf("expected %v, got %v", expect, x)
+ }
+
+ // 1 0 2 3 4
+ x = mtf.Decode(0)
+ if x != expect {
+ t.Errorf("expected %v, got %v", expect, x)
+ }
+
+ // 1 0 2 3 4
+ expect = byte(0)
+ x = mtf.Decode(1)
+ if x != expect {
+ t.Errorf("expected %v, got %v", expect, x)
+ }
+
+ // 0 1 2 3 4
+ expect = byte(4)
+ x = mtf.Decode(4)
+ if x != expect {
+ t.Errorf("expected %v, got %v", expect, x)
+ }
+
+ // 4 0 1 2 3
+ expect = byte(0)
+ x = mtf.Decode(1)
+ if x != expect {
+ t.Errorf("expected %v, got %v", expect, x)
+ }
+}
+
var bufferOverrunBase64 string = `
QlpoNTFBWSZTWTzyiGcACMP/////////////////////////////////3/7f3///
////4N/fCZODak2Xo44GIHZgkGzDRbFAuwAAKoFV7T6AO6qwA6APb6s2rOoAkAAD
@@ -11,88 +11,43 @@ package bzip2
// index into that list. When a symbol is referenced, it's moved to the front
// of the list. Thus, a repeated symbol ends up being encoded with many zeros,
// as the symbol will be at the front of the list after the first access.
-type moveToFrontDecoder struct {
- // Rather than actually keep the list in memory, the symbols are stored
- // as a circular, double linked list with the symbol indexed by head
- // at the front of the list.
- symbols [256]byte
- next [256]uint8
- prev [256]uint8
- head uint8
- len int
-}
+type moveToFrontDecoder []byte
// newMTFDecoder creates a move-to-front decoder with an explicit initial list
// of symbols.
-func newMTFDecoder(symbols []byte) *moveToFrontDecoder {
+func newMTFDecoder(symbols []byte) moveToFrontDecoder {
if len(symbols) > 256 {
panic("too many symbols")
}
-
- m := new(moveToFrontDecoder)
- copy(m.symbols[:], symbols)
- m.len = len(symbols)
- m.threadLinkedList()
- return m
+ return moveToFrontDecoder(symbols)
}
// newMTFDecoderWithRange creates a move-to-front decoder with an initial
// symbol list of 0...n-1.
-func newMTFDecoderWithRange(n int) *moveToFrontDecoder {
+func newMTFDecoderWithRange(n int) moveToFrontDecoder {
if n > 256 {
panic("newMTFDecoderWithRange: cannot have > 256 symbols")
}
- m := new(moveToFrontDecoder)
+ m := make([]byte, n)
for i := 0; i < n; i++ {
- m.symbols[byte(i)] = byte(i)
- }
- m.len = n
- m.threadLinkedList()
- return m
-}
-
-// threadLinkedList creates the initial linked-list pointers.
-func (m *moveToFrontDecoder) threadLinkedList() {
- if m.len == 0 {
- return
- }
-
- m.prev[0] = uint8(m.len - 1)
-
- for i := byte(0); int(i) < m.len-1; i++ {
- m.next[i] = uint8(i + 1)
- m.prev[i+1] = uint8(i)
+ m[i] = byte(i)
}
-
- m.next[m.len-1] = 0
+ return moveToFrontDecoder(m)
}
-func (m *moveToFrontDecoder) Decode(n int) (b byte) {
- // Most of the time, n will be zero so it's worth dealing with this
- // simple case.
- if n == 0 {
- return m.symbols[m.head]
- }
-
- i := m.head
- for j := 0; j < n; j++ {
- i = m.next[i]
- }
- b = m.symbols[i]
-
- m.next[m.prev[i]] = m.next[i]
- m.prev[m.next[i]] = m.prev[i]
- m.next[i] = m.head
- m.prev[i] = m.prev[m.head]
- m.next[m.prev[m.head]] = i
- m.prev[m.head] = i
- m.head = i
-
+func (m moveToFrontDecoder) Decode(n int) (b byte) {
+ // Implement move-to-front with a simple copy. This approach
+ // beats more sophisticated approaches in benchmarking, probably
+ // because it has high locality of reference inside of a
+ // single cache line (most move-to-front operations have n < 64).
+ b = m[n]
+ copy(m[1:], m[:n])
+ m[0] = b
return
}
// First returns the symbol at the front of the list.
-func (m *moveToFrontDecoder) First() byte {
- return m.symbols[m.head]
+func (m moveToFrontDecoder) First() byte {
+ return m[0]
}

0 comments on commit 6d248ce

Please sign in to comment.