Make my matchfinder work more accessible.
andybalholm committed Jan 12, 2024
1 parent cf812c0 commit 17e5901
Showing 4 changed files with 118 additions and 41 deletions.
7 changes: 7 additions & 0 deletions README.md
@@ -2,6 +2,13 @@ This package is a brotli compressor and decompressor implemented in Go.
It was translated from the reference implementation (https://github.com/google/brotli)
with the `c2go` tool at https://github.com/andybalholm/c2go.

I have been working on new compression algorithms (not translated from C)
in the matchfinder package.
You can use them with the NewWriterV2 function.
Currently they give better results than the old implementation
(at least for compressing my test file, Newton’s *Opticks*)
on levels 2 to 6.
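
A minimal usage sketch (the buffer, sample data, and level value are illustrative; `NewWriterV2` takes any `io.Writer` and a compression level, as shown in writer.go below):

```go
package main

import (
	"bytes"
	"log"

	"github.com/andybalholm/brotli"
)

func main() {
	var buf bytes.Buffer
	w := brotli.NewWriterV2(&buf, 5) // level 5 uses the new matchfinder-based encoder
	if _, err := w.Write([]byte("hello, brotli")); err != nil {
		log.Fatal(err)
	}
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
	// buf now holds the compressed stream.
}
```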

I am using it in production with https://github.com/andybalholm/redwood.

API documentation is found at https://pkg.go.dev/github.com/andybalholm/brotli?tab=doc.
107 changes: 67 additions & 40 deletions brotli_test.go
@@ -69,46 +69,6 @@ func TestEncoderEmptyWrite(t *testing.T) {
t.Errorf("Close()=%v, want nil", err)
}
}

func TestWriter(t *testing.T) {
for level := BestSpeed; level <= BestCompression; level++ {
// Test basic encoder usage.
input := []byte("<html><body><H1>Hello world</H1></body></html>")
out := bytes.Buffer{}
e := NewWriterOptions(&out, WriterOptions{Quality: level})
in := bytes.NewReader([]byte(input))
n, err := io.Copy(e, in)
if err != nil {
t.Errorf("Copy Error: %v", err)
}
if int(n) != len(input) {
t.Errorf("Copy() n=%v, want %v", n, len(input))
}
if err := e.Close(); err != nil {
t.Errorf("Close Error after copied %d bytes: %v", n, err)
}
if err := checkCompressedData(out.Bytes(), input); err != nil {
t.Error(err)
}

out2 := bytes.Buffer{}
e.Reset(&out2)
n2, err := e.Write(input)
if err != nil {
t.Errorf("Write error after Reset: %v", err)
}
if n2 != len(input) {
t.Errorf("Write() after Reset n=%d, want %d", n2, len(input))
}
if err := e.Close(); err != nil {
t.Errorf("Close error after Reset (copied %d) bytes: %v", n2, err)
}
if !bytes.Equal(out.Bytes(), out2.Bytes()) {
t.Error("Compressed data after Reset doesn't equal first time")
}
}
}

func TestIssue22(t *testing.T) {
f, err := os.Open("testdata/issue22.gz")
if err != nil {
@@ -149,6 +109,45 @@ func TestIssue22(t *testing.T) {
}
}

func TestWriterV2(t *testing.T) {
for level := BestSpeed; level <= BestCompression; level++ {
// Test basic encoder usage.
input := []byte("<html><body><H1>Hello world</H1></body></html>")
out := bytes.Buffer{}
e := NewWriterV2(&out, level)
in := bytes.NewReader([]byte(input))
n, err := io.Copy(e, in)
if err != nil {
t.Errorf("Copy Error: %v", err)
}
if int(n) != len(input) {
t.Errorf("Copy() n=%v, want %v", n, len(input))
}
if err := e.Close(); err != nil {
t.Errorf("Close Error after copied %d bytes: %v", n, err)
}
if err := checkCompressedData(out.Bytes(), input); err != nil {
t.Error(err)
}

out2 := bytes.Buffer{}
e.Reset(&out2)
n2, err := e.Write(input)
if err != nil {
t.Errorf("Write error after Reset: %v", err)
}
if n2 != len(input) {
t.Errorf("Write() after Reset n=%d, want %d", n2, len(input))
}
if err := e.Close(); err != nil {
t.Errorf("Close error after Reset (copied %d) bytes: %v", n2, err)
}
if !bytes.Equal(out.Bytes(), out2.Bytes()) {
t.Error("Compressed data after Reset doesn't equal first time")
}
}
}

func TestEncoderStreams(t *testing.T) {
// Test that output is streamed.
// Adjust window size to ensure the encoder outputs at least enough bytes
@@ -576,6 +575,30 @@ func BenchmarkEncodeLevelsReset(b *testing.B) {
}
}

func BenchmarkEncodeLevelsResetV2(b *testing.B) {
opticks, err := ioutil.ReadFile("testdata/Isaac.Newton-Opticks.txt")
if err != nil {
b.Fatal(err)
}

for level := BestSpeed; level <= 7; level++ {
buf := new(bytes.Buffer)
w := NewWriterV2(buf, level)
w.Write(opticks)
w.Close()
b.Run(fmt.Sprintf("%d", level), func(b *testing.B) {
b.ReportAllocs()
b.ReportMetric(float64(len(opticks))/float64(buf.Len()), "ratio")
b.SetBytes(int64(len(opticks)))
for i := 0; i < b.N; i++ {
w.Reset(ioutil.Discard)
w.Write(opticks)
w.Close()
}
})
}
}

func BenchmarkDecodeLevels(b *testing.B) {
opticks, err := ioutil.ReadFile("testdata/Isaac.Newton-Opticks.txt")
if err != nil {
@@ -694,6 +717,10 @@ func BenchmarkEncodeM4Chain128(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 128, HashLen: 5, DistanceBitCost: 57}, 1<<16)
}

func BenchmarkEncodeM4Chain256(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 256, HashLen: 5, DistanceBitCost: 57}, 1<<16)
}

func TestEncodeM0(t *testing.T) {
test(t, "testdata/Isaac.Newton-Opticks.txt", matchfinder.M0{}, 1<<16)
}
2 changes: 1 addition & 1 deletion http.go
@@ -20,7 +20,7 @@ func HTTPCompressor(w http.ResponseWriter, r *http.Request) io.WriteCloser {
switch encoding {
case "br":
w.Header().Set("Content-Encoding", "br")
return NewWriter(w)
return NewWriterV2(w, DefaultCompression)
case "gzip":
w.Header().Set("Content-Encoding", "gzip")
return gzip.NewWriter(w)
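
With this change, HTTPCompressor hands back the new encoder for `br` responses. A hedged sketch of wiring it into a handler (the route, payload, and listen address are illustrative; HTTPCompressor's signature is shown above):

```go
package main

import (
	"io"
	"net/http"

	"github.com/andybalholm/brotli"
)

func main() {
	http.HandleFunc("/hello", func(w http.ResponseWriter, r *http.Request) {
		// HTTPCompressor inspects Accept-Encoding, sets Content-Encoding,
		// and returns a WriteCloser that compresses with br, gzip, or not at all.
		cw := brotli.HTTPCompressor(w, r)
		defer cw.Close()
		io.WriteString(cw, "<html><body><h1>Hello world</h1></body></html>")
	})
	http.ListenAndServe(":8080", nil)
}
```
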
43 changes: 43 additions & 0 deletions writer.go
@@ -3,6 +3,8 @@ package brotli
import (
"errors"
"io"

"github.com/andybalholm/brotli/matchfinder"
)

const (
@@ -117,3 +119,44 @@ type nopCloser struct {
}

func (nopCloser) Close() error { return nil }

// NewWriterV2 is like NewWriterLevel, but it uses the new implementation
// based on the matchfinder package. It currently supports up to level 7;
// if a higher level is specified, level 7 will be used.
func NewWriterV2(dst io.Writer, level int) *matchfinder.Writer {
var mf matchfinder.MatchFinder
if level < 2 {
mf = matchfinder.M0{Lazy: level == 1}
} else {
hashLen := 6
if level >= 6 {
hashLen = 5
}
chainLen := 64
switch level {
case 2:
chainLen = 0
case 3:
chainLen = 1
case 4:
chainLen = 2
case 5:
chainLen = 4
case 6:
chainLen = 8
}
mf = &matchfinder.M4{
MaxDistance: 1 << 20,
ChainLength: chainLen,
HashLen: hashLen,
DistanceBitCost: 57,
}
}

return &matchfinder.Writer{
Dest: dst,
MatchFinder: mf,
Encoder: &Encoder{},
BlockSize: 1 << 16,
}
}
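
For callers who want to tune the match finder directly rather than go through the level mapping above, the same pieces can be assembled by hand. This is a sketch, with parameter values copied from the code above and the Chain128 benchmark, not a recommendation:

```go
package main

import (
	"bytes"
	"fmt"
	"log"

	"github.com/andybalholm/brotli"
	"github.com/andybalholm/brotli/matchfinder"
)

func main() {
	var buf bytes.Buffer
	// Assemble the same components NewWriterV2 uses, with an explicit M4
	// configuration (ChainLength 128 mirrors BenchmarkEncodeM4Chain128).
	w := &matchfinder.Writer{
		Dest: &buf,
		MatchFinder: &matchfinder.M4{
			MaxDistance:     1 << 20,
			ChainLength:     128,
			HashLen:         5,
			DistanceBitCost: 57,
		},
		Encoder:   &brotli.Encoder{},
		BlockSize: 1 << 16,
	}
	if _, err := w.Write([]byte("data to compress")); err != nil {
		log.Fatal(err)
	}
	if err := w.Close(); err != nil {
		log.Fatal(err)
	}
	fmt.Printf("compressed to %d bytes\n", buf.Len())
}
```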
