Skip to content

Commit

Permalink
matchfinder.M4: add Score function
Browse files Browse the repository at this point in the history
  • Loading branch information
andybalholm committed Jan 2, 2024
1 parent 4a024e3 commit 24b2bfa
Show file tree
Hide file tree
Showing 4 changed files with 50 additions and 34 deletions.
6 changes: 6 additions & 0 deletions bitwriter.go
Original file line number Diff line number Diff line change
@@ -1,5 +1,7 @@
package brotli

import "github.com/andybalholm/brotli/matchfinder"

/* Copyright 2010 Google Inc. All Rights Reserved.
Distributed under MIT license.
Expand Down Expand Up @@ -54,3 +56,7 @@ func (w *bitWriter) jumpToByteBoundary() {
w.bits = 0
w.dst = dst
}

// matchScore rates a match by passing its length (End-Start) and the
// difference between its start and its Match index (Start-Match) to
// backwardReferenceScore, returning the result as an int. Its signature
// matches the Score field of matchfinder.M4.
func matchScore(m matchfinder.AbsoluteMatch) int {
	length := uint(m.End - m.Start)
	backward := uint(m.Start - m.Match)
	score := backwardReferenceScore(length, backward)
	return int(score)
}
22 changes: 11 additions & 11 deletions brotli_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -651,45 +651,45 @@ func benchmark(b *testing.B, filename string, m matchfinder.MatchFinder, blockSi
}

func TestEncodeM4(t *testing.T) {
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18}, 1<<16)
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, Score: matchScore}, 1<<16)
}

func BenchmarkEncodeM4(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20}, 1<<16)
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, Score: matchScore}, 1<<16)
}

func TestEncodeM4Chain1(t *testing.T) {
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, ChainLength: 1}, 1<<16)
test(t, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 18, ChainLength: 1, Score: matchScore}, 1<<16)
}

func BenchmarkEncodeM4Chain1(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 1}, 1<<16)
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 1, Score: matchScore}, 1<<16)
}

func BenchmarkEncodeM4Chain2(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 2}, 1<<16)
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 2, Score: matchScore}, 1<<16)
}

func BenchmarkEncodeM4Chain4(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 4}, 1<<16)
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 4, Score: matchScore}, 1<<16)
}

func BenchmarkEncodeM4Chain8(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 8}, 1<<16)
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 8, HashLen: 5, Score: matchScore}, 1<<16)
}

func BenchmarkEncodeM4Chain16(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 16}, 1<<16)
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 16, HashLen: 5, Score: matchScore}, 1<<16)
}

func BenchmarkEncodeM4Chain32(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 32}, 1<<16)
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 32, HashLen: 5, Score: matchScore}, 1<<16)
}

func BenchmarkEncodeM4Chain64(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 64}, 1<<16)
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 64, HashLen: 5, Score: matchScore}, 1<<16)
}

func BenchmarkEncodeM4Chain128(b *testing.B) {
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 128}, 1<<16)
benchmark(b, "testdata/Isaac.Newton-Opticks.txt", &matchfinder.M4{MaxDistance: 1 << 20, ChainLength: 128, HashLen: 5, Score: matchScore}, 1<<16)
}
8 changes: 4 additions & 4 deletions matchfinder/emitter.go
Original file line number Diff line number Diff line change
@@ -1,8 +1,8 @@
package matchfinder

// An absoluteMatch is like a Match, but it stores indexes into the byte
// An AbsoluteMatch is like a Match, but it stores indexes into the byte
// stream instead of lengths.
type absoluteMatch struct {
type AbsoluteMatch struct {
// Start is the index of the first byte.
Start int

Expand All @@ -24,7 +24,7 @@ type matchEmitter struct {
NextEmit int
}

func (e *matchEmitter) emit(m absoluteMatch) {
func (e *matchEmitter) emit(m AbsoluteMatch) {
e.Dst = append(e.Dst, Match{
Unmatched: m.Start - e.NextEmit,
Length: m.End - m.Start,
Expand All @@ -35,7 +35,7 @@ func (e *matchEmitter) emit(m absoluteMatch) {

// trim shortens m if it extends past maxEnd. Then if the length is at least
// minLength, the match is emitted.
func (e *matchEmitter) trim(m absoluteMatch, maxEnd int, minLength int) {
func (e *matchEmitter) trim(m AbsoluteMatch, maxEnd int, minLength int) {
if m.End > maxEnd {
m.End = maxEnd
}
Expand Down
48 changes: 29 additions & 19 deletions matchfinder/m4.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,10 @@ type M4 struct {
// locations with the same hash as the current location.
ChainLength int

// Score is the rating function used to choose the best match.
// The default is the length of the match.
Score func(AbsoluteMatch) int

table []uint32
chain []uint16

Expand Down Expand Up @@ -62,6 +66,12 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
if len(q.table) < 1<<q.TableBits {
q.table = make([]uint32, 1<<q.TableBits)
}
if q.Score == nil {
q.Score = func(m AbsoluteMatch) int {
return m.End - m.Start
}
}

e := matchEmitter{Dst: dst}

if len(q.history) > q.MaxDistance*2 {
Expand Down Expand Up @@ -92,16 +102,16 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {

// matches stores the matches that have been found but not emitted,
// in reverse order. (matches[0] is the most recent one.)
var matches [3]absoluteMatch
var matches [3]AbsoluteMatch
for i := e.NextEmit; i < len(src)-7; i++ {
if matches[0] != (absoluteMatch{}) && i >= matches[0].End {
if matches[0] != (AbsoluteMatch{}) && i >= matches[0].End {
// We have found some matches, and we're far enough along that we probably
// won't find overlapping matches, so we might as well emit them.
if matches[1] != (absoluteMatch{}) {
if matches[1] != (AbsoluteMatch{}) {
e.trim(matches[1], matches[0].Start, q.MinLength)
}
e.emit(matches[0])
matches = [3]absoluteMatch{}
matches = [3]AbsoluteMatch{}
}

// Calculate and store the hash.
Expand All @@ -123,7 +133,7 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
}

// Look for a match.
var currentMatch absoluteMatch
var currentMatch AbsoluteMatch

if i-candidate != matches[0].Start-matches[0].Match {
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
Expand All @@ -146,59 +156,59 @@ func (q *M4) FindMatches(dst []Match, src []byte) []Match {
if i-candidate != matches[0].Start-matches[0].Match {
if binary.LittleEndian.Uint32(src[candidate:]) == binary.LittleEndian.Uint32(src[i:]) {
m := extendMatch2(src, i, candidate, e.NextEmit)
if m.End-m.Start > q.MinLength && m.End-m.Start > currentMatch.End-currentMatch.Start {
if m.End-m.Start > q.MinLength && q.Score(m) > q.Score(currentMatch) {
currentMatch = m
}
}
}
}

if currentMatch.End-currentMatch.Start <= matches[0].End-matches[0].Start {
if q.Score(currentMatch) <= q.Score(matches[0]) {
continue
}

matches = [3]absoluteMatch{
matches = [3]AbsoluteMatch{
currentMatch,
matches[0],
matches[1],
}

if matches[2] == (absoluteMatch{}) {
if matches[2] == (AbsoluteMatch{}) {
continue
}

// We have three matches, so it's time to emit one and/or eliminate one.
switch {
case matches[0].Start < matches[2].End:
// The first and third matches overlap; discard the one in between.
matches = [3]absoluteMatch{
matches = [3]AbsoluteMatch{
matches[0],
matches[2],
absoluteMatch{},
AbsoluteMatch{},
}

case matches[0].Start < matches[2].End+q.MinLength:
// The first and third matches don't overlap, but there's no room for
// another match between them. Emit the first match and discard the second.
e.emit(matches[2])
matches = [3]absoluteMatch{
matches = [3]AbsoluteMatch{
matches[0],
absoluteMatch{},
absoluteMatch{},
AbsoluteMatch{},
AbsoluteMatch{},
}

default:
// Emit the first match, shortening it if necessary to avoid overlap with the second.
e.trim(matches[2], matches[1].Start, q.MinLength)
matches[2] = absoluteMatch{}
matches[2] = AbsoluteMatch{}
}
}

// We've found all the matches now; emit the remaining ones.
if matches[1] != (absoluteMatch{}) {
if matches[1] != (AbsoluteMatch{}) {
e.trim(matches[1], matches[0].Start, q.MinLength)
}
if matches[0] != (absoluteMatch{}) {
if matches[0] != (AbsoluteMatch{}) {
e.emit(matches[0])
}

Expand Down Expand Up @@ -255,13 +265,13 @@ func extendMatch(src []byte, i, j int) int {

// Given a 4-byte match at src[start] and src[candidate], extendMatch2 extends it
// upward as far as possible, and downward no farther than to min.
func extendMatch2(src []byte, start, candidate, min int) absoluteMatch {
func extendMatch2(src []byte, start, candidate, min int) AbsoluteMatch {
end := extendMatch(src, candidate+4, start+4)
for start > min && candidate > 0 && src[start-1] == src[candidate-1] {
start--
candidate--
}
return absoluteMatch{
return AbsoluteMatch{
Start: start,
End: end,
Match: candidate,
Expand Down

0 comments on commit 24b2bfa

Please sign in to comment.