/
decoder.go
136 lines (123 loc) · 4.34 KB
/
decoder.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package prefix
import (
"sort"
"github.com/dsnet/compress/internal"
)
// The algorithm used to decode variable length codes is based on the lookup
// method in zlib. If the code is less-than-or-equal to maxChunkBits,
// then the symbol can be decoded using a single lookup into the chunks table.
// Otherwise, the links table will be used for a second level lookup.
//
// The chunks slice is keyed by the contents of the bit buffer ANDed with
// the chunkMask to avoid a out-of-bounds lookup. The value of chunks is a tuple
// that is decoded as follow:
//
// var length = chunks[bitBuffer&chunkMask] & countMask
// var symbol = chunks[bitBuffer&chunkMask] >> countBits
//
// If the decoded length is larger than chunkBits, then an overflow link table
// must be used for further decoding. In this case, the symbol is actually the
// index into the links tables. The second-level links table returned is
// processed in the same way as the chunks table.
//
// if length > chunkBits {
// var index = symbol // Previous symbol is index into links tables
// length = links[index][bitBuffer>>chunkBits & linkMask] & countMask
// symbol = links[index][bitBuffer>>chunkBits & linkMask] >> countBits
// }
//
// See the following:
// http://www.gzip.org/algorithm.txt
type Decoder struct {
chunks []uint32 // First-level lookup map
links [][]uint32 // Second-level lookup map
chunkMask uint32 // Mask the length of the chunks table
linkMask uint32 // Mask the length of the link table
chunkBits uint32 // Bit-length of the chunks table
MinBits uint32 // The minimum number of bits to safely make progress
NumSyms uint32 // Number of symbols
}
// Init initializes Decoder according to the codes provided.
func (pd *Decoder) Init(codes PrefixCodes) {
// Handle special case trees.
if len(codes) <= 1 {
switch {
case len(codes) == 0: // Empty tree (should error if used later)
*pd = Decoder{chunks: pd.chunks[:0], links: pd.links[:0], NumSyms: 0}
case len(codes) == 1 && codes[0].Len == 0: // Single code tree (bit-length of zero)
pd.chunks = append(pd.chunks[:0], codes[0].Sym<<countBits|0)
*pd = Decoder{chunks: pd.chunks[:1], links: pd.links[:0], NumSyms: 1}
default:
panic("invalid codes")
}
return
}
if internal.Debug && !sort.IsSorted(prefixCodesBySymbol(codes)) {
panic("input codes is not sorted")
}
if internal.Debug && !(codes.checkLengths() && codes.checkPrefixes()) {
panic("detected incomplete or overlapping codes")
}
var minBits, maxBits uint32 = valueBits, 0
for _, c := range codes {
if minBits > c.Len {
minBits = c.Len
}
if maxBits < c.Len {
maxBits = c.Len
}
}
// Allocate chunks table as needed.
const maxChunkBits = 9 // This can be tuned for better performance
pd.NumSyms = uint32(len(codes))
pd.MinBits = minBits
pd.chunkBits = maxBits
if pd.chunkBits > maxChunkBits {
pd.chunkBits = maxChunkBits
}
numChunks := 1 << pd.chunkBits
pd.chunks = allocUint32s(pd.chunks, numChunks)
pd.chunkMask = uint32(numChunks - 1)
// Allocate links tables as needed.
pd.links = pd.links[:0]
pd.linkMask = 0
if pd.chunkBits < maxBits {
numLinks := 1 << (maxBits - pd.chunkBits)
pd.linkMask = uint32(numLinks - 1)
var linkIdx uint32
for i := range pd.chunks {
pd.chunks[i] = 0 // Logic below relies on zero value as uninitialized
}
for _, c := range codes {
if c.Len > pd.chunkBits && pd.chunks[c.Val&pd.chunkMask] == 0 {
pd.chunks[c.Val&pd.chunkMask] = (linkIdx << countBits) | (pd.chunkBits + 1)
linkIdx++
}
}
pd.links = extendSliceUint32s(pd.links, int(linkIdx))
linksFlat := allocUint32s(pd.links[0], numLinks*int(linkIdx))
for i, j := 0, 0; i < len(pd.links); i, j = i+1, j+numLinks {
pd.links[i] = linksFlat[j : j+numLinks]
}
}
// Fill out chunks and links tables with values.
for _, c := range codes {
chunk := c.Sym<<countBits | c.Len
if c.Len <= pd.chunkBits {
skip := 1 << uint(c.Len)
for j := int(c.Val); j < len(pd.chunks); j += skip {
pd.chunks[j] = chunk
}
} else {
linkIdx := pd.chunks[c.Val&pd.chunkMask] >> countBits
links := pd.links[linkIdx]
skip := 1 << uint(c.Len-pd.chunkBits)
for j := int(c.Val >> pd.chunkBits); j < len(links); j += skip {
links[j] = chunk
}
}
}
}