/
mtf_rle2.go
131 lines (118 loc) · 3.29 KB
/
mtf_rle2.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
// Copyright 2015, Joe Tsai. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE.md file.
package bzip2
import "github.com/itchio/dskompress/internal/errors"
// moveToFront implements both the MTF and RLE stages of bzip2 at the same time.
// Any runs of zeros in the encoded output will be replaced by a sequence of
// RUNA and RUNB symbols are encode the length of the run.
//
// The RLE encoding used can actually be encoded to and decoded from using
// normal two's complement arithmetic. The methodology for doing so is below.
//
// Assuming the following:
// num: The value being encoded by RLE encoding.
// run: A sequence of RUNA and RUNB symbols represented as a binary integer,
// where RUNA is the 0 bit, RUNB is the 1 bit, and least-significant RUN
// symbols are at the least-significant bit positions.
// cnt: The number of RUNA and RUNB symbols.
//
// Then the RLE encoding used by bzip2 has this mathematical property:
// num+1 == (1<<cnt) | run
type moveToFront struct {
dictBuf [256]uint8
dictLen int
vals []byte
syms []uint16
blkSize int
}
func (mtf *moveToFront) Init(dict []uint8, blkSize int) {
if len(dict) > len(mtf.dictBuf) {
panicf(errors.Internal, "alphabet too large")
}
copy(mtf.dictBuf[:], dict)
mtf.dictLen = len(dict)
mtf.blkSize = blkSize
}
func (mtf *moveToFront) Encode(vals []byte) (syms []uint16) {
dict := mtf.dictBuf[:mtf.dictLen]
syms = mtf.syms[:0]
if len(vals) > mtf.blkSize {
panicf(errors.Internal, "exceeded block size")
}
var lastNum uint32
for _, val := range vals {
// Normal move-to-front transform.
var idx uint8 // Reverse lookup idx in dict
for di, dv := range dict {
if dv == val {
idx = uint8(di)
break
}
}
copy(dict[1:], dict[:idx])
dict[0] = val
// Run-length encoding augmentation.
if idx == 0 {
lastNum++
continue
}
if lastNum > 0 {
for rc := lastNum + 1; rc != 1; rc >>= 1 {
syms = append(syms, uint16(rc&1))
}
lastNum = 0
}
syms = append(syms, uint16(idx)+1)
}
if lastNum > 0 {
for rc := lastNum + 1; rc != 1; rc >>= 1 {
syms = append(syms, uint16(rc&1))
}
}
mtf.syms = syms
return syms
}
func (mtf *moveToFront) Decode(syms []uint16) (vals []byte) {
dict := mtf.dictBuf[:mtf.dictLen]
vals = mtf.vals[:0]
var lastCnt uint
var lastRun uint32
for _, sym := range syms {
// Run-length encoding augmentation.
if sym < 2 {
lastRun |= uint32(sym) << lastCnt
lastCnt++
continue
}
if lastCnt > 0 {
cnt := int((1<<lastCnt)|lastRun) - 1
if len(vals)+cnt > mtf.blkSize || lastCnt > 24 {
panicf(errors.Corrupted, "run-length decoding exceeded block size")
}
for i := cnt; i > 0; i-- {
vals = append(vals, dict[0])
}
lastCnt, lastRun = 0, 0
}
// Normal move-to-front transform.
val := dict[sym-1] // Forward lookup val in dict
copy(dict[1:], dict[:sym-1])
dict[0] = val
if len(vals) >= mtf.blkSize {
panicf(errors.Corrupted, "run-length decoding exceeded block size")
}
vals = append(vals, val)
}
if lastCnt > 0 {
cnt := int((1<<lastCnt)|lastRun) - 1
if len(vals)+cnt > mtf.blkSize || lastCnt > 24 {
panicf(errors.Corrupted, "run-length decoding exceeded block size")
}
for i := cnt; i > 0; i-- {
vals = append(vals, dict[0])
}
}
mtf.vals = vals
return vals
}