/
tokenizer.go
183 lines (160 loc) · 5.91 KB
/
tokenizer.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
// Copyright (c) 2019 The Decred developers
// Use of this source code is governed by an ISC
// license that can be found in the LICENSE file.
package txscript
import (
"encoding/binary"
"fmt"
)
// opcodeArrayRef is used to break initialization cycles.
var opcodeArrayRef *[256]opcode
func init() {
opcodeArrayRef = &opcodeArray
}
// ScriptTokenizer provides a facility for easily and efficiently tokenizing
// transaction scripts without creating allocations. Each successive opcode is
// parsed with the Next function, which returns false when iteration is
// complete, either due to successfully tokenizing the entire script or
// encountering a parse error. In the case of failure, the Err function may be
// used to obtain the specific parse error.
//
// Upon successfully parsing an opcode, the opcode and data associated with it
// may be obtained via the Opcode and Data functions, respectively.
//
// The ByteIndex function may be used to obtain the tokenizer's current offset
// into the raw script.
type ScriptTokenizer struct {
script []byte
version uint16
offset int32
op *opcode
data []byte
err error
}
// Done returns true when either all opcodes have been exhausted or a parse
// failure was encountered and therefore the state has an associated error.
func (t *ScriptTokenizer) Done() bool {
return t.err != nil || t.offset >= int32(len(t.script))
}
// Next attempts to parse the next opcode and returns whether or not it was
// successful. It will not be successful if invoked when already at the end of
// the script, a parse failure is encountered, or an associated error already
// exists due to a previous parse failure.
//
// In the case of a true return, the parsed opcode and data can be obtained with
// the associated functions and the offset into the script will either point to
// the next opcode or the end of the script if the final opcode was parsed.
//
// In the case of a false return, the parsed opcode and data will be the last
// successfully parsed values (if any) and the offset into the script will
// either point to the failing opcode or the end of the script if the function
// was invoked when already at the end of the script.
//
// Invoking this function when already at the end of the script is not
// considered an error and will simply return false.
func (t *ScriptTokenizer) Next() bool {
if t.Done() {
return false
}
op := &opcodeArrayRef[t.script[t.offset]]
switch {
// No additional data. Note that some of the opcodes, notably OP_1NEGATE,
// OP_0, and OP_[1-16] represent the data themselves.
case op.length == 1:
t.offset++
t.op = op
t.data = nil
return true
// Data pushes of specific lengths -- OP_DATA_[1-75].
case op.length > 1:
script := t.script[t.offset:]
if len(script) < op.length {
str := fmt.Sprintf("opcode %s requires %d bytes, but script only "+
"has %d remaining", op.name, op.length, len(script))
t.err = scriptError(ErrMalformedPush, str)
return false
}
// Move the offset forward and set the opcode and data accordingly.
t.offset += int32(op.length)
t.op = op
t.data = script[1:op.length]
return true
// Data pushes with parsed lengths -- OP_PUSHDATA{1,2,4}.
case op.length < 0:
script := t.script[t.offset+1:]
if len(script) < -op.length {
str := fmt.Sprintf("opcode %s requires %d bytes, but script only "+
"has %d remaining", op.name, -op.length, len(script))
t.err = scriptError(ErrMalformedPush, str)
return false
}
// Next -length bytes are little endian length of data.
var dataLen int32
switch op.length {
case -1:
dataLen = int32(script[0])
case -2:
dataLen = int32(binary.LittleEndian.Uint16(script[:2]))
case -4:
dataLen = int32(binary.LittleEndian.Uint32(script[:4]))
default:
str := fmt.Sprintf("invalid opcode length %d", op.length)
t.err = scriptError(ErrMalformedPush, str)
return false
}
// Move to the beginning of the data.
script = script[-op.length:]
// Disallow entries that do not fit script or were sign extended.
if dataLen > int32(len(script)) || dataLen < 0 {
str := fmt.Sprintf("opcode %s pushes %d bytes, but script only "+
"has %d remaining", op.name, dataLen, len(script))
t.err = scriptError(ErrMalformedPush, str)
return false
}
// Move the offset forward and set the opcode and data accordingly.
t.offset += 1 + int32(-op.length) + dataLen
t.op = op
t.data = script[:dataLen]
return true
}
// The only remaining case is an opcode with length zero which is
// impossible.
panic("unreachable")
}
// Script returns the full script associated with the tokenizer.
func (t *ScriptTokenizer) Script() []byte {
return t.script
}
// ByteIndex returns the current offset into the full script that will be parsed
// next and therefore also implies everything before it has already been parsed.
func (t *ScriptTokenizer) ByteIndex() int32 {
return t.offset
}
// Opcode returns the current opcode associated with the tokenizer.
func (t *ScriptTokenizer) Opcode() byte {
return t.op.value
}
// Data returns the data associated with the most recently successfully parsed
// opcode.
func (t *ScriptTokenizer) Data() []byte {
return t.data
}
// Err returns any errors currently associated with the tokenizer. This will
// only be non-nil in the case a parsing error was encountered.
func (t *ScriptTokenizer) Err() error {
return t.err
}
// MakeScriptTokenizer returns a new instance of a script tokenizer. Passing
// an unsupported script version will result in the returned tokenizer
// immediately having an err set accordingly.
//
// See the docs for ScriptTokenizer for more details.
func MakeScriptTokenizer(scriptVersion uint16, script []byte) ScriptTokenizer {
// Only version 0 scripts are currently supported.
var err error
if scriptVersion != 0 {
str := fmt.Sprintf("script version %d is not supported", scriptVersion)
err = scriptError(ErrUnsupportedScriptVersion, str)
}
return ScriptTokenizer{version: scriptVersion, script: script, err: err}
}