/
compress.go
502 lines (482 loc) · 20.5 KB
/
compress.go
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
470
471
472
473
474
475
476
477
478
479
480
481
482
483
484
485
486
487
488
489
490
491
492
493
494
495
496
497
498
499
500
501
502
package blockchain
import (
"github.com/cybriq/p9/pkg/ecc"
"github.com/cybriq/p9/pkg/txscript"
)
// In order to reduce the size of stored scripts, a domain specific compression algorithm is used which recognizes
// standard scripts and stores them using less bytes than the original script. The compression algorithm used here was
// obtained from Bitcoin Core, so all credits for the algorithm go to it.
//
// The general serialized format is:
//
// <script size or type><script data>
//
// Field Type Size
//
// script size or type VLQ variable
// script data []byte variable
//
// The specific serialized format for each recognized standard script is:
//
// - Pay-to-pubkey-hash: (21 bytes) - <0><20-byte pubkey hash>
//
// - Pay-to-script-hash: (21 bytes) - <1><20-byte script hash>
//
// - Pay-to-pubkey**: (33 bytes) - <2, 3, 4, or 5><32-byte pubkey X value>
// 2, 3 = compressed pubkey with bit 0 specifying the y coordinate to use
// 4, 5 = uncompressed pubkey with bit 0 specifying the y coordinate to use
//
// ** Only valid public keys starting with 0x02, 0x03, and 0x04 are supported.
//
// Any scripts which are not recognized as one of the aforementioned standard scripts are encoded using the general
// serialized format and encode the script size as the sum of the actual size of the script and the number of special
// cases. The following constants specify the special constants used to identify a special script type in the
// domain-specific compressed script encoding. NOTE: This section specifically does not use iota since these values are
// serialized and must be stable for long-term storage.
const (
// cstPayToPubKeyHash identifies a compressed pay-to-pubkey-hash script.
cstPayToPubKeyHash = 0
// cstPayToScriptHash identifies a compressed pay-to-script-hash script.
cstPayToScriptHash = 1
// cstPayToPubKeyComp2 identifies a compressed pay-to-pubkey script to a compressed pubkey. Bit 0 specifies which
// y-coordinate to use to reconstruct the full uncompressed pubkey.
cstPayToPubKeyComp2 = 2
// cstPayToPubKeyComp3 identifies a compressed pay-to-pubkey script to a compressed pubkey. Bit 0 specifies which
// y-coordinate to use to reconstruct the full uncompressed pubkey.
cstPayToPubKeyComp3 = 3
// cstPayToPubKeyUncomp4 identifies a compressed pay-to-pubkey script to an uncompressed pubkey. Bit 0 specifies
// which y-coordinate to use to reconstruct the full uncompressed pubkey.
cstPayToPubKeyUncomp4 = 4
// cstPayToPubKeyUncomp5 identifies a compressed pay-to-pubkey script to an uncompressed pubkey. Bit 0 specifies
// which y-coordinate to use to reconstruct the full uncompressed pubkey.
cstPayToPubKeyUncomp5 = 5
// numSpecialScripts is the number of special scripts recognized by the domain-specific script compression
// algorithm.
numSpecialScripts = 6
)
// In order to reduce the size of stored amounts, a domain specific compression algorithm is used which relies on there typically being a lot of zeroes at end of the amounts. The compression algorithm used here was obtained from Bitcoin Core, so all credits for the algorithm go to it. While this is simply exchanging one uint64 for another, the resulting value for typical amounts has a much smaller magnitude which results in fewer bytes when encoded as variable length quantity. For example, consider the amount of 0.1 DUO which is 10000000 satoshi. Encoding 10000000 as a VLQ would take 4 bytes while encoding the compressed value of 8 as a VLQ only takes 1 byte. Essentially the compression is achieved by splitting the value into an exponent in the range [0-9] and a digit in the range [1-9], when possible, and encoding them in a way that can be decoded. More specifically, the encoding is as follows:
//
// - 0 is 0
//
// - Find the exponent, e, as the largest power of 10 that evenly divides the value up to a maximum of 9
//
// - When e < 9, the final digit can't be 0 so store it as d and remove it by dividing the value by 10 (call the result
// n). The encoded value is thus: 1 + 10*(9*n + d-1) + e
//
// - When e==9, the only thing known is the amount is not 0. The encoded value is thus:
//
// 1 + 10*(n-1) + e == 10 + 10*(n-1)
//
// Example encodings:
//
// (The numbers in parenthesis are the number of bytes when serialized as a VLQ)
//
// 0 (1) -> 0 (1) * 0.00000000 DUO
// 1000 (2) -> 4 (1) * 0.00001000 DUO
// 10000 (2) -> 5 (1) * 0.00010000 DUO
// 12345678 (4) -> 111111101(4) * 0.12345678 DUO
// 50000000 (4) -> 47 (1) * 0.50000000 DUO
// 100000000 (4) -> 9 (1) * 1.00000000 DUO
// 500000000 (5) -> 49 (1) * 5.00000000 DUO
// 1000000000 (5) -> 10 (1) * 10.00000000 DUO
// -----------------------------------------------------------------------------
// compressTxOutAmount compresses the passed amount according to the domain specific compression algorithm described
// above.
func compressTxOutAmount(amount uint64) uint64 {
// No need to do any work if it's zero.
if amount == 0 {
return 0
}
// Find the largest power of 10 (max of 9) that evenly divides the value.
exponent := uint64(0)
for amount%10 == 0 && exponent < 9 {
amount /= 10
exponent++
}
// The compressed result for exponents less than 9 is:
// 1 + 10*(9*n + d-1) + e
if exponent < 9 {
lastDigit := amount % 10
amount /= 10
return 1 + 10*(9*amount+lastDigit-1) + exponent
}
// The compressed result for an exponent of 9 is:
// 1 + 10*(n-1) + e == 10 + 10*(n-1)
return 10 + 10*(amount-1)
}
// compressedScriptSize returns the number of bytes the passed script would take when encoded with the domain specific
// compression algorithm described above.
func compressedScriptSize(pkScript []byte) int {
// Pay-to-pubkey-hash script.
if valid, _ := isPubKeyHash(pkScript); valid {
return 21
}
// Pay-to-script-hash script.
if valid, _ := isScriptHash(pkScript); valid {
return 21
}
// Pay-to-pubkey (compressed or uncompressed) script.
if valid, _ := isPubKey(pkScript); valid {
return 33
}
// When none of the above special cases apply, encode the script as is preceded by the sum of its size and the
// number of special cases encoded as a variable length quantity.
return serializeSizeVLQ(uint64(len(pkScript)+numSpecialScripts)) +
len(pkScript)
}
// Compressed transaction outputs consist of an amount and a public key script both compressed using the domain specific
// compression algorithms previously described.
//
// The serialized format is:
//
// <compressed amount><compressed script>
//
// Field Type Size
//
// compressed amount VLQ variable
// compressed script []byte variable
// compressedTxOutSize returns the number of bytes the passed transaction output fields would take when encoded with the
// format described above.
func compressedTxOutSize(amount uint64, pkScript []byte) int {
return serializeSizeVLQ(compressTxOutAmount(amount)) +
compressedScriptSize(pkScript)
}
// decodeCompressedScriptSize treats the passed serialized bytes as a compressed script, possibly followed by other
// data, and returns the number of bytes it occupies taking into account the special encoding of the script size by the
// domain specific compression algorithm described above.
func decodeCompressedScriptSize(serialized []byte) int {
scriptSize, bytesRead := deserializeVLQ(serialized)
if bytesRead == 0 {
return 0
}
switch scriptSize {
case cstPayToPubKeyHash:
return 21
case cstPayToScriptHash:
return 21
case cstPayToPubKeyComp2, cstPayToPubKeyComp3, cstPayToPubKeyUncomp4,
cstPayToPubKeyUncomp5:
return 33
}
scriptSize -= numSpecialScripts
scriptSize += uint64(bytesRead)
return int(scriptSize)
}
// decodeCompressedTxOut decodes the passed compressed txout, possibly followed by other data, into its uncompressed
// amount and script and returns them along with the number of bytes they occupied prior to decompression.
func decodeCompressedTxOut(serialized []byte) (uint64, []byte, int, error) {
// Deserialize the compressed amount and ensure there are bytes remaining for the compressed script.
compressedAmount, bytesRead := deserializeVLQ(serialized)
if bytesRead >= len(serialized) {
return 0, nil, bytesRead, errDeserialize(
"unexpected end of " +
"data after compressed amount",
)
}
// Decode the compressed script size and ensure there are enough bytes left in the slice for it.
scriptSize := decodeCompressedScriptSize(serialized[bytesRead:])
if len(serialized[bytesRead:]) < scriptSize {
return 0, nil, bytesRead, errDeserialize(
"unexpected end of " +
"data after script size",
)
}
// Decompress and return the amount and script.
amount := decompressTxOutAmount(compressedAmount)
script := decompressScript(serialized[bytesRead : bytesRead+scriptSize])
return amount, script, bytesRead + scriptSize, nil
}
// decompressScript returns the original script obtained by decompressing the passed compressed script according to the
// domain specific compression algorithm described above. NOTE: The script parameter must already have been proven to be
// long enough to contain the number of bytes returned by decodeCompressedScriptSize or it will panic. This is
// acceptable since it is only an internal function.
func decompressScript(compressedPkScript []byte) []byte {
// In practice this function will not be called with a zero-length or nil script since the nil script encoding
// includes the length, however the code below assumes the length exists, so just return nil now if the function
// ever ends up being called with a nil script in the future.
if len(compressedPkScript) == 0 {
return nil
}
// Decode the script size and examine it for the special cases.
encodedScriptSize, bytesRead := deserializeVLQ(compressedPkScript)
switch encodedScriptSize {
// Pay-to-pubkey-hash script. The resulting script is:
//
// <OP_DUP><OP_HASH160><20 byte hash><OP_EQUALVERIFY><OP_CHECKSIG>
case cstPayToPubKeyHash:
pkScript := make([]byte, 25)
pkScript[0] = txscript.OP_DUP
pkScript[1] = txscript.OP_HASH160
pkScript[2] = txscript.OP_DATA_20
copy(pkScript[3:], compressedPkScript[bytesRead:bytesRead+20])
pkScript[23] = txscript.OP_EQUALVERIFY
pkScript[24] = txscript.OP_CHECKSIG
return pkScript
// Pay-to-script-hash script. The resulting script is:
//
// <OP_HASH160><20 byte script hash><OP_EQUAL>
case cstPayToScriptHash:
pkScript := make([]byte, 23)
pkScript[0] = txscript.OP_HASH160
pkScript[1] = txscript.OP_DATA_20
copy(pkScript[2:], compressedPkScript[bytesRead:bytesRead+20])
pkScript[22] = txscript.OP_EQUAL
return pkScript
// Pay-to-compressed-pubkey script. The resulting script is:
//
// <OP_DATA_33><33 byte compressed pubkey><OP_CHECKSIG>
case cstPayToPubKeyComp2, cstPayToPubKeyComp3:
pkScript := make([]byte, 35)
pkScript[0] = txscript.OP_DATA_33
pkScript[1] = byte(encodedScriptSize)
copy(pkScript[2:], compressedPkScript[bytesRead:bytesRead+32])
pkScript[34] = txscript.OP_CHECKSIG
return pkScript
// Pay-to-uncompressed-pubkey script. The resulting script is:
//
// <OP_DATA_65><65 byte uncompressed pubkey><OP_CHECKSIG>
case cstPayToPubKeyUncomp4, cstPayToPubKeyUncomp5:
// Change the leading byte to the appropriate compressed pubkey identifier (0x02 or 0x03) so it can be decoded
// as a compressed pubkey. This really should never fail since the encoding ensures it is valid before
// compressing to this type.
compressedKey := make([]byte, 33)
compressedKey[0] = byte(encodedScriptSize - 2)
copy(compressedKey[1:], compressedPkScript[1:])
key, e := ecc.ParsePubKey(compressedKey, ecc.S256())
if e != nil {
E.Ln(e)
return nil
}
pkScript := make([]byte, 67)
pkScript[0] = txscript.OP_DATA_65
copy(pkScript[1:], key.SerializeUncompressed())
pkScript[66] = txscript.OP_CHECKSIG
return pkScript
}
// When none of the special cases apply, the script was encoded using the general format, so reduce the script size
// by the number of special cases and return the unmodified script.
scriptSize := int(encodedScriptSize - numSpecialScripts)
pkScript := make([]byte, scriptSize)
copy(pkScript, compressedPkScript[bytesRead:bytesRead+scriptSize])
return pkScript
}
// decompressTxOutAmount returns the original amount the passed compressed amount represents according to the domain
// specific compression algorithm described above.
func decompressTxOutAmount(amount uint64) uint64 {
// No need to do any work if it's zero.
if amount == 0 {
return 0
}
// The decompressed amount is either of the following two equations:
//
// x = 1 + 10*(9*n + d - 1) + e
//
// x = 1 + 10*(n - 1) + 9
amount--
// The decompressed amount is now one of the following two equations:
//
// x = 10*(9*n + d - 1) + e
//
// x = 10*(n - 1) + 9
exponent := amount % 10
amount /= 10
// The decompressed amount is now one of the following two equations:
//
// x = 9*n + d - 1 | where e < 9
//
// x = n - 1 | where e = 9
n := uint64(0)
if exponent < 9 {
lastDigit := amount%9 + 1
amount /= 9
n = amount*10 + lastDigit
} else {
n = amount + 1
}
// Apply the exponent.
for ; exponent > 0; exponent-- {
n *= 10
}
return n
}
// deserializeVLQ deserializes the provided variable-length quantity according to the format described above. It also
// returns the number of bytes deserialized.
func deserializeVLQ(serialized []byte) (uint64, int) {
var n uint64
var size int
for _, val := range serialized {
size++
n = (n << 7) | uint64(val&0x7f)
if val&0x80 != 0x80 {
break
}
n++
}
return n, size
}
// isPubKey returns whether or not the passed public key script is a standard pay-to-pubkey script that pays to a valid
// compressed or uncompressed public key along with the serialized pubkey it is paying to if it is.
//
// NOTE: This function ensures the public key is actually valid since the compression algorithm requires valid pubkeys.
// It does not support hybrid pubkeys. This means that even if the script has the correct form for a pay-to-pubkey
// script, this function will only return true when it is paying to a valid compressed or uncompressed pubkey.
func isPubKey(script []byte) (bool, []byte) {
// Pay-to-compressed-pubkey script.
if len(script) == 35 && script[0] == txscript.OP_DATA_33 &&
script[34] == txscript.OP_CHECKSIG && (script[1] == 0x02 ||
script[1] == 0x03) {
// Ensure the public key is valid.
serializedPubKey := script[1:34]
_, e := ecc.ParsePubKey(serializedPubKey, ecc.S256())
if e == nil {
return true, serializedPubKey
}
}
// Pay-to-uncompressed-pubkey script.
if len(script) == 67 && script[0] == txscript.OP_DATA_65 &&
script[66] == txscript.OP_CHECKSIG && script[1] == 0x04 {
// Ensure the public key is valid.
serializedPubKey := script[1:66]
_, e := ecc.ParsePubKey(serializedPubKey, ecc.S256())
if e == nil {
return true, serializedPubKey
}
}
return false, nil
}
// isPubKeyHash returns whether or not the passed public key script is a standard pay-to-pubkey-hash script along with
// the pubkey hash it is paying to if it is.
func isPubKeyHash(script []byte) (bool, []byte) {
if len(script) == 25 && script[0] == txscript.OP_DUP &&
script[1] == txscript.OP_HASH160 &&
script[2] == txscript.OP_DATA_20 &&
script[23] == txscript.OP_EQUALVERIFY &&
script[24] == txscript.OP_CHECKSIG {
return true, script[3:23]
}
return false, nil
}
// isScriptHash returns whether or not the passed public key script is a standard pay-to-script-hash script along with
// the script hash it is paying to if it is.
func isScriptHash(script []byte) (bool, []byte) {
if len(script) == 23 && script[0] == txscript.OP_HASH160 &&
script[1] == txscript.OP_DATA_20 &&
script[22] == txscript.OP_EQUAL {
return true, script[2:22]
}
return false, nil
}
// putCompressedScript compresses the passed script according to the domain specific compression algorithm described
// above directly into the passed target byte slice. The target byte slice must be at least large enough to handle the
// number of bytes returned by the compressedScriptSize function or it will panic.
func putCompressedScript(target, pkScript []byte) int {
// Pay-to-pubkey-hash script.
if valid, hash := isPubKeyHash(pkScript); valid {
target[0] = cstPayToPubKeyHash
copy(target[1:21], hash)
return 21
}
// Pay-to-script-hash script.
if valid, hash := isScriptHash(pkScript); valid {
target[0] = cstPayToScriptHash
copy(target[1:21], hash)
return 21
}
// Pay-to-pubkey (compressed or uncompressed) script.
if valid, serializedPubKey := isPubKey(pkScript); valid {
pubKeyFormat := serializedPubKey[0]
switch pubKeyFormat {
case 0x02, 0x03:
target[0] = pubKeyFormat
copy(target[1:33], serializedPubKey[1:33])
return 33
case 0x04:
// Encode the oddness of the serialized pubkey into the
// compressed script type.
target[0] = pubKeyFormat | (serializedPubKey[64] & 0x01)
copy(target[1:33], serializedPubKey[1:33])
return 33
}
}
// When none of the above special cases apply, encode the unmodified script preceded by the sum of its size and the
// number of special cases encoded as a variable length quantity.
encodedSize := uint64(len(pkScript) + numSpecialScripts)
vlqSizeLen := putVLQ(target, encodedSize)
copy(target[vlqSizeLen:], pkScript)
return vlqSizeLen + len(pkScript)
}
// putCompressedTxOut compresses the passed amount and script according to their domain specific compression algorithms
// and encodes them directly into the passed target byte slice with the format described above. The target byte slice
// must be at least large enough to handle the number of bytes returned by the compressedTxOutSize function or it will
// panic.
func putCompressedTxOut(target []byte, amount uint64, pkScript []byte) int {
offset := putVLQ(target, compressTxOutAmount(amount))
offset += putCompressedScript(target[offset:], pkScript)
return offset
}
// putVLQ serializes the provided number to a variable-length quantity according to the format described above and
// returns the number of bytes of the encoded value. The result is placed directly into the passed byte slice which must
// be at least large enough to handle the number of bytes returned by the serializeSizeVLQ function or it will panic.
func putVLQ(target []byte, n uint64) int {
offset := 0
for ; ; offset++ {
// The high bit is set when another byte follows.
highBitMask := byte(0x80)
if offset == 0 {
highBitMask = 0x00
}
target[offset] = byte(n&0x7f) | highBitMask
if n <= 0x7f {
break
}
n = (n >> 7) - 1
}
// Reverse the bytes so it is MSB-encoded.
for i, j := 0, offset; i < j; i, j = i+1, j-1 {
target[i], target[j] = target[j], target[i]
}
return offset + 1
}
// A variable length quantity (VLQ) is an encoding that uses an arbitrary number of binary octets to represent an
// arbitrarily large integer. The scheme employs a most significant byte (MSB) base-128 encoding where the high bit in
// each byte indicates whether or not the byte is the final one. In addition, to ensure there are no redundant
// encodings, an offset is subtracted every time a group of 7 bits is shifted out. Therefore each integer can be
// represented in exactly one way, and each representation stands for exactly one integer.
//
// Another nice property of this encoding is that it provides a compact representation of values that are typically used
// to indicate txsizes. For example, the values 0 - 127 are represented with a single byte, 128 - 16511 with two bytes,
// and 16512 - 2113663 with three bytes.
//
// While the encoding allows arbitrarily large integers, it is artificially limited in this code to an unsigned 64-bit
// integer for efficiency purposes.
//
// Example encodings:
//
// 0 -> [0x00]
// 127 -> [0x7f] * Max 1-byte value
// 128 -> [0x80 0x00]
// 129 -> [0x80 0x01]
// 255 -> [0x80 0x7f]
// 256 -> [0x81 0x00]
// 16511 -> [0xff 0x7f] * Max 2-byte value
// 16512 -> [0x80 0x80 0x00]
// 32895 -> [0x80 0xff 0x7f]
// 2113663 -> [0xff 0xff 0x7f] * Max 3-byte value
// 270549119 -> [0xff 0xff 0xff 0x7f] * Max 4-byte value
// 2^64-1 -> [0x80 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0xfe 0x7f]
//
// References:
//
// https://en.wikipedia.org/wiki/Variable-length_quantity
// http://www.codecodex.com/wiki/Variable-Length_Integers
//
// -----------------------------------------------------------------------------
// serializeSizeVLQ returns the number of bytes it would take to serialize the passed number as a variable-length
// quantity according to the format described above.
func serializeSizeVLQ(n uint64) int {
size := 1
for ; n > 0x7f; n = (n >> 7) - 1 {
size++
}
return size
}