Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
BIP 158 Golomb-Rice Coded Sets and block filters (#481)
* Implemented GCS (WIP) Added some of Chris' tests for golombEncode and golombDecode Fixed GCS.golombDecodeSet Separated GCS object from GolombFilter class Added hanging test for set encoding Fixed property test for encoding and decoding arbitrary sets Added test for GCS.buildGCS and hashing function consistency Added method to construct a Block Filter from a Block Added scaladocs in GCS.scala Added TempUtxoProvider to make BlockFilter testable Wrote a test for GolombFilter.matchesHash Added test for arbitrary data matching in bip 158 filter Added optional right-padding on GCS BitVector, BlockFilter.fromHex, and got started on BlockFilterTest Cleaned up Bip158TestCase.fromJsArray Fixed parsing of test vectors for bip 158 Wrote test for BlockFilter (doesn't pass) Responded to some code review Included CompactSizeUInt in filter Created BitcoinScriptUtil.getDataTokens for output serialization Wrote test for BitcoinScriptUtil.getDataTokens Added test for BitcoinScriptUtil on the genesis block Fixed name :( * Fixed SipHash and block serialization! * Fixed bugs relating to empty scripts, all tests pass! * Cleaned up BitcoinScriptUtil.getDataTokens and its test * Responded to code review * Factored out common code from BlockFilter apply methods * Some cleanup
- Loading branch information
1 parent
90b9b6a
commit 5ed0f6d
Showing
11 changed files
with
933 additions
and
2 deletions.
There are no files selected for viewing
Large diffs are not rendered by default.
Oops, something went wrong.
77 changes: 77 additions & 0 deletions
77
core-test/src/test/scala/org/bitcoins/core/gcs/BlockFilterTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,77 @@ | ||
package org.bitcoins.core.gcs | ||
|
||
import org.bitcoins.core.crypto.DoubleSha256Digest | ||
import org.bitcoins.core.protocol.blockchain.Block | ||
import org.bitcoins.core.protocol.script.ScriptPubKey | ||
import org.bitcoins.testkit.util.BitcoinSUnitTest | ||
import play.api.libs.json.{JsArray, Json} | ||
|
||
import scala.io.Source | ||
|
||
/** Checks BlockFilter construction against the BIP 158 test vectors. */ | ||
class BlockFilterTest extends BitcoinSUnitTest { | ||
  behavior of "BlockFilter" | ||
|
||
  // https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki#appendix-c-test-vectors | ||
  /** One parsed entry of the test-vector file: a block, the previous output | ||
    * scripts listed for it, and the filter expected for it. | ||
    */ | ||
  case class Bip158TestCase( | ||
      blockHeight: Int, | ||
      blockHash: DoubleSha256Digest, | ||
      block: Block, | ||
      prevOutputScripts: Vector[ScriptPubKey], | ||
      // TODO prevHeader: BlockFilterHeader, | ||
      filter: GolombFilter, | ||
      // TODO header: BlockFilterHeader, | ||
      notes: String | ||
  ) { | ||
|
||
    /** Builds a filter from `block` and `prevOutputScripts` and asserts that its | ||
      * decoded hashes equal those of the expected `filter`. The vector's | ||
      * `notes` are attached to the assertion as the failure clue. | ||
      */ | ||
    def runTest(): org.scalatest.Assertion = { | ||
      val constructedFilter = BlockFilter(block, prevOutputScripts) | ||
|
||
      assert(constructedFilter.decodedHashes == filter.decodedHashes, | ||
             s"Test Notes: $notes") | ||
    } | ||
  } | ||
|
||
  object Bip158TestCase { | ||
|
||
    //["Block Height,Block Hash,Block,[Prev Output Scripts for Block],Previous Basic Header,Basic Filter,Basic Header,Notes"] | ||
    /** Parses one test-vector row laid out as described in the comment above. | ||
      * Indices 4 and 6 (the filter headers) are not parsed yet. | ||
      * Fails with an exception if any parsed field does not validate. | ||
      */ | ||
    def fromJsArray(array: JsArray): Bip158TestCase = { | ||
      val parseResult = for { | ||
        height <- array(0).validate[Int] | ||
        blockHash <- array(1).validate[String].map(DoubleSha256Digest.fromHex) | ||
|
||
        block <- array(2).validate[String].map(Block.fromHex) | ||
|
||
        scriptArray <- array(3).validate[JsArray] | ||
        scripts = parseScripts(scriptArray) | ||
|
||
        //prevHeader <- array(4).validate[String].map(BlockFilterHeader.fromHex) | ||
|
||
        filter <- array(5) | ||
          .validate[String] | ||
          .map(BlockFilter.fromHex(_, blockHash)) | ||
|
||
        //header <- array(6).validate[String].map(BlockFilterHeader.fromHex) | ||
|
||
        notes <- array(7).validate[String] | ||
      } yield Bip158TestCase(height, blockHash, block, scripts, filter, notes) | ||
|
||
      parseResult.get | ||
    } | ||
|
||
    /** Converts a JSON array of hex strings into script pubkeys via | ||
      * `ScriptPubKey.fromAsmHex`. | ||
      */ | ||
    private def parseScripts(array: JsArray): Vector[ScriptPubKey] = { | ||
      val hexScripts = array.validate[Vector[String]].get | ||
|
||
      hexScripts.map(ScriptPubKey.fromAsmHex) | ||
    } | ||
  } | ||
|
||
  it must "pass bip 158 test vectors" in { | ||
    val resource = getClass.getResource("/testnet-19.json") | ||
    // Fail with a readable message instead of a NullPointerException | ||
    // when the vector file is missing from the test classpath. | ||
    assert(resource != null, "Resource /testnet-19.json was not found") | ||
|
||
    val source = Source.fromURL(resource) | ||
|
||
    // Read the file eagerly and always release the underlying stream, | ||
    // even when reading fails. | ||
    val rawJson = | ||
      try source.mkString | ||
      finally source.close() | ||
|
||
    // .tail skips the first entry, which is presumably the column-header row. | ||
    val vec: Vector[JsArray] = | ||
      Json.parse(rawJson).validate[Vector[JsArray]].get.tail | ||
    val testCases = vec.map(Bip158TestCase.fromJsArray) | ||
|
||
    testCases.foreach(_.runTest()) | ||
  } | ||
} |
257 changes: 257 additions & 0 deletions
257
core-test/src/test/scala/org/bitcoins/core/gcs/GCSTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,257 @@ | ||
package org.bitcoins.core.gcs | ||
|
||
import org.bitcoins.core.number.{UInt64, UInt8} | ||
import org.bitcoins.core.util.NumberUtil | ||
import org.bitcoins.testkit.core.gen.NumberGenerator | ||
import org.bitcoins.testkit.util.BitcoinSUnitTest | ||
import org.scalacheck.Gen | ||
import scodec.bits.{BinStringSyntax, ByteVector} | ||
|
||
/** Round-trip tests for the Golomb-Rice encoding and decoding functions of GCS. */ | ||
class GCSTest extends BitcoinSUnitTest { | ||
  behavior of "GCS" | ||
|
||
  /** Encodes `item` with parameter `p`, checks the resulting bits against | ||
    * `expectedEncoding`, then decodes those bits and checks that the | ||
    * original item is recovered. | ||
    */ | ||
  private def assertRoundTrip( | ||
      item: UInt64, | ||
      expectedEncoding: scodec.bits.BitVector, | ||
      p: UInt8 = UInt8(2)): org.scalatest.Assertion = { | ||
    val encoding = GCS.golombEncode(item = item, p = p) | ||
    assert(encoding == expectedEncoding) | ||
|
||
    val decoded = GCS.golombDecode(codedItem = encoding, p = p) | ||
    assert(decoded == item) | ||
  } | ||
|
||
  //https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki#golomb-rice-coding | ||
  it must "encode and decode Golomb Coded Set example 1" in { | ||
    assertRoundTrip(item = UInt64.zero, expectedEncoding = bin"000") | ||
  } | ||
|
||
  it must "encode and decode Golomb Coded set example 2" in { | ||
    assertRoundTrip(item = UInt64.one, expectedEncoding = bin"001") | ||
  } | ||
|
||
  it must "encode and decode Golomb Coded set example 3" in { | ||
    assertRoundTrip(item = UInt64(2), expectedEncoding = bin"010") | ||
  } | ||
|
||
  it must "encode and decode Golomb Coded set example 4" in { | ||
    assertRoundTrip(item = UInt64(3), expectedEncoding = bin"011") | ||
  } | ||
|
||
  it must "encode and decode Golomb Coded set example 5" in { | ||
    assertRoundTrip(item = UInt64(4), expectedEncoding = bin"1000") | ||
  } | ||
|
||
  it must "encode and decode Golomb Coded set example 6" in { | ||
    assertRoundTrip(item = UInt64(5), expectedEncoding = bin"1001") | ||
  } | ||
|
||
  it must "encode and decode Golomb Coded set example 7" in { | ||
    assertRoundTrip(item = UInt64(6), expectedEncoding = bin"1010") | ||
  } | ||
|
||
  it must "encode and decode Golomb Coded set example 8" in { | ||
    assertRoundTrip(item = UInt64(7), expectedEncoding = bin"1011") | ||
  } | ||
|
||
  it must "encode and decode Golomb Coded set example 9" in { | ||
    assertRoundTrip(item = UInt64(8), expectedEncoding = bin"11000") | ||
  } | ||
|
||
  it must "encode and decode Golomb Coded set example 10" in { | ||
    assertRoundTrip(item = UInt64(9), expectedEncoding = bin"11001") | ||
  } | ||
|
||
  it must "encode and decode an arbitrary item for an arbitrary p" in { | ||
|
||
    def delta: Gen[UInt64] = { | ||
      // What is a reasonable delta? This bound means the delta | ||
      // can be 1 - 16384. | ||
      // If we use a full UInt64 it takes forever to encode it. | ||
      Gen | ||
        .choose(1, NumberUtil.pow2(14).toInt) | ||
        .map(UInt64(_)) | ||
    } | ||
|
||
    forAll(delta, NumberGenerator.genP) { | ||
      case (item, p) => | ||
        val encoded = GCS.golombEncode(item = item, p = p) | ||
        val decode = GCS.golombDecode(codedItem = encoded, p = p) | ||
|
||
        assert(decode == item) | ||
    } | ||
  } | ||
|
||
  it must "encode and decode a set of elements already tested" in { | ||
    val p = UInt8(2) | ||
|
||
    // Diffs are 1, 2, 3, 4, 5 | ||
    val sortedItems = | ||
      Vector(UInt64(0), UInt64(1), UInt64(3), UInt64(6), UInt64(10), UInt64(15)) | ||
|
||
    val codedSet = GCS.encodeSortedSet(sortedItems, p) | ||
|
||
    // Expected bits: the codes for 0 (the first item) and for the diffs | ||
    // 1 to 5, using the same codes as the single-item examples above. | ||
    val coded0 = bin"000" | ||
    val coded1 = bin"001" | ||
    val coded2 = bin"010" | ||
    val coded3 = bin"011" | ||
    val coded4 = bin"1000" | ||
    val coded5 = bin"1001" | ||
    val expectedCodedSet = coded0 ++ coded1 ++ coded2 ++ coded3 ++ coded4 ++ coded5 | ||
|
||
    assert(codedSet == expectedCodedSet) | ||
|
||
    val decodedSet = GCS.golombDecodeSet(codedSet, p) | ||
|
||
    assert(decodedSet == sortedItems) | ||
  } | ||
|
||
  it must "encode and decode arbitrary sets of elements for arbitrary p" in { | ||
|
||
    // Generates a vector of 1 to 1000 bounded values together with the p they are coded with. | ||
    def itemsAndP: Gen[(Vector[UInt64], UInt8)] = { | ||
      NumberGenerator.genP.flatMap { p => | ||
        Gen.choose(1, 1000).flatMap { size => | ||
          // If hash's quotient when divided by 2^p is too large, we hang converting to unary | ||
          val upperBound: Long = 1L << (p.toInt * 1.75).toInt | ||
|
||
          val hashGen = Gen | ||
            .chooseNum(0L, upperBound) | ||
            .map(UInt64(_)) | ||
|
||
          Gen.listOfN(size, hashGen).map(_.toVector).map { vec => | ||
            (vec, p) | ||
          } | ||
        } | ||
      } | ||
    } | ||
|
||
    forAll(itemsAndP) { | ||
      case (items, p) => | ||
        val sorted = items.sortWith(_ < _) | ||
|
||
        val codedSet = GCS.encodeSortedSet(sorted, p) | ||
        val decodedSet = GCS.golombDecodeSet(codedSet, p) | ||
|
||
        assert(decodedSet == sorted) | ||
    } | ||
  } | ||
|
||
  it must "encode and decode arbitrary ByteVectors for arbitrary p" in { | ||
|
||
    def genP: Gen[UInt8] = { | ||
      // We have 8 as a lower bound since N in hashToRange is in the order of 1000 | ||
      Gen.choose(8, 32).map(UInt8(_)) | ||
    } | ||
|
||
    def genPM: Gen[(UInt8, UInt64)] = genP.flatMap { p => | ||
      // If hash's quotient when divided by 2^p is too large, we hang converting to unary | ||
      val upperBound: Long = p.toInt * 1000 | ||
|
||
      val mGen = Gen | ||
        .chooseNum(0L, upperBound) | ||
        .map(UInt64(_)) | ||
|
||
      mGen.map(m => (p, m)) | ||
    } | ||
|
||
    def genItems: Gen[Vector[ByteVector]] = { | ||
      Gen.choose(1, 1000).flatMap { size => | ||
        Gen.listOfN(size, NumberGenerator.bytevector).map(_.toVector) | ||
      } | ||
    } | ||
|
||
    // 16 generated bytes, passed as the key argument to the GCS calls below. | ||
    def genKey: Gen[ByteVector] = | ||
      Gen.listOfN(16, NumberGenerator.byte).map(ByteVector(_)) | ||
|
||
    forAll(genPM, genItems, genKey) { | ||
      case ((p, m), items, k) => | ||
        val hashes = GCS.hashedSetConstruct(items, k, m) | ||
        val sortedHashes = hashes.sortWith(_ < _) | ||
|
||
        val codedSet = GCS.buildGCS(items, k, p, m) | ||
        val decodedSet = GCS.golombDecodeSet(codedSet, p) | ||
|
||
        assert(decodedSet == sortedHashes) | ||
    } | ||
  } | ||
} |
Oops, something went wrong.