Skip to content

Commit

Permalink
BIP 158 Golomb-Rice Coded Sets and block filters (#481)
Browse files Browse the repository at this point in the history
* Implemented GCS (WIP)

Added some of Chris' tests for golombEncode and golombDecode

Fixed GCS.golombDecodeSet

Separated GCS object from GolombFilter class

Added hanging test for set encoding

Fixed property test for encoding and decoding arbitrary sets

Added test for GCS.buildGCS and hashing function consistency

Added method to construct a Block Filter from a Block

Added scaladocs in GCS.scala

Added TempUtxoProvider to make BlockFilter testable

Wrote a test for GolombFilter.matchesHash

Added test for arbitrary data matching in bip 158 filter

Added optional right-padding on GCS BitVector, BlockFilter.fromHex, and got started on BlockFilterTest

Cleaned up Bip158TestCase.fromJsArray

Fixed parsing of test vectors for bip 158

Wrote test for BlockFilter (doesn't pass)

Responded to some code review

Included CompactSizeUInt in filter

Created BitcoinScriptUtil.getDataTokens for output serialization

Wrote test for BitcoinScriptUtil.getDataTokens

Added test for BitcoinScriptUtil on the genesis block

Fixed name

:(

* Fixed SipHash and block serialization!

* Fixed bugs relating to empty scripts, all tests pass!

* Cleaned up BitcoinScriptUtil.getDataTokens and its test

* Responded to code review

* Factored out common code from BlockFilter apply methods

* Some cleanup
  • Loading branch information
nkohen authored and Christewart committed Jun 17, 2019
1 parent 90b9b6a commit 5ed0f6d
Show file tree
Hide file tree
Showing 11 changed files with 933 additions and 2 deletions.
13 changes: 13 additions & 0 deletions core-test/src/test/resources/testnet-19.json

Large diffs are not rendered by default.

@@ -0,0 +1,77 @@
package org.bitcoins.core.gcs

import org.bitcoins.core.crypto.DoubleSha256Digest
import org.bitcoins.core.protocol.blockchain.Block
import org.bitcoins.core.protocol.script.ScriptPubKey
import org.bitcoins.testkit.util.BitcoinSUnitTest
import play.api.libs.json.{JsArray, Json}

import scala.io.Source

/** Checks that [[BlockFilter]] built from a block and its previous output
  * scripts produces the same decoded hashes as the filters listed in the
  * BIP 158 test vectors (`/testnet-19.json` on the test classpath).
  */
class BlockFilterTest extends BitcoinSUnitTest {
  behavior of "BlockFilter"

  /** One row of the BIP 158 test-vector file.
    *
    * The "previous basic header" and "basic header" columns of the vectors
    * are not modelled yet (see the TODOs below).
    *
    * @param blockHeight height of the block the row describes
    * @param blockHash hash of that block
    * @param block the block itself
    * @param prevOutputScripts previous output scripts handed to [[BlockFilter]]
    * @param filter the expected filter parsed from the row
    * @param notes free-form notes, shown when the assertion fails
    */
  // https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki#appendix-c-test-vectors
  case class Bip158TestCase(
      blockHeight: Int,
      blockHash: DoubleSha256Digest,
      block: Block,
      prevOutputScripts: Vector[ScriptPubKey],
      // TODO prevHeader: BlockFilterHeader,
      filter: GolombFilter,
      // TODO header: BlockFilterHeader,
      notes: String
  ) {

    /** Builds a filter from `block` and `prevOutputScripts` and asserts that
      * its decoded hashes equal those of the expected `filter`.
      */
    def runTest(): org.scalatest.Assertion = {
      val constructedFilter = BlockFilter(block, prevOutputScripts)

      assert(constructedFilter.decodedHashes == filter.decodedHashes,
             s"Test Notes: $notes")
    }
  }

  object Bip158TestCase {

    /** Parses one JSON row of the test vectors into a [[Bip158TestCase]].
      *
      * Column 4 (previous header) and column 6 (header) are skipped for now.
      * Calling `.get` on the parse result makes a malformed row fail the test
      * immediately instead of being silently dropped.
      */
    //["Block Height,Block Hash,Block,[Prev Output Scripts for Block],Previous Basic Header,Basic Filter,Basic Header,Notes"]
    def fromJsArray(array: JsArray): Bip158TestCase = {
      val parseResult = for {
        height <- array(0).validate[Int]
        blockHash <- array(1).validate[String].map(DoubleSha256Digest.fromHex)

        block <- array(2).validate[String].map(Block.fromHex)

        scriptArray <- array(3).validate[JsArray]
        scripts = parseScripts(scriptArray)

        //prevHeader <- array(4).validate[String].map(BlockFilterHeader.fromHex)

        filter <- array(5)
          .validate[String]
          .map(BlockFilter.fromHex(_, blockHash))

        //header <- array(6).validate[String].map(BlockFilterHeader.fromHex)

        notes <- array(7).validate[String]
      } yield Bip158TestCase(height, blockHash, block, scripts, filter, notes)

      parseResult.get
    }

    /** Converts a JSON array of hex strings into script pubkeys. */
    private def parseScripts(array: JsArray): Vector[ScriptPubKey] = {
      val hexScripts = array.validate[Vector[String]].get

      hexScripts.map(ScriptPubKey.fromAsmHex)
    }
  }

  it must "pass bip 158 test vectors" in {
    val source = Source.fromURL(getClass.getResource("/testnet-19.json"))

    // Read everything eagerly and always release the underlying stream,
    // even when reading fails; the Source was previously never closed.
    val rawJson: String =
      try source.mkString
      finally source.close()

    // The first row of the file is the column-header row, hence `.tail`.
    val vec: Vector[JsArray] =
      Json.parse(rawJson).validate[Vector[JsArray]].get.tail
    val testCases = vec.map(Bip158TestCase.fromJsArray)

    testCases.foreach(_.runTest())
  }
}
257 changes: 257 additions & 0 deletions core-test/src/test/scala/org/bitcoins/core/gcs/GCSTest.scala
@@ -0,0 +1,257 @@
package org.bitcoins.core.gcs

import org.bitcoins.core.number.{UInt64, UInt8}
import org.bitcoins.core.util.NumberUtil
import org.bitcoins.testkit.core.gen.NumberGenerator
import org.bitcoins.testkit.util.BitcoinSUnitTest
import org.scalacheck.Gen
import scodec.bits.{BinStringSyntax, ByteVector}

/** Unit and property tests for the Golomb-Rice coding functions on `GCS`. */
class GCSTest extends BitcoinSUnitTest {
  behavior of "GCS"

  /** Encodes `item` with parameter `p`, asserts that the produced bits equal
    * `expectedBits`, then asserts that decoding those bits yields `item`.
    */
  private def assertRoundTrip(
      item: UInt64,
      p: UInt8,
      expectedBits: scodec.bits.BitVector): org.scalatest.Assertion = {
    val encoded = GCS.golombEncode(item = item, p = p)
    assert(encoded == expectedBits)

    val decoded = GCS.golombDecode(codedItem = encoded, p = p)
    assert(decoded == item)
  }

  //https://github.com/bitcoin/bips/blob/master/bip-0158.mediawiki#golomb-rice-coding
  it must "encode and decode Golomb Coded Set example 1" in {
    assertRoundTrip(item = UInt64.zero, p = UInt8(2), expectedBits = bin"000")
  }

  it must "encode and decode Golomb Coded set example 2" in {
    assertRoundTrip(item = UInt64.one, p = UInt8(2), expectedBits = bin"001")
  }

  it must "encode and decode Golomb Coded set example 3" in {
    assertRoundTrip(item = UInt64(2), p = UInt8(2), expectedBits = bin"010")
  }

  it must "encode and decode Golomb Coded set example 4" in {
    assertRoundTrip(item = UInt64(3), p = UInt8(2), expectedBits = bin"011")
  }

  it must "encode and decode Golomb Coded set example 5" in {
    assertRoundTrip(item = UInt64(4), p = UInt8(2), expectedBits = bin"1000")
  }

  it must "encode and decode Golomb Coded set example 6" in {
    assertRoundTrip(item = UInt64(5), p = UInt8(2), expectedBits = bin"1001")
  }

  it must "encode and decode Golomb Coded set example 7" in {
    assertRoundTrip(item = UInt64(6), p = UInt8(2), expectedBits = bin"1010")
  }

  it must "encode and decode Golomb Coded set example 8" in {
    assertRoundTrip(item = UInt64(7), p = UInt8(2), expectedBits = bin"1011")
  }

  it must "encode and decode Golomb Coded set example 9" in {
    assertRoundTrip(item = UInt64(8), p = UInt8(2), expectedBits = bin"11000")
  }

  it must "encode and decode Golomb Coded set example 10" in {
    assertRoundTrip(item = UInt64(9), p = UInt8(2), expectedBits = bin"11001")
  }

  it must "encode and decode an arbitrary item for an arbitrary p" in {
    // What is a reasonable delta? Here it ranges over 1 to 2^14 (16384);
    // drawing from the full UInt64 range makes encoding take forever.
    val largestDelta = NumberUtil.pow2(14).toInt
    val deltaGen: Gen[UInt64] =
      Gen.choose(1, largestDelta).map(n => UInt64(n))

    forAll(deltaGen, NumberGenerator.genP) { (item, p) =>
      val bits = GCS.golombEncode(item = item, p = p)
      val roundTripped = GCS.golombDecode(codedItem = bits, p = p)

      assert(roundTripped == item)
    }
  }

  it must "encode and decode a set of elements already tested" in {
    val p = UInt8(2)

    // Neighbouring items differ by 1, 2, 3, 4, 5. The expected encoding below
    // is the concatenation of the codes for 0 through 5 from the examples
    // above.
    val sortedItems = Vector(0, 1, 3, 6, 10, 15).map(UInt64(_))

    val expectedCodedSet =
      Vector(bin"000", bin"001", bin"010", bin"011", bin"1000", bin"1001")
        .reduce(_ ++ _)

    val codedSet = GCS.encodeSortedSet(sortedItems, p)
    assert(codedSet == expectedCodedSet)

    val decodedSet = GCS.golombDecodeSet(codedSet, p)
    assert(decodedSet == sortedItems)
  }

  it must "encode and decode arbitrary sets of elements for arbitrary p" in {

    // The hash range is capped relative to p: if a hash's quotient when
    // divided by 2^p is too large, the conversion to unary hangs.
    def boundedHash(p: UInt8): Gen[UInt64] = {
      val upperBound: Long = 1L << (p.toInt * 1.75).toInt

      Gen.chooseNum(0L, upperBound).map(UInt64(_))
    }

    val setWithP: Gen[(Vector[UInt64], UInt8)] =
      for {
        p <- NumberGenerator.genP
        size <- Gen.choose(1, 1000)
        hashes <- Gen.listOfN(size, boundedHash(p))
      } yield (hashes.toVector, p)

    forAll(setWithP) {
      case (unsorted, p) =>
        val ascending = unsorted.sortWith((a, b) => a < b)

        val encodedSet = GCS.encodeSortedSet(ascending, p)
        val roundTripped = GCS.golombDecodeSet(encodedSet, p)

        assert(roundTripped == ascending)
    }
  }

  it must "encode and decode arbitrary ByteVectors for arbitrary p" in {

    // We have 8 as a lower bound since N in hashToRange is in the order of 1000
    val pGen: Gen[UInt8] = Gen.choose(8, 32).map(UInt8(_))

    // m is capped relative to p: if a hash's quotient when divided by 2^p is
    // too large, the conversion to unary hangs.
    val pAndM: Gen[(UInt8, UInt64)] =
      for {
        p <- pGen
        m <- Gen.chooseNum(0L, (p.toInt * 1000).toLong).map(UInt64(_))
      } yield (p, m)

    val itemsGen: Gen[Vector[ByteVector]] =
      for {
        size <- Gen.choose(1, 1000)
        items <- Gen.listOfN(size, NumberGenerator.bytevector)
      } yield items.toVector

    // 16 generated bytes are used as the key.
    val keyGen: Gen[ByteVector] =
      Gen.listOfN(16, NumberGenerator.byte).map(ByteVector(_))

    forAll(pAndM, itemsGen, keyGen) {
      case ((p, m), items, k) =>
        val expected =
          GCS.hashedSetConstruct(items, k, m).sortWith((a, b) => a < b)

        val encodedSet = GCS.buildGCS(items, k, p, m)
        val roundTripped = GCS.golombDecodeSet(encodedSet, p)

        assert(roundTripped == expected)
    }

  }
}

0 comments on commit 5ed0f6d

Please sign in to comment.