BIP 158 Golomb-Rice Coded Sets and block filters (#481)
* Implemented GCS (WIP)

Added some of Chris' tests for golombEncode and golombDecode

Fixed GCS.golombDecodeSet

Separated GCS object from GolombFilter class

Added hanging test for set encoding

Fixed property test for encoding and decoding arbitrary sets

Added test for GCS.buildGCS and hashing function consistency

Added method to construct a Block Filter from a Block

Added scaladocs in GCS.scala

Added TempUtxoProvider to make BlockFilter testable

Wrote a test for GolombFilter.matchesHash

Added test for arbitrary data matching in bip 158 filter

Added optional right-padding on GCS BitVector, BlockFilter.fromHex, and got started on BlockFilterTest

Cleaned up Bip158TestCase.fromJsArray

Fixed parsing of test vectors for bip 158

Wrote test for BlockFilter (doesn't pass)

Responded to some code review

Included CompactSizeUInt in filter

Created BitcoinScriptUtil.getDataTokens for output serialization

Wrote test for BitcoinScriptUtil.getDataTokens

Added test for BitcoinScriptUtil on the genesis block

Fixed name


* Fixed SipHash and block serialization!

* Fixed bugs relating to empty scripts, all tests pass!

* Cleaned up BitcoinScriptUtil.getDataTokens and its test

* Responded to code review

* Factored out common code from BlockFilter apply methods

* Some cleanup
@@ -0,0 +1,77 @@
package org.bitcoins.core.gcs

import org.bitcoins.core.crypto.DoubleSha256Digest
import org.bitcoins.core.protocol.blockchain.Block
import org.bitcoins.core.protocol.script.ScriptPubKey
import org.bitcoins.testkit.util.BitcoinSUnitTest
import play.api.libs.json.{JsArray, Json}

import scala.io.Source


class BlockFilterTest extends BitcoinSUnitTest {
behavior of "BlockFilter"

/** One row of the BIP 158 test vectors.
  *
  * @param blockHeight       height of `block`, as given in the vector row
  * @param blockHash         hash of `block`, as given in the vector row
  * @param block             the block the filter is built for
  * @param prevOutputScripts the "Prev Output Scripts for Block" column of the row
  * @param filter            the expected filter parsed from the row
  * @param notes             free-form notes from the row, echoed on failure
  */
case class Bip158TestCase(
    blockHeight: Int,
    blockHash: DoubleSha256Digest,
    block: Block,
    prevOutputScripts: Vector[ScriptPubKey],
    // TODO prevHeader: BlockFilterHeader,
    filter: GolombFilter,
    // TODO header: BlockFilterHeader,
    notes: String
) {

  /** Builds a filter from `block` and `prevOutputScripts` and asserts that its
    * decoded hashes equal those of the expected `filter`.
    */
  def runTest(): org.scalatest.Assertion = {
    val constructedFilter = BlockFilter(block, prevOutputScripts)

    assert(constructedFilter.decodedHashes == filter.decodedHashes,
           s"Test Notes: $notes")
  }
}

// Companion object holding the JSON parsing helpers for Bip158TestCase.
// NOTE(review): the closing braces of this object and of both methods are
// missing from the visible lines — restore them before compiling.
object Bip158TestCase {

//["Block Height,Block Hash,Block,[Prev Output Scripts for Block],Previous Basic Header,Basic Filter,Basic Header,Notes"]
// Parses one JSON row, laid out as in the header quoted above, into a test case.
// Columns 4 and 6 (the filter headers) are not parsed yet; see the commented-out lines.
def fromJsArray(array: JsArray): Bip158TestCase = {
val parseResult = for {
height <- array(0).validate[Int]
blockHash <- array(1).validate[String].map(DoubleSha256Digest.fromHex)

block <- array(2).validate[String].map(Block.fromHex)

scriptArray <- array(3).validate[JsArray]
scripts = parseScripts(scriptArray)

//prevHeader <- array(4).validate[String].map(BlockFilterHeader.fromHex)

// NOTE(review): every other column is validated before being mapped; a
// `.validate[String]` step looks like it is missing between these two lines — confirm.
filter <- array(5)
.map(BlockFilter.fromHex(_, blockHash))

//header <- array(6).validate[String].map(BlockFilterHeader.fromHex)

notes <- array(7).validate[String]
} yield Bip158TestCase(height, blockHash, block, scripts, filter, notes)
// NOTE(review): parseResult is never unwrapped or returned in the visible lines,
// although the method is declared to return a Bip158TestCase — confirm what was dropped.


// Reads the JSON array as a vector of hex strings.
// NOTE(review): the step that turns hexScripts into ScriptPubKey values is not
// visible here; the declared return type requires one.
private def parseScripts(array: JsArray): Vector[ScriptPubKey] = {
val hexScripts = array.validate[Vector[String]].get

// Opens the bundled /testnet-19.json resource from the classpath as the source
// of test-vector rows.
// NOTE(review): the right-hand sides of `vec` and `testCases`, the code that runs
// the cases, and the closing brace are missing from the visible lines.
// NOTE(review): nothing visible closes `source` — confirm it is released after reading.
it must "pass bip 158 test vectors" in {
val source = Source.fromURL(getClass.getResource("/testnet-19.json"))

val vec: Vector[JsArray] =
val testCases =

257 changes: 257 additions & 0 deletions core-test/src/test/scala/org/bitcoins/core/gcs/GCSTest.scala
@@ -0,0 +1,257 @@
package org.bitcoins.core.gcs

import org.bitcoins.core.number.{UInt64, UInt8}
import org.bitcoins.core.util.NumberUtil
import org.bitcoins.testkit.core.gen.NumberGenerator
import org.bitcoins.testkit.util.BitcoinSUnitTest
import org.scalacheck.Gen
import scodec.bits.{BinStringSyntax, ByteVector}

class GCSTest extends BitcoinSUnitTest {
behavior of "GCS"

// p = 2, item 0: quotient 0 (unary "0") followed by the 2-bit remainder "00".
it must "encode and decode Golomb Coded Set example 1" in {
  val p = UInt8(2)
  val original = UInt64(0)

  val encoding = GCS.golombEncode(item = original, p = p)

  assert(encoding == bin"000")

  val decode = GCS.golombDecode(codedItem = encoding, p = p)

  assert(decode == original)
}

// p = 2, item 1: quotient 0 (unary "0") followed by the 2-bit remainder "01".
it must "encode and decode Golomb Coded set example 2" in {
  val p = UInt8(2)
  val original = UInt64(1)

  val encoding = GCS.golombEncode(item = original, p = p)

  assert(encoding == bin"001")

  val decode = GCS.golombDecode(codedItem = encoding, p = p)

  assert(decode == original)
}

// p = 2, item 2: quotient 0 (unary "0") followed by the 2-bit remainder "10".
it must "encode and decode Golomb Coded set example 3" in {
  val p = UInt8(2)
  val original = UInt64(2)

  val encoding = GCS.golombEncode(item = original, p = p)

  assert(encoding == bin"010")

  val decode = GCS.golombDecode(codedItem = encoding, p = p)

  assert(decode == original)
}

// p = 2, item 3: quotient 0 (unary "0") followed by the 2-bit remainder "11".
it must "encode and decode Golomb Coded set example 4" in {
  val p = UInt8(2)
  val original = UInt64(3)

  val encoding = GCS.golombEncode(item = original, p = p)

  assert(encoding == bin"011")

  val decode = GCS.golombDecode(codedItem = encoding, p = p)

  assert(decode == original)
}

// p = 2, item 4: quotient 1 (unary "10") followed by the 2-bit remainder "00".
it must "encode and decode Golomb Coded set example 5" in {
  val p = UInt8(2)
  val original = UInt64(4)

  val encoding = GCS.golombEncode(item = original, p = p)

  assert(encoding == bin"1000")

  val decode = GCS.golombDecode(codedItem = encoding, p = p)

  assert(decode == original)
}

// p = 2, item 5: quotient 1 (unary "10") followed by the 2-bit remainder "01".
it must "encode and decode Golomb Coded set example 6" in {
  val p = UInt8(2)
  val original = UInt64(5)

  val encoding = GCS.golombEncode(item = original, p = p)

  assert(encoding == bin"1001")

  val decode = GCS.golombDecode(codedItem = encoding, p = p)

  assert(decode == original)
}

// p = 2, item 6: quotient 1 (unary "10") followed by the 2-bit remainder "10".
it must "encode and decode Golomb Coded set example 7" in {
  val p = UInt8(2)
  val original = UInt64(6)

  val encoding = GCS.golombEncode(item = original, p = p)

  assert(encoding == bin"1010")

  val decode = GCS.golombDecode(codedItem = encoding, p = p)

  assert(decode == original)
}

// p = 2, item 7: quotient 1 (unary "10") followed by the 2-bit remainder "11".
it must "encode and decode Golomb Coded set example 8" in {
  val p = UInt8(2)
  val original = UInt64(7)

  val encoding = GCS.golombEncode(item = original, p = p)

  assert(encoding == bin"1011")

  val decode = GCS.golombDecode(codedItem = encoding, p = p)

  assert(decode == original)
}

// p = 2, item 8: quotient 2 (unary "110") followed by the 2-bit remainder "00".
it must "encode and decode Golomb Coded set example 9" in {
  val p = UInt8(2)
  val original = UInt64(8)

  val encoding = GCS.golombEncode(item = original, p = p)

  assert(encoding == bin"11000")

  val decode = GCS.golombDecode(codedItem = encoding, p = p)

  assert(decode == original)
}

// p = 2, item 9: quotient 2 (unary "110") followed by the 2-bit remainder "01".
it must "encode and decode Golomb Coded set example 10" in {
  val p = UInt8(2)
  val original = UInt64(9)

  val encoding = GCS.golombEncode(item = original, p = p)

  assert(encoding == bin"11001")

  val decode = GCS.golombDecode(codedItem = encoding, p = p)

  assert(decode == original)
}

// Property test: a single generated item must survive a
// golombEncode / golombDecode round trip for a generated p.
it must "encode and decode an arbitrary item for an arbitrary p" in {

  def delta: Gen[UInt64] = {
    // What is a reasonable delta? This bound means the delta
    // can be 1 - 16384;
    // if we do a full uint64 it takes forever to encode it.
    Gen
      .choose(1, NumberUtil.pow2(14).toInt)
      .map(UInt64(_))
  }

  forAll(delta, NumberGenerator.genP) {
    case (item, p) =>
      val encoded = GCS.golombEncode(item = item, p = p)
      val decode = GCS.golombDecode(codedItem = encoded, p = p)

      assert(decode == item)
  }
}

// Checks set encoding against the single-item codes asserted in examples 1-6:
// the expected bit string is the concatenation of the codes for 0 through 5.
it must "encode and decode a set of elements already tested" in {
  val p = UInt8(2)

  // Successive differences (the first item measured from 0) are 0, 1, 2, 3, 4, 5
  val sortedItems =
    Vector(UInt64(0), UInt64(1), UInt64(3), UInt64(6), UInt64(10), UInt64(15))

  val codedSet = GCS.encodeSortedSet(sortedItems, p)

  val coded0 = bin"000"
  val coded1 = bin"001"
  val coded2 = bin"010"
  val coded3 = bin"011"
  val coded4 = bin"1000"
  val coded5 = bin"1001"
  val expectedCodedSet = coded0 ++ coded1 ++ coded2 ++ coded3 ++ coded4 ++ coded5

  assert(codedSet == expectedCodedSet)

  val decodedSet = GCS.golombDecodeSet(codedSet, p)

  assert(decodedSet == sortedItems)
}

// Property test: a generated, sorted vector of items must survive an
// encodeSortedSet / golombDecodeSet round trip for a generated p.
it must "encode and decode arbitrary sets of elements for arbitrary p" in {

  // Generates between 1 and 1000 items together with the p they are coded with.
  def items: Gen[(Vector[UInt64], UInt8)] = {
    NumberGenerator.genP.flatMap { p =>
      Gen.choose(1, 1000).flatMap { size =>
        // If hash's quotient when divided by 2^p is too large, we hang converting to unary
        val upperBound: Long = 1L << (p.toInt * 1.75).toInt

        val hashGen = Gen
          .chooseNum(0L, upperBound)
          .map(UInt64(_))

        Gen.listOfN(size, hashGen).map(_.toVector).map { vec =>
          (vec, p)
        }
      }
    }
  }

  forAll(items) {
    // `generated` avoids shadowing the `items` generator defined above
    case (generated, p) =>
      val sorted = generated.sortWith(_ < _)

      val codedSet = GCS.encodeSortedSet(sorted, p)
      val decodedSet = GCS.golombDecodeSet(codedSet, p)

      assert(decodedSet == sorted)
  }
}

// Property test: decoding the set built by buildGCS must yield the sorted
// output of hashedSetConstruct for the same items, key and m.
it must "encode and decode arbitrary ByteVectors for arbitrary p" in {

  def genP: Gen[UInt8] = {
    // We have 8 as a lower bound since N in hashToRange is in the order of 1000
    Gen.choose(8, 32).map(UInt8(_))
  }

  // Pairs each generated p with an m drawn from [0, p * 1000].
  def genPM: Gen[(UInt8, UInt64)] = genP.flatMap { p =>
    // If hash's quotient when divided by 2^p is too large, we hang converting to unary
    val upperBound: Long = p.toInt * 1000

    val mGen = Gen
      .chooseNum(0L, upperBound)
      .map(UInt64(_))

    mGen.map(m => (p, m))
  }

  // Between 1 and 1000 generated byte vectors to put in the set.
  def genItems: Gen[Vector[ByteVector]] = {
    Gen.choose(1, 1000).flatMap { size =>
      Gen.listOfN(size, NumberGenerator.bytevector).map(_.toVector)
    }
  }

  // 16 generated bytes used as the hashing key.
  def genKey: Gen[ByteVector] =
    Gen.listOfN(16, NumberGenerator.byte).map(ByteVector(_))

  forAll(genPM, genItems, genKey) {
    case ((p, m), items, k) =>
      val hashes = GCS.hashedSetConstruct(items, k, m)
      val sortedHashes = hashes.sortWith(_ < _)

      val codedSet = GCS.buildGCS(items, k, p, m)
      val decodedSet = GCS.golombDecodeSet(codedSet, p)

      assert(decodedSet == sortedHashes)
  }
}


