txoutsbyaddress index (take 3) #9806

Open
wants to merge 15 commits into
from
View
@@ -79,6 +79,74 @@ $ curl localhost:18332/rest/getutxos/checkmempool/b2cdfd7b89def827ff8af7cd9bff76
}
```
+####Query UTXO set (by address/script)
+`GET /rest/getutxoindex/<checkmempool>/<address>/<address>/.../<address>.json`
+
+The getutxoindex command allows querying the UTXO set for a given set of addresses (or scripts).
+
+To use this function, you must start bitcoin with the -txoutindex parameter.
+
+Output:
+```
+[ (array of json object)
+ {
+ \"confirmations\" : n, (numeric) The number of confirmations
+ \"txid\" : \"txid\", (string) The transaction id
+ \"vout\" : n, (numeric) The vout value
+ \"value\" : x.xxx, (numeric) The transaction value in btc
+ \"scriptPubKey\" : { (json object)
+ \"asm\" : \"code\", (string)
+ \"hex\" : \"hex\", (string)
+ \"reqSigs\" : n, (numeric) Number of required signatures
+ \"type\" : \"pubkeyhash\", (string) The type, eg pubkeyhash
+ \"addresses\" : [ (array of string) array of bitcoin addresses
+ \"bitcoinaddress\" (string) bitcoin address
+ ,...
+ ]
+ },
+ \"version\" : n, (numeric) The transaction version
+ \"coinbase\" : true|false, (boolean) Coinbase or not
+ \"bestblockhash\" : \"hash\", (string) The block hash of the best block
+ \"bestblockheight\" : n, (numeric) The block height of the best block
+ \"bestblocktime\" : n, (numeric) The block time of the best block
+ \"blockhash\" : \"hash\", (string) The block hash of the block the tx is in (only if confirmations > 0)
+ \"blockheight\" : n, (numeric) The block height of the block the tx is in (only if confirmations > 0)
+ \"blocktime\" : ttt, (numeric) The block time in seconds since 1.1.1970 GMT (only if confirmations > 0)
+ }
+ ,...
+]
+```
+
+Example:
+```
+$ curl localhost:18332/rest/getutxoindex/checkmempool/mvkA8gYrKUmXFiuFpoxNGjMjYcV9oCkwGV.json 2>/dev/null | json_pp
+[
+ {
+ "confirmations" : 721918,
+ "txid" : "75bc54c673ed535db361a6e89c08bf7256d1378e2c645229d469d41042356e54",
+ "vout" : 0,
+ "value" : 0.001,
+ "scriptPubKey" : {
+ "asm" : "OP_DUP OP_HASH160 a7092d2dc8778b56d4c352697081c687b451ab6d OP_EQUALVERIFY OP_CHECKSIG",
+ "hex" : "76a914a7092d2dc8778b56d4c352697081c687b451ab6d88ac",
+ "reqSigs" : 1,
+ "type" : "pubkeyhash",
+ "addresses" : [
+ "mvkA8gYrKUmXFiuFpoxNGjMjYcV9oCkwGV"
+ ]
+ },
+ "version" : 1,
+ "coinbase" : false,
+ "bestblockhash" : "00000000007872ee19923a5604d86a6c9bfa3041c417a7ecf60dc034387b173f",
+ "blockheight" : 244755,
+ "bestblocktime" : 1475309084,
+ "blockhash" : "000000000001c163caa76dbc16c7b383fb10257829b3617c5a1ffb91ea3824db",
+ "bestblockheight" : 966672,
+ "blocktime" : 1400786412
+ }
+]
+```
+
####Memory pool
`GET /rest/mempool/info.json`
View
@@ -89,6 +89,8 @@ BITCOIN_CORE_H = \
checkqueue.h \
clientversion.h \
coins.h \
+ coinsbyscript.h \
+ coinstats.h \
compat.h \
compat/byteswap.h \
compat/endian.h \
@@ -305,6 +307,8 @@ libbitcoin_common_a_SOURCES = \
base58.cpp \
chainparams.cpp \
coins.cpp \
+ coinsbyscript.cpp \
+ coinstats.cpp \
compressor.cpp \
core_read.cpp \
core_write.cpp \
@@ -96,7 +96,7 @@ static void MempoolEviction(benchmark::State& state)
tx7.vout[1].scriptPubKey = CScript() << OP_7 << OP_EQUAL;
tx7.vout[1].nValue = 10 * COIN;
- CTxMemPool pool;
+ CTxMemPool pool(false);
while (state.KeepRunning()) {
AddTx(tx1, 10000LL, pool);
View
@@ -0,0 +1,280 @@
+// Copyright (c) 2014-2017 The Bitcoin Core developers
@luke-jr

luke-jr Apr 17, 2017

Member

Standard copyright line says "The Bitcoin Core developers".

Might as well start this one off with the end year 2017.

+// Distributed under the MIT software license, see the accompanying
+// file COPYING or http://www.opensource.org/licenses/mit-license.php.
+
+#include "coinsbyscript.h"
+#include "txdb.h"
+#include "hash.h"
+#include "ui_interface.h"
+
+#include <assert.h>
+
+#include <boost/thread.hpp>
+
+using namespace std;
@luke-jr

luke-jr Apr 17, 2017

Member

Don't do this.

+
+static const char DB_COINS_BYSCRIPT = 'd'; // key prefix, paired with a CScriptID, for per-script coin entries
+static const char DB_FLAG = 'F'; // key prefix, paired with a flag name, for flags stored as '1' / '0'
+static const char DB_BEST_BLOCK = 'B'; // key under which BatchWrite stores the best-block hash
@luke-jr

luke-jr Apr 17, 2017

Member

IMO it would be better to not have overlapping chars between databases, such that they could be combined cleanly if desired. Therefore, I suggest using 'D' for DB_BEST_BLOCK (and eliminating DB_FLAG here entirely).

+
+// Builds a caching view on top of the given script-indexed database,
+// which is remembered as `base`.
+CCoinsViewByScript::CCoinsViewByScript(CCoinsViewByScriptDB* viewIn)
+    : base(viewIn)
+{
+}
+
+// Looks up the coin set indexed under `script` and copies it into `coins`.
+// The in-memory cache is consulted first; on a miss the backing database is
+// queried and a successful result is stored in the cache.
+// Returns true if an entry was found in either place, false otherwise.
+bool CCoinsViewByScript::GetCoinsByScript(const CScript &script, CCoinsByScript &coins) {
+    const CScriptID key = CScriptID(script);
@luke-jr

luke-jr Apr 17, 2017

Member

This isn't the purpose of CScriptID, which is specific to P2SH addresses. Suggest having a static ScriptIndexHash function that returns a (possibly typedef'd) uint160.

+    // A single find() replaces the former count() + operator[] double lookup.
+    const CCoinsMapByScript::iterator cached = cacheCoinsByScript.find(key);
+    if (cached != cacheCoinsByScript.end()) {
+        coins = cached->second;
+        return true;
+    }
+    if (base->GetCoinsByScriptID(key, coins)) {
+        // The key is known to be absent, so insert using the failed lookup as hint.
+        cacheCoinsByScript.emplace_hint(cached, key, coins);
+        return true;
+    }
+    return false;
+}
+
+// Returns an iterator to the cache entry for `script`, pulling it from the
+// backing database when it is not cached yet. If the database has no entry:
+//  - with fRequireExisting set, cacheCoinsByScript.end() is returned;
+//  - otherwise an entry is still created in the cache and returned.
+CCoinsMapByScript::iterator CCoinsViewByScript::FetchCoinsByScript(const CScript &script, bool fRequireExisting) {
@luke-jr

luke-jr Apr 17, 2017

Member

The name sucks. :/

+    const CScriptID scriptKey = CScriptID(script);
+    const CCoinsMapByScript::iterator cachedPos = cacheCoinsByScript.find(scriptKey);
+    if (cachedPos != cacheCoinsByScript.end()) {
+        return cachedPos;
+    }
+
+    CCoinsByScript loaded;
+    const bool fInDatabase = base->GetCoinsByScriptID(scriptKey, loaded);
+    if (!fInDatabase && fRequireExisting) {
+        return cacheCoinsByScript.end();
+    }
+
+    // Cache whatever `loaded` holds, including the case where the database
+    // had nothing and the caller did not insist on an existing entry.
+    return cacheCoinsByScript.emplace_hint(cachedPos, scriptKey, loaded);
+}
+
+// Returns a mutable reference to the cached coin set for `script`, loading it
+// through FetchCoinsByScript. Asserts that FetchCoinsByScript produced an
+// entry, so with fRequireExisting set a missing script trips the assertion.
+CCoinsByScript &CCoinsViewByScript::GetCoinsByScript(const CScript &script, bool fRequireExisting) {
@luke-jr

luke-jr Apr 17, 2017

Member

Name also sucks, and overlaps with a fairly different usage-case.

+    const CCoinsMapByScript::iterator entry = FetchCoinsByScript(script, fRequireExisting);
+    assert(entry != cacheCoinsByScript.end());
@luke-jr

luke-jr Apr 17, 2017

Member

Rather throw an exception here.

+    return entry->second;
+}
+
+// Returns the best-block hash currently held by this view (see SetBestBlock).
+uint256 CCoinsViewByScript::GetBestBlock() const
+{
+    return hashBlock;
+}
+
+// Records `hashBlockIn` as this view's best-block hash; Flush() passes it on
+// to the database.
+void CCoinsViewByScript::SetBestBlock(const uint256 &hashBlockIn)
+{
+    hashBlock = hashBlockIn;
+}
+
+// Hands this view (its cached entries) and the current best-block hash to the
+// backing database's BatchWrite and returns that call's result.
+bool CCoinsViewByScript::Flush()
+{
+    return base->BatchWrite(this, hashBlock);
+}
+
+// Opens the database located in the "coinsbyscript" subdirectory of the data
+// directory, forwarding the cache size, in-memory and wipe settings.
+CCoinsViewByScriptDB::CCoinsViewByScriptDB(size_t nCacheSize, bool fMemory, bool fWipe)
+    : db(GetDataDir() / "coinsbyscript", nCacheSize, fMemory, fWipe, true)
+{
+}
+
+// Reads the coin set stored under (DB_COINS_BYSCRIPT, scriptID) into `coins`.
+// Returns the result of the database read.
+bool CCoinsViewByScriptDB::GetCoinsByScriptID(const CScriptID &scriptID, CCoinsByScript &coins) const
+{
+    const std::pair<char, CScriptID> dbKey(DB_COINS_BYSCRIPT, scriptID);
+    return db.Read(dbKey, coins);
+}
+
+// Writes every cached script entry of `pcoinsViewByScriptIn` to the database
+// in one batch and then empties that cache. Entries whose coin set is empty
+// are erased from the database instead of being written. When `hashBlock` is
+// non-null it is stored under DB_BEST_BLOCK in the same batch.
+// Returns the result of committing the batch.
+bool CCoinsViewByScriptDB::BatchWrite(CCoinsViewByScript* pcoinsViewByScriptIn, const uint256 &hashBlock) {
+    CDBBatch batch(db);
+    size_t count = 0;
+    // The whole cache is cleared right after the loop, so entries are not
+    // erased one by one while iterating.
+    for (auto& entry : pcoinsViewByScriptIn->cacheCoinsByScript) {
+        if (entry.second.IsEmpty())
+            batch.Erase(std::make_pair(DB_COINS_BYSCRIPT, entry.first));
+        else
+            batch.Write(std::make_pair(DB_COINS_BYSCRIPT, entry.first), entry.second);
@luke-jr

luke-jr Apr 17, 2017

Member

I don't see why this is necessary: we clear the entire cache when complete.

Eliminating this erase allows simplifying the entire loop to a normal C++11 for-each.

+        count++;
+    }
+    pcoinsViewByScriptIn->cacheCoinsByScript.clear();
+
+    if (!hashBlock.IsNull())
+        batch.Write(DB_BEST_BLOCK, hashBlock);
+
+    // `count` is a size_t, matching the %zu conversion; no narrowing cast.
+    LogPrint(BCLog::COINDB, "Committing %zu coin address indexes to coin database...\n", count);
+    return db.WriteBatch(batch);
+}
+
+// Stores the boolean flag `name` under (DB_FLAG, name), encoded as the
+// character '1' (true) or '0' (false). Returns the result of the write.
+bool CCoinsViewByScriptDB::WriteFlag(const std::string &name, bool fValue)
+{
+    const char encoded = fValue ? '1' : '0';
+    return db.Write(std::make_pair(DB_FLAG, name), encoded);
+}
+
+// Reads the flag stored under (DB_FLAG, name). On success `fValue` is set to
+// true exactly when the stored character is '1' and true is returned; if the
+// read fails, `fValue` is left untouched and false is returned.
+bool CCoinsViewByScriptDB::ReadFlag(const std::string &name, bool &fValue)
+{
+    char stored;
+    if (db.Read(std::make_pair(DB_FLAG, name), stored)) {
+        fValue = (stored == '1');
+        return true;
+    }
+    return false;
+}
+
+// Allocates (with `new`) and returns a cursor positioned at the first record
+// at or after the DB_COINS_BYSCRIPT prefix. The key of that record is cached
+// in the cursor; when the seek finds nothing, the cached key prefix is set to
+// 0 so the cursor reports itself as invalid.
+CCoinsViewByScriptDBCursor *CCoinsViewByScriptDB::Cursor() const
+{
+    /* It seems that there are no "const iterators" for LevelDB. Since we
+       only need read operations on it, use a const-cast to get around
+       that restriction. */
+    CDBWrapper* const mutableDb = const_cast<CDBWrapper*>(&db);
+    CCoinsViewByScriptDBCursor* const cursor = new CCoinsViewByScriptDBCursor(mutableDb->NewIterator());
+
+    cursor->pcursor->Seek(DB_COINS_BYSCRIPT);
+    if (cursor->pcursor->Valid()) {
+        // Cache key of first record
+        cursor->pcursor->GetKey(cursor->keyTmp);
+    } else {
+        // If db empty then set this cursor invalid
+        cursor->keyTmp.first = 0;
+    }
+    return cursor;
+}
+
+// Copies the cached script id of the current record into `key`.
+// Returns false (leaving `key` untouched) when the cached key does not carry
+// the DB_COINS_BYSCRIPT prefix.
+bool CCoinsViewByScriptDBCursor::GetKey(CScriptID &key) const
+{
+    if (keyTmp.first != DB_COINS_BYSCRIPT) {
+        return false;
+    }
+    key = keyTmp.second;
+    return true;
+}
+
+// Reads the value of the underlying cursor's current record into `coins`;
+// returns that read's result.
+bool CCoinsViewByScriptDBCursor::GetValue(CCoinsByScript &coins) const {
+    return pcursor->GetValue(coins);
+}
+
+// Forwards to the underlying cursor's GetValueSize() for the current record.
+unsigned int CCoinsViewByScriptDBCursor::GetValueSize() const {
+    return pcursor->GetValueSize();
+}
+
+// The cursor is valid while the cached key carries the DB_COINS_BYSCRIPT
+// prefix; Cursor() and Next() reset that prefix to 0 to invalidate it.
+bool CCoinsViewByScriptDBCursor::Valid() const {
+    return keyTmp.first == DB_COINS_BYSCRIPT;
+}
+
+// Advances the underlying cursor and refreshes the cached key.
+void CCoinsViewByScriptDBCursor::Next()
+{
+    pcursor->Next();
+    const bool fHaveKey = pcursor->Valid() && pcursor->GetKey(keyTmp);
+    if (!fHaveKey) {
+        // Invalidate cached key after last record so that Valid() and GetKey() return false
+        keyTmp.first = 0;
+    }
+}
+
+// Removes the whole script index from the database: the DB_BEST_BLOCK marker
+// and every record under the DB_COINS_BYSCRIPT prefix.
+// Returns true on success. Returns false (after logging through error()) when
+// a batch cannot be written or an exception is thrown while iterating.
+bool CCoinsViewByScriptDB::DeleteAllCoinsByScript()
+{
+    // Drop the best-block marker first, so that an interrupted deletion never
+    // leaves a partially deleted index that still carries a best-block hash.
+    {
+        CDBBatch markerBatch(db);
+        markerBatch.Erase(DB_BEST_BLOCK);
+        if (!db.WriteBatch(markerBatch))
+            return error("%s : Failed to erase best block marker", __func__);
+    }
+
+    std::unique_ptr<CCoinsViewByScriptDBCursor> pcursor(Cursor());
+
+    std::vector<CScriptID> v;
+    int64_t i = 0;
+
+    // Erases all keys collected in `v` with one batch, adds them to the
+    // running total and empties `v`. Returns false if the batch write fails.
+    auto eraseCollected = [&]() -> bool {
+        CDBBatch batch(db);
+        for (const CScriptID& collected : v)
+            batch.Erase(std::make_pair(DB_COINS_BYSCRIPT, collected)); // delete
+        if (!db.WriteBatch(batch))
+            return false;
+        i += v.size();
+        v.clear();
+        return true;
+    };
+
+    while (pcursor->Valid()) {
+        boost::this_thread::interruption_point();
+        try {
+            CScriptID hash;
+            if (!pcursor->GetKey(hash))
+                break;
+            v.push_back(hash);
+            // Delete in chunks of 10000 keys rather than collecting every key
+            // first (presumably to bound memory use and batch size).
+            if (v.size() >= 10000)
@luke-jr

luke-jr Apr 17, 2017

Member

This is lacking comments explaining why.

+            {
+                if (!eraseCollected())
+                    return error("%s : Failed to write deletion batch", __func__);
+            }
+
+            pcursor->Next();
+        } catch (const std::exception &e) {
+            return error("%s : Deserialize or I/O error - %s", __func__, e.what());
+        }
+    }
+    // Delete whatever is left over from the last, partially filled chunk.
+    if (!v.empty() && !eraseCollected())
+        return error("%s : Failed to write deletion batch", __func__);
+    if (i > 0)
+        LogPrintf("Address index with %d addresses successfully deleted.\n", i);
+
+    return true;
@luke-jr

luke-jr Apr 17, 2017

Member

Fails to delete DB_BEST_BLOCK

+}
+
+// Builds the script index by walking every record of `coinsIn`: each output
+// that is neither null nor unspendable is added, as an outpoint, to the coin
+// set of its scriptPubKey. Entries are accumulated in memory and written to
+// the database whenever 10000 distinct scripts have been collected, plus once
+// more at the end. Progress is reported through uiInterface.ShowProgress.
+// Returns true on success. Returns false (after logging through error()) when
+// a batch cannot be written or an exception is thrown while iterating.
+// NOTE(review): DB_BEST_BLOCK is not written by this function.
+bool CCoinsViewByScriptDB::GenerateAllCoinsByScript(CCoinsViewDB* coinsIn)
+{
+    LogPrintf("Building address index for -txoutindex. Be patient...\n");
+    const int64_t nTxCount = coinsIn->CountCoins();
+
+    std::unique_ptr<CCoinsViewCursor> pcursor(coinsIn->Cursor());
+
+    CCoinsMapByScript mapCoinsByScript;
+    int64_t i = 0;
+    int64_t progress = 0;
+    while (pcursor->Valid()) {
+        boost::this_thread::interruption_point();
+        try {
+            if (progress % 1000 == 0 && nTxCount > 0)
+                uiInterface.ShowProgress(_("Building address index..."), (int)(((double)progress / (double)nTxCount) * (double)100));
+            progress++;
+
+            uint256 txhash;
+            CCoins coins;
+            if (!pcursor->GetKey(txhash) || !pcursor->GetValue(coins))
+                break;
+
+            for (size_t j = 0; j < coins.vout.size(); ++j)
@luke-jr

luke-jr Apr 17, 2017

Member

Use size_t and ++j. It may also be better to do this backward:

for (size_t j = coins.vout.size(); j--; ) {

(note j-- in this case because we want to look at the pre-decrement value)

+            {
+                if (coins.vout[j].IsNull() || coins.vout[j].scriptPubKey.IsUnspendable())
+                    continue;
+
+                const CScriptID key = CScriptID(coins.vout[j].scriptPubKey);
+                // One find() per output; on a miss the entry is seeded with
+                // whatever is already stored for this script in the database.
+                CCoinsMapByScript::iterator pos = mapCoinsByScript.find(key);
+                if (pos == mapCoinsByScript.end())
+                {
+                    CCoinsByScript coinsByScript;
+                    GetCoinsByScriptID(key, coinsByScript);
+                    pos = mapCoinsByScript.insert(std::make_pair(key, coinsByScript)).first;
+                }
+                pos->second.setCoins.insert(COutPoint(txhash, (uint32_t)j));
+                i++;
+            }
+
+            if (mapCoinsByScript.size() >= 10000)
@luke-jr

luke-jr Apr 17, 2017

Member

Since we're doing partial writes, we should ensure DB_BEST_BLOCK is cleared before we begin.

+            {
+                CDBBatch batch(db);
+                for (auto& entry : mapCoinsByScript) {
+                    if (entry.second.IsEmpty())
+                        batch.Erase(std::make_pair(DB_COINS_BYSCRIPT, entry.first));
+                    else
+                        batch.Write(std::make_pair(DB_COINS_BYSCRIPT, entry.first), entry.second);
@luke-jr

luke-jr Apr 17, 2017

Member

Needless erase

+                }
+                // A failed write must not be reported as a successful build.
+                if (!db.WriteBatch(batch))
+                    return error("%s : Failed to write address index batch", __func__);
+                mapCoinsByScript.clear();
+            }
+
+            pcursor->Next();
+        } catch (const std::exception &e) {
+            return error("%s : Deserialize or I/O error - %s", __func__, e.what());
+        }
+    }
+    if (!mapCoinsByScript.empty())
+    {
+        CDBBatch batch(db);
+        for (auto& entry : mapCoinsByScript) {
+            if (entry.second.IsEmpty())
+                batch.Erase(std::make_pair(DB_COINS_BYSCRIPT, entry.first));
+            else
+                batch.Write(std::make_pair(DB_COINS_BYSCRIPT, entry.first), entry.second);
@luke-jr

luke-jr Apr 17, 2017

Member

Needless erase

+        }
+        if (!db.WriteBatch(batch))
+            return error("%s : Failed to write address index batch", __func__);
+    }
+    LogPrintf("Address index with %d outputs successfully built.\n", i);
+    return true;
@luke-jr

luke-jr Apr 17, 2017

Member

DB_BEST_BLOCK is never written here.

+}
Oops, something went wrong.