Skip to content

Commit

Permalink
Refactor export.py to use height from block inv
Browse files Browse the repository at this point in the history
  • Loading branch information
ayeowch committed May 28, 2022
1 parent 625c9af commit 6de9532
Show file tree
Hide file tree
Showing 4 changed files with 110 additions and 48 deletions.
3 changes: 3 additions & 0 deletions conf/export.conf.default
Original file line number Diff line number Diff line change
Expand Up @@ -14,3 +14,6 @@ debug = False

# Relative path to directory containing timestamp-prefixed JSON export files
export_dir = data/export/f9beb4d9

# List of recent block heights from external URL
block_heights_url = https://bitnodes.io/api/v1/bitcoind/getblockheights/
3 changes: 1 addition & 2 deletions crawl.py
Original file line number Diff line number Diff line change
Expand Up @@ -227,7 +227,7 @@ def restart(timestamp):
Removes keys for all nodes from current crawl.
Updates included ASNs with current list from external URL.
Updates excluded networks with current list of bogons.
Updates number of reachable nodes and most common height in Redis.
Updates number of reachable nodes in Redis.
"""
redis_pipe = REDIS_CONN.pipeline()

Expand Down Expand Up @@ -265,7 +265,6 @@ def restart(timestamp):
REDIS_CONN.lpush('nodes', (timestamp, reachable_nodes))

height = dump(timestamp, nodes)
REDIS_CONN.set('height', height)
logging.info("Height: %d", height)


Expand Down
150 changes: 105 additions & 45 deletions export.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,11 @@
import json
import logging
import os
import requests
import sys
import time
from binascii import hexlify, unhexlify
from collections import Counter
from ConfigParser import ConfigParser

from utils import new_redis_conn
Expand All @@ -42,52 +44,108 @@
CONF = {}


def get_row(node):
class Export(object):
"""
Returns enumerated row data from Redis for the specified node.
Exports nodes into timestamp-prefixed JSON file and sets consensus height
using the most common height from these nodes.
"""
# address, port, version, user_agent, timestamp, services
node = eval(node)
address = node[0]
port = node[1]
services = node[-1]

height = REDIS_CONN.get('height:{}-{}-{}'.format(address, port, services))
if height is None:
height = (0,)
else:
height = (int(height),)

hostname = REDIS_CONN.hget('resolve:{}'.format(address), 'hostname')
hostname = (hostname,)

geoip = REDIS_CONN.hget('resolve:{}'.format(address), 'geoip')
if geoip is None:
# city, country, latitude, longitude, timezone, asn, org
geoip = (None, None, 0.0, 0.0, None, None, None)
else:
geoip = eval(geoip)

return node + height + hostname + geoip


def export_nodes(nodes, timestamp):
"""
Merges enumerated data for the specified nodes and exports them into
timestamp-prefixed JSON file.
"""
rows = []
start = time.time()
for node in nodes:
row = get_row(node)
rows.append(row)
end = time.time()
elapsed = end - start
logging.info("Elapsed: %d", elapsed)

dump = os.path.join(CONF['export_dir'], "{}.json".format(timestamp))
open(dump, 'w').write(json.dumps(rows, encoding="latin-1"))
logging.info("Wrote %s", dump)
def __init__(self, timestamp, nodes):
    """
    Stores the export timestamp and the nodes to export, then loads the
    latest heights used when building rows.
    """
    # Taken first so that the elapsed time logged at the end of
    # export_nodes() also covers the height lookup below.
    self.start_t = time.time()
    self.nodes, self.timestamp = nodes, timestamp
    # get_heights() reads self.timestamp, so it has to run after the
    # assignment above.
    self.heights = self.get_heights()

def export_nodes(self):
    """
    Merges enumerated data for the nodes and exports them into
    timestamp-prefixed JSON file and then sets consensus height in Redis
    using the most common height from these nodes.

    The consensus height is only updated when at least one height was
    collected; otherwise the existing 'height' key in Redis is left as is.
    """
    rows = [self.get_row(node) for node in self.nodes]

    if self.heights:
        # most_common(1) yields [(height, count)]; keep only the height.
        height = Counter(self.heights.values()).most_common(1)[0][0]
        logging.info("Consensus height: %s", height)
        REDIS_CONN.set('height', height)

    dump = os.path.join(
        CONF['export_dir'], "{}.json".format(self.timestamp))
    # Context manager ensures the file is flushed and closed before the
    # "Wrote" message is logged, instead of relying on garbage collection
    # of the anonymous file object.
    with open(dump, 'w') as dump_file:
        dump_file.write(json.dumps(rows, encoding="latin-1"))
    logging.info("Wrote %s", dump)

    logging.info("Elapsed: %d", time.time() - self.start_t)

def get_row(self, node):
    """
    Builds the export row for the specified node: the node tuple stored in
    Redis extended with its height, hostname and GeoIP fields.
    """
    # NOTE(review): eval() is applied to values read back from Redis here
    # and for the geoip field below; this presumably relies on those values
    # being written only by trusted code - confirm.
    # address, port, version, user_agent, timestamp, services
    node = eval(node)
    address, port, services = node[0], node[1], node[-1]

    node_key = '{}-{}'.format(address, port)
    if node_key in self.heights:
        # Height from received block inv message in ping.py.
        height = (self.heights[node_key],)
    else:
        # Height from handshake in crawl.py; 0 when none is stored.
        handshake_height = REDIS_CONN.get(
            'height:{}-{}-{}'.format(address, port, services))
        height = (
            (0,) if handshake_height is None else (int(handshake_height),))
        logging.debug("Using handshake height %s: %s", node, height)

    resolve_key = 'resolve:{}'.format(address)
    hostname = (REDIS_CONN.hget(resolve_key, 'hostname'),)

    stored_geoip = REDIS_CONN.hget(resolve_key, 'geoip')
    # city, country, latitude, longitude, timezone, asn, org
    geoip = (
        (None, None, 0.0, 0.0, None, None, None)
        if stored_geoip is None else eval(stored_geoip))

    return node + height + hostname + geoip

def get_heights(self):
    """
    Returns the latest heights based on received block inv messages.

    Recent blocks are fetched from CONF['block_heights_url']. For each
    block that is not newer than the export timestamp, every node in the
    block's 'binv:<hash>' sorted set whose score (epoch ms) is at or before
    the export timestamp is assigned that block's height. A node keeps the
    height of the first matching block, in the order returned by the URL.

    Returns a dict mapping 'ADDRESS-PORT' to block height. The dict is
    empty when the block list cannot be fetched or parsed, so the export
    can still proceed with handshake heights.
    """
    heights = {}
    recent_blocks = []
    timestamp_ms = self.timestamp * 1000

    try:
        response = requests.get(CONF['block_heights_url'], timeout=15)
    except requests.exceptions.RequestException as err:
        logging.warning(err)
    else:
        if response.status_code == 200:
            try:
                recent_blocks = response.json()['blocks'] or []
            except (ValueError, KeyError, TypeError) as err:
                # A malformed or unexpected body is treated like a failed
                # request instead of aborting the whole export.
                logging.warning("Invalid block heights response: %s", err)
        else:
            logging.warning(
                "Unexpected status code for block heights: %s",
                response.status_code)

    for block in recent_blocks:
        block_height, block_time, block_hash = block
        # Skip blocks that are newer than this export.
        if block_time > self.timestamp:
            continue

        key = "binv:{}".format(block_hash)
        # [('ADDRESS-PORT', EPOCH_MS),..]
        nodes = REDIS_CONN.zrangebyscore(
            key, '-inf', '+inf', withscores=True, score_cast_func=int)
        for n, t in nodes:
            if n not in heights and t <= timestamp_ms:
                heights[n] = block_height

    logging.info("Heights: %d", len(heights))
    return heights


def init_conf(argv):
Expand All @@ -101,6 +159,7 @@ def init_conf(argv):
CONF['db'] = conf.getint('export', 'db')
CONF['debug'] = conf.getboolean('export', 'debug')
CONF['export_dir'] = conf.get('export', 'export_dir')
CONF['block_heights_url'] = conf.get('export', 'block_heights_url')
if not os.path.exists(CONF['export_dir']):
os.makedirs(CONF['export_dir'])

Expand Down Expand Up @@ -146,7 +205,8 @@ def main(argv):
logging.info("Timestamp: %d", timestamp)
nodes = REDIS_CONN.smembers('opendata')
logging.info("Nodes: %d", len(nodes))
export_nodes(nodes, timestamp)
export = Export(timestamp, nodes)
export.export_nodes()
REDIS_CONN.publish(publish_key, timestamp)

return 0
Expand Down
2 changes: 1 addition & 1 deletion seeder.py
Original file line number Diff line number Diff line change
Expand Up @@ -176,7 +176,7 @@ def get_consensus_height(self):
height = REDIS_CONN.get('height')
if height:
height = int(height)
logging.info("Consensus. height: %s", height)
logging.info("Consensus height: %s", height)
return height

def get_min_age(self):
Expand Down

0 comments on commit 6de9532

Please sign in to comment.