Skip to content

Commit

Permalink
Update linearize scripts.
Browse files Browse the repository at this point in the history
Break into two steps:
* Generate hash list
* Build data file(s) from local bitcoind blocks/ directory.

This supports building one large bootstrap.dat, or multiple
smaller blocks/blkNNNNN.dat files.
  • Loading branch information
Jeff Garzik committed Aug 24, 2014
1 parent 57fe1ea commit 476eb7e
Show file tree
Hide file tree
Showing 4 changed files with 226 additions and 39 deletions.
31 changes: 29 additions & 2 deletions contrib/linearize/README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,29 @@
### Linearize ###
Construct a linear, no-fork, best version of the blockchain.
# Linearize
Construct a linear, no-fork, best version of the blockchain.

## Step 1: Download hash list

$ ./linearize-hashes.py linearize.cfg > hashlist.txt

Required configuration file settings for linearize-hashes:
* RPC: rpcuser, rpcpassword

Optional config file settings for linearize-hashes:
* RPC: host, port
* Block chain: min_height, max_height

## Step 2: Copy local block data

$ ./linearize-data.py linearize.cfg

Required configuration file settings for linearize-data:
* "input": bitcoind blocks/ directory containing blkNNNNN.dat
* "hashlist": text file containing list of block hashes (the output of
linearize-hashes.py).
* "output_file": bootstrap.dat
or
* "output": output directory for linearized blocks/blkNNNNN.dat output

Optional config file settings for linearize-data:
* "netmagic": network magic number

12 changes: 8 additions & 4 deletions contrib/linearize/example-linearize.cfg
Original file line number Diff line number Diff line change
@@ -1,12 +1,16 @@

# bitcoind RPC settings
# bitcoind RPC settings (linearize-hashes)
rpcuser=someuser
rpcpassword=somepassword
host=127.0.0.1
port=8332

# bootstrap.dat settings
# bootstrap.dat hashlist settings (linearize-hashes)
max_height=313000

# bootstrap.dat input/output settings (linearize-data)
netmagic=f9beb4d9
max_height=279000
output=bootstrap.dat
input=/home/example/.bitcoin/blocks
output_file=/home/example/Downloads/bootstrap.dat
hashlist=hashlist.txt

182 changes: 182 additions & 0 deletions contrib/linearize/linearize-data.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,182 @@
#!/usr/bin/python
#
# linearize-data.py: Construct a linear, no-fork version of the chain.
#
# Copyright (c) 2013 The Bitcoin developers
# Distributed under the MIT/X11 software license, see the accompanying
# file COPYING or http://www.opensource.org/licenses/mit-license.php.
#

import json
import struct
import re
import base64
import httplib
import sys
import hashlib

# Size threshold (128 MiB) that copydata() uses to decide when to start a
# new output blkNNNNN.dat file when writing to an output directory.
MAX_OUT_SZ = 128 * 1024 * 1024

# Configuration key/value pairs; filled in from the config file by the
# __main__ block of this script.
settings = {}


def uint32(x):
    """Return the low 32 bits of integer x as a non-negative integer."""
    # No 'L' suffix on the mask: Python 2 promotes to long automatically,
    # and the suffix is a syntax error on Python 3.
    return x & 0xffffffff

def bytereverse(x):
    """Swap the byte order of the 32-bit word x (b0 b1 b2 b3 -> b3 b2 b1 b0).

    The combined value is truncated to 32 bits by uint32().
    """
    lowest_to_top = x << 24
    second_to_third = (x << 8) & 0x00ff0000
    third_to_second = (x >> 8) & 0x0000ff00
    top_to_lowest = x >> 24
    swapped = lowest_to_top | second_to_third | third_to_second | top_to_lowest
    return uint32(swapped)

def bufreverse(in_buf):
    """Reverse the byte order inside every 4-byte word of in_buf.

    The order of the words themselves is unchanged.  in_buf must be a byte
    string whose length is a multiple of 4; otherwise struct.error is
    raised when the short final chunk is unpacked.
    """
    out_words = []
    for i in range(0, len(in_buf), 4):
        # Reading the word little-endian and writing it back big-endian
        # swaps its four bytes, independent of the host byte order.
        (word,) = struct.unpack('<I', in_buf[i:i + 4])
        out_words.append(struct.pack('>I', word))
    # Join with a bytes literal so this also works on Python 3 byte strings
    # (b'' and '' are the same object type on Python 2).
    return b''.join(out_words)

def wordreverse(in_buf):
    """Reverse the order of the 4-byte words in in_buf.

    Bytes inside each word keep their order.  If the length is not a
    multiple of 4, the short final chunk is moved to the front like any
    other word.
    """
    words = [in_buf[i:i + 4] for i in range(0, len(in_buf), 4)]
    # Join with a bytes literal so this also works on Python 3 byte strings
    # (b'' and '' are the same object type on Python 2).
    return b''.join(reversed(words))

def calc_hdr_hash(rawblock):
    """Return the raw double-SHA256 digest of the first 80 bytes of rawblock.

    Anything after the 80-byte header prefix is ignored.
    """
    header = rawblock[:80]
    first_pass = hashlib.sha256(header).digest()
    return hashlib.sha256(first_pass).digest()

def calc_hash_str(rawblock):
    """Return the block header hash of rawblock as a lowercase hex string.

    The raw digest from calc_hdr_hash() is byte-swapped within each 4-byte
    word and then has its word order reversed; for the 32-byte digest this
    amounts to reversing all of its bytes before hex-encoding.
    """
    # Local import: binascii is not among this file's top-level imports.
    import binascii

    digest = calc_hdr_hash(rawblock)
    digest = wordreverse(bufreverse(digest))

    # binascii.hexlify replaces the Python 2-only str.encode('hex').  It
    # returns str on Python 2 and bytes on Python 3, so normalise to str.
    hash_str = binascii.hexlify(digest)
    if not isinstance(hash_str, str):
        hash_str = hash_str.decode('ascii')
    return hash_str

def get_block_hashes(settings):
    """Read the block hash list named by settings['hashlist'].

    Returns a list with one entry per line of the file, in file order, with
    trailing whitespace (including the newline) stripped.  The number of
    entries read is printed.
    """
    # 'with' guarantees the file is closed, even if reading fails part-way.
    with open(settings['hashlist'], "r") as f:
        blkindex = [line.rstrip() for line in f]

    print("Read " + str(len(blkindex)) + " hashes")

    return blkindex

def mkblockset(blkindex):
    """Build a lookup dict that maps every hash in blkindex to True."""
    return dict.fromkeys(blkindex, True)

def copydata(settings, blkindex, blkset):
    """Copy the blocks listed in blkset out of bitcoind's blkNNNNN.dat files.

    Input files are read in sequence from settings['input'], starting with
    blk00000.dat.  Each record is an 8-byte header (4-byte network magic
    followed by a little-endian 32-bit payload length) and then the raw
    block.  Records whose header hash is in blkset are copied, header
    included, either to settings['output_file'] or -- when settings['output']
    is present -- to numbered blkNNNNN.dat files in that directory, starting
    a new file before the current one would exceed MAX_OUT_SZ.

    Copying stops when the next input file does not exist, when a record
    carries the wrong magic, or when a record is truncated.  A missing
    blk00000.dat (or any other open failure) is still raised as IOError.

    settings -- dict with 'input', 'netmagic' (raw bytes) and either
                'output_file' or 'output'
    blkindex -- list of block hashes.  NOTE(review): it is not consulted
                here, so blocks are written in the order they are found on
                disk, not in hashlist order.
    blkset   -- container of hex block-hash strings to keep
    """
    # Local imports: neither module is among this file's top-level imports.
    import binascii
    import errno

    # Directory output is selected by the presence of the 'output' key.
    fileOutput = 'output' not in settings

    inFn = 0
    inF = None
    inName = None
    outFn = 0
    outsz = 0
    outF = None
    blkCount = 0

    try:
        while True:
            if not inF:
                inName = "%s/blk%05d.dat" % (settings['input'], inFn)
                try:
                    inF = open(inName, "rb")
                except IOError as e:
                    # Running past the last numbered file is the normal way
                    # for the copy to end; anything else is a real error.
                    if inFn == 0 or e.errno != errno.ENOENT:
                        raise
                    print("No more input files, wrote " + str(blkCount) + " blocks")
                    return
                print("Input file " + inName)

            inhdr = inF.read(8)
            # End of file, or a leading zero byte where a record should
            # start: move on to the next input file.  Slicing (not
            # indexing) keeps the comparison valid for Python 3 bytes.
            if not inhdr or inhdr[:1] == b"\0":
                inF.close()
                inF = None
                inFn = inFn + 1
                continue
            if len(inhdr) < 8:
                print("Truncated block header in " + inName)
                return

            inMagic = inhdr[:4]
            if inMagic != settings['netmagic']:
                # Print the magic as hex rather than raw binary.
                print("Invalid magic: " + binascii.hexlify(inMagic).decode('ascii'))
                return
            inLen = struct.unpack("<I", inhdr[4:])[0]
            rawblock = inF.read(inLen)
            if len(rawblock) != inLen:
                # Never copy a partial block under a header that claims
                # the full length.
                print("Truncated block in " + inName)
                return

            hash_str = calc_hash_str(rawblock)
            if hash_str not in blkset:
                print("Skipping unknown block " + hash_str)
                continue

            # In directory mode, rotate to the next output file once this
            # block would push the current one past MAX_OUT_SZ.
            if outF and not fileOutput and (outsz + inLen) > MAX_OUT_SZ:
                outF.close()
                outF = None
                outFn = outFn + 1
                outsz = 0
            if not outF:
                if fileOutput:
                    outName = settings['output_file']
                else:
                    outName = "%s/blk%05d.dat" % (settings['output'], outFn)
                print("Output file " + outName)
                outF = open(outName, "wb")

            outF.write(inhdr)
            outF.write(rawblock)
            outsz = outsz + inLen + 8

            blkCount = blkCount + 1

            if (blkCount % 1000) == 0:
                print("Wrote " + str(blkCount) + " blocks")
    finally:
        # Release both handles on every exit path so written data is
        # flushed to disk.
        if inF:
            inF.close()
        if outF:
            outF.close()

# Script entry point: load the config file into the module-level settings
# dict, apply defaults, then copy the blocks listed in the hashlist.
if __name__ == '__main__':
    # Local import: binascii is not among this file's top-level imports.
    import binascii

    if len(sys.argv) != 2:
        print("Usage: linearize-data.py CONFIG-FILE")
        sys.exit(1)

    # 'with' closes the config file even if parsing raises.
    with open(sys.argv[1]) as f:
        for line in f:
            # skip comment lines
            if re.search(r'^\s*#', line):
                continue

            # parse key=value lines
            m = re.search(r'^(\w+)\s*=\s*(\S.*)$', line)
            if m is None:
                continue
            # Strip trailing whitespace (e.g. the '\r' of a CRLF file) so
            # it does not end up inside paths or the hex magic.
            settings[m.group(1)] = m.group(2).rstrip()

    settings.setdefault('netmagic', 'f9beb4d9')
    settings.setdefault('input', 'input')
    settings.setdefault('hashlist', 'hashlist.txt')

    # The magic is configured as hex text but compared against raw bytes
    # read from the block files.
    settings['netmagic'] = binascii.unhexlify(settings['netmagic'])

    if 'output_file' not in settings and 'output' not in settings:
        print("Missing output file / directory")
        sys.exit(1)

    blkindex = get_block_hashes(settings)
    blkset = mkblockset(blkindex)

    # Sanity check: refuse to run unless the hashlist contains this
    # hard-coded block hash.
    if "000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f" not in blkset:
        print("Genesis block hash not found in hashlist")
        sys.exit(1)

    copydata(settings, blkindex, blkset)


40 changes: 7 additions & 33 deletions contrib/linearize/linearize.py → contrib/linearize/linearize-hashes.py
100644 → 100755
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
#!/usr/bin/python
#
# linearize.py: Construct a linear, no-fork, best version of the blockchain.
#
# linearize-hashes.py: List blocks in a linear, no-fork version of the chain.
#
# Copyright (c) 2013 The Bitcoin developers
# Distributed under the MIT/X11 software license, see the accompanying
Expand All @@ -15,9 +14,6 @@
import httplib
import sys

ERR_SLEEP = 15
MAX_NONCE = 1000000L

settings = {}

class BitcoinRPC:
Expand Down Expand Up @@ -62,34 +58,18 @@ def getblock(self, hash, verbose=True):
def getblockhash(self, index):
return self.rpc('getblockhash', [index])

def getblock(rpc, settings, n):
hash = rpc.getblockhash(n)
hexdata = rpc.getblock(hash, False)
data = hexdata.decode('hex')

return data

def get_blocks(settings):
def get_block_hashes(settings):
rpc = BitcoinRPC(settings['host'], settings['port'],
settings['rpcuser'], settings['rpcpassword'])

outf = open(settings['output'], 'ab')

for height in xrange(settings['min_height'], settings['max_height']+1):
data = getblock(rpc, settings, height)

outhdr = settings['netmagic']
outhdr += struct.pack("<i", len(data))
hash = rpc.getblockhash(height)

outf.write(outhdr)
outf.write(data)

if (height % 1000) == 0:
sys.stdout.write("Wrote block " + str(height) + "\n")
print(hash)

if __name__ == '__main__':
if len(sys.argv) != 2:
print "Usage: linearize.py CONFIG-FILE"
print "Usage: linearize-hashes.py CONFIG-FILE"
sys.exit(1)

f = open(sys.argv[1])
Expand All @@ -106,27 +86,21 @@ def get_blocks(settings):
settings[m.group(1)] = m.group(2)
f.close()

if 'netmagic' not in settings:
settings['netmagic'] = 'f9beb4d9'
if 'output' not in settings:
settings['output'] = 'bootstrap.dat'
if 'host' not in settings:
settings['host'] = '127.0.0.1'
if 'port' not in settings:
settings['port'] = 8332
if 'min_height' not in settings:
settings['min_height'] = 0
if 'max_height' not in settings:
settings['max_height'] = 279000
settings['max_height'] = 313000
if 'rpcuser' not in settings or 'rpcpassword' not in settings:
print "Missing username and/or password in cfg file"
sys.exit(1)

settings['netmagic'] = settings['netmagic'].decode('hex')
settings['port'] = int(settings['port'])
settings['min_height'] = int(settings['min_height'])
settings['max_height'] = int(settings['max_height'])

get_blocks(settings)

get_block_hashes(settings)

0 comments on commit 476eb7e

Please sign in to comment.