# Block reading examples

Examples and functions for decoding serialised blocks. These functions form the basis of the methods in py3.Blocks.

Includes:
  - read_next() - read the next n bytes
  - read_var() - read a variable number of bytes, depnding on first byte
  - read_head() - read the whole block header
  - read_trans() - read a whole transaction


In [1]:
from datetime import datetime as dt
import mmap
import codecs

# Reading functions

In [2]:
def read_next(m, cursor,
              length=None,
              asHex=True,
              rev=False,
              pr=False):
    """
    Read next input with specified length
    """

    start = cursor
    end = cursor + length
    out = m[start:end]

    if rev:
        out = out[::-1]

    if asHex:
        out = codecs.encode(out, "hex")

    if pr:
        print("{0}-{1}: {2}".format(start, end, out))

    return out


def read_var(m, cursor,
             pr=False):
    """
    Read next variable length input. These are described in specifiction:
    https://en.bitcoin.it/wiki/Protocol_documentation#Variable_length_integer

    Returns output and number of steps taken by cursor
    """

    # Get the next byte
    by = ord(m[cursor:cursor+1])
    if pr:
        print(by)
    cursor += 1
    steps = 1

    if by < 253:  # 0xfd
        # Return as is
        # by is already int here
        out = by
    elif by == 253:
        # Read next 2 bytes
        # Reverse endedness
        # Convert to int in base 16
        out = int(read_next(m, cursor, 2,
                            rev=True), 16)
        steps += 2
    elif by == 254:  # 0xfe
        # Read next 4 bytes, convert as above
        out = int(read_next(m, cursor, 4,
                            rev=True), 16)
        steps += 4
    elif by == 255:  # 0xff
        # Read next 8 bytes, convert as above
        out = int(read_next(m, cursor, 8,
                            rev=True), 16)
        steps += 8

    if pr:
        print(out)

    return out, steps

# Read the genesis block

Each block has one header and a variable number of transactions. 
Each transaction has one header and variable numbers of inputs and outputs.

The genesis block contains only one transaction, which has one input (coinbase) and output.

First open the file for reading.

In [3]:
f = '../Blocks/blk00000.dat'
blk = open(f, 'rb')
m = mmap.mmap(blk.fileno(), 0,
              access=mmap.ACCESS_READ)

## Read header

In [29]:
cursor = 0

# Read magic number: 4 bytes
magic = read_next(m, cursor, 4)
cursor += 4
print('magic: {0}'.format(magic))

# Read block size: 4 bytes
blockSize = read_next(m, cursor, 4,
                      rev=True)
blockSize = int(blockSize, 16)
cursor += 4
print('block_size: {0}'.format(blockSize))

# Read version: 4 bytes
version = read_next(m, cursor, 4)
cursor += 4
print('version: {0}'.format(version))

# Read the previous hash: 32 bytes
prevHash = read_next(m, cursor, 32,
                     rev=True)
cursor += 32
print('prevHash: {0}'.format(prevHash))

# Read the merkle root: 32 bytes
merkleRootHash = read_next(m, cursor, 32)
cursor += 32
print('merkle_root: {0}'.format(merkleRootHash))

# Read the time stamp: 32 bytes
timestamp = read_next(m, cursor, 4,
                      rev=True)
cursor += 4
print('times: {0}'.format(dt.fromtimestamp(int(timestamp, 16))))

# Read the size: 4 bytes
nBits = read_next(m, cursor, 4)
cursor += 4
print('nBits: {0}'.format(nBits))

# Read the nonce: 4 bytes
nonce = read_next(m, cursor, 4)
cursor += 4
print('nonce: {0}'.format(nonce))

# Read the number of transactions: varint (1-9 bytes)
nTransactions, steps = read_var(m, cursor)
cursor += steps
print('n transactions: {0}'.format(nTransactions))

magic: b'f9beb4d9'
block_size: 285
version: b'01000000'
prevHash: b'0000000000000000000000000000000000000000000000000000000000000000'
merkle_root: b'3ba3edfd7a7b12b27ac72c3e67768f617fc81bc3888a51323a9fb8aa4b1e5e4a'
times: 2009-01-03 18:15:05
nBits: b'ffff001d'
nonce: b'1dac2b7c'
n transactions: 1


## Read the transaction header

In [30]:
tVersion = read_next(m, cursor, 4)
print("{0}-{1}: Version: {2}".format(cursor, cursor+4, tVersion))
cursor += 4

89-93: Version: b'01000000'


## Read the single transaction input

In [31]:
# Read number of inputs: varint (1-9 bytes)
nInputs, steps = read_var(m, cursor)
print("{0}-{1}: nInputs: {2}".format(cursor, cursor+1, nInputs))
cursor += steps

# Read the inputs (previous_outputs): 32 bytes
prevOutput = read_next(m, cursor, 32)
print("{0}-{1}: prevOutput: {2}".format(cursor,
                                        cursor+32,
                                        prevOutput))
cursor += 32

prevIndex = read_next(m, cursor, 4)
print("    {0}-{1}: prevIndex: {2}".format(cursor,
                                           cursor+4,
                                           prevIndex))
cursor += 4
        
# Read the script length: 1 byte
scriptLength, steps = read_var(m, cursor)
print("{0}-{1}: scriptLength: {2}".format(cursor,
                                          cursor+1,
                                          scriptLength))
cursor += 1

# Read the script sig: Variable
scriptSig = read_next(m, cursor, scriptLength)
print("{0}-{1}: scriptSig: {2}".format(cursor,
                                       cursor+scriptLength,
                                       scriptSig))
cursor += scriptLength

# Read sequence: 4 bytes
sequence = read_next(m, cursor, 4)
print("{0}-{1}: sequence: {2}".format(cursor,
                                      cursor+1,
                                      sequence))
cursor += 4

93-94: nInputs: 1
94-126: prevOutput: b'0000000000000000000000000000000000000000000000000000000000000000'
    126-130: prevIndex: b'ffffffff'
130-131: scriptLength: 77
131-208: scriptSig: b'04ffff001d0104455468652054696d65732030332f4a616e2f32303039204368616e63656c6c6f72206f6e206272696e6b206f66207365636f6e64206261696c6f757420666f722062616e6b73'
208-209: sequence: b'ffffffff'


## Read the single transaction output

In [33]:
# Read number of outputs:  varint (1-9 bytes)
nOutputs, steps = read_var(m, cursor)
print(nOutputs)
print("{0}-{1}: nOutputs: {2}".format(cursor, cursor+1, nOutputs))
cursor += steps


# Read value: 8 bytes
value = read_next(m, cursor, 8)
print("{0}-{1}: output value: {2}".format(cursor,
                                          cursor+8,
                                          value))
cursor += 8

# pk script
pkScriptLen, steps = read_var(m, cursor)
print("{0}-{1}: pkScriptLen: {2}".format(cursor,
                                         cursor+steps,
                                         pkScriptLen))
cursor += steps

pkScript = read_next(m, cursor, pkScriptLen)
print("{0}-{1}: pkScript: {2}".format(cursor,
                                      cursor+pkScriptLen,
                                      pkScript))
cursor += pkScriptLen

# lock time: 4 bytes
lockTime = read_next(m, cursor, 4)
print("{0}-{1}: lockTime: {2}".format(cursor, cursor+4, lockTime))
cursor += 4

1
212-213: nOutputs: 1
213-221: output value: b'00f2052a01000000'
221-222: pkScriptLen: 67
222-289: pkScript: b'4104678afdb0fe5548271967f1a67130b7105cd6a828e03909a67962e0ea1f61deb649f6bc3f4cef38c4f35504e51ec112de5c384df7ba0b8d578a4c702b6bf11d5fac'
289-293: lockTime: b'00000000'


# Header and transaction reading functions

Function versions of the above. These loop over the variable fields to handle cases where there are >1 transactions, transaction inputs or transaction outputs.

In [40]:
def read_header(m, cursor,
                pr=True):
    """
    Read:
        - Block header:
            - Magic number
            - Block size
            - Version
            - Previous hash
            - Merkle root hash
            - Timestamp
            - Size
            - Nonce
        - Number of transactions in the bock

    Note here this function only returns cursor position and the number of
    transactions to read with read_trans. The rest of the information is just
    printed.
    In the Block class this information will be saved to the object.
    """
    if pr:
        print(cursor)

    # Read magic number: 4 bytes
    magic = read_next(m, cursor, 4)
    cursor += 4
    if pr:
        print('magic: {0}'.format(magic))

    # Read block size: 4 bytes
    blockSize = read_next(m, cursor, 4,
                          rev=True)
    blockSize = int(blockSize, 16)
    cursor += 4
    if pr:
        print('block_size: {0}'.format(blockSize))

    # Read version: 4 bytes
    version = read_next(m, cursor, 4)
    cursor += 4
    if pr:
        print('version: {0}'.format(version))

    # Read the previous hash: 32 bytes
    prevHash = read_next(m, cursor, 32,
                         rev=True)
    cursor += 32
    if pr:
        print('prevHash: {0}'.format(prevHash))

    # Read the merkle root: 32 bytes
    merkleRootHash = read_next(m, cursor, 32)
    cursor += 32
    if pr:
        print('merkle_root: {0}'.format(merkleRootHash))

    # Read the time stamp: 32 bytes
    timestamp = read_next(m, cursor, 4,
                          rev=True)
    cursor += 4
    if pr:
        print('timestamp: {0}'.format(timestamp))
        print('times: {0}'.format(dt.fromtimestamp(int(timestamp, 16))))

    # Read the size: 4 bytes
    nBits = read_next(m, cursor, 4)
    cursor += 4
    if pr:
        print('nBits: {0}'.format(nBits))

    # Read the nonce: 4 bytes
    nonce = read_next(m, cursor, 4)
    cursor += 4
    if pr:
        print('nonce: {0}'.format(nonce))

    # Read the number of transactions: varint (1-9 bytes)
    nTransactions, steps = read_var(m, cursor)
    cursor += steps
    if pr:
        print('n transactions: {0}'.format(nTransactions))

    if pr:
        print(cursor)

    return cursor, nTransactions


def read_trans(m, cursor,
               pr=True):
    """
    Read transaction header (just version) and inputs and outputs.

    Note here this function only returns cursor position and prints the other
    information. In the transaction class this information will be saved to
    the object.
    """
    tVersion = read_next(m, cursor, 4)
    if pr:
        print("  {0}-{1}: Version: {2}".format(cursor, cursor+4, tVersion))
    cursor += 4

    # Read number of inputs: varint (1-9 bytes)
    nInputs, steps = read_var(m, cursor)
    if pr:
        print("  {0}-{1}: nInputs: {2}".format(cursor, cursor+1, nInputs))
    cursor += steps

    # Read each input
    inputs = []
    print(nInputs)
    for inp in range(nInputs):
        # Read the inputs (previous_outputs): 32 bytes
        prevOutput = read_next(m, cursor, 32)
        if pr:
            print("    {0}-{1}: prevOutput: {2}".format(cursor,
                                                        cursor+32,
                                                        prevOutput))
        cursor += 32

        # Read the index of the previous output (input): 4 bytes
        prevIndex = read_next(m, cursor, 4)
        if pr:
            print("    {0}-{1}: prevIndex: {2}".format(cursor,
                                                       cursor+4,
                                                       prevIndex))
        cursor += 4

        # Read the script length: Variable bytes
        scriptLength, steps = read_var(m, cursor)
        if pr:
            print("    {0}-{1}: scriptLength: {2}".format(cursor,
                                                          cursor+steps,
                                                          scriptLength))
        cursor += steps

        # Read the script sig: Variable
        scriptSig = read_next(m, cursor, scriptLength)
        if pr:
            print("    {0}-{1}: scriptSig: {2}".format(cursor,
                                                       cursor+scriptLength,
                                                       scriptSig))
        cursor += scriptLength

        # Read sequence: 4 bytes
        sequence = read_next(m, cursor, 4)
        if pr:
            print("    {0}-{1}: sequence: {2}".format(cursor,
                                                      cursor+1,
                                                      sequence))
        cursor += 4

        # Compile input info
        txIn = {'n': inp,
                'prevOutput': prevOutput,
                'scriptLength': scriptLength,
                'scriptSig': scriptSig,
                'sequence': sequence}

        # Collect into list of inputs
        inputs.append(txIn)

    # Read number of outputs:  varint (1-9 bytes)
    nOutputs, steps = read_var(m, cursor)
    print(nOutputs)
    if pr:
        print("  {0}-{1}: nOutputs: {2}".format(cursor, cursor+1, nOutputs))
    cursor += steps

    outputs = []
    for oup in range(nOutputs):
        # Read value: 8 bytes
        value = read_next(m, cursor, 8)
        if pr:
            print("    {0}-{1}: output value: {2}".format(cursor,
                                                          cursor+8,
                                                          value))
        cursor += 8

        # pk script
        pkScriptLen, steps = read_var(m, cursor)
        if pr:
            print("    {0}-{1}: pkScriptLen: {2}".format(cursor,
                                                         cursor+steps,
                                                         pkScriptLen))
        cursor += steps

        pkScript = read_next(m, cursor, pkScriptLen)
        if pr:
            print("    {0}-{1}: pkScript: {2}".format(cursor,
                                                      cursor+pkScriptLen,
                                                      pkScript))
        cursor += pkScriptLen

        # Compile output info
        txOut = {'n': oup,
                 'value': value,
                 'pkScriptLen': pkScriptLen,
                 'pkScript ': pkScript}

        # Add to list of outputs
        outputs.append(txOut)

    # lock time: 4 bytes
    lockTime = read_next(m, cursor, 4)
    if pr:
        print("  {0}-{1}: lockTime: {2}".format(cursor, cursor+4, lockTime))
    cursor += 4

    return cursor


# Read block

## Load .dat

In [44]:
f = '../Blocks/blk00000.dat'
blk = open(f, 'rb')
m = mmap.mmap(blk.fileno(), 0,
              access=mmap.ACCESS_READ)

## Read first block

In [45]:
block = 0
cursor = 0
print("\n\nBlock {0}".format(block))
cursor, nTransactions = read_header(m, cursor)
for t in range(nTransactions):
    print("\nTRANSACTION {0}/{1}".format(t+1, nTransactions))
    cursor = read_trans(m, cursor)

print("\nExpected {0}, and read {1} transactions read from block 1".format(
        nTransactions, t+1))




Block 0
0
magic: b'f9beb4d9'
block_size: 285
version: b'01000000'
prevHash: b'0000000000000000000000000000000000000000000000000000000000000000'
merkle_root: b'3ba3edfd7a7b12b27ac72c3e67768f617fc81bc3888a51323a9fb8aa4b1e5e4a'
timestamp: b'495fab29'
times: 2009-01-03 18:15:05
nBits: b'ffff001d'
nonce: b'1dac2b7c'
n transactions: 1
89

TRANSACTION 1/1
  89-93: Version: b'01000000'
  93-94: nInputs: 1
1
    94-126: prevOutput: b'0000000000000000000000000000000000000000000000000000000000000000'
    126-130: prevIndex: b'ffffffff'
    130-131: scriptLength: 77
    131-208: scriptSig: b'04ffff001d0104455468652054696d65732030332f4a616e2f32303039204368616e63656c6c6f72206f6e206272696e6b206f66207365636f6e64206261696c6f757420666f722062616e6b73'
    208-209: sequence: b'ffffffff'
1
  212-213: nOutputs: 1
    213-221: output value: b'00f2052a01000000'
    221-222: pkScriptLen: 67
    222-289: pkScript: b'4104678afdb0fe5548271967f1a67130b7105cd6a828e03909a67962e0ea1f61deb649f6bc3f4cef38c4f35504e51e

## Second block

Cursor position and block count continue from cell above.


In [46]:
# %% Second block
block += 1
print("\n\nBlock {0}".format(block))
cursor, nTransactions = read_header(m, cursor)
for t in range(nTransactions):
    print("\nTRANSACTION {0}/{1}".format(t+1, nTransactions))
    cursor = read_trans(m, cursor)

print("\nExpected {0}, and read {1} transactions read from block 2".format(
        nTransactions, t+1))




Block 1
293
magic: b'f9beb4d9'
block_size: 215
version: b'01000000'
prevHash: b'000000000019d6689c085ae165831e934ff763ae46a2a6c172b3f1b60a8ce26f'
merkle_root: b'982051fd1e4ba744bbbe680e1fee14677ba1a3c3540bf7b1cdb606e857233e0e'
timestamp: b'4966bc61'
times: 2009-01-09 02:54:25
nBits: b'ffff001d'
nonce: b'01e36299'
n transactions: 1
382

TRANSACTION 1/1
  382-386: Version: b'01000000'
  386-387: nInputs: 1
1
    387-419: prevOutput: b'0000000000000000000000000000000000000000000000000000000000000000'
    419-423: prevIndex: b'ffffffff'
    423-424: scriptLength: 7
    424-431: scriptSig: b'04ffff001d0104'
    431-432: sequence: b'ffffffff'
1
  435-436: nOutputs: 1
    436-444: output value: b'00f2052a01000000'
    444-445: pkScriptLen: 67
    445-512: pkScript: b'410496b538e853519c726a2c91e61ec11600ae1390813a627c66fb8be7947be63c52da7589379515d4e0a604f8141781e62294721166bf621e73a82cbf2342c858eeac'
  512-516: lockTime: b'00000000'

Expected 1, and read 1 transactions read from block 2
