In [2]:
"""
transactions.ipynb

The goal of this notebook is to extend the analysis in blocks.ipynb
to the transactions within each block, for more detail.
"""

import os
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import typing as tp

from ape import accounts, chain, Contract, networks
from scipy import stats

In [3]:
# Connect to the 'ethereum:mainnet:alchemy' network choice.
# The context manager returned by parse_network_choice is entered by hand
# instead of through a `with` block, and nothing visible in this notebook
# calls __exit__ on it -- presumably so the connection stays usable in the
# cells below.
# SEE: https://gist.github.com/banteg/dcf6082ff7fc6ad51ce220146f29d9ff
networks.parse_network_choice('ethereum:mainnet:alchemy').__enter__()

<alchemy chain_id=1>

In [5]:
# look at the current block: index -1 selects the last entry of chain.blocks
current_block = chain.blocks[-1]
# bare expression so the notebook displays the block
current_block

Block(num_transactions=111, hash=HexBytes('0x78531930c5d099842e4db1acfccdeabb4e52837654722b9be33bba65457783e4'), number=15775055, parent_hash=HexBytes('0x68c76e401c5b27c995330769e5602b2bfc061d89a87f7fa079c9fb99ccbfc8e8'), size=54872, timestamp=1666096895, gas_limit=30000000, gas_used=10508242, base_fee=13916923898, difficulty=0, total_difficulty=58750003716598352816469)

In [6]:
# some relevant numbers
# first block requested by load_blocks (passed as start_block to the query)
start_block_number = 15338009 # ~ 1 month before merge
# NOTE(review): presumably the block number at which the merge happened -- confirm
merge_block_number = 15537394
# number of the current_block fetched above
current_block_number = current_block.number

In [7]:
# load EL blocks from csv if already queried, else do the query and save in csv
# SEE: blocks.ipynb
def load_blocks(fp: str) -> pd.DataFrame:
    """
    Return the EL block data, using the csv file at `fp` as a cache.

    If `fp` exists it is read with `pd.read_csv` and returned. Otherwise all
    block fields ('*') are queried from `chain.blocks`, starting at the
    notebook-level `start_block_number`; the result is written to `fp`
    (without the index) and returned.

    Args:
        fp: path of the csv cache file.

    Returns:
        Dataframe of blocks, either read back from the csv or freshly queried.
        NOTE(review): the two paths are not guaranteed to produce identical
        dtypes (e.g. hashes presumably come back from the csv as text) -- verify
        before relying on either representation.
    """
    if os.path.exists(fp):
        return pd.read_csv(fp)

    # Create the target directory *before* running the query: the query is
    # very slow and its result would be lost if to_csv failed afterwards
    # because the directory does not exist.
    os.makedirs(os.path.dirname(fp) or '.', exist_ok=True)

    qb = chain.blocks.query('*', start_block=start_block_number)
    qb.to_csv(fp, index=False)
    return qb

In [8]:
# get blocks over the last 2 months (mid August to mid October)
# NOTE: the query takes roughly 15h when the csv cache does not exist yet;
# %time reports how long this call actually took
%time qb = load_blocks('./data/blocks.csv')

CPU times: user 921 ms, sys: 84.9 ms, total: 1.01 s
Wall time: 1.04 s


In [9]:
# display the loaded blocks dataframe (the notebook shows a truncated view)
qb

Unnamed: 0,num_transactions,hash,number,parent_hash,size,timestamp,gas_limit,gas_used,base_fee,difficulty,total_difficulty
0,97,"b'\xf0\x82\x95\x1e@\xe4by\xfc,\x15\xd6\xb6xFw\...",15338009,b'm\xb0\x02|\xa9}\x84\xa3\xdd\xc1\x84\xb6\xe4\...,39308,1660456780,30000000,9954560,8122412901,12045723921070914,56321843715293942409414
1,305,b'\xdb\x91:\xfc8Qk\xc1c\x82\xe2\xdbA\xe9b#R\xe...,15338010,"b'\xf0\x82\x95\x1e@\xe4by\xfc,\x15\xd6\xb6xFw\...",99210,1660456787,30000000,22464133,7780903344,12051743061157721,56321855767037003567135
2,51,b'\xb0\x0c2\x7f$\xc0\xec$i\xbf\xd9\xcb\x13\x0c...,15338011,b'\xdb\x91:\xfc8Qk\xc1c\x82\xe2\xdbA\xe9b#R\xe...,13806,1660456811,30000000,3781159,8264884155,12045995859944613,56321867813032863511748
3,24,"b""0\xbc\xf0\xa6\x89\xf1w\x1d\x9d\x05\x1c\xc8\x...",15338012,b'\xb0\x0c2\x7f$\xc0\xec$i\xbf\xd9\xcb\x13\x0c...,7824,1660456814,29970705,1551283,7492197312,12057896966730061,56321879870929830241809
4,63,b'`/\xd0\xe0\x7fvc>\x94\xf2\xd1\xca\xbf\x11\x9...,15338013,"b""0\xbc\xf0\xa6\x89\xf1w\x1d\x9d\x05\x1c\xc8\x...",16003,1660456819,29941438,3174533,6652621642,12063922050686819,56321891934851880928628
...,...,...,...,...,...,...,...,...,...,...,...
416045,189,b'\xecP\xf7g\x08\xc2\x19\x12_\x0c\xd8X\x92\x9b...,15754054,b'\x12Y[\x91y.\xb8\xcf\xd7\xca\xb5\x81\x87\x82...,82178,1665843383,30000000,18028304,14001417244,0,58750003716598352816469
416046,253,b'n\xf2R{ \xa9t\xabUD\xbc\x8e\x04B\x87\xb3\t\x...,15754055,b'\xecP\xf7g\x08\xc2\x19\x12_\x0c\xd8X\x92\x9b...,159816,1665843395,30000000,29936690,14354755142,0,58750003716598352816469
416047,137,b'\xb7\x06\xba9s\x7f>p\xd1\xa3\x16\xb0)Y\n\xd4...,15754056,b'n\xf2R{ \xa9t\xabUD\xbc\x8e\x04B\x87\xb3\t\x...,71368,1665843407,30000000,14425421,16141526205,0,58750003716598352816469
416048,117,b'\x86\xd9n\xfb+\x9e\n\xbc\xb3\x99\x155@\xbb\x...,15754057,b'\xb7\x06\xba9s\x7f>p\xd1\xa3\x16\xb0)Y\n\xd4...,56671,1665843419,30000000,11198548,16064238022,0,58750003716598352816469


In [50]:
from functools import partial
from typing import List

from ape.api.providers import BlockAPI, TransactionAPI
from ape.api.query import BlockTransactionQuery, extract_fields, validate_and_expand_columns

# for each block in blocks query, query for transactions
def transaction_container_query(block: BlockAPI, *columns: str) -> pd.DataFrame:
    """
    Query the transactions of a single block and return them as a dataframe.

    Implements what could be a transaction "container" query analogous
    to https://github.com/ApeWorX/ape/blob/main/src/ape/managers/chain.py#L94
    but for transactions.

    Args:
        block: block whose transactions are queried; its `hash`, `number`
            and `query_manager` attributes are used.
        *columns: transaction field names, each passed as its own string
            argument ('*' is what this notebook passes).

    Returns:
        Dataframe with one row per transaction returned by the query, holding
        the expanded columns plus `block_hash` and `block_number` columns
        filled with the values of `block`.
    """
    # perform BlockTransactionQuery
    # SEE: https://github.com/ApeWorX/ape/blob/main/src/ape/api/providers.py#L92
    query = BlockTransactionQuery(columns=columns, block_id=block.hash)
    transactions = block.query_manager.query(query)

    # put into a dataframe; separate names are used so the caller-supplied
    # `columns` and the raw query result are not silently rebound
    # NOTE: this might not include all fields we want!
    expanded_columns = validate_and_expand_columns(columns, TransactionAPI)
    rows = [extract_fields(tx, columns=expanded_columns) for tx in transactions]
    df = pd.DataFrame(columns=expanded_columns, data=rows)

    # add in columns for block number and block hash then return
    df['block_hash'] = [block.hash] * len(df)
    df['block_number'] = [block.number] * len(df)
    return df

In [51]:
# smoke test: query all transaction fields ('*') for the current block -- works :)
transaction_container_query(current_block, '*')

Unnamed: 0,chain_id,receiver,sender,gas_limit,nonce,value,data,type,max_fee,max_priority_fee,required_confirmations,signature,block_hash,block_number
0,1,0x00000000219ab540356cBB839Cbe05303d7705Fa,0xBdD75A97c29294FF805FB2fEe65aBd99492b32A8,200000,6660,32000000000000000000,b'0x228951180000000000000000000000000000000000...,0x00,0,,7,<TransactionSignature v=38 r=27a0fd9a2d752cdfe...,b'xS\x190\xc5\xd0\x99\x84.M\xb1\xac\xfc\xcd\xe...,15775055
1,1,0x7a250d5630B4cF539739dF2C5dAcb4c659F2488D,0xF2c15f61C98798693ba3E5d2fbC63cD6D5096a46,1200000,47,0,b'0x791ac9470000000000000000000000000000000000...,0x00,0,,7,<TransactionSignature v=38 r=347d30b794b556ba8...,b'xS\x190\xc5\xd0\x99\x84.M\xb1\xac\xfc\xcd\xe...,15775055
2,1,0x7a250d5630B4cF539739dF2C5dAcb4c659F2488D,0x9d825600A05d9083AfE97ab10be990178eC22358,1200000,606,0,b'0x791ac9470000000000000000000000000000000000...,0x00,0,,7,<TransactionSignature v=37 r=c93aa4bb6de7aec5a...,b'xS\x190\xc5\xd0\x99\x84.M\xb1\xac\xfc\xcd\xe...,15775055
3,1,0x7a250d5630B4cF539739dF2C5dAcb4c659F2488D,0xdaBfeD9DC78Ba5747220505061CA7F399ECe35e4,410000,638,0,b'0x791ac9470000000000000000000000000000000000...,0x00,0,,7,<TransactionSignature v=38 r=fdbf47edef9d5ce55...,b'xS\x190\xc5\xd0\x99\x84.M\xb1\xac\xfc\xcd\xe...,15775055
4,1,0x7a250d5630B4cF539739dF2C5dAcb4c659F2488D,0x6a75Ab6741Ef07b927d22c3befbfB7794E395a40,890000,1935,0,b'0x791ac9470000000000000000000000000000000000...,0x00,0,,7,<TransactionSignature v=38 r=b32347019363f27db...,b'xS\x190\xc5\xd0\x99\x84.M\xb1\xac\xfc\xcd\xe...,15775055
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
106,1,0xd4B6BaDC53eb1e66f452554333c03A5A94f20D22,0x11b5627E7d5498B809273cE8795F36c794FB8AA9,21000,0,11606000000000000,b'0x',0x00,0,,7,<TransactionSignature v=37 r=3de8fcd9c51ed160c...,b'xS\x190\xc5\xd0\x99\x84.M\xb1\xac\xfc\xcd\xe...,15775055
107,1,0xf8F8a98D04Dc78a83A19014b5Dd171B5eDFCe483,0xeB5699B91335f6f4Aa51B8554100689966940d09,21000,18,4513645481968470000,b'0x',0x00,0,,7,<TransactionSignature v=37 r=72026af8ef1f272cd...,b'xS\x190\xc5\xd0\x99\x84.M\xb1\xac\xfc\xcd\xe...,15775055
108,1,0x9351b082D94cBc149d076407EdA22716FC72eCEb,0xFb2c91240aE310D051Ca1168b29b3adE28F3b22A,21000,0,3000000000000000,b'0x',0x00,0,,7,<TransactionSignature v=38 r=7ef8058cb671904a9...,b'xS\x190\xc5\xd0\x99\x84.M\xb1\xac\xfc\xcd\xe...,15775055
109,1,0xCD18eAa163733Da39c232722cBC4E8940b1D8888,0x31188536865De4593040fAfC4e175E190518e4Ef,99085,50,0,b'0x7a9b2c6c0000000000000000000000000000000000...,0x02,13960670496,1.396067e+10,7,<TransactionSignature v=0 r=f3f6db5181ce022086...,b'xS\x190\xc5\xd0\x99\x84.M\xb1\xac\xfc\xcd\xe...,15775055


In [None]:
# TODO: get txs in each block over last 2 months (since mid Aug to mid October)

In [55]:
# load EL transactions from csv if already queried, else do the query and save in csv
def load_transactions(fp: str, qb: pd.DataFrame) -> pd.DataFrame:
    """
    Return the transactions of the blocks in `qb`, using the csv at `fp` as a cache.

    If `fp` exists it is read with `pd.read_csv` and returned. Otherwise each
    block listed in `qb['number']` is looked up through `chain.blocks`, its
    transactions are fetched with `transaction_container_query(block, '*')`,
    and the per-block frames are concatenated, written to `fp` (without the
    index) and returned.

    Args:
        fp: path of the csv cache file.
        qb: blocks dataframe; only its `number` column is read.

    Returns:
        Dataframe with one row per transaction and a fresh 0..n-1 index.
        When `qb` has no rows an empty dataframe is returned and nothing is
        written, so an unreadable empty csv is never left behind as a cache.
    """
    if os.path.exists(fp):
        return pd.read_csv(fp)

    block_numbers = [int(number) for number in qb['number']]
    if not block_numbers:
        return pd.DataFrame()

    # make sure the result can be saved before spending time on the queries
    os.makedirs(os.path.dirname(fp) or '.', exist_ok=True)

    # A dataframe row is not a block object, so each block is fetched by its
    # number before its transactions are queried.
    # NOTE: this issues one transaction query per block.
    frames = [
        transaction_container_query(chain.blocks[number], '*')
        for number in block_numbers
    ]

    # flatten into one dataframe; ignore_index gives the same kind of plain
    # running index that the csv path returns
    qt = pd.concat(frames, ignore_index=True)

    # save to csv and return
    qt.to_csv(fp, index=False)
    return qt

In [56]:
# last four rows of the blocks dataframe: the small slice the TODO below
# intends to pass to load_transactions
qb[-4:]

Unnamed: 0,num_transactions,hash,number,parent_hash,size,timestamp,gas_limit,gas_used,base_fee,difficulty,total_difficulty
416046,253,b'n\xf2R{ \xa9t\xabUD\xbc\x8e\x04B\x87\xb3\t\x...,15754055,b'\xecP\xf7g\x08\xc2\x19\x12_\x0c\xd8X\x92\x9b...,159816,1665843395,30000000,29936690,14354755142,0,58750003716598352816469
416047,137,b'\xb7\x06\xba9s\x7f>p\xd1\xa3\x16\xb0)Y\n\xd4...,15754056,b'n\xf2R{ \xa9t\xabUD\xbc\x8e\x04B\x87\xb3\t\x...,71368,1665843407,30000000,14425421,16141526205,0,58750003716598352816469
416048,117,b'\x86\xd9n\xfb+\x9e\n\xbc\xb3\x99\x155@\xbb\x...,15754057,b'\xb7\x06\xba9s\x7f>p\xd1\xa3\x16\xb0)Y\n\xd4...,56671,1665843419,30000000,11198548,16064238022,0,58750003716598352816469
416049,132,b'\xb4G\x9e\xa6\xa0\x18\x8d\xee\xb0\x15\x9faK\...,15754058,b'\x86\xd9n\xfb+\x9e\n\xbc\xb3\x99\x155@\xbb\x...,56908,1665843431,30000000,10183036,15555342775,0,58750003716598352816469


In [58]:
# TODO: load_transactions('./data/transactions.csv', qb[-4:])