## Prepare data reader for a given chain and date

In [1]:
from op_analytics.coreutils.duckdb_inmem import init_client
from op_analytics.coreutils.partitioned.reader import DataReader
from op_analytics.coreutils.partitioned.location import DataLocation
from op_analytics.datapipeline.etl.intermediate.construct import construct_data_readers

from op_analytics.datapipeline.models.compute.udfs import create_duckdb_macros


# Define the input data range.
read_batches: list[DataReader] = construct_data_readers(
    chains=["op"],
    models=["refined_transactions_traces_address_models"],
    range_spec="@20241118:+1",
    read_from=DataLocation.GCS
)


# Select input for one date and build the intermediate model inputs.
batch = read_batches[0]


duckdb_client = init_client()
create_duckdb_macros(duckdb_client)


[2m2024-12-13 18:21:53[0m [[32m[1mdebug    [0m] [1mconnecting to OPLABS Clickhouse client...[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m25[0m [36mprocess[0m=[35m79296[0m
[2m2024-12-13 18:21:53[0m [[32m[1minfo     [0m] [1mloaded vault from .env file   [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m32[0m [36mprocess[0m=[35m79296[0m
[2m2024-12-13 18:21:53[0m [[32m[1mdebug    [0m] [1mloaded vault: 17 items        [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m76[0m [36mprocess[0m=[35m79296[0m
[2m2024-12-13 18:21:54[0m [[32m[1mdebug    [0m] [1minitialized OPLABS Clickhouse client.[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m37[0m [36mprocess[0m=[35m79296[0m
[2m2024-12-13 18:21:54[0m [[32m[1minfo     [0m] [1mprepared 1 input batches.     [0m [36mfilename[0m=[35mbydate.py[0m [36mlineno[0m=[35m96[0m [36mprocess[0m=[35m79296[0m


## Run the model

This automatically registers the model outputs as duckdb tables.

In [2]:
from op_analytics.datapipeline.models.compute.testutils import execute_model_in_memory

execute_model_in_memory(
    duckdb_client=duckdb_client,
    model="refined_transactions_traces_address_models",
    data_reader=batch,
    limit_input_parquet_files=1
)

# The duckdb database will have the following:
#   - input tables
#   - views used by the model
#   - model outputs
# 
# You can use duckdb to inspect any of the above results.
duckdb_client.sql("SHOW TABLES")

[2m2024-12-13 18:21:54[0m [[32m[1minfo     [0m] [1mExecuting model...            [0m [36mfilename[0m=[35mtestutils.py[0m [36mlineno[0m=[35m220[0m [36mprocess[0m=[35m79296[0m
[2m2024-12-13 18:21:54[0m [[32m[1minfo     [0m] [1mduckdb dataset='ingestion/transactions_v1' using 1/22 parquet paths, first path is gs://oplabs-tools-data-sink/ingestion/transactions_v1/chain=op/dt=2024-11-18/000128144000.parquet[0m [36mfilename[0m=[35mreader.py[0m [36mlineno[0m=[35m68[0m [36mprocess[0m=[35m79296[0m
[2m2024-12-13 18:21:55[0m [[32m[1minfo     [0m] [1mregistered view: 'ingestion_transactions_v1' using 1 parquet paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m53[0m [36mprocess[0m=[35m79296[0m
[2m2024-12-13 18:21:55[0m [[32m[1minfo     [0m] [1mduckdb dataset='ingestion/blocks_v1' using 1/22 parquet paths, first path is gs://oplabs-tools-data-sink/ingestion/blocks_v1/chain=op/dt=2024-11-18/000128144000.parquet[0m [36mfilename

┌────────────────────────────────┐
│              name              │
│            varchar             │
├────────────────────────────────┤
│ daily_address_summary          │
│ event_emitting_transactions    │
│ event_emitting_transactions_v1 │
│ ingestion_blocks_v1            │
│ ingestion_logs_v1              │
│ ingestion_traces_v1            │
│ ingestion_transactions_v1      │
│ logs_topic0_filters            │
│ refined_trace_calls            │
│ refined_trace_calls_v1         │
│ refined_transactions_fees      │
│ refined_transactions_fees_v1   │
│ summary_v1                     │
├────────────────────────────────┤
│            13 rows             │
└────────────────────────────────┘

## Verify model results

In [3]:
duckdb_client.sql("""
    SELECT * FROM refined_trace_calls_v1
                  where transaction_hash = '0xc620133c2339f36d8bfae889ea29e9986a70182f7bbe3380d0622f3801619eda'
                  AND block_number = 128145924
                  ORDER BY trace_address ASC
                  LIMIT 10
                  """)

┌────────────┬─────────┬──────────┬─────────┬──────────────┬─────────────────┬────────────────────────────────────────────────────────────────────┬───────────────────┬────────────────────────────────────────────┬────────────────────────────────────────────┬───────────┬──────────┬───────────────┬────────────┬─────────────┬────────────────┬────────────────────────────────────────────┬────────────────────────────────────────────┬───────────────────────┬───────────────────────┬───────────────────────┬───────────────────────────┬───────────────────────┬───────────────────────────────┬──────────────────────┬───────────────────────────┬───────────────────────────────┬───────────────────────────────────┬───────────────────────────┬────────────────────────────────┬──────────────┬─────────────────────┬────────────┬─────────┬────────────┬─────────┬───────────────────────────┬───────────────────────┬─────────────────────────────┬───────────────────────┬──────────────────────────┬──────────────────

In [4]:
duckdb_client.sql("""
    SELECT chain, from_address, to_address
                , SUM(tx_fee_native_per_call_amortized) AS tx_fee_native_amortized
                , SUM(tx_gas_used_per_call_amortized) AS tx_gas_used_amortized
                , COUNT(DISTINCT transaction_hash) AS num_txs
                , COUNT(*) AS num_calls
                  FROM refined_trace_calls_v1
                  GROUP BY 1,2,3
                  ORDER BY 4 DESC
                  LIMIT 10
                  """)

┌─────────┬────────────────────────────────────────────┬────────────────────────────────────────────┬─────────────────────────┬───────────────────────┬─────────┬───────────┐
│  chain  │                from_address                │                 to_address                 │ tx_fee_native_amortized │ tx_gas_used_amortized │ num_txs │ num_calls │
│ varchar │                  varchar                   │                  varchar                   │         double          │        double         │  int64  │   int64   │
├─────────┼────────────────────────────────────────────┼────────────────────────────────────────────┼─────────────────────────┼───────────────────────┼─────────┼───────────┤
│ op      │ 0x0b2c639c533813f4aa9d7837caf62653d097ff85 │ 0xded3b9a8dbedc2f9cb725b55d0e686a81e6d06dc │     0.01984228587366746 │    206503287.08812115 │    6684 │     24102 │
│ op      │ 0x478946bcd4a5a22b316470f5486fafb928c0ba25 │ 0xc28ad28853a547556780bebf7847628501a3bcbb │    0.012270676533798753 │   

In [5]:
duckdb_client.sql("""
    SELECT chain, to_address, method_id
                , SUM(tx_l2_base_fee_native_minus_subtraces) AS tx_l2_base_fee_native_minus_subtraces
                , SUM(tx_l2_fee_native_minus_subtraces) AS tx_l2_fee_native_minus_subtraces
                , SUM(gas_used_minus_subtraces) AS gas_used_minus_subtraces
                , SUM(tx_fee_native_per_call_amortized) AS tx_fee_native_amortized
                , SUM(tx_gas_used_per_call_amortized) AS tx_gas_used_amortized
                , SUM(tx_fee_native_l1_amortized_l2_minus_subtraces) AS tx_fee_native_l1_amortized_l2_minus_subtraces
                , SUM(gas_used_minus_subtraces) AS gas_used_minus_subtraces
                , SUM(tx_gas_used_per_call_amortized) AS tx_gas_used_amortized
                , COUNT(DISTINCT transaction_hash) AS num_txs
                , COUNT(*) AS num_calls
                  FROM refined_trace_calls_v1
                  GROUP BY 1,2,3
                  ORDER BY 4 DESC
                  LIMIT 10
                  """)

┌─────────┬────────────────────────────────────────────┬────────────┬───────────────────────────────────────┬──────────────────────────────────┬──────────────────────────┬─────────────────────────┬───────────────────────┬───────────────────────────────────────────────┬──────────────────────────┬───────────────────────┬─────────┬───────────┐
│  chain  │                 to_address                 │ method_id  │ tx_l2_base_fee_native_minus_subtraces │ tx_l2_fee_native_minus_subtraces │ gas_used_minus_subtraces │ tx_fee_native_amortized │ tx_gas_used_amortized │ tx_fee_native_l1_amortized_l2_minus_subtraces │ gas_used_minus_subtraces │ tx_gas_used_amortized │ num_txs │ num_calls │
│ varchar │                  varchar                   │  varchar   │                double                 │              double              │      decimal(38,0)       │         double          │        double         │                    double                     │      decimal(38,0)       │        double    

In [11]:
duckdb_client.sql("""
    SELECT chain, to_address
                , SUM(tx_l2_base_fee_native_minus_subtraces) AS tx_l2_base_fee_native_minus_subtraces
                , SUM(tx_l2_fee_native_minus_subtraces) AS tx_l2_fee_native_minus_subtraces
                , SUM(gas_used_minus_subtraces) AS gas_used_minus_subtraces
                , SUM(tx_fee_native_per_call_amortized) AS tx_fee_native_amortized
                , SUM(tx_gas_used_per_call_amortized) AS tx_gas_used_amortized
                , SUM(tx_fee_native_l1_amortized_l2_minus_subtraces) AS tx_fee_native_l1_amortized_l2_minus_subtraces
                , SUM(gas_used_minus_subtraces) AS gas_used_minus_subtraces
                , SUM(tx_gas_used_per_call_amortized) AS tx_gas_used_amortized
                , COUNT(DISTINCT transaction_hash) AS num_txs
                , COUNT(*) AS num_calls
                  FROM refined_trace_calls_v1
                  GROUP BY 1,2
                  ORDER BY 4 DESC
                  LIMIT 10
                  """)

┌─────────┬────────────────────────────────────────────┬───────────────────────────────────────┬──────────────────────────────────┬──────────────────────────┬─────────────────────────┬───────────────────────┬───────────────────────────────────────────────┬──────────────────────────┬───────────────────────┬─────────┬───────────┐
│  chain  │                 to_address                 │ tx_l2_base_fee_native_minus_subtraces │ tx_l2_fee_native_minus_subtraces │ gas_used_minus_subtraces │ tx_fee_native_amortized │ tx_gas_used_amortized │ tx_fee_native_l1_amortized_l2_minus_subtraces │ gas_used_minus_subtraces │ tx_gas_used_amortized │ num_txs │ num_calls │
│ varchar │                  varchar                   │                double                 │              double              │      decimal(38,0)       │         double          │        double         │                    double                     │      decimal(38,0)       │        double         │  int64  │   int64   │
├─────────

### Check the data output size

In [12]:
duckdb_client.sql("SELECT COUNT(*) AS interm_num_calls, COUNT(DISTINCT transaction_hash) AS num_txs FROM refined_trace_calls_v1")

┌──────────────────┬─────────┐
│ interm_num_calls │ num_txs │
│      int64       │  int64  │
├──────────────────┼─────────┤
│          1274668 │   33166 │
└──────────────────┴─────────┘

In [7]:
duckdb_client.sql("SELECT method_id FROM refined_trace_calls_v1 LIMIT 10")


┌────────────┐
│ method_id  │
│  varchar   │
├────────────┤
│ 0xa6f19c84 │
│ 0xa34123a7 │
│ 0x514ea4bf │
│ 0xa6f19c84 │
│ 0x4f1eb3d8 │
│ 0x4f1eb3d8 │
│ 0xa9059cbb │
│ 0xdf2ab5bb │
│ 0x2e1a7d4d │
│ 0x2e1a7d4d │
├────────────┤
│  10 rows   │
└────────────┘

### You can also convert the results to dataframes to inspect them in more familiar ways

In [8]:
duckdb_client.sql("SELECT * FROM refined_trace_calls_v1 LIMIT 5").pl()

dt,chain,chain_id,network,block_number,block_timestamp,transaction_hash,transaction_index,from_address,to_address,gas_limit,gas_used,trace_address,trace_type,tx_gas_used,tx_l1_gas_used,tx_from_address,tx_to_address,tx_fee_native,tx_l1_fee_native,tx_l2_fee_native,tx_l2_priority_fee_native,tx_l2_base_fee_native,tx_l2_legacy_extra_fee_native,tx_l2_gas_price_gwei,tx_l2_base_gas_price_gwei,tx_l2_priority_gas_price_gwei,tx_l2_legacy_extra_gas_price_gwei,tx_l1_base_gas_price_gwei,tx_l1_blob_base_gas_price_gwei,tx_method_id,block_hour,tx_success,error,method_id,success,trace_address_cardinality,trace_address_uplevel,count_traces_in_transaction,gas_used_in_subtraces,gas_used_minus_subtraces,tx_l2_fee_native_minus_subtraces,tx_l2_base_fee_native_minus_subtraces,tx_l2_priority_fee_native_minus_subtraces,tx_l2_legacy_base_fee_native_minus_subtraces,tx_gas_used_per_call_amortized,tx_l1_gas_used_per_call_amortized,tx_fee_native_per_call_amortized,tx_l2_fee_native_per_call_amortized,tx_l1_fee_native_per_call_amortized,tx_l2_base_fee_native_per_call_amortized,tx_l2_priority_fee_native_per_call_amortized,tx_fee_native_l1_amortized_l2_minus_subtraces
date,str,i32,str,i64,u32,str,i64,str,str,i64,i64,str,str,i64,i64,str,str,"decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,10]","decimal[38,10]","decimal[38,10]","decimal[38,10]","decimal[38,10]","decimal[38,10]",str,datetime[μs],bool,str,str,bool,i64,str,i64,"decimal[38,0]","decimal[38,0]",f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64,f64
2024-11-18,"""op""",10,"""mainnet""",128144652,1731888081,"""0xd11bb31573669ca4a4cf0df6cdf6…",9,"""0x416b433906b1b72fa758e166e239…","""0x478946bcd4a5a22b316470f5486f…",532667,5136,"""0,1""","""call""",306540,3949,"""0xaa971288ea224d94014d7cafd31f…","""0x416b433906b1b72fa758e166e239…",1.0340242394e-06,2.674575156e-07,7.665667237e-07,7.663530654e-07,2.136583e-10,0.0,0.002500707,6.97e-07,0.00250001,0.0,7.166269782,0.477396198,"""0xac9650d8""",2024-11-18 00:00:00,True,"""""","""0xa6f19c84""",True,2,"""0""",27,2470,2666,6.6669e-09,1.8582e-12,6.665e-09,0.0,11353.333333,146.259259,3.8297e-08,2.8391e-08,9.9058e-09,7.9133e-12,2.8383e-08,1.6573e-08
2024-11-18,"""op""",10,"""mainnet""",128144652,1731888081,"""0xd11bb31573669ca4a4cf0df6cdf6…",9,"""0x416b433906b1b72fa758e166e239…","""0x478946bcd4a5a22b316470f5486f…",525605,84738,"""0,2""","""call""",306540,3949,"""0xaa971288ea224d94014d7cafd31f…","""0x416b433906b1b72fa758e166e239…",1.0340242394e-06,2.674575156e-07,7.665667237e-07,7.663530654e-07,2.136583e-10,0.0,0.002500707,6.97e-07,0.00250001,0.0,7.166269782,0.477396198,"""0xac9650d8""",2024-11-18 00:00:00,True,"""""","""0xa34123a7""",True,2,"""0""",27,84551,187,4.6763e-10,1.3034e-13,4.675e-10,0.0,11353.333333,146.259259,3.8297e-08,2.8391e-08,9.9058e-09,7.9133e-12,2.8383e-08,1.0373e-08
2024-11-18,"""op""",10,"""mainnet""",128144652,1731888081,"""0xd11bb31573669ca4a4cf0df6cdf6…",9,"""0x416b433906b1b72fa758e166e239…","""0x478946bcd4a5a22b316470f5486f…",441068,1250,"""0,3""","""call""",306540,3949,"""0xaa971288ea224d94014d7cafd31f…","""0x416b433906b1b72fa758e166e239…",1.0340242394e-06,2.674575156e-07,7.665667237e-07,7.663530654e-07,2.136583e-10,0.0,0.002500707,6.97e-07,0.00250001,0.0,7.166269782,0.477396198,"""0xac9650d8""",2024-11-18 00:00:00,True,"""""","""0x514ea4bf""",True,2,"""0""",27,1057,193,4.8264e-10,1.3452e-13,4.825e-10,0.0,11353.333333,146.259259,3.8297e-08,2.8391e-08,9.9058e-09,7.9133e-12,2.8383e-08,1.0388e-08
2024-11-18,"""op""",10,"""mainnet""",128144652,1731888081,"""0xd11bb31573669ca4a4cf0df6cdf6…",9,"""0x416b433906b1b72fa758e166e239…","""0x478946bcd4a5a22b316470f5486f…",401968,636,"""1,1""","""call""",306540,3949,"""0xaa971288ea224d94014d7cafd31f…","""0x416b433906b1b72fa758e166e239…",1.0340242394e-06,2.674575156e-07,7.665667237e-07,7.663530654e-07,2.136583e-10,0.0,0.002500707,6.97e-07,0.00250001,0.0,7.166269782,0.477396198,"""0xac9650d8""",2024-11-18 00:00:00,True,"""""","""0xa6f19c84""",True,2,"""1""",27,470,166,4.1512e-10,1.157e-13,4.15e-10,0.0,11353.333333,146.259259,3.8297e-08,2.8391e-08,9.9058e-09,7.9133e-12,2.8383e-08,1.0321e-08
2024-11-18,"""op""",10,"""mainnet""",128144652,1731888081,"""0xd11bb31573669ca4a4cf0df6cdf6…",9,"""0x416b433906b1b72fa758e166e239…","""0x478946bcd4a5a22b316470f5486f…",399004,89966,"""1,2""","""call""",306540,3949,"""0xaa971288ea224d94014d7cafd31f…","""0x416b433906b1b72fa758e166e239…",1.0340242394e-06,2.674575156e-07,7.665667237e-07,7.663530654e-07,2.136583e-10,0.0,0.002500707,6.97e-07,0.00250001,0.0,7.166269782,0.477396198,"""0xac9650d8""",2024-11-18 00:00:00,True,"""""","""0x4f1eb3d8""",True,2,"""1""",27,89767,199,4.9764e-10,1.387e-13,4.975e-10,0.0,11353.333333,146.259259,3.8297e-08,2.8391e-08,9.9058e-09,7.9133e-12,2.8383e-08,1.0403e-08


### Get table schema

In [9]:
duckdb_client.sql("DESCRIBE refined_trace_calls_v1")


┌───────────────────────────────────────────────┬─────────────┬─────────┬─────────┬─────────┬─────────┐
│                  column_name                  │ column_type │  null   │   key   │ default │  extra  │
│                    varchar                    │   varchar   │ varchar │ varchar │ varchar │ varchar │
├───────────────────────────────────────────────┼─────────────┼─────────┼─────────┼─────────┼─────────┤
│ dt                                            │ DATE        │ YES     │ NULL    │ NULL    │ NULL    │
│ chain                                         │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ chain_id                                      │ INTEGER     │ YES     │ NULL    │ NULL    │ NULL    │
│ network                                       │ VARCHAR     │ YES     │ NULL    │ NULL    │ NULL    │
│ block_number                                  │ BIGINT      │ YES     │ NULL    │ NULL    │ NULL    │
│ block_timestamp                               │ UINTEGER    │ 