## Prepare data reader for a given chain and date

In [1]:
from op_analytics.coreutils.duckdb_inmem import init_client
from op_analytics.coreutils.partitioned.reader import DataReader
from op_analytics.coreutils.partitioned.location import DataLocation
from op_analytics.datapipeline.etl.intermediate.construct import construct_data_readers

from op_analytics.datapipeline.models.compute.udfs import create_duckdb_macros


# Build the data readers for the requested chain / model / date range.
# With this range spec the run logged "prepared 1 input batches", i.e. a
# single batch for chain "op" on 2024-11-18 read from GCS.
read_batches: list[DataReader] = construct_data_readers(
    chains=["op"],
    models=["event_emitting_transactions"],
    range_spec="@20241118:+1",
    read_from=DataLocation.GCS,
)

# Fail early with a readable message rather than a bare IndexError on the
# next statement when no input data exists for the requested range.
assert read_batches, (
    "construct_data_readers() returned no batches; "
    "check the chains, models and range_spec arguments"
)

# Select the input for one date; this is the batch the model will run on.
batch = read_batches[0]

# Create the DuckDB client used for the rest of the notebook and install the
# macros from models.compute.udfs on it before running the model.
duckdb_client = init_client()
create_duckdb_macros(duckdb_client)


[2m2024-12-13 13:59:06[0m [[32m[1mdebug    [0m] [1mconnecting to OPLABS Clickhouse client...[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m25[0m [36mprocess[0m=[35m52305[0m
[2m2024-12-13 13:59:06[0m [[32m[1minfo     [0m] [1mloaded vault from .env file   [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m32[0m [36mprocess[0m=[35m52305[0m
[2m2024-12-13 13:59:06[0m [[32m[1mdebug    [0m] [1mloaded vault: 17 items        [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m76[0m [36mprocess[0m=[35m52305[0m
[2m2024-12-13 13:59:06[0m [[32m[1mdebug    [0m] [1minitialized OPLABS Clickhouse client.[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m37[0m [36mprocess[0m=[35m52305[0m
[2m2024-12-13 13:59:06[0m [[32m[1minfo     [0m] [1mprepared 1 input batches.     [0m [36mfilename[0m=[35mbydate.py[0m [36mlineno[0m=[35m96[0m [36mprocess[0m=[35m52305[0m


## Run the model

Running the model automatically registers its outputs as DuckDB tables.

In [2]:
from op_analytics.datapipeline.models.compute.testutils import execute_model_in_memory

# Execute the model on the selected batch. Reading is capped at one parquet
# file per input dataset so the notebook stays fast.
execute_model_in_memory(
    model="event_emitting_transactions",
    data_reader=batch,
    duckdb_client=duckdb_client,
    limit_input_parquet_files=1,
)

# After the run, the duckdb database contains:
#   - the input tables
#   - the views used by the model
#   - the model outputs
#
# Any of them can be inspected with plain duckdb SQL; start by listing them.
duckdb_client.sql("SHOW TABLES")

[2m2024-12-13 13:59:07[0m [[32m[1minfo     [0m] [1mExecuting model...            [0m [36mfilename[0m=[35mtestutils.py[0m [36mlineno[0m=[35m220[0m [36mprocess[0m=[35m52305[0m
[2m2024-12-13 13:59:07[0m [[32m[1minfo     [0m] [1mduckdb dataset='ingestion/logs_v1' using 1/22 parquet paths, first path is gs://oplabs-tools-data-sink/ingestion/logs_v1/chain=op/dt=2024-11-18/000128144000.parquet[0m [36mfilename[0m=[35mreader.py[0m [36mlineno[0m=[35m68[0m [36mprocess[0m=[35m52305[0m
[2m2024-12-13 13:59:08[0m [[32m[1minfo     [0m] [1mregistered view: 'ingestion_logs_v1' using 1 parquet paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m53[0m [36mprocess[0m=[35m52305[0m
[2m2024-12-13 13:59:08[0m [[32m[1minfo     [0m] [1mduckdb dataset='ingestion/transactions_v1' using 1/22 parquet paths, first path is gs://oplabs-tools-data-sink/ingestion/transactions_v1/chain=op/dt=2024-11-18/000128144000.parquet[0m [36mfilename[0m=[35mre

┌────────────────────────────────┐
│              name              │
│            varchar             │
├────────────────────────────────┤
│ event_emitting_transactions    │
│ event_emitting_transactions_v1 │
│ ingestion_blocks_v1            │
│ ingestion_logs_v1              │
│ ingestion_transactions_v1      │
│ logs_topic0_filters            │
│ refined_transactions_fees      │
└────────────────────────────────┘

## Verify model results

In [3]:
# Preview the first rows of the model output table.
duckdb_client.sql(
    "SELECT * FROM event_emitting_transactions_v1 LIMIT 10"
)

┌────────────┬─────────────────────┬─────────────────┬─────────┬─────────┬──────────┬──────────────┬────────────────────────────────────────────────────────────────────┬────────────────────┬───────────────────────┬───────────────────────┬───────────────────────┬────────────────────────────────────────────────┬───────────────────────────────────────┬───────────────────┬──────────────────┬───────────────────────┬───────────────────────┬───────────────────────┬───────────────────────┬───────────────────────┬───────────────────────┬────────────────────────┬───────────────────────────┬───────────────────┬────────────────────┬─────────────┬──────────────────┐
│     dt     │     block_hour      │ block_timestamp │ network │  chain  │ chain_id │ block_number │                          transaction_hash                          │ count_total_events │ count_approval_events │ count_wrapping_events │ count_transfer_events │ is_qualified_tx_not_approval_wrapping_transfer │ is_qualified_tx_not_approv

### Check the data output size

In [4]:
# Row count of the model output table.
duckdb_client.sql(
    "SELECT COUNT(*) AS interm_num_txs FROM event_emitting_transactions_v1"
)

┌────────────────┐
│ interm_num_txs │
│     int64      │
├────────────────┤
│          16679 │
└────────────────┘

In [5]:
# Distinct transaction hashes in the input logs view, for comparison with the
# row count of the model output table.
duckdb_client.sql(
    "SELECT COUNT(DISTINCT transaction_hash) as ingestion_num_txs FROM ingestion_logs_v1"
)

┌───────────────────┐
│ ingestion_num_txs │
│       int64       │
├───────────────────┤
│             16679 │
└───────────────────┘

In [6]:
# Peek at a few raw topic0 values from the input logs view.
duckdb_client.sql(
    "SELECT topic0 FROM ingestion_logs_v1 LIMIT 10"
)


┌────────────────────────────────────────────────────────────────────┐
│                               topic0                               │
│                              varchar                               │
├────────────────────────────────────────────────────────────────────┤
│ 0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef │
│ 0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef │
│ 0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef │
│ 0x112c256902bf554b6ed882d2936687aaeb4225e8cd5b51303c90ca6cf43a8602 │
│ 0xcf2aa50876cdfbb541206f89af0ee78d44a2abf8d328e37fa4917f982149848a │
│ 0xb3e2773606abfd36b5bd91394b3a54d1398336c65005baf7bf7a05efeffaf75b │
│ 0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef │
│ 0x1c411e9a96e071241c2f21f7726b17ae89e3cab4c78be50e062b03a9fffbbad1 │
│ 0xd78ad95fa46c994b6551d0da85fc275fe613ce37657fb8d5e3d130840159d822 │
│ 0xc42079f94a6350d7e6235f29174924f928cc2ac818eb64fed8004e115fbcca67 │
├─────

### You can also convert the results to dataframes to inspect them in more familiar ways

In [7]:
# Same kind of query as the preview above, but .pl() hands the result back as
# a dataframe instead of a duckdb relation.
(
    duckdb_client
    .sql("SELECT * FROM event_emitting_transactions_v1 LIMIT 5")
    .pl()
)

dt,block_hour,block_timestamp,network,chain,chain_id,block_number,transaction_hash,count_total_events,count_approval_events,count_wrapping_events,count_transfer_events,is_qualified_tx_not_approval_wrapping_transfer,is_qualified_tx_not_approval_wrapping,transaction_index,transaction_type,tx_fee_native,l1_fee_native,l2_fee_native,l1_base_fee_native,l1_blob_fee_native,l2_base_fee_native,l2_priority_fee_native,l2_base_legacy_fee_native,input_byte_length,input_calldata_gas,l1_gas_used,receipt_gas_used
date,datetime[μs],u32,str,str,i32,i64,str,i64,"decimal[38,0]","decimal[38,0]","decimal[38,0]",bool,bool,i64,i32,"decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,19]",f64,f64,i64,i64
2024-11-18,2024-11-18 00:00:00,1731890759,"""mainnet""","""op""",10,128145991,"""0x1235a81f6e53a98fd084110155e1…",12,3,0,3,True,True,20,0,2.0874702928e-06,1.4879054668e-06,5.99564826e-07,4.307828968e-07,1.0570047134e-06,1.5123672e-09,0.0,5.980524587e-07,2340.0,15192.0,7857,598010
2024-11-18,2024-11-18 00:00:00,1731890761,"""mainnet""","""op""",10,128145992,"""0xedd5381353abf8e805af44d583c9…",7,1,0,2,True,True,9,2,3.0584908877e-06,6.801850001e-07,2.3783058875e-06,2.110564649e-07,4.690025804e-07,5.537082e-10,2.3777521793e-06,0.0,708.0,4668.0,3922,218857
2024-11-18,2024-11-18 00:00:00,1731890761,"""mainnet""","""op""",10,128145992,"""0xa541416b4022b2de958bdc9e4640…",21,1,0,10,True,True,14,0,1.7034381682e-06,6.616191454e-07,1.0418190227e-06,2.052984226e-07,4.562072525e-07,2.1909622e-09,0.0,1.0396280604e-06,254.0,3668.0,3815,865993
2024-11-18,2024-11-18 00:00:00,1731890765,"""mainnet""","""op""",10,128145994,"""0x882dc42f47ecea249d7540fda95a…",10,0,1,3,True,True,12,2,3.6917430291e-06,3.251130291e-07,3.36663e-06,1.008464597e-07,2.240976123e-07,8.564706e-10,3.36663e-06,0.0,260.0,1736.0,1874,336663
2024-11-18,2024-11-18 00:00:00,1731890765,"""mainnet""","""op""",10,128145994,"""0x68a86601a960770e35d97b6b809c…",6,0,0,4,True,True,15,0,6.501825344e-07,3.784898614e-07,2.71692673e-07,1.174210113e-07,2.609290235e-07,5.745344e-10,0.0,2.711181386e-07,119.0,1568.0,2182,225839


### Get table schema

In [8]:
# List the column names and types of the model output table.
duckdb_client.sql(
    "DESCRIBE event_emitting_transactions_v1"
)


┌───────────────────────────┬────────────────┬─────────┬─────────┬─────────┬─────────┐
│        column_name        │  column_type   │  null   │   key   │ default │  extra  │
│          varchar          │    varchar     │ varchar │ varchar │ varchar │ varchar │
├───────────────────────────┼────────────────┼─────────┼─────────┼─────────┼─────────┤
│ dt                        │ DATE           │ YES     │ NULL    │ NULL    │ NULL    │
│ block_hour                │ TIMESTAMP      │ YES     │ NULL    │ NULL    │ NULL    │
│ block_timestamp           │ UINTEGER       │ YES     │ NULL    │ NULL    │ NULL    │
│ network                   │ VARCHAR        │ YES     │ NULL    │ NULL    │ NULL    │
│ chain                     │ VARCHAR        │ YES     │ NULL    │ NULL    │ NULL    │
│ chain_id                  │ INTEGER        │ YES     │ NULL    │ NULL    │ NULL    │
│ block_number              │ BIGINT         │ YES     │ NULL    │ NULL    │ NULL    │
│ transaction_hash          │ VARCHAR      