## Prepare data reader for a given chain and date

In [1]:
from op_analytics.coreutils.duckdb_inmem import init_client
from op_analytics.coreutils.partitioned.reader import DataReader
from op_analytics.coreutils.partitioned.location import DataLocation
from op_analytics.datapipeline.etl.intermediate.construct import construct_data_readers

from op_analytics.datapipeline.models.compute.udfs import create_duckdb_macros


# Define the input data range: one chain, one model, and the range spec
# "@20241118:+1" (presumably a single day starting 2024-11-18 -- confirm
# against the range-spec documentation).
read_batches: list[DataReader] = construct_data_readers(
    chains=["op"],
    models=["refined_transactions_fees"],
    range_spec="@20241118:+1",
    read_from=DataLocation.GCS,
)

# Fail early with a readable message instead of an opaque IndexError on the
# indexing below when the requested range produced no input batches.
if not read_batches:
    raise ValueError(
        "construct_data_readers returned no batches; check chains, models and range_spec"
    )


# Select input for one date and build the intermediate model inputs.
batch = read_batches[0]


# Create the in-memory duckdb client and register the project's macros on it.
duckdb_client = init_client()
create_duckdb_macros(duckdb_client)


[2m2024-12-13 10:24:38[0m [[32m[1mdebug    [0m] [1mconnecting to OPLABS Clickhouse client...[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m25[0m [36mprocess[0m=[35m28905[0m
[2m2024-12-13 10:24:38[0m [[32m[1minfo     [0m] [1mloaded vault from .env file   [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m32[0m [36mprocess[0m=[35m28905[0m
[2m2024-12-13 10:24:38[0m [[32m[1mdebug    [0m] [1mloaded vault: 17 items        [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m76[0m [36mprocess[0m=[35m28905[0m
[2m2024-12-13 10:24:39[0m [[32m[1mdebug    [0m] [1minitialized OPLABS Clickhouse client.[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m37[0m [36mprocess[0m=[35m28905[0m
[2m2024-12-13 10:24:39[0m [[32m[1minfo     [0m] [1mprepared 1 input batches.     [0m [36mfilename[0m=[35mreader_bydate.py[0m [36mlineno[0m=[35m97[0m [36mprocess[0m=[35m28905[0m


## Run the model

Running the model automatically registers its outputs as DuckDB tables, so they can be queried in the cells below.

In [2]:
from op_analytics.datapipeline.models.compute.testutils import execute_model_in_memory

# Execute the model on the batch selected above, using the shared duckdb
# client. The input is capped at one parquet file per dataset.
execute_model_in_memory(
    model="refined_transactions_fees",
    data_reader=batch,
    duckdb_client=duckdb_client,
    limit_input_parquet_files=1,
)

# Once the model has run, the duckdb database holds:
#   - the input tables
#   - the views used by the model
#   - the model outputs
#
# Any of these can be inspected with ordinary duckdb queries.
duckdb_client.sql("SHOW TABLES")

[2m2024-12-13 10:24:39[0m [[32m[1minfo     [0m] [1mExecuting model...            [0m [36mfilename[0m=[35mtestutils.py[0m [36mlineno[0m=[35m220[0m [36mprocess[0m=[35m28905[0m
[2m2024-12-13 10:24:39[0m [[32m[1minfo     [0m] [1mduckdb dataset='ingestion/transactions_v1' using 1/22 parquet paths, first path is gs://oplabs-tools-data-sink/ingestion/transactions_v1/chain=op/dt=2024-11-18/000128144000.parquet[0m [36mfilename[0m=[35mreader.py[0m [36mlineno[0m=[35m68[0m [36mprocess[0m=[35m28905[0m
[2m2024-12-13 10:24:40[0m [[32m[1minfo     [0m] [1mregistered view: 'ingestion_transactions_v1' using 1 parquet paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m53[0m [36mprocess[0m=[35m28905[0m
[2m2024-12-13 10:24:40[0m [[32m[1minfo     [0m] [1mduckdb dataset='ingestion/blocks_v1' using 1/22 parquet paths, first path is gs://oplabs-tools-data-sink/ingestion/blocks_v1/chain=op/dt=2024-11-18/000128144000.parquet[0m [36mfilename

┌──────────────────────────────────┐
│               name               │
│             varchar              │
├──────────────────────────────────┤
│ base_transactions_fees           │
│ event_emitting_transactions_list │
│ ingestion_blocks_v1              │
│ ingestion_logs_v1                │
│ ingestion_transactions_v1        │
│ refined_transactions_fees        │
│ refined_transactions_fees_v1     │
└──────────────────────────────────┘

## Verify model results

In [3]:
# Peek at a handful of rows from the model output table.
preview_sql = "SELECT * FROM refined_transactions_fees_v1 LIMIT 10"
duckdb_client.sql(preview_sql)

┌────────────┬─────────┬──────────┬─────────┬─────────┬───────────────────┬────────────────────────────────────────────┬────────────────────────────────────────────┬──────────────┬─────────────────┬────────────────────────────────────────────────────────────────────┬───────────┬───────────┬──────────────────┬─────────────────────┬───────────────┬──────────────────────┬──────────────────────────┬──────────────────┬──────────────────────────┬─────────────────────────┬──────────────┬─────────────────┬─────────────┬─────────────────────┬────────────┬─────────┬────────────────────┬─────────────────────────┬──────────────────┬───────────────────┬────────────────────┬───────────────────────┬─────────────────────────────────────┬─────────────┬───────────────┬────────────────────┬───────────────────────────┬─────────────────────┬────────────────────────────┬─────────────────────┬───────────────────────┬───────────────────────┬───────────────────────┬───────────────────────┬─────────────────────

### Check the data output size

In [4]:
# Number of rows produced by the model.
output_count_sql = "SELECT COUNT(*) FROM refined_transactions_fees_v1"
duckdb_client.sql(output_count_sql)

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        33168 │
└──────────────┘

In [5]:
# Number of rows in the input transactions view, for comparison with the
# model output row count.
input_count_sql = "SELECT COUNT(*) FROM ingestion_transactions_v1"
duckdb_client.sql(input_count_sql)

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        33168 │
└──────────────┘

In [6]:
# Optional debugging aid, disabled by default: uncomment to look at the raw
# `input` column of the ingested transactions.
# duckdb_client.sql("SELECT input FROM ingestion_transactions_v1 LIMIT 10")


### Convert the results to Polars dataframes to inspect them in more familiar ways

In [7]:
# Fetch the rows with the largest `tx_fee_native` as a Polars dataframe.
# The row cap lives in the SQL only: the previous `LIMIT 10` followed by
# `.head()` (which keeps 5 rows by default) fetched rows that were never shown.
duckdb_client.sql(
    "SELECT * FROM refined_transactions_fees_v1 ORDER BY tx_fee_native DESC LIMIT 5"
).pl()

dt,chain,chain_id,network,nonce,transaction_index,from_address,to_address,block_number,block_timestamp,hash,gas_price,gas_limit,receipt_gas_used,receipt_l1_gas_used,l1_fee,receipt_l1_gas_price,receipt_l1_blob_base_fee,base_fee_per_gas,max_priority_fee_per_gas,base_legacy_fee_per_gas,l2_fee,l2_priority_fee,l2_base_fee,block_hour,method_id,success,l1_base_fee_scalar,l1_blob_base_fee_scalar,transaction_type,input_byte_length,input_calldata_gas,is_system_transaction,is_attributes_deposited_transaction,l1_gas_used,tx_fee,l2_base_legacy_fee,l1_base_fee,l1_base_scaled_size,l1_blob_fee,l1_blob_scaled_size,tx_fee_native,l1_fee_native,l2_fee_native,l1_base_fee_native,l1_blob_fee_native,l2_base_fee_native,l2_priority_fee_native,l2_base_legacy_fee_native,l2_gas_price_gwei,l2_base_gas_price_gwei,l2_priority_gas_price_gwei,l2_base_legacy_gas_price_gwei,l1_base_gas_price_gwei,l1_blob_base_gas_price_gwei,log_count_total_events,log_count_approval_events,log_count_wrapping_events,log_count_transfer_events,is_qualified_tx_not_approval_wrapping_transfer,is_qualified_tx_not_approval_wrapping
date,str,i32,str,i64,i64,str,str,i64,u32,str,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,i64,datetime[μs],str,bool,"decimal[36,7]","decimal[26,7]",i32,f64,f64,bool,bool,i64,i64,i64,"decimal[38,12]","decimal[38,12]","decimal[38,12]","decimal[38,12]","decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,19]","decimal[38,10]","decimal[38,10]","decimal[38,10]","decimal[38,10]","decimal[38,10]","decimal[38,10]",i64,"decimal[38,0]","decimal[38,0]","decimal[38,0]",bool,bool
2024-11-18,"""op""",10,"""mainnet""",161383,1,"""0xf0161ba9e648dee61d803bbdb896…","""0x6f1fe5b048de0417c4e838b1fc42…",128145723,1731890223,"""0x8d4b95d012045188a679fb118180…",24106181875,321281,178421,1600,413100442666,8168396263,3399547344,1533,24106181875,0,4301049076319375,4301049076319375,273519393,2024-11-18 00:00:00,"""0xb2460c48""",True,0.083632,1.014213,2,36.0,444.0,False,False,1600,4301462176762041,0,68313931626.7216,8.3632,344786511040.0272,101.4213,0.004301462176762,4.131004426e-07,0.0043010490763193,6.83139316e-08,3.44786511e-07,2.735193e-10,0.0043010490763193,0.0,24.106181875,1.533e-06,24.106181875,0.0,8.168396263,3.399547344,3,0,0,2,True,True
2024-11-18,"""op""",10,"""mainnet""",32703,2,"""0x870c4b80f61065aa4df267cbac02…","""0x6f1fe5b048de0417c4e838b1fc42…",128145723,1731890223,"""0xaa5703576bc911f19748dcb5f8d3…",22805151129,319950,169719,1600,413100442666,8168396263,3399547344,1533,22805151129,0,3870467444462751,3870467444462751,260179227,2024-11-18 00:00:00,"""0xb2460c48""",True,0.083632,1.014213,2,36.0,444.0,False,False,1600,3870880544905417,0,68313931626.7216,8.3632,344786511040.0272,101.4213,0.0038708805449054,4.131004426e-07,0.0038704674444627,6.83139316e-08,3.44786511e-07,2.601792e-10,0.0038704674444627,0.0,22.805151129,1.533e-06,22.805151129,0.0,8.168396263,3.399547344,3,0,0,2,True,True
2024-11-18,"""op""",10,"""mainnet""",32653,1,"""0x46619117bfccce427350754988b8…","""0x6f1fe5b048de0417c4e838b1fc42…",128145391,1731889559,"""0x00ad68344e805a7b6a86a0b2e933…",17763065777,294506,178388,1600,271010594468,7658457854,2040610599,956,17763065777,0,3168717777827476,3168717777827476,170538928,2024-11-18 00:00:00,"""0xb2460c48""",True,0.083632,1.014213,2,36.0,444.0,False,False,1600,3168988788421944,0,64049214724.5728,8.3632,206961379744.35873,101.4213,0.0031689887884219,2.710105944e-07,0.0031687177778274,6.40492147e-08,2.069613797e-07,1.705389e-10,0.0031687177778274,0.0,17.763065777,9.56e-07,17.763065777,0.0,7.658457854,2.040610599,3,0,0,2,True,True
2024-11-18,"""op""",10,"""mainnet""",32623,3,"""0x574214d3cd9fdb353e17a4768436…","""0x6f1fe5b048de0417c4e838b1fc42…",128145723,1731890223,"""0x88901c244073013c5a8e47162f54…",14972171875,216953,161408,1600,413100442666,8168396263,3399547344,1533,14972171875,0,2416628318000000,2416628318000000,247438464,2024-11-18 00:00:00,"""0xb2460c48""",True,0.083632,1.014213,2,36.0,432.0,False,False,1600,2417041418442666,0,68313931626.7216,8.3632,344786511040.0272,101.4213,0.0024170414184426,4.131004426e-07,0.002416628318,6.83139316e-08,3.44786511e-07,2.474384e-10,0.002416628318,0.0,14.972171875,1.533e-06,14.972171875,0.0,8.168396263,3.399547344,3,0,0,2,True,True
2024-11-18,"""op""",10,"""mainnet""",161414,1,"""0xcbc390b984a0578d563ad214a2f6…","""0x6f1fe5b048de0417c4e838b1fc42…",128145259,1731889295,"""0xf2875774e3fd5c6e99a46a52863b…",12116292491,294104,172062,1600,196474826433,7424938854,1324954204,802,12116292491,0,2084753518586442,2084753518586442,137993724,2024-11-18 00:00:00,"""0xb2460c48""",True,0.083632,1.014213,2,36.0,456.0,False,False,1600,2084949993412875,0,62096248623.7728,8.3632,134378577810.1452,101.4213,0.0020849499934128,1.964748264e-07,0.0020847535185864,6.20962486e-08,1.343785778e-07,1.379937e-10,0.0020847535185864,0.0,12.116292491,8.02e-07,12.116292491,0.0,7.424938854,1.324954204,3,0,0,2,True,True


### Get table schema

In [8]:
# List the column names and types of the model output table.
schema_sql = "DESCRIBE refined_transactions_fees_v1"
duckdb_client.sql(schema_sql)


┌────────────────────────────────────────────────┬────────────────┬─────────┬─────────┬─────────┬─────────┐
│                  column_name                   │  column_type   │  null   │   key   │ default │  extra  │
│                    varchar                     │    varchar     │ varchar │ varchar │ varchar │ varchar │
├────────────────────────────────────────────────┼────────────────┼─────────┼─────────┼─────────┼─────────┤
│ dt                                             │ DATE           │ YES     │ NULL    │ NULL    │ NULL    │
│ chain                                          │ VARCHAR        │ YES     │ NULL    │ NULL    │ NULL    │
│ chain_id                                       │ INTEGER        │ YES     │ NULL    │ NULL    │ NULL    │
│ network                                        │ VARCHAR        │ YES     │ NULL    │ NULL    │ NULL    │
│ nonce                                          │ BIGINT         │ YES     │ NULL    │ NULL    │ NULL    │
│ transaction_index         