## Prepare data reader and model execution context

In [1]:
from pprint import pprint

from op_analytics.coreutils.partitioned.location import DataLocation
from op_analytics.coreutils.partitioned.reader import DataReader
from op_analytics.datapipeline.etl.ingestion.reader.byblock import construct_readers_byblock
from op_analytics.datapipeline.etl.ingestion.reader.request import BlockBatchRequest
from op_analytics.datapipeline.models.compute.markers import ModelsDataSpec
from op_analytics.datapipeline.models.compute.testutils import setup_execution_context

model_name = "account_abstraction"


# Prepare data readers for the inputs declared by the model.
data_spec = ModelsDataSpec(models=[model_name])
blockbatch_request = BlockBatchRequest.build(
    chains=["base"],
    range_spec="19910194:+1",
    root_paths_to_read=data_spec.input_root_paths,
)
readers: list[DataReader] = construct_readers_byblock(
    blockbatch_request=blockbatch_request,
    read_from=DataLocation.GCS,
)

# This notebook only works with the first reader that was constructed.
first_reader = readers[0]

# Show details for the batch we are processing.
pprint(first_reader)

# Set up execution context and get handles to model input args.
# In subsequent cells you can use the model input args however you want.
ctx, input_datasets, auxiliary_views = setup_execution_context(
    model_name=model_name,
    data_reader=first_reader,
)


dict_keys([ModelPath(module='account_abstraction.model', function_name='account_abstraction'), ModelPath(module='account_abstraction', function_name='account_abstraction')])
[2m2025-01-22 12:38:06[0m [[32m[1mdebug    [0m] [1mconnecting to GOLDSKY Clickhouse client...[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m36[0m [36mprocess[0m=[35m42705[0m
[2m2025-01-22 12:38:06[0m [[32m[1minfo     [0m] [1mloaded vault from .env file   [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m32[0m [36mprocess[0m=[35m42705[0m
[2m2025-01-22 12:38:06[0m [[32m[1mdebug    [0m] [1mloaded vault: 18 items        [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m79[0m [36mprocess[0m=[35m42705[0m
[2m2025-01-22 12:38:07[0m [[32m[1mdebug    [0m] [1minitialized GOLDSKY Clickhouse client.[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m41[0m [36mprocess[0m=[35m42705[0m
[2m2025-01-22 12:38:07[0m [[32m[1mdebug    [0m]

In [13]:
# MODEL VERSION B
# Filter aggressively first and then apply UDFs.

# Step 1: materialize only the logs whose topic0 matches UserOperationEvent,
# renaming the three indexed args to descriptive column names.
logs_dataset = input_datasets["ingestion/logs_v1"]
filtered_logs = logs_dataset.create_table(
    projections=[
        "chain_id",
        "chain",
        "dt",
        "block_timestamp",
        "block_number",
        "block_hash",
        "transaction_hash",
        "transaction_index",
        "log_index",
        "address AS contract_address",
        "indexed_args[1] AS user_op_hash",
        "indexed_args[2] AS sender",
        "indexed_args[3] AS paymaster",
        "data",
    ],
    additional_sql="""
    WHERE
        topic0 = '0x49628fd1471006c1482da88028e9ce4dbb080b815c9b0344d39e5a8e6ec1419f' -- UserOperationEvent
        AND array_length(indexed_args) = 3
    """,
)

# Step 2: collect the distinct (block_number, transaction_hash) pairs found in
# the filtered logs. `filtered_logs` is interpolated into the FROM clause.
ctx.client.sql(f"""
CREATE OR REPLACE TABLE target_tx_hashes AS
SELECT DISTINCT block_number, transaction_hash FROM {filtered_logs}
ORDER BY transaction_hash
""")

# Step 3: keep only the transactions that join against target_tx_hashes.
transactions_dataset = input_datasets["ingestion/transactions_v1"]
filtered_transactions = transactions_dataset.create_table(
    projections="read_parquet.*",
    # parenthesis=True,
    additional_sql="""
    INNER JOIN target_tx_hashes ops
    ON read_parquet.block_number = ops.block_number
    AND read_parquet.hash = ops.transaction_hash
    ORDER BY transaction_hash
    """,
)

# Step 4: keep only the traces that join against target_tx_hashes.
traces_dataset = input_datasets["ingestion/traces_v1"]
filtered_traces = traces_dataset.create_table(
    projections="read_parquet.*",
    # parenthesis=True,
    additional_sql="""
    INNER JOIN target_tx_hashes ops
    ON read_parquet.block_number = ops.block_number
    AND read_parquet.transaction_hash = ops.transaction_hash
    ORDER BY read_parquet.transaction_hash
    """,
)


# List the tables that now exist in the DuckDB session.
ctx.client.sql("SHOW TABLES")

# Disabled scratch code, kept for reference. It mirrors the template-based
# approach used in the MODEL VERSION A cell.
#
# filtered_logs = auxiliary_views["account_abstraction/user_ops"].create_table(
#     duckdb_context=ctx,
#     template_parameters={
#         "raw_logs": input_datasets["ingestion/logs_v1"].as_subquery(),
#     }
# )
# print(user_ops_events)

# # Filter raw transactions to the ones having UserOperationEvent logs.
# # This is a lazy operation. Returns the raw SQL string.
# filtered_transactions = input_datasets["ingestion/transactions_v1"].select_string(
#     projections="read_parquet.*",
#     parenthesis=True,
#     additional_sql=f"""
#     INNER JOIN (SELECT DISTINCT block_number, transaction_hash FROM {user_ops_events}) ops
#     ON read_parquet.block_number = ops.block_number
#     AND read_parquet.hash = ops.transaction_hash
#     """,
# )




[2m2025-01-22 09:59:48[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m255[0m [36mprocess[0m=[35m1342[0m
[2m2025-01-22 09:59:52[0m [[32m[1minfo     [0m] [1mcreated table/view ingestion_logs_v1_tbl[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m199[0m [36mprocess[0m=[35m1342[0m
[2m2025-01-22 09:59:52[0m [[32m[1minfo     [0m] [1mduck db size: 82.8MB          [0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m36[0m [36mprocess[0m=[35m1342[0m
[2m2025-01-22 09:59:52[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m255[0m [36mprocess[0m=[35m1342[0m
[2m2025-01-22 09:59:58[0m [[32m[1minfo     [0m] [1mcreated table/view ingestion_transactions_v1_tbl[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m199[0m [36mprocess[0m=[35m1342

┌───────────────────────────────┐
│             name              │
│            varchar            │
├───────────────────────────────┤
│ account_abstraction__user_ops │
│ ingestion_logs_v1_tbl         │
│ ingestion_traces_v1_tbl       │
│ ingestion_transactions_v1_tbl │
│ refined_transactions_fees     │
│ target_tx_hashes              │
└───────────────────────────────┘

In [16]:
# Compare the row counts of the logs, transactions and traces tables in a
# single result, labelling each count with the table it came from.
ctx.client.sql("""
    SELECT COUNT(*), 'logs' as table FROM ingestion_logs_v1_tbl
    UNION ALL
    SELECT COUNT(*), 'transactions' as table FROM ingestion_transactions_v1_tbl
    UNION ALL
    SELECT COUNT(*), 'traces' as table FROM ingestion_traces_v1_tbl
""")

┌──────────────┬──────────────┐
│ count_star() │    table     │
│    int64     │   varchar    │
├──────────────┼──────────────┤
│        11964 │ logs         │
│         5877 │ transactions │
│       199565 │ traces       │
└──────────────┴──────────────┘

In [20]:
# Inspect the traces of one sample transaction, excluding rows whose
# call_type is 'delegatecall'.
ctx.client.sql("""
SELECT * FROM ingestion_traces_v1_tbl WHERE
transaction_hash = '0xa6afb687ed95e708b6086b8fd864cd56bd46746c9850e943a035c4863f88fbed'
AND call_type != 'delegatecall'
""")

┌─────────┬──────────┬─────────────────┬──────────────┬────────────────────────────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────┬───────────────────┬────────────────────────────────────────────┬────────────────────────────────────────────┬───────────────┬────────────────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

In [2]:
# MODEL VERSION A
# Filter and apply UDFs at the same time.

from op_analytics.datapipeline.models.code.account_abstraction.event_user_op import (
    register_decode_user_ops,
    unregister_decode_user_ops,
)
from op_analytics.datapipeline.models.code.account_abstraction.function_handle_ops import (
    register_decode_handle_ops_input,
    unregister_decode_handle_ops_input,
)


# Presumably needed when re-running this cell in a session where the UDFs are
# already registered -- TODO confirm.
# unregister_decode_user_ops(ctx)
# unregister_decode_handle_ops_input(ctx)

# Register the decoders on the execution context before rendering the
# templates below; `decode_handle_ops_input` is referenced in `extra_cols`.
register_decode_user_ops(ctx)
register_decode_handle_ops_input(ctx)

# Create the user ops table from the raw logs subquery.
user_ops_view = auxiliary_views["account_abstraction/user_ops"]
user_ops_events = user_ops_view.create_table(
    duckdb_context=ctx,
    template_parameters={
        "raw_logs": input_datasets["ingestion/logs_v1"].as_subquery(),
    },
)
print(user_ops_events)

# Filter raw transactions to the ones having UserOperationEvent logs.
# This is a lazy operation. Returns the raw SQL string.
transactions_dataset = input_datasets["ingestion/transactions_v1"]
filtered_transactions = transactions_dataset.select_string(
    projections="read_parquet.*",
    parenthesis=True,
    additional_sql=f"""
    INNER JOIN (SELECT DISTINCT block_number, transaction_hash FROM {user_ops_events}) ops
    ON read_parquet.block_number = ops.block_number
    AND read_parquet.hash = ops.transaction_hash
    """,
)

# Create a table where the filtered transactions are enhanced with the refined
# transactions fees transformation.
refined_fees_view = auxiliary_views["refined_transactions_fees"]
refined_txs = refined_fees_view.create_table(
    duckdb_context=ctx,
    template_parameters={
        "raw_blocks": input_datasets["ingestion/blocks_v1"].as_subquery(),
        "raw_transactions": filtered_transactions,
        "extra_cols": ["decode_handle_ops_input(t.input) AS decoded_input"],
    },
)
print(refined_txs)

# List the tables that now exist in the DuckDB session.
ctx.client.sql("SHOW TABLES")

[2m2025-01-22 12:38:29[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m255[0m [36mprocess[0m=[35m42705[0m
[2m2025-01-22 12:38:29[0m [[32m[1minfo     [0m] [1mRendering query               [0m [36mfilename[0m=[35mquerybuilder.py[0m [36mlineno[0m=[35m40[0m [36mprocess[0m=[35m42705[0m [36mtemplate[0m=[35maccount_abstraction/user_ops[0m
[2m2025-01-22 12:38:32[0m [[32m[1minfo     [0m] [1mduck db size: 12.3KB          [0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m36[0m [36mprocess[0m=[35m42705[0m
account_abstraction__user_ops
[2m2025-01-22 12:38:32[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m255[0m [36mprocess[0m=[35m42705[0m
[2m2025-01-22 12:38:32[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilen

┌───────────────────────────────┐
│             name              │
│            varchar            │
├───────────────────────────────┤
│ account_abstraction__user_ops │
│ refined_transactions_fees     │
└───────────────────────────────┘

In [3]:
# Look up the user ops rows that belong to one sample transaction.
ctx.client.sql("""
SELECT * FROM account_abstraction__user_ops
WHERE transaction_hash = '0xa6afb687ed95e708b6086b8fd864cd56bd46746c9850e943a035c4863f88fbed'
""")

┌──────────┬─────────┬────────────┬─────────────────┬──────────────┬────────────────────────────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────┬───────────────────┬───────────┬────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ chain_id │  chain  │     dt     │ block_timestamp │

In [4]:
# Look up one sample transaction in refined_transactions_fees. This table is
# filtered on `hash` rather than `transaction_hash`.
ctx.client.sql("""
SELECT * FROM refined_transactions_fees
WHERE hash = '0xa6afb687ed95e708b6086b8fd864cd56bd46746c9850e943a035c4863f88fbed'
""")

┌────────────┬─────────┬──────────┬─────────┬────────┬───────────────────┬────────────────────────────────────────────┬────────────────────────────────────────────┬──────────────┬─────────────────┬────────────────────────────────────────────────────────────────────┬──────────────────┬───────────┬───────────┬─────────────┬─────────────────────┬──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┬──────────────┬──────────────┬──────────────────┬──────────────────┬──────────────────────────┬───────────────┬────────────────────┬─────────────────────────┬──────────────────────────┬───────────────┬─────────────────┬───────────────┬────────────┬─────────┬───────────────────┬──────────────────┬───────────

In [8]:
# Raw trace columns selected as-is, in output order. They are split in two
# groups because the derived columns sit between them in the projection list.
leading_columns = [
    "block_number",
    "transaction_hash",
    "from_address",
    "to_address",
    "call_type",
    "trace_address",
]
trailing_columns = ["input", "output"]

# Columns computed from trace_address with SQL functions.
derived_columns = [
    "trace_address_depth(read_parquet.trace_address) AS trace_depth",
    "trace_address_root(read_parquet.trace_address) AS trace_root",
]

# Keep only traces whose (block_number, transaction_hash) appears in the
# user ops table, ordered by transaction hash.
filtered_traces = input_datasets["ingestion/traces_v1"].create_table(
    projections=(
        [f"read_parquet.{column}" for column in leading_columns]
        + derived_columns
        + [f"read_parquet.{column}" for column in trailing_columns]
    ),
    # parenthesis=True,
    additional_sql=f"""
    INNER JOIN (SELECT DISTINCT block_number, transaction_hash FROM {user_ops_events}) ops
    ON read_parquet.block_number = ops.block_number
    AND read_parquet.transaction_hash = ops.transaction_hash
    ORDER BY read_parquet.transaction_hash
    """,
)


[2m2025-01-22 13:31:04[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m255[0m [36mprocess[0m=[35m42705[0m
[2m2025-01-22 13:31:16[0m [[32m[1minfo     [0m] [1mcreated table/view ingestion_traces_v1_tbl[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m199[0m [36mprocess[0m=[35m42705[0m
[2m2025-01-22 13:31:16[0m [[32m[1minfo     [0m] [1mduck db size: 815.5MB         [0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m36[0m [36mprocess[0m=[35m42705[0m


In [9]:
# Inspect the filtered traces of one sample transaction. The value of
# `filtered_traces` is interpolated into the FROM clause.
ctx.client.sql(f"""
SELECT * FROM {filtered_traces}
WHERE transaction_hash = '0xa6afb687ed95e708b6086b8fd864cd56bd46746c9850e943a035c4863f88fbed'
""")

┌──────────────┬────────────────────────────────────────────────────────────────────┬────────────────────────────────────────────┬────────────────────────────────────────────┬──────────────┬───────────────────┬──────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────

In [3]:
# Sanity check: look for user ops where the decoded actual_gas_cost or
# actual_gas_used is NULL (at most 10 rows are returned).
# NOTE(review): `decoded_user_ops` is presumably a struct column that `.*`
# expands into its fields -- confirm against the user_ops template.
ctx.client.sql("""
SELECT decoded_user_ops.* FROM account_abstraction__user_ops
WHERE 
decoded_user_ops.actual_gas_cost is null OR 
decoded_user_ops.actual_gas_used is null 
LIMIT 10
""")

┌────────────────┬────────┬─────────┬──────────────────────────┬─────────────────┬──────────────────────────┬─────────────────┐
│ nonce_lossless │ nonce  │ success │ actual_gas_cost_lossless │ actual_gas_cost │ actual_gas_used_lossless │ actual_gas_used │
│    varchar     │ uint64 │ boolean │         varchar          │     uint64      │         varchar          │     uint64      │
├────────────────┴────────┴─────────┴──────────────────────────┴─────────────────┴──────────────────────────┴─────────────────┤
│                                                           0 rows                                                            │
└─────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┘

In [4]:
# Show the decoded fields for user ops whose raw `data` payload equals the
# hex string below (at most 10 rows are returned).
ctx.client.sql("""
SELECT decoded_user_ops.* FROM account_abstraction__user_ops
WHERE data = '0x313e82b5e46736cb31ff49c9e0537a6b08afdfb2d0ff6870000000000000000000000000000000000000000000000000000000000000000000000000000000010000000000000000000000000000000000000000000000000000019785d71cf7000000000000000000000000000000000000000000000000000000000005f267'
LIMIT 10
""")

┌───────────────────────────────────────────────────────────────────────────────┬────────┬─────────┬──────────────────────────┬─────────────────┬──────────────────────────┬─────────────────┐
│                                nonce_lossless                                 │ nonce  │ success │ actual_gas_cost_lossless │ actual_gas_cost │ actual_gas_used_lossless │ actual_gas_used │
│                                    varchar                                    │ uint64 │ boolean │         varchar          │     uint64      │         varchar          │     uint64      │
├───────────────────────────────────────────────────────────────────────────────┼────────┼─────────┼──────────────────────────┼─────────────────┼──────────────────────────┼─────────────────┤
│ 22273776229425404679199247798345442738024096472586274001161783678383614328832 │   NULL │ true    │ 1750297156855            │   1750297156855 │ 389735                   │          389735 │
└────────────────────────────────────────────

In [12]:
# Count the rows in account_abstraction__user_ops.
# NOTE(review): LIMIT 10 is redundant here; the aggregate yields a single row.
ctx.client.sql("SELECT count(*) FROM account_abstraction__user_ops LIMIT 10")

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        11964 │
└──────────────┘

In [6]:
# Preview up to 10 rows of account_abstraction__user_ops.
ctx.client.sql("SELECT * FROM account_abstraction__user_ops LIMIT 10")

┌──────────┬─────────┬────────────┬─────────────────┬──────────────┬────────────────────────────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────┬───────────────────┬───────────┬────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────┐
│ chain_id │  chain  │     dt     │ block_timestamp │ block_number │                             block_hash                             │                          transaction_hash                          │ transaction_index │ log_index │          

In [7]:
# Count the rows in the table named by `user_ops_events`.
# NOTE(review): LIMIT 10 is redundant here; the aggregate yields a single row.
ctx.client.sql(f"SELECT COUNT(*) FROM {user_ops_events} LIMIT 10")

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        11964 │
└──────────────┘

In [6]:
# Count every row of the raw logs input (no filtering), reading it through
# the dataset's subquery form.
ctx.client.sql(f"""
SELECT COUNT(*) FROM {input_datasets["ingestion/logs_v1"].as_subquery()}
""")

[2m2025-01-21 06:09:25[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m245[0m [36mprocess[0m=[35m17005[0m


┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│       731998 │
└──────────────┘

In [8]:
# Fraction of raw logs that end up in account_abstraction__user_ops:
# 11964 user ops rows out of 731998 raw log rows (counts from the cells above).
11964 / 731998

0.01634430695165834

In [2]:
# Build the refined traces pipeline, one table per template. Each step feeds
# the value returned by the previous create_table() call into the next one.
#
# Uses `auxiliary_views`, the name bound by setup_execution_context() in the
# setup cell. The previous `aux_views` spelling is not defined anywhere in
# this notebook and would raise NameError.

# Refined transactions (with fees) from the unfiltered blocks and transactions.
refined_txs_table = auxiliary_views["refined_transactions_fees"].create_table(
    duckdb_context=ctx,
    template_parameters={
        "raw_blocks": input_datasets["ingestion/blocks_v1"].as_subquery(),
        "raw_transactions": input_datasets["ingestion/transactions_v1"].as_subquery(),
    },
)

# Projection over the raw traces.
refined_traces_projection = auxiliary_views["refined_traces/traces_projection"].create_table(
    duckdb_context=ctx,
    template_parameters={
        "raw_traces": input_datasets["ingestion/traces_v1"].as_subquery(),
    },
)

# Traces with gas used, derived from the projection above.
traces_with_gas_used = auxiliary_views["refined_traces/traces_with_gas_used"].create_table(
    duckdb_context=ctx,
    template_parameters={
        "refined_traces_projection": refined_traces_projection,
    },
)

# Join of the gas-used traces with the refined transactions.
traces_txs_join = auxiliary_views["refined_traces/traces_txs_join"].create_table(
    duckdb_context=ctx,
    template_parameters={
        "traces_with_gas_used": traces_with_gas_used,
        "refined_transactions_fees": refined_txs_table,
    },
)

[2m2025-01-10 04:15:26[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m245[0m [36mprocess[0m=[35m2658[0m
[2m2025-01-10 04:15:26[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m245[0m [36mprocess[0m=[35m2658[0m
[2m2025-01-10 04:15:26[0m [[32m[1minfo     [0m] [1mRendering query               [0m [36mfilename[0m=[35mquerybuilder.py[0m [36mlineno[0m=[35m40[0m [36mprocess[0m=[35m2658[0m [36mtemplate[0m=[35mrefined_transactions_fees[0m
[2m2025-01-10 04:15:31[0m [[32m[1minfo     [0m] [1mduck db size: 8.4MB           [0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m36[0m [36mprocess[0m=[35m2658[0m
[2m2025-01-10 04:15:31[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlinen

In [5]:
# Show the first 10 rows, ordered by trace_address, of one sample transaction
# in the traces/transactions join table. The SQL LIMIT 10 caps the result
# below the max_rows=100 display setting.
ctx.client.sql("""
SELECT * FROM refined_traces__traces_txs_join
WHERE transaction_hash = '0x99d4d4dea9419810d61e1899565d10a0f6c7d284e7dcf0e01eff5025833fb0c6'
ORDER BY trace_address
LIMIT 10
""").show(max_rows=100)

┌────────────┬─────────┬──────────┬─────────┬──────────────┬─────────────────┬────────────────────────────────────────────────────────────────────┬───────────────────┬────────────────────────────────────────────┬────────────────────────────────────────────┬─────────────────┬────────────────┬───────────────┬────────────┬──────────────┬─────────┬─────────────────┬───────────────┬─────────────┬──────────────────────┬───────────────────┬────────────────────────┬──────────────────────────┬──────────────────────────────────┬───────────────────────────────────────┬───────────────────────────────────────────┬──────────────────────────────────────────────┬──────────────────────────────────┬──────────────────────────────────────────┬──────────────────────────────────────────┬──────────────────────────────────────────┬─────────────────────────────────────┬─────────────────────────────────┬────────────────────────────────────┬────────────────────────────────────┬───────────────────────────────────

In [6]:
# Print the column names and types of the refined transactions table,
# showing up to 100 rows of the DESCRIBE output.
ctx.client.sql(f"""DESCRIBE {refined_txs_table}""").show(max_rows=100)

┌─────────────────────────────────────┬────────────────┬─────────┬─────────┬─────────┬─────────┐
│             column_name             │  column_type   │  null   │   key   │ default │  extra  │
│               varchar               │    varchar     │ varchar │ varchar │ varchar │ varchar │
├─────────────────────────────────────┼────────────────┼─────────┼─────────┼─────────┼─────────┤
│ dt                                  │ DATE           │ YES     │ NULL    │ NULL    │ NULL    │
│ chain                               │ VARCHAR        │ YES     │ NULL    │ NULL    │ NULL    │
│ chain_id                            │ INTEGER        │ YES     │ NULL    │ NULL    │ NULL    │
│ network                             │ VARCHAR        │ YES     │ NULL    │ NULL    │ NULL    │
│ nonce                               │ BIGINT         │ YES     │ NULL    │ NULL    │ NULL    │
│ transaction_index                   │ BIGINT         │ YES     │ NULL    │ NULL    │ NULL    │
│ from_address                