## Prepare data reader and model execution context

In [1]:
from pprint import pprint

from op_analytics.coreutils.partitioned.location import DataLocation
from op_analytics.coreutils.partitioned.reader import DataReader
from op_analytics.datapipeline.etl.ingestion.reader.byblock import construct_readers_byblock
from op_analytics.datapipeline.etl.ingestion.reader.request import BlockBatchRequest
from op_analytics.datapipeline.models.compute.markers import ModelsDataSpec
from op_analytics.datapipeline.models.compute.testutils import setup_execution_context

model_name = "account_abstraction"


# Prepare data readers.
# The model's data spec supplies the input root paths; the request below
# targets the "base" chain with range spec "19910194:+1" and reads from GCS.
data_spec = ModelsDataSpec(models=[model_name])
blockbatch_request = BlockBatchRequest.build(
    chains=["base"],
    range_spec="19910194:+1",
    root_paths_to_read=data_spec.input_root_paths,
)
readers: list[DataReader] = construct_readers_byblock(
    blockbatch_request=blockbatch_request,
    read_from=DataLocation.GCS,
)

# This notebook works with the first reader only.
first_reader = readers[0]

# Show details for the batch we are processing.
pprint(first_reader)

# Build the execution context and grab handles to the model input args.
# Subsequent cells are free to use these handles however they want.
ctx, input_datasets, auxiliary_views = setup_execution_context(
    model_name=model_name,
    data_reader=first_reader,
)


[2m2025-01-22 16:03:46[0m [[32m[1mdebug    [0m] [1mconnecting to GOLDSKY Clickhouse client...[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m36[0m [36mprocess[0m=[35m73754[0m
[2m2025-01-22 16:03:46[0m [[32m[1minfo     [0m] [1mloaded vault from .env file   [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m32[0m [36mprocess[0m=[35m73754[0m
[2m2025-01-22 16:03:46[0m [[32m[1mdebug    [0m] [1mloaded vault: 18 items        [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m79[0m [36mprocess[0m=[35m73754[0m
[2m2025-01-22 16:03:47[0m [[32m[1mdebug    [0m] [1minitialized GOLDSKY Clickhouse client.[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m41[0m [36mprocess[0m=[35m73754[0m
[2m2025-01-22 16:03:47[0m [[32m[1mdebug    [0m] [1mconnecting to OPLABS Clickhouse client...[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m36[0m [36mprocess[0m=[35m73754[0m
[2m2025-01-22 16:03:48[0

In [2]:
# MODEL VERSION B
# Approach: filter aggressively up front, and only then apply UDFs.

# Columns projected from the logs dataset; the three indexed args are exposed
# as user_op_hash, sender and paymaster.
user_op_log_columns = [
    "chain_id",
    "chain",
    "dt",
    "block_timestamp",
    "block_number",
    "block_hash",
    "transaction_hash",
    "transaction_index",
    "log_index",
    "address AS contract_address",
    "indexed_args[1] AS user_op_hash",
    "indexed_args[2] AS sender",
    "indexed_args[3] AS paymaster",
    "data",
]

# Keep only UserOperationEvent logs that have exactly three indexed args.
user_op_log_filter = """
    WHERE
        topic0 = '0x49628fd1471006c1482da88028e9ce4dbb080b815c9b0344d39e5a8e6ec1419f' -- UserOperationEvent
        AND array_length(indexed_args) = 3
    """

filtered_logs = input_datasets["ingestion/logs_v1"].create_table(
    projections=user_op_log_columns,
    additional_sql=user_op_log_filter,
)

# Distinct (block_number, transaction_hash) pairs found in the filtered logs.
# The transactions and traces inputs are joined against this table below.
target_tx_hashes_sql = f"""
CREATE OR REPLACE TABLE target_tx_hashes AS
SELECT DISTINCT block_number, transaction_hash FROM {filtered_logs}
ORDER BY transaction_hash
"""
ctx.client.sql(target_tx_hashes_sql)

# Transactions restricted to the target hashes (joined on block number + hash).
transactions_join_sql = """
    INNER JOIN target_tx_hashes ops
    ON read_parquet.block_number = ops.block_number
    AND read_parquet.hash = ops.transaction_hash
    ORDER BY transaction_hash
    """
filtered_transactions = input_datasets["ingestion/transactions_v1"].create_table(
    projections="read_parquet.*",
    additional_sql=transactions_join_sql,
)

# Traces restricted to the target hashes (joined on block number + tx hash).
traces_join_sql = """
    INNER JOIN target_tx_hashes ops
    ON read_parquet.block_number = ops.block_number
    AND read_parquet.transaction_hash = ops.transaction_hash
    ORDER BY read_parquet.transaction_hash
    """
filtered_traces = input_datasets["ingestion/traces_v1"].create_table(
    projections="read_parquet.*",
    additional_sql=traces_join_sql,
)

# List the tables that now exist in the client (displayed as the cell output).
ctx.client.sql("SHOW TABLES")



[2m2025-01-22 16:03:48[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m255[0m [36mprocess[0m=[35m73754[0m
[2m2025-01-22 16:03:51[0m [[32m[1minfo     [0m] [1mcreated table/view ingestion_logs_v1_tbl[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m199[0m [36mprocess[0m=[35m73754[0m
[2m2025-01-22 16:03:51[0m [[32m[1minfo     [0m] [1mduck db size: 12.3KB          [0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m36[0m [36mprocess[0m=[35m73754[0m
[2m2025-01-22 16:03:51[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m255[0m [36mprocess[0m=[35m73754[0m
[2m2025-01-22 16:03:56[0m [[32m[1minfo     [0m] [1mcreated table/view ingestion_transactions_v1_tbl[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m199[0m [36mprocess[0m=[35m7

┌───────────────────────────────┐
│             name              │
│            varchar            │
├───────────────────────────────┤
│ ingestion_logs_v1_tbl         │
│ ingestion_traces_v1_tbl       │
│ ingestion_transactions_v1_tbl │
│ target_tx_hashes              │
└───────────────────────────────┘

In [3]:
# Row counts for the logs, transactions and traces tables, one row per table.
table_row_counts_sql = """
    SELECT COUNT(*), 'logs' as table FROM ingestion_logs_v1_tbl
    UNION ALL
    SELECT COUNT(*), 'transactions' as table FROM ingestion_transactions_v1_tbl
    UNION ALL
    SELECT COUNT(*), 'traces' as table FROM ingestion_traces_v1_tbl
"""
ctx.client.sql(table_row_counts_sql)

┌──────────────┬──────────────┐
│ count_star() │    table     │
│    int64     │   varchar    │
├──────────────┼──────────────┤
│        11964 │ logs         │
│         5877 │ transactions │
│       199565 │ traces       │
└──────────────┴──────────────┘

In [4]:
# Inspect the traces of one specific transaction, leaving out every trace
# whose call_type is 'delegatecall'.
single_tx_traces_sql = """
SELECT * FROM ingestion_traces_v1_tbl WHERE
transaction_hash = '0xa6afb687ed95e708b6086b8fd864cd56bd46746c9850e943a035c4863f88fbed'
AND call_type != 'delegatecall'
"""
ctx.client.sql(single_tx_traces_sql)

┌─────────┬──────────┬─────────────────┬──────────────┬────────────────────────────────────────────────────────────────────┬────────────────────────────────────────────────────────────────────┬───────────────────┬────────────────────────────────────────────┬────────────────────────────────────────────┬───────────────┬────────────────┬────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────────