## Prepare data reader and model execution context

In [1]:
from pprint import pprint

from op_analytics.coreutils.partitioned.location import DataLocation
from op_analytics.coreutils.partitioned.reader import DataReader
from op_analytics.datapipeline.etl.ingestion.reader.byblock import construct_readers_byblock
from op_analytics.datapipeline.etl.ingestion.reader.request import BlockBatchRequest
from op_analytics.datapipeline.models.compute.markers import ModelsDataSpec
from op_analytics.datapipeline.models.compute.testutils import setup_execution_context

model_name = "account_abstraction_prefilter"


# Select a model.
data_spec = ModelsDataSpec(root_path_prefix="blockbatch", models=[model_name])

# Select a block batch.
blockbatch_request = BlockBatchRequest.build(
    chains=["base"],
    # range_spec="19894001:+1",
    range_spec="19910194:+1",
    root_paths_to_read=data_spec.input_root_paths,
)

# Construct readers
readers: list[DataReader] = construct_readers_byblock(
    blockbatch_request=blockbatch_request,
    read_from=DataLocation.GCS,
)

# Fail with an explicit message (instead of an opaque IndexError on readers[0])
# when the request above produced no readers at all.
assert readers, f"no readers were constructed for model {model_name!r}; check chains/range_spec"

# Only the first reader is used in this notebook; bind it once so the rest of
# the cell does not index into the list repeatedly.
reader = readers[0]

# Show details for the batch we are processing.
pprint(reader)

# Ensure existence of data needed by the reader.
assert reader.inputs_ready, "input data needed by the first reader is not ready"

# Set up execution context and get handles to model input args.
# In subsequent cells you can use the model input args however you want.
ctx, input_datasets, auxiliary_templates = setup_execution_context(
    model_name=model_name,
    data_reader=reader,
)


[2m2025-01-29 21:40:18[0m [[32m[1mdebug    [0m] [1mconnecting to GOLDSKY Clickhouse client...[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m37[0m [36mprocess[0m=[35m39167[0m
[2m2025-01-29 21:40:18[0m [[32m[1minfo     [0m] [1mloaded vault from .env file   [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m32[0m [36mprocess[0m=[35m39167[0m
[2m2025-01-29 21:40:18[0m [[32m[1mdebug    [0m] [1mloaded vault: 18 items        [0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m79[0m [36mprocess[0m=[35m39167[0m
[2m2025-01-29 21:40:19[0m [[32m[1mdebug    [0m] [1minitialized GOLDSKY Clickhouse client.[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m42[0m [36mprocess[0m=[35m39167[0m
[2m2025-01-29 21:40:19[0m [[32m[1mdebug    [0m] [1mconnecting to OPLABS Clickhouse client...[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m37[0m [36mprocess[0m=[35m39167[0m
[2m2025-01-29 21:40:19[0

In [2]:
# Import the model function under test.
from op_analytics.datapipeline.models.code.account_abstraction_prefilter.model import account_abstraction_prefilter


# Run the model with the execution context and input handles created in the
# setup cell, keeping whatever it returns in `results`.
# NOTE(review): the structure of `results` is not visible from this notebook —
# inspect it before relying on it.
results = account_abstraction_prefilter(ctx, input_datasets, auxiliary_templates)

[2m2025-01-29 21:40:19[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m263[0m [36mprocess[0m=[35m39167[0m
[2m2025-01-29 21:40:19[0m [[32m[1minfo     [0m] [1mRendering query               [0m [36mfilename[0m=[35mquerybuilder.py[0m [36mlineno[0m=[35m40[0m [36mprocess[0m=[35m39167[0m [36mtemplate[0m=[35maccount_abstraction_prefilter/entrypoint_logs[0m
[2m2025-01-29 21:40:21[0m [[32m[1minfo     [0m] [1mduck db size: 12.3KB          [0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m36[0m [36mprocess[0m=[35m39167[0m
[2m2025-01-29 21:40:21[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m263[0m [36mprocess[0m=[35m39167[0m
[2m2025-01-29 21:40:21[0m [[32m[1minfo     [0m] [1mRendering query               [0m [36mfilename[0m=[35mquerybuilder.py

In [3]:
# List the tables currently visible through the execution context's client.
ctx.client.sql("SHOW TABLES")

┌──────────────────────────────────────────────────────────────┐
│                             name                             │
│                           varchar                            │
├──────────────────────────────────────────────────────────────┤
│ account_abstraction_prefilter__entrypoint_logs               │
│ account_abstraction_prefilter__entrypoint_prefiltered_traces │
│ txhashes                                                     │
└──────────────────────────────────────────────────────────────┘

In [4]:
# NOTES:
#
# Block batch filtering
#
# Each line reads: <table> : <rows before> -> <rows after>  (<share of rows kept>)
# These figures were recorded by hand from earlier runs.
#
# Batch=19910000:
#  logs   :  731998  ->  19725  (2.7%)
#  traces : 3997893  -> 199594  (4.9%)
#  txs    :  248980  ->   5882  (2.3%)
#
# NOTE(review): the label below repeats "Batch=19910000" although the numbers
# differ from the entry above — presumably this is a different batch (possibly
# the one for the commented-out range_spec in the setup cell); confirm and relabel.
# Batch=19910000:
#  logs   :  680683  ->  30251  (4.4%)
#  traces : 4036203  -> 348751  (8.6%)  245413 if we filter traces with !=delegatecall
#
# NOTE(review): the traces count recorded for the first batch (199594) does not
# match the count in this cell's saved output, while the logs count does —
# the recorded traces figures may be stale; re-run and update.

# Count the rows in the prefiltered logs and traces tables.
ctx.client.sql("""
SELECT 'logs' AS table, count(*) as num_rows FROM account_abstraction_prefilter__entrypoint_logs
UNION ALL
SELECT 'traces' AS table, count(*) as num_rows FROM account_abstraction_prefilter__entrypoint_prefiltered_traces
""")

┌─────────┬──────────┐
│  table  │ num_rows │
│ varchar │  int64   │
├─────────┼──────────┤
│ logs    │    19725 │
│ traces  │   145824 │
└─────────┴──────────┘