In [22]:
from pprint import pprint

from op_analytics.coreutils.partitioned.location import DataLocation
from op_analytics.coreutils.partitioned.reader import DataReader
from op_analytics.datapipeline.etl.ingestion.reader.byblock import construct_readers_byblock
from op_analytics.datapipeline.etl.ingestion.reader.request import BlockBatchRequest
from op_analytics.datapipeline.models.compute.markers import ModelsDataSpec
from op_analytics.datapipeline.models.compute.testutils import setup_execution_context
from op_analytics.coreutils.duckdb_inmem import init_client
from op_analytics.datapipeline.models.compute.udfs import create_duckdb_macros

model_name = "token_transfers"


# Prepare data readers for the inputs that the selected model declares.
data_spec = ModelsDataSpec(models=[model_name])
blockbatch_request = BlockBatchRequest.build(
    chains=["op"],
    # NOTE(review): presumably "one day starting at 2024-11-18" -- confirm
    # against the range_spec syntax accepted by BlockBatchRequest.build.
    range_spec="@20241118:+1",
    root_paths_to_read=data_spec.input_root_paths,
)
readers: list[DataReader] = construct_readers_byblock(
    blockbatch_request=blockbatch_request,
    read_from=DataLocation.GCS,
)

# Everything below works on the first batch only. Fail with a readable
# message instead of a bare IndexError when the request matched no batches.
if not readers:
    raise RuntimeError(
        f"No input batches found for model {model_name!r}; "
        "check the chains / range_spec used to build the request."
    )
first_reader = readers[0]

# Show details for the batch we are processing.
pprint(first_reader)

# Set up execution context and get handles to model input args.
# In subsequent cells you can use the model input args however you want.
ctx, input_datasets, aux_views = setup_execution_context(
    model_name=model_name,
    data_reader=first_reader,
)

# NOTE(review): presumably registers the SQL macros used by the model
# templates on the in-memory DuckDB client -- confirm in create_duckdb_macros.
duckdb_client = init_client()
create_duckdb_macros(duckdb_client)

[2m2025-01-15 21:31:42[0m [[32m[1minfo     [0m] [1mprepared 22 input batches.    [0m [36mfilename[0m=[35mbyblock.py[0m [36mlineno[0m=[35m78[0m [36mprocess[0m=[35m52558[0m
DataReader(partitions=Partition(cols=[PartitionColumn(name='chain', value='op'),
                                      PartitionColumn(name='dt',
                                                      value='2024-11-18')]),
           read_from=DataLocation.GCS,
           dataset_paths={'ingestion/logs_v1': ['gs://oplabs-tools-data-sink/ingestion/logs_v1/chain=op/dt=2024-11-18/000128144000.parquet']},
           inputs_ready=True,
           extra_marker_data={'max_block': 128146000,
                              'min_block': 128144000,
                              'num_blocks': 2000})
[2m2025-01-15 21:31:42[0m [[32m[1minfo     [0m] [1mreading dataset='ingestion/logs_v1' using 1/1 parquet paths, first path is gs://oplabs-tools-data-sink/ingestion/logs_v1/chain=op/dt=2024-11-18/000128144000.pa

In [23]:
# Create a view over the raw logs input dataset; the returned handle is
# interpolated into SQL text further down in the notebook.
logs_dataset = input_datasets["ingestion/logs_v1"]
logs_view = logs_dataset.create_view()

# Render the "token_transfers" auxiliary template, pointing its `raw_logs`
# template parameter at the view created above.
token_transfers_template = aux_views["token_transfers"]
all_transfers = token_transfers_template.to_relation(
    duckdb_context=ctx,
    template_parameters=dict(raw_logs=logs_view),
)

[2m2025-01-15 21:31:44[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m245[0m [36mprocess[0m=[35m52558[0m
[2m2025-01-15 21:31:45[0m [[32m[1minfo     [0m] [1mcreated table/view ingestion_logs_v1_view[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m189[0m [36mprocess[0m=[35m52558[0m
[2m2025-01-15 21:31:45[0m [[32m[1minfo     [0m] [1mduck db size: 12.3KB          [0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m36[0m [36mprocess[0m=[35m52558[0m
[2m2025-01-15 21:31:45[0m [[32m[1minfo     [0m] [1mRendering query               [0m [36mfilename[0m=[35mquerybuilder.py[0m [36mlineno[0m=[35m40[0m [36mprocess[0m=[35m52558[0m [36mtemplate[0m=[35mtoken_transfers[0m


In [24]:
# Peek at raw logs whose topic0 starts with a fixed event-signature hash.
# NOTE(review): this value looks like the keccak-256 signature hash of
# `Transfer(address,address,uint256)` -- confirm. There is no ORDER BY, so
# which 10 rows come back is not guaranteed to be stable between runs.
transfer_logs_query = (
    f"SELECT * FROM {logs_view} as l "
    "where l.topic0 LIKE '0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef%' "
    "LIMIT 10"
)
df = ctx.client.sql(transfer_logs_query).df()
df.head()

Unnamed: 0,network,chain_id,block_timestamp,block_number,block_hash,transaction_hash,transaction_index,log_index,address,topics,data,topic0,indexed_args,chain,dt
0,mainnet,10,1731890755,128145989,0x98f6d6cfb9de5b5ccf9c3d9849bc04ab9a2c4725b657...,0x6ba43fabde9f03dded7c56677751114907201a9e4462...,1,0,0xdc6ff44d5d932cbd77b52e5612ba0529dc6226f1,0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...,0x00000000000000000000000000000000000000000000...,0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...,[0x000000000000000000000000f89d7b9c864f589bbf5...,op,2024-11-18
1,mainnet,10,1731890755,128145989,0x98f6d6cfb9de5b5ccf9c3d9849bc04ab9a2c4725b657...,0x026e632a5d3a57e54c0f786ae4249b318dcc9ff728f5...,4,1,0x4200000000000000000000000000000000000042,0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...,0x00000000000000000000000000000000000000000000...,0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...,[0x0000000000000000000000004dc22588ade05c40338...,op,2024-11-18
2,mainnet,10,1731890755,128145989,0x98f6d6cfb9de5b5ccf9c3d9849bc04ab9a2c4725b657...,0x0f3da35693644abe74388ab718881855a50e147201bb...,6,10,0x4200000000000000000000000000000000000006,0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...,0x00000000000000000000000000000000000000000000...,0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...,[0x00000000000000000000000088391365c2259730322...,op,2024-11-18
3,mainnet,10,1731890755,128145989,0x98f6d6cfb9de5b5ccf9c3d9849bc04ab9a2c4725b657...,0x0f3da35693644abe74388ab718881855a50e147201bb...,6,14,0x4200000000000000000000000000000000000042,0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...,0x00000000000000000000000000000000000000000000...,0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...,[0x000000000000000000000000442659a6d04b907c879...,op,2024-11-18
4,mainnet,10,1731890755,128145989,0x98f6d6cfb9de5b5ccf9c3d9849bc04ab9a2c4725b657...,0x026e632a5d3a57e54c0f786ae4249b318dcc9ff728f5...,4,2,0x4200000000000000000000000000000000000006,0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...,0x00000000000000000000000000000000000000000000...,0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4...,[0x000000000000000000000000802b65b5d9016621e66...,op,2024-11-18


In [34]:
# Keep only the transfers that have no token_id, then drop that column.
# NOTE(review): presumably token_id is only populated for NFT-style
# transfers, which is why the remainder is treated as ERC-20 -- confirm
# against the token_transfers template.
transfers_without_token_id = all_transfers.filter("token_id IS NULL")
erc20_transfers = transfers_without_token_id.project("* EXCLUDE token_id")

In [35]:
# Materialize the ERC-20 transfers as a pandas DataFrame and preview them.
# The SQL refers to the `erc20_transfers` Python variable by name, so that
# variable must already be defined in the notebook namespace when this runs.
# Plain string on purpose: there is nothing to interpolate, so no f-prefix.
df = ctx.client.sql("SELECT * FROM erc20_transfers").df()
df.head()

Unnamed: 0,chain_id,chain,dt,block_timestamp,block_number,block_hash,transaction_hash,transaction_index,log_index,contract_address,amount,amount_lossless,from_address,to_address
0,10,op,2024-11-18,1731890755,128145989,0x98f6d6cfb9de5b5ccf9c3d9849bc04ab9a2c4725b657...,0x6ba43fabde9f03dded7c56677751114907201a9e4462...,1,0,0xdc6ff44d5d932cbd77b52e5612ba0529dc6226f1,8e+17,800000000000000000,0xf89d7b9c864f589bbf53a82105107622b35eaa40,0x73981e74c1b3d94cbe97e2cd03691dd2e7c533fa
1,10,op,2024-11-18,1731890755,128145989,0x98f6d6cfb9de5b5ccf9c3d9849bc04ab9a2c4725b657...,0x026e632a5d3a57e54c0f786ae4249b318dcc9ff728f5...,4,1,0x4200000000000000000000000000000000000042,,238897558498147901946,0x4dc22588ade05c40338a9d95a6da9dcee68bcd60,0x802b65b5d9016621e66003aed0b16615093f328b
2,10,op,2024-11-18,1731890755,128145989,0x98f6d6cfb9de5b5ccf9c3d9849bc04ab9a2c4725b657...,0x0f3da35693644abe74388ab718881855a50e147201bb...,6,10,0x4200000000000000000000000000000000000006,11330520000000.0,11330523516624,0x88391365c225973032275db256b9d15f845d2c72,0x07169e885228d41d986a5b66130ac6e3d8f44324
3,10,op,2024-11-18,1731890755,128145989,0x98f6d6cfb9de5b5ccf9c3d9849bc04ab9a2c4725b657...,0x0f3da35693644abe74388ab718881855a50e147201bb...,6,14,0x4200000000000000000000000000000000000042,6.755863e+18,6755862736030762399,0x442659a6d04b907c879032da1ef634548110dd37,0xfc1f3296458f9b2a27a0b91dd7681c4020e09d05
4,10,op,2024-11-18,1731890755,128145989,0x98f6d6cfb9de5b5ccf9c3d9849bc04ab9a2c4725b657...,0x026e632a5d3a57e54c0f786ae4249b318dcc9ff728f5...,4,2,0x4200000000000000000000000000000000000006,1.337134e+17,133713384334713000,0x802b65b5d9016621e66003aed0b16615093f328b,0x4dc22588ade05c40338a9d95a6da9dcee68bcd60
