In [1]:
from pprint import pprint

from op_analytics.coreutils.partitioned.location import DataLocation
from op_analytics.coreutils.partitioned.reader import DataReader
from op_analytics.datapipeline.etl.ingestion.reader.byblock import construct_readers_byblock
from op_analytics.datapipeline.etl.ingestion.reader.request import BlockBatchRequest
from op_analytics.datapipeline.models.compute.modelspec import ModelsDataSpec
from op_analytics.datapipeline.models.compute.testutils import setup_execution_context

In [2]:
model_name = "token_transfers"


# Prepare data readers.
# The data spec exposes the input root paths (read below) for the selected model.
data_spec = ModelsDataSpec(
    models=[model_name],
    root_path_prefix="blockbatch",
)
blockbatch_request = BlockBatchRequest.build(
    chains=["op"],
    range_spec="@20241118:+1",
    root_paths_to_read=data_spec.input_root_paths,
)
readers: list[DataReader] = construct_readers_byblock(
    blockbatch_request=blockbatch_request,
    read_from=DataLocation.GCS,
)

# Fail fast with a readable message instead of a bare IndexError on readers[0]
# when the request did not produce any reader.
if not readers:
    raise RuntimeError(
        f"No data readers were constructed for model {model_name!r}; "
        "check the chains and range_spec of the block batch request."
    )

# Only the first reader is used in this notebook.
first_reader = readers[0]

# Show details for the batch we are processing.
pprint(first_reader)

# Set up execution context and get handles to model input args.
# In subsequent cells you can use the model input args however you want.
ctx, input_datasets, auxiliary_templates = setup_execution_context(
    model_name=model_name,
    data_reader=first_reader,
)

[2m2025-06-17 17:16:05[0m [[32m[1mdebug    [0m] [1mconnecting to GOLDSKY Clickhouse client...[0m [36mcounter[0m=[35m001/001[0m [36meta[0m=[35mNone[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m56[0m [36mprocess[0m=[35m14954[0m
[2m2025-06-17 17:16:05[0m [[32m[1minfo     [0m] [1mloaded vault from .env file   [0m [36mcounter[0m=[35m001/001[0m [36meta[0m=[35mNone[0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m32[0m [36mprocess[0m=[35m14954[0m
[2m2025-06-17 17:16:05[0m [[32m[1mdebug    [0m] [1mloaded vault: 28 items        [0m [36mcounter[0m=[35m001/001[0m [36meta[0m=[35mNone[0m [36mfilename[0m=[35mvault.py[0m [36mlineno[0m=[35m79[0m [36mprocess[0m=[35m14954[0m
[2m2025-06-17 17:16:06[0m [[32m[1mdebug    [0m] [1minitialized GOLDSKY Clickhouse client.[0m [36mcounter[0m=[35m001/001[0m [36meta[0m=[35mNone[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m61[0m [36mprocess[

In [4]:
# Call create_view() on each ingestion dataset; the returned handles are passed
# to the templates below as their raw inputs.
logs_view = input_datasets["ingestion/logs_v1"].create_view()
traces_view = input_datasets["ingestion/traces_v1"].create_view()

# NOTE(review): the saved output of this cell ends with
# "UndefinedError: 'native_transfers' is undefined". Which template raises it
# cannot be determined from the notebook alone -- confirm the parameter names
# each template expects before relying on the relations built here.

# Relation built from the "token_transfers" template over the logs view.
token_transfers_template = auxiliary_templates["token_transfers"]
all_transfers = token_transfers_template.to_relation(
    duckdb_context=ctx,
    template_parameters={"raw_logs": logs_view},
)

# Relation built from the "native_transfers" template over the traces view.
native_transfers_template = auxiliary_templates["native_transfers"]
native_transfers = native_transfers_template.to_relation(
    duckdb_context=ctx,
    template_parameters={"raw_traces": traces_view},
)

# The revshare template receives both relations plus two YAML config paths.
# The paths are relative, so they resolve against the kernel's working directory.
revshare_parameters = {
    "all_transfers": all_transfers,
    "native_transfers": native_transfers,
    "from_addresses_config": "src/op_analytics/datapipeline/models/config/revshare_from_addresses.yaml",
    "to_addresses_config": "src/op_analytics/datapipeline/models/config/revshare_to_addresses.yaml",
}
revshare_transfers_template = auxiliary_templates["revshare_transfers"]
revshare_transfers = revshare_transfers_template.to_relation(
    duckdb_context=ctx,
    template_parameters=revshare_parameters,
)

[2m2025-06-17 17:17:29[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m291[0m [36mprocess[0m=[35m14954[0m
[2m2025-06-17 17:17:29[0m [[32m[1minfo     [0m] [1mcreated table/view ingestion_logs_v1_view[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m230[0m [36mprocess[0m=[35m14954[0m
[2m2025-06-17 17:17:29[0m [[32m[1minfo     [0m] [1mduck db size: 12.3KB          [0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m40[0m [36mprocess[0m=[35m14954[0m
[2m2025-06-17 17:17:29[0m [[32m[1minfo     [0m] [1mconstructed read_parquet() string with 1 paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m291[0m [36mprocess[0m=[35m14954[0m
[2m2025-06-17 17:17:30[0m [[32m[1minfo     [0m] [1mcreated table/view ingestion_traces_v1_view[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m230[0m [36mprocess[0m=[35m14954

UndefinedError: 'native_transfers' is undefined

In [None]:
# Preview up to 10 rows of the logs view whose topic0 matches the hash below.
# NOTE(review): presumably the Transfer(address,address,uint256) event
# signature hash -- confirm.
transfer_topic0 = "0xddf252ad1be2c89b69c2b068fc378daa952ba7f163c4a11628f55a4df523b3ef"
sample_logs_query = (
    f"SELECT * FROM {logs_view} as l "
    f"where l.topic0 LIKE '{transfer_topic0}%' "
    "LIMIT 10"
)
df = ctx.client.sql(sample_logs_query).df()
df.head()

In [None]:
# Keep only the rows without a token_id, then drop the token_id column.
transfers_without_token_id = all_transfers.filter("token_id IS NULL")
erc20_transfers = transfers_without_token_id.project("* EXCLUDE token_id")

# The query refers to the erc20_transfers variable above by name.
df = ctx.client.sql("SELECT * FROM erc20_transfers").df()
df.head()

In [None]:
# Keep only the rows that carry a token_id, then drop the two amount columns.
transfers_with_token_id = all_transfers.filter("token_id IS NOT NULL")
erc721_transfers = transfers_with_token_id.project("* EXCLUDE (amount, amount_lossless)")

# The query refers to the erc721_transfers variable above by name.
df = ctx.client.sql("SELECT * FROM erc721_transfers").df()
df.head()

In [None]:
# Narrow native_transfers to rows whose transfer_type is 'native'.
# The name is rebound on purpose: the query below refers to it by name.
native_rows_only = native_transfers.filter("transfer_type = 'native'")
native_transfers = native_rows_only.project("*")

df = ctx.client.sql("SELECT * FROM native_transfers").df()
df.head()

In [None]:
# Re-project the revshare relation with "*" (all columns).
revshare_transfers = revshare_transfers.project("*")

# Preview the revshare relation itself. This cell previously selected from
# native_transfers (a copy-paste slip), so it showed the wrong relation.
df = ctx.client.sql("SELECT * FROM revshare_transfers").df()
df.head()