## Prepare the data reader and set up the model execution context

In [None]:

from op_analytics.coreutils.partitioned.location import DataLocation
from op_analytics.coreutils.partitioned.reader import DataReader
from op_analytics.datapipeline.etl.blockbatch.construct import construct_data_readers
from op_analytics.datapipeline.models.compute.testutils import setup_execution_context

# Model under development; it selects the input data and the execution context.
model_name = "refined_traces"

# Build the data readers for the chosen chain and date range.
readers: list[DataReader] = construct_data_readers(
    read_from=DataLocation.GCS,
    range_spec="@20241118:+1",
    models=[model_name],
    chains=["op"],
)

# Only the first reader is used to build the execution context. The returned
# handles (`ctx`, `input_datasets`, `aux_views`) are what later cells work with.
# NOTE(review): later cells call `client` / `duckdb_client`, which no visible
# cell defines — confirm where those names are expected to come from.
first_reader = readers[0]
ctx, input_datasets, aux_views = setup_execution_context(
    data_reader=first_reader,
    model_name=model_name,
)


[2m2024-12-18 12:33:27[0m [[32m[1minfo     [0m] [1mprepared 22 input batches.    [0m [36mfilename[0m=[35mbyblock.py[0m [36mlineno[0m=[35m85[0m [36mprocess[0m=[35m96452[0m
[2m2024-12-18 12:33:27[0m [[32m[1minfo     [0m] [1mreading dataset='ingestion/blocks_v1' using 1/1 parquet paths, first path is gs://oplabs-tools-data-sink/ingestion/blocks_v1/chain=op/dt=2024-11-18/000128144000.parquet[0m [36mfilename[0m=[35mreader.py[0m [36mlineno[0m=[35m68[0m [36mprocess[0m=[35m96452[0m
[2m2024-12-18 12:33:27[0m [[32m[1minfo     [0m] [1mreading dataset='ingestion/traces_v1' using 1/1 parquet paths, first path is gs://oplabs-tools-data-sink/ingestion/traces_v1/chain=op/dt=2024-11-18/000128144000.parquet[0m [36mfilename[0m=[35mreader.py[0m [36mlineno[0m=[35m68[0m [36mprocess[0m=[35m96452[0m
[2m2024-12-18 12:33:27[0m [[32m[1minfo     [0m] [1mreading dataset='ingestion/transactions_v1' using 1/1 parquet paths, first path is gs://oplabs-tool

## Execute the model

It's up to you how the model manipulates the data. 

Develop the various steps in notebook cells and then copy the final code over to the model function.

In [7]:
# Build the refined transactions table from the raw blocks and transactions.

# Expose the two raw inputs as views the template can refer to.
blocks_view = input_datasets["ingestion/blocks_v1"].create_view()
transactions_view = input_datasets["ingestion/transactions_v1"].create_view()

# Create the table from the "refined_transactions_fees" auxiliary view,
# passing the raw views in as its template parameters.
refined_txs_template = aux_views["refined_transactions_fees"]
refined_txs = refined_txs_template.create_table(
    template_parameters={
        "raw_blocks": blocks_view,
        "raw_transactions": transactions_view,
    },
    duckdb_context=ctx,
)

[2m2024-12-18 12:41:20[0m [[32m[1minfo     [0m] [1mcreated view ingestion_blocks_v1_view using 1 parquet paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m138[0m [36mprocess[0m=[35m96452[0m
[2m2024-12-18 12:41:20[0m [[32m[1minfo     [0m] [1mduck db size: 12.3KB          [0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m33[0m [36mprocess[0m=[35m96452[0m
[2m2024-12-18 12:41:21[0m [[32m[1minfo     [0m] [1mcreated view ingestion_transactions_v1_view using 1 parquet paths[0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m138[0m [36mprocess[0m=[35m96452[0m
[2m2024-12-18 12:41:21[0m [[32m[1minfo     [0m] [1mduck db size: 12.3KB          [0m [36mfilename[0m=[35mclient.py[0m [36mlineno[0m=[35m33[0m [36mprocess[0m=[35m96452[0m
[2m2024-12-18 12:41:21[0m [[32m[1minfo     [0m] [1mRendering query               [0m [36mfilename[0m=[35mquerybuilder.py[0m [36mlineno[0m=[35m40[0m [36mprocess[0

In [8]:
# Print the schema of the refined transactions table (up to 1000 rows).
refined_txs_schema = client.sql(f"DESCRIBE {refined_txs}")
refined_txs_schema.show(max_rows=1000)

┌─────────────────────────────────────┬────────────────┬─────────┬─────────┬─────────┬─────────┐
│             column_name             │  column_type   │  null   │   key   │ default │  extra  │
│               varchar               │    varchar     │ varchar │ varchar │ varchar │ varchar │
├─────────────────────────────────────┼────────────────┼─────────┼─────────┼─────────┼─────────┤
│ dt                                  │ DATE           │ YES     │ NULL    │ NULL    │ NULL    │
│ chain                               │ VARCHAR        │ YES     │ NULL    │ NULL    │ NULL    │
│ chain_id                            │ INTEGER        │ YES     │ NULL    │ NULL    │ NULL    │
│ network                             │ VARCHAR        │ YES     │ NULL    │ NULL    │ NULL    │
│ nonce                               │ BIGINT         │ YES     │ NULL    │ NULL    │ NULL    │
│ transaction_index                   │ BIGINT         │ YES     │ NULL    │ NULL    │ NULL    │
│ from_address                

In [None]:
# NOTE(review): disabled draft of the traces step. It passes `duckdb_context`,
# a name no visible cell defines (the transactions cell passes `ctx`), and the
# later `refined_traces` / `refined_traces_fees` cells depend on this step —
# confirm before re-enabling.

# traces_view  = input_datasets["ingestion/traces_v1"].create_view()

# refined_traces = aux_views["refined_traces_fees"].create_table(
#     duckdb_context=duckdb_context,
#     template_parameters={
#         "raw_traces": traces_view,
#         "refined_txs": refined_txs,
#     },
# )

In [9]:
# List the tables and views currently registered in the DuckDB session.
registered_tables = client.sql("SHOW TABLES")
registered_tables

┌────────────────────────────────┐
│              name              │
│            varchar             │
├────────────────────────────────┤
│ ingestion_blocks_v1_view       │
│ ingestion_transactions_v1_view │
│ refined_transactions_fees      │
└────────────────────────────────┘

In [10]:
# Row count of the refined transactions table.
refined_txs_row_count = client.sql("SELECT COUNT(*) FROM refined_transactions_fees")
refined_txs_row_count

┌──────────────┐
│ count_star() │
│    int64     │
├──────────────┤
│        33168 │
└──────────────┘

In [None]:
# Row count of the refined traces table.
# NOTE(review): the only cell that would create `refined_traces_fees` is
# commented out, and the earlier SHOW TABLES output does not list it, so this
# query presumably fails until that step is restored — confirm.
client.sql("SELECT COUNT(*) FROM refined_traces_fees")

In [11]:
import polars as pl

# Raise the Polars display limits (row count, table width, string length) so
# the schema below is shown in full.
cfg = pl.Config()
for apply_limit, limit in (
    (cfg.set_tbl_rows, 2000),
    (cfg.set_tbl_width_chars, 2000),
    (cfg.set_fmt_str_lengths, 1000),
):
    apply_limit(limit)

# Schema of the refined transactions table, converted to a Polars frame.
refined_txs_describe = client.sql(f"DESCRIBE {refined_txs}")
refined_txs_describe.pl()


column_name,column_type,null,key,default,extra
str,str,str,str,str,str
"""dt""","""DATE""","""YES""",,,
"""chain""","""VARCHAR""","""YES""",,,
"""chain_id""","""INTEGER""","""YES""",,,
"""network""","""VARCHAR""","""YES""",,,
"""nonce""","""BIGINT""","""YES""",,,
"""transaction_index""","""BIGINT""","""YES""",,,
"""from_address""","""VARCHAR""","""YES""",,,
"""to_address""","""VARCHAR""","""YES""",,,
"""block_number""","""BIGINT""","""YES""",,,
"""block_timestamp""","""UINTEGER""","""YES""",,,


In [None]:
# Schema of the refined traces table, converted to a Polars frame.
# NOTE(review): `refined_traces` is only assigned in the commented-out traces
# cell, so this presumably raises NameError on a fresh kernel — confirm.
client.sql(f"DESCRIBE {refined_traces}").pl()

In [None]:
# Re-list the registered tables and views (same query as the earlier SHOW TABLES cell).
registered_tables_now = client.sql("SHOW TABLES")
registered_tables_now

In [None]:
# Re-check the refined transactions row count (same query as the earlier COUNT cell).
refined_txs_row_count_now = client.sql("SELECT COUNT(*) FROM refined_transactions_fees")
refined_txs_row_count_now

## Verify model results - Traces

In [None]:
# Ten `daily_traces_tr_to_v1` rows with the highest `count_transactions_called`.
# NOTE(review): neither `duckdb_client` nor `daily_traces_tr_to_v1` is created
# by any cell visible in this notebook (earlier cells use `client`) — confirm
# where they come from.
top_called_query = (
    "SELECT * FROM daily_traces_tr_to_v1 "
    "ORDER BY count_transactions_called DESC "
    "LIMIT 10"
)
duckdb_client.sql(top_called_query)

### Test Transaction Metrics

In [None]:
# Compare the three transaction-count metrics per `trace_to_address`, for the
# ten addresses with the most internal-type-call transactions.
tx_metrics_query = """
                  SELECT trace_to_address,
                    count_transactions_called_with_internal_type_call, count_transactions_called_with_internal_type_call_or_delegate,
                    count_transactions_called
                  FROM daily_traces_tr_to_v1
                  ORDER BY count_transactions_called_with_internal_type_call DESC
                  LIMIT 10
                  """
duckdb_client.sql(tx_metrics_query)

### Test Gas Used Metrics

In [None]:
# Gas-used and fee metrics per `trace_to_address`, for the ten addresses with
# the highest gas used (minus subtraces) on successful internal-type-call txs.
#
# Every selected column must be followed by a comma. Without one, SQL reads
# the next identifier as an alias for the previous column, which silently
# mislabels one metric and drops the other from the result.
duckdb_client.sql("""
                  SELECT
                    trace_to_address,
                  
                    sum_trace_gas_used_minus_subtraces_tx_success_called_with_internal_type_call,
                    sum_tx_l2_gas_used_amortized_by_call_tx_success_called_with_internal_type_call,
                  
                    sum_tx_l2_fee_native_minus_subtraces_tx_success_called_with_internal_type_call,
                    sum_tx_l2_fee_native_amortized_by_call_tx_success_called_with_internal_type_call,
                    sum_tx_fee_native_amortized_by_call_tx_success_called_with_internal_type_call,
                  
                    count_transactions_called_with_internal_type_call,
                    count_transactions_called_with_internal_type_call_or_delegate,
                    count_transactions_called
                  
                  FROM daily_traces_tr_to_v1
                  ORDER BY sum_trace_gas_used_minus_subtraces_tx_success_called_with_internal_type_call DESC
                  LIMIT 10
                  """)

In [None]:
# Spot-check one transaction: its first ten refined trace rows, ordered by
# trace address.
single_tx_traces_query = """
    SELECT * FROM refined_traces_fees_v1
                  where transaction_hash = '0xc620133c2339f36d8bfae889ea29e9986a70182f7bbe3380d0622f3801619eda'
                  AND block_number = 128145924
                  ORDER BY trace_address ASC
                  LIMIT 10
                  """
duckdb_client.sql(single_tx_traces_query)

### Check the data output size

In [None]:
# Size of the refined traces table: rows, distinct transactions, distinct
# trace-to addresses.
refined_traces_size_query = (
    "SELECT COUNT(*) AS interm_num_calls, "
    "COUNT(DISTINCT transaction_hash) AS num_txs, "
    "COUNT(DISTINCT trace_to_address) AS num_trace_tos "
    "FROM refined_traces_fees_v1"
)
duckdb_client.sql(refined_traces_size_query)

In [None]:
# Size of the per-hash aggregated table: rows, distinct transactions, distinct
# trace-to addresses.
aggregated_traces_size_query = (
    "SELECT COUNT(*) AS num_rows, "
    "COUNT(DISTINCT transaction_hash) AS num_txs, "
    "COUNT(DISTINCT trace_to_address) AS num_trace_tos "
    "FROM aggregated_traces_tr_to_hash_v1"
)
duckdb_client.sql(aggregated_traces_size_query)

In [None]:
# Size of the daily table: rows and distinct trace-to addresses.
daily_traces_size_query = (
    "SELECT COUNT(*) AS num_rows, "
    "COUNT(DISTINCT trace_to_address) AS num_trace_tos "
    "FROM daily_traces_tr_to_v1"
)
duckdb_client.sql(daily_traces_size_query)

### Get table schema

In [None]:
# Column names and types of the daily traces table.
daily_traces_schema = duckdb_client.sql("DESCRIBE daily_traces_tr_to_v1")
daily_traces_schema
