In [1]:
# Run configs
schemas_to_select = [
        'op', 
        # 'base',
        # 'mode',
        # 'fraxtal',
        # 'zora'
        ]  # Add more schemas as needed

days_of_data = 28
chunk_strategy = 'day' #'hour'

end_date = '2024-05-30' # -1 if 'now'

#FastLZ Regression Metrics
# Specs - https://specs.optimism.io/fjord/exec-engine.html?search=#fjord-l1-cost-fee-changes-fastlz-estimator
intercept = -42_585_600
fastlzCoef = 836_500
minTransactionSize = 100
scaled_by = 1e6

### Readme
FastLZ needs Python version 3.9x or lower, make sure your environment is using a later python version

In [2]:
import pandas as pd
from web3 import Web3
from hexbytes import HexBytes
import ast
import rlp
from rlp.sedes import Binary, big_endian_int, binary, List
from eth_utils import to_bytes, to_hex, int_to_big_endian
import fastlz
import sys
import os
import dotenv
import time
from dune_client.client import DuneClient
from dune_client.types import QueryParameter
dotenv.load_dotenv()
sys.path.append("../../helper_functions")
import clickhouse_utils as ch
import duneapi_utils as du
import pandas_utils as pu
sys.path.pop()

client = ch.connect_to_clickhouse_db() #Default is OPLabs DB

In [3]:
import warnings
warnings.filterwarnings("ignore", category=DeprecationWarning) #Supress internal fastlz warnings

### Execute

In [4]:
# Read OP Stack Metadata
csv_path = '../../op_chains_tracking/outputs/chain_metadata.csv'
df = pd.read_csv(csv_path)

# Filter the DataFrame based on the schemas_to_select list
filtered_df = df[df['oplabs_db_schema'].isin(schemas_to_select)]

# Select the required columns and convert to a list of dictionaries
chain_mappings_list = filtered_df[['oplabs_db_schema', 'display_name', 'mainnet_chain_id']].rename(
    columns={'oplabs_db_schema': 'schema_name', 'mainnet_chain_id': 'chain_id'}
).to_dict(orient='records')

# Print the resulting list of dictionaries
# print(chain_mappings_list)

In [5]:
# # Test transaction receipt
# from web3 import Web3
# op_rpc = os.getenv("OP_PUBLIC_RPC")
# w3 = Web3(Web3.HTTPProvider(op_rpc))

# tx_test = '0xcea81f2e836a37b38ba82afd37e6f66c02e348e7b89538aa232013d91edcb926'
# tx = w3.eth.get_transaction(tx_test)
# txr = w3.eth.get_transaction_receipt(tx_test)
# # # txraw = w3.eth.get_raw_transaction(tx_test)
# print(tx)
# # print(txr)
# # # print(txraw)                

In [6]:
# Get L2 Txs from Clickhouse / Goldsky
query_by_day = '''
        SELECT @chain_id@ as chain_id, nonce, gas, gas_price,
                to_address, value, input, block_timestamp, hash
        FROM @chain_db_name@_transactions
        WHERE gas_price > 0
        -- 1 day chunk
        AND block_timestamp < DATE_TRUNC('day',NOW()) - interval '@num@ @chunk_strategy@s'
        AND block_timestamp >= DATE_TRUNC('day',NOW()) - (interval '@num@ @chunk_strategy@s') - (interval '1 @chunk_strategy@')
        group by 1,2,3,4,5,6,7,8,9 --distincts
        SETTINGS max_execution_time = 10000
'''
# AND hash = '0xcea81f2e836a37b38ba82afd37e6f66c02e348e7b89538aa232013d91edcb926'
# AND block_number = 120731426

# txs_df

In [7]:
# Process transactions and RLP encode
#https://ethereum.org/en/developers/docs/transactions/

# NOTE THE RLP ENCODING IS NOT 1:1 WITH ETHERSCAN YET (but it's ~close-ish)
def process_and_encode_transaction(row):
    try:
        
        # Process "to" field
        to_field = row['to_address']
        try:
            if isinstance(to_field, str):
                if to_field:
                    to_field = to_field.decode('utf-8', errors='ignore')
                    to_bytes = bytes.fromhex(to_field[2:])
                else:
                    to_bytes = b''  # Set to an empty bytes object if "to" address is null
            elif isinstance(to_field, bytes):
                if to_field.startswith(b'0x'):
                    to_field = to_field.decode('utf-8', errors='ignore')
                    to_bytes = bytes.fromhex(to_field[2:])
                else:
                    to_bytes = to_field
            else:
                raise ValueError("Invalid 'to_address' field type")
        except Exception as e:
            print(f'Error in "to_address" field: {e}')
            print(f'Problematic value: {to_field}')
            return pd.Series([None, None])

        # Prepare transaction parameters
        try:
            tx_params = {
                'nonce': int_to_big_endian(int(row['nonce'])),
                'gasPrice': int_to_big_endian(int(row['gas_price'])),
                'gas': int_to_big_endian(int(row['gas'])),
                'to': to_bytes,
                'value': int_to_big_endian(int(row['value'])) if row['value'] != 0 else b'',  # Encode value as byte array if 0
                'input': bytes.fromhex(row['input'][2:]),
                'v': int_to_big_endian(int(row['v'])),  # Convert v to a bytes object
                'r': bytes.fromhex(row['r'][2:]),
                's': bytes.fromhex(row['s'][2:])
            }
        except:
            print(row)

        # # Print transaction parameters for debugging
        # for key, value in tx_params.items():
        #     print(f"{key}: {value}, {type(value)}")

        # Prepare the transaction fields for RLP encoding
        transaction = [
            tx_params['nonce'],
            tx_params['gasPrice'],
            tx_params['gas'],
            tx_params['to'],
            tx_params['value'],
            tx_params['input'],
            tx_params['v'],
            tx_params['r'],
            tx_params['s']
        ]

        # Encode the entire transaction
        encoded_tx = rlp.encode(transaction)
        encoded_tx_hex = "0x" + encoded_tx.hex()
        return pd.Series([encoded_tx_hex, len(encoded_tx)])

    except (ValueError, TypeError, UnicodeDecodeError) as e:
        print("Error:", e)
        print("Failed Transaction Info:")
        print(row)
        return pd.Series([None, None])

# Function to compress transaction data
def compress_transaction(encoded_transaction):

    hex_string = encoded_transaction[2:]
    # Convert the hexadecimal string to bytes
    byte_string = bytes.fromhex(hex_string)
    compressed_data = fastlz.compress(byte_string)

    return compressed_data.hex(), len(compressed_data)
# Define a function to apply to each row of the DataFrame
def process_and_compress_transaction(row):
    encoded_tx = row['encoded_transaction']
    compressed_tx, len_tx = compress_transaction(encoded_tx)
    return compressed_tx, len_tx

In [8]:
if chunk_strategy == 'day':
        nums_of_data = days_of_data
elif chunk_strategy == 'hour':
        nums_of_data = days_of_data*24
else:
        print('ERROR')

In [9]:
dfs = []
for chain in chain_mappings_list:
        for num in range(0,nums_of_data):
                result_df = None #Kill so we don't rerun
                print(chain['schema_name'] + ' : '+chunk_strategy+' ' + str(num+1) + '/' + str(nums_of_data))
                query_map = query_by_day

                query_map = query_map.replace("@chain_db_name@", chain['schema_name'])
                query_map = query_map.replace("@chain_id@", str(int(chain['chain_id'])))
                query_map = query_map.replace("@num@", str(num))
                query_map = query_map.replace("@chunk_strategy@", str(chunk_strategy))

                if end_date != -1: #end date config
                        query_map = query_map.replace("DATE_TRUNC('day',NOW())", f"(toDateTime('{end_date} 00:00:00') + interval '1 days')")

                # print(query_map)
                query_start_time = time.time()
                try:
                        result_df = client.query_df(query_map)
                except UnicodeDecodeError as e:
                        print(f"UnicodeDecodeError: {e}")
                        print(f"Problematic byte sequence: {e.object[e.start:e.end]}")

                query_end_time = time.time()  # Record the start time
                query_elapsed_time = query_end_time - query_start_time
                print (f"        Query Done: Completed in {query_elapsed_time:.2f} seconds")

                if result_df is None or result_df.empty:
                        print(f"No data found for {chain['schema_name']} on day {num + 1}")
                        continue  # Skip to the next day if there's no data
                
                # try:
                # Add Dummy Signature and fields
                result_df['access_list'] = '[]'
                result_df['access_list'] = result_df['access_list'].apply(ast.literal_eval)
                result_df['r'] = '0x6727a53c0972c55923242cea052dc4e1105d7b65c91c442e2741440965eac357'
                result_df['s'] = '0x0a8e71aea623adb7b5562fb9a779634f3b84dad7be1e1f22caaa640db352a6ff'
                result_df['v'] = '55'

                # Assuming `txs_df` is your DataFrame
                # print(result_df.apply(process_and_encode_transaction, axis=1, result_type='expand'))

                result_df[['encoded_transaction', 'len_encoded_transaction']] = result_df.apply(process_and_encode_transaction, axis=1, result_type='expand')
                enc_end_time = time.time()  # Record the start time
                enc_elapsed_time = enc_end_time - query_end_time
                print (f"        Encoding Done: Completed in {enc_elapsed_time:.2f} seconds")

                # Apply compression to each transaction in the DataFrame
                result_df[['compressed_transaction', 'compressed_transaction_length']] = result_df.apply(process_and_compress_transaction, axis=1, result_type='expand')
                comp_end_time = time.time()
                comp_elapsed_time = comp_end_time - enc_end_time
                print (f"        Compression Done: Completed in {comp_elapsed_time:.2f} seconds")
                
                # Calculate estimated size for each row
                result_df['estimatedSize_raw'] = result_df.apply(lambda row: (intercept + (row['compressed_transaction_length'] * fastlzCoef)) / scaled_by, axis=1)
                # Calculate minimum value for 'estimatedSize' column
                result_df['estimatedSize'] = result_df.apply(lambda row: max(minTransactionSize, row['estimatedSize_raw']), axis=1)
                est_end_time = time.time()
                est_elapsed_time = est_end_time - comp_end_time
                print (f"        Estimation Done: Completed in {est_elapsed_time:.2f} seconds")

                # Agg L2
                # Convert block_timestamp to date (truncate to day)
                result_df['block_date'] = pd.to_datetime(result_df['block_timestamp']).dt.date
                result_df['block_date'] = pd.to_datetime(result_df['block_date']).dt.tz_localize(None)
                grouped_df = result_df.groupby(['block_date', 'chain_id'])
                # Define aggregation functions
                agg_functions = {
                        'len_encoded_transaction': ['sum', 'mean', 'count'],
                        'compressed_transaction_length': ['sum', 'mean'],
                        'estimatedSize': ['sum', 'mean']
                }
                # Perform aggregation
                aggregated_df = grouped_df.agg(agg_functions).reset_index()
                # Rename columns for clarity
                aggregated_df.columns = ['block_date', 'chain_id', 
                                        'total_len_encoded_transaction', 'average_len_encoded_transaction', 'transaction_count',
                                        'total_len_compressed_transaction','average_len_compressed_transaction',
                                        'total_estimatedSize', 'average_estimatedSize']
                formatted_value = pu.format_num(aggregated_df['transaction_count'][0])
                print (f"        Transactions Processed: {formatted_value}")
                try:
                        aggregated_df['chain_name'] = chain['schema_name']
                        dfs.append(aggregated_df)
                except:
                        print('nothing to append')
                        continue

aggregated_df = pd.concat(dfs)

op : day 1/28
        Query Done: Completed in 52.02 seconds
        Encoding Done: Completed in 60.24 seconds
        Compression Done: Completed in 13.13 seconds
        Estimation Done: Completed in 4.71 seconds
        Transactions Processed: 523.9k
op : day 2/28
        Query Done: Completed in 52.47 seconds
        Encoding Done: Completed in 64.31 seconds
        Compression Done: Completed in 16.97 seconds
        Estimation Done: Completed in 9.26 seconds
        Transactions Processed: 523.4k
op : day 3/28
        Query Done: Completed in 21.54 seconds
        Encoding Done: Completed in 67.05 seconds
        Compression Done: Completed in 18.82 seconds
        Estimation Done: Completed in 11.45 seconds
        Transactions Processed: 516.5k
op : day 4/28
        Query Done: Completed in 23.09 seconds
        Encoding Done: Completed in 65.45 seconds
        Compression Done: Completed in 18.27 seconds
        Estimation Done: Completed in 11.60 seconds
        Transactions 

In [10]:
opstack_metadata = pd.read_csv('../../op_chains_tracking/outputs/chain_metadata.csv')
meta_columns = ['alignment', 'display_name', 'mainnet_chain_id','op_based_version','is_op_chain','oplabs_db_schema']
opstack_metadata = opstack_metadata[meta_columns][~opstack_metadata['oplabs_db_schema'].isna()]

opstack_metadata = opstack_metadata.rename(columns={'mainnet_chain_id':'chain_id'})

In [11]:
aggregated_df_map = aggregated_df.merge(opstack_metadata[['chain_id','display_name']], on = 'chain_id', how = 'left')

# aggregated_df_map

  aggregated_df_map = aggregated_df.merge(opstack_metadata[['chain_id','display_name']], on = 'chain_id', how = 'left')


In [12]:
# Pull aggregate L1 data
query_id = 3807789

# if query_id == None:
#         dune_query = '''
#         SELECT *
#         FROM dune.oplabspbc.result_op_stack_chains_l_1_data_with_op_chains_from_gs --https://dune.com/queries/3397786
#         WHERE dt >= DATE_TRUNC('day',NOW() - interval '{{total_days}}' day)
#         AND dt < DATE_TRUNC('day',NOW())
#         '''

#         dotenv.load_dotenv()
#         dune = DuneClient(os.environ["DUNE_API_KEY"])

#         query = dune.create_query(
#                 name="aggregate L1 data",
#                 query_sql=dune_query,
#                 params = [QueryParameter.number_type(name="total_days", value=days_of_data)]
#                 )
#         query_id = query.base.query_id
#         print(f"Created query with id {query.base.query_id}")
# else:
        
#         query_id = query_id

param_dt = du.generate_query_parameter(days_of_data, 'total_days','text')
if end_date != -1: #end date config
        param_end = du.generate_query_parameter(end_date + ' 00:00:00', 'date_end','date')
        param_list = [param_dt,param_end]
else:
        param_list = [param_dt]

dune_df = du.get_dune_data(query_id, params=param_list)

[32m2024-06-08 09:14:10.816[0m | [1mINFO    [0m | [36mduneapi_utils[0m:[36mget_dune_data[0m:[36m59[0m - [1mResults available at https://dune.com/queries/3807789?total_days=28&date_end=2024-05-30+00%3A00%3A00[0m
2024-06-08 09:14:11,994 INFO dune_client.api.base executing 3807789 on medium cluster
2024-06-08 09:14:12,285 INFO dune_client.api.base waiting for query execution 01HZVZRD7NVRMZ9VBW4A716NKZ to complete: ExecutionState.PENDING (queue position: 1)
2024-06-08 09:14:13,434 INFO dune_client.api.base waiting for query execution 01HZVZRD7NVRMZ9VBW4A716NKZ to complete: ExecutionState.PENDING (queue position: 1)
2024-06-08 09:14:14,584 INFO dune_client.api.base waiting for query execution 01HZVZRD7NVRMZ9VBW4A716NKZ to complete: ExecutionState.PENDING (queue position: 1)
2024-06-08 09:14:15,740 INFO dune_client.api.base waiting for query execution 01HZVZRD7NVRMZ9VBW4A716NKZ to complete: ExecutionState.PENDING (queue position: 1)
2024-06-08 09:14:16,917 INFO dune_client.api.ba

In [13]:
dune_df = dune_df[['name','dt','num_l1_submissions','num_l1_txs_inbox','l1_blobgas_purchased_inbox']]
dune_df['dt'] = pd.to_datetime(dune_df['dt']).dt.tz_localize(None)
dune_df = dune_df.rename(columns={'name':'display_name','dt':'block_date'})
dune_df.sample(5)

Unnamed: 0,display_name,block_date,num_l1_submissions,num_l1_txs_inbox,l1_blobgas_purchased_inbox
99,BOB (Build on Bitcoin),2024-05-13,14,8,1835008.0
408,CyberConnect,2024-05-17,138,138,
197,OP Mainnet,2024-05-27,2160,360,283115520.0
533,Ancient8,2024-05-12,47,47,
195,Lyra,2024-05-27,13,13,


In [14]:
# Generate L2 : L1 ratio metrics
combined_df = aggregated_df_map.merge(dune_df[['display_name','block_date','l1_blobgas_purchased_inbox']], on =['display_name','block_date'], how = 'inner')
combined_df['blobgas_per_l2_tx'] = combined_df['l1_blobgas_purchased_inbox'] / combined_df['transaction_count']

combined_df.sample(5)

Unnamed: 0,block_date,chain_id,total_len_encoded_transaction,average_len_encoded_transaction,transaction_count,total_len_compressed_transaction,average_len_compressed_transaction,total_estimatedSize,average_estimatedSize,chain_name,display_name,l1_blobgas_purchased_inbox,blobgas_per_l2_tx
27,2024-05-03,10,586688546,1025.978819,571833,241138071,421.693171,186562000.0,326.252614,op,OP Mainnet,191102976.0,334.193682
16,2024-05-14,10,614298058,1274.448214,482011,274387689,569.256073,215764900.0,447.634765,op,OP Mainnet,228851712.0,474.785248
0,2024-05-30,10,519209274,991.033283,523907,227948102,435.092682,174611000.0,333.286267,op,OP Mainnet,177733632.0,339.24653
1,2024-05-29,10,689924791,1318.194967,523386,293814683,561.372836,229778200.0,439.022495,op,OP Mainnet,243793920.0,465.801378
21,2024-05-09,10,744046362,1427.692754,521153,307931954,590.866701,242434100.0,465.187878,op,OP Mainnet,257163264.0,493.450607


In [15]:
# print(aggregated_df['encoded_transaction'][0])
# print(len(aggregated_df['encoded_transaction'][0]))

In [16]:
# Calculate weighted averages and mean
def weighted_avg(df, value_column, weight_column):
    return (df[value_column] * df[weight_column]).sum() / df[weight_column].sum()


In [17]:
# agg_cols = ['average_len_encoded_transaction','average_estimatedSize','transaction_count','l1_blobgas_purchased_inbox','blobgas_per_l2_tx']
grouped_df = combined_df.groupby(['chain_id','chain_name','display_name'])
total_aggregated_df = grouped_df.apply(
    lambda x: pd.Series({
        'average_len_encoded_transaction': weighted_avg(x, 'average_len_encoded_transaction', 'transaction_count'),
        'average_len_compressed_transaction': weighted_avg(x, 'average_len_compressed_transaction', 'transaction_count'),
        'average_estimatedSize': weighted_avg(x, 'average_estimatedSize', 'transaction_count'),
        'average_blobgas_per_l2_tx': x['l1_blobgas_purchased_inbox'].sum() / x['transaction_count'].sum(),
        'wt_average_blobgas_per_l2_tx': weighted_avg(x, 'blobgas_per_l2_tx', 'transaction_count'),
        'average_daily_l1_blobgas_purchased_inbox': x['l1_blobgas_purchased_inbox'].mean(),
        'average_daily_transaction_count': x['transaction_count'].mean(),
        'start_dt': x['block_date'].min(),
        'end_dt': x['block_date'].max()
    })
).reset_index()
total_aggregated_df
total_aggregated_df =total_aggregated_df.reset_index()
total_aggregated_df

Unnamed: 0,index,chain_id,chain_name,display_name,average_len_encoded_transaction,average_len_compressed_transaction,average_estimatedSize,average_blobgas_per_l2_tx,wt_average_blobgas_per_l2_tx,average_daily_l1_blobgas_purchased_inbox,average_daily_transaction_count,start_dt,end_dt
0,0,10,op,OP Mainnet,1246.553512,531.635282,415.872157,436.427916,436.427916,222227900.0,509197.25,2024-05-03,2024-05-30


In [18]:
from datetime import datetime
# Generate current timestamp
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M")
# Define the file path
file_path = f"outputs/l2_output_{current_timestamp}.csv"
total_file_path = f"outputs/total_l2_output_{current_timestamp}.csv"
# Save the DataFrame to CSV
aggregated_df_map.to_csv(file_path, index=False)
total_aggregated_df.to_csv(total_file_path, index=False)
print(f"DataFrame saved to: {file_path}")

DataFrame saved to: outputs/l2_output_20240608_0914.csv
