In [1]:
# ! pip install pandas web3 hexbytes rlp fastlz clickhouse-connect
# ! pip install python-dotenv
# ! pip install dune-client
# ! pip install loguru

### Readme
FastLZ needs Python 3.9.x or lower; make sure your environment is not using a later Python version.

In [2]:
import pandas as pd
from web3 import Web3
from hexbytes import HexBytes
import ast
import rlp
from rlp.sedes import Binary, big_endian_int, binary, List
from eth_utils import to_bytes, to_hex, int_to_big_endian
import fastlz
import sys
import os
import dotenv
import time
from dune_client.client import DuneClient
from dune_client.types import QueryParameter
# Load variables from a local .env file into the process environment
dotenv.load_dotenv()
# Temporarily extend the import path so the modules in ../../helper_functions can be imported
sys.path.append("../../helper_functions")
import clickhouse_utils as ch
import duneapi_utils as du
# Remove the last sys.path entry again (the helper directory, provided the imports
# above did not append anything themselves)
sys.path.pop()

# Shared ClickHouse client used by the query loop further down
client = ch.connect_to_clickhouse_db() #Default is OPLabs DB

In [3]:
import warnings
# NOTE: no module/message filter is given, so this hides every DeprecationWarning
# in the kernel, not only the ones raised inside fastlz.
warnings.filterwarnings("ignore", category=DeprecationWarning) #Suppress internal fastlz warnings

In [4]:
# Run configuration
# Chains to process, identified by their `oplabs_db_schema` value.
# Currently disabled candidates: 'op', 'base', 'fraxtal' - add them to the list as needed.
schemas_to_select = ['mode', 'zora']
# Number of one-day chunks to query per chain
days_of_data = 28

# FastLZ regression parameters
# Specs - https://specs.optimism.io/fjord/exec-engine.html?search=#fjord-l1-cost-fee-changes-fastlz-estimator
intercept = -42_585_600
fastlzCoef = 836_500
# Lower bound applied to the estimated size
minTransactionSize = 100
# The regression result is divided by this factor when the estimate is computed
scaled_by = 1e6

### Execute

In [5]:
# Load the chain metadata table
csv_path = '../../op_chains_tracking/outputs/chain_metadata.csv'
df = pd.read_csv(csv_path)

# Keep only the chains whose schema was selected in the run config
filtered_df = df.loc[df['oplabs_db_schema'].isin(schemas_to_select)]

# One dict per chain: {'schema_name': ..., 'display_name': ..., 'chain_id': ...}
chain_mappings_list = (
    filtered_df[['oplabs_db_schema', 'display_name', 'mainnet_chain_id']]
    .rename(columns={'oplabs_db_schema': 'schema_name', 'mainnet_chain_id': 'chain_id'})
    .to_dict(orient='records')
)

# Uncomment to inspect the resulting list of dictionaries
# print(chain_mappings_list)

In [6]:
# # Test transaction receipt
# from web3 import Web3
# op_rpc = os.getenv("OP_PUBLIC_RPC")
# w3 = Web3(Web3.HTTPProvider(op_rpc))

# tx_test = '0xcea81f2e836a37b38ba82afd37e6f66c02e348e7b89538aa232013d91edcb926'
# tx = w3.eth.get_transaction(tx_test)
# txr = w3.eth.get_transaction_receipt(tx_test)
# # # txraw = w3.eth.get_raw_transaction(tx_test)
# print(tx)
# # print(txr)
# # # print(txraw)

In [7]:
# NOTE: this query may not be sufficient for an exact encoding, because it does not
# select the transaction signature fields (v, r, s).

# Get L2 Txs from Clickhouse / Goldsky
# SQL template for a single one-day window. The placeholders @chain_id@,
# @chain_db_name@ and @day_num@ must be substituted before the query is executed.
query_by_day = '''
        SELECT distinct @chain_id@ as chain_id, nonce, gas, gas_price,
                to_address, value, input, block_timestamp, hash
        FROM @chain_db_name@_transactions
        WHERE gas_price > 0
        # 1 day chunk
        AND block_timestamp < DATE_TRUNC('day',NOW()) - interval '@day_num@ days'
        AND block_timestamp >= DATE_TRUNC('day',NOW()) - (interval '@day_num@ days') - (interval '1 day')

        SETTINGS max_execution_time = 7000
'''
# Optional filters for debugging a single transaction or block (paste into the WHERE clause):
# AND hash = '0xcea81f2e836a37b38ba82afd37e6f66c02e348e7b89538aa232013d91edcb926'
# AND block_number = 120731426

In [8]:
# Process transactions and RLP encode
#https://ethereum.org/en/developers/docs/transactions/

# NOTE THE RLP ENCODING IS NOT 1:1 WITH ETHERSCAN YET (but it's ~close-ish)
def _to_address_bytes(to_field):
    """Convert a 'to_address' value into raw bytes (b'' when there is no recipient)."""
    # None / NaN means the row has no recipient (e.g. contract creation)
    if to_field is None or (isinstance(to_field, float) and to_field != to_field):
        return b''
    if isinstance(to_field, bytes):
        # Bytes may hold either the ASCII text "0x..." or the raw address itself
        if to_field.startswith(b'0x'):
            return bytes.fromhex(to_field.decode('utf-8')[2:])
        return to_field
    if isinstance(to_field, str):
        hex_part = to_field[2:] if to_field.startswith('0x') else to_field
        return bytes.fromhex(hex_part)  # '' -> b''
    raise ValueError("Invalid 'to_address' field type")


def _scalar_to_rlp_bytes(value):
    """Big-endian bytes of a non-negative integer; zero becomes b'' as RLP requires."""
    number = int(value)
    return int_to_big_endian(number) if number else b''


def process_and_encode_transaction(row):
    """RLP-encode one transaction row as a 9-field list.

    The encoded fields are, in order: nonce, gasPrice, gas, to, value, input,
    v, r, s.

    Parameters
    ----------
    row : mapping (e.g. a pandas Series)
        Must provide 'nonce', 'gas_price', 'gas', 'to_address', 'value',
        'input', 'v', 'r' and 's'. 'input', 'r' and 's' are read as
        0x-prefixed hex strings. 'to_address' may be a hex string, bytes, or
        null (encoded as an empty byte string).

    Returns
    -------
    tuple
        ``(encoded_tx_hex, encoded_byte_length)`` on success, or
        ``(None, None)`` when the row cannot be encoded; in that case the
        error and the row are printed.
    """
    try:
        transaction = [
            _scalar_to_rlp_bytes(row['nonce']),
            _scalar_to_rlp_bytes(row['gas_price']),
            _scalar_to_rlp_bytes(row['gas']),
            _to_address_bytes(row['to_address']),
            _scalar_to_rlp_bytes(row['value']),
            bytes.fromhex(row['input'][2:]),
            _scalar_to_rlp_bytes(row['v']),
            bytes.fromhex(row['r'][2:]),
            bytes.fromhex(row['s'][2:]),
        ]

        # Encode the entire transaction
        encoded_tx = rlp.encode(transaction)
        return "0x" + encoded_tx.hex(), len(encoded_tx)

    except (ValueError, TypeError, KeyError, AttributeError, UnicodeDecodeError) as e:
        # Report the bad row and let the caller decide what to do with the (None, None) marker
        print("Error:", e)
        print("Failed Transaction Info:")
        print(row)
        return None, None

# Function to compress transaction data
def compress_transaction(encoded_transaction):
    """FastLZ-compress a hex-encoded transaction.

    The first two characters of ``encoded_transaction`` (the "0x" prefix added
    by the encoder) are skipped before the hex is converted to bytes.

    Returns a tuple ``(compressed_hex, compressed_byte_length)``; the hex
    string carries no "0x" prefix.
    """
    raw_bytes = bytes.fromhex(encoded_transaction[2:])
    packed = fastlz.compress(raw_bytes)
    return packed.hex(), len(packed)
# Define a function to apply to each row of the DataFrame
def process_and_compress_transaction(row):
    """Row-wise adapter: compress the row's 'encoded_transaction' value.

    Returns ``(compressed_hex, compressed_byte_length)`` as produced by
    ``compress_transaction``.
    """
    compressed_hex, compressed_len = compress_transaction(row['encoded_transaction'])
    return compressed_hex, compressed_len

In [9]:
# For every selected chain, pull one day of transactions at a time, RLP-encode and
# FastLZ-compress each transaction, and keep only that day's aggregate row.
dfs = []
for chain in chain_mappings_list:
    for day_num in range(0, days_of_data):
        result_df = None  # Reset so a failed query can never reuse the previous day's rows
        print(chain['schema_name'] + ' : day ' + str(day_num))

        # Fill in the @placeholders@ of the SQL template
        query_map = query_by_day
        query_map = query_map.replace("@chain_db_name@", chain['schema_name'])
        query_map = query_map.replace("@chain_id@", str(chain['chain_id']))
        query_map = query_map.replace("@day_num@", str(day_num))

        query_start_time = time.time()
        try:
            result_df = client.query_df(query_map)
        except UnicodeDecodeError as e:
            print(f"UnicodeDecodeError: {e}")
            print(f"Problematic byte sequence: {e.object[e.start:e.end]}")

        query_end_time = time.time()  # Record the query end time
        query_elapsed_time = query_end_time - query_start_time
        print (f"        Query Done: Completed in {query_elapsed_time:.2f} seconds")

        # A failed query leaves result_df as None, and a day without rows has nothing to encode
        if result_df is None or result_df.empty:
            print('nothing to append')
            continue

        # Add Dummy Signature and fields (the query does not return v / r / s)
        result_df['access_list'] = '[]'
        result_df['access_list'] = result_df['access_list'].apply(ast.literal_eval)
        result_df['r'] = '0x6727a53c0972c55923242cea052dc4e1105d7b65c91c442e2741440965eac357'
        result_df['s'] = '0x0a8e71aea623adb7b5562fb9a779634f3b84dad7be1e1f22caaa640db352a6ff'
        result_df['v'] = '55'

        # RLP-encode every transaction
        result_df[['encoded_transaction', 'len_encoded_transaction']] = result_df.apply(process_and_encode_transaction, axis=1, result_type='expand')

        # The encoder marks rows it could not encode with None; compressing them would crash
        encode_failed = result_df['encoded_transaction'].isna()
        if encode_failed.any():
            print(f"        Skipping {int(encode_failed.sum())} transactions that could not be encoded")
            result_df = result_df[~encode_failed].copy()
            result_df['len_encoded_transaction'] = pd.to_numeric(result_df['len_encoded_transaction'])
        if result_df.empty:
            print('nothing to append')
            continue

        enc_end_time = time.time()  # Record the encoding end time
        enc_elapsed_time = enc_end_time - query_end_time
        print (f"        Encoding Done: Completed in {enc_elapsed_time:.2f} seconds")

        # Apply compression to each transaction in the DataFrame
        result_df[['compressed_transaction', 'compressed_transaction_length']] = result_df.apply(process_and_compress_transaction, axis=1, result_type='expand')
        comp_end_time = time.time()
        comp_elapsed_time = comp_end_time - enc_end_time
        print (f"        Compression Done: Completed in {comp_elapsed_time:.2f} seconds")

        # Estimated size from the FastLZ regression, floored at minTransactionSize
        compressed_len = pd.to_numeric(result_df['compressed_transaction_length'])
        result_df['estimatedSize_raw'] = (intercept + compressed_len * fastlzCoef) / scaled_by
        result_df['estimatedSize'] = result_df['estimatedSize_raw'].clip(lower=minTransactionSize)
        est_end_time = time.time()
        est_elapsed_time = est_end_time - comp_end_time
        print (f"        Estimation Done: Completed in {est_elapsed_time:.2f} seconds")

        # Agg L2
        # Convert block_timestamp to date (truncate to day)
        result_df['block_date'] = pd.to_datetime(result_df['block_timestamp']).dt.date
        result_df['block_date'] = pd.to_datetime(result_df['block_date']).dt.tz_localize(None)
        grouped_df = result_df.groupby(['block_date', 'chain_id'])
        # Named aggregation keeps every output column tied to its source column and function
        daily_agg_df = grouped_df.agg(
            total_len_encoded_transaction=('len_encoded_transaction', 'sum'),
            average_len_encoded_transaction=('len_encoded_transaction', 'mean'),
            transaction_count=('len_encoded_transaction', 'count'),
            total_len_compressed_transaction=('compressed_transaction_length', 'sum'),
            average_len_compressed_transaction=('compressed_transaction_length', 'mean'),
            total_estimatedSize=('estimatedSize', 'sum'),
            average_estimatedSize=('estimatedSize', 'mean'),
        ).reset_index()
        daily_agg_df['chain_name'] = chain['schema_name']
        dfs.append(daily_agg_df)

if not dfs:
    raise ValueError("No transactions were returned for any selected chain/day; nothing to aggregate")
aggregated_df = pd.concat(dfs)

zora : day 0
        Query Done: Completed in 2.83 seconds
        Encoding Done: Completed in 6.19 seconds
        Compression Done: Completed in 6.19 seconds
        Estimation Done: Completed in 0.92 seconds
zora : day 1
        Query Done: Completed in 3.01 seconds
        Encoding Done: Completed in 5.99 seconds
        Compression Done: Completed in 5.99 seconds
        Estimation Done: Completed in 0.83 seconds
zora : day 2
        Query Done: Completed in 4.12 seconds
        Encoding Done: Completed in 5.75 seconds
        Compression Done: Completed in 5.75 seconds
        Estimation Done: Completed in 0.90 seconds
zora : day 3
        Query Done: Completed in 3.47 seconds
        Encoding Done: Completed in 6.46 seconds
        Compression Done: Completed in 6.46 seconds
        Estimation Done: Completed in 0.84 seconds
zora : day 4
        Query Done: Completed in 3.76 seconds
        Encoding Done: Completed in 5.98 seconds
        Compression Done: Completed in 5.98 seco

In [None]:
# Chain metadata restricted to chains that have an OP Labs DB schema
meta_columns = ['alignment', 'display_name', 'mainnet_chain_id','op_based_version','is_op_chain','oplabs_db_schema']
opstack_metadata = (
    pd.read_csv('../../op_chains_tracking/outputs/chain_metadata.csv')
    .loc[lambda meta: ~meta['oplabs_db_schema'].isna(), meta_columns]
    .rename(columns={'mainnet_chain_id':'chain_id'})
)

In [None]:
# Attach each chain's display name to the daily aggregates (left join keeps every aggregate row)
aggregated_df_map = pd.merge(
    aggregated_df,
    opstack_metadata[['chain_id','display_name']],
    how = 'left',
    on = 'chain_id',
)

# aggregated_df_map

In [None]:
# Pull aggregate L1 data
# Id of the saved Dune query to run. Set to None to create a new query from the SQL below.
query_id = 3807789

if query_id is None:
        dune_query = '''
        SELECT *
        FROM dune.oplabspbc.result_op_stack_chains_l_1_data_with_op_chains_from_gs --https://dune.com/queries/3397786
        WHERE dt >= DATE_TRUNC('day',NOW() - interval '{{total_days}}' day)
        AND dt < DATE_TRUNC('day',NOW())
        '''

        dotenv.load_dotenv()
        dune = DuneClient(os.environ["DUNE_API_KEY"])

        query = dune.create_query(
                name="aggregate L1 data",
                query_sql=dune_query,
                params = [QueryParameter.number_type(name="total_days", value=days_of_data)]
                )
        query_id = query.base.query_id
        print(f"Created query with id {query.base.query_id}")

# Pass the look-back window as the query's `total_days` parameter
# NOTE(review): sent as a 'text' parameter here but declared with number_type when the
# query is created above - confirm the helper/Dune accept both.
param_dt = du.generate_query_parameter(days_of_data, 'total_days','text')

dune_df = du.get_dune_data(query_id, params=[param_dt])

In [None]:
# Keep the L1 columns of interest, make `dt` timezone-naive and align the column
# names with the L2 aggregates (display_name / block_date)
dune_df = (
    dune_df[['name','dt','num_l1_submissions','num_l1_txs_inbox','l1_blobgas_purchased_inbox']]
    .assign(dt=lambda l1: pd.to_datetime(l1['dt']).dt.tz_localize(None))
    .rename(columns={'name':'display_name','dt':'block_date'})
)
dune_df.sample(5)

In [None]:
# L2 : L1 ratio metrics - join the daily L2 aggregates with the daily L1 blob gas
# (inner join: only chain/day pairs present on both sides survive)
combined_df = pd.merge(
    aggregated_df_map,
    dune_df[['display_name','block_date','l1_blobgas_purchased_inbox']],
    how = 'inner',
    on = ['display_name','block_date'],
).assign(
    blobgas_per_l2_tx=lambda merged: merged['l1_blobgas_purchased_inbox'] / merged['transaction_count']
)

combined_df.sample(5)

In [None]:
# print(aggregated_df['encoded_transaction'][0])
# print(len(aggregated_df['encoded_transaction'][0]))

In [None]:
# Calculate weighted averages and mean
def weighted_avg(df, value_column, weight_column):
    """Return the mean of ``df[value_column]`` weighted by ``df[weight_column]``.

    Computed as sum(value * weight) / sum(weight).
    """
    weights = df[weight_column]
    weighted_total = (df[value_column] * weights).sum()
    return weighted_total / weights.sum()


In [None]:
# agg_cols = ['average_len_encoded_transaction','average_estimatedSize','transaction_count','l1_blobgas_purchased_inbox','blobgas_per_l2_tx']
# Collapse the daily rows into one summary row per chain
grouped_df = combined_df.groupby(['chain_id','chain_name','display_name'])
total_aggregated_df = grouped_df.apply(
    lambda x: pd.Series({
        # Daily averages are re-weighted by that day's transaction count
        'average_len_encoded_transaction': weighted_avg(x, 'average_len_encoded_transaction', 'transaction_count'),
        'average_estimatedSize': weighted_avg(x, 'average_estimatedSize', 'transaction_count'),
        'average_blobgas_per_l2_tx': x['l1_blobgas_purchased_inbox'].sum() / x['transaction_count'].sum(),
        'average_daily_l1_blobgas_purchased_inbox': x['l1_blobgas_purchased_inbox'].mean(),
        'average_daily_transaction_count': x['transaction_count'].mean(),
        # combined_df carries the day as `block_date`; it has no `dt` column
        'start_dt': x['block_date'].min(),
        'end_dt': x['block_date'].max()
    })
).reset_index()  # a single reset is enough; a second one would add a stray `index` column
total_aggregated_df

In [None]:
from datetime import datetime

# Timestamp (minute resolution) used to give each export its own file name
current_timestamp = datetime.now().strftime("%Y%m%d_%H%M")
# Define the file paths
file_path = f"outputs/l2_output_{current_timestamp}.csv"
total_file_path = f"outputs/total_l2_output_{current_timestamp}.csv"

# to_csv does not create missing directories, so make sure the target exists
os.makedirs("outputs", exist_ok=True)

# Save the daily and the per-chain summary DataFrames to CSV
aggregated_df_map.to_csv(file_path, index=False)
total_aggregated_df.to_csv(total_file_path, index=False)
print(f"DataFrame saved to: {file_path}")
print(f"DataFrame saved to: {total_file_path}")