See readme.md for ideal fields and descriptions

In [None]:
# ! pip install pandas pyarrow
# ! pip install polars

In [None]:
import subprocess
import os
import datetime
import time
import pyarrow.parquet as pq
import pandas as pd


In [None]:
import polars as pl

In [None]:
# Init timestamps
current_time = datetime.datetime.utcnow()
time_24_hours_ago = current_time - datetime.timedelta(hours=24)
print('current time: ' + str(current_time))
print('24 hrs ago time: ' + str(time_24_hours_ago))

In [None]:
# Test doing t24h for Lyra

fields = 'blocks txs'# traces'
rpc_url = 'https://rpc.lyra.finance/'
chain_name = 'lyra'
start_timestamp = int(time_24_hours_ago.timestamp())
end_timestamp = int(current_time.timestamp())

dry_run = 0

In [None]:
# Generate Command
if dry_run == 1:
    dry_txt = '--dry'
else:
    dry_txt = ''

command = f"cryo {fields} --rpc {rpc_url} --timestamps {start_timestamp}:{end_timestamp} --subdirs datatype --label {chain_name} {dry_txt}"
print(command)

In [None]:
start_time = time.time()
# Run the command using subprocess.run and capture the output
result = subprocess.run(
    command, 
    shell=True, 
    stdout=subprocess.PIPE,  # Capture standard output
    stderr=subprocess.PIPE,  # Capture standard error
    text=True  # Capture output as text (Python 3.7+)
)

# Display the captured output
if result.returncode == 0:
    print("Command succeeded. Output:")
    print(result.stdout)
# else:
#     print("Command failed. Error output:")
#     print(result.stderr)

end_time = time.time()

In [None]:
# Calculate the elapsed time
elapsed_time = end_time - start_time
# Print the elapsed time in seconds
print(f"Elapsed time: {elapsed_time:.4f} seconds")

In [None]:
# # Read parquet files
txs = pl.scan_parquet('transactions__' + chain_name + '/*.parquet')
blocks = pl.scan_parquet('blocks__' + chain_name + '/*.parquet')

# Rename the 'gas_used' column to 'block_gas_used' in the 'blocks' DataFrame
blocks = blocks.rename({"gas_used": "block_gas_used"})

# Perform the join on 'block_number' and 'chain_id'
joined_df = blocks.join(
    txs,
    on=["block_number", "chain_id"],
    how="inner"  # You can specify the type of join you want (inner, outer, left, right)
)

# Convert Unix timestamp to datetime and create a new column 'timestamp_dt'
joined_df = joined_df.with_columns(
    pl.from_epoch("timestamp", time_unit="s").alias("timestamp_dt")
)

# Truncate the 'timestamp_dt' column to the day and create a new column 'timestamp_date'
joined_df = joined_df.with_columns(
    pl.col("timestamp_dt").dt.truncate("1d").alias("timestamp_date")
)

In [None]:
# print(blocks.schema)
# print(txs.schema)
print(joined_df.schema)

#test output
joined_pd = joined_df.collect().to_pandas()
joined_pd.head(5)

In [97]:
# Assuming you have a DataFrame named 'joined_df' with the required columns

result_df = joined_df.group_by(pl.col("timestamp_date")).agg(
    num_blocks=pl.col("block_number").n_unique(),
    num_user_transactions=
        pl.when(pl.col("gas_price") > 0).then(pl.col("transaction_hash")).n_unique(),
    num_success_user_transactions=
        pl.when((pl.col("gas_price") > 0) & pl.col("success")).then(pl.col("transaction_hash")).n_unique(),
    total_gas_used=pl.col("gas_used").sum(),
    user_gas_used=pl.col("gas_used").filter(pl.col("gas_price") > 0).sum(),
    num_senders=pl.col("from_address").filter(pl.col("gas_price") > 0).n_unique(),
    l2_fees_base_fees=(pl.col("base_fee_per_gas") * pl.col("gas_used")).sum(),
    l2_fees_priority_fees=pl.when(pl.col("gas_price") > 0).then((pl.col("gas_price") - pl.col("base_fee_per_gas")) * pl.col("gas_used")).sum(),
    l2_fees_total_fees=(pl.col("gas_price") * pl.col("gas_used")).sum(),
)
result_df

In [98]:
result_df.collect()

timestamp_date,num_blocks,num_user_transactions,num_success_user_transactions,total_gas_used,user_gas_used,num_senders,l2_fees_base_fee,l2_fees_priority_fee,l2_fees_total_fee
datetime[μs],u32,u32,u32,u64,u64,u32,u64,u64,u64
2023-12-17 00:00:00,4497,15,15,245841890,20026765,7,12292094500,4342699000000,4343700338250
2023-12-16 00:00:00,25705,75,75,1406723392,117023699,10,70336169600,20655953300000,20661804484950
