In [1]:
import requests as r
import pandas as pd
import os

import sys
import csv
from datetime import datetime, timedelta
import time
sys.path.append("../../helper_functions")
import clickhouse_utils as ch
sys.path.pop()

'../../helper_functions'

In [2]:
# Open the ClickHouse client that the export loop sends its queries through.
# Per the original author's note, the helper connects to the OPLabs DB by default.
client = ch.connect_to_clickhouse_db()

# Extraction window: `trailing_days` of data ending at `end_date`
# (the query treats the end bound as exclusive).
trailing_days = 14
end_date = datetime(year=2024, month=6, day=27)

# One entry per chain: [table-name prefix, whether the chain's table also has
# the columns listed in `extra_fields`].
chain_names = [
    ['op', True],
    ['base', False],
]

In [3]:
# Extra columns selected only for chains flagged with True in `chain_names`.
# The text is spliced into the SELECT list in place of @extra_fields@, so:
#   * names must be bare identifiers -- in ClickHouse, single quotes create a
#     string literal, which would return the column *name* as a constant in
#     every row instead of the column's values;
#   * the trailing comma is required because another column follows the
#     placeholder in the query template.
extra_fields = '''
    receipt_l1_blob_base_fee,
    receipt_l1_blob_base_fee_scalar,
    receipt_l1_base_fee_scalar,
'''

In [4]:
# SQL template for one chain and one time slice. The @...@ placeholders are
# filled in with plain str.replace() before the query is executed:
#   @chain_name@   -> prefix of the `<chain>_transactions` table
#   @start_date@   -> inclusive lower bound on block_timestamp
#   @end_date@     -> exclusive upper bound on block_timestamp
#   @extra_fields@ -> optional extra columns, or '' to omit them; a non-empty
#                     value must end with a comma because `chain` follows it
# Only rows with gas_price > 0 are selected, and only the first 10 characters
# of `input` are kept (as tx_method_id).
query = '''
SELECT
        hash,
	nonce,
	block_hash,
	block_number,
	transaction_index,
	from_address,
	to_address,
	value,
	gas,
	gas_price,
	substring(input,1,10) AS tx_method_id,
	max_fee_per_gas,
	max_priority_fee_per_gas,
	transaction_type,
	block_timestamp,
	receipt_cumulative_gas_used,
	receipt_gas_used,
	receipt_contract_address,
	receipt_effective_gas_price,
	receipt_root_hash,
	receipt_l1_fee,
	receipt_l1_gas_used,
	receipt_l1_gas_price,
	receipt_l1_fee_scalar,
	@extra_fields@
	chain
FROM @chain_name@_transactions
WHERE gas_price > 0
        AND block_timestamp >= '@start_date@'
        AND block_timestamp < '@end_date@'

SETTINGS max_execution_time = 50000
'''


In [5]:
# First day of the extraction window: `trailing_days` before `end_date`.
lookback = timedelta(days=trailing_days)
start_date = end_date - lookback


In [6]:
# Export the trailing window for every configured chain to
# csv_outputs/<chain>_<end date>_<N>days_v.csv. The window is fetched one day
# per query and each day's rows are written out immediately, so only a single
# day's DataFrame is held at a time.
os.makedirs('csv_outputs', exist_ok=True)  # to_csv() does not create the folder

for chain_name, has_extra_fields in chain_names:
    result = None
    print(f"Starting processing for {chain_name}")
    # Timer for the whole chain, kept separate from the per-day timer so the
    # final summary reports the total rather than just the last day's query.
    chain_start_time = time.time()

    csv_filename = f'csv_outputs/{chain_name}_{end_date.strftime("%Y%m%d")}_{trailing_days}days_v.csv'

    # Chain-level placeholders do not change from day to day, so fill them once.
    chain_query = query.replace('@chain_name@', chain_name)
    chain_query = chain_query.replace('@extra_fields@', extra_fields if has_extra_fields else '')

    current_date = start_date
    while current_date < end_date:
        day_start_time = time.time()
        day_start = current_date
        # Clamp the final slice so it never runs past the exclusive end_date.
        day_end = min(day_start + timedelta(days=1), end_date)

        q_run = chain_query.replace('@start_date@', day_start.strftime('%Y-%m-%d'))
        q_run = q_run.replace('@end_date@', day_end.strftime('%Y-%m-%d'))

        print(f"Querying data for {day_start.date()}")
        result = client.query_df(q_run)

        # The first day always (re)creates the file with a header row; later
        # days append without one. Keying this on the day rather than on
        # whether the file already exists means re-running the cell
        # regenerates the export instead of appending a duplicate,
        # header-less copy of the same rows to a stale file.
        is_first_day = current_date == start_date
        result.to_csv(
            csv_filename,
            index=False,
            mode='w' if is_first_day else 'a',
            header=is_first_day,
        )

        execution_time = time.time() - day_start_time
        print(f"Query for {chain_name} on {day_start.date()} completed in {execution_time:.2f} seconds")

        current_date = day_end

    execution_time = time.time() - chain_start_time
    print(f"Processing for {chain_name} completed in {execution_time:.2f} seconds")
    print(f"Processed data for {trailing_days} days, ending on {end_date.date()}")
    print(f"Results saved to {csv_filename}")
    print("---")

Starting processing for op
Querying data for 2024-06-13
Query for op on 2024-06-13 completed in 20.96 seconds
Querying data for 2024-06-14
Query for op on 2024-06-14 completed in 18.65 seconds
Querying data for 2024-06-15
Query for op on 2024-06-15 completed in 21.12 seconds
Querying data for 2024-06-16
Query for op on 2024-06-16 completed in 17.66 seconds
Querying data for 2024-06-17
Query for op on 2024-06-17 completed in 18.44 seconds
Querying data for 2024-06-18
Query for op on 2024-06-18 completed in 20.66 seconds
Querying data for 2024-06-19
Query for op on 2024-06-19 completed in 17.28 seconds
Querying data for 2024-06-20
Query for op on 2024-06-20 completed in 25.64 seconds
Querying data for 2024-06-21
Query for op on 2024-06-21 completed in 22.12 seconds
Querying data for 2024-06-22
Query for op on 2024-06-22 completed in 15.07 seconds
Querying data for 2024-06-23
Query for op on 2024-06-23 completed in 15.06 seconds
Querying data for 2024-06-24
Query for op on 2024-06-24 comp

In [7]:
# print(result.result_rows)