In [None]:
import requests as r
import pandas as pd
import os

import sys
import csv
from datetime import datetime, timedelta
import time
# Temporarily put the shared helper directory (a path relative to the current
# working directory) on sys.path so that clickhouse_utils can be imported.
# sys.path.pop() then removes the last entry again, i.e. the one appended here
# (assuming the import itself did not append anything to sys.path).
sys.path.append("../../helper_functions")
import clickhouse_utils as ch
sys.path.pop()

In [None]:
# ClickHouse connection used by every query below.
# Called without arguments, so the helper's default applies (the OPLabs DB).
client = ch.connect_to_clickhouse_db()

# Export window: `trailing_days` days ending at `end_date`.
trailing_days = 14
end_date = datetime(year=2024, month=6, day=27)

# Chains to export; each name is used as the prefix of its transactions table.
chain_names = ['base']  # add 'op' here to export it as well

In [None]:
# ClickHouse SQL template for one export window. The export loop fills in the
# placeholders with str.replace before each run:
#   @chain_name@ - prefix of the `<chain>_transactions` table to read
#   @start_date@ - inclusive lower bound on block_timestamp
#   @end_date@   - exclusive upper bound on block_timestamp
# Only rows with gas_price > 0 and receipt_status = 1 are selected
# (presumably: non-free, successful transactions - confirm against the schema).
# The SETTINGS clause sets ClickHouse's max_execution_time for this query.
query = '''
SELECT *
FROM @chain_name@_transactions
WHERE gas_price > 0
        AND block_timestamp >= '@start_date@'
        AND block_timestamp < '@end_date@'
        AND receipt_status = 1

SETTINGS max_execution_time = 50000
'''


In [None]:
# Start of the export window: `trailing_days` days before `end_date`.
# The query template uses `>=` for the start and `<` for the end, so the window
# is [start_date, end_date) - end_date itself is not included.
start_date = end_date - timedelta(days=trailing_days)


In [None]:
# Export each chain's transactions to one CSV file. The window
# [start_date, end_date) is queried one day at a time and every daily result
# is written to disk immediately, so only a single day's rows are held in
# memory at once.
for chain_name in chain_names:
    result = None
    start_time = time.time()
    print(f"Starting processing for {chain_name}")

    csv_filename = f'csv_outputs/{chain_name}_{end_date.strftime("%Y%m%d")}_{trailing_days}days.csv'
    # DataFrame.to_csv does not create missing parent directories, so make
    # sure the output folder exists before the first write.
    os.makedirs(os.path.dirname(csv_filename), exist_ok=True)

    # The first chunk of a run always (re)creates the file with a header row;
    # later chunks are appended without one. Appending to a file left over
    # from a previous run would duplicate every row, because the loop always
    # starts again at start_date.
    is_first_chunk = True

    current_date = start_date
    while current_date < end_date:
        day_start = current_date
        # Clamp the last slice so it never runs past end_date.
        day_end = min(day_start + timedelta(days=1), end_date)

        q_run = (
            query
            .replace('@chain_name@', chain_name)
            .replace('@start_date@', day_start.strftime('%Y-%m-%d'))
            .replace('@end_date@', day_end.strftime('%Y-%m-%d'))
        )

        print(f"Querying data for {day_start.date()}")
        day_started = time.time()
        result = client.query_df(q_run)

        result.to_csv(
            csv_filename,
            index=False,
            mode='w' if is_first_chunk else 'a',
            header=is_first_chunk,
        )
        is_first_chunk = False

        # Per-day duration (query plus CSV write), not time since chain start.
        execution_time = time.time() - day_started
        print(f"Query for {chain_name} on {day_start.date()} completed in {execution_time:.2f} seconds")

        current_date = day_end

    execution_time = time.time() - start_time
    print(f"Processing for {chain_name} completed in {execution_time:.2f} seconds")
    print(f"Processed data for {trailing_days} days, ending on {end_date.date()}")
    print(f"Results saved to {csv_filename}")
    print("---")

In [None]:
# print(result.result_rows)