In [None]:
# Base names of the materialized views to create and backfill for every chain.
# Each name is combined with a chain prefix and an "_mv" suffix downstream.
mv_names = [
    'erc20_transfers',
    # 'erc721_transfers',
    'eth_transfers',
]

In [None]:
import pandas as pd
import sys
import datetime
# Temporarily extend the module search path so the shared helper modules can be
# imported, then remove the entry again once they are loaded.
sys.path.append("../../helper_functions")
import clickhouse_utils as ch
import opstack_metadata_utils as ops
sys.path.pop()
# Open the ClickHouse connection used by every function in this notebook.
client = ch.connect_to_clickhouse_db()

import dotenv
import os
# NOTE(review): the .env file is loaded only after the connection attempt above.
# If connect_to_clickhouse_db() reads credentials from environment variables and
# does not load .env itself, this is too late for the first attempt -- confirm
# against the helper. A later cell retries the connection when `client` is None.
dotenv.load_dotenv()

In [None]:
# Get Chain List
chain_configs = ops.get_superchain_metadata_by_data_source('oplabs') # OPLabs db

# The backfill window (start_date / end_date) is assigned once, in the cell that
# builds `chains`. A second copy used to live here with a different end_date
# (today instead of today + 1 day); it was always overwritten before use, so it
# has been removed to avoid two conflicting definitions.

# Retry the connection if no client is available at this point.
if client is None:
    client = ch.connect_to_clickhouse_db()

def get_chain_names_from_df(df):
    """Return the distinct, non-null values of the ``blockchain`` column as a list.

    Args:
        df: DataFrame that contains a ``blockchain`` column.

    Returns:
        A Python list of unique chain names, in order of first appearance.
    """
    chain_column = df['blockchain']
    # Filter out missing entries before de-duplicating.
    present = chain_column[chain_column.notna()]
    distinct = present.unique()
    return distinct.tolist()

In [None]:
# Chain names to process, taken from the chain metadata loaded earlier.
chains = get_chain_names_from_df(chain_configs)

# Backfill window. end_date is one day past today: the backfill loop runs while
# current_date < end_date, so this lets the loop reach today's date.
start_date = datetime.date(year=2021, month=1, day=1)
end_date = datetime.timedelta(days=1) + datetime.date.today()

In [None]:
def get_query_from_file(mv_name):
    """Read the SQL text stored in ``mv_inputs/<mv_name>.sql`` and return it.

    The ``mv_inputs`` directory is resolved next to this script when ``__file__``
    is defined, otherwise relative to the current working directory (the case
    inside a notebook kernel).

    Args:
        mv_name: File name, without the ``.sql`` extension, of the query to load.

    Returns:
        The full contents of the SQL file as a string.

    Raises:
        FileNotFoundError: If the file does not exist; a message is printed
            before the error is re-raised.
    """
    try:
        base_dir = os.path.dirname(os.path.abspath(__file__))
    except NameError:
        # __file__ is not defined in interactive sessions such as Jupyter.
        base_dir = os.getcwd()

    sql_path = os.path.join(base_dir, 'mv_inputs', f'{mv_name}.sql')
    print(f"Attempting to read query from: {sql_path}")

    try:
        with open(sql_path, 'r') as sql_file:
            contents = sql_file.read()
    except FileNotFoundError:
        print(f"Error: Query file not found: {sql_path}")
        raise
    return contents

In [None]:
def create_materialized_view(client, chain, mv_name):
    """Create the ``{chain}_{mv_name}_mv`` materialized view unless it already exists.

    The CREATE statement is read from ``mv_inputs/{mv_name}_mv.sql`` and its
    ``{chain}`` and ``{view_name}`` placeholders are filled in with ``str.format``.

    Args:
        client: ClickHouse client exposing ``query()`` (whose result has
            ``result_rows``) and ``command()``.
        chain: Chain name; used as the view-name prefix and passed to the template.
        mv_name: Base view name; also selects the SQL template file.

    Returns:
        None. Prints whether the view was created or skipped.

    Raises:
        FileNotFoundError: If the view does not exist yet and the SQL template
            file is missing.
    """
    full_view_name = f'{chain}_{mv_name}_mv'

    # Look the view up by exact name. `SHOW TABLES LIKE '<name>'` treats every
    # "_" in the name as a single-character wildcard, so a differently named
    # table could match and cause creation to be skipped by mistake.
    exists_query = f"""
    SELECT 1 FROM system.tables
    WHERE database = currentDatabase() AND name = '{full_view_name}'
    LIMIT 1
    """
    if client.query(exists_query).result_rows:
        print(f"Materialized view {full_view_name} already exists. Skipping creation.")
        return

    query_template = get_query_from_file(f'{mv_name}_mv')
    query = query_template.format(chain=chain, view_name=full_view_name)

    client.command(query)
    print(f"Created materialized view {full_view_name}")

def ensure_backfill_tracking_table_exists(client):
    """Create the ``backfill_tracking`` table in the current database if it is missing.

    The table holds one row per recorded range (chain, mv_name, start_date,
    end_date) and is consulted before each backfill batch.

    Args:
        client: ClickHouse client exposing ``query()`` (whose result has
            ``result_rows``) and ``command()``.

    Returns:
        None. Prints whether the table was created or already existed.
    """
    existence_sql = """
    SELECT 1 FROM system.tables 
    WHERE database = currentDatabase() AND name = 'backfill_tracking'
    """
    if client.query(existence_sql).result_rows:
        print("backfill_tracking table already exists.")
        return

    ddl = """
        CREATE TABLE backfill_tracking (
            chain String,
            mv_name String,
            start_date Date,
            end_date Date
        ) ENGINE = MergeTree()
        ORDER BY (chain, mv_name, start_date)
        """
    client.command(ddl)
    print("Created backfill_tracking table.")

def backfill_data(client, chain, mv_name):
    """Backfill ``{chain}_{mv_name}_mv`` in 14-day batches, skipping tracked ranges.

    Walks from the module-level ``start_date`` up to ``end_date``. For each batch
    it looks for a ``backfill_tracking`` row for this chain/view whose range
    covers the batch. When there is none, it formats the
    ``mv_inputs/{mv_name}_backfill.sql`` template (placeholders ``view_name``,
    ``chain``, ``start_date``, ``end_date``), runs it, and records the range in
    ``backfill_tracking``.

    Errors raised while running or recording a batch are printed and the loop
    continues with the next batch. A failed batch is not recorded, so it is
    attempted again on the next run.

    Args:
        client: ClickHouse client exposing ``query()`` (whose result has
            ``result_rows``) and ``command()``.
        chain: Chain name; used as the view-name prefix and passed to the template.
        mv_name: Base view name; also selects the backfill SQL template.

    Returns:
        None. Progress is reported with ``print``.

    Raises:
        FileNotFoundError: If at least one batch needs backfilling and the SQL
            template file is missing.
    """
    full_view_name = f'{chain}_{mv_name}_mv'
    current_date = start_date
    batch_size = datetime.timedelta(days=14)
    # The template is identical for every batch, so it is read at most once per
    # call, and only if some batch actually needs backfilling.
    query_template = None

    while current_date < end_date:
        batch_end = min(current_date + batch_size, end_date)

        # Check if this range has been backfilled
        check_query = f"""
        SELECT 1
        FROM backfill_tracking
        WHERE chain = '{chain}'
          AND mv_name = '{mv_name}'
          AND start_date <= toDate('{current_date}')
          AND end_date >= toDate('{batch_end}')
        LIMIT 1
        """

        result = client.query(check_query)

        if not result.result_rows:
            # No record of backfill, proceed
            if query_template is None:
                query_template = get_query_from_file(f'{mv_name}_backfill')
            query = query_template.format(
                view_name=full_view_name,
                chain=chain,
                start_date=current_date,
                end_date=batch_end
            )

            try:
                client.command(query)

                # Record the backfill only after the insert above succeeded
                track_query = f"""
                INSERT INTO backfill_tracking (chain, mv_name, start_date, end_date)
                VALUES ('{chain}', '{mv_name}', toDate('{current_date}'), toDate('{batch_end}'))
                """
                client.command(track_query)

                print(f"Backfilled data for {full_view_name} from {current_date} to {batch_end}")
            except Exception as e:
                # Best effort: report the failure and keep going with later batches.
                print(f"Error during backfill for {full_view_name} from {current_date} to {batch_end}: {str(e)}")
        else:
            print(f"Data already backfilled for {full_view_name} from {current_date} to {batch_end}. Skipping.")

        # NOTE(review): the next batch starts the day after batch_end. That is only
        # gap-free if the backfill SQL includes the whole batch_end day; if the
        # template treats end_date as exclusive (or compares timestamps against
        # midnight), that day is skipped -- confirm against the SQL template.
        current_date = batch_end + datetime.timedelta(days=1)

def optimize_remove_dupes(client, chain, mv_name):
    """Run ``OPTIMIZE TABLE ... FINAL`` on the ``{chain}_{mv_name}_mv`` table.

    The intent is to force a merge so that duplicate rows are collapsed.
    NOTE(review): whether duplicates are actually removed depends on the table
    engine declared in the view's SQL template -- confirm there.

    Args:
        client: ClickHouse client exposing ``command()``.
        chain: Chain name used as the view-name prefix.
        mv_name: Base view name.

    Returns:
        None.
    """
    client.command(f"OPTIMIZE TABLE {chain}_{mv_name}_mv FINAL")

In [None]:
def reset_materialized_view(client, chain, mv_name):
    """Drop and recreate ``{chain}_{mv_name}_mv`` and clear its backfill tracking rows.

    The steps run in order: drop the view, recreate it through
    ``create_materialized_view``, then delete the matching rows from
    ``backfill_tracking``. Any exception stops the remaining steps and is printed
    instead of being raised.

    Args:
        client: ClickHouse client exposing ``command()`` (and whatever
            ``create_materialized_view`` needs).
        chain: Chain name used as the view-name prefix.
        mv_name: Base view name.

    Returns:
        None.
    """
    full_view_name = f'{chain}_{mv_name}_mv'
    drop_sql = f"DROP TABLE IF EXISTS {full_view_name}"
    clear_tracking_sql = f"""
        ALTER TABLE backfill_tracking 
        DELETE WHERE chain = '{chain}' AND mv_name = '{mv_name}'
        """

    try:
        # Step 1: remove the current view.
        client.command(drop_sql)
        print(f"Dropped materialized view {full_view_name}")

        # Step 2: build it again from its SQL template.
        create_materialized_view(client, chain, mv_name)
        print(f"Recreated materialized view {full_view_name}")

        # Step 3: forget the recorded ranges so a later backfill starts over.
        client.command(clear_tracking_sql)
        print(f"Cleared backfill tracking for {full_view_name}")
    except Exception as e:
        print(f"Error resetting materialized view {full_view_name}: {str(e)}")

In [None]:
# # To reset a view
# reset_materialized_view(client, 'op', 'erc20_transfers')

In [None]:
# Main execution: make sure the tracking table exists, then process every
# configured view for every chain.
ensure_backfill_tracking_table_exists(client)

for chain in chains:
    print(f"Processing chain: {chain}")
    for mv_name in mv_names:
        # Steps run in this fixed order: create the view, backfill it, then optimize it.
        for step in (create_materialized_view, backfill_data, optimize_remove_dupes):
            step(client, chain, mv_name)
    print(f"Completed processing for {chain}")

print("All chains and views processed successfully")