# In [None]:
import pandas as pd
import requests
import datetime
from dotenv import load_dotenv
import os
from tqdm import tqdm
import utilities

load_dotenv()


def get_libmev_data(start_date, end_date, interval_minutes=5):
    """
    Fetch bundles from the libMEV API for a time range and return them as one DataFrame.

    The range is walked backwards from ``end_date`` in windows of
    ``interval_minutes``; the last (oldest) window is clamped to ``start_date``
    so a range that is not a multiple of the interval is still fully covered.
    A window whose request fails is reported on stdout and skipped; it is not
    retried, so the printed timestamp identifies the gap.

    Parameters:
        start_date (int): Start timestamp in Unix epoch seconds (exclusive:
            only rows with ``timestamp > start_date`` are kept).
        end_date (int): End timestamp in Unix epoch seconds.
        interval_minutes (int): Interval for API requests in minutes (default: 5 minutes)
    Returns: pd.DataFrame: The concatenated API rows, indexed by a datetime
        index named ``DATETIME`` built from the ``timestamp`` column, without
        the ``tokens`` and ``token_balance_delta`` columns. Empty when no
        window returned any data.
    Raises:
        ValueError: If ``interval_minutes`` is not positive.
    """
    interval_seconds = interval_minutes * 60
    if interval_seconds <= 0:
        raise ValueError("interval_minutes must be positive")

    # Ceiling division so a trailing partial interval is fetched as well.
    span = end_date - start_date
    total_intervals = max(0, -(-span // interval_seconds))

    frames = []
    window_end = end_date

    for _ in tqdm(range(total_intervals)):
        window_start = max(window_end - interval_seconds, start_date)
        url = f"https://api.libmev.com/v1/bundles?timestampRange={window_start},{window_end}"

        try:
            # A timeout keeps one stalled connection from hanging the whole run.
            response = requests.get(url, timeout=30)
            response.raise_for_status()  # Raise an error for HTTP issues

            data = response.json().get("data", [])
            if data:
                frames.append(pd.DataFrame(data))
        except Exception as e:
            # Deliberately broad: one bad window must not abort a long backfill.
            print(f"Unexpected error at timestamp {window_end}: {e}")

        # Always advance, even after a failure; otherwise the same window is
        # requested again and the oldest part of the range is never reached.
        window_end = window_start

    if frames:
        # Single concat instead of growing a DataFrame inside the loop.
        df_final = pd.concat(frames, ignore_index=True)

        # Filter, clean, and preprocess the data
        # NOTE(review): if the API treats timestampRange as inclusive on both
        # ends, rows on a window boundary may appear twice — confirm and dedupe.
        df_final = df_final.loc[df_final["timestamp"] > start_date]
        df_final = df_final.drop(
            columns=["tokens", "token_balance_delta"], errors="ignore"
        )
        df_final.index = pd.to_datetime(df_final["timestamp"], unit="s")
        df_final = df_final.rename_axis("DATETIME")
    else:
        # Nothing was fetched: return an empty frame instead of failing on the
        # missing 'timestamp' column.
        df_final = pd.DataFrame().rename_axis("DATETIME")

    print(f"DataFrame successfully created with {len(df_final)} rows.")
    return df_final


# Script entry point: reads configuration from the environment, determines the
# time range to fetch, and pulls the libMEV bundles for that range.
if __name__ == "__main__":

    # Configuration comes from environment variables (a .env file is loaded at
    # import time via load_dotenv()). os.getenv returns None for unset names.
    GOOGLE_CREDENTIALS_PATH = os.getenv("GOOGLE_CREDENTIALS_PATH")
    # NOTE(review): in the visible code BQ_DATASET_ID and BQ_TABLE_ID are only
    # referenced by the commented-out load call at the bottom.
    BQ_DATASET_ID = os.getenv("LIBMEV_DATASET_ID")
    BQ_TABLE_ID = os.getenv("LIBMEV_TABLE_ID")

    # Ask BigQuery for the newest TIMESTAMP already stored in the table.
    # NOTE(review): the table name is hard-coded in the query rather than built
    # from BQ_DATASET_ID / BQ_TABLE_ID — confirm they refer to the same table.
    start_timestamp = utilities.get_BQ(
        """
        SELECT MAX(TIMESTAMP) AS max_timestamp
        FROM `silken-mile-379810.libmev_dataset.searchers_bundles`
        """, GOOGLE_CREDENTIALS_PATH)
    
    # NOTE(review): presumably get_BQ returns a single-row result; int() here
    # will fail if the value is missing (e.g. empty table) — confirm.
    start_timestamp = int(start_timestamp.iloc[0])
    
    # NOTE(review): this assignment overwrites the value queried from BigQuery
    # above, so the query result is discarded and the run always covers the
    # fixed range below. Looks like a manual backfill override — remove either
    # the override or the query once the intent is confirmed.
    # The datetimes are naive, so .timestamp() interprets them in the local
    # timezone of the machine running the script, not UTC.
    start_timestamp = int(datetime.datetime(2024,3,1).timestamp())
    end_timestamp = int(datetime.datetime(2024,4,1).timestamp())  

    # Fetch data from libMEV (default 5-minute request windows)
    df_libmev = get_libmev_data(start_timestamp, end_timestamp)

    # Load data into BigQuery
    # NOTE(review): the load step is currently disabled, so the fetched
    # DataFrame is not written anywhere by this script.
    # utilities.load_to_table(df_libmev, BQ_DATASET_ID, BQ_TABLE_ID, GOOGLE_CREDENTIALS_PATH )

# Captured notebook output (from an earlier run with the load step enabled):
# Data successfully loaded to table 'silken-mile-379810.libmev_dataset.searchers_bundles'.
