In [2]:
import requests
import os
import pandas as pd
from sqlalchemy import create_engine, Column, Integer, MetaData, Table, select
from sqlalchemy import Column, Table, MetaData, TIMESTAMP, VARCHAR, NUMERIC, INTEGER, BIGINT, Index
from sqlalchemy.sql import text
from sqlalchemy.orm import sessionmaker

Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),
(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)
but was not found to be installed on your system.
If this would cause problems for you,
please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466
        
  import pandas as pd


In [4]:
localhost_name = 'localhost'

db_params = {
    'host': localhost_name,
    'database': 'thesisdb',
    'user': 'postgres',
    'password': 'admin',
    'port': '5432'
}

table_name = 'zeromev_data'

# Create a SQLAlchemy engine
engine = create_engine(f"postgresql://{db_params['user']}:{db_params['password']}@{db_params['host']}:{db_params['port']}/{db_params['database']}")
metadata = MetaData()
table = Table(table_name, metadata,
    Column('curblocknumber', NUMERIC(18), primary_key=True),
    Column('mev_tx_count', INTEGER, index=True),
    Column('detect_date', VARCHAR(100), index=True)
    )
    # Create the table
metadata.create_all(engine)

In [5]:
# Function to get the MEV transaction count and other details for a specific block number
def get_mev_tx_info(block_number):
    url = f"https://data.zeromev.org/v1/mevBlock?block_number={block_number}"
    response = requests.get(url, headers={'accept': 'application/json'})
    
    if response.status_code == 200:
        data = response.json()
        print(f"Request successful, retrieved data for block number {block_number}")

        # Extracting and processing data
        transactions = []
        for item in data:
            if item.get('arrival_time_us'):  # Checks if 'arrival_time_us' is not None and not empty
                date_str = item['arrival_time_us'].split('T')[0]  # Extracting date
            else:
                date_str = None
            transactions.append({'block_number': block_number, 'detect_date': date_str})

        # Creating DataFrame
        df = pd.DataFrame(transactions)
        total_counts = df['block_number'].value_counts().rename('mev_tx_count')

        # Grouping by block_number and detect_date to calculate mev_tx_count
        df_summary_one = df.groupby(['block_number', 'detect_date']).size().reset_index(name='mev_tx_count')
        
        # Merging total_counts back to df_summary
        df_summary = df_summary_one.merge(total_counts, on='block_number', how='left')

        return df_summary
    else:
        print(f"Failed to fetch data for block number {block_number}")
        return None

def write_to_db(df, engine):
    if not df.empty:
        df.to_sql(name=table_name, con=engine, if_exists='append', index=False)
        df = pd.DataFrame()
        print("Data written successfully to the database.")
    else:
        print("No data to write to the database.")

In [15]:
start_block_number = 11172686
count = 20
url = f"https://data.zeromev.org/v1/mevBlock?block_number={start_block_number}&count={count}"
response = requests.get(url, headers={'accept': 'application/json'})
    
if response.status_code == 200:
    data = response.json()

    # Initialize an empty dictionary to count transactions per block
    transaction_counts = {}

    # Loop through each item in the response data
    for item in data:
        block_number = item['block_number']
        # Increase the transaction count for this block number
        if block_number in transaction_counts:
            transaction_counts[block_number] += 1
        else:
            transaction_counts[block_number] = 1
        
    # Create a list of dictionaries for DataFrame conversion
    transactions = [{'block_number': k, 'mev_tx_count': v} for k, v in transaction_counts.items()]
    
    df = pd.DataFrame(transactions)

In [6]:
start_block_number = 11172686
end_block_number = 11172690

for block_number in range(start_block_number, end_block_number + 1):
    mev_df = get_mev_tx_info(block_number)
    if mev_df is not None:
        print(f"Processing for block {block_number}")
        write_to_db(mev_df, engine)
    else:
        print(f"No data processed for block {block_number}")

Request successful, retrieved data for block number 11172686
block_number
11172686    24
Name: mev_tx_count, dtype: int64
Processing for block 11172686
No data to write to the database.
Request successful, retrieved data for block number 11172687
block_number
11172687    34
Name: mev_tx_count, dtype: int64
Processing for block 11172687
No data to write to the database.
Request successful, retrieved data for block number 11172688
block_number
11172688    28
Name: mev_tx_count, dtype: int64
Processing for block 11172688
No data to write to the database.
Request successful, retrieved data for block number 11172689
block_number
11172689    24
Name: mev_tx_count, dtype: int64
Processing for block 11172689
No data to write to the database.
Request successful, retrieved data for block number 11172690
block_number
11172690    24
Name: mev_tx_count, dtype: int64
Processing for block 11172690
No data to write to the database.
