In [9]:
import pandas as pd
import ijson
import json


In [10]:
def find_max_min_block_number_and_df(file_path, column_name='block_number', chunk_size=500000):
    max_value = -float('inf')  # Initialize to negative infinity for maximum
    min_value = float('inf')  # Initialize to infinity for minimum
    df = pd.DataFrame()  # Initialize an empty DataFrame to store all chunks
    
    # Use an iterator to load chunks of the dataset
    for chunk in pd.read_csv(file_path, chunksize=chunk_size):
        if column_name in chunk.columns:
            current_max = chunk[column_name].max()
            current_min = chunk[column_name].min()
            if current_max > max_value:
                max_value = current_max
            if current_min < min_value:
                min_value = current_min
        df = pd.concat([df, chunk], ignore_index=True)  # Append the current chunk to the DataFrame
    
    return max_value, min_value, df  # Return the max, min, and the complete DataFrame

# Adjust the file path as necessary
file_path = '../data_fetch_flashbots/output.csv'

# Run the function and unpack the returned values
max_block_number, min_block_number, df = find_max_min_block_number_and_df(file_path)

# Display the maximum and minimum block number
print("Maximum Block Number:", max_block_number)
print("Minimum Block Number:", min_block_number)

# You can now inspect the DataFrame 'df' to see the structure of your data
print(df.head())  # Display the first few rows of the DataFrame


Maximum Block Number: 14986955
Minimum Block Number: 11834049
   block_number        miner_reward  \
0      14986955  358920921362317961   
1      14986954  266926641844315607   
2      14986952  689259070093777007   
3      14986949  381788848142916699   
4      14986948  286956710025995738   

                                        miner  coinbase_transfers  gas_used  \
0  0x646dB8ffC21e7ddc2B6327448dd9Fa560Df41087  358920921362317961    453908   
1  0x7F101fE45e6649A6fB8F3F8B43ed03D353f2B90c  233815223189565477    707043   
2  0x646dB8ffC21e7ddc2B6327448dd9Fa560Df41087  154128719565454796   5304326   
3  0x829BD824B016326A401d083B33D092293333A830  340862284848386550    702543   
4  0xEA674fdDe714fd979de3EdF0F56AA9716B898ec8  286803725093765114    600407   

      gas_price                                       transactions  
0  790734953696  [{'transaction_hash': '0x7b2640a1915092f9c14a0...  
1  377525329922  [{'transaction_hash': '0xae32bea44c40890ad2bee...  
2  129942818388  [{'t

In [11]:
# Function to calculate max bundle index + 1
def calculate_max_bundle_index(transactions):
    if transactions and isinstance(transactions, list):
        # Extract bundle_index from each transaction dictionary
        max_index = max(transaction['bundle_index'] for transaction in transactions)
        return max_index + 1
    return 1  # Default value if no transactions or wrong format

# Apply this function to each row in the DataFrame
df['all_bundles_count'] = df['transactions'].apply(calculate_max_bundle_index)

print(df)

         block_number        miner_reward  \
0            14986955  358920921362317961   
1            14986954  266926641844315607   
2            14986952  689259070093777007   
3            14986949  381788848142916699   
4            14986948  286956710025995738   
...               ...                 ...   
1484616      11835001   26661835635854408   
1484617      11834918   32353817313917581   
1484618      11834911  227431738484650945   
1484619      11834380    7298671650726701   
1484620      11834049  372250038378476828   

                                              miner  coinbase_transfers  \
0        0x646dB8ffC21e7ddc2B6327448dd9Fa560Df41087  358920921362317961   
1        0x7F101fE45e6649A6fB8F3F8B43ed03D353f2B90c  233815223189565477   
2        0x646dB8ffC21e7ddc2B6327448dd9Fa560Df41087  154128719565454796   
3        0x829BD824B016326A401d083B33D092293333A830  340862284848386550   
4        0xEA674fdDe714fd979de3EdF0F56AA9716B898ec8  286803725093765114   
...      

In [None]:
def find_max_min_block_number(file_path, item_name='item.block_number'):
    max_value = -float('inf')  # Initialize to negative infinity for maximum
    min_value = float('inf')  # Initialize to infinity for minimum
    with open(file_path, 'rb') as file:
        # ijson parses the file incrementally
        block_numbers = ijson.items(file, item_name)
        for number in block_numbers:
            if number > max_value:
                max_value = number
            if number < min_value:
                min_value = number
    return max_value, min_value

# Adjust the file path as necessary
file_path = '../output.json'

# Assuming the JSON attribute containing block numbers is named 'block_number'
# Modify the item path as necessary based on your JSON structure
max_block_number, min_block_number = find_max_min_block_number(file_path)

# Display the maximum and minimum block number
print("Maximum Block Number:", max_block_number)
print("Minimum Block Number:", min_block_number)



# 15540733

In [None]:
def read_until_block_number(file_path, target_block_number=15040733, item_name='item.block_number'):
    try:
        with open(file_path, 'rb') as file:
            # ijson parses the file incrementally
            block_numbers = ijson.items(file, item_name)
            for number in block_numbers:
                if number == target_block_number:
                    print(f"Block number {target_block_number} found.")
                    break
            else:
                # This else executes only if the for loop is not terminated by break
                print(f"Block number {target_block_number} not found before EOF.")
    except ijson.IncompleteJSONError as e:
        print(f"Incomplete JSON error: {e}. Block number {target_block_number} may not be present.")
    except Exception as e:
        print(f"An error occurred: {e}")

# Adjust the file path as necessary
file_path = '../output.json'

# Call the function with the path to your JSON file
read_until_block_number(file_path)


In [7]:
def count_flashbots_bundles(csv_file):
    # create a dictionary to store the count of flashbots bundles per block
    flashbots_count = {}

    # read the csv file in chunks
    for chunk in pd.read_csv(csv_file, chunksize=100000):
        # iterate over each row in the chunk
        for index, row in chunk.iterrows():
            # parse the transactions field as a json list
            transactions = json.loads(row['transactions'])
            # create an empty list to store flashbots transactions
            flashbots_txs = []
            for tx in transactions:
                # check if the transaction has bundle_type "flashbots"
                if tx['bundle_type'] == 'flashbots':
                    flashbots_txs.append(tx)
            # add the count of flashbots transactions to the dictionary
            if row['block_number'] in flashbots_count:
                flashbots_count[row['block_number']] += len(flashbots_txs)
            else:
                flashbots_count[row['block_number']] = len(flashbots_txs)

    # create a pandas DataFrame from the dictionary
    df = pd.DataFrame.from_dict(flashbots_count, orient='index', columns=['flashbots_bundle_count'])
    df.index.name = 'block_number'

    # write the DataFrame to a csv file
    df.to_csv('flashbots_bundle_counts.csv')

    return df

In [8]:
file_path = '../data_fetch_flashbots/output.csv'
count_flashbots_bundles(file_path)

JSONDecodeError: Expecting property name enclosed in double quotes: line 1 column 3 (char 2)