In [1]:
from data_prep_utils import *
from plot_prep_utils import *
import pandas as pd

# Load the two block-level DataFrames that the rest of the notebook compares.
# Both loader functions are not defined in this notebook; presumably they come
# from the star-imported helper modules above — TODO confirm which one.
# Later cells read a 'builder_reward' column from both frames and a
# 'transaction_hash' column from rows selected out of `dft`.
dfe = get_eigenphi_march_blocks_with_to_and_from()
dft = get_titan_march_blocks_with_to_and_from()

In [None]:
# The 50 largest builder_reward values found in the eigenphi frame.
top_50_rewards = dfe['builder_reward'].nlargest(50)

# For each of those values, count the titan rows whose builder_reward is
# strictly greater. Keys are the reward values themselves, so identical
# reward values share a single entry.
titan_rewards = dft['builder_reward']
counts_greater_than_each = {}
for reward in top_50_rewards:
    counts_greater_than_each[reward] = (titan_rewards > reward).sum()

# Report one line per distinct reward value, in descending reward order
# (the order produced by nlargest).
for reward, count in counts_greater_than_each.items():
    print(f"Top {reward} reward: {count} entries in 'titan' are greater")
    # Result recorded from an earlier run (first three lines):
    # Top 0.8940509081507859 reward: 635 entries in 'titan' are greater
    # Top 0.5914029142684857 reward: 1288 entries in 'titan' are greater
    # Top 0.4111837835087299 reward: 2540 entries in 'titan' are greater

In [14]:
# Enrich the high-reward titan rows with data from a previously exported
# transaction file, then persist the combined table to 'result_txs.csv'.

# Cutoff for "high reward" titan rows. This exact value appears in the recorded
# output of the top-50 comparison cell, where 1288 titan entries were reported
# as greater than it.
REWARD_CUTOFF = 0.5914029142684857
dft_top_txs = dft[dft['builder_reward'] > REWARD_CUTOFF]

# Previously collected transactions for the top "to" addresses.
# NOTE(review): the schema of TxsAll.csv is not visible here; the code below
# only requires that it has a 'transaction_hash' column.
dft_top_to_txs = pd.read_csv('TxsAll.csv')

# Diagnostics: report how many repeated transaction hashes each side contains
# before the join.
duplicates = dft_top_to_txs['transaction_hash'].duplicated().sum()
print(f"Number of duplicate transaction hashes in dft_top_to_txs: {duplicates}")

duplicates_top_txs = dft_top_txs['transaction_hash'].duplicated().sum()
print(f"Number of duplicate transaction hashes in dft_top_txs: {duplicates_top_txs}")

# Keep only the first row per hash on the lookup side so the left merge cannot
# multiply rows of dft_top_txs.
dft_top_to_txs = dft_top_to_txs.drop_duplicates(subset=['transaction_hash'], keep='first')

# Left merge on 'transaction_hash': every row of dft_top_txs is kept. Columns
# present in both frames keep their plain name for the left side and receive a
# '_right' suffix for the lookup side. The join key itself is never suffixed.
dft_result = pd.merge(dft_top_txs, dft_top_to_txs, on='transaction_hash', how='left', suffixes=('', '_right'))

# Columns that exist on both sides (excluding the join key). Only these have a
# '_right' counterpart in the merged frame.
shared_cols = [
    col for col in dft_top_txs.columns
    if col in dft_top_to_txs.columns and col != 'transaction_hash'
]

# Coalesce each shared column: prefer the lookup-side value and fall back to
# the original titan value where the lookup side is NaN (no match, or missing).
for col in shared_cols:
    dft_result[col] = dft_result[col + '_right'].combine_first(dft_result[col])

# Remove the temporary '_right' columns now that their values have been folded
# into the plain columns. The earlier, commented-out version of this step also
# listed 'transaction_hash_right', which does not exist in the merged frame, so
# it would have raised KeyError; restricting to shared_cols avoids that.
dft_result = dft_result.drop(columns=[col + '_right' for col in shared_cols])

# dft_result now holds, for every high-reward titan row:
#   - all columns from dft_top_to_txs (NaN where no matching hash was found),
#   - titan values for shared columns wherever the lookup side had none.
print(dft_result)


dft_result.to_csv('result_txs.csv')

Number of duplicate transaction hashes in dft_top_to_txs: 42
Number of duplicate transaction hashes in dft_top_txs: 0
                                       transaction_hash  block_number  \
0     0x541efff25a29802ec982c3af9a797540fe2122d193a1...      19339087   
1     0xac6ec592f078dc1fcc182befe30252833a98656b6f1b...      19339689   
2     0x4d64d8de19976667d0287c996eb1586a541cdeca6aae...      19340233   
3     0x17c0b26696c98f507927896a374d3b071bc8085b7aa9...      19340522   
4     0x7ec95dfc1132bd5395cd85f9cbd23e35c7d11e33f8ce...      19340522   
...                                                 ...           ...   
1283  0xaeab2a7a9f6b2c5059c223a19bf2012e5c62f81935c5...      19555213   
1284  0xb957c3258148aeba194776be02a0736a74215c6a9ade...      19555213   
1285  0xc46063351f0347121cde10d1bf0b7a0a9aca21cf3562...      19555213   
1286  0xe730c835919033431702f2176881627026d76d38f61a...      19555213   
1287  0xea0cbd35d640a47105bb4b4e677a4a6f5e6eec8d0b57...      19555213   

     