In [None]:
# Import required libraries
import pandas as pd

# Read the CSV file
file_name = "../fra.trades.csv"
df = pd.read_csv(file_name)

row_count = df.shape[0]
# Drop header rows that appear multiple times in the csv file: they show up as
# data rows whose `timestamp` cell holds the literal column name.
df = df.drop(df[df.timestamp == "timestamp"].index)

# Number of repeated header rows that were removed.
# NOTE(review): the name suggests one extra header is written per bot
# restart -- confirm against the writer. Name kept as-is for other cells.
resstart_count = row_count - df.shape[0]

# Expected columns:
# timestamp,uuid,landed,accepted,rejected,errorType,errorContent,txn0Signature,txn1Signature,txn2Signature,arbSize,expectedProfit,hop1Dex,hop2Dex,sourceMint,intermediateMint,tipLamports,mempoolEnd,preSimEnd,simEnd,postSimEnd,calcArbEnd,buildBundleEnd,bundleSent

# When repeated header rows are present, read_csv cannot infer numeric/boolean
# dtypes and leaves those columns as strings, so every conversion below has to
# cope with string input as well as already-typed input.
TIME_COLUMNS = [
    'timestamp',
    'mempoolEnd',
    'preSimEnd',
    'simEnd',
    'postSimEnd',
    'calcArbEnd',
    'buildBundleEnd',
    'bundleSent',
]
BOOL_COLUMNS = ['landed', 'rejected']
INT_COLUMNS = ['accepted', 'arbSize', 'expectedProfit', 'tipLamports']
# Textual spellings that must be read as False. A plain astype('bool') would
# turn any non-empty string -- including "false" -- into True.
FALSE_TOKENS = {"", "false", "0", "0.0"}

for column in TIME_COLUMNS:
    # Values are epoch milliseconds. Cast to a numeric dtype first: newer
    # pandas versions deprecate parsing strings together with `unit=`.
    df[column] = pd.to_datetime(pd.to_numeric(df[column]), unit='ms')

for column in BOOL_COLUMNS:
    as_text = df[column].astype(str).str.strip().str.lower()
    # Missing values and false-like spellings become False; everything else
    # keeps the usual truthiness (True).
    df[column] = ~(df[column].isna() | as_text.isin(FALSE_TOKENS))

df[INT_COLUMNS] = df[INT_COLUMNS].astype('int64')

# Display the first few records
print(df.head())
# info() prints its report itself and returns None, so it is not wrapped in
# print() (that would emit a stray "None").
df.info()

In [None]:
# Summarise how many sent bundles were accepted / landed and break the recorded
# errors down by cause. Reads the module-level `df` built by the loading cell.


def _percent(part, whole):
    """Return `part` as a percentage of `whole`; 0.0 when `whole` is zero."""
    return (part / whole) * 100 if whole else 0.0


def _mentions(text, signature):
    """Return True when both values are strings and `signature` is in `text`."""
    return (
        isinstance(text, str)
        and isinstance(signature, str)
        and signature in text
    )


# Calculate the percentage of rows where 'landed' is True compared to those with 'accepted' > 0
accepted_mask = df['accepted'] > 0
success_landed = int((df['landed'] & accepted_mask).sum())
accepted_gt_0 = int(accepted_mask.sum())
percentage_landed_accepted = _percent(success_landed, accepted_gt_0)

# Calculate the percentage of rows with 'accepted' > 0 compared to the total
total_rows = df.shape[0]
percentage_accepted = _percent(accepted_gt_0, total_rows)

# Print the results
print(
    f"Percentage landed of accepted: {percentage_landed_accepted:.2f}%")
print(
    f"Percentage accepted of sent: {percentage_accepted:.2f}%")

# Find the error percentage for different error messages
errors_filtered = df[df['errorType'].notnull()]
total_errors = errors_filtered.shape[0]

# Literal (non-regex) matching on string-cast values, so an all-empty column
# (float dtype) or stray non-string cells cannot break the .str accessor.
is_sim_failure = errors_filtered['errorType'].astype(str).str.contains(
    "simulationFailure", regex=False)
sim_failures = errors_filtered[is_sim_failure]

# Classify simulation failures by which transaction signature the error text
# mentions. Masks are built as boolean Series on sim_failures' index so that an
# empty frame still yields a valid (empty) mask, unlike DataFrame.apply(axis=1).
# The first three categories are not mutually exclusive: one row may be counted
# in several of them.
my_fault_mask = pd.Series(
    [
        _mentions(text, sig1) or _mentions(text, sig2)
        for text, sig1, sig2 in zip(
            sim_failures['errorContent'],
            sim_failures['txn1Signature'],
            sim_failures['txn2Signature'],
        )
    ],
    index=sim_failures.index,
    dtype=bool,
)
backrun_mask = pd.Series(
    [
        _mentions(text, sig0)
        for text, sig0 in zip(
            sim_failures['errorContent'], sim_failures['txn0Signature'])
    ],
    index=sim_failures.index,
    dtype=bool,
)
already_processed_mask = sim_failures['errorContent'].astype(str).str.contains(
    "This transaction has already been processed", regex=False)

sim_errors_my_fault = sim_failures[my_fault_mask]
sim_errors_backrun_txn = sim_failures[backrun_mask]
sim_tx_already_processed = sim_failures[already_processed_mask]
# Everything that matched none of the three categories above.
sim_other = sim_failures[
    ~(my_fault_mask | backrun_mask | already_processed_mask)]

# Group non-simulation errors by errorType
non_sim_errors = errors_filtered[~is_sim_failure]
error_groups = non_sim_errors.groupby("errorType").size().reset_index(name='count')

# Calculate the error totals and percentages for sim errors and other errors
error_totals = {
    'sim_errors_my_fault': sim_errors_my_fault.shape[0],
    'sim_errors_backrun_txn': sim_errors_backrun_txn.shape[0],
    'sim_tx_already_processed': sim_tx_already_processed.shape[0],
    'sim_other': sim_other.shape[0],
}

# All percentages are relative to the total number of rows with an errorType.
error_percentages = {
    name: _percent(count, total_errors) for name, count in error_totals.items()
}
for error_name, count in zip(error_groups['errorType'], error_groups['count']):
    error_name = f"{error_name}"
    error_totals[error_name] = int(count)
    error_percentages[error_name] = _percent(int(count), total_errors)

# Print the error totals and their percentages
print("Error Totals:")
for error_type, total in error_totals.items():
    print(f"{error_type}: {total}")

print("\nError Percentages:")
for error_type, percentage in error_percentages.items():
    print(f'{error_type}: {percentage:.2f}%')