In [32]:
import os
import pandas as pd

CSV_DIR = 'pancake/data/csv'
STATS_DIR = 'pancake/stats'


def _summarize_profits(df):
    """Return summary statistics for the profit columns of ``df``.

    Reads the ``true_profit`` and ``naive_profit`` columns and returns a dict
    mapping report column names to each column's mean, median, sum, minimum,
    maximum and count of strictly positive values. The key order is the
    column order used in the CSV reports written by this script.

    Raises:
        KeyError: if ``df`` lacks a ``true_profit`` or ``naive_profit`` column.
    """
    true_profits = df['true_profit']
    naive_profits = df['naive_profit']
    return {
        "Avg True Profit": true_profits.mean(),
        "Avg Naive Profit": naive_profits.mean(),
        "Median True Profit": true_profits.median(),
        "Median Naive Profit": naive_profits.median(),
        "Total True Profit": true_profits.sum(),
        "Total Naive Profit": naive_profits.sum(),
        "Min True Profit": true_profits.min(),
        "Min Naive Profit": naive_profits.min(),
        "Max True Profit": true_profits.max(),
        "Max Naive Profit": naive_profits.max(),
        # Summing the boolean mask counts the rows with profit > 0.
        "Num Positive True Profits": (true_profits > 0).sum(),
        "Num Positive Naive Profits": (naive_profits > 0).sum(),
    }


# Load every CSV in CSV_DIR into a DataFrame, keyed by file name.
# os.listdir() returns entries in arbitrary order; sorting makes the report
# row order, and the row kept by drop_duplicates(keep='first') below,
# reproducible between runs.
dataframes = {}
for filename in sorted(os.listdir(CSV_DIR)):
    if filename.endswith('.csv'):
        dataframes[filename] = pd.read_csv(os.path.join(CSV_DIR, filename))

# DataFrame.to_csv() does not create missing parent directories.
os.makedirs(STATS_DIR, exist_ok=True)

# Network-level stats: one row of summary statistics per input CSV file.
statistics = [
    {"File Name": name, **_summarize_profits(df)}
    for name, df in dataframes.items()
]
stats_df = pd.DataFrame(statistics)
stats_df.to_csv(
    os.path.join(STATS_DIR, 'network_level_statistics.csv'), index=False
)

# Pool-level stats: stack all files and keep the first row seen for each
# pool_address, then write the de-duplicated rows themselves.
combined_df = pd.concat(list(dataframes.values()), ignore_index=True)
unique_combined_df = combined_df.drop_duplicates(
    subset='pool_address', keep='first'
)
unique_combined_df.to_csv(
    os.path.join(STATS_DIR, 'pool_level_statistics.csv'), index=False
)

# Aggregate stats: a single row summarising the de-duplicated pool rows.
statistics = [_summarize_profits(unique_combined_df)]
stats_df = pd.DataFrame(statistics)
stats_df.to_csv(
    os.path.join(STATS_DIR, 'aggregate_statistics.csv'), index=False
)