In [5]:
import pandas as pd

# Spike thresholds, expressed as fractions (0.10 == 10%).
# A spike starts when the base fee rises by at least this fraction over the previous row.
spike_start_pct = 0.1
# A spike ends once the base fee is back within this fraction of the fee at the spike's start row.
spike_end_pct = 0.05

In [6]:
# Load the dataset
df = pd.read_csv('csv_inputs/2023h1ytd.csv')

# Sort by 'number' and rebuild a clean 0..n-1 index. The loop below records
# *positions*, while later lookups use `df.loc` (label based); resetting the
# index makes positions and labels coincide so both refer to the same rows.
df = df.sort_values('number').reset_index(drop=True)

# Pull the base fee column out once. Looking values up with `df.iloc[i][...]`
# inside the loop materialises a whole row Series on every access.
base_fees = df['base_fee_gwei'].values

# Start and end row index of each detected spike (parallel lists)
spike_start_indices = []
spike_end_indices = []

# Loop state: whether a spike is currently open, and the row it started on
in_spike = False
spike_start = 0

for i in range(1, len(base_fees)):
    if in_spike:
        # The spike ends once the fee is back within `spike_end_pct` of the
        # fee at the spike's start row; row i - 1 is the last row inside it.
        start_fee = base_fees[spike_start]
        if abs(base_fees[i] - start_fee) <= start_fee * spike_end_pct:
            spike_end_indices.append(i - 1)
            in_spike = False
    else:
        # Row-over-row relative change in base fee
        pct_change = (base_fees[i] - base_fees[i - 1]) / base_fees[i - 1]

        # A jump of at least `spike_start_pct` opens a spike anchored at the
        # row *before* the jump, so its fee is the pre-spike baseline.
        if pct_change >= spike_start_pct:
            spike_start_indices.append(i - 1)
            spike_start = i - 1
            in_spike = True

# If the data ends while a spike is still open, close it at the last row so
# both lists have the same length (otherwise the DataFrame constructor raises
# ValueError). `in_spike` is left True to signal that the final spike was
# truncated by the end of the dataset rather than by the fee recovering.
if in_spike:
    spike_end_indices.append(len(base_fees) - 1)

# Create a DataFrame for the spikes
df_spikes = pd.DataFrame({'Start Index': spike_start_indices, 'End Index': spike_end_indices})

# Helper: peak base fee reached during one spike
def calculate_max_base_fee(start_index, end_index):
    """Return the largest `base_fee_gwei` in `df` between two index labels.

    The slice is label based (`.loc`), so both `start_index` and `end_index`
    are included in the range that is searched.
    """
    spike_fees = df['base_fee_gwei'].loc[start_index:end_index]
    return spike_fees.max()

# Peak base fee of each spike. Built with a comprehension instead of
# `df_spikes.apply(..., axis=1)`: when no spikes were found, `apply` returns a
# DataFrame rather than a Series and assigning it to a single column raises.
df_spikes['Max Base Fee'] = [
    calculate_max_base_fee(start, end)
    for start, end in zip(df_spikes['Start Index'], df_spikes['End Index'])
]

# Base fee on the row where each spike starts (`.values` drops the index of
# `df` so the result is assigned by position, not aligned by label)
df_spikes['Start Base Fee'] = df.loc[df_spikes['Start Index'], 'base_fee_gwei'].values

# Rise from the start fee to the peak fee, as a percentage of the start fee
df_spikes['Percentage Gain'] = (df_spikes['Max Base Fee'] - df_spikes['Start Base Fee']) / df_spikes['Start Base Fee'] * 100

# Check the DataFrame
df_spikes.head()

In [None]:
# Write the spike table to disk in the current working directory
df_spikes.to_csv(path_or_buf='2023h1ytd_spikes.csv')