In [None]:
# Load dependencies

import pandas as pd
import os
import matplotlib.pyplot as plt

In [None]:
# Folder (relative to the notebook's working directory) holding the
# per-epoch CSV files with the dropped-data statistics
directory_path = 'data_dropped'

In [None]:
# Epoch labels, ordered from the shortest to the longest duration
epochs = [
    '.5s Epoch',
    '1s Epoch',
    '1.5s Epoch',
    '2s Epoch',
    '2.5s Epoch',
    '3s Epoch',
    '3.5s Epoch',
    '4s Epoch',
]

In [None]:
# One single-slot list per epoch label; each slot starts as None and is
# overwritten once the matching CSV file has been read.
# Every key gets its own list object, so the slots are filled independently.
epoch_data = dict((label, [None]) for label in epochs)

In [None]:
# Lookup table: CSV file name -> epoch label it belongs to
file_to_epoch = dict([
    ('average_percent_.5s_epoch.csv', '.5s Epoch'),
    ('average_percent_1s_epoch.csv', '1s Epoch'),
    ('average_percent_1.5s_epoch.csv', '1.5s Epoch'),
    ('average_percent_2s_epoch.csv', '2s Epoch'),
    ('average_percent_2.5s_epoch.csv', '2.5s Epoch'),
    ('average_percent_3s_epoch.csv', '3s Epoch'),
    ('average_percent_3.5s_epoch.csv', '3.5s Epoch'),
    ('average_percent_4s_epoch.csv', '4s Epoch'),
])

In [None]:
# Fill epoch_data with the value read from every recognised CSV in the directory
for file in os.listdir(directory_path):
    # Only files named in file_to_epoch are relevant; anything else is skipped
    if file not in file_to_epoch:
        continue

    # Construct the full path to the file
    file_path = os.path.join(directory_path, file)

    # header=None keeps the first line of the file as data row 0, so the
    # value of interest is at row index 1, column 0
    try:
        df = pd.read_csv(file_path, header=None)
    except pd.errors.EmptyDataError:
        # A completely empty file has nothing to parse; handle it the same
        # way as a file with too few rows instead of aborting the whole loop
        df = None

    # Take the second row's first column when it exists, otherwise leave None
    if df is not None and df.shape[0] >= 2:
        value = df.iloc[1, 0]
    else:
        value = None

    # Store the value in the slot belonging to this file's epoch
    epoch_column = file_to_epoch[file]
    epoch_data[epoch_column][0] = value

In [None]:
# Build a table from epoch_data: one column per epoch label, with the
# single-slot lists supplying one row
epoch_df = pd.DataFrame(data=epoch_data)

In [None]:
# Show the assembled table as the cell output for a quick visual check
epoch_df

In [None]:
# Coerce every column to a numeric dtype; entries that cannot be parsed
# as numbers become NaN
epoch_df = epoch_df.apply(lambda column: pd.to_numeric(column, errors='coerce'))

In [None]:
# Mean of the first row of epoch_df, i.e. a single value averaged across
# all epoch columns
average_epochs_dropped = epoch_df.iloc[0, :].mean()

# Show that single averaged value
print(average_epochs_dropped)

In [None]:
# Create the figure and axes explicitly (8.5" x 8.5").
# DataFrame.plot opens its own default-sized figure unless it is given an
# axes object, so the axes must be passed in for this size to take effect
# (and to avoid leaving an extra, empty figure behind).
fig, ax = plt.subplots(figsize=(8.5, 8.5))

# Bar plot; the frame is transposed so the epoch labels become the x-axis.
# rot=45 tilts the x tick labels for better readability.
epoch_df.T.plot(kind='bar', legend=False, rot=45, ax=ax)
ax.set_title('Values Across Different Epochs')
ax.set_xlabel('Epochs')
ax.set_ylabel('Percent Dropped')
ax.grid(axis='y', linestyle='--', alpha=0.7)  # Dashed horizontal grid lines only
ax.set_ylim(0, 100)  # Fixed y-axis range from 0 to 100

# Tighten the layout so the rotated labels are not clipped
fig.tight_layout()

# Save the figure that was just drawn
fig.savefig('data_dropped/barplot_dropped_data.png',
            format='png',
            dpi=300,  # Set the resolution to 300 dpi
            bbox_inches='tight',
            facecolor='white',  # Ensure the background is white
            edgecolor='none')
plt.show()