# In [None]:
import json
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

import warnings
warnings.filterwarnings("ignore", category=FutureWarning)

# Load the raw benchmark results. JSON text is UTF-8 by specification, so the
# encoding is pinned rather than left to the platform's locale default.
with open('random_parallel_normal_3.json', 'r', encoding='utf-8') as file:
    latest_data = json.load(file)

# Creating a DataFrame from the latest data
latest_df = pd.DataFrame(latest_data)

# Single source of truth for the timing metrics: (raw column holding a list of
# microsecond samples per test, legend label of the derived mean). Every step
# below derives its column names from this table so they cannot drift apart.
timing_metrics = [
    ('builder_seeding_times_us', 'Mean Builder Seeding Time'),
    ('validator_sampling_times_us', 'Mean Validator Sampling Time'),
    ('regular_sampling_times_us', 'Mean Regular Sampling Time'),
]
mean_columns = [f'mean_{raw_column}' for raw_column, _ in timing_metrics]
legend_labels = {f'mean_{raw_column}': label for raw_column, label in timing_metrics}

# Collapse each test's list of samples into one mean value per metric.
for (raw_column, _), mean_column in zip(timing_metrics, mean_columns):
    latest_df[mean_column] = latest_df[raw_column].apply(np.mean)

# Average the per-test means over each (non_builder_count, parcel_size) pair.
grouped_latest_data = (
    latest_df
    .groupby(['non_builder_count', 'parcel_size'])
    .agg({mean_column: 'mean' for mean_column in mean_columns})
    .reset_index()
)

# Microseconds -> seconds. NOTE: the columns keep their '_us' suffix even
# though they hold seconds from this point on.
grouped_latest_data[mean_columns] /= 1e6

# Human-readable configuration label in the '1V 1R 256PS' style. The same
# non_builder_count value is used for both the V and the R figure.
grouped_latest_data['group'] = [
    f"{int(count)}V {int(count)}R {int(size)}PS"
    for count, size in zip(grouped_latest_data['non_builder_count'],
                           grouped_latest_data['parcel_size'])
]

# Long format (one row per group/metric pair) for the grouped bar chart.
melted_latest_data = pd.melt(grouped_latest_data, id_vars=['group'],
                             value_vars=mean_columns)

# Replace the internal column names with the labels shown in the legend.
melted_latest_data['variable'] = melted_latest_data['variable'].map(legend_labels)

# Preparing the CPU usage data for each test for the line graph
cpu_usage_lengths = latest_df['cpu_usage'].apply(len)
max_length = cpu_usage_lengths.max()

# Right-pad every test's samples with NaN up to the longest series so all
# columns share one index. The padded lists are collected first and the frame
# is built in a single call: inserting columns one at a time in a loop
# fragments the DataFrame (pandas warns about this on wide frames).
# If a test_name occurs more than once, the last occurrence wins.
padded_cpu_usage = {
    test_name: usage + [np.nan] * (max_length - len(usage))
    for test_name, usage in zip(latest_df['test_name'], latest_df['cpu_usage'])
}
aligned_cpu_usage_per_test_latest = pd.DataFrame(padded_cpu_usage, index=range(max_length))

# Two stacked panels: timing bars on top, per-test CPU traces underneath.
fig, ax = plt.subplots(nrows=2, ncols=1, figsize=(15, 8))
timing_ax, cpu_ax = ax

# --- Top panel: grouped bars, one cluster per configuration ---
sns.barplot(data=melted_latest_data, x='group', y='value', hue='variable', ax=timing_ax)
timing_ax.set(
    title='Comparison of Seeding and Sampling Times by Configuration',
    ylabel='Average Time (Seconds)',
    xlabel='Configuration (V = Validators, R = Regulars, PS = Parcel Size)',
)
# Legend is anchored just outside the right edge of the axes.
timing_ax.legend(title='Metrics', loc='upper left', bbox_to_anchor=(1.05, 1))
# Dashed red horizontal reference line at y = 12 on the seconds axis.
timing_ax.axhline(y=12, linestyle='--', color='red')

# --- Bottom panel: one line per test, each with its own hue ---
cpu_columns = aligned_cpu_usage_per_test_latest.columns
sample_positions = aligned_cpu_usage_per_test_latest.index
palette = sns.color_palette("hsv", len(cpu_columns))
for line_color, test_name in zip(palette, cpu_columns):
    cpu_ax.plot(
        sample_positions,
        aligned_cpu_usage_per_test_latest[test_name],
        label=test_name,
        color=line_color,
        marker='o',
        markersize=2,
        linewidth=1,
    )
# NOTE(review): x values are sample positions; the 'Time (s)' label presumes
# one CPU sample per second -- confirm against the data producer.
cpu_ax.set(
    title='Aligned CPU Usage Over Time per Test',
    ylabel='CPU Usage (%)',
    xlabel='Time (s)',
)
cpu_ax.legend(title='Test Name', loc='upper left', bbox_to_anchor=(1.05, 1))

# Fit both panels into the figure, then render.
fig.tight_layout()
plt.show()
