In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
from matplotlib.ticker import FuncFormatter

# Grouped bar chart of average Kinfold CPU time: one group per sequence
# length, one bar per simulation count, upper-only error bars from the
# standard-deviation column.

# Load the combined metrics table. Both CPU-time columns are divided by 60 so
# they match the minutes unit used in the y-axis label (presumably the CSV
# stores seconds -- confirm against whatever produces the file).
data = pd.read_csv('./combined_metrics.csv')
data['avg_total_cpu_time'] = data['avg_total_cpu_time'] / 60
data['std_total_cpu_time'] = data['std_total_cpu_time'] / 60

# Distinct x-axis groups and bar series, in order of first appearance.
sequence_lengths = data['length'].unique()
num_simulations = data['num_simulations'].unique()

fig, ax = plt.subplots(figsize=(11.7, 6), dpi=300)
ax.grid(
    True, which="major", axis="y", color="gray", linestyle="--", linewidth=0.5, zorder=-10
)

# Keep the original 0.19 bar width while it fits, but shrink it when there are
# so many series that a group would otherwise spill into the next one.
n_series = len(num_simulations)
bar_width = min(0.19, 0.95 / max(n_series, 1))
positions = np.arange(len(sequence_lengths))

colors = plt.cm.viridis(np.linspace(0.2, 0.8, n_series))

for i, (sim_count, color) in enumerate(zip(num_simulations, colors)):
    # Align this series to the x-axis groups by sequence length rather than
    # by row order: the CSV need not list lengths in the same order for every
    # simulation count, and a missing combination becomes NaN (no bar drawn)
    # instead of shifting the remaining bars or raising a shape mismatch.
    per_length = (
        data.loc[data['num_simulations'] == sim_count]
        .set_index('length')
        .reindex(sequence_lengths)
    )

    means = per_length['avg_total_cpu_time'].to_numpy()
    upper_err = per_length['std_total_cpu_time'].to_numpy()
    # Zero lower error: on a log axis "mean - std" can be <= 0, so only the
    # upper whisker is drawn.
    stds = [np.zeros(len(upper_err)), upper_err]

    bar_positions = positions + (i * bar_width)

    ax.bar(
        bar_positions,
        means,
        bar_width,
        yerr=stds,
        capsize=5,
        label=f'{sim_count} Simulations',
        color=color,
        zorder=10,
    )

# Centre each tick label under its group of bars.
ax.set_xticks(positions + (n_series - 1) * bar_width / 2)
ax.set_xticklabels(sequence_lengths, fontsize=12)
ax.set_xlabel('RNA Sequence Length (in Nucleotides)', fontsize=14)

ax.set_ylabel('Kinfold CPU Time (in Minutes)', fontsize=14)
ax.set_yscale('log')

# Fixed decade ticks from 0.01 to 1000, printed as plain numbers instead of
# 10^k notation.
ticks = [10**i for i in range(-2, 4)]
ax.set_yticks(ticks)

ax.get_yaxis().set_major_formatter(FuncFormatter(lambda y, _: f'{y:g}'))

# The legend sits outside the axes on the right; tight_layout reserves the
# right 15% of the figure for it.
ax.legend(title='Avg. Kinfold CPU Time for', loc='upper left', bbox_to_anchor=(1, 1), fontsize=12)

plt.tight_layout(rect=[0, 0, 0.85, 1])
plt.show()
