In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Relative path of the CSV file that holds the experiment results.
results_path = "results.csv"
# Parse the CSV into the DataFrame that the later cells display and plot.
results = pd.read_csv(filepath_or_buffer=results_path)

In [None]:
# Show the loaded results table in the cell output for a quick visual sanity check.
display(results)

In [None]:
# List the column names available for plotting.
print(results.columns.tolist())

In [None]:
import seaborn as sns

# In all plots, the legend (hue) is exp_name.
# The x axis in all plots is exp_budget.
# For each y value, create a distinct plot:
# exp_budget vs. each of the following: 'tau_hat', 'max_est', 'cal_mean_generated_samples', 'cal_mean_c_value', 'cal_miscoverage', 'test_miscoverage', 'test_mean_lpb', 'cal_hours'

# Use sns.lineplot to plot the results.
def plot_results(x, y, title, xlabel, ylabel, ax = None, data = None) -> plt.Axes:
    """Draw one seaborn line plot of column ``y`` against column ``x``.

    One line is drawn per distinct ``exp_name`` value (used as the hue), with
    a marker at every x position. Rows that share the same x value are
    aggregated by seaborn, and the error band shows the standard deviation
    (``errorbar="sd"``) rather than a confidence interval.

    Parameters
    ----------
    x, y : str
        Names of the columns plotted on the x and y axes.
    title, xlabel, ylabel : str
        Text for the axes title and the two axis labels.
    ax : matplotlib Axes, optional
        Axes to draw into. When omitted, a new 10x6 inch figure is created.
    data : pandas.DataFrame, optional
        Frame that holds ``x``, ``y`` and an ``exp_name`` column. When
        omitted, the notebook-level ``results`` frame is used, so existing
        calls keep working unchanged.

    Returns
    -------
    matplotlib Axes
        The axes that was drawn on, so callers can add reference lines,
        legends, or change the axis scale (e.g. ``ax.set_xscale('log')``).
    """
    if data is None:
        # Backward-compatible default: fall back to the frame loaded earlier
        # in the notebook instead of requiring every caller to pass it.
        data = results

    if ax is None:
        _, ax = plt.subplots(figsize=(10, 6))

    # errorbar="sd" draws a +/- one standard deviation band around the mean.
    ax = sns.lineplot(data=data, x=x, y=y, hue='exp_name', ax=ax, marker="o", errorbar="sd")
    ax.set_title(title)
    ax.set_xlabel(xlabel)
    ax.set_ylabel(ylabel)
    return ax
    
# Reference levels drawn as dashed red horizontal lines.
TRUE_TAU = 0.1
TARGET_MISCOVERAGE = 0.1

# Every panel shares the same x column and x-axis label.
X_COLUMN = 'exp_budget'
X_LABEL = 'Budget per Sample'
N_COLS = 3

# One entry per panel, in display order.
#   y / title / ylabel : passed straight to plot_results
#   hline              : optional y value for a horizontal reference line
#   diagonal           : optional flag, draw the y = x reference line
#   ref_label          : legend label of the reference line (required with hline/diagonal)
PANELS = [
    dict(y='tau_hat', title='Estimated tau_hat vs Budget', ylabel='Estimated tau_hat',
         hline=TRUE_TAU, ref_label='True tau'),
    dict(y='max_est', title='Max Estimation vs Budget', ylabel='Max Estimation'),
    dict(y='cal_mean_generated_samples', title='Mean Generated Samples vs Budget',
         ylabel='Mean Generated Samples', diagonal=True, ref_label='Ideal Budget Used'),
    # NOTE(review): this panel reuses the 'Ideal Budget Used' y = x line of the
    # previous panel - confirm it is intended for the C value and not a copy-paste leftover.
    dict(y='cal_mean_c_value', title='Mean C Value vs Budget', ylabel='Mean C Value',
         diagonal=True, ref_label='Ideal Budget Used'),
    dict(y='cal_miscoverage', title='Calibration Miscoverage vs Budget',
         ylabel='Calibration Miscoverage', hline=TARGET_MISCOVERAGE, ref_label='Ideal Miscoverage'),
    dict(y='test_miscoverage', title='Test Miscoverage vs Budget', ylabel='Test Miscoverage',
         hline=TARGET_MISCOVERAGE, ref_label='Ideal Miscoverage'),
    dict(y='test_mean_lpb', title='Test Mean LPB vs Budget', ylabel='Test Mean LPB'),
    dict(y='cal_hours', title='Calibration Hours vs Budget', ylabel='Hours'),
]

# Enough rows to hold every panel (ceiling division without importing math).
n_rows = (len(PANELS) + N_COLS - 1) // N_COLS
fig, axes = plt.subplots(n_rows, N_COLS, figsize=(25, 15))
axes = axes.flatten()

# Sorted distinct budgets, used as both x and y of the y = x reference line.
budgets = results[X_COLUMN].drop_duplicates().sort_values()

for ax, panel in zip(axes, PANELS):
    plot_results(X_COLUMN, panel['y'], panel['title'], X_LABEL, panel['ylabel'], ax=ax)

    if 'hline' in panel:
        ax.axhline(y=panel['hline'], color='r', linestyle='--', label=panel['ref_label'])
    if panel.get('diagonal'):
        ax.plot(budgets, budgets, color='red', linestyle='--', label=panel['ref_label'])
    if 'ref_label' in panel:
        # Rebuild the legend so the reference line is listed next to the exp_name entries.
        ax.legend()

# The grid may have more cells than panels; hide the leftovers instead of
# showing empty axes frames.
for unused_ax in axes[len(PANELS):]:
    unused_ax.set_visible(False)

# Keep titles and axis labels of neighbouring panels from overlapping.
fig.tight_layout()
plt.show()