In [None]:
import pandas as pd
import matplotlib.pyplot as plt

In [None]:
# Paths of the two result CSVs that are combined below.
results_capped_adaptive_path = "results.csv"
results_global_budget_path = "results_global.csv"

# Read each file into its own frame so both stay individually inspectable.
results_capped_adaptive = pd.read_csv(results_capped_adaptive_path)
results_global_budget = pd.read_csv(results_global_budget_path)

# Stack the two frames row-wise and renumber the index from 0.
results = pd.concat(
    (results_capped_adaptive, results_global_budget),
    axis=0,
    ignore_index=True,
)

In [None]:
# Rich-display the combined results frame built from both CSV files.
display(results)

In [None]:
# Show the first five rows, then print the column names as a plain list.
display(results.head(n=5))
print(results.columns.tolist())

In [None]:
import seaborn as sns

# in all plots, the legend is exp_name
# the x axis in all plots is exp_budget
# for each y value create a distinct plot:
# 1. exp_budget vs the following: 'tau_hat', 'max_est', 'calib_tau_hat_miscoverage', 'calib_tau_target_miscoverage', 'calib_mean_generated_samples', 'calib_mean_c_value', 'test_tau_hat_lpb', 'test_tau_target_lpb', 'time_delta'

# use sns.lineplot to plot the results
def plot_results(x, y, title, xlabel, ylabel, ax=None, data=None) -> plt.Axes:
    """Draw one column against another as a log-log line plot, one line per ``exp_name``.

    Parameters
    ----------
    x, y : str
        Names of the columns plotted on the x and y axes.
    title, xlabel, ylabel : str
        Text applied to the axes title and to the x / y axis labels.
    ax : matplotlib.axes.Axes, optional
        Axes to draw on. When omitted, a new 10x6 inch figure is created.
    data : pandas.DataFrame, optional
        Frame to plot. Defaults to the notebook-level ``results`` frame.

    Returns
    -------
    matplotlib.axes.Axes
        The axes the lines were drawn on.
    """
    if data is None:
        # Fall back to the notebook-level frame so existing calls keep working.
        data = results

    if ax is None:
        _, ax = plt.subplots(figsize=(10, 6))

    # Rows sharing the same x value are aggregated per exp_name; errorbar="sd"
    # asks seaborn to draw a standard-deviation band around each line.
    ax = sns.lineplot(data=data, x=x, y=y, hue='exp_name', ax=ax, marker="o", errorbar="sd")
    ax.set_xscale('log')
    ax.set_yscale('log')

    # Apply the caller-supplied title and axis labels so each panel is self-describing.
    ax.set(title=title, xlabel=xlabel, ylabel=ylabel)
    return ax
    
# 3x3 grid: one panel per metric, each plotted against exp_budget.
fig, axes = plt.subplots(3, 3, figsize=(25, 15))
axes = axes.flatten()

# x-values for the y = x reference line, computed once and shared by panels 5 and 6.
budget_values = results['exp_budget'].unique()

# 1. exp_budget vs tau_hat
ax = plot_results('exp_budget', 'tau_hat', 'Estimated tau_hat vs Budget', 'Budget', 'Estimated tau_hat', ax=axes[0])
ax.axhline(y=0.1, color='r', linestyle='--', label='True tau')
ax.legend()  # refresh the legend so the reference line's label is listed

# 2. exp_budget vs max_est
plot_results('exp_budget', 'max_est', 'Max Estimation vs Budget', 'Budget', 'Max Estimation', ax=axes[1])

# 3. exp_budget vs calib_tau_hat_miscoverage
ax = plot_results('exp_budget', 'calib_tau_hat_miscoverage', 'Calibration tau_hat Miscoverage vs Budget', 'Budget', 'Calibration tau_hat Miscoverage', ax=axes[2])
ax.axhline(y=0.1, color='r', linestyle='--', label='True tau')
ax.legend()

# 4. exp_budget vs calib_tau_target_miscoverage
ax = plot_results('exp_budget', 'calib_tau_target_miscoverage', 'Calibration tau_target Miscoverage vs Budget', 'Budget', 'Calibration tau_target Miscoverage', ax=axes[3])
ax.axhline(y=0.1, color='r', linestyle='--', label='True miscoverage')
ax.legend()  # refresh the legend so the reference line's label is listed

# 5. exp_budget vs calib_mean_generated_samples
ax = plot_results('exp_budget', 'calib_mean_generated_samples', 'Calibration Mean Generated Samples vs Budget', 'Budget', 'Calibration Mean Generated Samples', ax=axes[4])
ax.plot(budget_values, budget_values, color='red', linestyle='--', label='Ideal Line')
ax.legend()

# 6. exp_budget vs calib_mean_c_value
# NOTE(review): the y = x reference is the same one used for panel 5 -- confirm
# it is the intended baseline for calib_mean_c_value.
ax = plot_results('exp_budget', 'calib_mean_c_value', 'Calibration Mean C Value vs Budget', 'Budget', 'Calibration Mean C Value', ax=axes[5])
ax.plot(budget_values, budget_values, color='red', linestyle='--', label='Ideal Line')
ax.legend()  # refresh the legend so the reference line's label is listed

# 7. exp_budget vs test_tau_hat_lpb
plot_results('exp_budget', 'test_tau_hat_lpb', 'Test tau_hat LPB vs Budget', 'Budget', 'Test tau_hat LPB', ax=axes[6])

# 8. exp_budget vs test_tau_target_lpb
plot_results('exp_budget', 'test_tau_target_lpb', 'Test tau_target LPB vs Budget', 'Budget', 'Test tau_target LPB', ax=axes[7])

# 9. exp_budget vs time_delta
plot_results('exp_budget', 'time_delta', 'Time Delta vs Budget', 'Budget', 'Time Delta', ax=axes[8])

# Keep neighbouring panels' labels from overlapping, and end the cell on a
# statement that does not echo an Axes repr.
fig.tight_layout()
plt.show()
