# Plots for Hypercube Benchmark

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
import warnings
# Suppress every warning category for the remainder of the session.
warnings.filterwarnings('ignore')

# Global matplotlib styling, grouped by what each setting controls.
_font_sizes = {
    'font.size': 14,
    'axes.labelsize': 16,
    'axes.titlesize': 18,
    'xtick.labelsize': 14,
    'ytick.labelsize': 14,
    'legend.fontsize': 14,
    'figure.titlesize': 20,
}
_axes_style = {
    # Drop the top/right frame lines and draw a thin grid behind the data.
    'axes.spines.top': False,
    'axes.spines.right': False,
    'axes.grid': True,
    # 'axes.grid.alpha': 0.3,
    'grid.linewidth': 0.5,
}
plt.rcParams.update({**_font_sizes, **_axes_style})

# Every figure produced below is saved under this directory.
plots_dir = Path('plots/')
plots_dir.mkdir(exist_ok=True)

print("Setup complete!")

In [None]:
# Color scheme for the four sampling methods.
# FK sampling: two shades of orange
_fk_colors = {
    'Indicator FK': '#FF7F00',
    'Distance FK': '#CC5500',
}
# Importance sampling: two shades of purple
_importance_colors = {
    'Indicator Importance': '#88005E',
    'Distance Importance': '#4D0058',
}
colors = {**_fk_colors, **_importance_colors}

# Particle size colors: different shades of orange, keyed by the particle
# count rendered as a string.
particle_colors = {
    str(count): shade
    for count, shade in (
        (32, '#FFB366'),
        (64, '#FF8C00'),
        (128, '#FF6600'),
        (256, '#CC5500'),
    )
}

In [None]:
def calculate_quartiles(runs_df, group_col, metric_col):
    """Summarise ``metric_col`` per ``group_col`` value by its quartiles.

    Returns a DataFrame with one row per group and the columns
    ``group_col``, ``q25``, ``median`` and ``q75``.
    """
    per_group = runs_df.groupby(group_col)[metric_col]
    # Asking for several quantiles yields a (group, quantile) MultiIndex;
    # unstack pivots the quantile level into one column per quantile.
    wide = per_group.quantile([0.25, 0.5, 0.75]).unstack()
    wide.columns = ['q25', 'median', 'q75']
    return wide.reset_index()

def clip_percentage_quartiles(q25, q75):
    """Bound both quartile inputs to the percentage interval [0, 100].

    Returns the clipped lower and upper quartiles as a ``(q25, q75)`` tuple.
    """
    lowest, highest = 0, 100
    return (
        np.clip(q25, lowest, highest),
        np.clip(q75, lowest, highest),
    )

In [None]:
# Dimension scaling experiment: one per-run CSV for each sampling method.
exp1_files = {
    'Indicator FK': 'results/dimension_scaling/dim_indicator_fk_runs.csv',
    'Indicator Importance': 'results/dimension_scaling/dim_indicator_importance_runs.csv',
    'Distance FK': 'results/dimension_scaling/dim_distance_fk_runs.csv',
    'Distance Importance': 'results/dimension_scaling/dim_distance_importance_runs.csv'
}

# Read whichever CSVs are present; a missing file only produces a warning,
# so the corresponding method is simply absent from the later plots.
exp1_runs_data = {}
for name, file_path in exp1_files.items():
    if not Path(file_path).exists():
        print(f"WARNING: {file_path} not found")
        continue
    loaded_runs = pd.read_csv(file_path)
    exp1_runs_data[name] = loaded_runs
    print(f"Loaded {name}: {len(loaded_runs)} runs")

# Per-dimension quartiles of both metrics for every method that has runs.
exp1_quartiles = {}
for name, runs_df in exp1_runs_data.items():
    if runs_df is None or len(runs_df) == 0:
        continue
    exp1_quartiles[name] = {
        metric: calculate_quartiles(runs_df, 'dimension', metric)
        for metric in ('success_rate', 'w2_dist')
    }
    print(f"Calculated quartiles for {name}")

In [None]:
# Inference curves experiment: indicator FK runs, one CSV per particle count.
# NOTE(review): these paths start with 'hypercube/', unlike the
# 'results/dimension_scaling/...' paths used for the dimension-scaling CSVs —
# confirm which working directory this notebook is meant to run from.
particle_sizes = [32, 64, 128, 256]
exp3_indicator_data = {}

for particles in particle_sizes:
    file_path = f'hypercube/results/inference_curves/curve_indicator_{particles}p_runs.csv'
    if not Path(file_path).exists():
        print(f"WARNING: {file_path} not found")
        continue
    curve_runs = pd.read_csv(file_path)
    # Keys are the particle count as a string.
    exp3_indicator_data[str(particles)] = curve_runs
    print(f"Loaded Indicator FK {particles}p: {len(curve_runs)} runs")

# Per-dimension quartiles of both metrics for every particle count with runs.
exp3_indicator_quartiles = {}
for particles_str, runs_df in exp3_indicator_data.items():
    if runs_df is None or len(runs_df) == 0:
        continue
    exp3_indicator_quartiles[particles_str] = {
        metric: calculate_quartiles(runs_df, 'dimension', metric)
        for metric in ('success_rate', 'w2_dist')
    }
    print(f"Calculated quartiles for {particles_str} particles")

In [None]:
# Plot 1: Dimension Scaling (Success Rate and Wasserstein Distance)
fig, (ax1, ax2) = plt.subplots(2, 1, figsize=(10, 10))

# Both panels draw, per method, the median as a line and the q25-q75 range as
# a shaded band. Only the success-rate panel is a percentage, so only its
# band is clipped to [0, 100].
panel_specs = (
    (ax1, 'success_rate', True),
    (ax2, 'w2_dist', False),
)
for panel, metric, is_percentage in panel_specs:
    for name, quartiles_data in exp1_quartiles.items():
        if quartiles_data is None:
            continue
        df = quartiles_data.get(metric)
        if df is None or len(df) == 0:
            continue

        x = df['dimension']
        band_low, band_high = df['q25'], df['q75']
        if is_percentage:
            band_low, band_high = clip_percentage_quartiles(band_low, band_high)

        color = colors[name]
        panel.plot(x, df['median'], 'o-', color=color, label=name, linewidth=2.5, markersize=6)
        panel.fill_between(x, band_low, band_high, alpha=0.2, color=color)

# Success Rate subplot (top); it carries the only legend.
#ax1.set_xticklabels([])  # Hide x-axis labels on top plot
ax1.set_ylabel('Success Rate (%)')
ax1.set_ylim(0, 105)  # Give a bit more space at the top to see Distance FK
ax1.set_xticks(range(3, 16))  # Explicit ticks for dimensions 3-15
ax1.legend(loc='lower left')

# Wasserstein Distance subplot (bottom, log scale)
ax2.set_xlabel('Dimension')
ax2.set_ylabel('Wasserstein-2 Distance')
ax2.set_yscale('log')
ax2.set_xticks(range(3, 16))  # Explicit ticks for dimensions 3-15

plt.tight_layout()
plt.savefig(plots_dir / 'dimension_scaling.pdf', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Plot 2: Indicator FK Success Rate with Different Particle Sizes
fig, ax = plt.subplots(1, 1, figsize=(10, 6))

# One median line plus a q25-q75 band for each particle count.
for particles_str, quartiles_data in exp3_indicator_quartiles.items():
    df = None if quartiles_data is None else quartiles_data.get('success_rate')
    if df is None or len(df) == 0:
        continue

    x = df['dimension']
    # Success rate is a percentage, so keep the band inside [0, 100].
    band_low, band_high = clip_percentage_quartiles(df['q25'], df['q75'])

    color = particle_colors[particles_str]
    ax.plot(
        x, df['median'], 'o-',
        color=color,
        label=f'{particles_str} particles',
        linewidth=2.5,
        markersize=6,
    )
    ax.fill_between(x, band_low, band_high, alpha=0.2, color=color)

ax.set_xlabel('Dimension')
ax.set_ylabel('Success Rate (%)')
ax.set_ylim(0, 105)  # Give a bit more space at the top to see 100% values
ax.set_xticks(range(3, 16))  # Explicit ticks for dimensions 3-15
ax.legend(loc='lower left')

plt.tight_layout()
plt.savefig(plots_dir / 'indicator_fk_particle_scaling.pdf', dpi=300, bbox_inches='tight')
plt.show()

In [None]:
# Report the output directory, then the name of every PDF found in it.
print(f"Plots saved to : {plots_dir.absolute()}")
plot_files = [*plots_dir.glob('*.pdf')]
# Sort so the listing order does not depend on directory traversal order.
for plot_file in sorted(plot_files):
    print(f"{plot_file.name}")