In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from matplotlib.gridspec import GridSpec
import matplotlib.ticker as mtick
import os

# Set the style for publication-quality plots
plt.style.use('seaborn-v0_8-whitegrid')
sns.set_context("paper", font_scale=1.5)
plt.rcParams['figure.figsize'] = (10, 6)
plt.rcParams['savefig.dpi'] = 300
plt.rcParams['font.family'] = 'serif'

# Load the data
df = pd.read_csv('../why_did_you_fail_results.csv')

# 1. Overview of the data with summary statistics
print("Unique values in each column:")
for col in df.columns:
    print(f"{col}: {df[col].unique()}")

# FIXED: Extract a single planner robustness value per explainer
# This assumes that the robustness value should be the same for all entries of the same explainer
explainer_robustness = df.groupby('Explanation')['Planner_Robustness'].first().reset_index()
print("\nPlanner Robustness by Explanation Method (one value per explainer):")
print(explainer_robustness)

# Compute summary statistics for key metrics (excluding robustness which is handled separately)
non_robustness_metrics = ['Explanation_Stability', 'Faithfulness_Score', 'Explanation_Time_s']
summary = df.groupby(['Explanation', 'Planner'])[non_robustness_metrics].mean().reset_index()
print("\nSummary statistics by explanation method and planner:")
print(summary)

# 2. Create a function for saving figures with new directory
figures_dir = './figures/why_did_you_fail'
# Create directory if it doesn't exist
os.makedirs(figures_dir, exist_ok=True)

def save_figure(fig, filename):
    fig.tight_layout()
    fig.savefig(f"{figures_dir}/{filename}.pdf", bbox_inches='tight')
    fig.savefig(f"{figures_dir}/{filename}.png", bbox_inches='tight', dpi=300)
    plt.close(fig)

# Define consistent colors for each explanation method
explanation_colors = {'Anchors': '#1f77b4', 'LIME': '#2ca02c', 'SHAP': '#d62728'}

# 3. FIXED: Comparison of explanation methods across different metrics
# Handle planner robustness separately since it's one value per explainer
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
metrics = ['Explanation_Stability', 'Faithfulness_Score', 'Planner_Robustness', 'Explanation_Time_s']
titles = ['Explanation Stability', 'Faithfulness Score', 'Planner Robustness', 'Explanation Time (s)']

for i, (metric, title) in enumerate(zip(metrics, titles)):
    ax = axes[i//2, i%2]
    
    if metric == 'Planner_Robustness':
        # Special handling for robustness - bars without planner distinction
        sns.barplot(x='Explanation', y='Planner_Robustness', data=explainer_robustness, 
                    palette=explanation_colors, ax=ax)
        # ax.set_title(f"{title}\n(One value per explainer - consistent across planners)")
    else:
        # Normal handling for other metrics
        sns.barplot(x='Explanation', y=metric, data=df, hue='Planner', ax=ax, palette='Set2')
        ax.set_title(title)
    
    ax.set_xlabel('')
    ax.set_ylabel(title)  # Always show y-axis label
    
    # For explanation time, use log scale due to large differences
    if metric == 'Explanation_Time_s':
        ax.set_yscale('log')
        ax.set_ylabel('Log Time (s)')
    
    if i == 1 and metric != 'Planner_Robustness':  # Legend only on the top-right plot (if it has a hue)
        ax.legend(title='Planner', bbox_to_anchor=(1.05, 1), loc='upper left')
    elif metric != 'Planner_Robustness':
        ax.legend_.remove()

plt.suptitle('Comparison of Explanation Methods Across Different Metrics', fontsize=20, y=0.98)
save_figure(fig, "explanation_comparison")

# 4. Effect of number of perturbations on explanation stability
fig, ax = plt.subplots(figsize=(12, 7))
for exp in df['Explanation'].unique():
    for planner in df['Planner'].unique():
        subset = df[(df['Explanation'] == exp) & (df['Planner'] == planner) & (df['Perturbation_Type'] == 'move')]
        if not subset.empty:
            ax.plot(subset['Num_Perturbations'], subset['Explanation_Stability'], 
                    marker='o', label=f"{exp} - {planner}", 
                    color=explanation_colors[exp] if exp in subset['Explanation'].values else None)

ax.set_xlabel('Number of Perturbations')
ax.set_ylabel('Explanation Stability')
ax.set_title('Effect of Number of Perturbations on Explanation Stability')
ax.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
save_figure(fig, "perturbation_effect")

# 5. FIXED: Heatmap of explanation method vs planner for each metric
# Handle planner robustness separately
fig, axes = plt.subplots(2, 2, figsize=(16, 12))
metrics = ['Explanation_Stability', 'Faithfulness_Score', 'Planner_Robustness', 'Explanation_Time_s']
pretty_metrics = ['Explanation Stability', 'Faithfulness Score', 'Planner Robustness', 'Explanation Time (s)']

for i, (metric, pretty_metric) in enumerate(zip(metrics, pretty_metrics)):
    ax = axes[i//2, i%2]
    
    if metric == 'Planner_Robustness':
        # Special handling for robustness - create a heatmap with same value for all planners
        pivot = pd.DataFrame({planner: explainer_robustness.set_index('Explanation')['Planner_Robustness'] 
                            for planner in df['Planner'].unique()})
        sns.heatmap(pivot, annot=True, cmap='viridis', ax=ax, fmt='.3f', cbar_kws={'label': pretty_metric})
        ax.set_title(f'Average {pretty_metric} by Explanation Method\n(same value for all planners)')
    else:
        # Normal handling for other metrics
        pivot = df.groupby(['Explanation', 'Planner'])[metric].mean().reset_index().pivot(
            index='Explanation', columns='Planner', values=metric)
        sns.heatmap(pivot, annot=True, cmap='viridis', ax=ax, fmt='.3f', cbar_kws={'label': pretty_metric})
        ax.set_title(f'Average {pretty_metric} by Explanation Method and Planner')

plt.suptitle('Heatmaps of Performance Metrics', fontsize=20, y=0.98)
save_figure(fig, "heatmap_comparison")

# 6. Impact of perturbation type on stability and faithfulness
perturbation_data = df[df['Num_Perturbations'] == 10]  # Only use data with 10 perturbations for clarity

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
metrics = ['Explanation_Stability', 'Faithfulness_Score']
titles = ['Impact on Explanation Stability', 'Impact on Faithfulness Score']

for i, (metric, title) in enumerate(zip(metrics, titles)):
    palette = {exp: explanation_colors[exp] for exp in df['Explanation'].unique()}
    sns.barplot(x='Explanation', y=metric, hue='Perturbation_Type', data=perturbation_data, ax=axes[i])
    axes[i].set_title(title)
    axes[i].set_xlabel('Explanation Method')
    axes[i].set_ylabel(title)
    if i == 1:
        axes[i].legend(title='Perturbation Type', bbox_to_anchor=(1.05, 1), loc='upper left')
    else:
        axes[i].legend_.remove()

plt.suptitle('Impact of Perturbation Type on Explanation Metrics', fontsize=20)
save_figure(fig, "perturbation_type_impact")

# 7. Radar chart for comparing explanation methods across all metrics
fig = plt.figure(figsize=(10, 8))
ax = fig.add_subplot(111, polar=True)

# Normalize data for radar chart
norm_data = {}
metrics = ['Explanation_Stability', 'Faithfulness_Score', 'Planner_Robustness', 'Explanation_Time_s']
labels = ['Stability', 'Faithfulness', 'Planner Robustness', 'Time Efficiency']

# Prepare data with corrected robustness handling
for metric in metrics:
    if metric == 'Planner_Robustness':
        # Handle robustness first and separately
        norm_data[metric] = explainer_robustness.set_index('Explanation')['Planner_Robustness']
    elif metric == 'Explanation_Time_s' or metric == 'Faithfulness_Score':
        # Invert explanation time and faithfulness since lower is better
        max_val = df[metric].max()
        norm_data[metric] = df.groupby('Explanation')[metric].mean().apply(lambda x: 1 - (x / max_val))
    else:
        # For other metrics like stability
        norm_data[metric] = df.groupby('Explanation')[metric].mean()

radar_data = pd.DataFrame(norm_data)

# Number of metrics
N = len(metrics)

# What will be the angle of each axis in the plot
angles = [n / float(N) * 2 * np.pi for n in range(N)]
angles += angles[:1]  # Close the loop

# Draw the chart
ax.set_theta_offset(np.pi / 2)
ax.set_theta_direction(-1)

# Draw labels and grid lines
plt.xticks(angles[:-1], labels)
ax.set_rlabel_position(0)
plt.yticks([0.2, 0.4, 0.6, 0.8], ["0.2", "0.4", "0.6", "0.8"], color="grey", size=8)
plt.ylim(0, 1)

# Plot data using consistent colors
for i, exp in enumerate(radar_data.index):
    values = radar_data.loc[exp].values.tolist()
    values += values[:1]  # Close the loop
    
    # Plot values with consistent color
    color = explanation_colors[exp]
    ax.plot(angles, values, linewidth=2, linestyle='solid', label=exp, color=color)
    ax.fill(angles, values, alpha=0.1, color=color)

# Add legend
plt.legend(loc='upper right', bbox_to_anchor=(0.1, 0.1))
plt.title('Radar Chart: Explanation Methods Comparison', y=1.1)

save_figure(fig, "radar_comparison")

# 8. FIXED: Plot showing best explanation method for each planner
# For Explanation_Stability higher is better, for Faithfulness_Score lower is better
best_stability = df.loc[df.groupby('Planner')['Explanation_Stability'].idxmax()]
best_faithfulness = df.loc[df.groupby('Planner')['Faithfulness_Score'].idxmin()]  # Using idxmin for faithfulness

fig, axes = plt.subplots(1, 2, figsize=(16, 6))
for i, (data, title, metric) in enumerate(zip(
    [best_stability, best_faithfulness], 
    ['Best Method for Stability ↑', 'Best Method for Faithfulness ↓'],
    ['Explanation_Stability', 'Faithfulness_Score'])):
    
    # Create a list of colors matching the explanations in the data
    colors = [explanation_colors[exp] for exp in data['Explanation']]
    
    # Create barplot without hue parameter
    sns.barplot(x='Planner', y=metric, data=data, ax=axes[i], palette=colors)
    axes[i].set_title(title)
    axes[i].set_xlabel('Planner')
    axes[i].set_ylabel(title.replace('Best Method for ', ''))
    
    # Add explanation labels to each bar
    for j, row in enumerate(data.itertuples()):
        # For faithfulness, place the text at a better position for visibility
        text_position = getattr(row, metric) * (0.9 if i == 0 else 0.5)
        axes[i].text(j, text_position, getattr(row, 'Explanation'), 
                  ha='center', fontweight='bold', color='white')

plt.suptitle('Best Explanation Method by Planner', fontsize=20)
save_figure(fig, "best_method_by_planner")

# 9. FIXED: Create nice tables for paper with correct robustness values
# Load the original data
df_for_table = df.copy()

# Add the correct robustness values (one per explainer)
for explainer in explainer_robustness['Explanation']:
    rob_value = explainer_robustness.loc[explainer_robustness['Explanation'] == explainer, 'Planner_Robustness'].values[0]
    df_for_table.loc[df_for_table['Explanation'] == explainer, 'Planner_Robustness'] = rob_value

# Rename columns to be more publication-friendly
column_mapping = {
    'Explanation': 'Method',
    'Planner': 'Planner',
    'Num_Perturbations': 'Perturbations',
    'Perturbation_Type': 'Pert_Type',
    'Explanation_Stability': 'Stability',
    'Faithfulness_Score': 'Faithfulness',
    'Planner_Robustness': 'Robustness',
    'Path_Length': 'Path_Len',
    'Explanation_Time_s': 'Time'
}

# Apply new column names
df_for_table = df_for_table.rename(columns=column_mapping)

# Round numeric values to 2 decimal places
numeric_cols = ['Stability', 'Faithfulness', 'Robustness', 'Path_Len', 'Time']
df_for_table[numeric_cols] = df_for_table[numeric_cols].round(2)

# Make perturbation type more readable ('move' -> 'Move', 'remove' -> 'Remove')
df_for_table['Pert_Type'] = df_for_table['Pert_Type'].str.capitalize()

# Create a more concise dataframe with the most important metrics
paper_df = df_for_table[['Method', 'Planner', 'Pert_Type', 'Stability', 'Faithfulness', 'Robustness', 'Time']]

# Save the cleaned data to a new CSV file
paper_df.to_csv('results_table1.csv', index=False)

print("Paper-ready table saved as 'results_table1.csv'")

# Create a summary table grouping by explanation method with correct robustness
# For Robustness, we take the first value for each Method since it's the same across all rows
summary_df = df_for_table.groupby(['Method', 'Pert_Type']).agg({
    'Stability': 'mean',
    'Faithfulness': 'mean',
    'Robustness': 'first',  # Take first value since it's the same for all rows of the same Method
    'Time': 'mean'
}).reset_index()

# Round the summary values
summary_df[['Stability', 'Faithfulness', 'Robustness', 'Time']] = summary_df[['Stability', 'Faithfulness', 'Robustness', 'Time']].round(2)
summary_df.to_csv('summary_table1.csv', index=False)

print("Summary table saved as 'summary_table1.csv'")

# Print a preview of your new table
print("\nPreview of table:")
print(paper_df.head(10))

print(f"Analysis complete!")

# Add a special explanation figure about planner robustness
fig, ax = plt.subplots(figsize=(8, 6))
sns.barplot(x='Explanation', y='Planner_Robustness', data=explainer_robustness, palette=explanation_colors, ax=ax)
ax.set_title('Planner Robustness by Explanation Method')
ax.set_xlabel('Explanation Method')
ax.set_ylabel('Planner Robustness\n(Higher = more consistent across planners)')
plt.annotate(
    "Note: Planner Robustness measures how consistent an explainer's results are\nacross different planners - not how robust a specific planner is.",
    xy=(0.5, -0.15), xycoords='axes fraction', ha='center', fontsize=10, fontweight='bold'
)
save_figure(fig, "planner_robustness_explained")

Unique values in each column:
Explanation: ['Anchors' 'LIME' 'SHAP']
Planner: ['A*' 'Dijkstra' 'BFS' 'Theta*']
Num_Perturbations: [ 10  25  50  75 100 200]
Perturbation_Type: ['move' 'remove']
Explanation_Stability: [0.1488     0.1491656  0.1521     0.1549     0.1529     0.1575
 0.1588     0.1572     0.14574333 0.1598     0.1538     0.149
 0.1561     0.1462     0.5694     0.44377358 0.4368     0.4579
 0.44184184 0.4486     0.4444     0.5478     0.45962264 0.4478
 0.43993994 0.4403     0.4448     0.45105105 0.5456     0.56903553
 0.4412     0.4518     0.4425     0.442      0.4464     0.5493
 0.5611465  0.4471     0.4353     0.4395     0.448      0.4393
 0.1571     0.13954373 0.1518     0.1476     0.14814815 0.1601
 0.15255255 0.12587719 0.1474     0.14794795 0.1484     0.15185185
 0.15360721 0.5568     0.44688691 0.43403403 0.4437     0.4553
 0.4472     0.44092277 0.47811816 0.3321     0.3434     0.3389
 0.34094094 0.34364364 0.5364     0.55888325 0.4369     0.4449
 0.43943944 0.4325325


Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='Explanation', y='Planner_Robustness', data=explainer_robustness,

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='Planner', y=metric, data=data, ax=axes[i], palette=colors)

Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='Planner', y=metric, data=data, ax=axes[i], palette=colors)


Paper-ready table saved as 'results_table1.csv'
Summary table saved as 'summary_table1.csv'

Preview of table:
    Method   Planner Pert_Type  Stability  Faithfulness  Robustness  Time
0  Anchors        A*      Move       0.15          0.29         1.0  2.26
1  Anchors        A*    Remove       0.15          0.32         1.0  2.30
2  Anchors        A*      Move       0.15          0.29         1.0  2.26
3  Anchors        A*      Move       0.15          0.29         1.0  2.26
4  Anchors        A*      Move       0.15          0.29         1.0  2.26
5  Anchors        A*      Move       0.16          0.29         1.0  2.26
6  Anchors        A*      Move       0.16          0.29         1.0  2.26
7  Anchors  Dijkstra      Move       0.16          0.29         1.0  3.35
8  Anchors  Dijkstra    Remove       0.15          0.32         1.0  3.22
9  Anchors  Dijkstra      Move       0.16          0.29         1.0  3.35
Analysis complete!



Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.

  sns.barplot(x='Explanation', y='Planner_Robustness', data=explainer_robustness, palette=explanation_colors, ax=ax)


Create nice table for paper

In [3]:
import pandas as pd
import numpy as np

# Load the original data
df = pd.read_csv('why_did_you_fail_results.csv')

# Rename columns to be more publication-friendly
column_mapping = {
    'Explanation': 'Method',
    'Planner': 'Planner',
    'Num_Perturbations': 'Perturbations',
    'Perturbation_Type': 'Pert_Type',
    'Explanation_Stability': 'Stability',
    'Faithfulness_Score': 'Faithfulness',
    'Planner_Robustness': 'Robustness',
    'Path_Length': 'Path_Len',
    'Explanation_Time_s': 'Time'
}

# Apply new column names
df = df.rename(columns=column_mapping)

# Round numeric values to 2 decimal places
numeric_cols = ['Stability', 'Faithfulness', 'Robustness', 'Path_Len', 'Time']
df[numeric_cols] = df[numeric_cols].round(2)

# Make perturbation type more readable ('move' -> 'Move', 'remove' -> 'Remove')
df['Pert_Type'] = df['Pert_Type'].str.capitalize()

# Create a more concise dataframe with the most important metrics
paper_df = df[['Method', 'Planner', 'Pert_Type', 'Stability', 'Faithfulness', 'Robustness', 'Time']]

# Save the cleaned data to a new CSV file
paper_df.to_csv('results_table1.csv', index=False)

print("Paper-ready table saved as 'results_table1.csv'")

# Optionally, create a summary table grouping by explanation method
summary_df = df.groupby(['Method', 'Pert_Type']).agg({
    'Stability': 'mean',
    'Faithfulness': 'mean',
    'Robustness': 'mean', 
    'Time': 'mean'
}).reset_index()

# Round the summary values
summary_df[['Stability', 'Faithfulness', 'Robustness', 'Time']] = summary_df[['Stability', 'Faithfulness', 'Robustness', 'Time']].round(2)
summary_df.to_csv('summary_table1.csv', index=False)

print("Summary table saved as 'summary_table1.csv'")

# Print a preview of your new table
print("\nPreview of table:")
print(paper_df)

Paper-ready table saved as 'results_table1.csv'
Summary table saved as 'summary_table1.csv'

Preview of table:
     Method Planner Pert_Type  Stability  Faithfulness  Robustness  Time
0   Anchors      A*      Move       0.15          0.29        1.00  2.26
1   Anchors      A*    Remove       0.15          0.32        1.00  2.30
2   Anchors      A*      Move       0.15          0.29        1.00  2.26
3   Anchors      A*      Move       0.15          0.29        1.00  2.26
4   Anchors      A*      Move       0.15          0.29        1.00  2.26
..      ...     ...       ...        ...           ...         ...   ...
79     SHAP  Theta*      Move       0.36          0.07        0.63  0.22
80     SHAP  Theta*      Move       0.36          0.07        0.63  0.22
81     SHAP  Theta*      Move       0.37          0.07        0.63  0.22
82     SHAP  Theta*      Move       0.37          0.07        0.63  0.22
83     SHAP  Theta*      Move       0.37          0.07        0.63  0.22

[84 rows x 7