# Result comparison with [AXp](https://arxiv.org/abs/2105.10278)

## [AXp](https://arxiv.org/abs/2105.10278) RESULTS

In [1]:
from pathlib import Path
import json
import pandas as pd
import matplotlib.pyplot as plt

# Root directory that is globbed one level deep for AXp baseline `*_results.json` files.
BASELINE_ROOT = Path('baseline/experiments')
def extract_baseline_metrics(json_path):
    """Summarise one AXp baseline ``*_results.json`` file as a flat record.

    Args:
        json_path: ``pathlib.Path`` of the result file to read (UTF-8 JSON).

    Returns:
        ``None`` when the file lists no experiments. Otherwise a dict holding
        the dataset name (``bench_name`` or, failing that, the parent
        directory name), time/length statistics aggregated over all
        experiments, and ``coverage_avg`` / ``sum_explanations_time`` derived
        from the first experiment that provides ``full_explanations`` /
        ``explanation_times``. Any statistic whose source data is missing is
        reported as ``None`` instead of raising.
    """
    with json_path.open('r', encoding='utf-8') as handle:
        data = json.load(handle)

    dataset_name = data.get('bench_name') or json_path.parent.name
    experiments = data.get('experiments', [])
    if not experiments:
        return None

    def metric_values(key):
        """Collect the non-null values stored under *key* across experiments."""
        values = []
        for exp in experiments:
            value = exp.get('explanations', {}).get(key)
            if value is not None:
                values.append(value)
        return values

    def first_or_empty(key):
        """Return the first recorded value for *key*, or [] when none exists."""
        found = metric_values(key)
        return found[0] if found else []

    avg_times = metric_values('avg_explanation_time')
    min_times = metric_values('min_explanation_time')
    max_times = metric_values('max_explanation_time')
    avg_lengths = metric_values('avg_explanation_length')
    min_lengths = metric_values('min_explanation_length')
    max_lengths = metric_values('max_explanation_length')

    # Only the first experiment carrying these per-sample lists is used.
    full_explanations = first_or_empty('full_explanations')
    sum_explanations_time = first_or_empty('explanation_times')

    # NOTE(review): the feature count is approximated by the longest
    # explanation; this under-counts if no explanation uses every feature.
    len_feature = max((len(v) for v in full_explanations), default=0)

    # Coverage of a sample = share of features left out of its explanation.
    # Skipped entirely when there is nothing to divide by.
    coverage_avg = []
    if len_feature:
        coverage_avg = [
            (len_feature - len(explanation)) / len_feature * 100
            for explanation in full_explanations
        ]

    return {
        'dataset': dataset_name,
        'avg_explanation_time': sum(avg_times) / len(avg_times) if avg_times else None,
        'min_explanation_time': min(min_times) if min_times else None,
        'max_explanation_time': max(max_times) if max_times else None,
        'avg_explanation_length': int(sum(avg_lengths) / len(avg_lengths)) if avg_lengths else None,
        'min_explanation_length': min(min_lengths) if min_lengths else None,
        'max_explanation_length': max(max_lengths) if max_lengths else None,
        'coverage_avg': sum(coverage_avg) / len(coverage_avg) if coverage_avg else None,
        'sum_explanations_time': sum(sum_explanations_time) if sum_explanations_time else None,
    }

# Gather every result file that sits exactly one directory below the baseline root.
baseline_files = sorted(BASELINE_ROOT.glob('*/*_results.json'))
if len(baseline_files) == 0:
    raise FileNotFoundError('No baseline results found under baseline/experiments')

# Files without experiments produce None and are left out of the table.
records = [
    record
    for record in map(extract_baseline_metrics, baseline_files)
    if record
]

# One row per dataset, ordered alphabetically with a fresh 0..n-1 index.
baseline_df = pd.DataFrame.from_records(records)
baseline_df = baseline_df.sort_values('dataset')
baseline_df = baseline_df.reset_index(drop=True)

# Alias of the summed explanation time under the name used for reporting.
baseline_df['time_seconds'] = baseline_df['sum_explanations_time']

baseline_df.sort_values('dataset').to_csv('baseline_metrics.csv', index=False)

## Our Results

In [2]:
import pandas as pd

# (dataset, AXp coverage in percent) kept side by side so that a name and its
# value cannot drift apart when the table is edited.
axp_coverage_pairs = [
    ('ann-thyroid', 86.54),
    ('appendicitis', 42.52),
    ('banknote', 29.71),
    ('biodegradation', 45.82),
    ('ecoli', 21.72),
    ('glass2', 24.75),
    ('heart-c', 33.17),
    ('ionosphere', 17.97),
    ('iris', 13.75),
    ('karhunen', 3.51),
    ('letter', 1.38),
    ('magic', 23.75),
    ('mofn-3-7-10', 67.82),
    ('new-thyroid', 29.30),
    ('pendigits', 2.81),
    ('phoneme', 31.50),
    ('ring', 39.57),
    ('segmentation', 11.38),
    ('shuttle', 61.28),
    ('sonar', 18.97),
    ('spambase', 21.72),
    ('spectf', 49.92),
    ('texture', 4.40),
    ('threeOf9', 50.12),
    ('twonorm', 23.80),
    ('vowel', 4.35),
    ('waveform-21', 22.59),
    ('waveform-40', 16.02),
    ('wdbc', 41.23),
    ('wine-recog', 18.70),
    ('wpbc', 38.38),
    ('xd6', 53.50),
]

# Column-oriented view of the pairs, in the same order.
data = {
    'dataset': [name for name, _ in axp_coverage_pairs],
    'axp_cov': [coverage for _, coverage in axp_coverage_pairs],
}

df_axp = pd.DataFrame(data)
df_axp

Unnamed: 0,dataset,axp_cov
0,ann-thyroid,86.54
1,appendicitis,42.52
2,banknote,29.71
3,biodegradation,45.82
4,ecoli,21.72
5,glass2,24.75
6,heart-c,33.17
7,ionosphere,17.97
8,iris,13.75
9,karhunen,3.51


In [3]:
import pandas as pd
import json
from pathlib import Path

def load_coverage_data(n_workers):
    """
    Load coverage data for a given number of workers.
    Dynamically finds which datasets have that worker configuration.

    Every ``redis_dump_readable.json`` matching
    ``results/checkpoints_scalability/<dataset>/workers_<n>/class_<label>/all_features/``
    is read as UTF-8 and the entries of its database ``"0"`` whose key is a
    non-empty string of 0s and 1s are turned into rows. Entries that are not
    JSON objects are skipped, and a missing or null ``coverage`` counts as 0.

    Returns: DataFrame with all binary key data, or empty DataFrame if none found
    """
    base_path = Path("results/checkpoints_scalability")
    pattern = f"*/workers_{n_workers}/class_*/all_features/redis_dump_readable.json"
    json_files = list(base_path.glob(pattern))

    if not json_files:
        print(f"No files found for workers_{n_workers}")
        return pd.DataFrame()

    def locate(json_file):
        """Return (dataset, class label) read from the path below base_path."""
        # Relative parts are (<dataset>, workers_<n>, class_<label>, all_features, file),
        # so the lookup no longer depends on how many components base_path has.
        relative_parts = json_file.relative_to(base_path).parts
        return relative_parts[0], relative_parts[2].replace("class_", "")

    # Find unique datasets
    datasets = sorted({locate(f)[0] for f in json_files})
    print(f"Found {len(json_files)} files for workers_{n_workers}")
    print(f"Datasets: {datasets}")

    rows = []
    all_binary_keys = set()

    for json_file in json_files:
        dataset_name, class_label = locate(json_file)

        # Explicit encoding: the platform default is not UTF-8 everywhere.
        with open(json_file, 'r', encoding='utf-8') as f:
            data = json.load(f)

        db_dump = data.get("0", {})

        for key, values in db_dump.items():
            # Filter only binary keys (contain only 0s and 1s)
            if not key or not set(key).issubset({'0', '1'}):
                continue
            # A binary-looking key that does not map to a record cannot be read.
            if not isinstance(values, dict):
                continue

            all_binary_keys.add(key)
            rows.append({
                'dataset_name': dataset_name,
                'class_label': class_label,
                'binary_key': key,
                'worker': values.get('worker'),
                'timeout_occurred': values.get('timeout_occurred'),
                # `or 0` treats an explicit null like a missing value.
                'coverage': float(values.get('coverage') or 0),
                'vts': values.get('vts'),
                'vte': values.get('vte'),
                'coverage_t': values.get('coverage_t'),
                'globally_dominated': values.get('globally_dominated'),
            })

    df = pd.DataFrame(rows)

    if df.empty:
        return df

    # Sort binary keys lexicographically and create ID mapping
    sorted_keys = sorted(all_binary_keys)
    key_to_id = {k: i for i, k in enumerate(sorted_keys)}

    df['binary_key_id'] = df['binary_key'].map(key_to_id)

    df = df[['dataset_name', 'class_label', 'binary_key_id', 'binary_key',
             'worker', 'timeout_occurred', 'coverage', 'vts', 'vte',
             'coverage_t', 'globally_dominated']]

    df = df.sort_values(['dataset_name', 'class_label', 'binary_key_id']).reset_index(drop=True)

    print(f"Total rows: {len(df)}")
    print(f"Unique binary keys: {len(all_binary_keys)}")

    return df


def get_available_worker_configs():
    """Return the sorted worker counts that have a ``workers_<n>`` entry on disk."""
    root = Path("results/checkpoints_scalability")
    # Whatever remains after dropping the "workers_" marker should be the count.
    candidates = (
        entry.name.replace("workers_", "")
        for entry in root.glob("*/workers_*")
    )
    # Non-numeric leftovers are ignored; the set removes repeats across datasets.
    return sorted({int(text) for text in candidates if text.isdigit()})


# Show available configurations (worker counts discovered under results/checkpoints_scalability)
available = get_available_worker_configs()
print(f"Available worker configurations: {available}\n")

# Load data for workers_32; df_32 is an empty DataFrame when no matching dump files exist
df_32 = load_coverage_data(32)

Available worker configurations: []

No files found for workers_32


In [4]:
# Load data for workers_16
# df_16 = load_coverage_data(16)
# df_16

In [6]:
# Mean coverage of each dataset, expressed in percent, for the 32-worker runs
if not df_32.empty:
    mean_fraction = df_32.groupby('dataset_name')['coverage'].mean()
    avg_cov_32 = (
        (mean_fraction * 100)
        .reset_index()
        .rename(columns={'dataset_name': 'dataset', 'coverage': 'our_cov'})
    )
    print("Workers 32:")
    display(avg_cov_32)
else:
    # Keep the expected columns so the later merge on 'dataset' still works.
    print("No data available for workers_32 (results/checkpoints_scalability not found)")
    avg_cov_32 = pd.DataFrame(columns=['dataset', 'our_cov'])

No data available for workers_32 (results/checkpoints_scalability not found)


In [7]:
# Outer-join the AXp figures with ours so datasets present on only one side are kept
df_comparison = (
    df_axp
    .merge(avg_cov_32, on='dataset', how='outer')
    .sort_values('dataset')
    .reset_index(drop=True)
)
df_comparison

Unnamed: 0,dataset,axp_cov,our_cov
0,ann-thyroid,86.54,
1,appendicitis,42.52,
2,banknote,29.71,
3,biodegradation,45.82,
4,ecoli,21.72,
5,glass2,24.75,
6,heart-c,33.17,
7,ionosphere,17.97,
8,iris,13.75,
9,karhunen,3.51,


In [8]:
from scipy import stats

# Keep only the datasets for which both methods report a coverage value
df_both = df_comparison.dropna(subset=['axp_cov', 'our_cov'])

if not df_both.empty:
    axp_values = df_both['axp_cov']
    our_values = df_both['our_cov']

    # Paired t-test: every dataset contributes one (AXp, ours) pair
    t_stat, p_value = stats.ttest_rel(axp_values, our_values)

    print(f"Datasets with both values: {len(df_both)}")
    print(f"Mean axp_cov: {axp_values.mean():.2f}")
    print(f"Mean our_cov: {our_values.mean():.2f}")
    print(f"\nPaired t-test:")
    print(f"t-statistic: {t_stat:.4f}")
    print(f"p-value: {p_value:.4e}")
    display(df_both)
else:
    # NaN placeholders let the next cell detect that no test was run.
    print("No paired data available — run experiments to populate results/checkpoints_scalability")
    t_stat = p_value = float('nan')

No paired data available — run experiments to populate results/checkpoints_scalability


In [9]:
import math
from scipy.stats import t

if not df_both.empty and not math.isnan(t_stat):
    # n paired observations leave n - 1 degrees of freedom
    df_degrees = len(df_both) - 1

    # Significance levels to tabulate (two-tailed)
    alpha_levels = [0.10, 0.05, 0.01, 0.001]
    rule = "-" * 50

    print(f"t-statistic: {t_stat:.4f}")
    print(f"Degrees of freedom: {df_degrees}")
    print(f"\nT-table critical values (two-tailed):")
    print(rule)
    print(f"{'α':<10} {'Confidence':<15} {'t_critical':<12} {'Significant'}")
    print(rule)

    for alpha in alpha_levels:
        confidence = (1 - alpha) * 100
        # Two-tailed test: half of alpha sits in each tail
        t_critical = t.ppf(1 - alpha/2, df_degrees)
        significant = abs(t_stat) > t_critical
        if significant:
            symbol = "✓"
        else:
            symbol = "✗"
        print(f"{alpha:<10} {confidence:.1f}%{'':<9} {t_critical:<12.4f} {symbol}")

    print(rule)
    print(f"\n|t| = {abs(t_stat):.4f}")
    print(f"p-value = {p_value:.4e}")
else:
    print("No paired data available for t-table — skipped")

No paired data available for t-table — skipped


# Scalability Results

In [10]:
import pandas as pd

TIMEOUT_SECONDS = 6 * 60  # cap applied to each sample's duration: 6 minutes

df_16 = load_coverage_data(16)
df_32 = load_coverage_data(32)

scalability = pd.DataFrame()  # stays empty unless both configurations have data

if not (df_16.empty or df_32.empty):
    # Only datasets measured under both configurations can be compared.
    datasets_16 = set(df_16['dataset_name'].unique())
    datasets_32 = set(df_32['dataset_name'].unique())
    common_datasets = datasets_16 & datasets_32
    print(f"\nCommon datasets for comparison: {common_datasets}")

    df_16_common = df_16.loc[df_16['dataset_name'].isin(common_datasets)].copy()
    df_32_common = df_32.loc[df_32['dataset_name'].isin(common_datasets)].copy()

    # Per-sample duration (vte - vts), limited to the timeout.
    for frame in (df_16_common, df_32_common):
        elapsed = frame['vte'].astype(float) - frame['vts'].astype(float)
        frame['sample_time'] = elapsed.clip(upper=TIMEOUT_SECONDS)

    def get_class_time(df):
        """Time of a class = largest per-worker sum of sample times."""
        per_worker = (
            df.groupby(['dataset_name', 'class_label', 'worker'])['sample_time']
            .sum()
            .reset_index()
        )
        per_class = (
            per_worker.groupby(['dataset_name', 'class_label'])['sample_time']
            .max()
            .reset_index()
        )
        return per_class.rename(columns={'sample_time': 'class_time'})

    class_times_16 = get_class_time(df_16_common)
    class_times_32 = get_class_time(df_32_common)

    class_comparison = class_times_16.merge(
        class_times_32,
        on=['dataset_name', 'class_label'],
        suffixes=('_16', '_32'),
    )
    print(f"Classes compared: {len(class_comparison)}")

    class_comparison['speedup'] = (
        class_comparison['class_time_16'] / class_comparison['class_time_32']
    )

    # Dataset level: total time per configuration, mean of the per-class speedups.
    aggregations = {
        'class_time_16': 'sum',
        'class_time_32': 'sum',
        'speedup': 'mean',
    }
    scalability = class_comparison.groupby('dataset_name').agg(aggregations).reset_index()

    scalability['total_speedup'] = scalability['class_time_16'] / scalability['class_time_32']
    # Workers double from 16 to 32, hence the division by 2.
    scalability['efficiency'] = scalability['total_speedup'] / 2

    print("\nPer-class times:")
    display(class_comparison)
    print("\nPer-dataset summary:")
    display(scalability)
else:
    print("No scalability data available — run experiments to populate results/checkpoints_scalability")

No files found for workers_16
No files found for workers_32
No scalability data available — run experiments to populate results/checkpoints_scalability


In [11]:
import plotly.graph_objects as go
from plotly.subplots import make_subplots

# Three-panel scalability figure: total time, speedup, and parallel efficiency
# per dataset, followed by the overall statistics.
if scalability.empty:
    print("No scalability data available — skipped chart")
else:
    fig = make_subplots(rows=1, cols=3, subplot_titles=(
        'Total Class Time per Dataset<br>(timeouts capped at 6 min)',
        'Scalability: Speedup',
        'Parallel Efficiency'
    ))

    # Panel 1: total class time (seconds -> minutes) for both configurations.
    fig.add_trace(go.Bar(name='16 workers', x=scalability['dataset_name'], y=scalability['class_time_16'] / 60, marker_color='steelblue'), row=1, col=1)
    fig.add_trace(go.Bar(name='32 workers', x=scalability['dataset_name'], y=scalability['class_time_32'] / 60, marker_color='coral'), row=1, col=1)

    # Panel 2: speedup going from 16 to 32 workers. This panel had a title and
    # an axis label but no trace, so it used to render empty.
    fig.add_trace(go.Bar(name='Speedup', x=scalability['dataset_name'], y=scalability['total_speedup'], marker_color='mediumpurple', showlegend=False), row=1, col=2)
    # Doubling the workers makes 2x the ideal speedup.
    fig.add_hline(y=2, line_dash="dash", line_color="red", annotation_text="Ideal (2x)", row=1, col=2)

    # Panel 3: parallel efficiency in percent.
    fig.add_trace(go.Bar(name='Parallel Efficiency', x=scalability['dataset_name'], y=scalability['efficiency'] * 100, marker_color='seagreen', showlegend=False), row=1, col=3)
    fig.add_hline(y=100, line_dash="dash", line_color="red", annotation_text="Ideal (100%)", row=1, col=3)

    fig.update_layout(height=400, barmode='group', title_text='Scalability Analysis: 16 vs 32 Workers')
    fig.update_xaxes(tickangle=45)
    fig.update_yaxes(title_text='Time (minutes)', row=1, col=1)
    fig.update_yaxes(title_text='Speedup', row=1, col=2)
    fig.update_yaxes(title_text='Efficiency (%)', row=1, col=3)

    # Static export is best-effort, like the other figure exports in this
    # notebook: a failed export must not prevent the chart and the
    # statistics below from being shown.
    try:
        fig.write_image("scalability_analysis.png")
    except Exception as e:
        print(f'Save failed: {e}')
    fig.show()

    print(f"\nOverall Statistics:")
    print(f"  Total time 16 workers: {scalability['class_time_16'].sum() / 60:.1f} minutes")
    print(f"  Total time 32 workers: {scalability['class_time_32'].sum() / 60:.1f} minutes")
    print(f"  Mean speedup: {scalability['total_speedup'].mean():.2f}x")
    print(f"  Mean efficiency: {scalability['efficiency'].mean() * 100:.1f}%")

No scalability data available — skipped chart


In [12]:
# Reload the 16-worker coverage table (an empty DataFrame when no dump files are found)
df_16 = load_coverage_data(16)

No files found for workers_16


In [13]:
# Display the 32-worker coverage table
df_32

In [14]:
# Display the 16-worker coverage table
df_16

# Visualization of Maximal continuous reason and the AXp explanation
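Pick a dataset and class; one maximal reason is then drawn at random from the Redis dump, together with the sample it belongs to. The plot shows, for each feature, the normalized reason interval (green corridor with horizontal ticks at its finite bounds) and the sample value (red point when the feature is part of the reason, gray point when it is ignored).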

In [15]:
import random
import math
import sys
import subprocess
from pathlib import Path
from redis_helpers.icf import bitmap_to_icf
import json
import pandas as pd
import plotly.graph_objects as go

# ── user config ──────────────────────────────────────────────────────────────
dataset_name = 'ecoli'
class_label  = '1'
random_seed  = 17
# ─────────────────────────────────────────────────────────────────────────────

dump_location = f'results/checkpoints/{dataset_name}/workers_32/class_{class_label}_sample_all/redis_dump_readable.json'
dump_path = Path(dump_location)

if dump_path.exists():
    data = json.loads(dump_path.read_text(encoding='utf-8'))
    db0 = data['0']
    eu = db0['EU']
    reasons_db = data.get('2', {})

    # Seeding makes the random pick below reproducible.
    if random_seed is not None:
        random.seed(random_seed)

    # Keep the reasons whose metadata points at a sample stored in db 0.
    reason_items = []
    for bitmap, meta in reasons_db.items():
        if not isinstance(meta, dict):
            continue
        if meta.get('sample_key') in db0:
            reason_items.append((bitmap, meta))
    if len(reason_items) == 0:
        raise ValueError('No reasons found that map to samples in db0')

    reason_bitmap, reason_meta = random.choice(reason_items)
    sample_key = reason_meta['sample_key']
    sample = db0[sample_key]
    icf = bitmap_to_icf(reason_bitmap, eu)

    def display_feature_name(name):
        """Drop the leading 't' from names of the form t<digits>."""
        looks_indexed = name.startswith('t') and name[1:].isdigit()
        return name[1:] if looks_indexed else name

    # One row per feature; features absent from the reason get an unbounded interval.
    unbounded = (float('-inf'), float('inf'))
    rows = []
    for feature in sorted(sample.keys()):
        constrained = feature in icf
        lower, upper = icf[feature] if constrained else unbounded
        rows.append({
            'feature': feature,
            'display_feature': display_feature_name(feature),
            'in_reason': constrained,
            'interval_lower': lower,
            'interval_upper': upper,
            'sample_value': sample[feature],
        })
    reason_df = pd.DataFrame(rows)
    display(reason_df)

    def feature_min_max(feature_name):
        """Smallest and largest finite EU endpoint of a feature, or (None, None)."""
        finite = [v for v in eu.get(feature_name, []) if math.isfinite(v)]
        if not finite:
            return (None, None)
        return (min(finite), max(finite))

    def normalize_value(value, fmin, fmax):
        """Map value into [0, 1] relative to [fmin, fmax]; infinities go to the ends."""
        if value == float('-inf'):
            return 0.0
        if value == float('inf'):
            return 1.0
        if fmin is None or fmax is None or fmax == fmin:
            return 0.5
        scaled = (value - fmin) / (fmax - fmin)
        return max(0.0, min(1.0, scaled))

    # Normalized copy of every row plus the width of its interval in percent.
    norm_rows = []
    for _, row in reason_df.iterrows():
        fmin, fmax = feature_min_max(row['feature'])
        raw_lower = row['interval_lower']
        raw_upper = row['interval_upper']
        norm_lower = normalize_value(raw_lower, fmin, fmax)
        norm_upper = normalize_value(raw_upper, fmin, fmax)
        if norm_upper < norm_lower:
            norm_lower, norm_upper = norm_upper, norm_lower
        norm_rows.append({
            'feature': row['feature'],
            'display_feature': row['display_feature'],
            'in_reason': row['in_reason'],
            'interval_lower': raw_lower,
            'interval_upper': raw_upper,
            'sample_value': row['sample_value'],
            'norm_lower': norm_lower,
            'norm_upper': norm_upper,
            'norm_sample': normalize_value(row['sample_value'], fmin, fmax),
            'coverage_pct': (norm_upper - norm_lower) * 100.0,
            'has_neg_inf': raw_lower == float('-inf'),
            'has_pos_inf': raw_upper == float('inf'),
        })

    norm_df = pd.DataFrame(norm_rows)
    sample_coverage = norm_df['coverage_pct'].sum() / len(norm_df)
    norm_df = norm_df.sort_values('coverage_pct', ascending=False).reset_index(drop=True)

    # Plot: the invisible lower line must come first because the upper line
    # fills down to the previous trace.
    fig_norm = go.Figure()
    x_positions = list(range(len(norm_df)))
    lower_edge = go.Scatter(
        x=x_positions, y=norm_df['norm_lower'].tolist(), mode='lines',
        line=dict(width=0), showlegend=False, hoverinfo='skip',
    )
    upper_edge = go.Scatter(
        x=x_positions, y=norm_df['norm_upper'].tolist(), mode='lines',
        line=dict(width=0), fill='tonexty', fillcolor='rgba(144,238,144,0.35)',
        name='Reason corridor',
    )
    fig_norm.add_trace(lower_edge)
    fig_norm.add_trace(upper_edge)

    # Short horizontal ticks at each finite bound of a constrained feature;
    # only the first lower tick gets a legend entry.
    first_bounds = True
    lhw = 0.25
    bound_style = dict(color='green', width=2)
    for idx, row in norm_df.iterrows():
        if not row['in_reason']:
            continue
        tick_x = [idx-lhw, idx+lhw]
        if not row['has_neg_inf']:
            fig_norm.add_trace(go.Scatter(x=tick_x, y=[row['norm_lower']]*2, mode='lines', line=bound_style, name='Reason bounds', showlegend=first_bounds))
            first_bounds = False
        if not row['has_pos_inf']:
            fig_norm.add_trace(go.Scatter(x=tick_x, y=[row['norm_upper']]*2, mode='lines', line=bound_style, showlegend=False))

    # Sample markers, split by whether the feature belongs to the reason.
    in_r_x, in_r_y, ig_x, ig_y = [], [], [], []
    for idx, row in norm_df.iterrows():
        if row['in_reason']:
            xs, ys = in_r_x, in_r_y
        else:
            xs, ys = ig_x, ig_y
        xs.append(idx)
        ys.append(row['norm_sample'])
    if in_r_x:
        fig_norm.add_trace(go.Scatter(x=in_r_x, y=in_r_y, mode='markers', marker=dict(color='crimson', size=12, line=dict(width=1.5, color='darkred')), name='Sample (in reason)'))
    if ig_x:
        fig_norm.add_trace(go.Scatter(x=ig_x, y=ig_y, mode='markers', marker=dict(color='lightgray', size=12, line=dict(width=1.5, color='gray')), name='Sample (ignored)'))

    # Interval width of each feature, written above the plot area.
    label_font = dict(size=16, color='black', family='monospace')
    for idx, row in norm_df.iterrows():
        fig_norm.add_annotation(x=idx, y=1.06, text=f"{row['coverage_pct']:.1f}%", showarrow=False, font=label_font, xanchor='center')

    fig_norm.update_layout(
        title=f'Normalized maximal reason ordered by coverage<br><sub>Dataset: {dataset_name}, class: {class_label}, coverage: {sample_coverage:.1f}%</sub>',
        xaxis=dict(tickmode='array', tickvals=x_positions, ticktext=norm_df['display_feature'], tickangle=45),
        yaxis=dict(range=[-0.05, 1.12]),
        height=500, width=1200,
    )
    # Saving is best-effort; the figure is shown either way.
    try:
        fig_norm.write_image(f'maximal_reason_normalized_{dataset_name}_class_{class_label}.pdf')
    except Exception as e:
        print(f'Save failed: {e}')
    fig_norm.show()
else:
    print(f"[SKIP] Redis dump not found at {dump_path}")
    print("Run experiments first to populate results/checkpoints/")
    # None tells the following cell that there is nothing to compare against.
    norm_df = None

[SKIP] Redis dump not found at results\checkpoints\ecoli\workers_32\class_1_sample_all\redis_dump_readable.json
Run experiments first to populate results/checkpoints/


In [16]:
import numpy as np
import re
import plotly.graph_objects as go

# Plot the AXp explanation of the same sample that the previous cell selected,
# using the feature order and normalization helpers that cell defined.
if norm_df is None:
    print("[SKIP] Baseline visualization skipped — no Redis dump available")
else:
    # The samples file may carry either extension.
    samples_path = Path(f'baseline/resources/datasets/{dataset_name}/{dataset_name}.samples')
    if not samples_path.exists():
        samples_path = Path(f'baseline/resources/datasets/{dataset_name}/{dataset_name}.sample')
    if not samples_path.exists():
        raise FileNotFoundError(f'Could not find samples file for {dataset_name}')

    csv_path = Path(f'baseline/resources/datasets/{dataset_name}/{dataset_name}.csv')
    if not csv_path.exists():
        raise FileNotFoundError(f'Could not find CSV file for {dataset_name}')

    # Only the CSV header is needed: it gives the column order of the samples file.
    with csv_path.open('r') as f:
        header_line = f.readline().strip()
        feature_names_in_file = [name.strip() for name in header_line.split(',') if name.strip()]
        # More header names than features: drop the trailing column
        # (presumably the class label — confirm against the dataset files).
        if len(feature_names_in_file) > len(reason_df):
            feature_names_in_file = feature_names_in_file[:-1]

    samples = np.atleast_2d(np.loadtxt(samples_path, delimiter=','))
    print(f"[INFO] Loaded {samples.shape[0]} samples")

    feature_to_value = dict(zip(reason_df['feature'], reason_df['sample_value']))
    feature_to_display = dict(zip(reason_df['feature'], reason_df['display_feature']))

    def canonical_feature(name):
        """Reduce names of the form f<digits> / t<digits> to the bare digits."""
        match = re.match(r'^[ft](\d+)$', name)
        return match.group(1) if match else name

    # Every feature is reachable by its own name and by its canonical form.
    feature_lookup = {}
    for feat in feature_to_value:
        feature_lookup[feat] = feat
        canon = canonical_feature(feat)
        if canon not in feature_lookup:
            feature_lookup[canon] = feat

    # Rebuild the selected sample in the column order of the samples file.
    current_values, feature_keys_in_order, missing = [], [], []
    for file_feat in feature_names_in_file:
        feat_key = feature_lookup.get(file_feat) or feature_lookup.get(canonical_feature(file_feat))
        if feat_key is None:
            missing.append(file_feat)
            continue
        feature_keys_in_order.append(feat_key)
        current_values.append(feature_to_value[feat_key])
    if missing:
        raise ValueError(f'Could not map features: {missing}')
    current_values = np.array(current_values, dtype=float)

    sample_index = next((i for i, v in enumerate(samples) if np.allclose(v, current_values, atol=1e-6)), None)
    if sample_index is None:
        raise ValueError('Could not find matching sample')
    print(f"[INFO] Matching sample index: {sample_index}")

    def load_baseline_explanations(name):
        """Return the per-sample AXp explanations recorded for dataset *name*.

        The summary table does not carry a 'full_explanations' column, so the
        lists are read from the baseline result files, using the same naming
        rule as the summary (bench_name, else the parent directory name). The
        column is still honoured when it happens to be present.
        """
        if 'full_explanations' in baseline_df.columns:
            stored = baseline_df.loc[baseline_df['dataset'] == name, 'full_explanations']
            if not stored.empty:
                return stored.iloc[0]
        for results_path in baseline_files:
            with results_path.open('r', encoding='utf-8') as handle:
                payload = json.load(handle)
            if (payload.get('bench_name') or results_path.parent.name) != name:
                continue
            for experiment in payload.get('experiments', []):
                explanations = experiment.get('explanations', {}).get('full_explanations')
                if explanations is not None:
                    return explanations
        raise ValueError(f'No baseline explanations found for {name}')

    baseline_expl = load_baseline_explanations(dataset_name)[sample_index]
    baseline_expl_set = set(baseline_expl)

    # A feature is part of the AXp explanation when its position is listed.
    baseline_rows = []
    for feat_idx, feat_key in enumerate(feature_keys_in_order):
        display_feature = feature_to_display.get(feat_key, feat_key)
        baseline_rows.append({'feature': feat_key, 'display_feature': display_feature,
                               'feature_idx': feat_idx, 'in_reason': feat_idx in baseline_expl_set,
                               'sample_value': current_values[feat_idx]})
    baseline_df_viz = pd.DataFrame(baseline_rows)

    norm_baseline_rows = []
    for _, row in baseline_df_viz.iterrows():
        fmin, fmax = feature_min_max(row['feature'])
        norm_baseline_rows.append({'feature': row['feature'], 'display_feature': row['display_feature'],
                                    'in_reason': row['in_reason'], 'sample_value': row['sample_value'],
                                    'norm_sample': normalize_value(row['sample_value'], fmin, fmax)})
    norm_baseline_df = pd.DataFrame(norm_baseline_rows)
    # Coverage of an AXp = share of features it leaves out.
    coverage_baseline = len([r for r in norm_baseline_rows if not r['in_reason']]) / len(norm_baseline_rows) * 100

    # Reuse the feature order of the maximal-reason plot so both figures line up.
    feature_order_map = {row['feature']: idx for idx, row in norm_df.iterrows()}
    norm_baseline_df['sort_order'] = norm_baseline_df['feature'].map(feature_order_map)
    norm_baseline_df = norm_baseline_df.sort_values('sort_order').drop(columns=['sort_order']).reset_index(drop=True)

    fig_baseline = go.Figure()
    x_pos = list(range(len(norm_baseline_df)))

    in_x, in_y, ni_x, ni_y = [], [], [], []
    for idx, row in norm_baseline_df.iterrows():
        (in_x if row['in_reason'] else ni_x).append(idx)
        (in_y if row['in_reason'] else ni_y).append(row['norm_sample'])

    if ni_x:
        fig_baseline.add_trace(go.Scatter(x=ni_x, y=ni_y, mode='markers', marker=dict(color='lightgray', size=12, line=dict(width=2, color='gray')), name='Not in explanation'))
    if in_x:
        fig_baseline.add_trace(go.Scatter(x=in_x, y=in_y, mode='markers', marker=dict(color='crimson', size=12, line=dict(width=2, color='darkred')), name='In explanation'))

    fig_baseline.update_layout(
        title=f'AXP Explanation<br><sub>Coverage: {coverage_baseline:.1f}%</sub>',
        xaxis=dict(tickmode='array', tickvals=x_pos, ticktext=norm_baseline_df['display_feature'], tickangle=45),
        yaxis=dict(range=[-0.05, 1.15]),
        height=500, width=1200,
    )
    # The file name follows the selected dataset and class instead of a fixed
    # 'ecoli_class_0', so it always describes what was plotted. Saving is
    # best-effort; the figure is shown either way.
    try:
        fig_baseline.write_image(f'axp_explanation_{dataset_name}_class_{class_label}_normalized.pdf')
    except Exception as e:
        print(f'Save failed: {e}')
    fig_baseline.show()

[SKIP] Baseline visualization skipped — no Redis dump available
