In [None]:
from robovast.common.analysis import read_output_files, read_output_csv, get_behavior_info
import pandas as pd
from robovast_nav.gui import MapVisualizer
import matplotlib.pyplot as plt
import matplotlib.cm as cm
import numpy as np
# Directory handed to read_output_files; empty by default and has to be filled in.
DATA_DIR = ''


def _load_csv(filename):
    """Run read_output_files over DATA_DIR, reading `filename` from each test directory."""
    return read_output_files(DATA_DIR, lambda test_dir: read_output_csv(test_dir, filename))


# --- Pose samples -----------------------------------------------------------
df = _load_csv("poses.csv")
if df.empty:
    raise ValueError("No pose data found. Please check DATA_DIR path and ensure pose files exist.")

# One colour per distinct test id, in order of first appearance in the pose table
tests = df['test'].unique()
if len(tests) == 0:
    raise ValueError("No test data found in the dataframe.")

colors = cm.rainbow(np.linspace(0, 1, len(tests)))

# NOTE(review): ground-truth x is shifted by a hard-coded 8 — presumably a frame/map
# offset between the ground-truth source and the robot's pose frame; confirm the value.
gt_rows = df['frame'] == 'turtlebot4_base_link_gt'
df.loc[gt_rows, 'position.x'] = df.loc[gt_rows, 'position.x'] + 8

# --- Behavior log -----------------------------------------------------------
df_behaviors = _load_csv("behaviors.csv")
if df_behaviors.empty:
    raise ValueError("No behavior data found. Please check DATA_DIR path and ensure behavior files exist.")

# Rows describing the nav_to_pose behavior (provides 'test' and 'duration' used later on)
df_behavior_info = get_behavior_info('differential_drive_robot.nav_to_pose', df_behaviors)
if df_behavior_info.empty:
    raise ValueError("No behavior info extracted. Check if 'differential_drive_robot.nav_to_pose' behavior exists in the data.")

## Overview

In [None]:
# Overview figure: the ground-truth trajectory of every run on a single map

viz = MapVisualizer()
viz.load_map("/opt/ros/jazzy/share/nav2_bringup/maps/depot.yaml")
viz.create_figure(figsize=(14, 12))

# Select the ground-truth samples once; every run is then a slice of this frame
df_gt_all = df[df['frame'] == 'turtlebot4_base_link_gt']

for test, color in zip(tests, colors):
    run_gt = df_gt_all[df_gt_all['test'] == test]
    # (x, y) pairs in row order
    path_robot = list(zip(run_gt['position.x'], run_gt['position.y']))
    viz.draw_path(path_robot, color=color, linewidth=1.5, label=f'Run {test}', show_endpoints=False)

viz.ax.set_title(
    f'Multi-Run Comparison: All {len(tests)} Robot Paths',
    fontsize=14,
    fontweight='bold',
)
viz.ax.legend(loc='upper left', fontsize=9, ncol=2)

plt.tight_layout()
plt.show()

## Navigation Duration Analysis

In [None]:
# # Analyze navigation duration across all runs
# print("Navigation Duration Statistics:")
# print("="*80)

# # Display the behavior info dataframe
# print(f"\nTotal runs analyzed: {len(df_behavior_info)}")
# print(f"\nDuration statistics:")
# print(f"  Mean: {df_behavior_info['duration'].mean():.2f} seconds")
# print(f"  Std Dev: {df_behavior_info['duration'].std():.2f} seconds")
# print(f"  Median: {df_behavior_info['duration'].median():.2f} seconds")
# print(f"  Min: {df_behavior_info['duration'].min():.2f} seconds")
# print(f"  Max: {df_behavior_info['duration'].max():.2f} seconds")
# print(f"  Range: {df_behavior_info['duration'].max() - df_behavior_info['duration'].min():.2f} seconds")
# print(f"  Coefficient of Variation: {(df_behavior_info['duration'].std()/df_behavior_info['duration'].mean())*100:.2f}%")

# print("\n" + "="*80)
# print("\nDetailed Duration by Run:")
# df_behavior_info

In [None]:
# Navigation duration comparison: six views of the per-run nav_to_pose durations.
# The names durations, mean_duration, sorted_durations and sorted_tests stay at
# notebook level because the duration summary further down reads them.
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# Prepare data
durations = df_behavior_info['duration'].values
tests_behavior = df_behavior_info['test'].values
x_pos = np.arange(len(durations))

# Colours are looked up by test id instead of by row position: `colors` is ordered
# like `tests`, which is not guaranteed to match the row order (or row count) of
# df_behavior_info. Tests without an assigned colour fall back to gray.
fallback_color = (0.5, 0.5, 0.5, 1.0)
color_by_test = {test: tuple(color) for test, color in zip(tests, colors)}
run_colors = [color_by_test.get(test, fallback_color) for test in tests_behavior]

# 1. Bar Chart - Duration by Run
ax = axes[0, 0]
bars = ax.bar(x_pos, durations, alpha=0.75, color=run_colors, edgecolor='black', linewidth=1.5)
ax.set_xlabel('Run Number', fontsize=11, fontweight='bold')
ax.set_ylabel('Duration (seconds)', fontsize=11, fontweight='bold')
ax.set_title('Navigation Duration by Run', fontsize=12, fontweight='bold')
ax.set_xticks(x_pos)
ax.set_xticklabels([f'{t}' for t in tests_behavior])
ax.grid(True, alpha=0.3, axis='y', linewidth=0.5)

# Value label above every bar
for i, duration in enumerate(durations):
    ax.text(i, duration + 0.5, f'{duration:.1f}s', ha='center', va='bottom',
            fontsize=9, fontweight='bold')

# Mean line
mean_duration = durations.mean()
ax.axhline(mean_duration, color='red', linestyle='--', linewidth=2,
           label=f'Mean: {mean_duration:.1f}s', alpha=0.7)
ax.legend(fontsize=10)

# 2. Box Plot - Duration Distribution
ax = axes[0, 1]
bp = ax.boxplot([durations], widths=0.5, patch_artist=True,
                tick_labels=['All Runs'], showmeans=True)
bp['boxes'][0].set_facecolor('steelblue')
bp['boxes'][0].set_alpha(0.7)
bp['boxes'][0].set_edgecolor('black')
bp['boxes'][0].set_linewidth(2)

for element in ['whiskers', 'fliers', 'medians', 'caps']:
    plt.setp(bp[element], color='black', linewidth=2)
plt.setp(bp['means'], marker='D', markerfacecolor='red', markeredgecolor='black', markersize=8)

ax.set_ylabel('Duration (seconds)', fontsize=11, fontweight='bold')
ax.set_title('Navigation Duration Distribution', fontsize=12, fontweight='bold')
ax.grid(True, alpha=0.3, axis='y', linewidth=0.5)

# Statistics annotations to the right of the box
q1, median, q3 = np.percentile(durations, [25, 50, 75])
ax.text(1.3, q1, f'Q1: {q1:.1f}s', fontsize=9, va='center')
ax.text(1.3, median, f'Median: {median:.1f}s', fontsize=9, va='center', fontweight='bold')
ax.text(1.3, q3, f'Q3: {q3:.1f}s', fontsize=9, va='center')
ax.text(1.3, mean_duration, f'Mean: {mean_duration:.1f}s', fontsize=9, va='center', color='red')

# 3. Histogram - Duration Distribution
ax = axes[0, 2]
n, bins, patches = ax.hist(durations, bins=15, color='steelblue', alpha=0.7,
                           edgecolor='black', linewidth=1.5)

# Colour each bin by its duration value (bin centre mapped onto the rainbow colormap).
# Bins are not runs, so the per-run colours must not be reused here.
bin_centers = 0.5 * (bins[:-1] + bins[1:])
bin_span = bins[-1] - bins[0]
for center, patch in zip(bin_centers, patches):
    fraction = (center - bins[0]) / bin_span if bin_span > 0 else 0.0
    patch.set_facecolor(cm.rainbow(fraction))

ax.axvline(mean_duration, color='red', linestyle='--', linewidth=2.5,
           label=f'Mean: {mean_duration:.1f}s', alpha=0.8)
ax.axvline(median, color='green', linestyle='--', linewidth=2.5,
           label=f'Median: {median:.1f}s', alpha=0.8)
ax.set_xlabel('Duration (seconds)', fontsize=11, fontweight='bold')
ax.set_ylabel('Frequency', fontsize=11, fontweight='bold')
ax.set_title('Duration Distribution', fontsize=12, fontweight='bold')
ax.legend(fontsize=10)
ax.grid(True, alpha=0.3, axis='y', linewidth=0.5)

# 4. Sorted Duration Plot
ax = axes[1, 0]
sorted_indices = np.argsort(durations)
sorted_durations = durations[sorted_indices]
sorted_tests = tests_behavior[sorted_indices]

# Each bar keeps the colour of the run it belongs to
bars = ax.bar(range(len(sorted_durations)), sorted_durations, alpha=0.75,
              color=[run_colors[i] for i in sorted_indices], edgecolor='black', linewidth=1.5)
ax.set_xlabel('Sorted Run Index', fontsize=11, fontweight='bold')
ax.set_ylabel('Duration (seconds)', fontsize=11, fontweight='bold')
ax.set_title('Navigation Duration (Sorted)', fontsize=12, fontweight='bold')
ax.grid(True, alpha=0.3, axis='y', linewidth=0.5)

# Labels for the shortest ("best") and longest ("worst") run
ax.text(0, sorted_durations[0] + 0.5, f'Best: {sorted_durations[0]:.1f}s\n(Run {sorted_tests[0]})',
        ha='center', va='bottom', fontsize=9, fontweight='bold', color='green')
ax.text(len(sorted_durations)-1, sorted_durations[-1] + 0.5,
        f'Worst: {sorted_durations[-1]:.1f}s\n(Run {sorted_tests[-1]})',
        ha='center', va='bottom', fontsize=9, fontweight='bold', color='red')

# 5. Cumulative Distribution
ax = axes[1, 1]
sorted_durations_cdf = np.sort(durations)
cumulative = np.arange(1, len(sorted_durations_cdf) + 1) * (100.0 / len(sorted_durations_cdf))

ax.plot(sorted_durations_cdf, cumulative, color='steelblue', linewidth=3, marker='o',
        markersize=8, markerfacecolor='white', markeredgecolor='steelblue', markeredgewidth=2)
ax.set_xlabel('Duration (seconds)', fontsize=11, fontweight='bold')
ax.set_ylabel('Cumulative Percentage (%)', fontsize=11, fontweight='bold')
ax.set_title('Cumulative Distribution of Duration', fontsize=12, fontweight='bold')
ax.grid(True, alpha=0.3, linewidth=0.5)

# Reference lines at the quartiles
for percentile in [25, 50, 75]:
    value = np.percentile(durations, percentile)
    ax.axhline(percentile, color='gray', linestyle='--', alpha=0.5, linewidth=1)
    ax.axvline(value, color='gray', linestyle='--', alpha=0.5, linewidth=1)
    ax.text(value, percentile + 2, f'{percentile}%: {value:.1f}s', fontsize=9, fontweight='bold')

# 6. Deviation from Mean
ax = axes[1, 2]
deviations = durations - mean_duration
# Green = at or below the mean duration, red = above it
colors_dev = ['green' if d <= 0 else 'red' for d in deviations]

bars = ax.bar(x_pos, deviations, alpha=0.75, color=colors_dev, edgecolor='black', linewidth=1.5)
ax.axhline(0, color='black', linestyle='-', linewidth=1)
ax.set_xlabel('Run Number', fontsize=11, fontweight='bold')
ax.set_ylabel('Deviation from Mean (seconds)', fontsize=11, fontweight='bold')
ax.set_title('Duration Deviation from Mean', fontsize=12, fontweight='bold')
ax.set_xticks(x_pos)
ax.set_xticklabels([f'{t}' for t in tests_behavior])
ax.grid(True, alpha=0.3, axis='y', linewidth=0.5)

# Value labels: above positive bars, below negative ones
for i, dev in enumerate(deviations):
    va = 'bottom' if dev > 0 else 'top'
    offset = 0.2 if dev > 0 else -0.2
    ax.text(i, dev + offset, f'{dev:+.1f}s', ha='center', va=va,
            fontsize=8, fontweight='bold')

plt.tight_layout()
plt.show()

In [None]:
# Prepare data for multi-run comparison
import numpy as np
import matplotlib.pyplot as plt

# Build one time-ordered table per pose source: the robot's own estimate
# ('base_link') and the ground truth ('turtlebot4_base_link_gt').
sort_keys = ['test', 'timestamp']
df_robot = df[df['frame'] == 'base_link'].sort_values(sort_keys)
df_groundtruth = df[df['frame'] == 'turtlebot4_base_link_gt'].sort_values(sort_keys)

# Both sources are required for the error analysis that follows
if df_robot.empty:
    raise ValueError("No robot data (frame='base_link') found. Cannot perform error analysis.")

if df_groundtruth.empty:
    raise ValueError("No ground truth data (frame='turtlebot4_base_link_gt') found. Cannot perform error analysis.")

# print(f"Processing {len(tests)} runs for comparison")
# print(f"Robot data points: {len(df_robot)}")
# print(f"Ground truth data points: {len(df_groundtruth)}")

In [None]:
# Calculate errors for each run separately
from scipy.interpolate import interp1d

# Per-run localization error: robot pose estimate vs. ground truth, evaluated at the
# robot's timestamps inside the time window covered by both sources.
#
# Outputs:
#   run_errors - dict: test id -> arrays (timestamps, position/orientation errors)
#   run_stats  - list of per-run summary dicts
#   stats_df   - DataFrame built from run_stats
# Runs without usable data are skipped with a warning, so run_errors/stats_df may
# contain fewer runs than `tests`.
run_errors = {}
run_stats = []

for test in tests:
    # Rows of this run (already sorted by timestamp)
    df_robot_test = df_robot[df_robot['test'] == test]
    df_gt_test = df_groundtruth[df_groundtruth['test'] == test]

    if df_robot_test.empty or df_gt_test.empty:
        print(f"Warning: Skipping test {test} - no robot or ground truth data found")
        continue

    # Linear interpolation needs at least two samples per source
    if len(df_robot_test) < 2 or len(df_gt_test) < 2:
        print(f"Warning: Skipping test {test} - insufficient data points (need at least 2)")
        continue

    # Time window covered by both sources
    time_start = max(df_robot_test['timestamp'].iloc[0], df_gt_test['timestamp'].iloc[0])
    time_end = min(df_robot_test['timestamp'].iloc[-1], df_gt_test['timestamp'].iloc[-1])

    if time_start >= time_end:
        print(f"Warning: Skipping test {test} - no overlapping time range")
        continue

    # Interpolators for the ground truth
    gt_timestamps = df_gt_test['timestamp'].values
    gt_interp_x = interp1d(gt_timestamps, df_gt_test['position.x'].values, kind='linear', fill_value='extrapolate')
    gt_interp_y = interp1d(gt_timestamps, df_gt_test['position.y'].values, kind='linear', fill_value='extrapolate')

    # Yaw is an angle: interpolating the raw values across the +/-pi wrap would sweep
    # through 0 and yield bogus headings between two samples on either side of the
    # wrap. Unwrapping first makes the series continuous; the error is wrapped back
    # into [-pi, pi] below. (Yaw is treated as radians, as the arctan2 wrap implies.)
    gt_yaw_unwrapped = np.unwrap(df_gt_test['orientation.yaw'].to_numpy(dtype=float))
    gt_interp_yaw = interp1d(gt_timestamps, gt_yaw_unwrapped, kind='linear', fill_value='extrapolate')

    # Robot samples inside the common window
    time_mask = (df_robot_test['timestamp'] >= time_start) & (df_robot_test['timestamp'] <= time_end)
    df_robot_aligned = df_robot_test[time_mask]

    if df_robot_aligned.empty:
        print(f"Warning: Skipping test {test} - no data in common time range")
        continue

    robot_timestamps = df_robot_aligned['timestamp'].values

    # Ground truth evaluated at the robot's timestamps
    gt_x = gt_interp_x(robot_timestamps)
    gt_y = gt_interp_y(robot_timestamps)
    gt_yaw = gt_interp_yaw(robot_timestamps)

    # Position error: per-axis difference and Euclidean distance
    position_error_x = df_robot_aligned['position.x'].values - gt_x
    position_error_y = df_robot_aligned['position.y'].values - gt_y
    absolute_position_error = np.sqrt(position_error_x**2 + position_error_y**2)

    # Orientation error, wrapped into [-pi, pi]
    orientation_error = df_robot_aligned['orientation.yaw'].values - gt_yaw
    orientation_error = np.arctan2(np.sin(orientation_error), np.cos(orientation_error))

    run_errors[test] = {
        'timestamps': robot_timestamps,
        'position_error': absolute_position_error,
        'position_error_x': position_error_x,
        'position_error_y': position_error_y,
        'orientation_error': orientation_error
    }

    # Note: mean/max orientation error use the absolute error, std uses the signed error
    run_stats.append({
        'test': test,
        'mean_pos_error': np.mean(absolute_position_error),
        'std_pos_error': np.std(absolute_position_error),
        'max_pos_error': np.max(absolute_position_error),
        'median_pos_error': np.median(absolute_position_error),
        'p95_pos_error': np.percentile(absolute_position_error, 95),
        'mean_orient_error': np.mean(np.abs(orientation_error)),
        'std_orient_error': np.std(orientation_error),
        'max_orient_error': np.max(np.abs(orientation_error))
    })

# Convert to DataFrame for easy analysis
if len(run_stats) == 0:
    raise ValueError("No valid test data found after error calculation. Check that robot and ground truth data exist for at least one test.")

stats_df = pd.DataFrame(run_stats)

In [None]:
from robovast.common.analysis import calculate_speeds_from_poses

# Speeds are derived from the ground-truth poses, so refuse to continue without them
empty_input_message = "Cannot calculate speeds - ground truth dataframe is empty"
if df_groundtruth.empty:
    raise ValueError(empty_input_message)

df_gt_speeds = calculate_speeds_from_poses(df_groundtruth)

# An empty result would make every downstream speed statistic meaningless
empty_result_message = "Speed calculation returned empty dataframe"
if df_gt_speeds.empty:
    raise ValueError(empty_result_message)

In [None]:
# # Speed Statistics by Run
# print("Speed Statistics by Test Run:")
# print("="*80)

# Per-run speed statistics from the ground-truth speeds.
# One row is produced for every entry of `tests` (in that order). A run without any
# speed samples gets NaN statistics and a warning instead of crashing np.max on an
# empty array.
# Note: mean/max/median angular speed use the absolute value, std uses the signed value.
speed_stat_keys = [
    'mean_linear', 'max_linear', 'std_linear', 'median_linear',
    'mean_angular', 'max_angular', 'std_angular', 'median_angular',
]

speed_stats = []
for test in tests:
    df_test_speeds = df_gt_speeds[df_gt_speeds['test'] == test]

    if df_test_speeds.empty:
        print(f"Warning: No speed data for test {test} - speed statistics set to NaN")
        speed_stats.append({'test': test, **{key: np.nan for key in speed_stat_keys}})
        continue

    linear_speeds = df_test_speeds['linear_speed'].values
    angular_speeds = df_test_speeds['angular_speed'].values
    abs_angular_speeds = np.abs(angular_speeds)

    speed_stats.append({
        'test': test,
        'mean_linear': np.mean(linear_speeds),
        'max_linear': np.max(linear_speeds),
        'std_linear': np.std(linear_speeds),
        'median_linear': np.median(linear_speeds),
        'mean_angular': np.mean(abs_angular_speeds),
        'max_angular': np.max(abs_angular_speeds),
        'std_angular': np.std(angular_speeds),
        'median_angular': np.median(abs_angular_speeds)
    })

speed_stats_df = pd.DataFrame(speed_stats)
# print(speed_stats_df)
# print("\nOverall Statistics:")
# print(f"Mean Linear Speed (avg): {speed_stats_df['mean_linear'].mean():.3f} m/s (±{speed_stats_df['mean_linear'].std():.3f})")
# print(f"Mean Angular Speed (avg): {speed_stats_df['mean_angular'].mean():.3f} rad/s (±{speed_stats_df['mean_angular'].std():.3f})")

In [None]:
# Duration vs performance metrics.
# Duration, position error and speed live in three different tables
# (df_behavior_info, stats_df, speed_stats_df). They are paired by test id here,
# because stats_df only contains runs that survived the error calculation and a
# purely positional pairing would mix up runs or fail on a length mismatch.
fig, axes = plt.subplots(1, 2, figsize=(16, 6))

fallback_color = (0.5, 0.5, 0.5, 1.0)
color_by_test = {test: tuple(color) for test, color in zip(tests, colors)}
pos_error_by_test = dict(zip(stats_df['test'], stats_df['mean_pos_error']))
linear_speed_by_test = dict(zip(speed_stats_df['test'], speed_stats_df['mean_linear']))

# Behaviour rows for which both metrics are available, in df_behavior_info order
paired_rows = [
    (test, duration)
    for test, duration in zip(df_behavior_info['test'], df_behavior_info['duration'])
    if test in pos_error_by_test and test in linear_speed_by_test
]


def _scatter_with_trend(ax, x, y, run_ids, point_colors, ylabel, title):
    """Scatter y over navigation duration x, one labelled point per run.

    A least-squares trend line and the Pearson correlation are added only when at
    least two finite points with distinct x values exist; otherwise the fit is
    undefined and is left out.
    """
    ax.scatter(x, y, c=point_colors, s=250, alpha=0.75, edgecolors='black', linewidth=2)

    for run_id, x_val, y_val in zip(run_ids, x, y):
        ax.annotate(f'Run {run_id}', (x_val, y_val),
                    xytext=(5, 5), textcoords='offset points', fontsize=9, fontweight='bold')

    finite = np.isfinite(x) & np.isfinite(y)
    if finite.sum() >= 2 and np.ptp(x[finite]) > 0:
        z = np.polyfit(x[finite], y[finite], 1)
        p = np.poly1d(z)
        # Signed format for the intercept avoids rendering "+-0.1234"
        ax.plot(x[finite], p(x[finite]), "r--", alpha=0.8, linewidth=2,
                label=f'Trend: y={z[0]:.4f}x{z[1]:+.4f}')
        ax.legend(fontsize=10)

        correlation = np.corrcoef(x[finite], y[finite])[0, 1]
        ax.text(0.05, 0.95, f'Correlation: {correlation:.3f}',
                transform=ax.transAxes, fontsize=11, fontweight='bold',
                bbox=dict(boxstyle='round', facecolor='wheat', alpha=0.7), verticalalignment='top')

    ax.set_xlabel('Navigation Duration (seconds)', fontsize=12, fontweight='bold')
    ax.set_ylabel(ylabel, fontsize=12, fontweight='bold')
    ax.set_title(title, fontsize=13, fontweight='bold')
    ax.grid(True, alpha=0.3, linewidth=0.5)


if len(paired_rows) == 0:
    print("Warning: No data available for duration vs performance comparison")
    plt.close(fig)
else:
    paired_tests = [test for test, _ in paired_rows]
    point_colors = [color_by_test.get(test, fallback_color) for test in paired_tests]
    durations_for_scatter = np.array([duration for _, duration in paired_rows], dtype=float)
    mean_pos_errors = np.array([pos_error_by_test[test] for test in paired_tests], dtype=float)
    mean_linear_speeds = np.array([linear_speed_by_test[test] for test in paired_tests], dtype=float)

    # 1. Duration vs Mean Position Error
    _scatter_with_trend(axes[0], durations_for_scatter, mean_pos_errors, paired_tests, point_colors,
                        'Mean Position Error (m)', 'Duration vs Position Error')

    # 2. Duration vs Mean Linear Speed
    _scatter_with_trend(axes[1], durations_for_scatter, mean_linear_speeds, paired_tests, point_colors,
                        'Mean Linear Speed (m/s)', 'Duration vs Mean Speed')

    plt.tight_layout()
    plt.show()

    # Summary over all recorded durations, computed locally so this cell does not
    # depend on variables left behind by an earlier plotting cell.
    summary_durations = df_behavior_info['duration'].values
    summary_tests = df_behavior_info['test'].values
    summary_mean = summary_durations.mean()
    ranking = np.argsort(summary_durations)
    ranked_durations = summary_durations[ranking]
    ranked_tests = summary_tests[ranking]

    print("\n" + "="*80)
    print("DURATION ANALYSIS SUMMARY")
    print("="*80)
    print(f"\nNavigation Duration:")
    print(f"  Mean: {summary_mean:.2f} seconds")
    print(f"  Std Dev: {summary_durations.std():.2f} seconds")
    print(f"  Range: {summary_durations.min():.2f}s - {summary_durations.max():.2f}s ({summary_durations.max() - summary_durations.min():.2f}s difference)")
    print(f"  Coefficient of Variation: {(summary_durations.std()/summary_mean)*100:.2f}%")

    if len(ranked_tests) > 0 and len(ranked_durations) > 0:
        print(f"\nBest Run: {ranked_tests[0]} with {ranked_durations[0]:.2f} seconds")
        print(f"Worst Run: {ranked_tests[-1]} with {ranked_durations[-1]:.2f} seconds")
        # The relative difference is undefined for a zero-length best run
        if ranked_durations[0] > 0:
            print(f"Performance Difference: {((ranked_durations[-1] - ranked_durations[0]) / ranked_durations[0] * 100):.1f}%")
    print("="*80)

## Localization Errors

In [None]:
# Multi-run comparison: localization error over time (2x2 figure)
#   [0,0] position error per run        [0,1] mean error with +/-1 std band
#   [1,0] orientation error per run     [1,1] position error box plot per run
# Side effect: when every run has at least two samples, `error_matrix` (one resampled
# error row per run), `common_time`, `mean_error` and `std_error` are left at notebook level.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))


def _legend_if_labeled(ax, **legend_kwargs):
    """Draw a legend only if the axis holds labeled artists (avoids matplotlib's empty-legend warning)."""
    handles, _ = ax.get_legend_handles_labels()
    if handles:
        ax.legend(**legend_kwargs)


if len(run_errors) == 0:
    print("Warning: No error data available for plotting")
    plt.close(fig)
else:
    # Runs that have error data, paired with their colour, in `tests` order.
    # 'timestamps' and 'position_error' have the same length for every run.
    plotted_runs = [
        (test, color, run_errors[test])
        for test, color in zip(tests, colors)
        if test in run_errors and len(run_errors[test]['timestamps']) > 0
    ]

    # 1. Position errors over time for all runs
    ax = axes[0, 0]
    for test, color, errors in plotted_runs:
        # Time relative to the first aligned sample of the run
        time_normalized = errors['timestamps'] - errors['timestamps'][0]
        ax.plot(time_normalized, errors['position_error'], color=color, alpha=0.6, linewidth=1.5, label=f'Run {test}')

    ax.set_xlabel('Time (s)', fontsize=11)
    ax.set_ylabel('Absolute Position Error (m)', fontsize=11)
    ax.set_title('Position Error Over Time - All Runs', fontsize=12, fontweight='bold')
    _legend_if_labeled(ax, loc='upper right', fontsize=8, ncol=2)
    ax.grid(True, alpha=0.3)

    # 2. Mean error trajectory with standard deviation band
    ax = axes[0, 1]
    # Resample every run onto a shared grid with as many points as the shortest run
    min_length = min(len(run_errors[test]['position_error']) for test in run_errors)

    if min_length > 1:
        # The grid is run progress from 0 to 100 (start to end of each run), not
        # seconds. NOTE(review): samples are spread evenly by index, which matches
        # real time only if the poses were recorded at a constant rate — confirm.
        common_time = np.linspace(0, 100, min_length)
        error_matrix = []

        for test in tests:
            if test not in run_errors:
                continue
            position_error = run_errors[test]['position_error']
            if len(position_error) < 2:
                continue
            run_progress = np.linspace(0, 100, len(position_error))
            interp_func = interp1d(run_progress, position_error, kind='linear')
            error_matrix.append(interp_func(common_time))

        if len(error_matrix) > 0:
            error_matrix = np.array(error_matrix)
            mean_error = np.mean(error_matrix, axis=0)
            std_error = np.std(error_matrix, axis=0)

            ax.plot(common_time, mean_error, 'b-', linewidth=2.5, label='Mean')
            ax.fill_between(common_time, mean_error - std_error, mean_error + std_error,
                            alpha=0.3, color='blue', label='±1 Std Dev')

    # Axis label fixed: the x axis is normalized progress, not seconds
    ax.set_xlabel('Normalized Run Progress (%)', fontsize=11)
    ax.set_ylabel('Absolute Position Error (m)', fontsize=11)
    ax.set_title('Mean Position Error with Variability Band', fontsize=12, fontweight='bold')
    _legend_if_labeled(ax, fontsize=10)
    ax.grid(True, alpha=0.3)

    # 3. Orientation errors over time for all runs
    ax = axes[1, 0]
    for test, color, errors in plotted_runs:
        time_normalized = errors['timestamps'] - errors['timestamps'][0]
        ax.plot(time_normalized, np.abs(errors['orientation_error']), color=color, alpha=0.6, linewidth=1.5, label=f'Run {test}')

    ax.set_xlabel('Time (s)', fontsize=11)
    ax.set_ylabel('Absolute Orientation Error (rad)', fontsize=11)
    ax.set_title('Orientation Error Over Time - All Runs', fontsize=12, fontweight='bold')
    _legend_if_labeled(ax, loc='upper right', fontsize=8, ncol=2)
    ax.grid(True, alpha=0.3)

    # 4. Box plot comparison of position errors across runs
    ax = axes[1, 1]
    error_data = [errors['position_error'] for _, _, errors in plotted_runs]

    if len(error_data) > 0:
        bp = ax.boxplot(error_data, tick_labels=[f'{test}' for test, _, _ in plotted_runs], patch_artist=True)

        # Each box gets the colour of its run
        for patch, (_, color, _) in zip(bp['boxes'], plotted_runs):
            patch.set_facecolor(color)
            patch.set_alpha(0.6)

    ax.set_xlabel('Run Number', fontsize=11)
    ax.set_ylabel('Absolute Position Error (m)', fontsize=11)
    ax.set_title('Position Error Distribution Across Runs', fontsize=12, fontweight='bold')
    ax.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    plt.show()

In [None]:
# Statistical comparison across runs (2x2 bar charts of the per-run error statistics).
# Everything is driven by stats_df: it only contains runs that survived the error
# calculation, so bar positions, tick labels and colours are derived from its 'test'
# column rather than from the full `tests` list (which can be longer).
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

fallback_color = (0.5, 0.5, 0.5, 1.0)
color_by_test = {test: tuple(color) for test, color in zip(tests, colors)}
run_labels = [f'{test}' for test in stats_df['test']]
bar_colors = [color_by_test.get(test, fallback_color) for test in stats_df['test']]
x_pos = np.arange(len(stats_df))


def _finish_run_axis(ax, ylabel, title):
    """Apply the x axis (one tick per run), labels, title and grid shared by all four panels."""
    ax.set_xlabel('Run Number', fontsize=11)
    ax.set_ylabel(ylabel, fontsize=11)
    ax.set_title(title, fontsize=12, fontweight='bold')
    ax.set_xticks(x_pos)
    ax.set_xticklabels(run_labels)
    ax.grid(True, alpha=0.3, axis='y')


# 1. Bar chart of mean errors with error bars (std dev)
ax = axes[0, 0]
means = stats_df['mean_pos_error'].values
stds = stats_df['std_pos_error'].values

bars = ax.bar(x_pos, means, yerr=stds, capsize=5, alpha=0.7, color=bar_colors, edgecolor='black', linewidth=1.5)
_finish_run_axis(ax, 'Mean Position Error (m)', 'Mean Position Error by Run (with Std Dev)')

# Value label just above the upper error bar
for i, (mean, std) in enumerate(zip(means, stds)):
    ax.text(i, mean + std + 0.01, f'{mean:.3f}', ha='center', va='bottom', fontsize=8)

# 2. Max error comparison
ax = axes[0, 1]
maxs = stats_df['max_pos_error'].values
bars = ax.bar(x_pos, maxs, alpha=0.7, color=bar_colors, edgecolor='black', linewidth=1.5)
_finish_run_axis(ax, 'Max Position Error (m)', 'Maximum Position Error by Run')

# 3. Mean orientation error comparison
ax = axes[1, 0]
orient_means = stats_df['mean_orient_error'].values
orient_stds = stats_df['std_orient_error'].values

bars = ax.bar(x_pos, orient_means, yerr=orient_stds, capsize=5, alpha=0.7, color=bar_colors, edgecolor='black', linewidth=1.5)
_finish_run_axis(ax, 'Mean Orientation Error (rad)', 'Mean Orientation Error by Run (with Std Dev)')

# 4. Percentile comparison (50th, 95th) as grouped bars
ax = axes[1, 1]
p50s = stats_df['median_pos_error'].values
p95s = stats_df['p95_pos_error'].values

width = 0.35
x_pos1 = x_pos - width/2
x_pos2 = x_pos + width/2

ax.bar(x_pos1, p50s, width, label='Median (50th)', alpha=0.7, color='skyblue', edgecolor='black', linewidth=1.5)
ax.bar(x_pos2, p95s, width, label='95th Percentile', alpha=0.7, color='coral', edgecolor='black', linewidth=1.5)

_finish_run_axis(ax, 'Position Error (m)', 'Position Error Percentiles by Run')
ax.legend(fontsize=10)

plt.tight_layout()
plt.show()

In [None]:
# Error distribution comparison (2x2 figure): histograms, CDFs and violin plots of
# the position error plus histograms of the absolute orientation error.
# Only runs present in run_errors are drawn; indexing run_errors with every entry
# of `tests` would raise KeyError for runs that were skipped during error calculation.
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

valid_runs = [
    (test, color)
    for test, color in zip(tests, colors)
    if test in run_errors and len(run_errors[test]['position_error']) > 0
]

if len(valid_runs) == 0:
    print("Warning: No error data available for plotting")
    plt.close(fig)
else:
    # 1. Overlayed histograms of position errors
    ax = axes[0, 0]
    for test, color in valid_runs:
        errors = run_errors[test]['position_error']
        ax.hist(errors, bins=40, alpha=0.5, color=color, label=f'Run {test}', edgecolor='black', linewidth=0.5)

    ax.set_xlabel('Absolute Position Error (m)', fontsize=11)
    ax.set_ylabel('Frequency', fontsize=11)
    ax.set_title('Position Error Distribution - All Runs Overlayed', fontsize=12, fontweight='bold')
    ax.legend(fontsize=8, ncol=2)
    ax.grid(True, alpha=0.3, axis='y')

    # 2. Cumulative distribution functions for all runs
    ax = axes[0, 1]
    for test, color in valid_runs:
        sorted_errors = np.sort(run_errors[test]['position_error'])
        # Empirical CDF in percent: the k-th smallest error maps to k/n * 100
        cumulative = np.arange(1, len(sorted_errors) + 1) * (100.0 / len(sorted_errors))
        ax.plot(sorted_errors, cumulative, color=color, linewidth=2, alpha=0.7, label=f'Run {test}')

    ax.set_xlabel('Absolute Position Error (m)', fontsize=11)
    ax.set_ylabel('Cumulative Percentage (%)', fontsize=11)
    ax.set_title('Cumulative Distribution of Position Errors', fontsize=12, fontweight='bold')
    ax.legend(fontsize=8, ncol=2)
    ax.grid(True, alpha=0.3)

    # Reference lines for key percentiles, labelled near the right edge of the axis
    ax.axhline(50, color='gray', linestyle='--', alpha=0.5, linewidth=1)
    ax.axhline(95, color='gray', linestyle='--', alpha=0.5, linewidth=1)
    ax.text(ax.get_xlim()[1]*0.95, 50, '50%', ha='right', va='bottom', fontsize=9)
    ax.text(ax.get_xlim()[1]*0.95, 95, '95%', ha='right', va='bottom', fontsize=9)

    # 3. Violin plots showing distribution shape
    ax = axes[1, 0]
    error_data = [run_errors[test]['position_error'] for test, _ in valid_runs]
    violin_positions = range(len(valid_runs))
    parts = ax.violinplot(error_data, positions=violin_positions, showmeans=True, showmedians=True)

    # One violin body per run, coloured like the run
    for pc, (_, color) in zip(parts['bodies'], valid_runs):
        pc.set_facecolor(color)
        pc.set_alpha(0.6)

    ax.set_xlabel('Run Number', fontsize=11)
    ax.set_ylabel('Absolute Position Error (m)', fontsize=11)
    ax.set_title('Position Error Distribution Shape by Run', fontsize=12, fontweight='bold')
    ax.set_xticks(violin_positions)
    ax.set_xticklabels([f'{test}' for test, _ in valid_runs])
    ax.grid(True, alpha=0.3, axis='y')

    # 4. Orientation error distributions (overlayed histograms)
    ax = axes[1, 1]
    for test, color in valid_runs:
        errors = np.abs(run_errors[test]['orientation_error'])
        ax.hist(errors, bins=40, alpha=0.5, color=color, label=f'Run {test}', edgecolor='black', linewidth=0.5)

    ax.set_xlabel('Absolute Orientation Error (rad)', fontsize=11)
    ax.set_ylabel('Frequency', fontsize=11)
    ax.set_title('Orientation Error Distribution - All Runs Overlayed', fontsize=12, fontweight='bold')
    ax.legend(fontsize=8, ncol=2)
    ax.grid(True, alpha=0.3, axis='y')

    plt.tight_layout()
    plt.show()

In [None]:
# Aggregate Statistics and Summary
fig, axes = plt.subplots(2, 2, figsize=(16, 12))

# 1. Summary statistics table visualization
ax = axes[0, 0]
ax.axis('tight')
ax.axis('off')

# Calculate overall statistics
overall_mean = stats_df['mean_pos_error'].mean()
overall_std = stats_df['mean_pos_error'].std()
overall_min = stats_df['mean_pos_error'].min()
overall_max = stats_df['mean_pos_error'].max()

table_data = [
    ['Metric', 'Value'],
    ['Overall Mean Pos Error', f'{overall_mean:.4f} m'],
    ['Std Dev Across Runs', f'{overall_std:.4f} m'],
    ['Best Run Mean Error', f'{overall_min:.4f} m'],
    ['Worst Run Mean Error', f'{overall_max:.4f} m'],
    ['Coefficient of Variation', f'{(overall_std/overall_mean)*100:.2f}%'],
    ['', ''],
    ['Mean Orient Error (avg)', f'{stats_df["mean_orient_error"].mean():.4f} rad ({np.degrees(stats_df["mean_orient_error"].mean()):.2f}°)'],
    ['Orient Error Std Dev', f'{stats_df["mean_orient_error"].std():.4f} rad ({np.degrees(stats_df["mean_orient_error"].std()):.2f}°)'],
]

table = ax.table(cellText=table_data, cellLoc='left', loc='center',
                colWidths=[0.5, 0.5])
table.auto_set_font_size(False)
table.set_fontsize(11)
table.scale(1, 2.5)

# Style header row
for i in range(2):
    table[(0, i)].set_facecolor('#4CAF50')
    table[(0, i)].set_text_props(weight='bold', color='white')

ax.set_title('Overall Statistics Across All Runs', fontsize=12, fontweight='bold', pad=20)

# 2. Scatter plot: Mean vs Std Dev for each run
ax = axes[0, 1]
scatter = ax.scatter(stats_df['mean_pos_error'], stats_df['std_pos_error'], 
                    c=colors, s=200, alpha=0.7, edgecolors='black', linewidth=2)

for i, test in enumerate(tests):
    ax.annotate(f'Run {test}', 
               (stats_df.iloc[i]['mean_pos_error'], stats_df.iloc[i]['std_pos_error']),
               xytext=(5, 5), textcoords='offset points', fontsize=9)

ax.set_xlabel('Mean Position Error (m)', fontsize=11)
ax.set_ylabel('Standard Deviation (m)', fontsize=11)
ax.set_title('Mean vs Variability by Run', fontsize=12, fontweight='bold')
ax.grid(True, alpha=0.3)

# 3. Run consistency heatmap (correlation between runs)
ax = axes[1, 0]

# Create correlation matrix between runs (using interpolated errors)
correlation_matrix = np.corrcoef(error_matrix)

im = ax.imshow(correlation_matrix, cmap='coolwarm', aspect='auto', vmin=0, vmax=1)
ax.set_xticks(range(len(tests)))
ax.set_yticks(range(len(tests)))
ax.set_xticklabels([f'{t}' for t in tests])
ax.set_yticklabels([f'{t}' for t in tests])
ax.set_xlabel('Run Number', fontsize=11)
ax.set_ylabel('Run Number', fontsize=11)
ax.set_title('Error Pattern Correlation Between Runs', fontsize=12, fontweight='bold')

# Add colorbar
cbar = plt.colorbar(im, ax=ax)
cbar.set_label('Correlation', fontsize=10)

# Add correlation values
for i in range(len(tests)):
    for j in range(len(tests)):
        text = ax.text(j, i, f'{correlation_matrix[i, j]:.2f}',
                      ha="center", va="center", color="black", fontsize=8)

# 4. Range and quartile visualization
ax = axes[1, 1]

# Five-number summary of the position error per run: [min, Q1, median, Q3, max]
summary_rows = []
for test in tests:
    run_err = run_errors[test]['position_error']
    lower_q, median, upper_q = (np.percentile(run_err, pct) for pct in (25, 50, 75))
    summary_rows.append([np.min(run_err), lower_q, median, upper_q, np.max(run_err)])

quartile_data = np.array(summary_rows)
x_pos = np.arange(len(tests))

# One vertical glyph per run: thin line = full range, thick line = IQR, dot = median
for idx, (run_color, row) in enumerate(zip(colors, quartile_data)):
    lowest, q1_val, mid_val, q3_val, highest = row[0], row[1], row[2], row[3], row[4]
    column = [idx, idx]
    ax.plot(column, [lowest, highest], color=run_color, linewidth=2, alpha=0.5)
    ax.plot(column, [q1_val, q3_val], color=run_color, linewidth=6, alpha=0.8)
    ax.scatter(idx, mid_val, color=run_color, s=100, zorder=5, edgecolors='black', linewidth=2)

ax.set_title('Error Range and Quartiles by Run', fontsize=12, fontweight='bold')
ax.set_xlabel('Run Number', fontsize=11)
ax.set_ylabel('Position Error (m)', fontsize=11)
ax.set_xticks(x_pos)
ax.set_xticklabels([f'{t}' for t in tests])
ax.grid(True, alpha=0.3, axis='y')

# Legend built from gray proxy artists, since the real glyphs are coloured per run
from matplotlib.lines import Line2D
range_proxy = Line2D([0], [0], color='gray', linewidth=2, alpha=0.5, label='Min-Max Range')
iqr_proxy = Line2D([0], [0], color='gray', linewidth=6, alpha=0.8, label='Q1-Q3 (IQR)')
median_proxy = Line2D([0], [0], marker='o', color='w', markerfacecolor='gray', markersize=8,
                      markeredgecolor='black', markeredgewidth=2, label='Median')
legend_elements = [range_proxy, iqr_proxy, median_proxy]
ax.legend(handles=legend_elements, fontsize=9)

# Render the whole multi-panel figure
plt.tight_layout()
plt.show()

# Print comprehensive summary
# overall_mean / overall_std / overall_min / overall_max and stats_df are
# defined outside this section of the cell.
banner = "=" * 60

# Labels of the runs with the smallest / largest mean position error
best_run = stats_df.loc[stats_df['mean_pos_error'].idxmin(), 'test']
worst_run = stats_df.loc[stats_df['mean_pos_error'].idxmax(), 'test']

# The coefficient of variation (std / mean) is undefined for a zero mean;
# report that explicitly instead of dividing by zero.
if overall_mean:
    cv_text = f"{(overall_std / overall_mean) * 100:.2f}%"
else:
    cv_text = "n/a (mean error is zero)"

print("\n" + banner)
print("COMPREHENSIVE MULTI-RUN COMPARISON SUMMARY")
print(banner)
print(f"\nNumber of Runs: {len(tests)}")
print("\nPosition Error Statistics Across All Runs:")
print(f"  Mean of Means: {overall_mean:.4f} m")
print(f"  Std Dev of Means: {overall_std:.4f} m")
print(f"  Coefficient of Variation: {cv_text}")
print(f"  Best Run: {best_run} ({overall_min:.4f} m)")
print(f"  Worst Run: {worst_run} ({overall_max:.4f} m)")
print(f"  Range: {overall_max - overall_min:.4f} m")


## Speed Analysis

In [None]:
# Multi-Run Speed Comparison - Crisp Visualizations
fig, axes = plt.subplots(2, 3, figsize=(18, 12))

# 1 & 2. Linear / Angular Speed Distribution Comparison
# Both panels are the same overlaid per-run histogram drawn for a different
# speed column; only the angular panel gets a reference line at zero.
histogram_panels = (
    # (axes slot, speed column, x-axis label, title, draw zero line)
    ((0, 0), 'linear_speed', 'Linear Speed (m/s)',
     'Linear Speed Distribution - All Runs', False),
    ((0, 1), 'angular_speed', 'Angular Speed (rad/s)',
     'Angular Speed Distribution - All Runs', True),
)

for slot, speed_col, x_label, panel_title, mark_zero in histogram_panels:
    ax = axes[slot]

    # One semi-transparent histogram per run, coloured like the run
    for test, color in zip(tests, colors):
        df_test_speeds = df_gt_speeds[df_gt_speeds['test'] == test]
        ax.hist(df_test_speeds[speed_col].values, bins=40, alpha=0.5,
                color=color, label=f'Run {test}', edgecolor='black', linewidth=0.5)

    if mark_zero:
        ax.axvline(0, color='black', linestyle='-', linewidth=0.5)

    ax.set_xlabel(x_label, fontsize=11, fontweight='bold')
    ax.set_ylabel('Frequency', fontsize=11, fontweight='bold')
    ax.set_title(panel_title, fontsize=12, fontweight='bold')
    ax.legend(fontsize=8, ncol=2)
    ax.grid(True, alpha=0.3, axis='y', linewidth=0.5)

# 3. Box Plot - Linear Speed Comparison
ax = axes[0, 2]

# One array of linear-speed samples per run, in the order of `tests`
linear_speed_data = []
for test in tests:
    run_mask = df_gt_speeds['test'] == test
    linear_speed_data.append(df_gt_speeds.loc[run_mask, 'linear_speed'].values)

bp = ax.boxplot(linear_speed_data, tick_labels=[f'{t}' for t in tests],
                patch_artist=True, widths=0.6)

# Fill each box with its run colour and give it a black outline
for box, run_color in zip(bp['boxes'], colors):
    box.set_facecolor(run_color)
    box.set_alpha(0.7)
    box.set_edgecolor('black')
    box.set_linewidth(1.5)

# Draw every remaining box-plot artist in black
for artist_group in ('whiskers', 'fliers', 'means', 'medians', 'caps'):
    plt.setp(bp[artist_group], color='black', linewidth=1.5)

ax.set_title('Linear Speed Distribution by Run', fontsize=12, fontweight='bold')
ax.set_xlabel('Run Number', fontsize=11, fontweight='bold')
ax.set_ylabel('Linear Speed (m/s)', fontsize=11, fontweight='bold')
ax.grid(True, alpha=0.3, axis='y', linewidth=0.5)

# 4 & 5. Bar Charts - Mean Linear / Angular Speed with Error Bars
x_pos = np.arange(len(tests))
linear_means = speed_stats_df['mean_linear'].values
linear_stds = speed_stats_df['std_linear'].values
angular_means = speed_stats_df['mean_angular'].values
angular_stds = speed_stats_df['std_angular'].values

# Both panels share the same layout and differ only in data and captions.
# NOTE(review): rows of speed_stats_df are assumed to follow the order of
# `tests`, since bars are placed by position and labelled from `tests`.
bar_panels = (
    # (axes, means, standard deviations, y-axis label, title)
    (axes[1, 0], linear_means, linear_stds,
     'Mean Linear Speed (m/s)', 'Mean Linear Speed by Run'),
    (axes[1, 1], angular_means, angular_stds,
     'Mean Angular Speed (rad/s)', 'Mean Angular Speed by Run'),
)

for ax, means, stds, y_label, panel_title in bar_panels:
    bars = ax.bar(x_pos, means, yerr=stds, capsize=5,
                  alpha=0.75, color=colors, edgecolor='black', linewidth=1.5)
    ax.set_xlabel('Run Number', fontsize=11, fontweight='bold')
    ax.set_ylabel(y_label, fontsize=11, fontweight='bold')
    ax.set_title(panel_title, fontsize=12, fontweight='bold')
    ax.set_xticks(x_pos)
    ax.set_xticklabels([f'{t}' for t in tests])
    ax.grid(True, alpha=0.3, axis='y', linewidth=0.5)

    # Add value labels slightly above the upper end of each error bar
    for i, (mean, std) in enumerate(zip(means, stds)):
        ax.text(i, mean + std + 0.01, f'{mean:.3f}', ha='center', va='bottom',
                fontsize=8, fontweight='bold')

# 6. Scatter Plot - Max Speed Comparison
ax = axes[1, 2]
max_linear = speed_stats_df['max_linear'].values
max_angular = speed_stats_df['max_angular'].values

# One marker per run: x = peak linear speed, y = peak angular speed
scatter = ax.scatter(max_linear, max_angular, c=colors, s=250, alpha=0.75,
                     edgecolors='black', linewidth=2)

# Label every marker with its run, offset a few points so the text clears the marker
for idx, test in enumerate(tests):
    point = (max_linear[idx], max_angular[idx])
    ax.annotate(f'Run {test}', point, xytext=(5, 5),
                textcoords='offset points', fontsize=9, fontweight='bold')

ax.set_title('Maximum Speed Comparison', fontsize=12, fontweight='bold')
ax.set_xlabel('Max Linear Speed (m/s)', fontsize=11, fontweight='bold')
ax.set_ylabel('Max Angular Speed (rad/s)', fontsize=11, fontweight='bold')
ax.grid(True, alpha=0.3, linewidth=0.5)

# Render the whole 2x3 figure
plt.tight_layout()
plt.show()

In [None]:
# Speed Profile Over Time - Multi-Run Comparison

# Error check: ensure we have speed data *before* creating the figure, so an
# empty dataset never allocates a blank figure that then has to be closed.
if df_gt_speeds.empty:
    print("Warning: No speed data available for plotting")
else:
    fig, axes = plt.subplots(2, 1, figsize=(16, 10))

    # Split the data once per run (both panels share it); runs without any
    # samples are skipped.
    run_traces = []
    for test, color in zip(tests, colors):
        df_test_speeds = df_gt_speeds[df_gt_speeds['test'] == test]
        if df_test_speeds.empty:
            continue
        timestamps = df_test_speeds['timestamp'].values
        # Normalize time so each run starts at 0, relative to its first row
        timestamps_norm = timestamps - timestamps[0]
        run_traces.append((test, color, timestamps_norm, df_test_speeds))

    # Both panels plot one line per run and differ only in column and captions
    profile_panels = (
        # (axes, speed column, y-axis label, title)
        (axes[0], 'linear_speed', 'Linear Speed (m/s)',
         'Linear Speed Profile Over Time - All Runs'),
        (axes[1], 'angular_speed', 'Angular Speed (rad/s)',
         'Angular Speed Profile Over Time - All Runs'),
    )

    for ax, speed_col, y_label, panel_title in profile_panels:
        for test, color, timestamps_norm, df_test_speeds in run_traces:
            ax.plot(timestamps_norm, df_test_speeds[speed_col].values, color=color,
                    alpha=0.7, linewidth=1.5, label=f'Run {test}')

        ax.set_xlabel('Time (s)', fontsize=12, fontweight='bold')
        ax.set_ylabel(y_label, fontsize=12, fontweight='bold')
        ax.set_title(panel_title, fontsize=13, fontweight='bold')
        ax.legend(loc='upper right', fontsize=9, ncol=3)
        ax.grid(True, alpha=0.3, linewidth=0.5)

    # 1. Linear speed panel: anchor the y-axis at zero
    axes[0].set_ylim(bottom=0)

    # 2. Angular speed panel: reference line at zero
    axes[1].axhline(0, color='black', linestyle='-', linewidth=0.5)

    plt.tight_layout()
    plt.show()