# ROS Bag Data Visualization - Whole Runs Analysis

This notebook provides a comprehensive analysis across complete run hierarchies (e.g., a `runs-1` directory holding multiple room-generated scenario folders, each of which contains individual runs).

In [None]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import seaborn as sns
from pathlib import Path
import glob
from collections import defaultdict
import os


# Input CSV files to analyze. Grouping below expects each file to sit at
#   <scenario_group>/<run_id>/.../<file>.csv
# relative to the common base directory of all inputs.
csv_file_paths = ['messages.csv']
print(f'Loading data from {len(csv_file_paths)} csv files.')

# Find common base path of all csv_file_paths.
# - empty list: fall back to the current directory, because
#   os.path.commonpath() raises ValueError on an empty sequence
# - single file: use its parent directory
# - several files: use their longest common path prefix
if not csv_file_paths:
    base_path = Path('.')
elif len(csv_file_paths) == 1:
    base_path = Path(csv_file_paths[0]).parent
else:
    base_path = Path(os.path.commonpath(csv_file_paths))

# Organize files by their hierarchy: scenario group -> list of CSV paths
run_structure = defaultdict(list)
# Files with fewer than two directory levels below base_path cannot be
# assigned to a scenario group; remember them so they are reported instead
# of disappearing silently.
skipped_files = []
for csv_file in csv_file_paths:
    relative_path = Path(csv_file).relative_to(base_path)
    parts = relative_path.parts[:-1]  # Directory components only (drop the filename)
    if len(parts) >= 2:
        scenario_group = parts[0]  # e.g., 'room-generated-p1'
        run_structure[scenario_group].append(csv_file)
    else:
        skipped_files.append(csv_file)

print('\nRun structure:')
for scenario, files in run_structure.items():
    print(f'  {scenario}: {len(files)} runs')

if skipped_files:
    print(f'\nSkipped {len(skipped_files)} file(s) with fewer than two directory levels below {base_path}:')
    for skipped in skipped_files:
        print(f'  {skipped}')

In [None]:
# Read the CSV runs of every scenario, merge them per scenario, and collect
# per-metric summary statistics.
all_scenarios_data = {}   # scenario name -> concatenated DataFrame of its runs
summary_stats = []        # one dict per (scenario, metric) pair

for scenario_name, csv_files in run_structure.items():
    loaded_runs = []

    # Only the first 10 files of a scenario are read (performance cap)
    for run_csv in csv_files[:10]:
        try:
            run_df = pd.read_csv(run_csv)
            run_dir = Path(run_csv).parent
            # Tag every row with where it came from
            run_df['scenario'] = scenario_name
            run_df['run_id'] = run_dir.name
            run_df['full_run_path'] = str(Path(run_csv).relative_to(base_path).parent)
            loaded_runs.append(run_df)
        except Exception as e:
            # Best effort: report the failing file and continue with the rest
            print(f'Error loading {run_csv}: {e}')

    if not loaded_runs:
        continue

    merged = pd.concat(loaded_runs, ignore_index=True)
    all_scenarios_data[scenario_name] = merged

    # Numeric columns, leaving out the 'timestamp' column
    metric_columns = [
        name
        for name in merged.select_dtypes(include=[np.number]).columns
        if name not in ['timestamp']
    ]

    # Summarise at most the first three numeric columns
    row_total = len(merged)
    for metric in metric_columns[:3]:
        values = merged[metric]
        summary_stats.append({
            'scenario': scenario_name,
            'metric': metric,
            'mean': values.mean(),
            'std': values.std(),
            'min': values.min(),
            'max': values.max(),
            'count': row_total,  # total rows of the merged scenario frame
        })

    print(f'Loaded {scenario_name}: {row_total} rows from {len(loaded_runs)} runs')

print(f'\nTotal scenarios analyzed: {len(all_scenarios_data)}')

In [None]:
# Draw a 2x2 dashboard comparing scenarios, then print the summary table
if not (all_scenarios_data and summary_stats):
    print('No data available for whole runs analysis')
else:
    # One row per (scenario, metric) pair
    summary_df = pd.DataFrame(summary_stats)

    fig, axes = plt.subplots(2, 2, figsize=(16, 12))
    fig.suptitle('Whole Runs Analysis Dashboard', fontsize=16)
    (volume_ax, mean_ax), (std_ax, range_ax) = axes

    # Top-left panel: row count per scenario (every metric row of a scenario
    # carries the same count, so the first one is taken)
    rows_per_scenario = summary_df.groupby('scenario')['count'].first()
    rows_per_scenario.plot(kind='bar', ax=volume_ax, color='skyblue')
    volume_ax.set_title('Data Volume by Scenario')
    volume_ax.set_ylabel('Number of Rows')
    volume_ax.tick_params(axis='x', rotation=45)

    # Remaining panels: mean, standard deviation and range of the first metric
    metric_names = summary_df['metric'].unique()
    if len(metric_names) > 0:
        first_metric = metric_names[0]
        metric_data = summary_df[summary_df['metric'] == first_metric]
        range_data = metric_data.assign(range=metric_data['max'] - metric_data['min'])

        # (frame, column, axis, bar color, title, y-axis label) per panel
        panels = [
            (metric_data, 'mean', mean_ax, 'lightgreen',
             f'Mean {first_metric} by Scenario', f'Mean {first_metric}'),
            (metric_data, 'std', std_ax, 'orange',
             f'Variability ({first_metric}) by Scenario', f'Std Dev {first_metric}'),
            (range_data, 'range', range_ax, 'salmon',
             f'Range ({first_metric}) by Scenario', f'Range {first_metric}'),
        ]
        for frame, column, panel_ax, bar_color, title, ylabel in panels:
            frame.plot(x='scenario', y=column, kind='bar', ax=panel_ax, color=bar_color)
            panel_ax.set_title(title)
            panel_ax.set_ylabel(ylabel)
            panel_ax.tick_params(axis='x', rotation=45)

    plt.tight_layout()
    plt.show()

    # Tabular view of the same statistics, rounded to three decimals
    print('\nSummary Statistics by Scenario:')
    print(summary_df.round(3).to_string(index=False))