# SOLAQUA Data Export Demo

This notebook demonstrates how to use `solaqua_utils` for **data bag** exports and reporting.

In [1]:
# If your package is in the same repo, ensure Python can find it (edit the path if needed)
import sys, os
from pathlib import Path
sys.set_int_max_str_digits(100000)

# Add project root to sys.path if running from a subfolder (edit as needed)
project_root = Path.cwd()  # change to your repo root if necessary
if str(project_root) not in sys.path:
    sys.path.insert(0, str(project_root))

print("Python version:", sys.version)
print("Working directory:", Path.cwd())


Python version: 3.13.5 (main, Jun 11 2025, 15:36:57) [Clang 17.0.0 (clang-1700.0.13.3)]
Working directory: /Users/eirikvarnes/code/SOLAQUA


In [2]:
# Core utilities for data export and reporting
from utils.data_export import (
    list_topics_in_bag, bag_topic_to_dataframe, save_all_topics_from_data_bags
)
from utils.reporting import (
    load_data_index, list_exported_bag_stems, overview_by_bag, overview_by_datetime,
    topics_in_bag_df, topics_overview_dir
)
from utils.core import find_data_bags
from pathlib import Path
import pandas as pd


## Configure paths

In [3]:
# Point this to your dataset folder containing *_data.bag files
DATA_DIR = Path("data")  # <-- change to your folder
EXPORTS_DIR = Path("exports")
EXPORTS_DIR.mkdir(parents=True, exist_ok=True)

print("Data dir:", DATA_DIR.resolve())
print("Exports dir:", EXPORTS_DIR.resolve())

# Quick peek: list available *_data.bag files
bags = find_data_bags(DATA_DIR, recursive=True)
bags[:5]


Data dir: /Users/eirikvarnes/code/SOLAQUA/data
Exports dir: /Users/eirikvarnes/code/SOLAQUA/exports


[PosixPath('data/2024-08-20_13-39-34_data.bag'),
 PosixPath('data/2024-08-20_13-40-35_data.bag'),
 PosixPath('data/2024-08-22_14-06-43_data.bag'),
 PosixPath('data/2024-08-22_14-29-05_data.bag'),
 PosixPath('data/2024-08-22_14-47-39_data.bag')]

## Inspect topics in a bag

In [4]:
if bags:
    bagpath = bags[0]
    print("Inspecting:", bagpath.name)
    list_topics_in_bag(bagpath)
else:
    print("No *_data.bag files found under", DATA_DIR)


Inspecting: 2024-08-20_13-39-34_data.bag


## Export all topics to CSV/Parquet

In [5]:
# Choose output format: 'csv' or 'parquet'
FILE_FORMAT = 'csv'  # 'parquet'

# Example: skip heavy image payloads by default (see utils for include/exclude)
if bags:
    index_df = save_all_topics_from_data_bags(
        DATA_DIR, out_dir=EXPORTS_DIR, file_format=FILE_FORMAT, recursive=True
    )
    index_df.head(10)
else:
    print("Skipping export since no bags were found.")


Wrote exports/by_bag/bluerov2_alive__2024-08-20_13-39-34_data.csv (90 rows).
Wrote exports/by_bag/bluerov2_armed__2024-08-20_13-39-34_data.csv (90 rows).
Wrote exports/by_bag/bluerov2_battery__2024-08-20_13-39-34_data.csv (39 rows).
Wrote exports/by_bag/bluerov2_modes__2024-08-20_13-39-34_data.csv (1 rows).
Wrote exports/by_bag/commanded_thrust__2024-08-20_13-39-34_data.csv (859 rows).
Wrote exports/by_bag/controller_x__2024-08-20_13-39-34_data.csv (296 rows).
Wrote exports/by_bag/controller_y__2024-08-20_13-39-34_data.csv (300 rows).
Wrote exports/by_bag/controller_yaw__2024-08-20_13-39-34_data.csv (301 rows).
Wrote exports/by_bag/controller_z__2024-08-20_13-39-34_data.csv (300 rows).
Wrote exports/by_bag/gui_netFollowing_manager__2024-08-20_13-39-34_data.csv (39 rows).
Wrote exports/by_bag/guidance__2024-08-20_13-39-34_data.csv (293 rows).
Wrote exports/by_bag/joystick__2024-08-20_13-39-34_data.csv (806 rows).
Wrote exports/by_bag/joystick_init__2024-08-20_13-39-34_data.csv (1 rows).

## Load index and list exported bag stems

In [6]:
try:
    idx = load_data_index(EXPORTS_DIR)
    display(idx.head(10))
    stems = list_exported_bag_stems(EXPORTS_DIR, bag_suffix="_data")
    print("Exported bag stems (first 15):", stems[:15])
except FileNotFoundError as e:
    print(e)


Unnamed: 0,bag,bag_file,topic,msgtypes,rows,out_file
0,2024-08-20_13-39-34_data,2024-08-20_13-39-34_data.bag,/bluerov2/alive,['std_msgs/msg/Float32'],90,exports/by_bag/bluerov2_alive__2024-08-20_13-3...
1,2024-08-20_13-39-34_data,2024-08-20_13-39-34_data.bag,/bluerov2/armed,['std_msgs/msg/Float32'],90,exports/by_bag/bluerov2_armed__2024-08-20_13-3...
2,2024-08-20_13-39-34_data,2024-08-20_13-39-34_data.bag,/bluerov2/battery,['messages/msg/BatteryStatus'],39,exports/by_bag/bluerov2_battery__2024-08-20_13...
3,2024-08-20_13-39-34_data,2024-08-20_13-39-34_data.bag,/bluerov2/modes,['joystick/msg/ModeManager2'],1,exports/by_bag/bluerov2_modes__2024-08-20_13-3...
4,2024-08-20_13-39-34_data,2024-08-20_13-39-34_data.bag,/commanded_thrust,['rospy_tutorials/msg/Floats'],859,exports/by_bag/commanded_thrust__2024-08-20_13...
5,2024-08-20_13-39-34_data,2024-08-20_13-39-34_data.bag,/controller/x,['messages/msg/MRACOupdate'],296,exports/by_bag/controller_x__2024-08-20_13-39-...
6,2024-08-20_13-39-34_data,2024-08-20_13-39-34_data.bag,/controller/y,['messages/msg/MRACOupdate'],300,exports/by_bag/controller_y__2024-08-20_13-39-...
7,2024-08-20_13-39-34_data,2024-08-20_13-39-34_data.bag,/controller/yaw,['messages/msg/PIDState'],301,exports/by_bag/controller_yaw__2024-08-20_13-3...
8,2024-08-20_13-39-34_data,2024-08-20_13-39-34_data.bag,/controller/z,['messages/msg/PIDState'],300,exports/by_bag/controller_z__2024-08-20_13-39-...
9,2024-08-20_13-39-34_data,2024-08-20_13-39-34_data.bag,/gui/netFollowing_manager,['messages/msg/NetFollowingManager'],39,exports/by_bag/gui_netFollowing_manager__2024-...


Exported bag stems (first 15): ['2024-08-20_13-39-34_data', '2024-08-20_13-40-35_data', '2024-08-22_14-06-43_data', '2024-08-22_14-29-05_data', '2024-08-22_14-47-39_data']


## Overview by specific bag

In [7]:
if 'stems' in locals() and stems:
    bag_stem = stems[-1]  # pick one
    files_df, topics_df = overview_by_bag(bag_stem, out_dir=EXPORTS_DIR)
    print("Files summary:")
    display(files_df.head(20))
    print("\nTopic aggregates:")
    display(topics_df.head(50))
else:
    print("No stems available yet. Run the export step above.")


Files summary:


Unnamed: 0,bag,topic,out_file,rows,t_min,t_max,duration_s,approx_rate_hz
0,2024-08-22_14-47-39_data,/bluerov2/alive,exports/by_bag/bluerov2_alive__2024-08-22_14-4...,88,1724331000.0,1724331000.0,43.071555,2.043112
1,2024-08-22_14-47-39_data,/bluerov2/armed,exports/by_bag/bluerov2_armed__2024-08-22_14-4...,88,1724331000.0,1724331000.0,43.388417,2.028191
2,2024-08-22_14-47-39_data,/bluerov2/battery,exports/by_bag/bluerov2_battery__2024-08-22_14...,41,1724331000.0,1724331000.0,41.751813,0.981993
3,2024-08-22_14-47-39_data,/bluerov2/modes,exports/by_bag/bluerov2_modes__2024-08-22_14-4...,1,1724331000.0,1724331000.0,0.0,
4,2024-08-22_14-47-39_data,/commanded_thrust,exports/by_bag/commanded_thrust__2024-08-22_14...,860,1724331000.0,1724331000.0,42.954471,20.021199
5,2024-08-22_14-47-39_data,/controller/x,exports/by_bag/controller_x__2024-08-22_14-47-...,372,1724331000.0,1724331000.0,41.268582,9.014121
6,2024-08-22_14-47-39_data,/controller/y,exports/by_bag/controller_y__2024-08-22_14-47-...,377,1724331000.0,1724331000.0,41.817128,9.015444
7,2024-08-22_14-47-39_data,/controller/yaw,exports/by_bag/controller_yaw__2024-08-22_14-4...,368,1724331000.0,1724331000.0,40.828597,9.01329
8,2024-08-22_14-47-39_data,/controller/z,exports/by_bag/controller_z__2024-08-22_14-47-...,376,1724331000.0,1724331000.0,41.699479,9.016899
9,2024-08-22_14-47-39_data,/gui/netFollowing_manager,exports/by_bag/gui_netFollowing_manager__2024-...,42,1724331000.0,1724331000.0,41.009797,1.024146



Topic aggregates:


Unnamed: 0,topic,rows,duration_s,approx_rate_hz
0,/bluerov2/alive,88,43.071555,2.043112
1,/bluerov2/armed,88,43.388417,2.028191
2,/bluerov2/battery,41,41.751813,0.981993
3,/bluerov2/modes,1,0.0,
4,/commanded_thrust,860,42.954471,20.021199
5,/controller/x,372,41.268582,9.014121
6,/controller/y,377,41.817128,9.015444
7,/controller/yaw,368,40.828597,9.01329
8,/controller/z,376,41.699479,9.016899
9,/gui/netFollowing_manager,42,41.009797,1.024146


## Overview by date & time helper

In [8]:
# If your bag naming follows 'YYYY-MM-DD_HH-MM-SS_data', you can use:
# date_str = '2024-08-20'; time_str = '13:55:34'
# bag_stem, files_df, topics_df = overview_by_datetime(date_str, time_str, out_dir=EXPORTS_DIR)
# display(files_df.head()); display(topics_df.head())


## Topic listing for any .bag

In [9]:
# You can run topic listings (with optional counts) on any .bag file:
if bags:
    df_topics = topics_in_bag_df(bags[-1], with_counts=False)
    display(df_topics.head(40))
else:
    print("No bags to list topics from.")


Unnamed: 0,topic,msgtype
0,/bluerov2/alive,std_msgs/msg/Float32
1,/bluerov2/armed,std_msgs/msg/Float32
2,/bluerov2/battery,messages/msg/BatteryStatus
3,/bluerov2/modes,joystick/msg/ModeManager2
4,/commanded_thrust,rospy_tutorials/msg/Floats
5,/controller/x,messages/msg/MRACOupdate
6,/controller/y,messages/msg/MRACOupdate
7,/controller/yaw,messages/msg/PIDState
8,/controller/z,messages/msg/PIDState
9,/gui/netFollowing_manager,messages/msg/NetFollowingManager


## Directory-wide topic overview

In [10]:
# Scan a directory for .bag files and list topics. Set suffix_filter to narrow to '_data' or '_video'.
df_dir = topics_overview_dir(DATA_DIR, recursive=True, suffix_filter="_data", with_counts=False)
display(df_dir.head(30))


Unnamed: 0,bag_file,bag_stem,topic,msgtype
0,2024-08-20_13-39-34_data.bag,2024-08-20_13-39-34_data,/bluerov2/alive,std_msgs/msg/Float32
1,2024-08-20_13-39-34_data.bag,2024-08-20_13-39-34_data,/bluerov2/armed,std_msgs/msg/Float32
2,2024-08-20_13-39-34_data.bag,2024-08-20_13-39-34_data,/bluerov2/battery,messages/msg/BatteryStatus
3,2024-08-20_13-39-34_data.bag,2024-08-20_13-39-34_data,/bluerov2/modes,joystick/msg/ModeManager2
4,2024-08-20_13-39-34_data.bag,2024-08-20_13-39-34_data,/commanded_thrust,rospy_tutorials/msg/Floats
5,2024-08-20_13-39-34_data.bag,2024-08-20_13-39-34_data,/controller/x,messages/msg/MRACOupdate
6,2024-08-20_13-39-34_data.bag,2024-08-20_13-39-34_data,/controller/y,messages/msg/MRACOupdate
7,2024-08-20_13-39-34_data.bag,2024-08-20_13-39-34_data,/controller/yaw,messages/msg/PIDState
8,2024-08-20_13-39-34_data.bag,2024-08-20_13-39-34_data,/controller/z,messages/msg/PIDState
9,2024-08-20_13-39-34_data.bag,2024-08-20_13-39-34_data,/gui/netFollowing_manager,messages/msg/NetFollowingManager


In [11]:
from utils.reporting import topics_overview_dir
df = topics_overview_dir("exports/by_bag", recursive=True, suffix_filter=None, with_counts=False)
df[df["topic"].str.contains("sonar|sonoptix|ping360|ping\\b|mbes", case=False, regex=True)].head(30)


Unnamed: 0,bag_file,bag_stem,topic,msgtype
