contagon · contagon · Sep 10, 2025 · Aug 6, 2025 · Aug 6, 2025 · Aug 8, 2025
diff --git a/python/evalio/datasets/__init__.py b/python/evalio/datasets/__init__.py
@@ -1,5 +1,6 @@
 from .base import Dataset, DatasetIterator, set_data_dir, get_data_dir
 from .botanic_garden import BotanicGarden
+from .cumulti import CUMulti
 from .enwide import EnWide
 from .helipr import HeLiPR
 from .hilti_2022 import Hilti2022
@@ -16,6 +17,7 @@
     "Dataset",
     "DatasetIterator",
     "BotanicGarden",
+    "CUMulti",
     "EnWide",
     "HeLiPR",
     "Hilti2022",

diff --git a/python/evalio/datasets/cumulti.py b/python/evalio/datasets/cumulti.py
@@ -0,0 +1,230 @@
+import os
+import urllib
+import urllib.request
+import zipfile
+from enum import auto
+from pathlib import Path
+
+import numpy as np
+from tqdm.rich import tqdm
+
+from rosbags.typesys import Stores
+
+from evalio.datasets.loaders import (
+    LidarDensity,
+    LidarFormatParams,
+    LidarMajor,
+    LidarPointStamp,
+    LidarStamp,
+    RosbagIter,
+)
+from evalio.types import SE3, Trajectory
+
+from .base import Dataset, DatasetIterator, ImuParams, LidarParams
+
+
+# https://github.com/pytorch/vision/blob/fc746372bedce81ecd53732ee101e536ae3afec1/torchvision/datasets/utils.py#L27
+def _urlretrieve(url: str, filename: Path, chunk_size: int = 1024 * 32) -> None:
+    """
+    Retrieves a file from url using urllib
+    """
+    with urllib.request.urlopen(
+        urllib.request.Request(url, headers={"User-Agent": "evalio"})
+    ) as response:
+        with (
+            open(filename, "wb") as fh,
+            tqdm(
+                total=100e9,  # GLOBUS does not include size in request response, guess=100GB
+                unit="B",
+                unit_scale=True,
+                dynamic_ncols=True,
+            ) as pbar,
+        ):
+            while chunk := response.read(chunk_size):
+                fh.write(chunk)
+                pbar.update(len(chunk))
+
+
+def _extract_noreplace(zip_file: Path, dest_dir: Path):
+    """
+    Extracts files from a zip archive (zip_file) but skips any file that already exists at the destination
+    (Allows to resume zip extractions)
+    """
+    print(f"Extracting: {zip_file.name}")
+    with zipfile.ZipFile(str(zip_file)) as archive:
+        with tqdm(total=len(archive.namelist()), dynamic_ncols=True) as pbar:
+            for filename in archive.namelist():
+                if not (dest_dir / filename).is_file():
+                    archive.extract(filename, path=dest_dir)
+                pbar.update()
+
+
+class CUMulti(Dataset):
+    """
+    Dataset collected by a ground robot (AgileX - Hunter) on the University of Colorado Boulder Campus.
+    """
+
+    kittredge_loop_robot1 = auto()
+    kittredge_loop_robot2 = auto()
+    kittredge_loop_robot3 = auto()
+    kittredge_loop_robot4 = auto()
+    main_campus_robot1 = auto()
+    main_campus_robot2 = auto()
+    main_campus_robot3 = auto()
+    main_campus_robot4 = auto()
+
+    # ------------------------- For loading data ------------------------- #
+    def data_iter(self) -> DatasetIterator:
+        lidar_format = LidarFormatParams(
+            stamp=LidarStamp.Start,
+            point_stamp=LidarPointStamp.Start,
+            major=LidarMajor.Row,
+            density=LidarDensity.AllPoints,
+        )
+
+        robot_name = self.seq_name.split("_")[-1]
+        return RosbagIter(
+            self.folder,
+            f"{robot_name}/ouster/points",
+            f"{robot_name}/imu/data",
+            self.lidar_params(),
+            lidar_format=lidar_format,
+            type_store=Stores.ROS2_FOXY,
+        )
+
+    def ground_truth_raw(self) -> Trajectory:
+        # Sequence Naming information
+        components = self.seq_name.split("_")
+        robot_name = components[-1]
+        loc_name = "_".join(components[:2])
+        gt_file = self.folder / f"{loc_name}_{robot_name}_ref.csv"
+
+        # Load the Trajectory which provides solutions in the UTM frame
+        traj_utm = Trajectory.from_csv(
+            gt_file, ["sec", "x", "y", "z", "qx", "qy", "qz", "qw"]
+        )
+
+        # Subtract out the initial position to get a local frame reference
+        init_position = traj_utm.poses[0].trans
+        return Trajectory(
+            poses=[SE3(p.rot, p.trans - init_position) for p in traj_utm.poses],
+            stamps=traj_utm.stamps,
+        )
+
+    # ------------------------- For loading params ------------------------- #
+    def imu_T_lidar(self) -> SE3:
+        # Supplied by CU-Multi Authors
+        return SE3.fromMat(
+            np.array(
+                [
+                    [-1.0, 0.0, 0.0, -0.058038],
+                    [0.0, -1.0, 0.0, 0.015573],
+                    [0.0, 0.0, 1.0, 0.049603],
+                    [0.0, 0.0, 0.0, 1.000000],
+                ]
+            )
+        )
+
+    def imu_T_gt(self) -> SE3:
+        # Groundtruth provided in the IMU Frame
+        return SE3.identity()
+
+    def imu_params(self) -> ImuParams:
+        # https://www.mouser.com/datasheet/2/1083/3dmgq7_gnssins_ds_0-1900596.pdf
+        return ImuParams(
+            gyro=4.363323129985824e-05,  # From 0.15 Deg / sqrt(hr)
+            accel=1.962e-4,  # From 20 micro-gravity / sqrt(Hz)
+            gyro_bias=1e-6,  # TODO (dan)
+            accel_bias=1e-6,  # TODO (dan)
+            bias_init=1e-8,
+            integration=1e-8,
+            gravity=np.array([0, 0, -9.81]),
+            brand="MicroStrain",
+            model="3DM-GQ7",
+        )
+
+    def lidar_params(self) -> LidarParams:
+        # Ouster OS 64 Rev 7
+        return LidarParams(
+            rate=20,
+            num_rows=64,
+            num_columns=1024,
+            min_range=1.0,
+            max_range=200.0,
+            brand="Ouster",
+            model="OS-64 (Rev 7)",
+        )
+
+    # ------------------------- dataset info ------------------------- #
+    @staticmethod
+    def url() -> str:
+        return "https://arpg.github.io/cumulti/"
+
+    @classmethod
+    def dataset_name(cls) -> str:
+        return "cumulti"
+
+    def environment(self) -> str:
+        return "CU Boulder Campus"
+
+    def vehicle(self) -> str:
+        return "ATV"
+
+    # ------------------------- For downloading ------------------------- #
+    def files(self) -> list[str | Path]:
+        components = self.seq_name.split("_")
+        robot_name = components[-1]
+        loc_name = "_".join(components[:2])
+
+        return [
+            self.folder / f"{robot_name}_{loc_name}_imu_gps",  # IMU Bag
+            self.folder / f"{robot_name}_{loc_name}_lidar",  # LIDAR Bag
+            self.folder / f"{loc_name}_{robot_name}_ref.csv",  # Reference Solution
+        ]
+
+    def download(self):
+        # Sequence Naming information
+        components = self.seq_name.split("_")
+        robot_name = components[-1]
+        loc_name = "_".join(components[:2])
+
+        # File Download URLS
+        globus_url = r"https://g-ad45ee.3d2bab.75bc.data.globus.org"
+        seq_url_base_path = (
+            f"{globus_url}/{loc_name}/{robot_name}/{robot_name}_{loc_name}"
+        )
+        lidar_url = f"{seq_url_base_path}_lidar.zip"
+        imu_url = f"{seq_url_base_path}_imu_gps.zip"
+
+        # Download destination zip files
+        lidar_zip_file = self.folder / f"{robot_name}_{loc_name}_lidar.zip"
+        imu_zip_file = self.folder / f"{robot_name}_{loc_name}_imu_gps.zip"
+
+        # Download all of the data
+        print(f"Downloading to {self.folder}...")
+        self.folder.mkdir(parents=True, exist_ok=True)
+        for zip_file, url in [(lidar_zip_file, lidar_url), (imu_zip_file, imu_url)]:
+            if not zip_file.is_file():
+                print(url)
+                _urlretrieve(url, zip_file)
+            else:
+                print("Archive already exists. Skipping Download.")
+
+            # Extract from the zip only what we dont already have
+            print("Extracting data...")
+            _extract_noreplace(zip_file, self.folder)
+
+        # Download the groundtruth
+        gt_url = f"{globus_url}/{loc_name}/{robot_name}/{loc_name}_{robot_name}/"
+        gt_file = self.folder / f"{loc_name}_{robot_name}_ref.csv"
+        print(gt_url)
+        if not gt_file.is_file():
+            _urlretrieve(gt_url, gt_file)
+        else:
+            print("Groundtruth already exists. Skipping Download.")
+
+        # If we have extracted everything we need then remove the zip directory
+        if self.is_downloaded():
+            print("All data downloaded. Removing zip files...")
+            for zip_file in [lidar_zip_file, imu_zip_file]:
+                os.remove(zip_file)
diff --git a/python/evalio/datasets/loaders.py b/python/evalio/datasets/loaders.py
@@ -21,6 +21,7 @@
 )
 from evalio.datasets.base import DatasetIterator, Measurement
 from rosbags.highlevel import AnyReader
+from rosbags.typesys import Stores, get_typestore
 import numpy as np
 from dataclasses import dataclass
 from enum import StrEnum, auto
@@ -78,19 +79,18 @@ def __init__(
         lidar_topic: str,
         imu_topic: str,
         lidar_params: LidarParams,
-        # for mcap files, we point at the directory, not the file
-        is_mcap: bool = False,
+        type_store: Optional[Stores] = None,
         # Reduce compute by telling the iterator how to format the pointcloud
         lidar_format: Optional[LidarFormatParams] = None,
         custom_col_func: Optional[Callable[[LidarMeasurement], None]] = None,
     ):
         """
         Args:
-            path (Path): Location of rosbag file(s). If a directory is passed, all .bag files in the directory will be loaded.
+            path (Path): Location of rosbag file(s) or dir(s). If a directory is passed containing multiple bags (ros1 or ros2), all will be loaded.
             lidar_topic (str): Name of lidar topic.
             imu_topic (str): Name of imu topic.
             lidar_params (LidarParams): Lidar parameters, can be gotten from [lidar_params][evalio.datasets.Dataset.lidar_params].
-            is_mcap (bool, optional): If an mcap file, will not try to glob over all rosbags. Defaults to False.
+            type_store (Optional[Stores], optional): Additional type store to be loaded into rosbags. Defaults to None.
             lidar_format (Optional[LidarFormatParams], optional): Various parameters for how lidar data is stored. If not specified, most will try to be inferred. We strongly recommend setting this to ensure data is standardized properly. Defaults to None.
             custom_col_func (Optional[Callable[[LidarMeasurement], None]], optional): Function to put the point cloud in row major format. Will generally not be needed, except for strange default orderings. Defaults to None.
 
@@ -107,17 +107,41 @@ def __init__(
             self.lidar_format = lidar_format
         self.custom_col_func = custom_col_func
 
-        # Glob to get all .bag files in the directory
-        if path.is_dir() and is_mcap is False:
-            self.path = [p for p in path.glob("*.bag") if "orig" not in str(p)]
-            if not self.path:
-                raise FileNotFoundError(f"No .bag files found in directory {path}")
-        else:
+        # Find all bags (may be either ros1 .bag files or ros2 bag/ dirs)
+        if path.is_file():
+            # Provide path is a ros1 .bag file
             self.path = [path]
+        else:
+
+            def is_ros2_bag(d):
+                return bool(list(d.glob("*.mcap")) + list(d.glob("*.db3")))
+
+            # Path provided is a directory may be ros2 bag/ dir or contain multiple bags
+            ros1_bag_file_list = [p for p in path.glob("*.bag")]
+            ros2_bag_dir_list = [d for d in path.glob("*/") if is_ros2_bag(d)]
+
+            if ros1_bag_file_list:  # path contains ros1 .bag files
+                self.path = ros1_bag_file_list
+            elif ros2_bag_dir_list:  # path contains ros2 bag/ directories
+                self.path = ros2_bag_dir_list
+            elif is_ros2_bag(path):  # path is a ros2 bag/ (contains mcap or db3 file)
+                self.path = [path]
+            else:
+                raise ValueError(
+                    f"Invalid rosbag path: {path}\nExpected path to be one of:\na) ros1 .bag \nb) ros2 bag/ dir or \nc) directory multiple a or b"
+                )
 
         # Open the bag file
         self.reader = AnyReader(self.path)
         self.reader.open()
+
+        # force load passed in type store
+        # there is a default_typestore parameter in AnyReader, but it won't be used if one of bags has msgdefs
+        # this works around that, but may be unstable
+        # https://gitlab.com/ternaris/rosbags/-/blob/master/src/rosbags/highlevel/anyreader.py?ref_type=heads#L125-140
+        if type_store is not None:
+            self.reader.typestore.register(get_typestore(type_store).fielddefs)
+
         self.connections_lidar = [
             x for x in self.reader.connections if x.topic == self.lidar_topic
         ]

diff --git a/python/evalio/datasets/oxford_spires.py b/python/evalio/datasets/oxford_spires.py
@@ -52,7 +52,6 @@ def data_iter(self) -> DatasetIterator:
             "/hesai/pandar",
             "/alphasense_driver_ros/imu",
             self.lidar_params(),
-            is_mcap=True,
             lidar_format=LidarFormatParams(
                 stamp=LidarStamp.Start,
                 point_stamp=LidarPointStamp.Start,