# Analyze database
This notebook is used to analyze the database and its recordings.

In [1]:
import sqlite3
from pathlib import Path
from typing import TypedDict

from datetime import datetime
import matplotlib.pyplot as plt
import pandas as pd

from ddlitlab2024 import DB_PATH

In [2]:
# TODO Remove me: machine-specific override that replaces the DB_PATH imported from ddlitlab2024 above.
DB_PATH = Path("/srv/ssd_nvm/dataset/ddlitlab2024/db/robocup_2024_german_open_2025.sqlite3")

In [3]:
print(f"Analyzing {DB_PATH}")

# Connect read-only to the SQLite database.
# The URI is built with Path.as_uri() instead of raw string interpolation so that
# characters with a special meaning in URIs ("?", "#", "%", spaces) inside the
# path are percent-encoded rather than being misread as the start of the query
# string or fragment. as_uri() requires an absolute path, hence resolve().
conn = sqlite3.connect(f"{Path(DB_PATH).resolve().as_uri()}?mode=ro", uri=True)

Analyzing /srv/ssd_nvm/dataset/ddlitlab2024/db/robocup_2024_german_open_2025.sqlite3


## Gather metrics:
- Duration [s]
- Image samples
- Rotation samples
- Joint state samples
- Joint command samples
- Game state samples
- Total number of samples (sum of all sample counts above)

In [4]:
class Metric(TypedDict):
    """Summary metrics of a single recording; one instance becomes one row of the metrics table."""

    name: str  # original file name of the recording
    duration_s: float  # recording length in seconds (end_time - start_time)
    num_images: int  # rows in the Image table belonging to the recording
    num_rotations: int  # rows in the Rotation table belonging to the recording
    num_joint_states: int  # rows in the JointStates table belonging to the recording
    num_joint_commands: int  # rows in the JointCommands table belonging to the recording
    num_game_states: int  # rows in the GameState table belonging to the recording
    num_messages: int  # sum of the five sample counts above

In [5]:
# Maps each count field of ``Metric`` to the table whose rows are counted for it.
# The insertion order defines the column order of the resulting DataFrame.
_SAMPLE_TABLES = {
    "num_images": "Image",
    "num_rotations": "Rotation",
    "num_joint_states": "JointStates",
    "num_joint_commands": "JointCommands",
    "num_game_states": "GameState",
}


def gather_metrics() -> pd.DataFrame:
    """Iterate over all recordings in the database and gather their metrics.

    Uses the module-level ``conn`` SQLite connection.

    Returns:
        A DataFrame with one row per entry of the ``Recording`` table and the
        columns ``name``, ``duration_s``, one ``num_*`` column per sample table
        and ``num_messages`` (the sum of all ``num_*`` sample counts). The
        columns are present even when the database contains no recordings.

    Raises:
        TypeError, ValueError: If a recording's ``start_time`` or ``end_time``
            is not an ISO-8601 string (propagated from ``datetime.fromisoformat``).
    """
    recordings = conn.execute(
        "SELECT _id, original_file, start_time, end_time FROM Recording"
    ).fetchall()

    metrics = []
    for recording_id, original_file, start_time, end_time in recordings:
        duration_s = (datetime.fromisoformat(end_time) - datetime.fromisoformat(start_time)).total_seconds()

        # Table names cannot be bound as SQL parameters; they come exclusively
        # from the fixed _SAMPLE_TABLES mapping, never from external input.
        counts = {
            field: conn.execute(
                f"SELECT COUNT(*) FROM {table} WHERE recording_id = ?", (recording_id,)
            ).fetchone()[0]
            for field, table in _SAMPLE_TABLES.items()
        }

        metrics.append(
            Metric(
                name=original_file,
                duration_s=duration_s,
                **counts,
                num_messages=sum(counts.values()),
            )
        )

    # Passing the columns explicitly keeps the schema stable for an empty database,
    # where pandas would otherwise return a frame without any columns.
    return pd.DataFrame(metrics, columns=["name", "duration_s", *_SAMPLE_TABLES, "num_messages"])

# Query the database once and keep the per-recording metrics for the cells below.
df = gather_metrics()

In [6]:
# Export the metrics as CSV; the relative path resolves against the current working directory.
# NOTE(review): with the default index=True the row index is written as an unnamed first column — confirm this is intended.
df.to_csv("metrics_db.csv")

In [7]:
# Show the per-recording metrics table as the cell output.
df

Unnamed: 0,name,duration_s,num_images,num_rotations,num_joint_states,num_joint_commands,num_game_states,num_messages
0,ID_donna_2024-07-19T16:30:37_0.mcap,689.858320,6752,33763,33763,33763,9,108050
1,ID_jack_2024-07-19T11:31:28_0.mcap,752.840411,7345,36728,36728,36728,1120,118649
2,ID_jack_2024-07-17T15:38:04_0.mcap,534.731386,5163,25819,25819,25819,634,83254
3,ID_rory_2024-07-18T13:29:30_0.mcap,1177.131614,11588,57941,57941,57941,2172,187583
4,ID_donna_2024-07-19T11:48:16_0.mcap,1052.640304,10200,51884,51884,51884,1453,167305
...,...,...,...,...,...,...,...,...
83,ID_rory_2025-03-13T14:48:56_0.mcap,220.334670,1919,9603,9603,9603,364,31092
84,ID_jack_2025-03-13T14:49:56_0.mcap,768.941289,7438,37288,37288,37288,1456,120758
85,ID_donna_2025-03-15T10:27:37_0.mcap,1059.899044,10561,52932,52932,52932,1853,171210
86,ID_jack_2025-03-15T13:33:32_0.mcap,1036.381852,10144,50730,50730,50730,1375,163709


In [8]:
# Emit one LaTeX table row per numeric column: min & mean±std & max & sum
for column in df.columns:
    if column == "name":
        # The recording name is a label, not a numeric metric
        continue

    # Durations keep two decimals, the sample counts are printed as whole numbers
    precision = 2 if column == "duration_s" else 0
    series = df[column]
    print(
        f"{column} & {series.min():.{precision}f}"
        f" & {series.mean():.{precision}f}±{series.std():.{precision}f}"
        f" & {series.max():.{precision}f}"
        f" & {series.sum():.{precision}f}\\\\"
    )

duration_s & 35.15 & 619.25±665.74 & 4598.73 & 54493.99\\
num_images & 200 & 5952±6638 & 45954 & 523819\\
num_rotations & 1014 & 30147±33378 & 229777 & 2652975\\
num_joint_states & 1014 & 30147±33378 & 229777 & 2652975\\
num_joint_commands & 1014 & 30147±33378 & 229777 & 2652975\\
num_game_states & 1 & 939±1001 & 6280 & 82638\\
num_messages & 3273 & 97334±107712 & 741565 & 8565382\\
