# Aggregate analysis walkthrough

Load the detection summary and produce quick sanity checks using pandas.

In [None]:
from pathlib import Path

import pandas as pd

# Dataset root, expressed relative to the current working directory.
DATASET_ROOT = Path("..", "data", "datalad", "bogus")
# CSV holding the aggregate detection summary under the dataset root.
SUMMARY = DATASET_ROOT.joinpath("artifacts", "aggregate", "summary.csv")

# Load the summary and show its first rows as the cell output.
summary_df = pd.read_csv(SUMMARY)
summary_df.head()

## Detections per label

In [None]:
# Count how many summary rows carry each distinct label value.
summary_df.loc[:, "label"].value_counts()

## Quicklook CSV exports

The `badc report quicklook --output-dir ...` command writes the label, recording, and chunk tables to
CSV files so you can plot them without running DuckDB queries manually.


In [None]:
# Directory containing the quicklook CSV exports read below.
QUICKLOOK_DIR = DATASET_ROOT.joinpath(
    "artifacts",
    "aggregate",
    "XXXX-000_20251001_093000_quicklook",
)

# Load each exported table into its own DataFrame.
labels_df = pd.read_csv(QUICKLOOK_DIR.joinpath("labels.csv"))
recordings_df = pd.read_csv(QUICKLOOK_DIR.joinpath("recordings.csv"))
chunks_df = pd.read_csv(QUICKLOOK_DIR.joinpath("chunks.csv"))

# Show the first rows of the label table as the cell output.
labels_df.head()

### Plot detections per label

Pandas makes it easy to turn the quicklook CSV into a bar plot for reports/notebooks.


In [None]:
# Bar chart of detections per label, largest count first.
(
    labels_df
    .sort_values(by="detections", ascending=False)
    .plot.bar(
        x="label",
        y="detections",
        legend=False,
        title="Detections per label",
    )
)

### Chunk timeline

Use the chunk CSV to visualize detections over time, ordered by `chunk_start_ms`.


In [None]:
# Line chart of detections per chunk, ordered by chunk start time.
(
    chunks_df
    .sort_values(by="chunk_start_ms")
    .plot.line(
        x="chunk_start_ms",
        y="detections",
        marker="o",
        title="Detections per chunk",
        xlabel="Chunk start (ms)",
        ylabel="Detections",
    )
)

## Runtime vs confidence join (placeholder)

In [None]:
# Location of the inference telemetry log; this cell only builds the path
# and does not read the file yet.
telemetry_path = DATASET_ROOT.joinpath("data", "telemetry", "infer", "log.jsonl")

# Placeholder until the join is implemented.
print("Add join logic here once telemetry schema is finalized.")