# AMOS22 Sanity Audit (Pairing, Shapes, Labels, Basic Header Checks)

This notebook performs **dataset integrity checks** before training:

- confirms image/label pairing (train + validation)
- checks image and label shapes match
- checks label value range (expects integers 0..15 based on your `dataset.json`)
- checks basic header/affine similarity: compares the image and label affine translations (not strict equality; offsets above 1 mm are flagged) and records the voxel spacing of both
- writes CSV summaries you can keep with the run record

> This is designed to be **fast enough** to run locally and produce actionable QC outputs.


In [1]:
# Cell 1 — Setup
from pathlib import Path
import json
import numpy as np
import pandas as pd
import nibabel as nib
from collections import Counter
from datetime import datetime

# Input dataset root and the folder that receives the audit artifacts
# (the output folder is created on first run, relative to the kernel's working directory).
DATA_ROOT = Path(r"C:/Users/hyeon/Documents/miniconda_medimg_env/data/amos22")
OUT_DIR = Path("outputs_audit")
OUT_DIR.mkdir(parents=True, exist_ok=True)

# Echo the resolved locations plus a timestamp so the saved output documents the run.
run_banner = (
    ("DATA_ROOT:", DATA_ROOT.resolve()),
    ("OUT_DIR  :", OUT_DIR.resolve()),
    ("Time     :", datetime.now().isoformat(timespec="seconds")),
)
for banner_label, banner_value in run_banner:
    print(banner_label, banner_value)

DATA_ROOT: C:\Users\hyeon\Documents\miniconda_medimg_env\data\amos22
OUT_DIR  : C:\Users\hyeon\Documents\miniconda_medimg_env\abdomen-multiorgan-segmentation\baseline_nnunet\outputs_audit
Time     : 2026-02-04T09:53:21


In [2]:
# Cell 2 — Load expected labels from dataset.json
# Parse dataset.json; its "labels" entry is expected to map id strings to organ
# names, e.g. {"0": "background", "1": "spleen", ...}.
ds = json.loads((DATA_ROOT / "dataset.json").read_text(encoding="utf-8"))
labels = ds.get("labels", {})
label_ids = sorted(int(key) for key in labels)

# The list is sorted, so its ends give the expected id range; fall back to 0..0
# when dataset.json declares no labels.
if label_ids:
    expected_min, expected_max = label_ids[0], label_ids[-1]
else:
    expected_min = expected_max = 0

print("Labels in dataset.json:", len(label_ids))
print("Expected label id range:", expected_min, "to", expected_max)
print("Label map (id -> name):")
for label_key in sorted(labels, key=int):
    print(f"  {label_key}: {labels[label_key]}")

Labels in dataset.json: 16
Expected label id range: 0 to 15
Label map (id -> name):
  0: background
  1: spleen
  2: right kidney
  3: left kidney
  4: gall bladder
  5: esophagus
  6: liver
  7: stomach
  8: arota
  9: postcava
  10: pancreas
  11: right adrenal gland
  12: left adrenal gland
  13: duodenum
  14: bladder
  15: prostate/uterus


In [3]:
# Cell 3 — Utility functions
def list_niigz(folder: Path):
    """Return the ``.nii.gz`` files located directly inside ``folder``, sorted by path.

    The search is not recursive, and directories whose name happens to end in
    ``.nii.gz`` are ignored.

    Parameters
    ----------
    folder : Path
        Directory to scan.

    Returns
    -------
    list[Path]
        Sorted file paths; an empty list when ``folder`` does not exist or is
        not a directory.
    """
    # is_dir() (rather than exists()) also covers the case where ``folder`` points at a
    # regular file, for which iterdir() would raise NotADirectoryError.
    if not folder.is_dir():
        return []
    return sorted(p for p in folder.iterdir() if p.is_file() and p.name.endswith(".nii.gz"))

def stem_niigz(p: Path) -> str:
    """Return the file name of ``p`` without its ``.nii.gz`` double extension.

    Names that do not end in ``.nii.gz`` fall back to ``Path.stem`` (only the
    last suffix is removed).
    """
    double_ext = ".nii.gz"
    filename = p.name
    if filename.endswith(double_ext):
        return filename[: -len(double_ext)]
    return p.stem

def affine_diff_mm(a: np.ndarray, b: np.ndarray) -> float:
    """Return the largest absolute per-axis difference between two affine translations.

    Only the translation column (rows 0-2 of column 3) is compared; the upper-left
    3x3 part of the matrices is not considered, so this is a crude measure.
    """
    translation_a = a[:3, 3]
    translation_b = b[:3, 3]
    per_axis_offset = np.abs(translation_a - translation_b)
    return float(per_axis_offset.max())

def spacing_from_affine(aff: np.ndarray) -> tuple:
    """Return the voxel spacing implied by an affine as a tuple of three floats.

    Each spacing is the Euclidean length of the corresponding column of the
    upper-left 3x3 block of ``aff``.
    """
    linear_part = aff[:3, :3]
    column_lengths = np.linalg.norm(linear_part, axis=0)
    return tuple(column_lengths.tolist())

def audit_split(images_dir: Path, labels_dir: Path | None, split_name: str):
    """Audit one dataset split and collect one QC row per image.

    Images are paired with labels by file name (minus ``.nii.gz``). For every
    paired case the image/label shapes, voxel spacings, affine translation
    offset and label value statistics are recorded.

    Reads the module-level ``expected_min`` / ``expected_max`` (derived from
    ``dataset.json``) to decide whether a label volume is out of range.

    Parameters
    ----------
    images_dir : Path
        Folder holding the split's ``.nii.gz`` images.
    labels_dir : Path | None
        Folder holding the matching ``.nii.gz`` labels, or ``None`` for a split
        without labels (label columns are then left as empty strings).
    split_name : str
        Value written to the ``split`` column of every row.

    Returns
    -------
    rows : list[dict]
        One dict per audited image. Images whose label is missing get no row.
    missing : list[str]
        Ids of images for which ``labels_dir`` was given but no label was found.

    Notes
    -----
    * ``has_unexpected_label_range`` is ``True`` when the label min/max falls
      outside the expected id range, or when a floating-point label volume
      contains non-integer values (which the int32 cast would otherwise hide).
    * ``unique_label_count`` is computed on every 50th voxel for volumes with
      more than 50,000,000 voxels, so it can undercount on such volumes.
    * Pairing is checked from the image side only; label files without a
      matching image are not reported.
    """
    imgs = list_niigz(images_dir)
    labs = {stem_niigz(p): p for p in list_niigz(labels_dir)} if labels_dir else {}

    rows = []
    missing = []
    for img_path in imgs:
        sid = stem_niigz(img_path)
        lbl_path = labs.get(sid) if labels_dir else None

        if labels_dir and lbl_path is None:
            # Unpaired image: report it and skip the (expensive) header/data reads.
            missing.append(sid)
            continue

        img = nib.load(str(img_path))
        img_shape = tuple(img.shape)
        img_spacing = spacing_from_affine(img.affine)

        # Start from the "no label" row; label-dependent columns stay "" unless a
        # label is available. Key order defines the CSV column order.
        row = {
            "split": split_name,
            "id": sid,
            "image": img_path.name,
            "label": "",
            "img_shape": str(img_shape),
            "lbl_shape": "",
            "shape_match": "",
            "img_spacing": str(tuple(round(x, 4) for x in img_spacing)),
            "lbl_spacing": "",
            "affine_translation_diff_mm": "",
            "label_min": "",
            "label_max": "",
            "unique_label_count": "",
            "has_unexpected_label_range": "",
        }

        if lbl_path is not None:
            lbl = nib.load(str(lbl_path))
            lbl_shape = tuple(lbl.shape)
            lbl_spacing = spacing_from_affine(lbl.affine)

            arr = np.asanyarray(lbl.dataobj)
            # The int32 cast below truncates floats, so a fractional value such as 15.5
            # would pass the min/max range test; detect it on the raw data first.
            # NaN compares unequal to itself and is therefore flagged as well.
            has_fractional = bool(
                np.issubdtype(arr.dtype, np.floating) and np.any(arr != np.trunc(arr))
            )

            arr_int = arr.astype(np.int32, copy=False)
            lbl_min = int(arr_int.min())
            lbl_max = int(arr_int.max())
            # Avoid running np.unique over huge volumes; sample every 50th voxel instead.
            if arr_int.size <= 50_000_000:
                uniq = np.unique(arr_int)
            else:
                uniq = np.unique(arr_int.ravel()[::50])

            unexpected = has_fractional or lbl_min < expected_min or lbl_max > expected_max

            row.update({
                "label": lbl_path.name,
                "lbl_shape": str(lbl_shape),
                "shape_match": img_shape == lbl_shape,
                "lbl_spacing": str(tuple(round(x, 4) for x in lbl_spacing)),
                "affine_translation_diff_mm": round(affine_diff_mm(img.affine, lbl.affine), 4),
                "label_min": lbl_min,
                "label_max": lbl_max,
                "unique_label_count": int(len(uniq)),
                "has_unexpected_label_range": unexpected,
            })

        rows.append(row)

    return rows, missing

In [4]:
# Cell 4 — Run audits for Train + Validation + Test
# Folder layout of the dataset: images/labels per split (the test split has no labels).
train_images_dir, train_labels_dir = DATA_ROOT / "imagesTr", DATA_ROOT / "labelsTr"
val_images_dir, val_labels_dir = DATA_ROOT / "imagesVa", DATA_ROOT / "labelsVa"
test_images_dir = DATA_ROOT / "imagesTs"

train_rows, train_missing = audit_split(train_images_dir, train_labels_dir, "train")
val_rows, val_missing = audit_split(val_images_dir, val_labels_dir, "val")
test_rows, _ = audit_split(test_images_dir, None, "test_no_labels")

# One table for all splits; label columns are empty strings for the test rows.
df = pd.DataFrame([*train_rows, *val_rows, *test_rows])

# File counts per folder next to the number of images that had no matching label.
count_report = (
    ("Train images:", len(list_niigz(train_images_dir))),
    ("Train labels:", len(list_niigz(train_labels_dir))),
    ("Missing train labels:", len(train_missing)),
    ("Val images:", len(list_niigz(val_images_dir))),
    ("Val labels:", len(list_niigz(val_labels_dir))),
    ("Missing val labels:", len(val_missing)),
    ("Test images:", len(list_niigz(test_images_dir))),
)
for caption, count in count_report:
    print(caption, count)

# Save full audit table
csv_path = OUT_DIR / "amos22_audit_table.csv"
df.to_csv(csv_path, index=False)
print("Wrote:", csv_path.resolve())

Train images: 240
Train labels: 240
Missing train labels: 0
Val images: 120
Val labels: 120
Missing val labels: 0
Test images: 240
Wrote: C:\Users\hyeon\Documents\miniconda_medimg_env\abdomen-multiorgan-segmentation\baseline_nnunet\outputs_audit\amos22_audit_table.csv


In [7]:
# Cell 5 — Summaries: problems to fix first
# 1) Pairing issues: images for which no label file was found
for unpaired_ids, warning_text in (
    (train_missing, "⚠️ Missing TRAIN labels for (showing up to 20):"),
    (val_missing, "⚠️ Missing VAL labels for (showing up to 20):"),
):
    if unpaired_ids:
        print(warning_text, unpaired_ids[:20])

# Only the labeled splits carry shape/label/affine columns; test rows hold "" there.
in_labeled_split = df["split"].isin(["train", "val"])

# 2) Shape mismatches between image and label
bad_shape = df.loc[in_labeled_split & df["shape_match"].eq(False)]
print("\nShape mismatches:", len(bad_shape))
if len(bad_shape) > 0:
    display(bad_shape.head(20))

# 3) Unexpected label ranges
bad_labels = df.loc[in_labeled_split & df["has_unexpected_label_range"].eq(True)]
print("\nUnexpected label range cases:", len(bad_labels))
if len(bad_labels) > 0:
    display(bad_labels.head(20))

# 4) Large affine translation differences (heuristic; > 1mm)
sub = df.loc[in_labeled_split].copy()
# The column is mixed-type in the full table, so coerce it to numeric before comparing.
sub["affine_translation_diff_mm"] = pd.to_numeric(sub["affine_translation_diff_mm"], errors="coerce")
aff_bad = sub.loc[sub["affine_translation_diff_mm"].gt(1.0)]

print("\nAffine translation diff > 1mm:", len(aff_bad))
if len(aff_bad) > 0:
    display(aff_bad.head(20))


Shape mismatches: 0

Unexpected label range cases: 0

Affine translation diff > 1mm: 0


In [6]:
# Cell 6 — Optional: distribution of spacings (train/val)
def parse_tuple_str(s):
    """Parse the ``str()`` form of a numeric tuple, e.g. ``"(0.78, 0.78, 5.0)"``.

    Parameters
    ----------
    s : object
        Candidate value; anything that is not a string starting with ``"("``
        is rejected.

    Returns
    -------
    tuple[float, ...] | None
        The parsed floats, or ``None`` when ``s`` is rejected.

    Raises
    ------
    ValueError
        If a non-empty element cannot be converted to ``float``.
    """
    if not isinstance(s, str) or not s.startswith("("):
        return None
    # str((5.0,)) == "(5.0,)" and str(()) == "()" both yield an empty token after
    # splitting; skip those instead of passing "" to float().
    tokens = (tok.strip() for tok in s.strip("()").split(","))
    return tuple(float(tok) for tok in tokens if tok)

def spacing_table(split: str):
    """Count the most frequent image spacings of one split.

    Reads the ``img_spacing`` strings of the module-level ``df`` for rows whose
    ``split`` equals ``split``, parses them, rounds every component to 2
    decimals for grouping, and returns the 20 most common rounded spacings
    with their counts.
    """
    spacing_strings = df.loc[df["split"] == split, "img_spacing"]
    parsed = spacing_strings.apply(parse_tuple_str)
    # Drop rows whose spacing string could not be parsed.
    parsed = parsed[parsed.notna()]
    rounded = parsed.apply(lambda t: tuple(round(x, 2) for x in t))
    return rounded.rename("spacing_r").value_counts().head(20)

# Print the spacing distribution of each labeled split under its own heading.
for heading, split_key in (
    ("Top image spacings (train):", "train"),
    ("\nTop image spacings (val):", "val"),
):
    print(heading)
    print(spacing_table(split_key))

Top image spacings (train):
spacing_r
(0.78, 0.78, 5.0)    42
(0.65, 0.65, 5.0)    14
(1.19, 1.19, 3.0)    10
(0.82, 1.1, 0.82)     9
(0.61, 0.61, 5.0)     8
(0.54, 0.54, 5.0)     7
(0.69, 0.69, 2.0)     5
(1.19, 3.0, 1.19)     5
(0.83, 0.83, 5.0)     4
(0.53, 0.53, 5.0)     4
(0.89, 0.89, 5.0)     4
(0.58, 0.58, 5.0)     4
(0.63, 0.63, 5.0)     4
(0.95, 0.95, 5.0)     4
(0.62, 0.62, 2.0)     4
(0.62, 0.62, 5.0)     4
(1.41, 1.5, 1.41)     4
(0.57, 0.57, 5.0)     3
(0.59, 0.59, 5.0)     3
(0.67, 0.67, 2.0)     3
Name: count, dtype: int64

Top image spacings (val):
spacing_r
(0.78, 0.78, 5.0)    14
(0.69, 0.69, 3.0)     8
(0.62, 0.62, 5.0)     5
(0.66, 0.66, 2.0)     4
(0.51, 0.51, 5.0)     4
(1.41, 1.5, 1.41)     4
(1.19, 1.19, 3.0)     4
(0.65, 0.65, 5.0)     3
(0.58, 0.58, 5.0)     3
(0.77, 0.77, 2.0)     3
(0.59, 0.59, 2.0)     3
(0.69, 0.69, 2.0)     3
(1.19, 3.0, 1.19)     3
(0.64, 0.64, 2.0)     3
(0.48, 0.48, 5.0)     3
(0.77, 0.77, 5.0)     2
(0.7, 0.7, 2.0)       2
(0.65, 0.65

## What to paste back into chat

After running this notebook, paste:
- counts of **missing labels**, **shape mismatches**, and **unexpected label ranges**
- (if any) 2–3 example case IDs with issues

Then we can proceed to:
- nnU-Net dataset preparation (`DatasetXXX_AMOS22`)
- baseline training commands
- a unified evaluation + report-generation plan


In [None]:
import os
import sys
import subprocess
from pathlib import Path

# ---- Configure paths/arguments ----
# The kernel may be started from the repository root or from inside baseline_nnunet/
# (the OUT_DIR printed by the setup cell resolves inside baseline_nnunet/), so look for
# the script relative to both locations. The first candidate is the original path and
# is also used for the error message when neither exists.
script_candidates = [
    Path("baseline_nnunet") / "prepare_dataset.py",
    Path("prepare_dataset.py"),
]
script_path = next((p for p in script_candidates if p.is_file()), script_candidates[0])
amos_root = Path(r"C:\Users\hyeon\Documents\miniconda_medimg_env\data\amos22")

# Ensure NNUNet_raw exists in this kernel's environment (as set earlier)
nnunet_raw = os.environ.get("NNUNet_raw")
if not nnunet_raw:
    raise EnvironmentError("NNUNet_raw is not set in os.environ. Please set it earlier in the notebook.")

# Sanity checks: fail before launching the subprocess if any input path is wrong.
assert script_path.exists(), f"Script not found: {script_path.resolve()}"
assert amos_root.exists(), f"AMOS root not found: {amos_root}"
assert Path(nnunet_raw).exists(), f"NNUNet_raw path does not exist: {nnunet_raw}"

# ---- Build the command ----
# Passed as an argument list (no shell), so paths containing spaces need no quoting.
cmd = [
    sys.executable,               # Use the same Python as the notebook kernel
    str(script_path),
    "--amos_root", str(amos_root),
    "--nnunet_raw", nnunet_raw,
    "--dataset_id", "701",
    "--dataset_name", "AMOS22",
]

# ---- Run the command ----
# Output is captured as text and only shown once the script has finished.
result = subprocess.run(cmd, capture_output=True, text=True)

# ---- Show output / error ----
print("STDOUT:\n", result.stdout)
if result.returncode != 0:
    print("STDERR:\n", result.stderr)
    raise RuntimeError(f"Command failed with return code {result.returncode}")

In [None]:
from pathlib import Path

import os  # imported here so the cell does not depend on an earlier cell's import

# Verify that every nnU-Net folder variable is set and points at an existing path.
for var in ["NNUNet_raw", "NNUNet_preprocessed", "NNUNet_results"]:
    env_value = os.environ.get(var)
    # An unset variable used to surface as a bare KeyError, and an empty value passed
    # silently because Path("") refers to the current directory.
    assert env_value, f"{var} is not set in os.environ"
    assert Path(env_value).exists(), f"{var} path does not exist"