In [4]:
#!python -m pip install --upgrade pip

#!python -m pip install idc-index

#!python -m pip install numpy scipy nibabel pydicom tqdm

#!python -m pip install moosez
!pip install halo

Collecting halo
  Using cached halo-0.0.31-py3-none-any.whl
Installing collected packages: halo
Successfully installed halo-0.0.31


In [5]:
import os
import sys
import shutil
import subprocess
import traceback
from pathlib import Path
from datetime import datetime
from concurrent.futures import ProcessPoolExecutor, as_completed
from idc_index import IDCClient
from moosez import moose
import SimpleITK as sitk

# Input location of the downloaded DICOM series and the root of everything
# this notebook writes. Both are relative to the notebook's working directory.
DATA_ROOT = Path("./nlst_representative_series")
OUT_ROOT = Path("./moose_out")

# Sub-folders of OUT_ROOT: per-series logs, segmentation results, converted NIfTI volumes.
LOG_DIR = OUT_ROOT / "logs"
RES_DIR = OUT_ROOT / "results"
NII_DIR = OUT_ROOT / "nifti"

# Create the output tree up front; existing folders are left untouched.
for _out_dir in (LOG_DIR, RES_DIR, NII_DIR):
    _out_dir.mkdir(parents=True, exist_ok=True)

# MOOSE model names to run on every series (all share the "clin_ct_" prefix).
MODELS = [
    f"clin_ct_{suffix}"
    for suffix in (
        "body",
        "cardiac",
        "digestive",
        "lungs",
        "muscles",
        "organs",
        "peripheral_bones",
        "ribs",
        "vertebrae",
        "body_composition",
    )
]

# Echo the effective configuration so the resolved paths are visible in the output.
print("DATA_ROOT:", DATA_ROOT.resolve())
print("OUT_ROOT:", OUT_ROOT.resolve())
print("Models:", MODELS)


DATA_ROOT: /home/jupyter/nlst_representative_series
OUT_ROOT: /home/jupyter/moose_out
Models: ['clin_ct_body', 'clin_ct_cardiac', 'clin_ct_digestive', 'clin_ct_lungs', 'clin_ct_muscles', 'clin_ct_organs', 'clin_ct_peripheral_bones', 'clin_ct_ribs', 'clin_ct_vertebrae', 'clin_ct_body_composition']


In [4]:
"""
Download representative NLST CT series using idc-index.
Generated by NLST CT Acquisition Analysis notebook.
"""

# IDC client used for the download below (no authentication required for downloads).
client = IDCClient()

# SeriesInstanceUIDs of the representative series, one per line.
series_uids = [
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.154032804902040094243713559490",
    "1.2.840.113654.2.55.314047553821739591629085520556640437258",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.156640362884731541203097174511",
    "1.2.840.113654.2.55.221760170262083317410421571275313269487",
    "1.2.840.113654.2.55.154596183025472601902508014547512229075",
    "1.2.840.113654.2.55.223892495254697845073791058058774016570",
    "1.2.840.113654.2.55.170304785971034342181650359955002661014",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.167203483045408857639095733156",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.257111818052226587000282847993",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.278992675498730944309880982577",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.233253643922570418503774287719",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.733086154765965465819695855319",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.446120418134557663835845900702",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.441105416529043363380932190176",
    "1.2.840.113654.2.55.201655506612925010288892835508800465315",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.919216223379924911615465797833",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.242079712071558699051299202278",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.295310455045405391642755562114",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.155682136796606998456894356267",
    "1.2.840.113654.2.55.330091219482623240054522432469930765363",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.201095169663936769948542290144",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.293842541786302167508360098817",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.617834778270565478282993475869",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.195344797572614289877616764543",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.261840131606104448047411233035",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.102136492893650428036169782021",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.243233920577292138649442240317",
    "1.2.840.113654.2.55.310309040735672349360129308458791106009",
    "1.2.840.113654.2.55.229477624203342264580867775888850620134",
    "1.2.840.113654.2.55.129931524337413091771465035428019232784",
    "1.2.840.113654.2.55.287091428657300092856162782935984503476",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.226420772567288035741240692963",
    "1.2.840.113654.2.55.140563111159321584094131170060719328559",
    "1.2.840.113654.2.55.48995485399121652606738720093881412631",
    "1.2.840.113654.2.55.164217626480190476878564152695423139428",
    "1.2.840.113654.2.55.298854893748984534002057562092121758997",
    "1.2.840.113654.2.55.279492734292134753419794929571589401481",
    "1.2.840.113654.2.55.95242108788048571766203081639306629257",
    "1.2.840.113654.2.55.4873705810622904031106677385023945374",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.258670956660823211051324130209",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.600393023264914244206809778313",
    "1.3.6.1.4.1.14519.5.2.1.7009.9004.307853467711025882996136727237",
]

# Download the selected series into DATA_ROOT. Using the shared constant
# (instead of repeating the path literal) guarantees that the download target
# and the folder scanned by the processing cells are the same location.
# The directory template produces collection/patient/series sub-folders.
print(f"Downloading {len(series_uids)} representative series...")
client.download_from_selection(
    seriesInstanceUID=series_uids,
    downloadDir=str(DATA_ROOT),
    dirTemplate="%collection_id/%PatientID/%SeriesInstanceUID",
)
print("Download complete!")

2026-01-29 12:01:11,310 - Disk size needed: 3.9 GB
2026-01-29 12:01:11,312 - Disk size available: 100.86 GB


Downloading 42 representative series...


2026-01-29 12:01:11,417 - Not using s5cmd sync as the destination folder is empty or sync or progress bar is not requested
2026-01-29 12:01:11,419 - Initial size of the directory: 0 bytes
2026-01-29 12:01:11,420 - Approximate size of the files that need to be downloaded: 3.9 GB
Downloading data: 100%|██████████| 3.90G/3.90G [00:46<00:00, 83.7MB/s]
2026-01-29 12:01:57,971 - Successfully downloaded files to /home/jupyter/Desktop/nlst_representative_series


Download complete!


In [6]:
def dicom_series_to_nifti(series_dir: Path, out_nii: Path) -> Path:
    """
    Read one DICOM series from ``series_dir`` and write it as a compressed image to ``out_nii``.

    Parameters
    ----------
    series_dir : Path
        Directory containing the DICOM files.
    out_nii : Path
        Target file; its parent directory is created if missing. The file
        extension decides the output format chosen by SimpleITK.

    Returns
    -------
    Path
        ``out_nii``, once the file has been written completely.

    Raises
    ------
    RuntimeError
        If GDCM finds no DICOM series in ``series_dir``.
    """
    out_nii.parent.mkdir(parents=True, exist_ok=True)

    # GDCM also finds DICOM files that lack a .dcm extension.
    reader = sitk.ImageSeriesReader()
    series_ids = reader.GetGDCMSeriesIDs(str(series_dir))
    if not series_ids:
        raise RuntimeError(f"No DICOM series found in {series_dir}")

    # If GDCM reports several series in the folder, use the one with the most
    # files instead of blindly taking the first. With a single series (or a
    # tie) this selects the same series as taking the first one.
    files_per_series = [
        reader.GetGDCMSeriesFileNames(str(series_dir), sid) for sid in series_ids
    ]
    file_names = max(files_per_series, key=len)
    reader.SetFileNames(file_names)

    img = reader.Execute()

    # Write to a temporary sibling first and rename afterwards, so an
    # interrupted write never leaves a truncated file at ``out_nii`` that a
    # later run would mistake for a finished conversion. The temporary name
    # keeps the original extension so the same output format is used.
    tmp_nii = out_nii.with_name(f"partial_{out_nii.name}")
    try:
        sitk.WriteImage(img, str(tmp_nii), useCompression=True)
        tmp_nii.replace(out_nii)
    finally:
        # Only present here if writing or renaming failed.
        if tmp_nii.exists():
            tmp_nii.unlink()
    return out_nii

In [7]:
def find_series_dirs(data_root: Path):
    """
    Collect the series directories below ``data_root``.

    Only directories exactly three levels deep (collection/patient/series)
    are returned, as a sorted list of paths.

    Raises
    ------
    FileNotFoundError
        If ``data_root`` does not exist.
    """
    if not data_root.exists():
        raise FileNotFoundError(data_root)

    found = []
    for candidate in data_root.glob("*/*/*"):
        # Skip plain files that happen to sit at the series level.
        if candidate.is_dir():
            found.append(candidate)
    found.sort()
    return found

# Discover the downloaded series and report how many were found, plus one example path.
series_dirs = find_series_dirs(DATA_ROOT)
print("Found series dirs:", len(series_dirs))
if series_dirs:
    print("Example:", series_dirs[0])
else:
    print("Example:", "NONE")

Found series dirs: 42
Example: nlst_representative_series/nlst/100041/1.2.840.113654.2.55.129931524337413091771465035428019232784


In [8]:
def safe_relpath(p: Path, root: Path) -> str:
    """
    Return ``p`` relative to ``root`` as a flat, filename-safe tag.

    Both paths are resolved first; the path separators of the relative path
    are then replaced by ``"__"``. ``as_posix()`` is used so the result is the
    same on platforms whose native separator is not ``"/"``.

    Raises
    ------
    ValueError
        If ``p`` is not located below ``root`` (raised by ``relative_to``).
    """
    rel = p.resolve().relative_to(root.resolve())
    return rel.as_posix().replace("/", "__")

def has_existing_outputs(series_out_dir: Path) -> bool:
    """
    Returns True if the output directory looks like it already contains segmentation outputs.
    This is deliberately broad (covers many possible MOOSE output formats).

    The directory counts as populated when it contains a ``DONE.txt`` marker,
    or when any file below it (searched recursively) is non-empty and has one
    of the known image/mask extensions. Zero-byte files are ignored so that a
    stub left behind by a crashed run does not cause the series to be skipped.

    NOTE(review): a single non-empty output file is enough to return True, so a
    run that produced only part of its outputs is also treated as done.
    """
    if not series_out_dir.exists():
        return False

    # Strong signal: marker file
    if (series_out_dir / "DONE.txt").exists():
        return True

    # Common medical image outputs + masks
    exts = (".nii", ".nii.gz", ".mha", ".mhd", ".nrrd", ".npz")

    # Any non-empty typical output file?
    return any(
        p.is_file() and p.name.lower().endswith(exts) and p.stat().st_size > 0
        for p in series_out_dir.rglob("*")
    )


def run_moose_on_series(series_dir: Path, models, out_root: Path, device: str = "cuda"):
    """
    Convert one DICOM series to NIfTI, run MOOSE on it and log the outcome.

    Parameters
    ----------
    series_dir : Path
        Directory with the DICOM files of one series. It must lie below the
        module-level ``DATA_ROOT``, which is used to derive a unique tag.
    models : iterable of str
        Model names; passed to ``moose`` as a list.
    out_root : Path
        Output root. The log goes to ``out_root/logs/<tag>.log``, the converted
        volume to ``out_root/nifti/CT_<tag>.nii.gz`` and the segmentation
        outputs to ``out_root/results/<tag>``. All three folders are created
        if they do not exist yet.
    device : str
        Requested device. ``"cuda"`` falls back to ``"cpu"`` when torch cannot
        be imported or reports that CUDA is unavailable.

    Returns
    -------
    dict
        Always contains ``series_dir``, ``log_file``, ``out_dir``,
        ``returncode`` (0 for success or skip, 1 for failure) and ``skipped``.
        Skipped series additionally carry ``reason``; failed ones ``error``.

    Notes
    -----
    Exceptions raised during conversion or segmentation are caught, written to
    the log together with the traceback, and reported through the returned
    dict. Errors while deriving the tag or creating the folders propagate.
    The log is opened in append mode, so re-running a series (for example one
    that is skipped because it is already done) keeps the log of earlier runs.
    """
    tag = safe_relpath(series_dir, DATA_ROOT)
    log_file = out_root / "logs" / f"{tag}.log"
    series_out_dir = out_root / "results" / tag
    # NIfTI output for this series. The CT_ prefix is deliberate.
    # NOTE(review): presumably MOOSE relies on it to identify the modality - confirm.
    nii_path = out_root / "nifti" / f"CT_{tag}.nii.gz"

    # Do not rely on the folders having been created elsewhere for this out_root.
    for directory in (log_file.parent, series_out_dir, nii_path.parent):
        directory.mkdir(parents=True, exist_ok=True)

    # Device fallback: best effort, any problem with torch means "use the CPU".
    try:
        import torch
        if device == "cuda" and not torch.cuda.is_available():
            device = "cpu"
    except Exception:
        if device == "cuda":
            device = "cpu"

    def _result(returncode: int, skipped: bool, **extra):
        # Single place that builds the summary dict returned to the caller.
        summary = {
            "series_dir": str(series_dir),
            "log_file": str(log_file),
            "out_dir": str(series_out_dir),
            "returncode": returncode,
            "skipped": skipped,
        }
        summary.update(extra)
        return summary

    t0 = datetime.now().isoformat(timespec="seconds")
    # Append instead of truncating: a later skipped run must not wipe the log
    # of the run that actually produced the results.
    with open(log_file, "a", encoding="utf-8") as f:
        f.write(f"START:  {t0}\n")
        f.write(f"SERIES: {series_dir}\n")
        f.write(f"NIFTI:  {nii_path}\n")
        f.write(f"OUTDIR: {series_out_dir}\n")
        f.write(f"DEVICE: {device}\n")
        f.write(f"MODELS: {models}\n\n")
        f.flush()

        if has_existing_outputs(series_out_dir):
            f.write("SKIP: Output directory already contains results (or DONE.txt exists).\n")
            f.write("STATUS: SKIPPED\n")
            return _result(0, True, reason="existing_outputs")

        try:
            # 1) DICOM -> NIfTI
            if not nii_path.exists():
                f.write("Step 1/2: DICOM -> NIfTI\n")
                f.flush()
                dicom_series_to_nifti(series_dir, nii_path)
            else:
                f.write("Step 1/2: NIfTI already exists, skip conversion\n")

            # 2) Run MOOSE on the NIfTI FILE (not on the folder!)
            f.write("Step 2/2: Running MOOSE\n")
            f.flush()
            moose(str(nii_path), list(models), str(series_out_dir), device)

            # Marker file that lets later runs recognise this series as finished.
            (series_out_dir / "DONE.txt").write_text(
                f"OK\nseries={series_dir}\nfinished={datetime.now().isoformat(timespec='seconds')}\n",
                encoding="utf-8"
            )

            t1 = datetime.now().isoformat(timespec="seconds")
            f.write(f"\nEND:   {t1}\nSTATUS: OK\n")
            return _result(0, False)

        except Exception as e:
            t1 = datetime.now().isoformat(timespec="seconds")
            f.write(f"\nEND:   {t1}\nSTATUS: FAIL\nERROR: {repr(e)}\n\n")
            f.write(traceback.format_exc())
            return _result(1, False, error=repr(e))

In [9]:
# Reuse the shared discovery helper instead of repeating the glob; it also
# fails loudly if DATA_ROOT does not exist.
series_dirs = find_series_dirs(DATA_ROOT)

# Process the series one after another and keep every per-series summary dict.
results = []
for sd in series_dirs:
    r = run_moose_on_series(sd, MODELS, OUT_ROOT, device="cuda")
    results.append(r)
    # Report skipped series separately so they are not mistaken for fresh successful runs.
    if r["returncode"] != 0:
        status = f"FAIL ({r['returncode']})"
    elif r.get("skipped"):
        status = "SKIPPED"
    else:
        status = "OK"
    print(sd.name, "->", status)


1.2.840.113654.2.55.129931524337413091771465035428019232784 -> OK
1.2.840.113654.2.55.164217626480190476878564152695423139428 -> OK
1.2.840.113654.2.55.154596183025472601902508014547512229075 -> OK
1.2.840.113654.2.55.140563111159321584094131170060719328559 -> OK
1.2.840.113654.2.55.330091219482623240054522432469930765363 -> OK
1.2.840.113654.2.55.95242108788048571766203081639306629257 -> OK
1.2.840.113654.2.55.221760170262083317410421571275313269487 -> OK
1.2.840.113654.2.55.201655506612925010288892835508800465315 -> OK
1.2.840.113654.2.55.229477624203342264580867775888850620134 -> OK
1.2.840.113654.2.55.310309040735672349360129308458791106009 -> OK
1.2.840.113654.2.55.287091428657300092856162782935984503476 -> OK
1.2.840.113654.2.55.4873705810622904031106677385023945374 -> OK
1.2.840.113654.2.55.170304785971034342181650359955002661014 -> OK
1.2.840.113654.2.55.298854893748984534002057562092121758997 -> OK
1.2.840.113654.2.55.48995485399121652606738720093881412631 -> OK
1.2.840.113654