In [1]:
import subprocess
from pathlib import Path
import pandas as pd
from tempfile import TemporaryDirectory
import SimpleITK as sitk
from tqdm.auto import tqdm
from aimi_idc_data.fix_dicom import fix_dicom_dir

In [2]:
# Load the QA review table.
# The listed columns are read as ``str`` so identifier-like values (dates,
# suffixes, scores) are kept exactly as written in the CSV, and
# ``keep_default_na=False`` leaves empty cells as "" instead of NaN, which is
# what the truthiness check on ``CorrectedSegmentation`` further down needs.
df = pd.read_csv(
    "qa-results.csv",
    dtype=dict.fromkeys(
        (
            "PatientID",
            "StudyDate",
            "StudyDate_suffix",
            "LikertScore",
            "CorrectedSegmentation",
        ),
        str,
    ),
    keep_default_na=False,
)


In [3]:
# Root under which each collection's DICOM tree is looked up, and the directory
# holding the DICOM SEG metadata JSON files.
collection_base_dir = Path("/mnt/nfs/slow_ai_team/aimi")
dicom_seg_meta_dir = Path("dcm-meta")

# Executable invoked (via subprocess) to write the DICOM SEG objects.
# NOTE(review): machine-specific absolute path — adjust when running elsewhere.
itkimage2segimage_bin = "/home/vanossj/github/dcmqi/build/dcmqi-build/bin/itkimage2segimage"


In [4]:
def _convert_to_dicom_seg(
    seg_file: Path,
    dicom_seg_meta_json: Path,
    dcm_dir: Path,
    out_file: Path,
    add_background_label: bool = False,
):
    """Convert one segmentation image to a DICOM SEG file with ``itkimage2segimage``.

    Args:
        seg_file: Segmentation image handed to the converter as
            ``--inputImageList`` (read with SimpleITK first when
            ``add_background_label`` is set).
        dicom_seg_meta_json: Metadata JSON passed as ``--inputMetadata``.
        dcm_dir: Directory of the referenced DICOM series, passed as
            ``--inputDICOMDirectory``. Must exist.
        out_file: Destination passed as ``--outputDICOM``; its parent
            directory is created if missing.
        add_background_label: When true, every voxel value is incremented by
            one in a temporary copy of the image and that copy is converted
            instead of ``seg_file``.

    Raises:
        AssertionError: If ``dcm_dir`` does not exist.
        subprocess.CalledProcessError: If the converter exits with a non-zero
            status.
    """
    assert dcm_dir.exists(), dcm_dir
    out_file.parent.mkdir(parents=True, exist_ok=True)

    def _run_converter(input_image: Path) -> None:
        # Single place that builds, logs and runs the converter command so the
        # two code paths below cannot drift apart.
        args = [
            itkimage2segimage_bin,
            "--skip",
            "--inputImageList",
            str(input_image),
            "--inputDICOMDirectory",
            str(dcm_dir),
            "--outputDICOM",
            str(out_file),
            "--inputMetadata",
            str(dicom_seg_meta_json),
        ]
        # Log the exact command so a failing conversion can be reproduced by hand.
        print(" ".join(args))
        subprocess.run(args, check=True)

    if not add_background_label:
        _run_converter(seg_file)
        return

    # Add background label: offset every label value by 1 in a scratch copy.
    # The converter must run while the temporary directory still exists.
    with TemporaryDirectory() as temp_dir:
        temp_seg_file = Path(temp_dir) / "temp_seg.nii.gz"
        img = sitk.ReadImage(str(seg_file))
        img += 1
        sitk.WriteImage(img, str(temp_seg_file))
        _run_converter(temp_seg_file)


def convert_to_dicom_seg(
    seg_file: Path,
    dicom_seg_meta_json: Path,
    dcm_dir: Path,
    out_file: Path,
    add_background_label: bool = False,
):
    """Convert a segmentation to DICOM SEG, retrying once on fixed DICOM files.

    The conversion is first attempted against ``dcm_dir`` as-is. If that
    attempt raises, the failure is reported, ``fix_dicom_dir`` is called with
    ``dcm_dir`` and a temporary directory, and the conversion is attempted a
    second time against the directory that ``fix_dicom_dir`` returns.

    Args:
        seg_file: Segmentation image to convert.
        dicom_seg_meta_json: Metadata JSON for the converter.
        dcm_dir: Directory of the referenced DICOM series.
        out_file: Destination DICOM SEG file.
        add_background_label: Forwarded unchanged to both attempts.

    Raises:
        Exception: Whatever ``fix_dicom_dir`` or the second conversion attempt
            raises; errors from the first attempt are logged, not raised.
    """
    try:
        _convert_to_dicom_seg(
            seg_file,
            dicom_seg_meta_json,
            dcm_dir,
            out_file,
            add_background_label=add_background_label,
        )
        return
    except Exception as first_error:
        # Do not swallow the first failure silently: report it so the reason
        # for the retry is visible in the cell output.
        print(
            f"DICOM SEG conversion failed for {seg_file} ({first_error!r}); "
            "retrying with fixed DICOM files"
        )

    # fix the dicom files, and try again
    with TemporaryDirectory() as fixed_dcm_dir:
        real_dcm_dir = fix_dicom_dir(dcm_dir, Path(fixed_dcm_dir))
        _convert_to_dicom_seg(
            seg_file,
            dicom_seg_meta_json,
            real_dcm_dir,
            out_file,
            add_background_label=add_background_label,
        )


In [5]:
# Convert the reviewer-corrected (QA) and the AI segmentation of every row to
# DICOM SEG. Outputs that already exist are skipped, so the cell can be re-run.
# Rows whose AI segmentation fails to convert are collected in ``bad_rows``.
bad_rows = []

for _, row in tqdm(df.iterrows(), total=len(df)):
    series_uid = Path(row.PTSeriesInstanceUID)
    study_uid = Path(row.StudyInstanceUID)
    patient_id = row.PatientID
    reviewer = row.Reviewer
    study_date = row.StudyDate
    study_date_suffix = row.StudyDate_suffix
    collection = row.Collection.replace("-", "_")

    # qa segmentation info (only present when the reviewer corrected the mask)
    qa_seg_file = (
        Path("qa-segmentations") / row.CorrectedSegmentation
        if row.CorrectedSegmentation
        else None
    )
    qa_seg_dcm_file = (
        Path("qa-segmentations-dcm")
        / f"{reviewer}_{patient_id}_{study_date}_{study_date_suffix}.seg.dcm"
    )
    qa_dcm_meta_json = dicom_seg_meta_dir / f"{reviewer}-dicom-seg-meta.json"

    # ai segmentation info
    ai_seg_file = Path("ai-segmentations") / row.AISegmentation
    ai_seg_dcm_file = (
        Path("ai-segmentations-dcm")
        / f"ai_{patient_id}_{study_date}_{study_date_suffix}.seg.dcm"
    )
    # Kept in its own variable: reusing ``qa_dcm_meta_json`` here would make the
    # QA conversion below pick up the AI metadata instead of the reviewer's.
    ai_dcm_meta_json = dicom_seg_meta_dir / "ai-dicom-seg-meta.json"

    # find the referenced dicom dir
    # NOTE(review): the series is taken from PTSeriesInstanceUID, not
    # CTSeriesInstanceUID — confirm this is the intended reference series.
    dcm_dir = (
        collection_base_dir / collection / "dcm" / patient_id / study_uid / series_uid
    )
    assert dcm_dir.exists(), dcm_dir

    # process qa segmentation
    if qa_seg_file is not None and not qa_seg_dcm_file.exists():
        assert qa_seg_file.exists(), qa_seg_file
        assert qa_dcm_meta_json.exists(), qa_dcm_meta_json
        qa_seg_dcm_file.parent.mkdir(parents=True, exist_ok=True)
        _convert_to_dicom_seg(qa_seg_file, qa_dcm_meta_json, dcm_dir, qa_seg_dcm_file)

    # process ai segmentation
    if not ai_seg_dcm_file.exists():
        assert ai_seg_file.exists(), ai_seg_file
        assert ai_dcm_meta_json.exists(), ai_dcm_meta_json
        ai_seg_dcm_file.parent.mkdir(parents=True, exist_ok=True)
        try:
            _convert_to_dicom_seg(
                ai_seg_file, ai_dcm_meta_json, dcm_dir, ai_seg_dcm_file
            )
        except Exception:
            # Keep going; the failing rows are written out for inspection later.
            bad_rows.append(row)
print(len(bad_rows))

  0%|          | 0/816 [00:00<?, ?it/s]

/home/vanossj/github/dcmqi/build/dcmqi-build/bin/itkimage2segimage --skip --inputImageList ai-segmentations/ai_ACRIN-NSCLC-FDG-PET-050_19600628_0.nii.gz --inputDICOMDirectory /mnt/nfs/slow_ai_team/aimi/acrin_nsclc_fdg_pet/dcm/ACRIN-NSCLC-FDG-PET-050/1.3.6.1.4.1.14519.5.2.1.7009.2403.242551432460469318383189170462/1.3.6.1.4.1.14519.5.2.1.7009.2403.755593937347614119358384083463 --outputDICOM ai-segmentations-dcm/ai_ACRIN-NSCLC-FDG-PET-050_19600628_0.seg.dcm --inputMetadata dcm-meta/ai-dicom-seg-meta.json
dcmqi repository URL: git@github.com:QIICR/dcmqi.git revision: 451bf84 tag: latest
Loaded segmentation from ai-segmentations/ai_ACRIN-NSCLC-FDG-PET-050_19600628_0.nii.gz
Searching recursively /mnt/nfs/slow_ai_team/aimi/acrin_nsclc_fdg_pet/dcm/ACRIN-NSCLC-FDG-PET-050/1.3.6.1.4.1.14519.5.2.1.7009.2403.242551432460469318383189170462/1.3.6.1.4.1.14519.5.2.1.7009.2403.755593937347614119358384083463 for DICOM files
SOPInstanceUID1.3.6.1.4.1.14519.5.2.1.7009.2403.984351235592302824283540860481

FATAL ERROR: Writing of the SEG dataset failed! Error: Invalid Object. Please report the problem to the developers, ideally accompanied by a de-identified dataset allowing to reproduce the problem!
ERROR: Conversion failed.


Loaded segmentation from ai-segmentations/ai_ACRIN-NSCLC-FDG-PET-146_19591215_0.nii.gz
Searching recursively /mnt/nfs/slow_ai_team/aimi/acrin_nsclc_fdg_pet/dcm/ACRIN-NSCLC-FDG-PET-146/1.3.6.1.4.1.14519.5.2.1.7009.2403.998525266705454048453728537785/1.3.6.1.4.1.14519.5.2.1.7009.2403.180358777415598308526008684678 for DICOM files
SOPInstanceUID1.3.6.1.4.1.14519.5.2.1.7009.2403.224781262056224596136911031761 mapped
SOPInstanceUID1.3.6.1.4.1.14519.5.2.1.7009.2403.290769091683518270382112950654 mapped
SOPInstanceUID1.3.6.1.4.1.14519.5.2.1.7009.2403.126135721661744210732075147542 mapped
SOPInstanceUID1.3.6.1.4.1.14519.5.2.1.7009.2403.491792139923579502166836100315 mapped
SOPInstanceUID1.3.6.1.4.1.14519.5.2.1.7009.2403.188737485889503553480697870801 mapped
SOPInstanceUID1.3.6.1.4.1.14519.5.2.1.7009.2403.191558404647569349825245647341 mapped
SOPInstanceUID1.3.6.1.4.1.14519.5.2.1.7009.2403.174348423302521023000379847079 mapped
SOPInstanceUID1.3.6.1.4.1.14519.5.2.1.7009.2403.31615019323960867938

FATAL ERROR: Writing of the SEG dataset failed! Error: Invalid Object. Please report the problem to the developers, ideally accompanied by a de-identified dataset allowing to reproduce the problem!
ERROR: Conversion failed.


2


In [6]:
# Gather the rows that failed conversion into one table, persist it next to
# the notebook for follow-up, and show it as the cell output.
bad_rows_df = pd.DataFrame(data=bad_rows)
bad_rows_df.to_csv(path_or_buf="bad-rows.csv", index=False)
bad_rows_df

Unnamed: 0,Reviewer,Validation,Collection,PatientID,StudyDate,LikertScore,CorrectedSegmentation,CommentsAboutAISegmentation,CommentsAboutScan,StudyInstanceUID,PTSeriesInstanceUID,CTSeriesInstanceUID,StudyDate_suffix,AISegmentation
0,,False,acrin_nsclc_fdg_pet,ACRIN-NSCLC-FDG-PET-050,19600628,,,,,1.3.6.1.4.1.14519.5.2.1.7009.2403.242551432460...,1.3.6.1.4.1.14519.5.2.1.7009.2403.755593937347...,1.3.6.1.4.1.14519.5.2.1.7009.2403.228013702784...,0,ai_ACRIN-NSCLC-FDG-PET-050_19600628_0.nii.gz
2,,False,acrin_nsclc_fdg_pet,ACRIN-NSCLC-FDG-PET-146,19591215,,,,,1.3.6.1.4.1.14519.5.2.1.7009.2403.998525266705...,1.3.6.1.4.1.14519.5.2.1.7009.2403.180358777415...,1.3.6.1.4.1.14519.5.2.1.7009.2403.302495359370...,0,ai_ACRIN-NSCLC-FDG-PET-146_19591215_0.nii.gz
