In [1]:
from pathlib import Path

import dpath

from imgtools.dicom import find_dicoms

In [6]:
import json
import logging
import os
import pathlib
import time
import typing as t
from collections import defaultdict

import click
import pandas as pd
from joblib import Parallel, delayed  # type: ignore
from pydicom import dcmread
from pydicom.errors import InvalidDicomError
from tqdm import tqdm
from tqdm.contrib.logging import logging_redirect_tqdm  # type: ignore

from imgtools.dicom import find_dicoms
from imgtools.dicom.input import rtstruct_reference_uids
from imgtools.logging import logger

# DICOM keywords that parse_dicom copies (stringified) into every metadata
# record it builds.
TAGS_OF_INTEREST = [
    # Identifiers, from patient down to the individual instance
    "PatientID",
    "StudyInstanceUID",
    "SeriesInstanceUID",
    "SOPInstanceUID",
    # parse_dicom branches on this value to decide which reference UIDs to add
    "Modality",
]


# A lightweight subclass of dict that allows for attribute access
class AttrDict(dict):
    def __getattr__(self, key: str) -> str | list:
        return self[key]

    def __setattr__(self, key: str, value: str | list) -> None:
        self[key] = value



In [8]:
# The sample dataset sits in a sibling folder of the current working directory.
dirpath = Path.cwd().parent / "TRASH/HEAD/HNPC"

# Collect candidate files from the whole tree below `dirpath`.
dcms = find_dicoms(
    dirpath,
    recursive=True,
    check_header=False,
)
len(dcms)


[2m2025-02-26T21:11:48-0500[0m [[32m[1mdebug    [0m] [1mLooking for DICOM files       [0m [[0m[1m[34mimgtools[0m][0m [36mcall[0m=[35mutils.find_dicoms:161[0m [36mcheck_header[0m=[35mFalse[0m [36mdirectory[0m=[35mPosixPath('/home/bioinf/bhklab/radiomics/Projects/med-imagetools/TRASH/HEAD/HNPC')[0m [36mlimit[0m=[35mNone[0m [36mrecursive[0m=[35mTrue[0m [36msearch_input[0m=[35mNone[0m [36msearch_pattern[0m=[35m*[0m


391

In [14]:

def parse_dicom(dcm_path: str) -> t.Dict:
    try:
        dcm = dcmread(
            dcm_path,
            force=True,
            stop_before_pixels=True,
        )
    except InvalidDicomError as e:
        logger.error(f"Error reading {dcm_path}: {e}")
        raise

    meta = AttrDict({tag: str(dcm.get(tag)) for tag in TAGS_OF_INTEREST})
    meta.filepath = dcm_path
    match meta["Modality"]:
        case "SEG":
            try:
                ref_series = dcm.ReferencedSeriesSequence[0].SeriesInstanceUID
                meta.ReferencedSeriesUID = ref_series
            except AttributeError:
                ref_seg_instance = dcm.SourceImageSequence[
                    0
                ].ReferencedSOPInstanceUID
                meta.ReferencedSOPInstanceUID = ref_seg_instance
        case "RTSTRUCT":
            ref_series, _ = rtstruct_reference_uids(dcm)
            meta.ReferencedSeriesUID = ref_series

        # For RTPLAN and RTDOSE, we store the same id Twice, for debugging, but we will
        # only use the common `ReferencedSOPInstanceUID` (also used in SEG)
        case "RTPLAN":
            ref_struct = dcm.ReferencedStructureSetSequence[
                0
            ].ReferencedSOPInstanceUID
            meta.ReferencedRTStructInstanceUID = ref_struct
            meta.ReferencedSOPInstanceUID = ref_struct
        case "RTDOSE":
            ref_plan = dcm.ReferencedRTPlanSequence[0].ReferencedSOPInstanceUID
            meta.ReferencedRTPlanInstanceUID = ref_plan
            meta.ReferencedSOPInstanceUID = ref_plan
        case "SR":
            if sr_seq := getattr(
                dcm, "CurrentRequestedProcedureEvidenceSequence", None
            ):
                ref_series = {
                    sr.ReferencedSeriesSequence[0].SeriesInstanceUID
                    for sr in sr_seq
                }
                meta.ReferencedSeriesUID = list(ref_series)
        case _:
            pass

    return meta
# Smoke-test the parser on the first discovered file only.
# Fetch that file explicitly: a for/break loop would silently leave `result`
# undefined (or stale from an earlier run) when no files were found.
dcm = next(iter(dcms), None)
if dcm is None:
    raise FileNotFoundError(f"No DICOM files found under {dirpath}")
result = parse_dicom(dcm)

result

{'PatientID': 'HN-CHUS-052',
 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.5168.2407.270192284011074135763414694629',
 'SeriesInstanceUID': '1.3.6.1.4.1.14519.5.2.1.5168.2407.259673657557881696121957341418',
 'SOPInstanceUID': '1.3.6.1.4.1.14519.5.2.1.5168.2407.196238472009303016456632231933',
 'Modality': 'RTSTRUCT',
 'filepath': PosixPath('/home/bioinf/bhklab/radiomics/Projects/med-imagetools/TRASH/HEAD/HNPC/RTSTRUCT_Series-41418/1-1.dcm'),
 'ReferencedSeriesUID': '1.3.6.1.4.1.14519.5.2.1.5168.2407.316675519384816522302881406362'}

In [26]:
from rich import print

# Build a nested dict in which each metadata value of `result` becomes one
# level of the key path; "|" is the separator for both joining and splitting.
x = {}
print(
    dpath.new(
        x,
        "|".join(map(str, result.values())),
        "HI",
        separator="|",
    )
)