In [1]:
from imgtools.coretypes.masktypes import RTStructureSet
from pathlib import Path
import csv
from damply import dirs

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
dataset = "CPTAC-PDA"
data_dir = Path(f"/home/bhkuser/bhklab/radiomics/PublicDatasets/srcdata/Abdomen/TCIA_{dataset}")

image_dir = data_dir / "images" / 'RTSTRUCT'
image_ids_file_path = data_dir / "metadata" / "image_series_ids.csv"

In [22]:
image_ids = []

for filepath in image_dir.glob("**/RTSTRUCT/**/*.dcm"):
    mask = RTStructureSet.from_dicom(dicom=filepath)
    ref_image_series_id = mask.metadata['ReferencedSeriesUID']
    image_ids.append(ref_image_series_id)

# remove duplicate values
image_ids = list(set(image_ids))

In [25]:
with open(image_ids_file_path, mode='w') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows([[image_id] for image_id in image_ids])

# Find ROIName

In [4]:
broken_file = dirs.RAWDATA / "TCIA_CPTAC-CCRCC/images/RTSTRUCT/C3L-00812/21212/RTSTRUCT/915.4/1.dcm"
mask = RTStructureSet.from_dicom(dicom=broken_file)

In [5]:
mask.metadata

{'BodyPartExamined': 'CHEST',
 'FrameOfReferenceUID': '1.3.6.1.4.1.14519.5.2.1.6450.2626.118728062024119475455338223375',
 'Manufacturer': 'Open Health Imaging Foundation',
 'ManufacturerModelName': 'OHIF-XNAT Viewer 3.2.0',
 'Modality': 'RTSTRUCT',
 'NumROIs': 1,
 'PatientID': 'C3L-00812',
 'ROINames': ['LT PARA AORTIC'],
 'ReferencedSeriesUID': '1.3.6.1.4.1.14519.5.2.1.6450.2626.108818962120167259627963514393',
 'SeriesDate': '20230525',
 'SeriesInstanceUID': '1.2.826.0.1.534147.667.2747872357.2023425485915.4',
 'SoftwareVersions': 'gmp_vct.42',
 'StructureSetDate': '20230525',
 'StructureSetLabel': 'LT PARA AORTIC L',
 'StructureSetTime': '040859',
 'StudyDate': '20081129',
 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.6450.2626.110399349269235997642692221212',
 'StudyTime': '125004'}

In [6]:
from imgtools.dicom.dicom_metadata.modality_utils.rtstruct_utils import extract_roi_names
from pydicom import dcmread

rtstruct = dcmread(broken_file)


extract_roi_names(rtstruct)

['LT PARA AORTIC']

In [7]:
rtstruct.StructureSetROISequence[0].ROIName

'LT PARA AORTIC'

In [8]:
roi_sequence = rtstruct.StructureSetROISequence

roi_metas = []
for roi in roi_sequence:
    roi_meta = {}
    roi_meta["ROINumber"] = getattr(roi, "ROINumber", "")
    roi_meta["ROIName"] = getattr(roi, "ROIName", "")
    roi_meta["ROIGenerationAlgorithm"] = getattr(
        roi, "ROIGenerationAlgorithm", ""
    )
    roi_metas.append(roi_meta)

roi_meta

{'ROINumber': '1',
 'ROIName': 'LT PARA AORTIC',
 'ROIGenerationAlgorithm': 'MANUAL'}

In [9]:
from imgtools.coretypes.masktypes.roi_matching import ROIMatcher, handle_roi_matching

roi_matching = {"GTV": [".*"]}
roi_names = extract_roi_names(rtstruct)

handle_roi_matching(
    roi_names = roi_names,
    roi_matching = roi_matching,
    strategy = "SEPARATE"
)

In [10]:
import re

re.fullmatch(".*", "GTV") is None

False

In [11]:
from itertools import product

for keys, patterns in roi_matching.items():
    for pattern, roi_name in product(patterns, roi_names):
            if re.fullmatch(pattern, roi_name):
                print(roi_name, pattern)

LT PARA AORTIC .*


# Make Simple MIT Index artificially

In [18]:
from imgtools.autopipeline import SIMPLIFIED_COLUMNS
import pandas as pd
path = dirs.PROCDATA / "TCIA_CPTAC-CCRCC" / "images" / "mit_CPTAC-CCRCC" / "mit_CPTAC-CCRCC_index.csv"

mit_index = pd.read_csv(filepath_or_buffer= path)

mit_index_simple = mit_index[SIMPLIFIED_COLUMNS]

mit_index_simple.to_csv(dirs.PROCDATA / "TCIA_CPTAC-CCRCC" / "images" / "mit_CPTAC-CCRCC" / "mit_CPTAC-CCRCC_index-simple.csv", index=False)