In [1]:
import csv
from pathlib import Path

from damply import dirs
from imgtools.coretypes.masktypes import RTStructureSet

In [2]:
# Dataset to process and the root folder of its source data.
# NOTE(review): this is an absolute, machine-specific path, while later cells
# build paths from damply's `dirs` — consider unifying the two.
dataset = "CPTAC-PDA"
data_dir = Path(f"/home/bhkuser/bhklab/radiomics/PublicDatasets/srcdata/Abdomen/TCIA_{dataset}")

# Folder searched for RTSTRUCT DICOM files, and the CSV that receives the
# image series IDs those files reference.
image_dir = data_dir.joinpath("images", "RTSTRUCT")
image_ids_file_path = data_dir.joinpath("metadata", "image_series_ids.csv")

In [22]:
# Collect the image series UID referenced by every RTSTRUCT DICOM file found
# under image_dir. A set is used so a series referenced by several structure
# sets is recorded only once.
unique_image_ids = set()

for filepath in image_dir.glob("**/RTSTRUCT/**/*.dcm"):
    mask = RTStructureSet.from_dicom(dicom=filepath)
    unique_image_ids.add(mask.metadata['ReferencedSeriesUID'])

# Sort so the list (and the CSV written from it) has the same order on every
# run; the order of list(set(...)) is arbitrary. key=str keeps the sort from
# failing should any value not be a plain string.
image_ids = sorted(unique_image_ids, key=str)

In [25]:
# Write one referenced image series ID per row.
# newline='' is what the csv module requires for files it writes to: the
# writer emits its own line terminators, and without it an extra "\r" is
# added on platforms that translate "\n" on write.
with open(image_ids_file_path, mode='w', newline='') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows([image_id] for image_id in image_ids)

# Find the ROIName of a problematic RTSTRUCT file

In [4]:
# RTSTRUCT file under investigation, located relative to damply's RAWDATA directory.
# NOTE(review): the dataset folder (CPTAC-CCRCC) is hard-coded here and differs
# from the `dataset` variable set earlier in the notebook.
broken_file = dirs.RAWDATA / "TCIA_CPTAC-CCRCC/images/RTSTRUCT/C3L-00812/21212/RTSTRUCT/915.4/1.dcm"
# Load the structure set; `mask` is inspected in the next cell.
mask = RTStructureSet.from_dicom(dicom=broken_file)

In [5]:
# Display the metadata dictionary of the loaded structure set (includes ROINames and ReferencedSeriesUID).
mask.metadata

{'BodyPartExamined': 'CHEST',
 'FrameOfReferenceUID': '1.3.6.1.4.1.14519.5.2.1.6450.2626.118728062024119475455338223375',
 'Manufacturer': 'Open Health Imaging Foundation',
 'ManufacturerModelName': 'OHIF-XNAT Viewer 3.2.0',
 'Modality': 'RTSTRUCT',
 'NumROIs': 1,
 'PatientID': 'C3L-00812',
 'ROINames': ['LT PARA AORTIC'],
 'ReferencedSeriesUID': '1.3.6.1.4.1.14519.5.2.1.6450.2626.108818962120167259627963514393',
 'SeriesDate': '20230525',
 'SeriesInstanceUID': '1.2.826.0.1.534147.667.2747872357.2023425485915.4',
 'SoftwareVersions': 'gmp_vct.42',
 'StructureSetDate': '20230525',
 'StructureSetLabel': 'LT PARA AORTIC L',
 'StructureSetTime': '040859',
 'StudyDate': '20081129',
 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.6450.2626.110399349269235997642692221212',
 'StudyTime': '125004'}

In [6]:
from imgtools.dicom.dicom_metadata.modality_utils.rtstruct_utils import (
    extract_roi_names,
)
from pydicom import dcmread

# Read the same file directly with pydicom so the raw DICOM attributes can be inspected.
rtstruct = dcmread(broken_file)


# Display the ROI names returned by imgtools' extraction helper for this dataset.
extract_roi_names(rtstruct)

['LT PARA AORTIC']

In [7]:
# Read the name of the first ROI straight from the DICOM StructureSetROISequence for comparison.
rtstruct.StructureSetROISequence[0].ROIName

'LT PARA AORTIC'

In [8]:
roi_sequence = rtstruct.StructureSetROISequence

# Attributes copied from each ROI entry; a missing attribute becomes "".
roi_fields = ("ROINumber", "ROIName", "ROIGenerationAlgorithm")

roi_metas = []
for roi in roi_sequence:
    roi_meta = {field: getattr(roi, field, "") for field in roi_fields}
    roi_metas.append(roi_meta)

# NOTE(review): this displays only the dict of the LAST ROI in the sequence;
# the accumulated list `roi_metas` is never shown — confirm this is intended.
roi_meta

{'ROINumber': '1',
 'ROIName': 'LT PARA AORTIC',
 'ROIGenerationAlgorithm': 'MANUAL'}

In [9]:
from imgtools.coretypes.masktypes.roi_matching import handle_roi_matching

# Matching rules: a single "GTV" key with one catch-all regular expression.
roi_matching = {"GTV": [".*"]}
# ROI names taken from the RTSTRUCT file read above.
roi_names = extract_roi_names(rtstruct)

# Run imgtools' matcher on those names with the "SEPARATE" strategy.
handle_roi_matching(roi_names=roi_names, roi_matching=roi_matching, strategy="SEPARATE")

In [10]:
import re

# Sanity check: does the catch-all pattern ".*" fully match the string "GTV"?
# The expression is False when a match is found.
re.fullmatch(".*", "GTV") is None

False

In [11]:
from itertools import product

# Reproduce the matching by hand: for each list of patterns in roi_matching,
# test every (pattern, ROI name) pair and print the pairs that fully match.
# Only the pattern lists are needed, so iterate over the values and drop the
# unused key variable.
for patterns in roi_matching.values():
    for pattern, roi_name in product(patterns, roi_names):
        if re.fullmatch(pattern, roi_name):
            print(roi_name, pattern)

LT PARA AORTIC .*


# Manually create the simplified MIT index

In [2]:
import pandas as pd
from imgtools.autopipeline import SIMPLIFIED_COLUMNS

# NOTE(review): this reassigns `dataset`, which an earlier cell set to a different value.
dataset = "TCGA-KIRC"

# Directory that holds the index files for this dataset; both the input and
# the output CSV live here.
mit_dir = dirs.PROCDATA / f"TCIA_{dataset}" / "images" / f"mit_{dataset}"
path = mit_dir / f"mit_{dataset}_index.csv"

# Load the full index.
mit_index = pd.read_csv(path)

# Restrict and reorder the columns to SIMPLIFIED_COLUMNS.
mit_index_simple = mit_index.reindex(columns=SIMPLIFIED_COLUMNS)

# Save the simplified index next to the full one, without the row index.
mit_index_simple.to_csv(mit_dir / f"mit_{dataset}_index-simple.csv", index=False)

In [3]:
# Display the simplified index to check the resulting columns and rows.
mit_index_simple

Unnamed: 0,filepath,hash,saved_time,SampleNumber,ImageID,PatientID,Modality,SeriesInstanceUID,StudyInstanceUID,ReferencedSeriesUID,...,mask.bbox.min_coord,mask.bbox.max_coord,mask.feret_diameter,mask.roundness,mask.flatness,mask.elongation,mask.equivalent_spherical_radius,mask.equivalent_spherical_perimeter,mask.equivalent_ellipsoid_diameters,mask.volume_count
0,TCGA-B0-4821_0004/CT_46126930/CT.nii.gz,5a9c927603cd8bf5eba2d11b8ceb7c57466a7c83,2025-07-09:20:15:56,4,CT,TCGA-B0-4821,CT,1.3.6.1.4.1.14519.5.2.1.6450.4004.271668964298...,1.3.6.1.4.1.14519.5.2.1.6450.4004.106932788840...,,...,,,,,,,,,,
1,TCGA-B0-4833_0006/CT_12651322/CT.nii.gz,72347b588299f6508f7a3210a47147b633abc619,2025-07-09:20:15:58,6,CT,TCGA-B0-4833,CT,1.3.6.1.4.1.14519.5.2.1.6450.4004.175542747344...,1.3.6.1.4.1.14519.5.2.1.6450.4004.131965569276...,,...,,,,,,,,,,
2,TCGA-B0-4821_0005/CT_61816317/CT.nii.gz,f15cf67175a2d672a79a129e5ad33c11aea125e3,2025-07-09:20:15:57,5,CT,TCGA-B0-4821,CT,1.3.6.1.4.1.14519.5.2.1.6450.4004.197843508979...,1.3.6.1.4.1.14519.5.2.1.6450.4004.106932788840...,,...,,,,,,,,,,
3,TCGA-B0-4839_0007/CT_28822160/CT.nii.gz,6185c1ff08973452fd4b21c8674fc6a0e9e0c417,2025-07-09:20:15:59,7,CT,TCGA-B0-4839,CT,1.3.6.1.4.1.14519.5.2.1.6450.4004.170990576592...,1.3.6.1.4.1.14519.5.2.1.6450.4004.185063702452...,,...,,,,,,,,,,
4,TCGA-B0-4713_0003/CT_89275370/CT.nii.gz,6b71d9f8da1a720d0704790a745fd8a2de51b86a,2025-07-09:20:15:57,3,CT,TCGA-B0-4713,CT,1.3.6.1.4.1.14519.5.2.1.6450.4004.193245818439...,1.3.6.1.4.1.14519.5.2.1.6450.4004.267828530238...,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
380,TCGA-DV-5576_0378/CT_06999966/CT.nii.gz,da7d0c083dbeaf51cf24ba6edcff5c013d53d425,2025-07-09:20:22:01,378,CT,TCGA-DV-5576,CT,1.3.6.1.4.1.14519.5.2.1.1357.4004.589207600295...,1.3.6.1.4.1.14519.5.2.1.1357.4004.501039540205...,,...,,,,,,,,,,
381,TCGA-G6-A8L7_0384/CT_35697205/CT.nii.gz,bd5607740d9aae7a56956184449aa6b215393cbd,2025-07-09:20:22:05,384,CT,TCGA-G6-A8L7,CT,1.3.6.1.4.1.14519.5.2.1.3023.4004.225868719049...,1.3.6.1.4.1.14519.5.2.1.3023.4004.166818214324...,,...,,,,,,,,,,
382,TCGA-G6-A8L7_0383/CT_80952146/CT.nii.gz,88044ae494fc78dabaf0f15f0fb6c2fc8809e6cc,2025-07-09:20:22:05,383,CT,TCGA-G6-A8L7,CT,1.3.6.1.4.1.14519.5.2.1.3023.4004.162874589239...,1.3.6.1.4.1.14519.5.2.1.3023.4004.166818214324...,,...,,,,,,,,,,
383,TCGA-DV-A4W0_0379/CT_28403438/CT.nii.gz,de9943c4e7220cf181a9a59e6525a777c2a4c716,2025-07-09:20:22:02,379,CT,TCGA-DV-A4W0,CT,1.3.6.1.4.1.14519.5.2.1.1357.4004.218894913071...,1.3.6.1.4.1.14519.5.2.1.1357.4004.931221705159...,,...,,,,,,,,,,
