In [2]:
from imgtools.coretypes.masktypes import RTStructureSet
from pathlib import Path
import csv
from damply import dirs

In [2]:
dataset = "CPTAC-PDA"
data_dir = Path(f"/home/bhkuser/bhklab/radiomics/PublicDatasets/srcdata/Abdomen/TCIA_{dataset}")

image_dir = data_dir / "images" / 'RTSTRUCT'
image_ids_file_path = data_dir / "metadata" / "image_series_ids.csv"

In [22]:
image_ids = []

for filepath in image_dir.glob("**/RTSTRUCT/**/*.dcm"):
    mask = RTStructureSet.from_dicom(dicom=filepath)
    ref_image_series_id = mask.metadata['ReferencedSeriesUID']
    image_ids.append(ref_image_series_id)

# remove duplicate values
image_ids = list(set(image_ids))

In [25]:
with open(image_ids_file_path, mode='w') as csvfile:
    writer = csv.writer(csvfile)
    writer.writerows([[image_id] for image_id in image_ids])

# Find ROIName

In [4]:
broken_file = dirs.RAWDATA / "TCIA_CPTAC-CCRCC/images/RTSTRUCT/C3L-00812/21212/RTSTRUCT/915.4/1.dcm"
mask = RTStructureSet.from_dicom(dicom=broken_file)

In [5]:
mask.metadata

{'BodyPartExamined': 'CHEST',
 'FrameOfReferenceUID': '1.3.6.1.4.1.14519.5.2.1.6450.2626.118728062024119475455338223375',
 'Manufacturer': 'Open Health Imaging Foundation',
 'ManufacturerModelName': 'OHIF-XNAT Viewer 3.2.0',
 'Modality': 'RTSTRUCT',
 'NumROIs': 1,
 'PatientID': 'C3L-00812',
 'ROINames': ['LT PARA AORTIC'],
 'ReferencedSeriesUID': '1.3.6.1.4.1.14519.5.2.1.6450.2626.108818962120167259627963514393',
 'SeriesDate': '20230525',
 'SeriesInstanceUID': '1.2.826.0.1.534147.667.2747872357.2023425485915.4',
 'SoftwareVersions': 'gmp_vct.42',
 'StructureSetDate': '20230525',
 'StructureSetLabel': 'LT PARA AORTIC L',
 'StructureSetTime': '040859',
 'StudyDate': '20081129',
 'StudyInstanceUID': '1.3.6.1.4.1.14519.5.2.1.6450.2626.110399349269235997642692221212',
 'StudyTime': '125004'}

In [6]:
from imgtools.dicom.dicom_metadata.modality_utils.rtstruct_utils import extract_roi_names
from pydicom import dcmread

rtstruct = dcmread(broken_file)


extract_roi_names(rtstruct)

['LT PARA AORTIC']

In [7]:
rtstruct.StructureSetROISequence[0].ROIName

'LT PARA AORTIC'

In [8]:
roi_sequence = rtstruct.StructureSetROISequence

roi_metas = []
for roi in roi_sequence:
    roi_meta = {}
    roi_meta["ROINumber"] = getattr(roi, "ROINumber", "")
    roi_meta["ROIName"] = getattr(roi, "ROIName", "")
    roi_meta["ROIGenerationAlgorithm"] = getattr(
        roi, "ROIGenerationAlgorithm", ""
    )
    roi_metas.append(roi_meta)

roi_meta

{'ROINumber': '1',
 'ROIName': 'LT PARA AORTIC',
 'ROIGenerationAlgorithm': 'MANUAL'}

In [9]:
from imgtools.coretypes.masktypes.roi_matching import ROIMatcher, handle_roi_matching

roi_matching = {"GTV": [".*"]}
roi_names = extract_roi_names(rtstruct)

handle_roi_matching(
    roi_names = roi_names,
    roi_matching = roi_matching,
    strategy = "SEPARATE"
)

In [10]:
import re

re.fullmatch(".*", "GTV") is None

False

In [11]:
from itertools import product

for keys, patterns in roi_matching.items():
    for pattern, roi_name in product(patterns, roi_names):
            if re.fullmatch(pattern, roi_name):
                print(roi_name, pattern)

LT PARA AORTIC .*


# Make Simple MIT Index artificially

In [6]:
from imgtools.autopipeline import SIMPLIFIED_COLUMNS
import pandas as pd

dataset = "TCGA-KIRC"
path = dirs.PROCDATA / f"TCIA_{dataset}" / "images" / f"mit_{dataset}" / f"mit_{dataset}_index.csv"

mit_index = pd.read_csv(filepath_or_buffer= path)

mit_index_simple = mit_index[mit_index.columns.intersection(SIMPLIFIED_COLUMNS)]

mit_index_simple.to_csv(dirs.PROCDATA / f"TCIA_{dataset}" / "images" / f"mit_{dataset}" / f"mit_{dataset}_index-simple.csv", index=False)

In [5]:
mit_index_simple

Unnamed: 0,ImageID,Modality,PatientID,SampleNumber,SeriesInstanceUID,StudyInstanceUID,class,direction,dtype_numpy,dtype_str,...,min,ndim,nvoxels,origin,saved_time,size,spacing,std,sum,variance
0,CT,CT,TCGA-B0-4821,4,1.3.6.1.4.1.14519.5.2.1.6450.4004.271668964298...,1.3.6.1.4.1.14519.5.2.1.6450.4004.106932788840...,Scan,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",<class 'numpy.int16'>,16-bit signed integer,...,-1024.0,3,13893632,"(-189.0, -180.0, -413.829987)",2025-07-09:20:15:56,"(512, 512, 53)","(0.703125, 0.703125, 4.999999711538463)",481.621193,-7.328850e+09,2.319590e+05
1,CT,CT,TCGA-B0-4833,6,1.3.6.1.4.1.14519.5.2.1.6450.4004.175542747344...,1.3.6.1.4.1.14519.5.2.1.6450.4004.131965569276...,Scan,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",<class 'numpy.int16'>,16-bit signed integer,...,-1024.0,3,15728640,"(-208.600006, -134.0, 24.26)",2025-07-09:20:15:58,"(512, 512, 60)","(0.703125, 0.703125, 7.372881186440678)",492.485091,-8.212319e+09,2.425416e+05
2,CT,CT,TCGA-B0-4821,5,1.3.6.1.4.1.14519.5.2.1.6450.4004.197843508979...,1.3.6.1.4.1.14519.5.2.1.6450.4004.106932788840...,Scan,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",<class 'numpy.int16'>,16-bit signed integer,...,-1024.0,3,31981568,"(-180.0, -180.0, -148.830002)",2025-07-09:20:15:57,"(512, 512, 122)","(0.703125, 0.703125, 3.5537191322314046)",495.100283,-1.629033e+10,2.451243e+05
3,CT,CT,TCGA-B0-4839,7,1.3.6.1.4.1.14519.5.2.1.6450.4004.170990576592...,1.3.6.1.4.1.14519.5.2.1.6450.4004.185063702452...,Scan,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",<class 'numpy.int16'>,16-bit signed integer,...,-1024.0,3,12582912,"(-235.199997, -265.200012, -340.799988)",2025-07-09:20:15:59,"(512, 512, 48)","(0.9375, 0.9375, 6.99999974468085)",481.982126,-6.188272e+09,2.323068e+05
4,CT,CT,TCGA-B0-4713,3,1.3.6.1.4.1.14519.5.2.1.6450.4004.193245818439...,1.3.6.1.4.1.14519.5.2.1.6450.4004.267828530238...,Scan,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",<class 'numpy.int32'>,32-bit signed integer,...,-2048.0,3,24117248,"(-180.0, -180.0, -510.25)",2025-07-09:20:15:57,"(512, 512, 92)","(0.703125, 0.703125, 5.0)",811.455312,-1.641115e+10,6.584597e+05
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
380,CT,CT,TCGA-DV-5576,378,1.3.6.1.4.1.14519.5.2.1.1357.4004.589207600295...,1.3.6.1.4.1.14519.5.2.1.1357.4004.501039540205...,Scan,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",<class 'numpy.int32'>,32-bit signed integer,...,-3024.0,3,40894464,"(-184.0, -190.0, -385.0)",2025-07-09:20:22:01,"(512, 512, 156)","(0.742187976837, 0.742187976837, 1.0)",1184.707823,-3.498562e+10,1.403533e+06
381,CT,CT,TCGA-G6-A8L7,384,1.3.6.1.4.1.14519.5.2.1.3023.4004.225868719049...,1.3.6.1.4.1.14519.5.2.1.3023.4004.166818214324...,Scan,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",<class 'numpy.int16'>,16-bit signed integer,...,-1024.0,3,14155776,"(-219.0, -124.0, 670.400024)",2025-07-09:20:22:05,"(512, 512, 54)","(0.80078125, 0.80078125, 5.0)",472.877736,-7.796019e+09,2.236134e+05
382,CT,CT,TCGA-G6-A8L7,383,1.3.6.1.4.1.14519.5.2.1.3023.4004.162874589239...,1.3.6.1.4.1.14519.5.2.1.3023.4004.166818214324...,Scan,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",<class 'numpy.int16'>,16-bit signed integer,...,-1024.0,3,30408704,"(-212.5, -117.5, 527.800049)",2025-07-09:20:22:05,"(512, 512, 116)","(0.775390625, 0.775390625, 5.000000008695653)",479.905074,-1.641844e+10,2.303089e+05
383,CT,CT,TCGA-DV-A4W0,379,1.3.6.1.4.1.14519.5.2.1.1357.4004.218894913071...,1.3.6.1.4.1.14519.5.2.1.1357.4004.931221705159...,Scan,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",<class 'numpy.int32'>,32-bit signed integer,...,-3024.0,3,50331648,"(-200.0, -200.0, -200.0)",2025-07-09:20:22:02,"(512, 512, 192)","(0.78125, 0.78125, 2.6178010471204187)",1184.081405,-4.494843e+10,1.402049e+06
