In [1]:
from radiomics import featureextractor
from damply import dirs
from pathlib import Path
import SimpleITK as sitk
import pandas as pd

In [2]:
settings = dirs.CONFIG / "pyradiomics" / "pyradiomics_original_single_feature.yaml" 
image = sitk.ReadImage(f"{dirs.PROCDATA}/TCIA_NSCLC-Radiomics_test/images/mit_NSCLC-Radiomics_test/LUNG1-001_0000/CT_63382046/CT.nii.gz")
mask = sitk.ReadImage(f"{dirs.PROCDATA}/TCIA_NSCLC-Radiomics_test/images/mit_NSCLC-Radiomics_test/LUNG1-001_0000/RTSTRUCT_35578236/GTV.nii.gz")

In [3]:
dataset_index = pd.read_csv(f"{dirs.PROCDATA}/TCIA_NSCLC-Radiomics_test/features/pyradiomics/pyradiomics_NSCLC-Radiomics_test_index.csv")

dataset_index['DataSource'] = "TCIA"

metadata = dataset_index.iloc[0]['DataSource']
metadata

'TCIA'

# Testing out functions

In [4]:
import sys; sys.path.append("../workflow/scripts")
from feature_extraction.extract import extract_dataset_features, pyradiomics_extract, compile_dataset_features

compile_dataset_features(dataset_index=dataset_index,
                          method="pyradiomics",
                          settings_name="pyradiomics_original_single_feature")

[2m        [0m [[31m[1merror    [0m] [1mNo non-empty dataframes found for shuffled non_roi.[0m [[0m[1m[34mreadii[0m][0m [36mcall[0m=[35mextract.compile_dataset_features:267[0m
[2m        [0m [[31m[1merror    [0m] [1mEmpty file written to /home/bhkuser/bhklab/katy/readii_2_roqc/data/results/TCIA_NSCLC-Radiomics_test/features/pyradiomics/pyradiomics_original_single_feature/shuffled_non_roi_features.csv[0m [[0m[1m[34mreadii[0m][0m [36mcall[0m=[35mextract.compile_dataset_features:272[0m
[2m        [0m [[31m[1merror    [0m] [1mNo non-empty dataframes found for shuffled full.[0m [[0m[1m[34mreadii[0m][0m [36mcall[0m=[35mextract.compile_dataset_features:267[0m
[2m        [0m [[31m[1merror    [0m] [1mEmpty file written to /home/bhkuser/bhklab/katy/readii_2_roqc/data/results/TCIA_NSCLC-Radiomics_test/features/pyradiomics/pyradiomics_original_single_feature/shuffled_full_features.csv[0m [[0m[1m[34mreadii[0m][0m [36mcall[0m=[35mextract

{'randomized_non_roi': 0        SampleID                                              Image  \
 1  LUNG1-001_0000  readii_NSCLC-Radiomics_test/LUNG1-001_0000/CT_...   
 1  LUNG1-002_0001  readii_NSCLC-Radiomics_test/LUNG1-002_0001/CT_...   
 
 0                                               Mask           DatasetName  \
 1  mit_NSCLC-Radiomics_test/LUNG1-001_0000/RTSTRU...  NSCLC-Radiomics_test   
 1  mit_NSCLC-Radiomics_test/LUNG1-002_0001/RTSTRU...  NSCLC-Radiomics_test   
 
 0 SeriesInstanceUID_Image Modality_Image SeriesInstanceUID_Mask Modality_Mask  \
 1                     NaN             CT                    NaN      RTSTRUCT   
 1                     NaN             CT                    NaN      RTSTRUCT   
 
 0 MaskID readii_Permutation  ... diagnostics_Mask-interpolated_Size  \
 1    GTV         randomized  ...                     (108, 101, 75)   
 1    GTV         randomized  ...                     (110, 109, 90)   
 
 0 diagnostics_Mask-interpolated_BoundingBox  \
 1  

In [5]:
extract_dataset_features(dataset = "NSCLC-Radiomics_test",
        method = "pyradiomics",
        settings = "pyradiomics_original_single_feature.yaml",
        overwrite = True,
        parallel = True)

KeyboardInterrupt: 

In [9]:
from itertools import product 
list(product(dataset_index['readii_Permutation'].unique(), dataset_index['readii_Region'].unique()))

[('original', 'full'),
 ('original', 'non_roi'),
 ('original', 'roi'),
 ('randomized', 'full'),
 ('randomized', 'non_roi'),
 ('randomized', 'roi'),
 ('sampled', 'full'),
 ('sampled', 'non_roi'),
 ('sampled', 'roi'),
 ('shuffled', 'full'),
 ('shuffled', 'non_roi'),
 ('shuffled', 'roi')]

# Development

In [14]:
# Convert settings Path to string for pyradiomics to read it
if isinstance(settings, Path):
    settings = str(settings)

try:
    # Set up PyRadiomics feature extractor with provided settings file (expects a string, not a pathlib Path)
    extractor = featureextractor.RadiomicsFeatureExtractor(settings)

    sample_feature_vector = extractor.execute(image, mask)

except Exception as e:
    print(f"Feature extraction failed for this sample: {e}")

In [5]:
metadata['Dataset'] = "NSCLC-Radiomics_test"
metadata

SampleID                                                      LUNG1-001_0000
Image                      mit_NSCLC-Radiomics_test/LUNG1-001_0000/CT_633...
Mask                       mit_NSCLC-Radiomics_test/LUNG1-001_0000/RTSTRU...
DatasetName                                             NSCLC-Radiomics_test
SeriesInstanceUID_Image    1.3.6.1.4.1.32722.99.99.2989917765213423750108...
Modality_Image                                                            CT
SeriesInstanceUID_Mask     1.3.6.1.4.1.32722.99.99.2279381215866080725084...
Modality_Mask                                                       RTSTRUCT
MaskID                                                                   GTV
readii_Permutation                                                  original
readii_Region                                                           full
Dataset                                                 NSCLC-Radiomics_test
Name: 0, dtype: object

In [15]:
from collections import OrderedDict
od_metadata = metadata.to_dict(into=OrderedDict)

In [9]:
features_dir_struct = Path(f"{dataset_index.iloc[0]['DataSource']}_{dataset_index.iloc[0]['DatasetName']}") / "features" / "pyradiomics" / "pyradiomics_original_single_feature"

# Set up path to the directory containing the sample feature files
sample_features_dir = dirs.PROCDATA / features_dir_struct

filelist = list(sample_features_dir.rglob("**/original_full_features.csv"))

pd.read_csv(filelist[0], index_col=0, header=None, sep=";").transpose()

Unnamed: 0,SampleID,Image,Mask,DatasetName,SeriesInstanceUID_Image,Modality_Image,SeriesInstanceUID_Mask,Modality_Mask,MaskID,readii_Permutation,...,diagnostics_Mask-interpolated_Size,diagnostics_Mask-interpolated_BoundingBox,diagnostics_Mask-interpolated_VoxelNum,diagnostics_Mask-interpolated_VolumeNum,diagnostics_Mask-interpolated_CenterOfMassIndex,diagnostics_Mask-interpolated_CenterOfMass,diagnostics_Mask-interpolated_Mean,diagnostics_Mask-interpolated_Minimum,diagnostics_Mask-interpolated_Maximum,original_shape_VoxelVolume
1,LUNG1-001_0000,mit_NSCLC-Radiomics_test/LUNG1-001_0000/CT_633...,mit_NSCLC-Radiomics_test/LUNG1-001_0000/RTSTRU...,NSCLC-Radiomics_test,1.3.6.1.4.1.32722.99.99.2989917765213423750108...,CT,1.3.6.1.4.1.32722.99.99.2279381215866080725084...,RTSTRUCT,GTV,original,...,"(108, 101, 75)","(6, 6, 7, 96, 89, 63)",160563,1,"(np.float64(54.732460155826686), np.float64(43...","(82.23246015582669, -201.5430858915192, -456.8...",-74.4041155185192,-1105.0,1081.0,160563.0
