In [3]:
%cd ..

/Users/katyscott/Documents/BHKLab_GitHub/readii_2_roqc


In [2]:
from pathlib import Path
from typing import Optional

import pandas as pd
import SimpleITK as sitk
from damply import dirs
from readii.image_processing import alignImages, flattenImage
from readii.io.loaders import loadImageDatasetConfig
from readii.io.writers.nifti_writer import NIFTIWriter, NiftiWriterIOError
from readii.negative_controls_refactor import NegativeControlManager
from readii.process.config import get_full_data_name
from readii.utils import logger

In [3]:
def get_readii_settings(dataset_config: dict) -> tuple[list, list, list]:
    """Extract READII settings from a configuration dictionary.
    
    Parameters
    ----------
    dataset_config : dict
        Configuration dictionary read in with `loadImageDatasetConfig` containing READII settings
    
    Returns
    -------
    tuple
        A tuple containing:
        - regions: list of regions to process
        - permutations: list of permutations to apply
        - crop: list of crop settings
    """
    readii_config = dataset_config['READII']
    if 'IMAGE_TYPES' not in readii_config:
        message = "READII configuration must contain 'IMAGE_TYPES'."
        logger.error(message)
        raise KeyError(message)
    
    regions = readii_config['IMAGE_TYPES']['regions']

    permutations = readii_config['IMAGE_TYPES']['permutations']

    crop = readii_config['IMAGE_TYPES']['crop']

    return regions, permutations, crop

In [11]:
def get_masked_image_metadata(dataset_index:pd.DataFrame,
                              dataset_config:Optional[dict] = None,
                              image_modality:Optional[str] = None,
                              mask_modality:Optional[str] = None):
    """Get rows of Med-ImageTools index.csv with the mask modality and the corresponding image modality and create a new index with just these rows for READII
    
    Parameters
    ----------
    dataset_index : pd.DataFrame
        DataFrame loaded from a Med-ImageTools index.csv containing image metadata. Must have columns for Modality, ReferencedSeriesUID, and SeriesInstanceUID.
    dataset_config : Optional[dict]
        Dictionary of configuration settings to get image and mask modality from for filtering dataset_index. Must include MIT MODALITIES image and MIT MODALITIES mask. Expected output from running loadImageDatasetConfig.
    image_modality : Optional[str]
        Image modality to filter dataset_index with. Will override dataset_config setting.
    mask_modality : Optional[str]
        Mask modality to filter dataset_index with. Will override dataset_config setting.

    Returns
    -------
    pd.DataFrame
        Subset of the dataset_index with just the masks and their reference images' metadata.
    """

    if image_modality is None:
        if dataset_config is None:
            message = "No image modality setting passed. Must pass a image_modality or dataset_config with an image modality setting."
            logger.error(message)
            raise ValueError(message)
        
        # Get the image modality from config to retrieve from the metadata
        image_modality = dataset_config["MIT"]["MODALITIES"]["image"]
    
    if mask_modality is None:
        if dataset_config is None:
            message = "No mask modality setting passed. Must pass a mask_modality or dataset_config with a mask modality setting."
            logger.error(message)
            raise ValueError(message)
        
        # Get the mask modality from config to retrieve from the metadata
        mask_modality = dataset_config["MIT"]["MODALITIES"]["mask"]

    # Get all metadata rows with the mask modality
    mask_metadata = dataset_index[dataset_index['Modality'] == mask_modality]

    # Get a Series of ReferenceSeriesUIDs from the masks - these point to the images the masks were made on
    referenced_series_ids = mask_metadata['ReferencedSeriesUID']
    
    # Get image metadata rows with a SeriesInstanceUID matching one of the ReferenceSeriesUIDS of the masks
    image_metadata = dataset_index[dataset_index['Modality'] == image_modality]
    masked_image_metadata = image_metadata[image_metadata['SeriesInstanceUID'].isin(referenced_series_ids)]

    # Return the subsetted metadata
    return pd.concat([masked_image_metadata, mask_metadata], sort=True)

In [12]:
def save_out_negative_controls(nifti_writer: NIFTIWriter,
                            patient_id: str,
                            image: sitk.Image,
                            region: str,
                            permutation: str):
    """Save out negative control images using the NIFTIWriter."""

    try:
        nifti_writer.save(
                        image,
                        PatientID=patient_id,
                        region=region,
                        permutation=permutation
                    )
    except NiftiWriterIOError:
        message = f"{permutation} {region} negative control file already exists for {patient_id}. If you wish to overwrite, set overwrite to true in the NIFTIWriter."
        logger.debug(message)

    return image

In [None]:
dataset = "NSCLC-Radiomics"
random_seed = 10

if dataset is None:
    message = "Dataset name must be provided."
    logger.error(message)
    raise ValueError(message)

config_dir_path = dirs.CONFIG / 'datasets'

dataset_config = loadImageDatasetConfig(dataset, config_dir_path)

dataset_name = dataset_config['DATASET_NAME']
full_data_name = get_full_data_name(config_dir_path / dataset)
logger.info(f"Creating negative controls for dataset: {dataset_name}")

# Extract READII settings
regions, permutations, _crop = get_readii_settings(dataset_config)

# Set up negative control manager with settings from config
manager = NegativeControlManager.from_strings(
    negative_control_types=permutations,
    region_types=regions,
    random_seed=random_seed
)

mit_images_dir_path = dirs.PROCDATA / full_data_name / 'images' /f'mit_{dataset_name}'

dataset_index = pd.read_csv(Path(mit_images_dir_path, f'mit_{dataset_name}_index.csv'))

image_modality = dataset_config["MIT"]["MODALITIES"]["image"]
mask_modality = dataset_config["MIT"]["MODALITIES"]["mask"]

masked_image_index = get_masked_image_metadata(dataset_index = dataset_index,
                                                image_modality = image_modality,
                                                mask_modality = mask_modality)

DirectoryNameNotFoundError: Project directory name 'CONFIG' not found in configuration or does not exist at '/Users/katyscott/Documents/BHKLab_GitHub/readii_2_roqc/sandbox/config'

In [14]:
# StudyInstanceUID
for study, study_data in masked_image_index.groupby('StudyInstanceUID'):
    logger.info(f"Processing StudyInstanceUID: {study}")

    # Get image metadata as a pd.Series
    image_metadata = study_data[study_data['Modality'] == image_modality].squeeze()
    image_path = Path(image_metadata['filepath'])
    # Load in image
    raw_image = sitk.ReadImage(mit_images_dir_path / image_path)
    # Remove extra dimension of image, set origin, spacing, direction to original
    image = alignImages(raw_image, flattenImage(raw_image))

    
    # Get mask metadata as a pd.Series
    # all_mask_metadata = study_data[study_data['Modality'] == mask_modality]

    # for mask_metadata in all_mask_metadata.iterrows():
    #     mask_path = Path(mask_metadata['filepath'])
    #     # Load in mask
    #     raw_mask = sitk.ReadImage(mit_images_dir_path / mask_path)
    #     mask = alignImages(raw_mask, flattenImage(raw_mask))

    #     # Set up writer for saving out the negative controls
    #     nifti_writer = NIFTIWriter(
    #         root_directory = mit_images_dir_path.parent / f'readii_{dataset_name}' / image_path.parent / mask_path.parent,
    #         filename_format = "{permutation}_{region}.nii.gz",
    #         overwrite = True,
    #         create_dirs = True
    #     )

TypeError: argument should be a str or an os.PathLike object where __fspath__ returns a str, not 'Series'

In [19]:
masked_image_index

Unnamed: 0,AcquisitionDate,AcquisitionNumber,AcquisitionTime,BodyPartExamined,CTDIvol,ContentDate,ContentTime,ContrastBolusAgent,ContrastBolusIngredient,ContrastBolusIngredientConcentration,...,ndim,nvoxels,origin,roi_key,saved_time,size,spacing,std,sum,variance
1,,,,,,,,,,,...,3,29097984,"(-250.112, -250.112, -133.4)",GTV,2025-05-29:17-31-31,"(512, 512, 111)","(0.977, 0.977, 3.0)",0.065499,125373.0,0.00429
3,,,,,,,,,,,...,3,35127296,"(-249.51171875, -460.51171875, -681.5)",GTV,2025-05-29:17-31-34,"(512, 512, 134)","(0.9765625, 0.9765625, 3.0)",0.039992,56271.0,0.001599


In [6]:
image_metadata['filepath']

'LUNG1-001_0000/CT_63382046/CT.nii.gz'

In [9]:
image.GetSpacing()

(1.0, 1.0, 1.0)

In [10]:
raw_image = sitk.ReadImage(mit_images_dir_path / image_path)
raw_image.GetSpacing()

(0.9765625, 0.9765625, 3.0)

In [14]:
neg_image.GetSpacing()

(0.9765625, 0.9765625, 3.0)

# Update readii index checking for overwrite

In [37]:
import itertools
readii_index_file = Path("/Users/katyscott/Documents/BHKLab_GitHub/readii_2_roqc/data/procdata/TCIA_NSCLC-Radiomics/images/readii_NSCLC-Radiomics/readii_NSCLC-Radiomics_index.csv")
overwrite = False

dataset_config = loadImageDatasetConfig("NSCLC-Radiomics.yaml", "/Users/katyscott/Documents/BHKLab_GitHub/readii_2_roqc/config/datasets")

regions, permutations, _crop = get_readii_settings(dataset_config)
seed = 10
# Set up negative control manager with settings from config
manager = NegativeControlManager.from_strings(
    negative_control_types=permutations,
    region_types=regions,
    random_seed=seed
    )


if readii_index_file.exists() and not overwrite: 
    # Load in readii index and check:
    # 1. if all negative controls requested have been extracted
    # 2. for all of the patients
    readii_index = pd.read_csv(readii_index_file)

    processed_image_types = {itype for itype in readii_index[['Permutation', 'Region']].itertuples(index=False, name=None)}
    requested_image_types = {itype for itype in itertools.product([permutation.name() for permutation in manager.negative_control_strategies],
                                                                  [region.name() for region in manager.region_strategies])}
    
    # Check if the requested image types are a subset of those already processed
    if requested_image_types.issubset(processed_image_types):
        print("Requested negative controls have already been generated for these samples or are listed in the readii index as if they have been. Set overwrite to true if you want to re-process these.")
    

Requested negative controls have already been generated for these samples or are listed in the readii index as if they have been. Set overwrite to true if you want to re-process these.


In [35]:
processed = readii_index['PatientID']
set(readii_index['PatientID'].to_list())

{'LUNG1-001', 'LUNG1-002'}

In [28]:
requested = {itype for itype in itertools.product([permutation.name() for permutation in manager.negative_control_strategies],
                                                                  [region.name() for region in manager.region_strategies])}

In [29]:
requested.issubset(processed)

False

In [30]:
processed

{('randomized', 'full'),
 ('randomized', 'roi'),
 ('shuffled', 'full'),
 ('shuffled', 'roi')}

In [31]:
requested

{('randomized', 'full'), ('randomized', 'non_roi'), ('randomized', 'roi')}