In [1]:
# Move the working directory up one level; the recorded output shows it landing in the readii_2_roqc directory.
# NOTE: the path is relative, so re-running this cell in the same kernel moves up another level.
%cd ..

/home/bhkuser/bhklab/katy/readii_2_roqc


In [2]:
from damply import dirs
from readii_2_roqc.utils.loaders import load_dataset_config
from readii_2_roqc.readii.make_negative_controls import get_readii_settings, get_masked_image_metadata
from readii.negative_controls_refactor.manager import NegativeControlManager
from imgtools.transforms.spatial_transforms import Resize
from readii.io.writers.nifti_writer import NIFTIWriter

import pandas as pd
import itertools

In [3]:
# Name of the dataset to process; passed to load_dataset_config below.
dataset = "NSCLC-Radiomics_test"
# When False, an existing READII index is inspected instead of being ignored, and
# negative_control_generator selects the 'SKIP' file mode rather than 'OVERWRITE'.
overwrite = False
# Random seed handed to NegativeControlManager as random_seed.
seed = 10

# Set up logging

In [4]:
import logging

# One log file per dataset, collecting everything from DEBUG level upwards.
log_file_path = dirs.LOGS / f"{dataset}_make_negative_controls.log"
# logging.basicConfig opens the log file immediately and raises FileNotFoundError when the
# parent directory is missing, so make sure it exists first (no-op if it is already there).
# NOTE(review): assumes dirs.LOGS is a pathlib-style path - confirm against damply.
log_file_path.parent.mkdir(parents=True, exist_ok=True)

logger = logging.getLogger(__name__)
logging.basicConfig(filename=log_file_path, encoding='utf-8', level=logging.DEBUG)

# Load dataset config

In [15]:
# Fail fast when no dataset name was set: the problem is written to the log and then raised.
if dataset is None:
    message = "Dataset name must be provided."
    logger.error(message)
    raise ValueError(message)

# load_dataset_config returns three values: the config itself, a dataset name used below in
# file and directory name prefixes, and a full dataset name used for the processed-data directory.
dataset_config, dataset_name, full_dataset_name = load_dataset_config(dataset)
logger.info(f"Creating negative controls for dataset: {dataset_name}")

# Load dataset index

In [16]:
# Images directory of this dataset inside the processed-data root.
images_dir_path = dirs.PROCDATA / full_dataset_name / 'images'

# Read the `mit_<dataset_name>_index-simple.csv` index from the `mit_<dataset_name>` subdirectory.
dataset_index = pd.read_csv(images_dir_path / f'mit_{dataset_name}' / f'mit_{dataset_name}_index-simple.csv')

# Filter the index using the READII-2-ROQC (R2R) configuration file

In [17]:
# Reduce the dataset index using the dataset config.
# NOTE(review): the exact selection rules live in get_masked_image_metadata and are not visible here.
masked_image_index = get_masked_image_metadata(dataset_index, dataset_config)

# Check whether the output(s) already exist

In [18]:
# Directory for the READII outputs of this dataset, next to the other image directories.
readii_image_dir = images_dir_path / f'readii_{dataset_name}'
# Index CSV inside that directory; its existence is what the next cell checks.
readii_index_filepath = readii_image_dir / f'readii_{dataset_name}_index.csv'

In [20]:
if readii_index_filepath.exists() and not overwrite:
    # A READII index from an earlier run exists and overwriting is off, so compare
    # what it lists against what the config and the dataset index ask for.
    regions, permutations, crop, resize = get_readii_settings(dataset_config)
    readii_index = pd.read_csv(readii_index_filepath)

    # Patients listed in the existing READII index vs. patients listed in the dataset index
    processed_samples = set(readii_index['PatientID'].to_list())
    requested_samples = set(dataset_index['PatientID'].to_list())

    # Columns that must all be present before image types can be compared
    readii_settings = ['Permutation', 'Region', 'Crop', 'Resize']
    if set(readii_index.columns).issuperset(readii_settings):
        # The resize values are joined with underscores to form the value compared against the Resize column
        resize_string = '_'.join(map(str, resize))
        # Each image type is a (Permutation, Region, Crop, Resize) tuple
        processed_image_types = set(readii_index[readii_settings].itertuples(index=False, name=None))
        requested_image_types = set(itertools.product(permutations, regions, crop, [resize_string]))

        all_types_present = requested_image_types <= processed_image_types
        all_samples_present = requested_samples <= processed_samples
        if all_types_present and all_samples_present:
            print("Requested negative controls have already been generated for these samples or are listed in the readii index as if they have been. Set overwrite to true if you want to re-process these.")
            print(readii_index['filepath'].to_list())

    else:
        print("Not all READII settings satisfied in existing output. Re-running negative control generation.")

Not all READII settings satisfied in existing output. Re-running negative control generation.


In [25]:
from readii_2_roqc.utils.metadata import make_edges_df

# Build the edges index from the masked image index, using the image and mask modalities
# named under MIT -> MODALITIES in the dataset config.
# NOTE(review): judging by the `_image` / `_mask` column suffixes in the displayed result, each
# row presumably pairs one image with one mask - confirm in make_edges_df.
edges_index = make_edges_df(masked_image_index, dataset_config['MIT']['MODALITIES']['image'], dataset_config['MIT']['MODALITIES']['mask'])

# Print the image and mask file paths recorded in each row.
for idx, data_row in edges_index.iterrows():
    print(data_row.filepath_image)
    print(data_row.filepath_mask)

LUNG1-001_0000/CT_63382046/CT.nii.gz
LUNG1-001_0000/RTSTRUCT_35578236/GTV__[GTV-1].nii.gz
LUNG1-002_0001/CT_23261228/CT.nii.gz
LUNG1-002_0001/RTSTRUCT_43245931/GTV__[GTV-1].nii.gz


In [23]:
# Preview only the first rows so the cell output stays small when a full-size dataset is processed.
edges_index.head()

Unnamed: 0,ImageID_image,Modality_image,PatientID,ReferencedSeriesUID_image,SampleID_image,SampleNumber,SeriesInstanceUID_image,StudyInstanceUID_image,class_image,direction_image,...,ndim_mask,nvoxels_mask,origin_mask,roi_key_mask,saved_time_mask,size_mask,spacing_mask,std_mask,sum_mask,variance_mask
0,CT,CT,LUNG1-001,,LUNG1-001_0000,0,1.3.6.1.4.1.32722.99.99.2989917765213423750108...,1.3.6.1.4.1.32722.99.99.2393413539117143687725...,Scan,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",...,3,35127296,"(-249.51171875, -460.51171875, -681.5)",GTV,2025-07-22:14:16:22,"(512, 512, 134)","(0.9765625, 0.9765625, 3.0)",0.039992,56271.0,0.001599
1,CT,CT,LUNG1-002,,LUNG1-002_0001,1,1.3.6.1.4.1.32722.99.99.2329880015517990803358...,1.3.6.1.4.1.32722.99.99.2037150038059966416957...,Scan,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",...,3,29097984,"(-250.112, -250.112, -133.4)",GTV,2025-07-22:14:16:18,"(512, 512, 111)","(0.977, 0.977, 3.0)",0.065496,125364.0,0.00429


# Negative control generator function that can be run in parallel

In [None]:
from imgtools.transforms.spatial_transforms import Resize
from readii.process.images.crop import crop_and_resize_image_and_mask
import SimpleITK as sitk
from readii.image_processing import alignImages, flattenImage

def load_image_and_mask(image_path, mask_path = None):
    """Read an image, and optionally its mask, from disk with SimpleITK.

    The image is flattened to drop its extra dimension and then aligned with the
    file as read, so it keeps the original origin, spacing and direction. When a
    mask path is given, the mask is flattened the same way and aligned to the
    processed image.

    Parameters
    ----------
    image_path
        Location of the image file, passed to sitk.ReadImage.
    mask_path
        Location of the mask file, passed to sitk.ReadImage. Optional; any falsy
        value means no mask is loaded.

    Returns
    -------
    The processed image alone when no mask path is given, otherwise the tuple
    (image, mask).
    """
    loaded_image = sitk.ReadImage(image_path)
    image = alignImages(loaded_image, flattenImage(loaded_image))

    # Without a mask there is nothing more to do.
    if not mask_path:
        return image

    loaded_mask = sitk.ReadImage(mask_path)
    return image, alignImages(image, flattenImage(loaded_mask))


def negative_control_generator(dataset_config, image_path, mask_path, output_dir, overwrite):
    """Generate the READII negative controls for one image/mask pair.

    Work in progress: the negative controls are produced by the manager, but the
    crop/resize and saving steps are not written yet. Until they are, the function
    raises NotImplementedError as soon as the first negative control is produced,
    rather than silently discarding the results.

    Parameters
    ----------
    dataset_config
        Dataset configuration; passed to get_readii_settings to obtain the
        regions, permutations, crop and resize settings.
    image_path
        Path of the image file to load.
    mask_path
        Path of the mask file to load.
    output_dir
        Not used yet.
        NOTE(review): presumably the root directory for the saved files - confirm.
    overwrite
        Truthy selects the 'OVERWRITE' file mode and index overwriting; falsy
        selects 'SKIP' and keeps the index.

    Raises
    ------
    NotImplementedError
        When the manager yields a negative control, because processing and saving
        are not implemented.

    Notes
    -----
    The random seed is read from the notebook-level variable `seed`.
    """
    # Settings for the planned NIFTIWriter (see the sketch at the end); not consumed yet.
    if overwrite:
        existing_file_mode = 'OVERWRITE'
        overwrite_index = True
    else:
        existing_file_mode = 'SKIP'
        overwrite_index = False

    regions, permutations, crop, resize = get_readii_settings(dataset_config)

    # Set up negative control manager with settings from config
    manager = NegativeControlManager.from_strings(
        negative_control_types=permutations,
        region_types=regions,
        random_seed=seed
    )

    image, mask = load_image_and_mask(image_path, mask_path)

    for neg_image, permutation, region in manager.apply(image, mask):
        # TODO: apply crop and resize
        # TODO: set up nifti writer
        # TODO: call save out negative controls
        # A loop body holding only comments is a syntax error, so be explicit about the gap.
        raise NotImplementedError(
            "negative_control_generator cannot crop/resize or save negative controls yet."
        )

    # Sketch of the writer for saving out the negative controls and index file.
    # NOTE(review): it refers to the notebook globals readii_image_dir and dataset_name, and to
    # image_modality, which is not defined in the visible cells; output_dir is probably meant
    # to take the place of readii_image_dir - confirm before enabling.
    # nifti_writer = NIFTIWriter(
    #         root_directory = readii_image_dir,
    #         filename_format = "{dir_original_image}/{dirname_mask}_{ImageID_mask}/" + f"{image_modality}" + "_{Permutation}_{Region}.nii.gz",
    #         create_dirs = True,
    #         existing_file_mode = existing_file_mode,
    #         sanitize_filenames = True,
    #         index_filename = readii_image_dir /f"readii_{dataset_name}_index.csv",
    #         overwrite_index = overwrite_index
    #     )
    


In [74]:
# Preview only the first rows so the cell output stays small when a full-size dataset is processed.
masked_image_index.head()

Unnamed: 0,ImageID,Modality,PatientID,ReferencedSeriesUID,SampleID,SampleNumber,SeriesInstanceUID,StudyInstanceUID,class,direction,...,ndim,nvoxels,origin,roi_key,saved_time,size,spacing,std,sum,variance
0,CT,CT,LUNG1-001,,LUNG1-001_0000,0,1.3.6.1.4.1.32722.99.99.2989917765213423750108...,1.3.6.1.4.1.32722.99.99.2393413539117143687725...,Scan,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",...,3,35127296,"(-249.51171875, -460.51171875, -681.5)",,2025-07-22:14:16:16,"(512, 512, 134)","(0.9765625, 0.9765625, 3.0)",426.958598,-26042950000.0,182293.644053
2,CT,CT,LUNG1-002,,LUNG1-002_0001,1,1.3.6.1.4.1.32722.99.99.2329880015517990803358...,1.3.6.1.4.1.32722.99.99.2037150038059966416957...,Scan,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",...,3,29097984,"(-250.112, -250.112, -133.4)",,2025-07-22:14:16:15,"(512, 512, 111)","(0.977, 0.977, 3.0)",431.016943,-21971040000.0,185775.604855
0,GTV__[GTV-1],RTSTRUCT,LUNG1-001,1.3.6.1.4.1.32722.99.99.2989917765213423750108...,LUNG1-001_0000,0,1.3.6.1.4.1.32722.99.99.2279381215866080725084...,1.3.6.1.4.1.32722.99.99.2393413539117143687725...,Mask,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",...,3,35127296,"(-249.51171875, -460.51171875, -681.5)",GTV,2025-07-22:14:16:22,"(512, 512, 134)","(0.9765625, 0.9765625, 3.0)",0.039992,56271.0,0.001599
1,GTV__[GTV-1],RTSTRUCT,LUNG1-002,1.3.6.1.4.1.32722.99.99.2329880015517990803358...,LUNG1-002_0001,1,1.3.6.1.4.1.32722.99.99.2432675512669112458302...,1.3.6.1.4.1.32722.99.99.2037150038059966416957...,Mask,"(1.0, 0.0, 0.0, 0.0, 1.0, 0.0, 0.0, 0.0, 1.0)",...,3,29097984,"(-250.112, -250.112, -133.4)",GTV,2025-07-22:14:16:18,"(512, 512, 111)","(0.977, 0.977, 3.0)",0.065496,125364.0,0.00429
