In [1]:
# Change the working directory to the parent of the notebook's folder
# (presumably the repository root, so project-relative paths resolve — confirm).
# NOTE(review): not idempotent — re-running this cell without restarting the kernel moves up one more level.
%cd ..

/home/bhkuser/bhklab/katy/readii_2_roqc


In [2]:
from damply import dirs
from pathlib import Path
from tqdm import tqdm
from joblib import Parallel, delayed

from imgtools.io.writers.nifti_writer import NIFTIWriter, NiftiWriterIOError

from readii.process.images.crop import crop_and_resize_image_and_mask
from readii.negative_controls_refactor.manager import NegativeControlManager

from readii_2_roqc.utils.loaders import load_dataset_config, load_image_and_mask
from readii_2_roqc.utils.metadata import get_masked_image_metadata, insert_SampleID
from readii_2_roqc.utils.settings import get_readii_settings, get_resize_string

import pandas as pd
import itertools
import logging

In [3]:
# Name of the dataset to process; passed to load_dataset_config and used in the log filename.
dataset = "NSCLC-Radiomics_test"
# When False, an existing READII index is inspected first and the writer is set to 'SKIP' existing files
# instead of 'OVERWRITE'.
overwrite = False
# Random seed handed to NegativeControlManager inside negative_control_generator.
seed = 10

# Set up logging

In [4]:
# Configure logging to write DEBUG-level (and above) records to a per-dataset file under dirs.LOGS.
logging.basicConfig(
    level=logging.DEBUG,
    encoding='utf-8',
    filename=dirs.LOGS / f"{dataset}_make_negative_controls.log",
)
# Logger used by the remaining cells of this notebook.
logger = logging.getLogger(__name__)

# Load dataset config

In [5]:
# Fail early with a clear error when no dataset name was supplied.
# `not dataset` rejects both None and an empty string.
if not dataset:
    message = "Dataset name must be provided."
    logger.error(message)
    raise ValueError(message)

# Load the configuration along with the short and full dataset names used to build paths below.
dataset_config, dataset_name, full_dataset_name = load_dataset_config(dataset)
logger.info(f"Creating negative controls for dataset: {dataset_name}")

# Load dataset index

In [6]:
# Root folder holding every processed image set for this dataset.
images_dir_path = dirs.PROCDATA / full_dataset_name / 'images'

# Read the simple index CSV from the mit_<dataset_name> folder and pass it through insert_SampleID.
dataset_index = insert_SampleID(
    pd.read_csv(
        images_dir_path.joinpath(
            f'mit_{dataset_name}',
            f'mit_{dataset_name}_index-simple.csv',
        )
    )
)

# Filter the index by R2R configuration file

In [7]:
# Reduce the dataset index according to the dataset configuration.
# NOTE(review): the exact selection rules live in get_masked_image_metadata — confirm there.
masked_image_index = get_masked_image_metadata(dataset_index, dataset_config)

# Check whether the requested outputs already exist

In [8]:
# READII settings requested in the dataset configuration.
regions, permutations, crop, resize = get_readii_settings(dataset_config)

# Every READII output for this dataset is written beneath this folder.
readii_image_dir = images_dir_path / f'readii_{dataset_name}'

# Locate the READII index file: directly in the READII folder when crop or resize is unset,
# otherwise inside the "<crop>_<resize>" subfolder.
# NOTE(review): negative_control_generator always builds its index path with the
# "<crop>_<resize>" subfolder, so the bare path below may not match what the writer
# produces when crop/resize are unset — see the TODO in that function.
if crop == "" or resize == []:
    readii_index_filepath = readii_image_dir / f'readii_{dataset_name}_index.csv'
else:
    readii_index_filepath = readii_image_dir / f"{crop}_{get_resize_string(resize)}" / f'readii_{dataset_name}_index.csv'

In [9]:
# Only inspect the existing index when one is present and overwriting was not requested.
if readii_index_filepath.exists() and not overwrite:
    regions, permutations, crop, resize = get_readii_settings(dataset_config)

    # The existing index is compared against the request on two axes:
    #   1. every requested (permutation, region, crop, resize) combination is listed
    #   2. every requested sample is listed
    readii_index = pd.read_csv(readii_index_filepath)

    # Samples already listed in the index vs. samples present in the dataset index
    processed_samples = set(readii_index['PatientID'].tolist())
    requested_samples = set(dataset_index['SampleID'].tolist())

    readii_settings = ['Permutation', 'Region', 'crop', 'Resize']
    if set(readii_settings) <= set(readii_index.columns):
        # One tuple per index row, in the same column order as readii_settings
        processed_image_types = set(
            readii_index[readii_settings].itertuples(index=False, name=None)
        )
        # Every combination the configuration asks for
        requested_image_types = set(
            itertools.product(
                permutations,
                regions,
                [crop],
                [get_resize_string(resize)],
            )
        )
        if requested_image_types <= processed_image_types and requested_samples <= processed_samples:
            print("Requested negative controls have already been generated for these samples or are listed in the readii index as if they have been. Set overwrite to true if you want to re-process these.")
    else:
        # The index lacks at least one settings column, so the comparison cannot be made
        print("Not all READII settings satisfied in existing output. Re-running negative control generation.")

# Negative control generator function that can be run in parallel

In [10]:
# NOTE(review): this replaces the `crop` value read from the dataset config in an earlier cell.
# negative_control_generator re-reads crop from dataset_config itself, so this looks like a
# leftover debugging override — confirm and remove.
crop = ''

In [None]:
def negative_control_generator(dataset_config:dict,
                               image_path:Path,
                               mask_path:Path,
                               images_dir_path:Path,
                               output_dir:Path,
                               sample_id:str = None,
                               mask_image_id:str = None,
                               overwrite:bool = False):
    """Generate and save the READII negative control images for one image-mask pair.

    The image and mask are loaded from ``images_dir_path / f'mit_{dataset_name}'``.
    When both a crop method and a resize dimension are configured, the original image
    is cropped/resized and saved first (Permutation="full", Region="original"), and
    every negative control is cropped/resized the same way before being saved.

    Parameters
    ----------
    dataset_config : dict
        Dataset configuration. Read through ``get_readii_settings`` and via
        ``dataset_config['MIT']['MODALITIES']['image']`` (used in output filenames).
    image_path : Path
        Image file path relative to the ``mit_<dataset_name>`` directory.
    mask_path : Path
        Mask file path relative to the ``mit_<dataset_name>`` directory.
    images_dir_path : Path
        Directory containing the ``mit_<dataset_name>`` directory.
    output_dir : Path
        Root directory handed to the NIFTIWriter.
    sample_id : str, optional
        Sample identifier stored as ``PatientID``. Defaults to the first component
        of ``image_path``.
    mask_image_id : str, optional
        Mask identifier used in the output path. Defaults to the mask file name
        without its ``.nii.gz`` suffix.
    overwrite : bool, optional
        If True the writer uses 'OVERWRITE' mode and overwrites its index; otherwise
        it uses 'SKIP' mode and keeps the index.

    Returns
    -------
    list
        The values returned by ``nifti_writer.save`` for every image that was saved
        without raising ``NiftiWriterIOError``.

    Notes
    -----
    Relies on the notebook-level names ``dataset_name``, ``seed`` and ``logger``.
    """
    # Accept plain strings as well as Path objects; `.parent`/`.name` are used on both below.
    image_path = Path(image_path)
    mask_path = Path(mask_path)

    if overwrite:
        existing_file_mode = 'OVERWRITE'
        overwrite_index = True
    else:
        existing_file_mode = 'SKIP'
        overwrite_index = False

    regions, permutations, crop, resize = get_readii_settings(dataset_config)
    resize_string = get_resize_string(resize)
    # Crop/resize is only applied when both settings are provided
    apply_crop_resize = crop != '' and resize != []

    # Get sample metadata from path if not provided
    if sample_id is None:
        sample_id = image_path.parts[0]
    if mask_image_id is None:
        mask_image_id = mask_path.name.removesuffix('.nii.gz')
    # Spaces are replaced before the ID is used as part of the output path
    mask_id_for_path = mask_image_id.replace(' ', "_")

    # Get beginning of the path to the nifti images dir
    mit_images_dir = images_dir_path / f'mit_{dataset_name}'
    # Load the nifti image and mask files
    image, mask = load_image_and_mask(mit_images_dir / image_path, mit_images_dir / mask_path)
    # Image modality is used as the prefix of every output filename
    image_modality = dataset_config['MIT']['MODALITIES']['image']

    # TODO: Figure out how to handle the no crop and resize set for the output index path
    # # Set up the readii subdirectory for the image being processed, specifically the crop and resize level
    # if crop == '' and resize == []:
    #     proc_image_type = 'original_size'
    # else:
    #     proc_image_type = f'{crop}_{resize_string}'

    # Set up writer for saving out the negative controls and index file
    nifti_writer = NIFTIWriter(
        root_directory = output_dir,
        filename_format = "{crop}_{Resize}/{dir_original_image}/{dirname_mask}_{ImageID_mask}/" + f"{image_modality}" + "_{Permutation}_{Region}.nii.gz",
        create_dirs = True,
        existing_file_mode = existing_file_mode,
        sanitize_filenames = True,
        index_filename = output_dir / f'{crop}_{resize_string}' / f"readii_{dataset_name}_index.csv",
        overwrite_index = overwrite_index
    )

    readii_image_paths = []

    def _save_and_record(image_to_save, permutation, region):
        """Save one image with the shared metadata and record its path only if the save succeeded."""
        try:
            out_path = nifti_writer.save(
                image_to_save,
                PatientID=sample_id,
                Region=region,
                Permutation=permutation,
                crop=crop,
                Resize=resize_string,
                ImageID_mask=mask_id_for_path,
                dir_original_image=image_path.parent,
                dirname_mask=mask_path.parent.name,
            )
        except NiftiWriterIOError:
            # NOTE(review): the message assumes this error means the file already exists —
            # confirm against the writer's behaviour.
            message = f"{permutation} {region} negative control file already exists for {sample_id}. If you wish to overwrite, set overwrite to True."
            logger.debug(message)
            # Nothing was written, so there is no path to record for this image
            return
        readii_image_paths.append(out_path)

    # Process crop and resize of original image if needed, and save
    if apply_crop_resize:
        crop_image, _ = crop_and_resize_image_and_mask(image,
                                                       mask,
                                                       crop_method = crop,
                                                       resize_dimension = resize)
        _save_and_record(crop_image, permutation="full", region="original")

    # Set up negative control manager with settings from config
    manager = NegativeControlManager.from_strings(
        negative_control_types=permutations,
        region_types=regions,
        random_seed=seed
    )
    # Process and save negative control images
    for proc_image, permutation, region in manager.apply(image, mask):
        # apply crop and resize
        if apply_crop_resize:
            proc_image, _ = crop_and_resize_image_and_mask(proc_image,
                                                           mask,
                                                           crop_method = crop,
                                                           resize_dimension = resize)
        _save_and_record(proc_image, permutation=permutation, region=region)

    return readii_image_paths


In [19]:
from readii_2_roqc.utils.metadata import make_edges_df

# Build the table of image/mask rows to process (presumably one row per image-mask pair — confirm).
edges_index = make_edges_df(
    masked_image_index,
    dataset_config['MIT']['MODALITIES']['image'],
    dataset_config['MIT']['MODALITIES']['mask'],
)
parallel = True
n_jobs = 4


def _generator_kwargs(data_row):
    """Build the negative_control_generator keyword arguments for one edges_index row."""
    return {
        "dataset_config": dataset_config,
        "image_path": Path(data_row.filepath_image),
        "mask_path": Path(data_row.filepath_mask),
        "images_dir_path": images_dir_path,
        "output_dir": readii_image_dir,
        "sample_id": data_row.SampleID_image,
        "mask_image_id": data_row.ImageID_mask,
        "overwrite": overwrite,
    }


# The bar advances as rows are pulled from the iterator, i.e. when work is handed out,
# not when it finishes.
edge_rows = tqdm(
    edges_index.iterrows(),
    desc="Generating negative controls for each image-mask pair...",
    total=len(edges_index),
)

if parallel:
    # Use joblib to parallelize negative control generation
    readii_image_paths = Parallel(n_jobs=n_jobs)(
        delayed(negative_control_generator)(**_generator_kwargs(data_row))
        for _, data_row in edge_rows
    )
else:
    # Same calls, run one after another in this process
    readii_image_paths = [
        negative_control_generator(**_generator_kwargs(data_row))
        for _, data_row in edge_rows
    ]

Generating negative controls for each image-mask pair...: 100%|██████████| 2/2 [00:00<00:00, 1032.44it/s]
