# Create READII negative control CTs to run through FMCIB

This code utilizes the RADCURE dataset from TCIA. This dataset is under the TCIA Restricted License, so users will need to request access prior to running this code.

We will be using the RADCURE test subset specified in the clinical data sheet. 

## Set up pixi environment kernel

1. Run the following commands in the terminal:

    ```bash
    $ pixi install

    $ pixi run make_kernel
    ```

2. In the `Select Kernel` menu at the top right of the notebook, select `Jupyter Kernel` as the source. 

3. Refresh the options and one called `readii-fmcib` should appear. Select this option.

## Imports

In [1]:
import csv
import itertools
import pandas as pd
import SimpleITK as sitk 
import shutil
import yaml

from imgtools.autopipeline import AutoPipeline
from pathlib import Path
from readii.io.loaders import loadImageDatasetConfig
from readii.negative_controls import applyNegativeControl
from tqdm.notebook import tqdm

import sys; sys.path.append("code")
from process_readii import find_bbox, crop_bbox

## Initialize dataset name and negative control settings

In [2]:
# Load the RADCURE dataset settings, passing the local `config` directory to the READII loader.
config = loadImageDatasetConfig("RADCURE", Path("config"))

# Values from the config that the rest of the notebook relies on:
#   - the dataset name, used to build the rawdata/procdata directory paths
#   - the negative control types and regions, combined pairwise when generating images
DATASET_NAME = config["dataset_name"]
NEG_CONTROL_TYPES = config["negative_control_types"]
NEG_CONTROL_REGIONS = config["negative_control_regions"]

## Set up data directories

In [3]:
# Create <stage>/<dataset>/<kind> for both data stages and both kinds of data.
# Existing directories are left untouched.
for data_stage in ("rawdata", "procdata"):
    for data_kind in ("clinical", "images"):
        Path(data_stage, DATASET_NAME, data_kind).mkdir(parents=True, exist_ok=True)

# Image directories used throughout the rest of the notebook.
raw_images_dir = Path("rawdata") / DATASET_NAME / "images"
proc_images_dir = Path("procdata") / DATASET_NAME / "images"

## Copy the raw image data for RADCURE test set to the `rawdata/RADCURE/images` directory

In [4]:
# INPUT THE PATH TO THE RADCURE IMAGE DATA
# Edit this value to point at your own copy of the RADCURE images.
# The copy cell below joins `image_dir_path / patient_ID`, so this directory
# must contain one sub-directory per patient ID.
image_dir_path = Path("/home/bioinf/bhklab/radiomics/radiomics_orcestra/rawdata/RADCURE/images/zipped")

In [15]:
# CSV listing the patients to copy; the first column of each row is read as the patient ID.
patient_ID_list_file = Path("./rawdata/RADCURE/clinical/col_test_patient_IDs_RADCURE.csv")
copy_dir_path = Path(raw_images_dir)

# newline="" is how the csv module documentation says files passed to csv.reader should be opened.
with open(patient_ID_list_file, "r", newline="") as f:
    pat_list = csv.reader(f)
    for row in pat_list:
        # csv.reader yields an empty list for a blank line (e.g. a trailing newline);
        # skip those rows instead of failing on row[0].
        if not row or not row[0].strip():
            continue
        patient_ID = row[0].strip()

        existing_patient_image_directory = image_dir_path / patient_ID
        copy_patient_image_directory = copy_dir_path / patient_ID

        # Only copy patients that are not already present, so re-running this cell
        # does not repeat copies that have already been made.
        if copy_patient_image_directory.exists():
            print(f"Copy of {patient_ID}'s image file already exists.")
        else:
            # Raises if the source directory for this patient does not exist.
            shutil.copytree(existing_patient_image_directory, copy_patient_image_directory)

Copy of RADCURE-0300's image file already exists.


## Make ROI yaml file

In [6]:
# Name given to the ROI and the regex pattern mapped to it in the yaml file.
# `roi_name` is reused later to locate the converted mask file.
roi_name = "GTV"
roi_matches = {roi_name: "GTVp$"}

# Write the mapping into the raw images directory.
with (raw_images_dir / "mit_roi_names.yaml").open("w") as roi_yaml_file:
    yaml.dump(roi_matches, roi_yaml_file)

## Run med-imagetools to get converted nifti files and get dataset summary file

In [7]:
# Inputs for med-imagetools: the modalities to process and the ROI name mapping file.
modalities = "CT,RTSTRUCT"
roi_yaml_path = raw_images_dir / "mit_roi_names.yaml"

# Directory the med-imagetools outputs are written to.
mit_output_dir = proc_images_dir / "mit_outputs"

In [18]:
# Configure med-imagetools to process the raw images into `mit_output_dir`.
# NOTE(review): a spacing of all zeros presumably means "do not resample" - confirm
# against the med-imagetools documentation.
pipeline = AutoPipeline(
    input_directory=raw_images_dir,
    output_directory=mit_output_dir,
    modalities=modalities,
    spacing=(0.0, 0.0, 0.0),
    read_yaml_label_names=True,
    ignore_missing_regex=True,
    roi_yaml_path=roi_yaml_path,
    update=True,
)

# Run the pipeline; this produces the outputs read by the following cells.
pipeline.run()

  0%|          | 0/714 [00:00<?, ?it/s]

100%|██████████| 714/714 [01:53<00:00,  6.30it/s]
  relevant_study_id = self.df_new.loc[(self.df_new.edge_type.str.contains(regex_term)), "study_x"].unique()


Dataset already processed...


## Load the CT and RTSTRUCT, run them through READII to generate negative controls, then crop and save the original and negative control images

In [8]:
# Read in the dataset.csv file made by med-imagetools autopipeline.
# The first column is used as the index; the processing loop uses each index
# label as the name of that image's sub-directory inside `mit_output_dir`.
images_metadata = pd.read_csv(Path(mit_output_dir, "dataset.csv"), index_col=0)

# Set up the output directories for all the READII processed images
# Main output directory for cropped nifti images
cropped_images_dir = proc_images_dir / "cropped_images"

# Output directory for the original (unmodified) CT crops
cropped_original_dir = cropped_images_dir / "original"
cropped_original_dir.mkdir(parents=True, exist_ok=True)

# Negative control types and regions
# Regions refer to what portion of the CT image to apply the negative control to
# Types refer to what will be done with the voxels of the CT image in the region
# These are taken from the dataset config rather than hard-coded, so they always
# match the NEG_CONTROL_* values the processing loop actually iterates over.
negative_control_regions = NEG_CONTROL_REGIONS
negative_control_types = NEG_CONTROL_TYPES

### NOTE: The following cell will take a while to run. 

In [None]:
# Size every cropped image is resized to before saving (the input size used for FMCIB).
FMCIB_INPUT_SIZE = (50, 50, 50)
# Fixed seed passed to READII for every negative control.
NEGATIVE_CONTROL_SEED = 10

# Build every (type, region) combination once and create its output directory up front,
# instead of repeating the mkdir for every patient inside the loop.
negative_control_dirs = {}
for nc_type, nc_region in itertools.product(NEG_CONTROL_TYPES, NEG_CONTROL_REGIONS):
    nc_dir = cropped_images_dir / f"{nc_type}_{nc_region}"
    nc_dir.mkdir(parents=True, exist_ok=True)
    negative_control_dirs[(nc_type, nc_region)] = nc_dir

for image_idx in tqdm(images_metadata.index):
    image_idx_metadata = images_metadata.loc[image_idx]
    patient_ID = image_idx_metadata['patient_ID']

    # The index label names this image's sub-directory in the med-imagetools output.
    # str() keeps the path construction working even if pandas parsed the index as non-string.
    image_output_dir = Path(mit_output_dir, str(image_idx))

    # Load in the CT image output from med-imagetools
    ct_image = sitk.ReadImage(image_output_dir / "CT" / "CT.nii.gz")

    # Load in the RTSTRUCT image output from med-imagetools
    roi_image = sitk.ReadImage(image_output_dir / "RTSTRUCT_CT" / f"{roi_name}.nii.gz")

    # Find the bounding box of the ROI; the same box is used to crop the original
    # and every negative control image.
    bounding_box = find_bbox(roi_image)

    # Process the original CT image:
    # crop to the bounding box, resize for input to FMCIB, and save under the patient ID.
    cropped_ct_image = crop_bbox(ct_image, bounding_box, FMCIB_INPUT_SIZE)
    cropped_output_path = cropped_original_dir / f"{patient_ID}.nii.gz"
    sitk.WriteImage(cropped_ct_image, cropped_output_path)

    # Process the negative control CT images
    for (nc_type, nc_region), cropped_nc_dir in negative_control_dirs.items():
        # Make negative control image using READII
        negative_control_ct_image = applyNegativeControl(ct_image,
                                                         negativeControlType=nc_type,
                                                         negativeControlRegion=nc_region,
                                                         roiMask=roi_image,
                                                         randomSeed=NEGATIVE_CONTROL_SEED)

        # Crop the negative control CT image to the bounding box and resize it for input to FMCIB
        cropped_nc_ct_image = crop_bbox(negative_control_ct_image, bounding_box, FMCIB_INPUT_SIZE)

        # Save the cropped negative control CT image
        cropped_nc_output_path = cropped_nc_dir / f"{patient_ID}.nii.gz"
        sitk.WriteImage(cropped_nc_ct_image, cropped_nc_output_path)

## Set up expected input file for FMCIB

In [12]:
# Directory the FMCIB input CSVs are written to. DataFrame.to_csv does not create
# missing parent directories, so make sure it exists before writing.
fmcib_input_dir = proc_images_dir / "fmcib_input"
fmcib_input_dir.mkdir(parents=True, exist_ok=True)

for image_type_dir_path in sorted(cropped_images_dir.glob("*")):
    # Each image type lives in its own sub-directory; ignore any stray files.
    if not image_type_dir_path.is_dir():
        continue

    image_type = image_type_dir_path.name

    image_type_file_paths = sorted(image_type_dir_path.glob("*.nii.gz"))

    # Create a dataframe with these image paths and all coordinates set to 0
    fmcib_input_df = pd.DataFrame(data = {"image_path": image_type_file_paths})
    fmcib_input_df["coordX"] = 0
    fmcib_input_df["coordY"] = 0
    fmcib_input_df["coordZ"] = 0

    # Print length of the dataframe - for RADCURE test, should be 713
    print(f"Number of images for {image_type}: {fmcib_input_df.shape[0]}")

    # One CSV per image type, named after the dataset and the image type.
    fmcib_input_df.to_csv(fmcib_input_dir / f"fmcib_input_{DATASET_NAME}_{image_type}.csv", index=False)

Number of images for original: 713
Number of images for randomized_sampled_full: 713
Number of images for randomized_sampled_non_roi: 713
Number of images for randomized_sampled_roi: 713
Number of images for shuffled_full: 713
Number of images for shuffled_non_roi: 713
Number of images for shuffled_roi: 713
