# Code to run READII negative controls through FMCIB

This code utilizes the RADCURE dataset from TCIA. This dataset is under the TCIA Restricted License, so users will need to request access prior to running this code.

We will be using the RADCURE test subset specified in the clinical data sheet. 

## [INSERT steps to set up pixi environment for this notebook here]

## Initialize dataset name

In [1]:
import yaml

# Load the dataset configuration. The file is opened in a context manager so the
# handle is closed as soon as parsing finishes instead of being left open.
# NOTE(review): FullLoader is kept to preserve the existing parsing behaviour; if
# the config only contains plain scalars/lists/mappings, yaml.safe_load would be
# sufficient and is the safer choice.
with open("config/RADCURE.yaml", "r") as config_file:
    config = yaml.load(config_file, Loader=yaml.FullLoader)

# Dataset name, used to build the rawdata/ and procdata/ directory paths
DATASET_NAME = config["dataset_name"]
# Negative control regions and types to generate, as listed in the config file
NEG_CONTROL_REGIONS = config["negative_control_regions"]
NEG_CONTROL_TYPES = config["negative_control_types"]

## Set up data directories

In [2]:
import os
import itertools

# Create the clinical and images sub-directories under both the raw and
# processed data trees for this dataset
for data_stage in ("rawdata", "procdata"):
    for data_kind in ("clinical", "images"):
        os.makedirs(os.path.join(data_stage, DATASET_NAME, data_kind), exist_ok=True)

# Locations of the raw and processed image data for this dataset
raw_images_dir = f"rawdata/{DATASET_NAME}/images"
proc_images_dir = f"procdata/{DATASET_NAME}/images"

### Before continuing, move the raw image data for the RADCURE test set into the `rawdata/RADCURE/images` directory

## Make ROI yaml file

In [8]:
import yaml

# Label used for the ROI throughout the rest of the notebook
roi_name = "GTV"
# Map that label to the pattern used to select the ROI
# (presumably a regex matched against the RTSTRUCT ROI names by med-imagetools -- confirm)
roi_matches = dict([(roi_name, "GTVp$")])

# Write the mapping as YAML alongside the raw images
with open(f"{raw_images_dir}/mit_roi_names.yaml", "w") as roi_yaml_file:
    yaml.dump(roi_matches, stream=roi_yaml_file)

## Run med-imagetools

In [10]:
from imgtools.autopipeline import AutoPipeline

# Directory that receives the med-imagetools output
mit_output_dir = f"{proc_images_dir}/mit_outputs"
# Comma-separated list of modalities to process
modalities = "CT,RTSTRUCT"
# YAML file holding the ROI name mapping
roi_yaml_path = f"{raw_images_dir}/mit_roi_names.yaml"

# Collect the AutoPipeline options in one place, then build and run the pipeline
autopipeline_options = dict(
    input_directory=raw_images_dir,
    output_directory=mit_output_dir,
    modalities=modalities,
    # NOTE(review): all-zero spacing presumably means "do not resample" -- confirm
    spacing=(0., 0., 0.),
    read_yaml_label_names=True,
    ignore_missing_regex=True,
    roi_yaml_path=roi_yaml_path,
    update=True,
)
pipeline = AutoPipeline(**autopipeline_options)

pipeline.run()

100%|██████████| 14/14 [00:00<00:00, 10794.16it/s]
  relevant_study_id = self.df_new.loc[(self.df_new.edge_type.str.contains(regex_term)), "study_x"].unique()


6_RADCURE-03199_RADCURE-0314

3_RADCURE-0183
Processing: 3_RADCURE-0183
Processing: 9_RADCURE-0314
Processing: 6_RADCURE-0319
2_RADCURE-0317
Processing: 2_RADCURE-0317
8_RADCURE-0112
Processing: 8_RADCURE-0112
1_RADCURE-0171
Processing: 1_RADCURE-0171
11_RADCURE-0287
Processing: 11_RADCURE-0287
7_RADCURE-0252
Processing: 7_RADCURE-0252
6_RADCURE-0319  start
2_RADCURE-0317  start
9_RADCURE-0314  start
11_RADCURE-0287  start
7_RADCURE-0252  start
6_RADCURE-0319  SAVED IMAGE
8_RADCURE-0112  start
6_RADCURE-0319 SAVED MASK ON CT
4_RADCURE-0131
Processing: 4_RADCURE-0131
3_RADCURE-0183  start
2_RADCURE-0317  SAVED IMAGE
11_RADCURE-0287  SAVED IMAGE
9_RADCURE-0314  SAVED IMAGE
2_RADCURE-0317 SAVED MASK ON CT
5_RADCURE-0065
Processing: 5_RADCURE-0065
1_RADCURE-0171  start
9_RADCURE-0314 SAVED MASK ON CT
12_RADCURE-0099
Processing: 12_RADCURE-0099
11_RADCURE-0287 SAVED MASK ON CT
10_RADCURE-0244
Processing: 10_RADCURE-0244
3_RADCURE-0183  SAVED IMAGE
4_RADCURE-0131  start
3_RADCURE-0183 SAVED 

## Load the CT and RTSTRUCT, run them through READII to generate negative controls, then crop and save the original and negative control images

In [11]:
import pandas as pd
import SimpleITK as sitk
from readii.negative_controls import applyNegativeControl
from tqdm.notebook import tqdm, trange

import sys; sys.path.append("code")
from process_readii import find_bbox, crop_bbox

# Read in the dataset.csv file made by med-imagetools autopipeline
images_metadata = pd.read_csv(os.path.join(mit_output_dir, "dataset.csv"), index_col=0)

# Set up the output directories for all the READII processed images
# Main output directory for cropped nifti images
cropped_images_dir = os.path.join(proc_images_dir, "cropped_images")

# Output directory for the crops of the original CT
cropped_original_dir = os.path.join(cropped_images_dir, "original")
os.makedirs(cropped_original_dir, exist_ok=True)

# Every (type, region) negative control combination requested in the config.
#   Regions refer to what portion of the CT image to apply the negative control to
#   Types refer to what will be done with the voxels of the CT image in the region
# Materialised as a list because the combinations are reused for every image.
negative_controls = list(itertools.product(NEG_CONTROL_TYPES, NEG_CONTROL_REGIONS))

# Create one output directory per combination up front, instead of checking
# for its existence again for every image
negative_control_dirs = {}
for nc_type, nc_region in negative_controls:
    nc_dir = os.path.join(cropped_images_dir, f"{nc_type}_{nc_region}")
    os.makedirs(nc_dir, exist_ok=True)
    negative_control_dirs[(nc_type, nc_region)] = nc_dir

# Size every crop is resized to for input to FMCIB
fmcib_input_size = (50, 50, 50)
# Seed passed to READII for every negative control
negative_control_seed = 10

# Iterate over index label and patient ID together; this avoids a .loc lookup
# per row and still yields one row at a time if index labels are duplicated
image_rows = zip(images_metadata.index, images_metadata["patient_ID"])
for image_idx, patient_ID in tqdm(image_rows, total=len(images_metadata)):
    # str() guards against read_csv inferring a non-string index
    image_dir = os.path.join(mit_output_dir, str(image_idx))

    # Load in the CT image output from med-imagetools
    ct_image = sitk.ReadImage(os.path.join(image_dir, "CT", "CT.nii.gz"))

    # Load in the RTSTRUCT image output from med-imagetools
    roi_image = sitk.ReadImage(os.path.join(image_dir, "RTSTRUCT_CT", f"{roi_name}.nii.gz"))

    # Find the bounding box of the ROI to crop CT image to
    bounding_box = find_bbox(roi_image)

    # Process the original CT image:
    # crop it to the bounding box, resize it for FMCIB, and save it
    cropped_ct_image = crop_bbox(ct_image, bounding_box, fmcib_input_size)
    cropped_output_path = os.path.join(cropped_original_dir, f"{patient_ID}.nii.gz")
    sitk.WriteImage(cropped_ct_image, cropped_output_path)

    # Process the negative control CT images
    for nc_type, nc_region in negative_controls:
        # Make negative control image using READII
        negative_control_ct_image = applyNegativeControl(ct_image,
                                                         negativeControlType=nc_type,
                                                         negativeControlRegion=nc_region,
                                                         roiMask=roi_image,
                                                         randomSeed=negative_control_seed)

        # Crop the negative control CT image to the same bounding box and resize it for FMCIB
        cropped_nc_ct_image = crop_bbox(negative_control_ct_image, bounding_box, fmcib_input_size)

        # Save the cropped negative control CT image
        cropped_nc_output_path = os.path.join(negative_control_dirs[(nc_type, nc_region)],
                                              f"{patient_ID}.nii.gz")
        sitk.WriteImage(cropped_nc_ct_image, cropped_nc_output_path)

  0%|          | 0/13 [00:00<?, ?it/s]

In [14]:
# Make the input CSV file for FMCIB for each CT type
fmcib_input_dir = os.path.join(proc_images_dir, "fmcib_input")
os.makedirs(fmcib_input_dir, exist_ok=True)

for image_type in sorted(os.listdir(cropped_images_dir)):
    image_type_dir = os.path.join(cropped_images_dir, image_type)

    # Skip stray files (e.g. OS metadata files); listing them as if they were
    # directories would raise NotADirectoryError
    if not os.path.isdir(image_type_dir):
        continue

    # Get the full paths to the files in each image type directory.
    # os.listdir returns entries in arbitrary order, so sort them to keep the
    # row order reproducible and consistent between image types.
    image_type_file_paths = [os.path.join(image_type_dir, image)
                             for image in sorted(os.listdir(image_type_dir))]

    # Create a dataframe with these image paths and all coordinates set to 0
    fmcib_input_df = pd.DataFrame(data={"image_path": image_type_file_paths})
    fmcib_input_df["coordX"] = 0
    fmcib_input_df["coordY"] = 0
    fmcib_input_df["coordZ"] = 0

    # Write the CSV file
    fmcib_input_df.to_csv(os.path.join(fmcib_input_dir, f"fmcib_input_{DATASET_NAME}_{image_type}.csv"), index=False)
