# Code to run READII negative controls through FMCIB

This code utilizes the RADCURE dataset from TCIA. This dataset is under the TCIA Restricted License, so users will need to request access prior to running this code.

We will be using the RADCURE test subset specified in the clinical data sheet. 

## [INSERT steps to set up pixi environment for this notebook here]

# Steps
Initialize dataset name

Set up data directories

Move raw RADCURE image data to `rawdata/RADCURE/images`

Make ROI yaml file

Run med-imagetools on the raw image data to organize it, generate the metadata file, and convert the CT and RTSTRUCT files to niftis

Run READII to generate negative control images and save them as niftis to the procdata directory

  - Should end up with 6 niftis along with the original CT and RTSTRUCT nifti files 

Run process_readii.py to crop the images to the ROI and save them as niftis to the procdata directory and get the FMCIB input csv file

Run code from infer.py to get the FMCIB features extracted from each image type

Create correlation matrix of the features and display

## Initialize dataset name

In [1]:
# Dataset identifier; interpolated into the data/results directory paths and output file names below.
DATASET_NAME = "RADCURE"

## Set up data directories

In [8]:
import os
import itertools

# Set up raw and processed data directories:
#   ../{rawdata,procdata}/<DATASET_NAME>/{clinical,images}
for combo in itertools.product(["rawdata", "procdata"], [DATASET_NAME], ["clinical", "images"]):
    os.makedirs(os.path.join("../", *combo), exist_ok=True)

# Image directories used by the later cells (relative to the current working directory)
raw_images_dir = f"rawdata/{DATASET_NAME}/images"
proc_images_dir = f"procdata/{DATASET_NAME}/images"

# NOTE(review): the directory trees in this cell are created under "../", while the
# two paths above are relative to the working directory -- confirm which base is
# intended. Until that is settled, make sure the directories that are actually read
# from and written to exist, so writing files into them cannot fail with
# FileNotFoundError.
for images_dir in (raw_images_dir, proc_images_dir):
    os.makedirs(images_dir, exist_ok=True)

# Set up results directory:
#   ../results/<DATASET_NAME>/{features,analysis/correlations}
for combo in itertools.product(["results"], [DATASET_NAME], ["features", "analysis/correlations"]):
    os.makedirs(os.path.join("../", *combo), exist_ok=True)


### Here is where you move the raw image data for RADCURE test set to the `rawdata/RADCURE/images` directory

## Make ROI yaml file

In [9]:
import yaml

# Name given to the extracted ROI; it is also the file stem used when the ROI mask
# nifti is loaded later in the notebook.
roi_name = "GTV"

# Map the ROI name to the pattern used to pick the matching contour.
# This must be a dict literal keyed by the *value* of roi_name:
# dict(roi_name="GTVp$") would create the literal key "roi_name" instead of "GTV".
# NOTE(review): "GTVp$" is presumably a regex matched against RTSTRUCT contour names
# by med-imagetools -- confirm against its ROI yaml documentation.
roi_matches = {roi_name: "GTVp$"}

# Write the mapping where the med-imagetools run expects to find it
with open(f"{raw_images_dir}/mit_roi_names.yaml", "w") as outfile:
    yaml.dump(roi_matches, outfile)

## Run med-imagetools

In [10]:
from imgtools.autopipeline import AutoPipeline

# Inputs/outputs for the med-imagetools run
roi_yaml_path = f"{raw_images_dir}/mit_roi_names.yaml"
modalities = "CT,RTSTRUCT"
mit_output_dir = f"{proc_images_dir}/mit_outputs"

# Gather every AutoPipeline setting in one place, then build and run the pipeline
autopipeline_settings = {
    "input_directory": raw_images_dir,
    "output_directory": mit_output_dir,
    "modalities": modalities,
    "spacing": (0.0, 0.0, 0.0),
    "read_yaml_label_names": True,
    "ignore_missing_regex": True,
    "roi_yaml_path": roi_yaml_path,
}

pipeline = AutoPipeline(**autopipeline_settings)
pipeline.run()



Dataset already processed...


  relevant_study_id = self.df_new.loc[(self.df_new.edge_type.str.contains(regex_term)), "study_x"].unique()


## Load the CT and RTSTRUCT to run through READII to generate negative controls, crop and save the original and negative control images

In [None]:
import pandas as pd
import SimpleITK as sitk
from readii.negative_controls import applyNegativeControl
from tqdm.notebook import tqdm, trange

import sys; sys.path.append("code")
from process_readii import find_bbox, crop_bbox

# Read in the dataset.csv file made by med-imagetools autopipeline
images_metadata = pd.read_csv(os.path.join(mit_output_dir, "dataset.csv"), index_col=0)

# Set up the output directories for all the READII processed images
# Main output directory for cropped nifti images.
# The component must NOT start with "/": os.path.join discards everything before an
# absolute component, which would send the output to /cropped_images at the filesystem root.
cropped_images_dir = os.path.join(proc_images_dir, "cropped_images")

# Make output directory for the original CT
cropped_original_dir = os.path.join(cropped_images_dir, "original")
os.makedirs(cropped_original_dir, exist_ok=True)

# Make list of negative control types and regions
# Regions refer to what portion of the CT image to apply the negative control to
# Types refer to what will be done with the voxels of the CT image in the region
negative_control_regions = ["full", "roi", "non_roi"]
negative_control_types = ["shuffled", "randomized_sampled"]

# Create one output directory per (type, region) combination up front, so the
# per-patient loop below only has to look the directory up.
negative_control_dirs = {}
for nc_type, nc_region in itertools.product(negative_control_types, negative_control_regions):
    nc_dir = os.path.join(cropped_images_dir, f"{nc_type}_{nc_region}")
    os.makedirs(nc_dir, exist_ok=True)
    negative_control_dirs[(nc_type, nc_region)] = nc_dir

# Size every cropped image is resized to for input to FMCIB
fmcib_input_size = (50, 50, 50)

for image_idx in tqdm(images_metadata.index):
    image_idx_metadata = images_metadata.loc[image_idx]

    patient_ID = image_idx_metadata['patient_ID']

    # Load in the CT image output from med-imagetools
    ct_image = sitk.ReadImage(os.path.join(mit_output_dir, image_idx, "CT", "CT.nii.gz"))

    # Load in the RTSTRUCT image output from med-imagetools
    roi_image = sitk.ReadImage(os.path.join(mit_output_dir, image_idx, "RTSTRUCT_CT", f"{roi_name}.nii.gz"))

    # Find the bounding box of the ROI to crop CT image to.
    # The same box is reused for every negative control of this patient.
    bounding_box = find_bbox(roi_image)

    # Process the original CT image
    # Crop the CT image to the bounding box and resize it for input to FMCIB
    cropped_ct_image = crop_bbox(ct_image, bounding_box, fmcib_input_size)

    # Save the cropped CT image to the cropped_original_dir
    cropped_output_path = os.path.join(cropped_original_dir, f"{patient_ID}.nii.gz")
    sitk.WriteImage(cropped_ct_image, cropped_output_path)

    # Process the negative control CT images
    for (nc_type, nc_region), cropped_nc_dir in negative_control_dirs.items():
        # Make negative control image using READII (fixed seed, same for every image)
        negative_control_ct_image = applyNegativeControl(ct_image,
                                                         negativeControlType=nc_type,
                                                         negativeControlRegion=nc_region,
                                                         roiMask=roi_image,
                                                         randomSeed=10)

        # Crop the negative control CT image to the bounding box and resize it for input to FMCIB
        cropped_nc_ct_image = crop_bbox(negative_control_ct_image, bounding_box, fmcib_input_size)

        # Save the cropped negative control CT image
        cropped_nc_output_path = os.path.join(cropped_nc_dir, f"{patient_ID}.nii.gz")
        sitk.WriteImage(cropped_nc_ct_image, cropped_nc_output_path)

  0%|          | 0/1 [00:00<?, ?it/s]

In [None]:
# Make the input CSV file for FMCIB for each CT type
os.makedirs(os.path.join(proc_images_dir, "fmcib_input"), exist_ok=True)

for image_type in sorted(os.listdir(cropped_images_dir)):
    # Get the full paths to the files in each image type directory
    image_type_file_paths = [os.path.join(cropped_images_dir, image_type, image) 
                             for image in os.listdir(os.path.join(cropped_images_dir, image_type))]
    
    # Create a dataframe with these image paths and all coordinates set to 0
    fmcib_input_df = pd.DataFrame(columns=["image_path"], data = [image_type_file_paths])
    fmcib_input_df["coordX"] = 0
    fmcib_input_df["coordY"] = 0
    fmcib_input_df["coordZ"] = 0

    # Write the CSV file
    fmcib_input_df.to_csv(os.path.join(proc_images_dir, "fmcib_input", f"fmcib_input_{DATASET_NAME}_{image_type}.csv"), index=False)
