# Experiment

## Import

In [1]:
import tensorflow as tf
import tensorflow_io as tfio
import numpy as np
import pydicom as pdc
import pathlib
import matplotlib.pyplot as plt
import random
from typing import List, Tuple
from tqdm import tqdm

2023-04-06 23:29:09.523657: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-06 23:29:09.576494: I tensorflow/tsl/cuda/cudart_stub.cc:28] Could not find cuda drivers on your machine, GPU will not be used.
2023-04-06 23:29:09.578767: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


## Preprocessing

### Helper Functions

In [13]:
def get_patient_path_pair(project_root_path: pathlib.Path, patient_list: List[int]) -> List[Tuple[pathlib.Path, pathlib.Path, pathlib.Path]]:
    """Collect (InPhase, OutPhase, ground truth) file-path triples for the given patients.

    For every patient ID the project tree is searched recursively for the
    T1DUAL InPhase DICOM files, the T1DUAL OutPhase DICOM files and the
    ground truth PNG files. Each of the three result lists is sorted and the
    lists are then paired position by position.

    Args:
        project_root_path: The path to the root directory of the project.
        patient_list: A list of patient IDs.

    Returns:
        A flat list of 3-tuples (in-phase path, out-phase path, ground truth
        path), one tuple per paired file. The paths are POSIX-style strings
        (produced with ``as_posix()``). If the three lists of a patient differ
        in length, pairing stops at the shortest one.
    """
    def _sorted_posix(pattern):
        # Sort the Path objects first, then convert them to POSIX strings.
        return [match.as_posix() for match in sorted(project_root_path.rglob(pattern))]

    triples = []
    for patient_id in tqdm(patient_list, desc="Creating Path Pairing"):
        in_phase_files = _sorted_posix(f"MR/{patient_id}/T1DUAL/**/InPhase/*.dcm")
        out_phase_files = _sorted_posix(f"MR/{patient_id}/T1DUAL/**/OutPhase/*.dcm")
        ground_truth_files = _sorted_posix(f"MR/{patient_id}/**/T1DUAL/Ground/*.png")
        # zip pairs the i-th file of each list and truncates to the shortest.
        triples.extend(zip(in_phase_files, out_phase_files, ground_truth_files))
    return triples

def get_train_val_test_split(split: str, patient_list: List[int]) -> dict:
    """Randomly split the list of patients into disjoint train, validation, and test sets.

    Args:
        split: A string of three digits, e.g. "721", giving the train,
            validation, and test shares in tenths (70% / 20% / 10%). The train
            and validation sizes are rounded down; the test set receives all
            remaining patients.
        patient_list: A list of patient IDs. The list itself is not modified.

    Returns:
        A dictionary with keys "train", "val", and "test" that each have a
        corresponding list of patient IDs. The three lists never overlap and
        together contain every patient exactly once.

    Raises:
        IndexError: If ``split`` has fewer than three characters.
        ValueError: If one of the first three characters is not a digit.
    """
    train_tenths = int(split[0])
    val_tenths = int(split[1])
    # The test share is implied by the remainder; it is parsed only so that a
    # malformed third character still fails loudly.
    int(split[2])

    # Shuffle a copy so the caller's list keeps its original order.
    shuffled = list(patient_list)
    random.shuffle(shuffled)

    n = len(shuffled)
    # Integer arithmetic avoids float rounding errors in the share computation.
    n_train = n * train_tenths // 10
    n_val = n * val_tenths // 10

    # Use one shared boundary for the end of "val" and the start of "test" so
    # that rounding can never place a patient in both sets.
    val_end = n_train + n_val
    return {
        "train": shuffled[:n_train],
        "val": shuffled[n_train:val_end],
        "test": shuffled[val_end:],
    }

def get_dcm_img(dcm_path):
    """Read a DICOM file and decode its pixel data.

    Args:
        dcm_path: The path to the DICOM file.

    Returns:
        The image decoded by ``tfio.image.decode_dicom_image`` with dtype
        ``tf.uint16``.
    """
    raw_bytes = tf.io.read_file(dcm_path)
    return tfio.image.decode_dicom_image(raw_bytes, dtype=tf.uint16)

def preprocess_dcm(dcm_path):
    """Load a DICOM image and scale it by the maximum uint16 value.

    Args:
        dcm_path: The path to the DICOM file.

    Returns:
        The decoded image divided by 65535, which maps uint16 pixel values
        into the range [0, 1].
    """
    pixels = get_dcm_img(dcm_path)
    uint16_max = tf.cast(65535, tf.uint16)
    return pixels / uint16_max

def preprocess_ground(png_path):
    """Load a ground truth PNG and turn it into a binary mask.

    Args:
        png_path: The path to the PNG file.

    Returns:
        A ``tf.uint8`` tensor that is 1 where the single-channel PNG pixel
        equals 63 and 0 elsewhere.
    """
    encoded = tf.io.read_file(png_path)
    decoded = tf.io.decode_png(encoded, channels=1)
    # 63 is presumably the label value of the target structure -- confirm
    # against the dataset documentation.
    is_target = tf.equal(decoded, 63)
    return tf.cast(is_target, tf.uint8)

def parsed_path_to_dataset(features):
    """Turn one path triple into model inputs and a target mask.

    Args:
        features: An indexable holding the in-phase DICOM path at index 0,
            the out-phase DICOM path at index 1 and the ground truth PNG path
            at index 2.

    Returns:
        A tuple ``((in_phase, out_phase), mask)`` with the two preprocessed
        images and the binary ground truth mask.
    """
    in_phase = preprocess_dcm(features[0])
    out_phase = preprocess_dcm(features[1])
    mask = preprocess_ground(features[2])
    inputs = (in_phase, out_phase)
    return inputs, mask

def load_dicom_image(dicom_path):
    """
    Read the DICOM file at the given path and decode it with TensorFlow I/O.

    Args:
        dicom_path (str): The path to the DICOM file.

    Returns:
        The image decoded by ``tfio.image.decode_dicom_image`` as ``tf.uint16``.
    """
    raw_bytes = tf.io.read_file(dicom_path)
    return tfio.image.decode_dicom_image(raw_bytes, dtype=tf.uint16)


def normalize_image(image):
    """
    Scale the given image by dividing it by the maximum uint16 value (2^16-1).

    Args:
        image (tf.Tensor): The image to be normalized; assumed to hold uint16
            pixel values, in which case the result lies in [0, 1].

    Returns:
        The image divided by 65535.
    """
    uint16_max = tf.cast(65535, tf.uint16)
    return tf.divide(image, uint16_max)


def load_ground_truth_mask(png_path):
    """
    Read a ground truth PNG file and convert it to a binary mask.

    Args:
        png_path (str): The path to the PNG file.

    Returns:
        A ``tf.uint8`` tensor that is 1 where the single-channel PNG pixel
        equals 63 and 0 elsewhere.
    """
    encoded = tf.io.read_file(png_path)
    decoded = tf.io.decode_png(encoded, channels=1)
    # 63 is presumably the label value of the target structure -- confirm
    # against the dataset documentation.
    is_target = tf.equal(decoded, 63)
    return tf.cast(is_target, tf.uint8)


def parse_path_to_dataset(path_list):
    """
    Load one sample (two normalized DICOM images and a mask) from a path triple.

    Args:
        path_list (list): The in-phase DICOM path at index 0, the out-of-phase
            DICOM path at index 1 and the ground truth PNG path at index 2.

    Returns:
        A tuple ``((in_phase, out_phase), mask)`` of the normalized in-phase
        and out-of-phase images and the binary ground truth mask.
    """
    # Decode all three files first, then normalize the two images.
    raw_in_phase = load_dicom_image(path_list[0])
    raw_out_phase = load_dicom_image(path_list[1])
    mask = load_ground_truth_mask(path_list[2])

    inputs = (normalize_image(raw_in_phase), normalize_image(raw_out_phase))
    return inputs, mask

def get_dataset(pair_path_list,batch_size):
    """Build a shuffled, batched and prefetched ``tf.data.Dataset`` from path triples.

    Args:
        pair_path_list: The path triples to slice into individual samples;
            each slice is passed to ``parse_path_to_dataset``.
        batch_size: The number of samples per batch. The shuffle buffer holds
            ten times this many samples.

    Returns:
        A ``tf.data.Dataset`` that yields batches of
        ``((in_phase, out_phase), mask)``.
    """
    shuffle_buffer = batch_size * 10
    path_slices = tf.data.Dataset.from_tensor_slices(pair_path_list)
    samples = path_slices.map(parse_path_to_dataset, num_parallel_calls=tf.data.AUTOTUNE)
    batches = samples.shuffle(shuffle_buffer).batch(batch_size)
    return batches.prefetch(tf.data.AUTOTUNE)

In [14]:
# The project root is the parent of the current working directory
project_root_path = pathlib.Path.cwd().parent

# IDs of the patients to include in the experiment
patient_list = [1, 2, 3, 5, 8, 10, 13, 15, 19, 20, 21, 22, 31, 32, 33, 34, 36, 37, 38, 39]

# Split the patients 70% / 20% / 10% into train / val / test ("721" = shares in tenths).
# The split is random and no seed is set in this cell, so it differs between runs.
patient_split = get_train_val_test_split("721",patient_list)

# Collect the (InPhase, OutPhase, ground truth) path triples for each subset
train_path_pair = get_patient_path_pair(project_root_path,patient_split["train"])
val_path_pair = get_patient_path_pair(project_root_path,patient_split["val"])
test_path_pair = get_patient_path_pair(project_root_path,patient_split["test"])

# Build one tf.data pipeline per subset, all with the same batch size
batch_size=10
train_dataset = get_dataset(train_path_pair,batch_size)
val_dataset = get_dataset(val_path_pair,batch_size)
test_dataset = get_dataset(test_path_pair,batch_size)

Creating Path Pairing: 100%|█| 14/14 [00:00<00:
Creating Path Pairing: 100%|█| 4/4 [00:00<00:00
Creating Path Pairing: 100%|█| 2/2 [00:00<00:00
