# Development Notebook

Develop and debug code for data preprocessing

In [1]:
# Move the working directory up two levels so that relative paths resolve from the project root.
# NOTE: not idempotent - re-running this cell moves the working directory up another two levels.
%cd ../..

/home/bhkuser/bhklab/katy/aaura-bench-preprocess


In [2]:
import logging

# Root logging setup: INFO and above, each record prefixed with a timestamp and its level.
logging.basicConfig(
    format='%(asctime)s - %(levelname)s - %(message)s',
    level=logging.INFO,
)

# Logger named after the running module, for use by the cells below.
logger = logging.getLogger(__name__)

In [3]:
from damply import dirs

# Print the directory layout damply resolved; useful for checking that the project root is the expected one.
print(dirs)

DamplyDirs<Structure: NESTED>
Project Root: /home/bhkuser/bhklab/katy/aaura-bench-preprocess
CONFIG       : ├── <not found>
LOGS         : ├── logs
METADATA     : ├── <not found>
NOTEBOOKS    : ├── workflow/notebooks
PROCDATA     : ├── data/procdata
RAWDATA      : ├── data/rawdata
RESULTS      : ├── data/results
SCRIPTS      : └── workflow/scripts


In [4]:
# Dataset folder name; it is joined onto dirs.RAWDATA when reading the CSVs and building image/label paths.
dataset = "CVPR_LesionLocator"
# Case identifier; it forms the stem of the image and label file names for this sample.
sample_id = "LesionLocator_0001"

In [5]:
import pandas as pd
# Naming table for the dataset (presumably one row per sample - TODO confirm).
# Later cells read its `Source` column and add a `lesion_location` column to it.
metadata = pd.read_csv(dirs.RAWDATA / dataset / "naming.csv")
# Lookup table; later cells read its `Dataset` and `Anatomy` columns to assign an anatomy per source dataset.
anatomy_match = pd.read_csv(dirs.RAWDATA / dataset / "dataset_anatomy_match.csv")

In [6]:
# Tag every metadata row with the anatomy of the source dataset that its `Source` value mentions.
# NOTE(review): str.contains interprets each dataset name as a regular expression by default -
# confirm the names contain no regex metacharacters.
for dataset_name, anatomy in zip(anatomy_match['Dataset'], anatomy_match['Anatomy']):
    from_this_dataset = metadata.Source.str.contains(dataset_name)
    metadata.loc[from_this_dataset, 'lesion_location'] = anatomy

In [7]:
import SimpleITK as sitk
from imgtools.coretypes import MedImage
def image_proc(image_path):
    """Load an image file and wrap it as a ``MedImage`` with 32-bit signed integer voxels.

    Parameters
    ----------
    image_path : path-like
        Location of the image file; converted with ``str`` before being handed to SimpleITK.

    Returns
    -------
    MedImage
        The loaded image after casting to ``sitk.sitkInt32``.
    """
    # SimpleITK's reader is given a plain string path.
    raw_image = sitk.ReadImage(str(image_path))

    # NOTE(review): confirm Int32 (rather than Int16) is the intended voxel type.
    int32_image = sitk.Cast(raw_image, sitk.sitkInt32)

    # Wrap the SimpleITK image in the med-imagetools image type.
    return MedImage(int32_image)

  from .autonotebook import tqdm as notebook_tqdm


In [8]:
import SimpleITK as sitk
from imgtools.coretypes import Mask, VectorMask

def mask_proc(mask_path):
    """Load a label file and wrap it as a ``Mask`` with 8-bit unsigned integer voxels.

    Parameters
    ----------
    mask_path : path-like
        Location of the label file; converted with ``str`` before being handed to SimpleITK.

    Returns
    -------
    Mask
        The loaded labels after casting to ``sitk.sitkUInt8``, constructed with
        ``metadata={"mask.ndim": 3}``.
    """
    # Read the label image and cast it to UInt8 in one step.
    label_image = sitk.Cast(sitk.ReadImage(str(mask_path)), sitk.sitkUInt8)

    # TODO: Find the maximum pixel value, this will be the number of volumes, make a little roi mapping then go back to using VectorMask

    # Wrap as a med-imagetools Mask, passing the mask dimensionality as metadata.
    return Mask(label_image, metadata={"mask.ndim": 3})

In [9]:
from imgtools.coretypes import MedImage, Mask
from pathlib import Path
import SimpleITK as sitk

# Paths are built relative to the raw-data root; dirs.RAWDATA is only prepended when loading below.
# Image file names carry a "_0000" suffix after the sample id; label file names use the bare sample id.
base_scan_path = Path(dataset) / "Baseline" / "images" / f"{sample_id}_0000.nii.gz"
base_mask_path = Path(dataset) / "Baseline" / "labels" / f"{sample_id}.nii.gz"
synth_scan_path = Path(dataset) / "Synthetic_Follow_Up" / "images" / f"{sample_id}_0000.nii.gz"
synth_mask_path = Path(dataset) / "Synthetic_Follow_Up" / "labels" / f"{sample_id}.nii.gz"

# Load the baseline and synthetic follow-up scan/mask pairs through the helpers defined above.
base_scan = image_proc(dirs.RAWDATA / base_scan_path)
base_mask = mask_proc(dirs.RAWDATA / base_mask_path)
synth_scan = image_proc(dirs.RAWDATA / synth_scan_path)
synth_mask = mask_proc(dirs.RAWDATA / synth_mask_path)

In [None]:
# Keep the baseline scan's fingerprint (the summary exposed by its `fingerprint` attribute) and display it.
scan_metadata = base_scan.fingerprint
scan_metadata

In [16]:
# Inspect the baseline mask's fingerprint: geometry, intensity statistics and mask.* shape descriptors.
base_mask.fingerprint

{'class': 'Mask',
 'hash': '4c63823ae7d3583ca354df0433b1f7e2dc230b09',
 'size': Size3D(w=512, h=512, d=611),
 'ndim': 3,
 'nvoxels': 160169984,
 'spacing': Spacing3D(x=0.919921875, y=0.919921875, z=0.5),
 'origin': Coordinate3D(x=470.080078125, y=470.080078125, z=-305.0),
 'direction': Direction([-1.00,0.00,0.00], [0.00,-1.00,0.00], [0.00,0.00,1.00]),
 'min': 0.0,
 'max': 1.0,
 'sum': 20462.0,
 'mean': 0.00012775177651263298,
 'std': 0.011302011183578355,
 'variance': 0.0001277354567937302,
 'dtype_str': '8-bit unsigned integer',
 'dtype_numpy': numpy.uint8,
 'mask.bbox.size': Size3D(w=29, h=29, d=55),
 'mask.bbox.min_coord': Coordinate3D(x=322, y=175, z=267),
 'mask.bbox.max_coord': Coordinate3D(x=351, y=204, z=322),
 'mask.feret_diameter': 31.144797773379853,
 'mask.roundness': 0.8958489097623504,
 'mask.flatness': 1.149567479265029,
 'mask.elongation': 1.1500055913775435,
 'mask.equivalent_spherical_radius': 12.738270174503679,
 'mask.equivalent_spherical_perimeter': 2039.0636179607

In [13]:
# Warn when the mask reports more than one volume; no visible cell performs that separation yet.
if base_mask.volume_count > 1:
    print("Multiple masks, must separate")

In [11]:
# Index record for the baseline scan. The two paths are the raw-data-relative ones built earlier;
# "recist_coords" starts as None and is filled in by a later cell.
# NOTE(review): `metadata_pat1` is not defined in any visible cell, so on a fresh kernel this cell
# raises NameError. Presumably it is the `metadata` row(s) for `sample_id` - define it before this cell.
# NOTE(review): "lesion_location" is hard-coded even though `metadata` gains a `lesion_location`
# column in an earlier cell - confirm whether the value should be looked up instead.
# NOTE(review): "mask_volume" is the fingerprint's "sum" (sum of voxel values), not a physical
# volume - confirm that is the intended quantity.
baseline_sample_index = {"id": sample_id,
                         "image_path": base_scan_path,
                         "mask_path": base_mask_path,
                         "recist_coords": None,
                         "spacing": base_scan.spacing,
                         "origin": base_scan.origin,
                         "direction": base_scan.direction,
                         "mask_volume": base_mask.fingerprint["sum"],
                         "lesion_location": "abdomen",
                         "source": metadata_pat1['Source'].values[0]
                         }

In [17]:
# to_numpy() returns something indexable; element 0 is taken as the voxel array.
# The shape displayed below puts the depth axis first, relative to the fingerprint's (w, h, d) size.
np_base_mask = base_mask.to_numpy()[0]

np_base_mask.shape

(611, 512, 512)

In [15]:
import numpy as np
from skimage.measure import regionprops 
def mask2D_to_bbox(mask:np.array, 
                   padding:int | None = None,
                   spacing:np.array = None
                   ) -> np.array:
        
        props = regionprops(mask)[0]
        y_cent, x_cent = props.centroid
        orientation = props.orientation
        semi_maj_axis_len = props.axis_major_length / 2

        x_start = x_cent - np.sin(orientation) * semi_maj_axis_len
        y_start = y_cent - np.cos(orientation) * semi_maj_axis_len

        x_end = x_cent + np.sin(orientation) * semi_maj_axis_len
        y_end = y_cent + np.cos(orientation) * semi_maj_axis_len

        boxes = np.array([x_start, y_start, x_end, y_end])

        # if padding:
        #     boxes = pad_bbox(box = boxes,
        #                      mask = mask,
        #                      padding = padding,
        #                      spacing = spacing)
        
        return boxes.astype(int)

In [16]:
import numpy as np
def get_recist_coords(mask:MedImage):
    """Major-axis end points of the mask on its largest slice along axis 0.

    The mask is converted to a NumPy volume, the slice along axis 0 with the largest summed
    mask value is selected (the axial slice with the largest tumour area), and that 2D slice
    is passed to ``mask2D_to_bbox``.

    Returns whatever ``mask2D_to_bbox`` returns for the selected slice.
    """
    # to_numpy() yields an indexable result; its first element is the voxel array.
    volume = mask.to_numpy()[0]

    # Collapse the last two axes so each entry is the summed mask value of one slice.
    slice_totals = np.sum(volume, axis=(1, 2))

    # argmax keeps the first slice when several tie for the largest total.
    largest_slice = volume[np.argmax(slice_totals)]

    return mask2D_to_bbox(largest_slice)
    

In [18]:
# Replace the None placeholder with the coordinates computed from the baseline mask.
baseline_sample_index['recist_coords'] = get_recist_coords(base_mask)

In [19]:
# Display the completed baseline record.
baseline_sample_index

{'id': 'LesionLocator_0001',
 'image_path': PosixPath('CVPR_LesionLocator/Baseline/images/LesionLocator_0001_0000.nii.gz'),
 'mask_path': PosixPath('CVPR_LesionLocator/Baseline/labels/LesionLocator_0001.nii.gz'),
 'recist_coords': array([347, 179, 326, 200]),
 'spacing': Spacing3D(x=0.919921875, y=0.919921875, z=0.5),
 'origin': Coordinate3D(x=470.080078125, y=470.080078125, z=-305.0),
 'direction': Direction([-1.00,0.00,0.00], [0.00,-1.00,0.00], [0.00,0.00,1.00]),
 'mask_volume': 20462.0,
 'lesion_location': 'abdomen',
 'source': 'KiTS23_case_00000'}