In [1]:
import shutil
from pathlib import Path
import numpy as np


# Root folder for all data paths in this notebook: the parent of the current
# working directory (so the result depends on where the kernel was started).
base_path = Path.cwd().parent
# Bare expression so the notebook displays the resolved path for a sanity check.
base_path

WindowsPath('d:/Frank/Imperial-FYP/software_archive')

In [9]:
import torch
from matplotlib import pyplot as plt
from segment_anything import sam_model_registry
from segment_anything.utils.transforms import ResizeLongestSide
from tqdm import tqdm

import SimpleITK as sitk
import shutil

In [3]:
# Locations of the processed axial volumes and their annotation masks.
data_path = base_path / "processed_crohns"
axial_path = data_path / "Crohns2023Axial"
axial_labels_path = data_path / "Crohns2023AxialLabels"

# Annotated axial cases: A101-A113 followed by I101-I120 (order matters, since
# the position in this list becomes the case number when the files are copied).
training_data_filenames = [
    f"{prefix}{case_number}_axial.nii.gz"
    for prefix, case_numbers in (("A", range(101, 114)), ("I", range(101, 121)))
    for case_number in case_numbers
]

In [7]:
# Build the annotated axial dataset: images/ and labels/ folders holding
# sequentially numbered copies of the annotated cases.
training_data_path = base_path / "lab" / "Crohns23AxialAnnotated"
training_image_folder = training_data_path / "images"
training_label_folder = training_data_path / "labels"
# parents=True also creates "lab" and the dataset folder when they are missing,
# so the cell works on a fresh checkout instead of raising FileNotFoundError.
training_image_folder.mkdir(parents=True, exist_ok=True)
training_label_folder.mkdir(parents=True, exist_ok=True)

for i, filename in enumerate(training_data_filenames):
    # Case numbers are 1-based; the image carries an extra "_0000" suffix while
    # the label keeps the bare case name.
    new_filename = f"Crohns23Axial_Tr_{i+1:04d}_0000.nii.gz"
    new_label_filename = f"Crohns23Axial_Tr_{i+1:04d}.nii.gz"
    # Image and label share the same source file name in their respective folders.
    shutil.copyfile(axial_path / filename, training_image_folder / new_filename)
    shutil.copyfile(axial_labels_path / filename, training_label_folder / new_label_filename)


In [7]:
# Locations of the processed coronal volumes and their annotation masks.
coronal_path = data_path / "Crohns2023Coronal"
coronal_labels_path = data_path / "Crohns2023CoronalLabels"

# Annotated coronal cases: A101-A113 followed by I101-I120 (order matters, since
# the position in this list becomes the case number when the files are copied).
training_data_filenames = [
    f"{prefix}{case_number}_coronal.nii.gz"
    for prefix, case_numbers in (("A", range(101, 114)), ("I", range(101, 121)))
    for case_number in case_numbers
]

In [8]:
# Build the annotated coronal dataset: images/ and labels/ folders holding
# sequentially numbered copies of the annotated cases.
training_data_path = base_path / "lab" / "Crohns23CoronalAnnotated"
training_image_folder = training_data_path / "images"
training_label_folder = training_data_path / "labels"
# parents=True also creates "lab" and the dataset folder when they are missing,
# so the cell works on a fresh checkout instead of raising FileNotFoundError.
training_image_folder.mkdir(parents=True, exist_ok=True)
training_label_folder.mkdir(parents=True, exist_ok=True)


for i, filename in enumerate(training_data_filenames):
    # Case numbers are 1-based; the image carries an extra "_0000" suffix while
    # the label keeps the bare case name.
    new_filename = f"Crohns23Coronal_Tr_{i+1:04d}_0000.nii.gz"
    new_label_filename = f"Crohns23Coronal_Tr_{i+1:04d}.nii.gz"
    # Image and label share the same source file name in their respective folders.
    shutil.copyfile(coronal_path / filename, training_image_folder / new_filename)
    shutil.copyfile(coronal_labels_path / filename, training_label_folder / new_label_filename)

In [4]:
import xml.etree.ElementTree as ET


def get_centerline_points(centerline_path, fraction=0.2):
    """Read centerline points from an XML file and keep the leading portion.

    Every child of the XML root that has a ``name`` attribute is treated as a
    path; each of its children contributes one point built from its integer
    ``x``, ``y`` and ``z`` attributes. Points are collected in document order
    and only the first ``fraction`` of them is returned.

    NOTE(review): the leading part of the centerline is presumably the segment
    of interest for the proxy labels -- confirm against the annotation protocol.

    Parameters
    ----------
    centerline_path : str, path-like or binary file object
        Source handed to ``xml.etree.ElementTree.parse``.
    fraction : float, optional
        Share of the points to keep, between 0 and 1. Defaults to 0.2. The
        number kept is ``int(len(points) * fraction)``, i.e. rounded down.

    Returns
    -------
    numpy.ndarray
        Integer array of shape ``(n_kept, 3)`` with columns x, y, z. The shape
        is ``(0, 3)`` when nothing is kept, so column indexing stays valid.

    Raises
    ------
    ValueError
        If ``fraction`` lies outside ``[0, 1]``, or an attribute is not an
        integer literal.
    KeyError
        If a point element lacks an ``x``, ``y`` or ``z`` attribute.
    """
    if not 0 <= fraction <= 1:
        raise ValueError(f"fraction must be within [0, 1], got {fraction!r}")

    root = ET.parse(centerline_path).getroot()
    centerline_points = [
        [int(point.attrib['x']), int(point.attrib['y']), int(point.attrib['z'])]
        for path in root
        if 'name' in path.attrib  # children without a name are not centerline paths
        for point in path
    ]

    crohns_centerline_size = int(len(centerline_points) * fraction)
    # reshape(-1, 3) keeps the result two-dimensional even when it is empty.
    return np.array(centerline_points[:crohns_centerline_size], dtype=int).reshape(-1, 3)

In [5]:
def get_bbox_from_centerline(centerline_points):
    """Compute the axis-aligned bounding box that encloses the given points.

    Parameters
    ----------
    centerline_points : numpy.ndarray
        Two-dimensional array with one point per row; columns 0, 1 and 2 are
        read as x, y and z.

    Returns
    -------
    index : tuple of int
        The ``(min_x, min_y, min_z)`` corner of the box.
    size : tuple of int
        Extent of the box along x, y and z, counting both end points
        (``max - min + 1``).
    """
    # Select exactly the three coordinate columns, then reduce over the points.
    coordinates = centerline_points[:, [0, 1, 2]]
    lower = np.min(coordinates, axis=0)
    upper = np.max(coordinates, axis=0)

    index = tuple(int(low) for low in lower)
    size = tuple(int(high - low + 1) for low, high in zip(lower, upper))
    return index, size

In [41]:
axial_centerlines_path = data_path / "Crohns2023AxialCenterlines"

# Case names are the centerline file names without the ".xml" suffix. Sorting
# keeps the case numbering below reproducible: glob() yields entries in an
# arbitrary, file-system-dependent order.
axial_filenames = sorted(f.stem for f in axial_centerlines_path.glob("*.xml"))

proxy_training_data_path = base_path / "lab" / "Crohns23AxialProxy"
proxy_training_image_folder = proxy_training_data_path / "images"
proxy_training_label_folder = proxy_training_data_path / "labels"
# parents=True also creates "lab" and the dataset folder when they are missing.
proxy_training_image_folder.mkdir(parents=True, exist_ok=True)
proxy_training_label_folder.mkdir(parents=True, exist_ok=True)


for i, filename in enumerate(axial_filenames):
    original_image_name = f"{filename}.nii.gz"
    new_image_name = f"Crohns23Axial_Tr_{i+1:04d}_0000.nii.gz"
    new_label_name = f"Crohns23Axial_Tr_{i+1:04d}.nii.gz"
    shutil.copyfile(axial_path / original_image_name, proxy_training_image_folder / new_image_name)

    # Construct a pseudo label from the centerline. The image is read once:
    # its voxel array fixes the label's shape and dtype, and the SimpleITK
    # image supplies the metadata copied onto the label below.
    reference_image = sitk.ReadImage(str(axial_path / original_image_name))
    image = sitk.GetArrayFromImage(reference_image)
    label_array = np.zeros_like(image)

    # Get the centerline points, then the corner and size of their bounding box.
    centerline_points = get_centerline_points(axial_centerlines_path / f"{filename}.xml")
    index, size = get_bbox_from_centerline(centerline_points)

    xmin, ymin, zmin = index
    xmax, ymax, zmax = np.array(index) + np.array(size)

    # The voxel array is indexed [z, y, x]; mark the whole box as foreground.
    label_array[zmin:zmax, ymin:ymax, xmin:xmax] = 1

    # Write the label with the source image's metadata. The path is passed as
    # str, like the ReadImage call above.
    label = sitk.GetImageFromArray(label_array)
    label.CopyInformation(reference_image)
    sitk.WriteImage(label, str(proxy_training_label_folder / new_label_name))

In [10]:
coronal_centerlines_path = data_path / "Crohns2023CoronalCenterlines"

# Case names are the centerline file names without the ".xml" suffix. Sorting
# keeps the case numbering below reproducible: glob() yields entries in an
# arbitrary, file-system-dependent order.
coronal_filenames = sorted(f.stem for f in coronal_centerlines_path.glob("*.xml"))

proxy_training_data_path = base_path / "lab" / "Crohns23CoronalProxy"
proxy_training_image_folder = proxy_training_data_path / "images"
proxy_training_label_folder = proxy_training_data_path / "labels"
# parents=True also creates "lab" and the dataset folder when they are missing.
proxy_training_image_folder.mkdir(parents=True, exist_ok=True)
proxy_training_label_folder.mkdir(parents=True, exist_ok=True)

for i, filename in enumerate(coronal_filenames):
    original_image_name = f"{filename}.nii.gz"
    new_image_name = f"Crohns23Coronal_Tr_{i+1:04d}_0000.nii.gz"
    new_label_name = f"Crohns23Coronal_Tr_{i+1:04d}.nii.gz"
    shutil.copyfile(coronal_path / original_image_name, proxy_training_image_folder / new_image_name)

    # Construct a pseudo label from the centerline. The image is read once:
    # its voxel array fixes the label's shape and dtype, and the SimpleITK
    # image supplies the metadata copied onto the label below.
    reference_image = sitk.ReadImage(str(coronal_path / original_image_name))
    image = sitk.GetArrayFromImage(reference_image)
    label_array = np.zeros_like(image)

    # Get the centerline points, then the corner and size of their bounding box.
    centerline_points = get_centerline_points(coronal_centerlines_path / f"{filename}.xml")
    index, size = get_bbox_from_centerline(centerline_points)

    xmin, ymin, zmin = index
    xmax, ymax, zmax = np.array(index) + np.array(size)

    # The voxel array is indexed [z, y, x]; mark the whole box as foreground.
    label_array[zmin:zmax, ymin:ymax, xmin:xmax] = 1

    # Write the label with the source image's metadata. The path is passed as
    # str, like the ReadImage call above.
    label = sitk.GetImageFromArray(label_array)
    label.CopyInformation(reference_image)
    sitk.WriteImage(label, str(proxy_training_label_folder / new_label_name))