<a href="https://colab.research.google.com/github/deepakri201/AbdoBench/blob/main/CHAOS_preprocessing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

CHAOS MR preprocessing code

Deepa Krishnaswamy and Cosmin Ciausu

July 2025

Brigham and Women's Hospital

# Environment setup

In [None]:
# from glob import glob as glob
import glob
import os
import shutil
import subprocess

import nibabel as nib
import numpy as np
from matplotlib import image as plt_image

In [None]:
!pip install SimpleITK
import SimpleITK as sitk

Collecting SimpleITK
  Downloading simpleitk-2.5.2-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl.metadata (7.2 kB)
Downloading simpleitk-2.5.2-cp311-abi3-manylinux2014_x86_64.manylinux_2_17_x86_64.whl (52.6 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m52.6/52.6 MB[0m [31m15.5 MB/s[0m eta [36m0:00:00[0m
[?25hInstalling collected packages: SimpleITK
Successfully installed SimpleITK-2.5.2


In [None]:
# Install dcm2niix
# The release is pinned (it is the one the "latest" link resolved to when this
# notebook was last run) so that re-runs use the same converter version.
# -O overwrites a previous download instead of creating dcm2niix_lnx.zip.1, and
# unzip -o overwrites without prompting, so the cell can be re-run safely.
!wget -q -O dcm2niix_lnx.zip https://github.com/rordenlab/dcm2niix/releases/download/v1.0.20250506/dcm2niix_lnx.zip
!unzip -o dcm2niix_lnx.zip
# copy from the current directory, which is where the archive was just extracted
!cp dcm2niix /usr/local/bin

--2025-07-10 19:54:33--  https://github.com/rordenlab/dcm2niix/releases/latest/download/dcm2niix_lnx.zip
Resolving github.com (github.com)... 140.82.116.3
Connecting to github.com (github.com)|140.82.116.3|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://github.com/rordenlab/dcm2niix/releases/download/v1.0.20250506/dcm2niix_lnx.zip [following]
--2025-07-10 19:54:34--  https://github.com/rordenlab/dcm2niix/releases/download/v1.0.20250506/dcm2niix_lnx.zip
Reusing existing connection to github.com:443.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/25434012/ad8c76fa-0b5f-4856-aca4-987ab3de6bf3?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=releaseassetproduction%2F20250710%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20250710T195434Z&X-Amz-Expires=1800&X-Amz-Signature=fca8e969b4d87b21a2124374fb8b241d7df61b3b597f9607eb331fd8525cd491&X-Amz-SignedHeaders=host&respo

# Download the data from Zenodo

In [None]:
# We download the CHAOS training data set
# https://zenodo.org/records/3431873
# The URL is quoted so the shell does not interpret "?" as a glob character.
# -nc skips the ~850 MB download when the archive is already present.
# The file keeps the name wget derives from the URL: "CHAOS_Train_Sets.zip?download=1".

!wget -nc "https://zenodo.org/records/3431873/files/CHAOS_Train_Sets.zip?download=1"

--2025-07-10 19:54:37--  https://zenodo.org/records/3431873/files/CHAOS_Train_Sets.zip?download=1
Resolving zenodo.org (zenodo.org)... 188.185.45.92, 188.185.48.194, 188.185.43.25, ...
Connecting to zenodo.org (zenodo.org)|188.185.45.92|:443... connected.
HTTP request sent, awaiting response... 200 OK
Length: 890771694 (850M) [application/octet-stream]
Saving to: ‘CHAOS_Train_Sets.zip?download=1’


2025-07-10 19:55:21 (19.7 MB/s) - ‘CHAOS_Train_Sets.zip?download=1’ saved [890771694/890771694]



In [None]:
# Unzip the contents
# -o overwrites existing files without prompting, so the cell can be re-run.
# -q suppresses the per-file listing, which is thousands of lines long.
# -d /content extracts to the location the later cells read from (/content/Train_Sets).

!unzip -o -q "/content/CHAOS_Train_Sets.zip?download=1" -d /content

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
  inflating: Train_Sets/CT/28/Ground/liver_GT_059.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_060.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_061.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_062.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_063.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_064.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_065.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_066.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_067.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_068.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_069.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_070.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_071.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_072.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_073.png  
  inflating: Train_Sets/CT/28/Ground/liver_GT_074.png  
  inflating: Train_Sets/CT/28/Ground/li

# Conversion of CHAOS

In [None]:
# Create the output directory to store the images and segmentations to be used for evaluation

output_main_directory = "/content/CHAOS_preprocessed_data"
output_images_directory = os.path.join(output_main_directory, "images")
output_labels_directory = os.path.join(output_main_directory, "labels")

# makedirs(exist_ok=True) creates any missing parent (including
# output_main_directory) and does nothing when the directory already exists,
# so no separate isdir checks are needed.
for output_directory in (output_images_directory, output_labels_directory):
  os.makedirs(output_directory, exist_ok=True)


In [None]:
def flip_image_to_ras(image):
  """Return ``image`` reoriented to the closest canonical (RAS+) orientation.

  The reorientation is delegated to ``nib.as_closest_canonical``, which flips
  and, when necessary, permutes the voxel axes and updates the affine to match.
  The previous implementation only flipped axes whose code was not R/A/S and
  then attached the affine of ``nib.as_closest_canonical(image)``; for an image
  whose voxel axes are permuted (e.g. axis codes ('A', 'S', 'R')) the array was
  never transposed, so the voxel data no longer matched the affine.

  Args:
    image: nibabel image to reorient.

  Returns:
    nib.Nifti1Image built from the reoriented data (as returned by
    ``get_fdata()``), the canonical affine and the reoriented image's header.
  """
  canonical_image = nib.as_closest_canonical(image)
  return nib.Nifti1Image(canonical_image.get_fdata(),
                         canonical_image.affine,
                         canonical_image.header)

def fix_orient_image_mr(image_nifti_path, out_image_path):
  """Load a NIfTI image, reorient it with ``flip_image_to_ras`` and save it.

  Args:
    image_nifti_path: path of the NIfTI file to read.
    out_image_path: path the reoriented image is written to.
  """
  source_image = nib.load(image_nifti_path)
  reoriented = flip_image_to_ras(source_image)
  # Re-wrap data, affine and header in a fresh image object before writing.
  output_image = nib.Nifti1Image(reoriented.get_fdata(),
                                 reoriented.affine,
                                 header=reoriented.header)
  nib.save(output_image, out_image_path)

def annot_v2_png_to_nii(image_nifti_path, png_folder, output_path_root, out_file_name):
  """Stack per-slice PNG annotations into a label volume and write it to disk.

  The PNG files in ``png_folder`` are read in sorted filename order and stacked
  along the first axis. Every slice is rotated by 180 degrees in-plane, the
  gray levels are mapped to label ids, and the volume is written with the
  origin, direction and spacing of the reference image.

  Gray-level windows (after scaling the PNG data by 255) and their labels:
    55-70   -> 5 (liver)
    110-135 -> 2 (right kidney)
    175-200 -> 3 (left kidney)
    240-255 -> 1 (spleen)
  Everything else becomes 0 (background).

  NOTE(review): the 180 degree in-plane rotation and the unchanged slice order
  are assumed to match how the reference image was reoriented - confirm for
  data other than the CHAOS MR training set.

  Args:
    image_nifti_path: NIfTI image whose geometry is copied to the labels.
    png_folder: folder holding one ``*.png`` annotation per slice.
    output_path_root: folder the label file is written to.
    out_file_name: file name of the label file inside ``output_path_root``.

  Raises:
    ValueError: if ``png_folder`` contains no ``*.png`` file.
  """
  list_seg_pngs = sorted(glob.glob(os.path.join(png_folder, "*.png")))
  if not list_seg_pngs:
    # np.stack would fail with an unhelpful "need at least one array" message
    raise ValueError(f"no PNG annotation slices found in {png_folder}")
  print(list_seg_pngs)
  vol = np.stack([np.asarray(plt_image.imread(png_path)) for png_path in list_seg_pngs])
  print(f"vol shape : {vol.shape}")

  # Rotate all slices at once by 180 degrees in-plane (the former per-slice
  # loop used k=6 quarter turns, which is the same rotation as k=2).
  vol = np.rot90(vol, k=2, axes=(1, 2))

  # The data is scaled by 255 before thresholding (matplotlib documents PNG
  # data as floats in [0, 1]).
  intensity = vol * 255

  label_windows = (
      (55, 70, 5),     # liver
      (110, 135, 2),   # right kidney
      (175, 200, 3),   # left kidney
      (240, 255, 1),   # spleen
  )
  # Labels are written into a separate, zero-initialised array. Relabelling the
  # intensity array in place let raw intensities that truncate to 1, 2, 3 or 5
  # survive the final background reset and be mistaken for organ labels.
  seg_image_array = np.zeros(intensity.shape, dtype=np.uint16)
  for lower, upper, label in label_windows:
    seg_image_array[(lower <= intensity) & (intensity <= upper)] = label

  seg_image = sitk.GetImageFromArray(seg_image_array)
  ref_image = sitk.ReadImage(image_nifti_path)
  seg_image.SetOrigin(ref_image.GetOrigin())
  seg_image.SetDirection(ref_image.GetDirection())
  seg_image.SetSpacing(ref_image.GetSpacing())
  sitk.WriteImage(seg_image, os.path.join(output_path_root, out_file_name))

## Images

In [None]:
# We convert from DICOM to nifti for the images
# And convert to RAS

input_main_directory = "/content/Train_Sets/MR"

temp_dcm2niix_directory = "/content/tmp"
os.makedirs(temp_dcm2niix_directory, exist_ok=True)

# sorted so that the processing order is reproducible
patient_ids = sorted(os.listdir(input_main_directory))
num_patient_ids = len(patient_ids)
print('patient_ids: ' + str(patient_ids))
print('num_patient_ids: ' + str(num_patient_ids))

# (series tag used in the output file name, DICOM sub-folders below the patient folder)
mr_series = [
    ("T1DUAL_INPHASE", ("T1DUAL", "DICOM_anon", "InPhase")),
    ("T1DUAL_OUTPHASE", ("T1DUAL", "DICOM_anon", "OutPhase")),
    ("T2SPIR", ("T2SPIR", "DICOM_anon")),
]


def reset_directory(directory):
  """Delete ``directory`` with its contents and create it again, empty."""
  shutil.rmtree(directory, ignore_errors=True)
  os.makedirs(directory, exist_ok=True)


def convert_dicom_series_to_ras_nifti(dicom_directory, output_prefix, output_filename, temp_directory):
  """Convert one DICOM series with dcm2niix and store it RAS-oriented.

  dcm2niix is run through ``subprocess`` so that a failing conversion can be
  detected from its return code; a ``!dcm2niix`` shell magic never raises, so
  a surrounding try/except cannot catch a failure.

  Args:
    dicom_directory: folder holding the DICOM files of a single series.
    output_prefix: file name stem passed to dcm2niix with ``-f``.
    output_filename: final ``.nii.gz`` path of the converted image.
    temp_directory: scratch folder dcm2niix writes into; it is emptied before
      and after the conversion.

  Returns:
    True when exactly one NIfTI file was produced, copied to
    ``output_filename`` and reoriented; False when the series was skipped
    (the reason is printed).
  """
  # start from an empty scratch folder so files left by an earlier run are not picked up
  reset_directory(temp_directory)
  try:
    try:
      completed = subprocess.run(
          ["dcm2niix", "-z", "y", "-f", output_prefix, "-o", temp_directory, dicom_directory],
          capture_output=True, text=True, errors="replace")
    except OSError as error:
      # raised when the dcm2niix executable cannot be started at all
      print('ERROR: unable to convert ' + dicom_directory + ' to ' + output_filename + ' (' + str(error) + ')')
      return False
    # show the converter log in the notebook, as the shell magic did
    print(completed.stdout, end='')
    if completed.returncode != 0:
      print(completed.stderr, end='')
      print('ERROR: unable to convert ' + dicom_directory + ' to ' + output_filename)
      return False

    nii_files = sorted(os.path.join(temp_directory, f)
                       for f in os.listdir(temp_directory) if f.endswith(".nii.gz"))
    if len(nii_files) > 1:
      print('Warning: more than 1 nii file found: ' + str(nii_files))
      return False
    if not nii_files:
      print('ERROR: no nii file produced for ' + dicom_directory)
      return False

    # copy to actual filename
    nii_file = nii_files[0]
    try:
      shutil.copy2(nii_file, output_filename)
    except OSError as error:
      print('ERROR: cannot copy from ' + nii_file + ' to ' + output_filename + ' (' + str(error) + ')')
      return False

    # convert to RAS (in place)
    fix_orient_image_mr(image_nifti_path=output_filename,
                        out_image_path=output_filename)
    return True
  finally:
    # leave an empty scratch folder behind for the next series
    reset_directory(temp_directory)


failed_image_conversions = []
for patient_id in patient_ids:
  for series_tag, dicom_subfolders in mr_series:
    input_series_directory = os.path.join(input_main_directory, patient_id, *dicom_subfolders)
    output_series_prefix = "mr_" + str(patient_id) + "_" + series_tag
    output_series_filename = os.path.join(output_images_directory, output_series_prefix + ".nii.gz")
    converted = convert_dicom_series_to_ras_nifti(dicom_directory=input_series_directory,
                                                  output_prefix=output_series_prefix,
                                                  output_filename=output_series_filename,
                                                  temp_directory=temp_dcm2niix_directory)
    if not converted:
      failed_image_conversions.append(output_series_filename)

if failed_image_conversions:
  print('Images that could not be converted: ' + str(failed_image_conversions))




patient_ids: ['37', '32', '20', '33', '3', '1', '31', '21', '15', '34', '22', '8', '36', '10', '38', '2', '39', '19', '5', '13']
num_patient_ids: 20
Chris Rorden's dcm2niiX version v1.0.20250505  (JP2:OpenJPEG) (JP-LS:CharLS) GCC8.4.0 x86-64 (64-bit Linux)
Found 30 DICOM file(s)
Convert 30 DICOM as /content/tmp/mr_37_T1DUAL_INPHASE_e2 (256x256x30x1)
Compress: "/usr/bin/pigz" -b 960 --no-time -n -f -6 "/content/tmp/mr_37_T1DUAL_INPHASE_e2.nii"
Conversion required 0.545283 seconds (0.066350 for core code).
Chris Rorden's dcm2niiX version v1.0.20250505  (JP2:OpenJPEG) (JP-LS:CharLS) GCC8.4.0 x86-64 (64-bit Linux)
Found 30 DICOM file(s)
Convert 30 DICOM as /content/tmp/mr_37_T1DUAL_OUTPHASE (256x256x30x1)
Compress: "/usr/bin/pigz" -b 960 --no-time -n -f -6 "/content/tmp/mr_37_T1DUAL_OUTPHASE.nii"
Conversion required 0.264829 seconds (0.045286 for core code).
Chris Rorden's dcm2niiX version v1.0.20250505  (JP2:OpenJPEG) (JP-LS:CharLS) GCC8.4.0 x86-64 (64-bit Linux)
Found 32 DICOM file(s)
Co

## Labels

In [None]:
# We convert from png to nifti for the labels

# Mapping of the CHAOS ids to the TotalSegmentator CT ids.
# For the TotalSegmentator ids refer to total_v1 here:
# https://github.com/wasserth/TotalSegmentator/blob/master/totalsegmentator/map_to_binary.py
chaos_to_totalsegmentator_dict = dict(
    [
        (63, 5),   # liver
        (126, 2),  # right kidney
        (189, 3),  # left kidney
        (252, 1),  # spleen
    ]
)


In [None]:
# (series tag used in the file names, sequence folder that holds the "Ground" PNGs)
# Both T1DUAL phases read the same T1DUAL/Ground annotation folder.
label_series = [
    ("T1DUAL_INPHASE", "T1DUAL"),
    ("T1DUAL_OUTPHASE", "T1DUAL"),
    ("T2SPIR", "T2SPIR"),
]

for patient_id in patient_ids:
  for series_tag, sequence_folder in label_series:

    ### set up directories ###
    series_basename = "mr_" + str(patient_id) + "_" + series_tag + ".nii.gz"
    input_image_filename = os.path.join(output_images_directory, series_basename)
    input_labels_directory = os.path.join(input_main_directory, patient_id, sequence_folder, "Ground")
    output_label_filename = os.path.join(output_labels_directory, series_basename)

    ### convert ###
    try:
      annot_v2_png_to_nii(image_nifti_path = input_image_filename,
                          png_folder = input_labels_directory,
                          output_path_root = output_labels_directory,
                          out_file_name = os.path.basename(output_label_filename))
      # set the xyzt units, not sure why it changed
      # NOTE(review): 2 is presumably the NIfTI code for millimetres with no time unit - confirm
      nii = nib.load(output_label_filename)
      nii.header['xyzt_units'] = 2
      nib.save(nii, output_label_filename)
    except Exception as error:
      # Report the cause and continue with the next series. "except Exception"
      # (rather than a bare except) lets KeyboardInterrupt stop the loop.
      print('ERROR: cannot convert png directory: ' + input_labels_directory + ' to nii: ' + output_label_filename
            + ' (' + type(error).__name__ + ': ' + str(error) + ')')


['/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00002.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00004.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00006.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00008.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00010.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00012.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00014.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00016.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00018.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00020.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00022.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00024.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00026.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00028.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00030.png', '/content/Train_Sets/MR/37/T1DUAL/Ground/IMG-0017-00032.png', '/conte