<a href="https://colab.research.google.com/github/aubricot/CV_for_flower_CT/blob/main/preprocessing_whole_flower_nrrd2nifti.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Convert .nrrd and .seg.nrrd files from 3D Slicer to .nii.gz (nifti) files for machine learning - Whole Flower
---   
*Last Updated 26 March 2025*  
-Runs in Python 3-   
Convert 3D Slicer volume (.nrrd) and segmentation (.seg.nrrd) files into NIfTI (.nii.gz) format for machine learning.

In [None]:
#@title Choose where to save results

# Imports
import os
!pip install slicerio
import slicerio
import json
!pip install SimpleITK
import SimpleITK as sitk
import nrrd
!pip install torchio
import torchio as tio

# Use dropdown menu on right to choose where results are saved
save = "in my Google Drive" #@param ["in my Google Drive", "in Colab runtime (files deleted after each session)"]

# Mount Google Drive only when the Google Drive option is selected above
# (force_remount=True re-mounts even if Drive is already mounted)
if 'Google Drive' in save:
    from google.colab import drive
    drive.mount('/content/drive', force_remount=True)

# Type in the path to your project wd (working directory) in form field on right
wd = "/content/drive/MyDrive/slicer" # @param ["/content/drive/MyDrive/slicer"] {"allow-input":true}
print("\nWorking with data from:")
# IPython magic: change the notebook's current directory to wd
%cd $wd

# Set up folder to save .nii.gz files
ngz_folder = "masked_val_niigzs" # @param {"type":"string","placeholder":"nii_gzs"}
ngz_fpath = wd + '/' + ngz_folder
# Set up folder to save segmentation files (labels)
labels_fpath = ngz_fpath + '/' + 'labels'
# Set up folder to save volume files (images)
images_fpath = ngz_fpath + '/' + 'images'

# Make every output folder that doesn't already exist. Each folder is checked
# on its own so that a partially created ngz_folder (e.g. one that is missing
# 'labels' or 'images') is completed instead of being reported as ready.
output_folders = [
    ("Making ngz_folder at: ", ngz_fpath),
    ("Making labels_folder at : ", labels_fpath),
    ("Making images_folder at : ", images_fpath),
]
made_any_folder = False
for making_msg, folder_fpath in output_folders:
    if not os.path.isdir(folder_fpath):
        print(making_msg, folder_fpath)
        os.makedirs(folder_fpath, exist_ok=True)
        made_any_folder = True

# Only claim that everything exists when nothing had to be created
if not made_any_folder:
    print("\nngz_folder, images_folder, and labels_folder already exist at: ", ngz_fpath, images_fpath, labels_fpath)

In [None]:
#@title Whole Flower: Inspect a single segmentation file to see how it's read

# Path (relative to wd) of the segmentation file to inspect
input_filename = "thecac_fbg_cg_220622_03/thecac_fbg_cg_220622_03.seg.nrrd" # @param ["thecac_fbg_cg_220622_03/thecac_fbg_cg_220622_03.seg.nrrd"] {"allow-input":true}

# skip_voxels=True reads only the segmentation metadata, not the voxel data
segmentation = slicerio.read_segmentation(input_filename, skip_voxels=True)

number_of_segments = len(segmentation["segments"])
print(f"Number of segments: {number_of_segments}")

segment_names = slicerio.segment_names(segmentation)
print(f"Segment names: {', '.join(segment_names)}")

# Show the metadata of the first segment, if the file has any segments at all
# (indexing segment_names[0] on an empty segmentation would raise IndexError)
if segment_names:
    segment0 = slicerio.segment_from_name(segmentation, segment_names[0])
    print("First segment info:\n" + json.dumps(segment0, sort_keys=False, indent=4))
else:
    print("No segments found in: " + input_filename)

In [None]:
#@title Whole Flower: Crawl through wd to find .nrrd and _crop.nrrd files, convert them to .nii.gz, save to ngz_folder

# Number of segments each .seg.nrrd file is expected to contain
# (set once here rather than on every loop iteration)
num_labels = 1 #@param

# List input and output filenames
in_vols = [] # volume files (.nrrd or _crop.nrrd)
out_vols = [] # volume files (.nii.gz)
in_segs = [] # segmentation files (.seg.nrrd)
out_segs = [] # segmentation files (_seg.nii.gz)

# Directories to search for volumes and segmentation labels
include = ['the', 'THE', 'The'] # Only use folders for theobroma cacao (thecac)
# Keep real directories only, so a plain file whose name matches 'include'
# does not crash os.listdir() inside the loop
dirs = [d for d in os.listdir('.') if os.path.isdir(d) and any(y in d for y in include)]

# Make a list of directories / segmentation files that need a manual check
dirs_to_check = [] # folders missing a seg or vol file, or whose conversion failed
segs_to_check = [] # segmentation files with an unexpected number of segments

# Walk through chosen directories
for flower_dir in dirs:
    # Only consider files that are named after their folder
    files = [f for f in os.listdir(flower_dir) if flower_dir in f]
    print("\n \033[92m Searching files in: {}\033[0m".format(flower_dir))
    seg_file = [f for f in files if f.endswith("seg.nrrd")]
    # Prefer a masked volume. It must be an .nrrd file that is not itself a
    # segmentation, because the output name below strips a 5-character ".nrrd"
    vol_file = [f for f in files if ("mask" in f) and f.endswith(".nrrd") and not f.endswith("seg.nrrd")]
    if not vol_file:
        vol_file = [f for f in files if ("seg" not in f) and (f.endswith(".nrrd"))]

    # Warning message: skip the folder when either file is missing
    if not seg_file or not vol_file:
        print("\n \033[31m Missing segmentation or volume file!\033[0m")
        print("Check files at: ", flower_dir)
        dirs_to_check.append(flower_dir)
        continue

    # Success message
    print("\033[92m Found segmentation and volume files!\033[0m")
    print("seg_file: ", seg_file)
    print("vol_file: ", vol_file)

    # Use the first match of each list
    seg_file = seg_file[0]
    vol_file = vol_file[0]
    seg_fpath = flower_dir + '/' + seg_file
    vol_fpath = flower_dir + '/' + vol_file

    try:
        # Segs
        # First, check if it has the correct number of labels
        seg = slicerio.read_segmentation(seg_fpath, skip_voxels=True)
        num_segs = len(seg["segments"])
        if num_segs == num_labels:
            img = sitk.ReadImage(seg_fpath)
            # seg_file[:-9] strips the 9-character ".seg.nrrd" suffix
            outpath = labels_fpath + '/' + seg_file[:-9] + '_seg.nii.gz'
            print("seg_file saved to: ", outpath)
            sitk.WriteImage(img, outpath)
            in_segs.append(seg_file)
            out_segs.append(outpath)
        else:
            print("\033[31m Wrong number of labels for segmentation file: {}\033[0m\n".format(seg_fpath))
            segs_to_check.append(seg_fpath)

        # Vols
        # NOTE: the volume is converted even when its segmentation was rejected
        # above, so 'images' can end up with more files than 'labels'
        img = sitk.ReadImage(vol_fpath)
        # vol_file[:-5] strips the 5-character ".nrrd" suffix
        outpath = images_fpath + '/' + vol_file[:-5] + '.nii.gz'
        print("vol_file saved to: ", outpath)
        sitk.WriteImage(img, outpath)
        in_vols.append(vol_file)
        out_vols.append(outpath)
    except Exception as err:
        # Report the failure and keep going with the next folder instead of
        # silently dropping it; the folder is recorded for a manual check
        print("\033[31m Could not convert files in {}: {}\033[0m".format(flower_dir, err))
        dirs_to_check.append(flower_dir)

# Final summary, printed once after all folders have been processed
print("{} .nrrd files and {} seg.nrrd files from {} converted to .nii.gz at \n volumes: {} \n labels: {}".format(len(in_vols), len(in_segs), wd, images_fpath, labels_fpath))
print("{} folders were missing seg or vol files. Check on folders: {}".format(len(dirs_to_check), dirs_to_check))

# Save dirs_to_check to txt file in the current working directory for record of
# folders that need checking. The reported path is the one actually written,
# rather than one rebuilt from wd.
dirs_to_check_fpath = os.path.abspath("dirs_to_check.txt")
with open(dirs_to_check_fpath, "w") as output:
    output.write(str(dirs_to_check))
print("Saved dirs_to_check list to: ", dirs_to_check_fpath)

# Save segs_to_check to txt file for record of segmentation files that had the
# wrong number of labels
segs_to_check_fpath = os.path.abspath("segs_to_check.txt")
with open(segs_to_check_fpath, "w") as output:
    output.write(str(segs_to_check))
print("Saved segs_to_check list to: ", segs_to_check_fpath)

In [None]:
#@title Resample the labels so they match the CT images (converting from nrrd to nifti can lose this info inconsistently)

# Folders holding the converted CT volumes and their label maps
images_dir = ngz_fpath + "/images"
labels_dir = ngz_fpath + "/labels"
seg_suffix = "_seg.nii.gz"

# List the CT volumes and the original label maps. Only files ending in
# "_seg.nii.gz" are used as labels, so "_seg_reg.nii.gz" outputs from an
# earlier run of this cell are not picked up again as inputs.
ct_paths = sorted(f for f in os.listdir(images_dir) if f.endswith(".nii.gz"))
seg_paths = sorted(f for f in os.listdir(labels_dir) if f.endswith(seg_suffix))

# Volumes and labels are paired by sorted position, so a missing file on either
# side would silently pair every later label with the wrong CT volume
if len(ct_paths) != len(seg_paths):
    raise ValueError(
        "Found {} CT volumes in {} but {} label maps in {}. Each volume needs "
        "exactly one '{}' file before resampling.".format(
            len(ct_paths), images_dir, len(seg_paths), labels_dir, seg_suffix))

for ct_path, seg_path in zip(ct_paths, seg_paths):
    # Build the output name from the filename suffix only, so a '_seg' that
    # appears elsewhere in the folder path or filename is left untouched
    out_path = labels_dir + "/" + seg_path[:-len(seg_suffix)] + "_seg_reg.nii.gz"
    ct_path = images_dir + "/" + ct_path
    seg_path = labels_dir + "/" + seg_path
    # Resample the label map using the CT volume as the target
    image = tio.LabelMap(seg_path)
    transform = tio.Resample(ct_path)
    transformed = transform(image)
    transformed.save(out_path)

In [None]:
#@title Zip the folder for archival purposes
# Enter wd, zip and unzipped filename using form field to right
wd = "/content/drive/MyDrive/slicer" #@param ["/content/drive/MyDrive"] {allow-input: true}
zip_fn = ngz_folder + '.zip'
zip_fpath = wd + '/' + zip_fn
unzipped_fn = ngz_folder
unzipped_fpath = wd + '/' + unzipped_fn

# Zip the file/folder
!zip -r $zip_fpath $unzipped_fpath