# 1. Automatic file format conversion: nii to nii.gz

In [11]:
import os
import gzip
import shutil
import SimpleITK as sitk

# Source directory (one subfolder per case) and destination for the .nii.gz files
input_folder = "../Raw/ForSeg"
output_folder = "../Processed/PII_GZ"

os.makedirs(output_folder, exist_ok=True)

# (file name expected inside each case folder, suffix appended to the folder
#  name to build the output file name). Order matters only for readability:
# the image is handled before its segmentation, as before.
nifti_targets = (
    ("b0_img.nii", "_b0_img.nii.gz"),
    ("seg.nii", "_seg.nii.gz"),
)

# Visit the direct children of ForSeg; entries that are not directories are ignored
for folder_name in os.listdir(input_folder):
    case_dir = os.path.join(input_folder, folder_name)

    if os.path.isdir(case_dir):
        for source_name, output_suffix in nifti_targets:
            source_path = os.path.join(case_dir, source_name)

            # A case may lack either file; whatever is present is converted
            if not os.path.exists(source_path):
                continue

            # Re-save the volume under "<folder>_<kind>.nii.gz" in the flat output folder
            target_path = os.path.join(output_folder, folder_name + output_suffix)
            sitk.WriteImage(sitk.ReadImage(source_path), target_path)

print("Conversion to .nii.gz completed!")

Conversion to .nii.gz completed!


# 2. Copy each folder's original MRI and its segmentation MRI into the images and labels folders, respectively.

In [12]:
import os
import shutil
import csv

# Input: flat folder holding "<case>_b0_img.nii.gz" / "<case>_seg.nii.gz" pairs.
# Output: PII_Full/images and PII_Full/labels, where both files of a pair share
# the same new name "PII_###.nii.gz", plus a CSV mapping old paths to new names.
forseg_path = "../Processed/PII_GZ"  # Now use the compressed file path
images_path = os.path.join("../Processed/PII_Full", "images")
labels_path = os.path.join("../Processed/PII_Full", "labels")
csv_file = os.path.join("../Processed/PII_Full", "PII_log.csv")

if not os.path.exists(forseg_path):
    raise FileNotFoundError(f"Path does not exist: {forseg_path}")

# If the images and labels folders do not exist, create them
os.makedirs(images_path, exist_ok=True)
os.makedirs(labels_path, exist_ok=True)

img_suffix = "_b0_img.nii.gz"
seg_suffix = "_seg.nii.gz"

# Running number used in the new file names (PII_001, PII_002, ...)
global_count = 1

# Images that had no matching segmentation and were therefore left out
skipped_files = []

# Open the CSV file and record the mapping between original and renamed files
with open(csv_file, mode='w', newline='') as file:
    writer = csv.writer(file)
    writer.writerow(["Original MRI File", "Original Seg File", "Dataset", "New Filename"])

    # os.listdir returns entries in arbitrary order; sorting makes the
    # case -> PII_### assignment identical on every run and every machine.
    for filename in sorted(os.listdir(forseg_path)):
        if not filename.endswith(img_suffix):
            continue

        b0_img_file = os.path.join(forseg_path, filename)
        # Swap only the trailing suffix, so a case name that happens to
        # contain the suffix text elsewhere is left untouched.
        seg_file = os.path.join(forseg_path, filename[:-len(img_suffix)] + seg_suffix)

        # Only complete image/segmentation pairs are copied
        if not os.path.exists(seg_file):
            skipped_files.append(filename)
            continue

        new_file_name = f"PII_{global_count:03d}.nii.gz"

        # Copy the image to the images folder and the mask to the labels folder
        shutil.copy2(b0_img_file, os.path.join(images_path, new_file_name))
        shutil.copy2(seg_file, os.path.join(labels_path, new_file_name))

        # One log row per destination folder
        writer.writerow([b0_img_file, seg_file, "images", new_file_name])
        writer.writerow([b0_img_file, seg_file, "labels", new_file_name])

        global_count += 1

if skipped_files:
    print(f"Warning: skipped {len(skipped_files)} image(s) without a matching segmentation: {skipped_files}")

print("Task completed! File copies and CSV log created.")

Task completed! File copies and CSV log created.


# 3. Split the paired files in images and labels into training (imagesTr/labelsTr) and test (imagesTs/labelsTs) datasets

In [13]:
import os
import shutil
import random
import csv

# Source folders: images and labels are paired by identical file names
images_full_path = os.path.join("../Processed/PII_Full", "images")
labels_full_path = os.path.join("../Processed/PII_Full", "labels")

# Destination folders for the split
split_base_path = "../Processed/PII_Splite"
images_tr_path = os.path.join(split_base_path, "imagesTr")
labels_tr_path = os.path.join(split_base_path, "labelsTr")
images_ts_path = os.path.join(split_base_path, "imagesTs")
labels_ts_path = os.path.join(split_base_path, "labelsTs")

for split_dir in (images_tr_path, labels_tr_path, images_ts_path, labels_ts_path):
    os.makedirs(split_dir, exist_ok=True)

# Tunables for the split
SPLIT_SEED = 42        # fixed seed -> the same split on every run
TRAIN_FRACTION = 0.8   # 80% training, 20% test

# Sorted listing + seeded shuffle makes the split reproducible. os.listdir
# order is arbitrary, and an unseeded shuffle would place cases differently on
# each re-run, which (because the destination folders are reused) can leave
# the same case in both the training and the test folders.
all_files = sorted(f for f in os.listdir(images_full_path) if f.endswith(".nii.gz"))

# Fail before copying anything if an image has no label of the same name
missing_labels = [f for f in all_files
                  if not os.path.exists(os.path.join(labels_full_path, f))]
if missing_labels:
    raise FileNotFoundError(f"No label file found for: {missing_labels}")

random.Random(SPLIT_SEED).shuffle(all_files)

# Split into training and test sets at an 8:2 ratio
split_index = int(TRAIN_FRACTION * len(all_files))
train_files = all_files[:split_index]
test_files = all_files[split_index:]

# CSV log files, one per subset
train_csv = os.path.join(split_base_path, "train_log.csv")
test_csv = os.path.join(split_base_path, "test_log.csv")


def _copy_split(filenames, images_dst, labels_dst, log_path):
    """Copy image/label pairs into one subset and log every copy to a CSV.

    Args:
        filenames: file names present in both images_full_path and labels_full_path.
        images_dst: folder receiving the images.
        labels_dst: folder receiving the labels.
        log_path: CSV file to (over)write with original and new paths.
    """
    with open(log_path, mode='w', newline='') as log_file:
        log_writer = csv.writer(log_file)
        log_writer.writerow(["Original MRI Path", "Original Seg Path", "New MRI Path", "New Seg Path"])

        for filename in filenames:
            original_img = os.path.join(images_full_path, filename)
            original_seg = os.path.join(labels_full_path, filename)
            new_img = os.path.join(images_dst, filename)
            new_seg = os.path.join(labels_dst, filename)

            shutil.copy2(original_img, new_img)
            shutil.copy2(original_seg, new_seg)

            log_writer.writerow([original_img, original_seg, new_img, new_seg])


_copy_split(train_files, images_tr_path, labels_tr_path, train_csv)
_copy_split(test_files, images_ts_path, labels_ts_path, test_csv)

# Files left behind by an earlier run with a different split are not removed
# here; report any case that now sits in both subsets so it can be cleaned up.
leaked_files = sorted(set(os.listdir(images_tr_path)) & set(os.listdir(images_ts_path)))
if leaked_files:
    print(f"Warning: {len(leaked_files)} file(s) exist in both imagesTr and imagesTs "
          f"(leftovers from a previous run?): {leaked_files}")

print("Data split into train/test sets completed and CSV logs created.")

Data split into train/test sets completed and CSV logs created.


# 4. Create and modify the relevant json files in the final dataset to form a complete MRI dataset

In [16]:
import os
import json
from datetime import datetime

# Path to the dataset.json file (relative to the notebook). The file is read
# first and then rewritten, so it must already exist before this cell runs.
json_file_path = "../Processed/PII_Splite/dataset.json"

# Read the existing json file
with open(json_file_path, 'r', encoding='utf-8') as f:
    data = json.load(f)

# Update name, description, reference, licence, and release date
data['name'] = "Placenta in Uterus"
data['description'] = "Segmentation of the placenta in the uterus"
data['reference'] = "Washington University in St. Louis"
# NOTE(review): this stores the literal string "null", not a JSON null - confirm that is intended
data['licence'] = "null"
data['release'] = datetime.now().strftime("%Y-%m-%d")
# Earlier runs wrote the date under the misspelled key 'relase'; drop it if present
data.pop('relase', None)

# Replace label 1 with "Placenta in Uterus"
data['labels']['1'] = "Placenta in Uterus"

# Define paths to the split directories
split_base_path = "../Processed/PII_Splite"
images_tr_path = os.path.join(split_base_path, "imagesTr")
labels_tr_path = os.path.join(split_base_path, "labelsTr")
images_ts_path = os.path.join(split_base_path, "imagesTs")
labels_ts_path = os.path.join(split_base_path, "labelsTs")

# Training entries pair each image with the label of the same file name.
# Listings are sorted so the JSON is identical across runs, and restricted to
# .nii.gz so stray files (e.g. OS metadata) never end up in the dataset lists.
training_list = [
    {
        "image": f"./imagesTr/{img_file}",
        "label": f"./labelsTr/{img_file}",
    }
    for img_file in sorted(os.listdir(images_tr_path))
    if img_file.endswith(".nii.gz")
]

# Test entries list the image path only
testing_list = [
    f"./imagesTs/{img_file}"
    for img_file in sorted(os.listdir(images_ts_path))
    if img_file.endswith(".nii.gz")
]

# Update the json fields for training and testing data
data['training'] = training_list
data['test'] = testing_list

# Update the number of training and testing samples
data['numTraining'] = len(training_list)
data['numTest'] = len(testing_list)

# Write the updated content back to the json file
with open(json_file_path, 'w', encoding='utf-8') as f:
    json.dump(data, f, ensure_ascii=False, indent=4)

print("dataset.json updated successfully!")

dataset.json updated successfully!
