# Build the dataset
From each VH brain, take slices starting from the center in steps of 4-5 slices, generating about five 2D images per brain. In total this gives 57 x 5 = 285 images (the number of slices per brain should be adjustable in case more images are wanted).

In [3]:
import os
import nibabel as nib
import numpy as np
import matplotlib.pyplot as plt
import plotly.graph_objects as go


In [None]:
# Dataset locations and slice-extraction settings used by the cells below.
# Previous data location, kept for reference:
# data_folder = "/data/shared/Lili/VH_MNI"
# folder = "/VH2016"
data_folder = "/home/benet/data/VH"  # Root folder that contains the split subfolders
folders = ["/train", "/test"]  # Both train and test folders (concatenated onto data_folder, hence the leading "/")
flair_image = "flair.nii.gz"  # File name of the FLAIR volume inside each example folder
mask_image = "lesionMask.nii.gz"  # File name of the lesion mask inside each example folder
slices_per_example = 5  # Number of slices to save per example
slices_step = 3  # Stride between saved slice indices (used as the range() step, so step - 1 slices are skipped)
start_slice = 88  # Index of the first saved slice along the third axis of the volume
output_folder = "/home/benet/data/VH_slices"  # Root folder for the exported PNG and NPY slices

# Create the output directory tree if it doesn't exist yet:
#   <output_folder>/images/{flair,mask}  and  <output_folder>/npy/{flair,mask}
# os.makedirs creates every missing intermediate directory, and exist_ok=True
# makes the call a no-op for directories that are already present, so no
# separate os.path.exists check (and its check-then-create race) is needed.
for subdir in ("images", "npy"):
    for modality in ("flair", "mask"):
        os.makedirs(os.path.join(output_folder, subdir, modality), exist_ok=True)

# Export 2D slices from every example volume in every split folder.
# For each example, `slices_per_example` slices are taken along the third axis,
# starting at `start_slice` and advancing by `slices_step`. Each slice is
# written twice: as a grayscale PNG (for viewing) and as a raw .npy array.
#
# NOTE(review): output file names only contain the example folder name, so an
# example name that appears in more than one split folder would overwrite the
# earlier export - confirm that example names are unique across splits.
for folder in folders:
    # Entries in `folders` carry their own leading "/", so plain concatenation
    # (not os.path.join, which would discard data_folder) is intentional here.
    split_path = data_folder + folder
    examples = sorted(os.listdir(split_path))  # Sorted for a deterministic processing order

    for example in examples:
        example_path = os.path.join(split_path, example)

        if not os.path.isdir(example_path):  # Skip stray files next to the example folders
            print(f"Skipping {example_path}")
            continue

        flair_path = os.path.join(example_path, flair_image)
        mask_path = os.path.join(example_path, mask_image)
        flair = nib.load(flair_path)
        mask = nib.load(mask_path)
        flair_data = flair.get_fdata()
        mask_data = mask.get_fdata()

        # The image and its mask must have the same number of slices.
        # Raise explicitly instead of using `assert`, which is removed under -O.
        n_slices = flair_data.shape[2]
        if n_slices != mask_data.shape[2]:
            raise ValueError(
                f"{example_path}: flair has {n_slices} slices but mask has "
                f"{mask_data.shape[2]}"
            )

        end_slice = start_slice + slices_per_example * slices_step
        slice_indices = range(start_slice, end_slice, slices_step)

        # Validate the requested indices before writing anything, so a volume
        # that is too short fails up front rather than after a partial export.
        if len(slice_indices) > 0 and slice_indices[-1] >= n_slices:
            raise ValueError(
                f"{example_path}: requested slice {slice_indices[-1]} but the "
                f"volume only has {n_slices} slices"
            )

        # j is the running output index used in the file names; i is the slice
        # index inside the volume.
        for j, i in enumerate(slice_indices):
            # Rotate by 90 degrees before saving (same rotation for image and mask
            # so they stay aligned).
            flair_slice = np.rot90(flair_data[:, :, i])
            mask_slice = np.rot90(mask_data[:, :, i])

            # Destination paths: PNG previews and raw arrays, split by modality
            flair_slice_path = os.path.join(output_folder, "images", "flair", f"{example}_{j}.png")
            mask_slice_path = os.path.join(output_folder, "images", "mask", f"{example}_{j}.png")
            npy_flair_slice_path = os.path.join(output_folder, "npy", "flair", f"{example}_{j}.npy")
            npy_mask_slice_path = os.path.join(output_folder, "npy", "mask", f"{example}_{j}.npy")

            plt.imsave(flair_slice_path, flair_slice, cmap="gray")
            plt.imsave(mask_slice_path, mask_slice, cmap="gray")
            np.save(npy_flair_slice_path, flair_slice)
            np.save(npy_mask_slice_path, mask_slice)

# The original loops ended with `break` statements that stopped after the first
# example of the first folder; they were removed so the whole dataset is exported.
