In [1]:
import os
import nibabel as nib
import pandas as pd
import numpy as np

def nii_to_numpy_and_save(directory, output_dir="dataset"):
    """
    Convert NIfTI files in a directory to NumPy arrays, save them to an output directory,
    and create a DataFrame with the filenames and dimensions.

    Args:
    - directory (str): Path to the directory containing NIfTI files.
    - output_dir (str): Path to the directory where the NumPy files will be saved.

    Returns:
    - DataFrame: A pandas DataFrame with columns filename, x, y, z.
    """
    # Create the output directory if it does not exist
    os.makedirs(output_dir, exist_ok=True)

    # Initialize an empty list to store data for DataFrame
    data = []

    # Iterate over all files in the directory
    for filename in os.listdir(directory):
        # Check if the file is a NIfTI file
        if filename.endswith(".nii") or filename.endswith(".nii.gz"):
            # Construct the full path to the file
            filepath = os.path.join(directory, filename)

            # Load the NIfTI file
            img = nib.load(filepath)
            
            # Convert the NIfTI image to a NumPy array
            img_array = img.get_fdata()
            
            # Get the dimensions of the image
            x, y, z = img_array.shape
            
            # Append the filename and dimensions to the list
            data.append({"filename": filename, "x": x, "y": y, "z": z})
            
            # Save the NumPy array to the output directory
            npy_filename = os.path.splitext(filename)[0] + '.npy'  # Removes the NIfTI extension and adds .npy
            npy_filepath = os.path.join(output_dir, npy_filename)
            np.save(npy_filepath, img_array)
    
    # Convert the list to a DataFrame
    df = pd.DataFrame(data)

    return df

# Specify the directory containing your NIfTI files
directory = "../images_nii/64x64x64"

# Convert NIfTI files to NumPy arrays, save them, and create the DataFrame
df = nii_to_numpy_and_save(directory)

# Display the DataFrame
print(df)

# Optionally, save the DataFrame to a CSV file
df.to_csv("nii_dimensions.csv", index=False)


                                              filename   x   y   z
0                        dose_316_1_$Left_Frontal$.nii  64  64  64
1            mri_408_2_$Right_Medial_Frontal_Flax$.nii  64  64  64
2            dose_147_2_$Right_Posterior_Temporal$.nii  64  64  64
3          struct_152_3_$Right_Anterior_Frontal_1$.nii  64  64  64
4                struct_492_2_$Right_Motor_Cortex$.nii  64  64  64
..                                                 ...  ..  ..  ..
727           mri_147_2_$Right_Posterior_Temporal$.nii  64  64  64
728                     dose_274_1_$Left_Parietal$.nii  64  64  64
729  dose_151_1_$11_Left_Medial_Anterior_Frontal_1$...  64  64  64
730           mri_257_4_$Right_Posterior_Parietal$.nii  64  64  64
731                 struct_158_1_$Left_Cerebellar$.nii  64  64  64

[732 rows x 4 columns]


In [3]:
df.describe()

Unnamed: 0,x,y,z
count,732.0,732.0,732.0
mean,63.913934,63.795082,64.0
std,1.342547,2.257458,0.0
min,43.0,38.0,64.0
25%,64.0,64.0,64.0
50%,64.0,64.0,64.0
75%,64.0,64.0,64.0
max,64.0,64.0,64.0


Fix dimensions of files

In [4]:
import os
import numpy as np
import pandas as pd

def pad_array(array, desired_shape=(64, 64, 64)):
    """
    Pad an array to the desired shape with zeros.
    
    Args:
    - array (numpy.ndarray): The original array.
    - desired_shape (tuple): The desired dimensions (x, y, z).

    Returns:
    - numpy.ndarray: The padded array.
    """
    padding = [(0, max(desired_shape[i] - array.shape[i], 0)) for i in range(3)]
    padded_array = np.pad(array, padding, mode='constant', constant_values=0)
    return padded_array[:desired_shape[0], :desired_shape[1], :desired_shape[2]]

def update_and_pad_npy_files(directory):
    """
    Load .npy files, pad them, save back, and create a DataFrame with dimensions.

    Args:
    - directory (str): The directory containing .npy files.

    Returns:
    - pd.DataFrame: DataFrame with filename and dimensions.
    """
    data = []
    for filename in os.listdir(directory):
        if filename.endswith(".npy"):
            filepath = os.path.join(directory, filename)
            array = np.load(filepath)
            padded_array = pad_array(array)
            np.save(filepath, padded_array)  # Overwrite the original file with the padded array
            
            # Update the DataFrame information
            data.append({
                "filename": filename,
                "x": padded_array.shape[0],
                "y": padded_array.shape[1],
                "z": padded_array.shape[2]
            })
    
    df = pd.DataFrame(data)
    return df

# Specify the directory containing your .npy files
directory = "dataset"

# Update and pad .npy files, and create the DataFrame
df = update_and_pad_npy_files(directory)

# Display the DataFrame
print(df)

# Optionally, save the DataFrame to a CSV file
df.to_csv("updated_npy_dimensions.csv", index=False)


                                        filename   x   y   z
0                 dose_234_1_$Right_Frontal$.npy  64  64  64
1              struct_257_4_$Right_Parietal$.npy  64  64  64
2               struct_103_2_$Right_Frontal$.npy  64  64  64
3        mri_224_1_$Right_Cerebellar_Cavity$.npy  64  64  64
4           dose_492_1_$Left_Frontal_Cavity$.npy  64  64  64
..                                           ...  ..  ..  ..
727   mri_151_1_$10_Left_Anterior_Frontal_1$.npy  64  64  64
728  dose_147_2_$Left_Inferior_Cerebellar_1$.npy  64  64  64
729              mri_257_3_$Right_Ventricle$.npy  64  64  64
730              dose_420_1_$Left_Occipital$.npy  64  64  64
731         struct_147_2_$Left_Motor_Cortex$.npy  64  64  64

[732 rows x 4 columns]


In [5]:
df.describe()

Unnamed: 0,x,y,z
count,732.0,732.0,732.0
mean,64.0,64.0,64.0
std,0.0,0.0,0.0
min,64.0,64.0,64.0
25%,64.0,64.0,64.0
50%,64.0,64.0,64.0
75%,64.0,64.0,64.0
max,64.0,64.0,64.0
