In [1]:
import os
import numpy as np
import pandas as pd
import nibabel as nib
import SimpleITK as sitk
from skimage.transform import resize

In [2]:
# Load the CSV file into a dataframe
metadata = pd.read_csv("../data/metadata.csv")

# Remove columns that contain only one unique value
for column in metadata.columns:
    if len(metadata[column].unique()) == 1:
        metadata.drop(column, axis=1, inplace=True)

metadata.rename(columns={'Study Date': 'StudyDate', 'Series Description':'course'}, inplace=True)
cols=['Series UID', 'Study UID', 'Study Description', 'Manufacturer', 'SOP Class UID', 'SOP Class Name', 'Number of Images', 'File Size','Download Timestamp']
metadata.drop(cols, axis=1, inplace=True)

# Convert the 'course' column to a numeric type, coercing errors to NaN
metadata['course'] = pd.to_numeric(metadata['course'], errors='coerce')

# Sort the DataFrame
metadata.sort_values(['pid', 'StudyDate', 'course'], inplace=True)

# Forward and backward fill the course number within each group of 'pid' and 'StudyDate'
metadata['course'] = metadata.groupby(['pid', 'StudyDate'])['course'].transform(lambda x: x.ffill().bfill())
metadata.drop('StudyDate', axis=1, inplace=True)
metadata['Modality'] = metadata['Modality'].replace({'RTSTRUCT': 'str', 'RTDOSE': 'dos', 'MR': 'mri'})
metadata.columns = ['pid','course','modality','folder']
# Convert the 'course' column to integers
metadata['course'] = metadata['course'].astype(int)
metadata

Unnamed: 0,pid,course,modality,folder
88,103,1,str,./Brain-TR-GammaKnife/GK_103/04-18-2014-NA-MR ...
11,103,1,dos,./Brain-TR-GammaKnife/GK_103/04-18-2014-NA-MR ...
90,103,1,mri,./Brain-TR-GammaKnife/GK_103/04-18-2014-NA-MR ...
91,103,2,str,./Brain-TR-GammaKnife/GK_103/12-18-2014-NA-MR ...
142,103,2,dos,./Brain-TR-GammaKnife/GK_103/12-18-2014-NA-MR ...
...,...,...,...,...
215,492,1,dos,./Brain-TR-GammaKnife/GK_492/09-08-2014-NA-hea...
223,492,1,mri,./Brain-TR-GammaKnife/GK_492/09-08-2014-NA-hea...
67,492,2,str,./Brain-TR-GammaKnife/GK_492/12-11-2014-NA-hea...
2,492,2,dos,./Brain-TR-GammaKnife/GK_492/12-11-2014-NA-hea...


Converto to NII

In [14]:
def dicom_series_to_nifti(input_path, output_path):
    reader = sitk.ImageSeriesReader()
    dicom_names = reader.GetGDCMSeriesFileNames(input_path)
    reader.SetFileNames(dicom_names)
    image = reader.Execute()
    sitk.WriteImage(image, output_path)

In [15]:
def get_nifti_info(file_path):
    nifti = nib.load(file_path)
    header = nifti.header

    dimensions = header.get_data_shape()
    voxel_spacing = header.get_zooms()
    origin = nifti.affine[:3, 3]

    return {
        'file_name': os.path.basename(file_path),
        'dim_x': dimensions[0],
        'dim_y': dimensions[1],
        'dim_z': dimensions[2],
        'voxel_x': voxel_spacing[0],
        'voxel_y': voxel_spacing[1],
        'voxel_z': voxel_spacing[2],
        'origin_x': origin[0],
        'origin_y': origin[1],
        'origin_z': origin[2]
    }


In [11]:
for index, row in metadata.iterrows():

    pid,course,modality,input_path = row
    if modality != "mri":
        continue
    output_path = os.path.join('dataset_nii', modality+'_'+str(pid)+'_'+str(course)+'.nii')
    dicom_series_to_nifti(input_path, output_path)

Check Files

In [27]:
nifti_dir = 'dataset_nii'
nifti_files = [os.path.join(nifti_dir, f) for f in os.listdir(nifti_dir) if f.endswith('.nii')]

nifti_info_list = [get_nifti_info(file_path) for file_path in nifti_files]
df = pd.DataFrame(nifti_info_list)
df.describe()

Unnamed: 0,dim_x,dim_y,dim_z,voxel_x,voxel_y,voxel_z,origin_x,origin_y,origin_z
count,76.0,76.0,76.0,76.0,76.0,76.0,76.0,76.0,76.0
mean,279.157895,279.157895,218.947368,0.980029,0.980029,1.0,134.037186,127.668246,-116.162796
std,57.147541,57.147541,22.355846,0.11021,0.11021,0.0,8.628068,18.88913,30.502427
min,256.0,256.0,176.0,0.488281,0.488281,1.0,101.672928,73.387619,-190.880844
25%,256.0,256.0,208.0,0.972222,0.972222,1.0,129.623131,116.603365,-135.270245
50%,256.0,256.0,216.0,1.015625,1.015625,1.0,133.592773,127.54237,-116.558857
75%,288.0,288.0,240.0,1.015625,1.015625,1.0,138.49366,135.946903,-94.188595
max,512.0,512.0,240.0,1.09375,1.09375,1.0,154.110397,183.163712,-33.935753


In [49]:
import os
import SimpleITK as sitk

def resample_image(input_image, new_spacing=(1.0, 1.0, 1.0)):
    """Resample image to the specified new_spacing."""
    original_spacing = input_image.GetSpacing()
    original_size = input_image.GetSize()

    # Calculate the new size based on the original size and spacing
    new_size = [
        int(round(original_size[0] * (original_spacing[0] / new_spacing[0]))),
        int(round(original_size[1] * (original_spacing[1] / new_spacing[1]))),
        original_size[2]  # Keep the original z-dimension size
    ]

    # Resample the image
    resampler = sitk.ResampleImageFilter()
    resampler.SetOutputSpacing(new_spacing)
    resampler.SetSize(new_size)
    resampler.SetInterpolator(sitk.sitkLinear)
    resampler.SetTransform(sitk.Transform())
    resampler.SetOutputOrigin(input_image.GetOrigin())
    resampler.SetOutputDirection(input_image.GetDirection())

    return resampler.Execute(input_image)

# Directory containing the NIfTI files
input_dir = 'dataset_nii'
output_dir = 'vox_nii'

# Iterate over each NIfTI file in the directory
for filename in os.listdir(input_dir):
    if filename.endswith('.nii'):
        input_path = os.path.join(input_dir, filename)
        output_path = os.path.join(output_dir, filename)

        # Read the input image
        input_image = sitk.ReadImage(input_path)

        # Resample the image to isotropic spacing with fixed x and y dimensions
        resampled_image = resample_image(input_image, new_spacing=(1.0, 1.0, input_image.GetSpacing()[2]))

        # Write the resampled image to the output directory
        sitk.WriteImage(resampled_image, output_path)


In [50]:
nifti_dir = 'vox_nii'
nifti_files = [os.path.join(nifti_dir, f) for f in os.listdir(nifti_dir) if f.endswith('.nii')]

nifti_info_list = [get_nifti_info(file_path) for file_path in nifti_files]
df_vox_gpt = pd.DataFrame(nifti_info_list)
df_vox_gpt.describe()

Unnamed: 0,dim_x,dim_y,dim_z,voxel_x,voxel_y,voxel_z,origin_x,origin_y,origin_z
count,76.0,76.0,76.0,76.0,76.0,76.0,76.0,76.0,76.0
mean,267.473684,267.473684,218.947368,1.0,1.0,1.0,134.037186,127.668246,-116.162796
std,10.329858,10.329858,22.355846,0.0,0.0,0.0,8.628068,18.88913,30.502427
min,250.0,250.0,176.0,1.0,1.0,1.0,101.672928,73.387619,-190.880844
25%,260.0,260.0,208.0,1.0,1.0,1.0,129.623131,116.603365,-135.270245
50%,260.0,260.0,216.0,1.0,1.0,1.0,133.592773,127.54237,-116.558857
75%,280.0,280.0,240.0,1.0,1.0,1.0,138.49366,135.946903,-94.188595
max,280.0,280.0,240.0,1.0,1.0,1.0,154.110397,183.163712,-33.935753


resize to 256x256x256

In [54]:
import os
import numpy as np
import nibabel as nib

input_dir = 'vox_nii'
output_dir = 'ppp_nii'

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

for filename in os.listdir(input_dir):
    if filename.endswith('.nii'):
        input_path = os.path.join(input_dir, filename)
        output_path = os.path.join(output_dir, filename)

        # Load the NIfTI file
        img = nib.load(input_path)
        data = img.get_fdata()

        # Resize the first two dimensions (x and y) with padding or cropping
        x_diff = 256 - data.shape[0]
        y_diff = 256 - data.shape[1]

        if x_diff > 0:  # Pad x dimension
            x_pad_before = x_diff // 2
            x_pad_after = x_diff - x_pad_before
            data = np.pad(data, ((x_pad_before, x_pad_after), (0, 0), (0, 0)), 'constant')
        elif x_diff < 0:  # Crop x dimension
            x_crop_before = abs(x_diff) // 2
            x_crop_after = abs(x_diff) - x_crop_before
            data = data[x_crop_before:-x_crop_after, :, :]

        if y_diff > 0:  # Pad y dimension
            y_pad_before = y_diff // 2
            y_pad_after = y_diff - y_pad_before
            data = np.pad(data, ((0, 0), (y_pad_before, y_pad_after), (0, 0)), 'constant')
        elif y_diff < 0:  # Crop y dimension
            y_crop_before = abs(y_diff) // 2
            y_crop_after = abs(y_diff) - y_crop_before
            data = data[:, y_crop_before:-y_crop_after, :]

        # Resize the third dimension (z) with padding
        z_diff = 256 - data.shape[2]
        if z_diff > 0:
            z_pad_before = z_diff // 2
            z_pad_after = z_diff - z_pad_before
            data = np.pad(data, ((0, 0), (0, 0), (z_pad_before, z_pad_after)), 'constant')

        # Create a new NIfTI image with the resized data
        new_img = nib.Nifti1Image(data, img.affine)

        # Save the resized image
        nib.save(new_img, output_path)


In [59]:
nifti_dir = 'ppp_nii'
nifti_files = [os.path.join(nifti_dir, f) for f in os.listdir(nifti_dir) if f.endswith('.nii')]

nifti_info_list = [get_nifti_info(file_path) for file_path in nifti_files]
df_vox_gpt = pd.DataFrame(nifti_info_list)
df_vox_gpt.describe().round(3)

Unnamed: 0,dim_x,dim_y,dim_z,voxel_x,voxel_y,voxel_z,origin_x,origin_y,origin_z
count,76.0,76.0,76.0,76.0,76.0,76.0,76.0,76.0,76.0
mean,256.0,256.0,256.0,1.0,1.0,1.0,134.037,127.668,-116.163
std,0.0,0.0,0.0,0.0,0.0,0.0,8.628,18.889,30.502
min,256.0,256.0,256.0,1.0,1.0,1.0,101.673,73.388,-190.881
25%,256.0,256.0,256.0,1.0,1.0,1.0,129.623,116.603,-135.27
50%,256.0,256.0,256.0,1.0,1.0,1.0,133.593,127.542,-116.559
75%,256.0,256.0,256.0,1.0,1.0,1.0,138.494,135.947,-94.189
max,256.0,256.0,256.0,1.0,1.0,1.0,154.11,183.164,-33.936


In [58]:
df.describe().round(3)

Unnamed: 0,dim_x,dim_y,dim_z,voxel_x,voxel_y,voxel_z,origin_x,origin_y,origin_z
count,76.0,76.0,76.0,76.0,76.0,76.0,76.0,76.0,76.0
mean,279.158,279.158,218.947,0.98,0.98,1.0,134.037,127.668,-116.163
std,57.148,57.148,22.356,0.11,0.11,0.0,8.628,18.889,30.502
min,256.0,256.0,176.0,0.488,0.488,1.0,101.673,73.388,-190.881
25%,256.0,256.0,208.0,0.972,0.972,1.0,129.623,116.603,-135.27
50%,256.0,256.0,216.0,1.016,1.016,1.0,133.593,127.542,-116.559
75%,288.0,288.0,240.0,1.016,1.016,1.0,138.494,135.947,-94.189
max,512.0,512.0,240.0,1.094,1.094,1.0,154.11,183.164,-33.936
