In [2]:
import os
import numpy as np
import pandas as pd
import nibabel as nib
import SimpleITK as sitk
from rt_utils import RTStructBuilder
from skimage.transform import resize


In [3]:
# Load the CSV file into a dataframe
metadata = pd.read_csv("../data/metadata.csv")

# Remove columns that contain only one unique value
for column in metadata.columns:
    if len(metadata[column].unique()) == 1:
        metadata.drop(column, axis=1, inplace=True)

metadata.rename(columns={'Study Date': 'StudyDate', 'Series Description':'course'}, inplace=True)
cols=['Series UID', 'Study UID', 'Study Description', 'Manufacturer', 'SOP Class UID', 'SOP Class Name', 'Number of Images', 'File Size','Download Timestamp']
metadata.drop(cols, axis=1, inplace=True)

# Convert the 'course' column to a numeric type, coercing errors to NaN
metadata['course'] = pd.to_numeric(metadata['course'], errors='coerce')

# Sort the DataFrame
metadata.sort_values(['pid', 'StudyDate', 'course'], inplace=True)

# Forward and backward fill the course number within each group of 'pid' and 'StudyDate'
metadata['course'] = metadata.groupby(['pid', 'StudyDate'])['course'].transform(lambda x: x.ffill().bfill())
metadata.drop('StudyDate', axis=1, inplace=True)
metadata['Modality'] = metadata['Modality'].replace({'RTSTRUCT': 'str', 'RTDOSE': 'dos', 'MR': 'mri'})
metadata.columns = ['pid','course','modality','folder']
# Convert the 'course' column to integers
metadata['course'] = metadata['course'].astype(int)
metadata

Unnamed: 0,pid,course,modality,folder
88,103,1,str,./Brain-TR-GammaKnife/GK_103/04-18-2014-NA-MR ...
11,103,1,dos,./Brain-TR-GammaKnife/GK_103/04-18-2014-NA-MR ...
90,103,1,mri,./Brain-TR-GammaKnife/GK_103/04-18-2014-NA-MR ...
91,103,2,str,./Brain-TR-GammaKnife/GK_103/12-18-2014-NA-MR ...
142,103,2,dos,./Brain-TR-GammaKnife/GK_103/12-18-2014-NA-MR ...
...,...,...,...,...
215,492,1,dos,./Brain-TR-GammaKnife/GK_492/09-08-2014-NA-hea...
223,492,1,mri,./Brain-TR-GammaKnife/GK_492/09-08-2014-NA-hea...
67,492,2,str,./Brain-TR-GammaKnife/GK_492/12-11-2014-NA-hea...
2,492,2,dos,./Brain-TR-GammaKnife/GK_492/12-11-2014-NA-hea...


In [4]:
connector = pd.read_csv("../connector/connector.csv")
connector

Unnamed: 0,sid,course,Diagnosis (Only want Mets),Primary Diagnosis,Age at Diagnosis,Gender,Lesion #,lesion_clinical,mri_type,duration_tx_to_imag (months),Fractions,lesion,date,lesion_rtstruct,lesion_clean,file_name
0,151,1,Brain Mets -Lung,Adenocarcinoma of the lung,77.0,Female,1.0,1 Lt Inf Cerebellar1,stable,42.458101,1.0,1 Left Inferior Cerebellar 1,10-31-2013,1 Lt Inf Cerebellar 1,LtInfCerebellar,151_1_LtInfCerebellar
1,151,1,Brain Mets -Lung,Adenocarcinoma of the lung,77.0,Female,10.0,10 Lt Ant Frontal 1,stable,42.458101,1.0,10 Left Anterior Frontal 1,10-31-2013,10 Lt ant Frontal 1,LtantFrontal,151_1_LtantFrontal
2,151,1,Brain Mets -Lung,Adenocarcinoma of the lung,77.0,Female,11.0,11 Lt Med Ant 1,stable,42.458101,1.0,11 Left Medial Anterior Frontal 1,10-31-2013,11 Lt med ant frontal 1,Ltmedantfrontal,151_1_Ltmedantfrontal
3,151,1,Brain Mets -Lung,Adenocarcinoma of the lung,77.0,Female,12.0,12 Rt Ant Frontal 1,stable,42.458101,1.0,12 Right Anterior Frontal 1,10-31-2013,12 Rt ant frontal 1,Rtantfrontal,151_1_Rtantfrontal
4,151,1,Brain Mets -Lung,Adenocarcinoma of the lung,77.0,Female,13.0,13 Lt Vertex 1,stable,42.458101,1.0,13 Left Vertex 1,10-31-2013,13 Lt vertex,Ltvertex,151_1_Ltvertex
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
239,105,1,Brain Mets Kidney,Squamous cell carcinoma,58.0,Male,1.0,RT Temporal,stable,2.103188,1.0,Right Temporal,09-04-2014,RT Temporal,RTTemporal,105_1_RTTemporal
240,492,2,Brain Mets - Lung,Adenocarcinoma of the lung,73.0,Male,13.0,Rt temporal,stable,11.501807,1.0,Right Temporal,12-11-2014,R temporal,Rtemporal,492_2_Rtemporal
241,246,1,Brain Mets-Lung,Adenocarcinoma of the lung,62.0,Female,4.0,Rt Thalmic,stable,0.854420,1.0,Right Thalamic,07-10-2013,4 Rt Thalamic 1,RtThalamic,246_1_RtThalamic
242,257,3,Brain Mets Renal cell,Renal cell carcinoma,58.0,Female,13.0,Rt Ventricle,stable,41.307920,1.0,Right Ventricle,09-29-2014,Rt Ventricle,RtVentricle,257_3_RtVentricle


In [5]:
# Apply str.strip() to all elements
connector = connector.applymap(lambda x: x.strip() if isinstance(x, str) else x)

  connector = connector.applymap(lambda x: x.strip() if isinstance(x, str) else x)


In [17]:
import SimpleITK as sitk
import glob
import re
import numpy as np
import nibabel as nib

from rt_utils import RTStructBuilder

grouped_df = metadata.groupby(['pid', 'course'])

rt_lesions = pd.DataFrame(columns=['pid', 'date', 'lesion', 'course']) 
output_folder = "../images_nii/vox_nii"
# Iterate over each group
for group_name, group_data in grouped_df:    
    pid, course = group_name
    
    # Get the 'SOP Class Name' and 'File Location' for each group
    folders_dic = dict(zip(group_data['modality'], group_data['folder']))
    
    mri_folder = os.path.join('../data',folders_dic['mri'])
    str_folder = os.path.join('../data',folders_dic['str'])
    
    mri_nii_file = f"{output_folder}/mri_{pid}_{course}.nii"
    img = nib.load(mri_nii_file)
    affine=img.affine
    
    # STRUCT
    modality = 'struct'    
    str_file = glob.glob(os.path.join(str_folder, "*.dcm"))[0]   
    rtstruct = RTStructBuilder.create_from(dicom_series_path=mri_folder, rt_struct_path=str_file)
    names = rtstruct.get_roi_names()
    lesions = [name for name in names if "Skull" not in name] 
        
    for lesion in lesions:      
        # Create the numpy files for the struct
        mask = rtstruct.get_roi_mask_by_name(lesion).astype(np.int16)  # Convert mask to int16
        # Transpose the mask from [z, x, y] to [z, y, x] to match SimpleITK's expectation
        mask = np.transpose(mask, (1, 0, 2))

        # Attempt to find a matching row based on multiple conditions
        matching_rows = connector.loc[
            (connector['sid'] == pid) & 
            (connector['course'] == course) & 
            (connector['lesion_rtstruct'] == lesion), 
            'lesion'
        ].values

        # Check if we found any matching rows
        if matching_rows.size > 0:
            # Found at least one match, process the first one
            result = matching_rows[0]
            result = f"${result.replace(' ', '_')}$"
        else:
            # No matches found, handle accordingly
            continue
       
        # Create the NIfTI file for the structure
        nifti_img = nib.Nifti1Image(mask, affine=affine)
        output_path = os.path.join('../images_nii/vox_nii', f"{modality}_{pid}_{course}_{result}.nii")
        nib.save(nifti_img, output_path)
        



Check equal between rtstruct and mri

In [21]:
import os
import nibabel as nib

# Define a function to check the equivalence of the header information
def validate_equivalence(mri_nii_path, rtstruct_nii_path):
    # Load the MRI and RTSTRUCT NIfTI files
    mri_img = nib.load(mri_nii_path)
    rtstruct_img = nib.load(rtstruct_nii_path)
    
    # Compare the header information
    if not np.array_equal(mri_img.shape, rtstruct_img.shape):
        pass
        # return False, "Mismatch in dimensions"
    if not np.array_equal(mri_img.header.get_zooms(), rtstruct_img.header.get_zooms()):
        return False, "Mismatch in voxel sizes"
    if not np.allclose(mri_img.affine, rtstruct_img.affine):
        return False, "Mismatch in affine transformation"
    
    # Everything matches
    return True, "All parameters match"

# Directory containing the NIfTI files
output_folder = "../images_nii/vox_nii"

# List all the MRI and RTSTRUCT NIfTI files
mri_files = [f for f in os.listdir(output_folder) if f.startswith('mri_') and f.endswith('.nii')]
rtstruct_files = [f for f in os.listdir(output_folder) if f.startswith('struct_') and f.endswith('.nii')]

# Assume a naming convention that allows matching RTSTRUCT files to MRI files
for mri_file in mri_files:
    # Extract patient ID and course from the file name
    parts = mri_file.split('.')[0].split('_')
    pid = parts[1]
    course = parts[2]
    
    # Find the matching RTSTRUCT file
    rtstruct_file = next((f for f in rtstruct_files if f"struct_{pid}_{course}" in f), None)
    
    if rtstruct_file:
        # Validate equivalence of header information
        mri_nii_path = os.path.join(output_folder, mri_file)
        rtstruct_nii_path = os.path.join(output_folder, rtstruct_file)
        valid, message = validate_equivalence(mri_nii_path, rtstruct_nii_path)
        print(f"Validation for {mri_file} and {rtstruct_file}: {message}")
    else:
        print(f"No matching RTSTRUCT file found for {mri_file}")
    

Validation for mri_408_2.nii and struct_408_2_$Right_Medial_Frontal_Flax$.nii: All parameters match
Validation for mri_338_1.nii and struct_338_1_$Right_Medial_Cerebellum$.nii: All parameters match
Validation for mri_463_2.nii and struct_463_2_$Left_Posterior_Temporal$.nii: All parameters match
Validation for mri_270_2.nii and struct_270_2_$Right_Occipital$.nii: All parameters match
Validation for mri_431_1.nii and struct_431_1_$Left_Occipital$.nii: All parameters match
Validation for mri_364_1.nii and struct_364_1_$Right_Corpus_Callosum$.nii: All parameters match
Validation for mri_114_2.nii and struct_114_2_$Left_Temporal$.nii: All parameters match
Validation for mri_103_2.nii and struct_103_2_$Right_Superior_Frontal$.nii: Mismatch in voxel sizes
Validation for mri_243_2.nii and struct_243_2_$Left_Medial_Anterior_Frontal$.nii: All parameters match
Validation for mri_257_7.nii and struct_257_7_$Right_Occipital$.nii: All parameters match
Validation for mri_257_2.nii and struct_257_2_$L

resize to 256x256x256

In [22]:
import os
import numpy as np
import nibabel as nib

input_dir = '../images_nii/vox_nii'
output_dir = '../images_nii/ppp_nii'

if not os.path.exists(output_dir):
    os.makedirs(output_dir)

for filename in os.listdir(input_dir):
    if filename.startswith('struct') and filename.endswith(".nii"):
        input_path = os.path.join(input_dir, filename)
        output_path = os.path.join(output_dir, filename)

        # Load the NIfTI file
        img = nib.load(input_path)
        data = img.get_fdata()

        # Resize the first two dimensions (x and y) with padding or cropping
        x_diff = 256 - data.shape[0]
        y_diff = 256 - data.shape[1]

        if x_diff > 0:  # Pad x dimension
            x_pad_before = x_diff // 2
            x_pad_after = x_diff - x_pad_before
            data = np.pad(data, ((x_pad_before, x_pad_after), (0, 0), (0, 0)), 'constant')
        elif x_diff < 0:  # Crop x dimension
            x_crop_before = abs(x_diff) // 2
            x_crop_after = abs(x_diff) - x_crop_before
            data = data[x_crop_before:-x_crop_after, :, :]

        if y_diff > 0:  # Pad y dimension
            y_pad_before = y_diff // 2
            y_pad_after = y_diff - y_pad_before
            data = np.pad(data, ((0, 0), (y_pad_before, y_pad_after), (0, 0)), 'constant')
        elif y_diff < 0:  # Crop y dimension
            y_crop_before = abs(y_diff) // 2
            y_crop_after = abs(y_diff) - y_crop_before
            data = data[:, y_crop_before:-y_crop_after, :]

        # Resize the third dimension (z) with padding
        z_diff = 256 - data.shape[2]
        if z_diff > 0:
            z_pad_before = z_diff // 2
            z_pad_after = z_diff - z_pad_before
            data = np.pad(data, ((0, 0), (0, 0), (z_pad_before, z_pad_after)), 'constant')

        # Create a new NIfTI image with the resized data
        new_img = nib.Nifti1Image(data, img.affine)

        # Save the resized image
        nib.save(new_img, output_path)
