In [19]:
import SimpleITK as sitk
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
import os

import imageio.v2 as imageio
import SimpleITK as sitk
from radiomics import featureextractor
import pandas as pd
import re
import numpy as np
from radiomics import glszm

In [None]:
# Root directory that is scanned for input folders.
path = Path('/projects/YG')
# Results are written under the same root. pathlib.Path objects are immutable,
# so the object can simply be reused; Path has no zero-argument copy() method,
# which is why the previous `path.copy()` call raised.
save_path = path
# Keep only directories whose name starts with 'C', excluding 'CAPS25'.
# iterdir() yields entries in arbitrary order, hence the sort.
subfolders = sorted(
    folder
    for folder in path.iterdir()
    if folder.is_dir() and folder.name.startswith('C') and folder.name != 'CAPS25'
)  # and folder.name != 'CAPS29'

In [None]:
# Walk every sub-folder of every selected folder and collect the segmentation
# file plus the in-phase / out-phase image files found there.
segmentation_files = []
inphase_files = []
outphase_files = []

for folder in subfolders:
    for custom_folder in folder.iterdir():
        if not custom_folder.is_dir():
            continue

        # glob() returns matches in arbitrary order; sorting makes the
        # "first segmentation" chosen below the same file on every run.
        segmentation_filenames = sorted(
            candidate
            for candidate in custom_folder.glob('Segment*.nrrd')
            if 'label' not in candidate.name
        )
        inphase_filenames = sorted(custom_folder.glob('in*.gz'))
        outphase_filenames = sorted(custom_folder.glob('out*.gz'))

        # At most one segmentation per folder; every matching image file.
        segmentation_files.extend(segmentation_filenames[:1])
        inphase_files.extend(inphase_filenames)
        outphase_files.extend(outphase_filenames)


for seg_file in segmentation_files:
    print(seg_file)
print(f"\nTotal number of segmentation files found: {len(segmentation_files)}") # 74

for in_file in inphase_files:
    print(in_file)
print(f"\nTotal number of in phase files found: {len(inphase_files)}")

for out_file in outphase_files:
    print(out_file)
# This summary line previously said "in phase" although it counts out-phase files.
print(f"\nTotal number of out phase files found: {len(outphase_files)}")

In [None]:
def _containing_folders_key(file_path):
    """Sort key: name of the grandparent folder, then name of the parent folder."""
    folder_names = file_path.parts
    return (folder_names[-3], folder_names[-2])


# All three lists are ordered with the same key so that entries at the same
# position are expected to come from the same folder.
segmentation_files_sorted = sorted(segmentation_files, key=_containing_folders_key)
print("\nSorted segmentation files:")
for seg_file in segmentation_files_sorted:
    print(seg_file)

outphase_files_sorted = sorted(outphase_files, key=_containing_folders_key)
print("\nSorted outphase files:")
for outphase_file in outphase_files_sorted:
    print(outphase_file)

inphase_files_sorted = sorted(inphase_files, key=_containing_folders_key)
print("\nSorted inphase files:")
for inphase_file in inphase_files_sorted:
    print(inphase_file)

In [25]:
extractor = featureextractor.RadiomicsFeatureExtractor()

# One feature dict per processed patient; appended to by
# generate_largest_segmentation_image().
radiomic_features = []


def generate_largest_segmentation_image(segment_file, outphase_file, inphase_file, save_path):
    """Extract in-phase radiomic features from the slice with the largest mask area.

    The segmentation and both image volumes are read, the slice (first array
    axis) containing the most non-zero segmentation voxels is selected, the
    three 2D slices are saved to ``<save_path>/<patient_id>/`` as an ``.npz``
    archive, and the features computed on the in-phase slice are appended to
    the module-level ``radiomic_features`` list.

    Args:
        segment_file: Path of the segmentation volume.
        outphase_file: Path of the out-phase image volume.
        inphase_file: Path of the in-phase image volume.
        save_path: Directory under which the per-patient folder is created.

    Returns:
        None. The patient is skipped (with a printed message) when the volume
        shapes differ or the segmentation contains no non-zero voxel.
    """
    # Take the ID relative to the file (<id>/<sub-folder>/<file>), the same
    # component the file lists are sorted by, instead of a fixed index into
    # the absolute path that silently changes meaning when the root moves.
    patient_id = Path(segment_file).parts[-3]

    patient_folder = os.path.join(save_path, patient_id)
    os.makedirs(patient_folder, exist_ok=True)

    segmentation_array = sitk.GetArrayFromImage(sitk.ReadImage(str(segment_file)))  # Shape: [z, y, x]
    outphase_array = sitk.GetArrayFromImage(sitk.ReadImage(str(outphase_file)))
    inphase_array = sitk.GetArrayFromImage(sitk.ReadImage(str(inphase_file)))

    # The slice index found on the segmentation is applied to both image
    # volumes, so the complete shapes (slice count included) have to agree.
    if segmentation_array.shape != inphase_array.shape or segmentation_array.shape != outphase_array.shape:
        print(f"Skipping PID: {patient_id} due to mismatched dimensions.")
        return

    # argmax over an all-zero (or empty) mask would pick a meaningless slice
    # and the extractor would be handed a mask without any labelled voxel.
    if not np.any(segmentation_array):
        print(f"Skipping PID: {patient_id} because the segmentation is empty.")
        return

    slice_areas = [np.count_nonzero(plane) for plane in segmentation_array]
    largest_slice_index = int(np.argmax(slice_areas))

    outphase_slice = outphase_array[largest_slice_index]
    inphase_slice = inphase_array[largest_slice_index]
    # Collapse every non-zero label into a single binary mask.
    segmentation_slice = (segmentation_array[largest_slice_index] > 0).astype(np.uint8)

    npz_output_path = os.path.join(patient_folder, f"{patient_id}_largest_slice_data.npz")
    np.savez(npz_output_path,
             inphase=inphase_slice,
             outphase=outphase_slice,
             segmentation=segmentation_slice)

    print(f"Saved data at {npz_output_path}")

    inphase_sitk = sitk.GetImageFromArray(inphase_slice)
    segmentation_sitk = sitk.GetImageFromArray(segmentation_slice)
    result = extractor.execute(inphase_sitk, segmentation_sitk)

    # Drop the entries whose key contains "diagnostics"; keep the rest.
    filtered_result = {key: value for key, value in result.items() if "diagnostics" not in key}
    radiomic_features.append({"PatientID": patient_id, **filtered_result})
    

In [None]:
# Load one patient's saved largest-slice arrays back from disk for inspection.
# NOTE(review): this root differs from `save_path` used when writing the
# archives -- confirm that it points at the directory the files were saved to.
patient_id = "CAPS20"
npz_path = f"/projects/pancreas-cancer-hpc/MRI-texture-results-YG/{patient_id}/{patient_id}_largest_slice_data.npz"

# np.load() on an .npz returns a lazily-read archive that keeps the file open;
# read the arrays inside a `with` block so the handle is closed afterwards.
with np.load(npz_path) as data:
    inphase_image = data["inphase"]
    outphase_image = data["outphase"]
    segmentation_mask = data["segmentation"]

print("In-phase image shape:", inphase_image.shape)
print("Out-phase image shape:", outphase_image.shape)
print("Segmentation mask shape:", segmentation_mask.shape)

### In-phase feature extraction

In [None]:
# zip() stops at the shortest list without complaint, so one folder with a
# missing or extra file would pair every later segmentation with the wrong
# images. Refuse to run on inconsistent inputs instead.
if not (len(segmentation_files_sorted) == len(outphase_files_sorted) == len(inphase_files_sorted)):
    raise ValueError(
        "File lists differ in length: "
        f"{len(segmentation_files_sorted)} segmentation, "
        f"{len(outphase_files_sorted)} out-phase, "
        f"{len(inphase_files_sorted)} in-phase."
    )

# Start from an empty list so re-running this cell does not append duplicate rows.
radiomic_features.clear()

for seg_file, out_file, in_file in zip(segmentation_files_sorted, outphase_files_sorted, inphase_files_sorted):
    # All three files of one triple must live in the same folder.
    if not (Path(seg_file).parent == Path(out_file).parent == Path(in_file).parent):
        raise ValueError(f"Files are not from the same folder: {seg_file}, {out_file}, {in_file}")
    generate_largest_segmentation_image(seg_file, out_file, in_file, save_path)


# Convert the collected radiomic features to a DataFrame and display it.
radiomic_features_df = pd.DataFrame(radiomic_features)
radiomic_features_df

In [None]:
# Display the current feature table (one row per processed patient).
radiomic_features_df

In [None]:
# Show the distinct patient IDs present in the feature table.
radiomic_features_df['PatientID'].unique()
# Alternative that returns only the number of distinct IDs:
# radiomic_features_df['PatientID'].nunique()

In [None]:
# Remove, one pattern at a time, every column whose name matches the pattern,
# then order the rows by index and display the result.
for pattern in ('diagnostics', 'firstorder'):
    matching_columns = radiomic_features_df.filter(regex=pattern).columns
    radiomic_features_df = radiomic_features_df.drop(columns=matching_columns)
radiomic_features_df = radiomic_features_df.sort_index()
radiomic_features_df

In [17]:
# Write the filtered feature table to an Excel workbook inside save_path,
# leaving the DataFrame index out of the sheet.
excel_name = 'original_features_largest_slices_inphase.xlsx'
file_path = os.path.join(save_path, excel_name)
radiomic_features_df.to_excel(file_path, index=False)

### Out-phase feature extraction

In [7]:
extractor = featureextractor.RadiomicsFeatureExtractor()

# One feature dict per processed patient; appended to by
# generate_largest_segmentation_image().
radiomic_features = []


def generate_largest_segmentation_image(segment_file, outphase_file, inphase_file):
    """Extract out-phase radiomic features from the slice with the largest mask area.

    The segmentation and both image volumes are read, the slice (first array
    axis) containing the most non-zero segmentation voxels is selected, and the
    features computed on the out-phase slice are appended to the module-level
    ``radiomic_features`` list. The per-patient folder under the module-level
    ``save_path`` is created if it does not exist yet.

    Args:
        segment_file: Path of the segmentation volume.
        outphase_file: Path of the out-phase image volume.
        inphase_file: Path of the in-phase image volume (read for the shape
            check only).

    Returns:
        None. The patient is skipped (with a printed message) when the volume
        shapes differ or the segmentation contains no non-zero voxel.
    """
    # Take the ID relative to the file (<id>/<sub-folder>/<file>), the same
    # component the file lists are sorted by, instead of a fixed index into
    # the absolute path that silently changes meaning when the root moves.
    patient_id = Path(segment_file).parts[-3]

    patient_folder = os.path.join(save_path, patient_id)
    os.makedirs(patient_folder, exist_ok=True)

    segmentation_array = sitk.GetArrayFromImage(sitk.ReadImage(str(segment_file)))  # Shape: [z, y, x]
    outphase_array = sitk.GetArrayFromImage(sitk.ReadImage(str(outphase_file)))
    inphase_array = sitk.GetArrayFromImage(sitk.ReadImage(str(inphase_file)))

    # Features are computed on the out-phase volume, so it must be validated
    # too, and the complete shapes (slice count included) have to agree
    # because the segmentation's slice index is reused on the image volume.
    if segmentation_array.shape != inphase_array.shape or segmentation_array.shape != outphase_array.shape:
        print(f"Skipping PID: {patient_id} due to mismatched dimensions.")
        return

    # argmax over an all-zero (or empty) mask would pick a meaningless slice
    # and the extractor would be handed a mask without any labelled voxel.
    if not np.any(segmentation_array):
        print(f"Skipping PID: {patient_id} because the segmentation is empty.")
        return

    slice_areas = [np.count_nonzero(plane) for plane in segmentation_array]
    largest_slice_index = int(np.argmax(slice_areas))

    outphase_slice = outphase_array[largest_slice_index]
    # Collapse every non-zero label into a single binary mask.
    segmentation_slice = (segmentation_array[largest_slice_index] > 0).astype(np.uint8)

    outphase_sitk = sitk.GetImageFromArray(outphase_slice)
    segmentation_sitk = sitk.GetImageFromArray(segmentation_slice)

    result = extractor.execute(outphase_sitk, segmentation_sitk)
    # Drop the entries whose key contains "diagnostics"; keep the rest.
    filtered_result = {key: value for key, value in result.items() if "diagnostics" not in key}
    radiomic_features.append({"PatientID": patient_id, **filtered_result})

In [None]:
# zip() stops at the shortest list without complaint, so one folder with a
# missing or extra file would pair every later segmentation with the wrong
# images. Refuse to run on inconsistent inputs instead.
if not (len(segmentation_files_sorted) == len(outphase_files_sorted) == len(inphase_files_sorted)):
    raise ValueError(
        "File lists differ in length: "
        f"{len(segmentation_files_sorted)} segmentation, "
        f"{len(outphase_files_sorted)} out-phase, "
        f"{len(inphase_files_sorted)} in-phase."
    )

# Start from an empty list so re-running this cell does not append duplicate rows.
radiomic_features.clear()

for seg_file, out_file, in_file in zip(segmentation_files_sorted, outphase_files_sorted, inphase_files_sorted):
    # All three files of one triple must live in the same folder.
    if not (Path(seg_file).parent == Path(out_file).parent == Path(in_file).parent):
        raise ValueError(f"Files are not from the same folder: {seg_file}, {out_file}, {in_file}")
    generate_largest_segmentation_image(seg_file, out_file, in_file)
radiomic_features_df = pd.DataFrame(radiomic_features)

In [None]:
# Remove every column whose name matches 'diagnostics' or 'firstorder',
# then order the rows by index.
radiomic_features_df = radiomic_features_df[
    radiomic_features_df.columns.drop(list(radiomic_features_df.filter(regex='diagnostics')))
]
radiomic_features_df = radiomic_features_df[
    radiomic_features_df.columns.drop(list(radiomic_features_df.filter(regex='firstorder')))
]
radiomic_features_df = radiomic_features_df.sort_index()

# to_excel() needs the path of a workbook file; `save_path` itself is the
# output directory, so build a file name inside it (mirroring the in-phase
# export, which writes 'original_features_largest_slices_inphase.xlsx').
outphase_file_path = os.path.join(save_path, 'original_features_largest_slices_outphase.xlsx')
radiomic_features_df.to_excel(outphase_file_path, index=False)

# Only the last expression of a cell is displayed, so show the table here.
radiomic_features_df