### This notebook is for sub-volume extraction and radiomics calculation

In [None]:
import os
import SimpleITK as sitk
import pandas as pd
import glob

import cv2
import numpy as np
import matplotlib.pyplot as plt
from skimage import measure, color
import skimage.morphology as morphology

from skimage.segmentation import mark_boundaries as mark_boundaries
from skimage.segmentation import slic as slic

import shutil
import openpyxl
import nibabel as nib
import subprocess
import pingouin as pg

Supervoxel segmentation with the SLIC algorithm

In [None]:
def supervoxel(volume_list,mask_list,n_segment,supervoxel_path,center):
    """Compute a SLIC supervoxel label map inside each mask and save it.

    For every (volume, mask) pair the volume is min-max normalised to
    [0, 1], the mask is cleaned with a morphological opening (ball of
    radius 1) and ``slic`` is run restricted to the opened mask. The label
    map is written, with the volume's image information copied onto it, to
    ``<supervoxel_path>/<center>/<center>_<mask basename>_SVmask.nii.gz``.

    Args:
        volume_list: Paths of the image volumes.
        mask_list: Paths of the masks, paired with ``volume_list`` by
            position (``zip`` silently ignores surplus entries of the
            longer list).
        n_segment: Number of segments requested from ``slic``.
        supervoxel_path: Root output directory; created if missing.
        center: Name used for the output sub-directory and as file prefix.

    Returns:
        None. Cases that raise are reported and skipped; if any case
        failed, their volume file names are saved to
        ``<supervoxel_path>/failed_cases.xlsx``.
    """
    print('Start super voxel processing ...')

    # makedirs also creates supervoxel_path itself when it is missing.
    center_dir = os.path.join(supervoxel_path, center)
    os.makedirs(center_dir, exist_ok=True)

    failed_cases = []

    for volume_path, mask_path in zip(volume_list, mask_list):
        # Deliberately broad: one broken case must not abort the batch;
        # it is reported here and collected for the summary file instead.
        try:
            volume_itk = sitk.ReadImage(volume_path)
            img = sitk.GetArrayFromImage(volume_itk)
            mask = sitk.GetArrayFromImage(sitk.ReadImage(mask_path))
            img_normalized = cv2.normalize(img, None, alpha=0, beta=1, norm_type=cv2.NORM_MINMAX, dtype=cv2.CV_32F)
            # Opening with a radius-1 ball before the mask is handed to SLIC.
            tmp_mask = morphology.opening(mask, morphology.ball(1))

            slic_mask = slic(img_normalized, n_segment, compactness=10, mask=tmp_mask, start_label=1,
                             channel_axis=None)

            mask_itk = sitk.GetImageFromArray(np.uint16(slic_mask))
            mask_itk.CopyInformation(volume_itk)

            output_name = os.path.join(
                center_dir,
                center + '_' + os.path.basename(mask_path).split('.nii')[0] + '_SVmask.nii.gz',
            )
            sitk.WriteImage(mask_itk, output_name)
            # basename instead of split('/') so the message is also short
            # on platforms whose path separator is not '/'.
            print('{} is Done'.format(os.path.basename(output_name)))

        except Exception as e:
            print(f'something wrong, SV computation failed for {os.path.basename(volume_path)}: {e}')
            failed_cases.append(os.path.basename(volume_path))

    if failed_cases:
        df_failed_cases = pd.DataFrame(failed_cases, columns=['Failed Cases'])
        df_failed_cases.to_excel(os.path.join(supervoxel_path,'failed_cases.xlsx'), index=False)
        print("Failed cases have been saved to 'failed_cases.xlsx'.")
#TODO

SV_split 

In [None]:
def SV_split(volume, mask, out_dir):
    """Split a supervoxel label map into one binary mask file per label.

    For each ``i`` in ``1..num`` a mask that is 1 where the label map equals
    ``i`` and 0 elsewhere is written to
    ``<out_dir>/SVmask/<mask basename><i>.nii.gz``, with the image
    information copied from ``volume``.

    Args:
        volume: Path of the reference image whose information is copied
            onto every output mask.
        mask: Path of the supervoxel label map.
        out_dir: Directory under which the ``SVmask`` folder is created.
    """
    mask_array = sitk.GetArrayFromImage(sitk.ReadImage(mask))
    # NOTE(review): ``num`` is the number of connected components, while the
    # loop below selects voxels by raw label value. The two agree only when
    # every label forms exactly one connected region - confirm this holds
    # for the label maps fed in here.
    _, num = measure.label(mask_array, connectivity=3, return_num=True)

    if num == 0:
        # Nothing to write; as before, neither the folder nor the reference
        # volume is touched in this case.
        return

    svsplit_path = os.path.join(out_dir, 'SVmask')
    os.makedirs(svsplit_path, exist_ok=True)

    # Loop-invariant work hoisted: the reference volume and the label map
    # are each read from disk once instead of once per label.
    reference_itk = sitk.ReadImage(volume)
    mask_stem = os.path.basename(mask).split('.nii')[0]

    for i in range(1, num + 1):
        # Binary mask of label i, kept in the label map's own dtype.
        section_mask = (mask_array == i).astype(mask_array.dtype)

        section_mask_itk = sitk.GetImageFromArray(section_mask)
        section_mask_itk.CopyInformation(reference_itk)

        sitk.WriteImage(section_mask_itk,
                        os.path.join(svsplit_path, mask_stem) + str(i) + '.nii.gz')

        print(f'split {mask_stem} is Done')

In [None]:
def run_pyradiomics(input_path, params_path, output_path):
    """Invoke the ``pyradiomics`` command-line tool and wait for it to end.

    The command executed is
    ``pyradiomics <input_path> -p <params_path> -o <output_path> -f csv``.

    Args:
        input_path: Value passed as the positional input argument.
        params_path: Value passed after ``-p``.
        output_path: Value passed after ``-o``.

    Raises:
        FileNotFoundError: If no ``pyradiomics`` executable is found.
    """
    command = ['pyradiomics', input_path, '-p', params_path, '-o', output_path,'-f', 'csv']
    # The argument list must be run with shell=False: on POSIX, shell=True
    # with a list executes only the first item as the command and hands the
    # remaining items to the shell itself, so pyradiomics got no arguments.
    completed = subprocess.run(command, shell=False)
    if completed.returncode != 0:
        # Surface failures instead of returning silently.
        print(f'pyradiomics exited with return code {completed.returncode}')

# Parameter file handed to pyradiomics via its ``-p`` option (placeholder path).
params_path = "/path/to/params"

In [None]:
def check_dataset(list_volume, list_mask):
    """Ensure the volume list and the mask list have the same length.

    Only the lengths are compared; the entries themselves are not inspected.

    Raises:
        ValueError: If the two lists differ in length.
    """
    n_volumes, n_masks = len(list_volume), len(list_mask)
    if n_volumes == n_masks:
        return
    raise ValueError('There exists a mismatch between two datasets.')

#### Set User Parameters

In [None]:
# Root directory for everything this notebook writes (placeholder path).
output_dir = "/path/to/output_dir"
# Number of segments requested from SLIC for each mask.
n_segment = 100
# Tag used for the supervoxel sub-directory and as output file-name prefix.
center = 'hn'

# Output layout below output_dir:
#   SV/SVSplit         - parent folder of the per-supervoxel masks
#   SV/SVSplit/SVmask  - one binary mask file per supervoxel
#   SV/SVWhole         - whole supervoxel label maps
SVS_dir = os.path.join(output_dir,'SV','SVSplit')
SVSmask_dir = os.path.join(output_dir,'SV','SVSplit','SVmask')
SVwhole_dir = os.path.join(output_dir,'SV','SVWhole')

dir_list = [output_dir,SVS_dir,SVSmask_dir,SVwhole_dir]
# `dir_path` rather than `dir`, so the builtin dir() is not shadowed for the
# rest of the session; exist_ok makes the cell safe to re-run.
for dir_path in dir_list:
    os.makedirs(dir_path, exist_ok=True)

In [None]:
# CSV case table (placeholder path); its 'Image' and 'Mask' columns are read below.
pyradiomics_input_path = "/path/to/pyradiomics_input"

df = pd.read_csv(pyradiomics_input_path)

# Parallel lists: entry k of volume_list belongs with entry k of mask_list.
volume_list = df['Image'].tolist()
mask_list = df['Mask'].tolist()

# Raises ValueError if the two lists differ in length.
check_dataset(volume_list, mask_list) 

In [None]:
# Destination CSV for this pyradiomics run (placeholder path).
output_path = os.path.join("/path/to/output.csv")  

# Run pyradiomics on the case table loaded above, before any supervoxel splitting.
run_pyradiomics(pyradiomics_input_path,params_path,output_path)

#### Implementation of sub-volume calculation

In [None]:
# Supervoxel calculation with SLIC: writes one label map per (volume, mask)
# pair into <SVwhole_dir>/<center>/.
supervoxel(volume_list, mask_list, n_segment, SVwhole_dir, center) 
print('Supervoxel Done')

In [None]:
# Supervoxel label maps written by the previous step, in sorted order.
sv_list = sorted(glob.glob(os.path.join(SVwhole_dir,center, '*')))

# Reference volume for each label map: the file name with its first and its
# last two '_'-separated tokens removed, looked up under the save directory.
volume_list2 = [
    os.path.join("/path/to/save",'_'.join(os.path.basename(sv_path).split('_')[1:-2])+ ".nii.gz")
    for sv_path in sv_list
]

check_dataset(volume_list2, sv_list)

# Write one binary mask per supervoxel label into <SVS_dir>/SVmask.
print('Start SV split processing ...')
for volume_i, mask_j in zip(volume_list2, sv_list):
    SV_split(volume_i, mask_j, SVS_dir)
print('SV split done')

In [None]:
# Sort the per-supervoxel masks into "large enough" and "too small" by the
# number of voxels greater than zero.
mask_files = glob.glob(os.path.join(SVSmask_dir,'*'))
# A mask must contain more than 3*3*3 voxels to be kept.
volume_threshold = np.prod([3, 3, 3])

filtered_masks = []
small_filtered_masks = []

for mask_file in mask_files:
    mask_volume = np.sum(nib.load(mask_file).get_fdata() > 0)

    if mask_volume <= volume_threshold:
        small_filtered_masks.append({'Mask File': mask_file, 'Volume': mask_volume})
        continue
    filtered_masks.append(mask_file)

# The rejected masks, with their voxel counts, are logged to a spreadsheet.
if len(small_filtered_masks) > 0:
    rejected_path = os.path.join(output_dir,"small_masks.xlsx")
    pd.DataFrame(small_filtered_masks).to_excel(rejected_path, index=False)

print("Filtered Masks Done")

#### Generate case table for radiomics

In [None]:
# Use the masks that passed the size filter (`filtered_masks`, built by the
# filtering cell). Re-globbing SVSmask_dir here would bring the rejected
# small masks back into the radiomics case table.
mask_filtered_files = list(filtered_masks)

In [None]:
# Notebook display: number of masks that will go into the case table.
len(mask_filtered_files)

In [None]:
# Image path for each sub-volume mask: the mask file name with its first and
# its last two '_'-separated tokens removed, under the save directory.
image_files = [
    os.path.join("/path/to/save",'_'.join(os.path.basename(file_path).split('_')[1:-2])+ ".nii.gz")
    for file_path in mask_filtered_files
]

check_dataset(image_files, mask_filtered_files)

# One row per sub-volume; the ID is the mask file name without its
# '.nii...' extension. `case_id` is used so the builtin id() is not shadowed.
data = []
for img_file, mask_file in zip(image_files, mask_filtered_files):
    case_id = os.path.basename(mask_file).split('.nii')[0]
    data.append([case_id, img_file, mask_file])

df = pd.DataFrame(data, columns=['ID', 'Image', 'Mask'])

df.to_csv(os.path.join(output_dir ,'featurelevel_input.csv'), index=False)

#### Implementation of radiomics calculation

In [None]:
# Run pyradiomics on the sub-volume case table written by the previous cell.
input_path = os.path.join(output_dir ,'featurelevel_input.csv')
output_path = os.path.join(output_dir ,'featurelevel_output.csv')

run_pyradiomics(input_path,params_path,output_path)

In [None]:
# Whole supervoxel label maps of the configured centre. `center` replaces the
# previously hard-coded 'hn' so this cell follows the user parameter, like the
# SV-split cell does.
mask_filtered_files = glob.glob(os.path.join(SVwhole_dir,center, '*'))

# NOTE(review): this cell strips the first three '_' tokens and appends
# '_image.nii.gz', while the other cells strip one token and append
# '.nii.gz' - confirm which naming scheme the image files actually follow.
image_files = [
    os.path.join("/path/to/save",'_'.join(os.path.basename(file_path).split('_')[3:-2])+ "_image.nii.gz")
    for file_path in mask_filtered_files
]

check_dataset(image_files, mask_filtered_files)

# One row per label map; the ID is the file name without its '.nii...'
# extension. `case_id` is used so the builtin id() is not shadowed.
data = []
for img_file, mask_file in zip(image_files, mask_filtered_files):
    case_id = os.path.basename(mask_file).split('.nii')[0]
    data.append([case_id, img_file, mask_file])

df = pd.DataFrame(data, columns=['ID', 'Image', 'Mask'])

df.to_csv(os.path.join(output_dir ,'ICC_tumourlevel_input.csv'), index=False)

input_path = os.path.join(output_dir ,'ICC_tumourlevel_input.csv')
output_path = os.path.join(output_dir ,'ICC_tumourlevel_output.csv')

run_pyradiomics(input_path,params_path,output_path)