In [1]:
import os, glob 
sys.path.append('../')

import SimpleITK as sitk
import numpy as np
import cv2

from MRIsegm.utils import get_slices, mask_slices, get_rois

In [3]:
src = '/Users/giuseppefilitto/sorted_plus_extra'

patients = os.listdir(src)
if '.DS_Store' in patients:
    patients.remove('.DS_Store')

bad_patients = [ 'BO9', 'BO17', 'BO28', 'BO36', 'BO37', 'BO39', 'BO40', 'BO54', 'BO72', 'BO77', 'BO86']

good_patients = list(set(patients) - set(bad_patients))

# Removing because of special folders
special_patients = []

good_patients = [x for x in good_patients if x not in special_patients]
print("Number of good patients:",len(good_patients))

Number of good patients: 43


In [4]:
unwanted = ['ROI', '_frames', 'bis', '_resized', '_contoured', '_NRRD', 'PRINCIPALI', 'predicted', 'T2ROI','predicted_mask']
wanted = ['ROI']

for patient in good_patients:

    print(f'{patient}')

    folders_glob=glob.glob(src + '/' + patient + '/T2*')
    folders = sorted([x for x in folders_glob if all(y not in x for y in unwanted)])
    ROIs = sorted([x for x in folders_glob if any(y in x for y in wanted)])
        
    if len(folders) == 0:
        folders_glob=glob.glob(src + '/' + patient + '/t2DEF')
        folders = sorted([x for x in folders_glob if all(y not in x for y in unwanted)])
    

    for (path, path_rois)  in zip(folders, ROIs):

        folder_name = os.path.split(path)[1]
        print(f' folder: {folder_name}')
        slices = get_slices(dir_path=path, uint8=False)

        if slices.shape[1:3] != 512:
            resized = np.zeros(shape=(slices.shape[0], 512, 512))
            for layer in range(slices.shape[0]):
                img = slices[layer, :, :]
                res = cv2.resize(img, (512, 512))
                resized[layer, ...] = res
        else:
            resized = slices
        
        stack = sitk.GetImageFromArray(resized)

        print(stack.GetSize())
    
        output_folder = '_'.join((path, 'NRRD'))
        if not os.path.exists(output_folder):
            os.makedirs(output_folder)

        output_name = 'original.nrrd'

        output = os.path.join(output_folder, output_name)

        if os.path.isfile(output):
            os.remove(output)

        sitk.WriteImage(stack, output)
        
        #! ROIs

        folder_name = os.path.split(path_rois)[1]
        print(f' folder: {folder_name}')

        rois = get_rois(roi_path=path_rois)
    
        slices_of_masks = mask_slices(slices=resized, rois=rois)
        
        output_folder = '_'.join((path, 'NRRD'))

        if not os.path.exists(output_folder):
            os.makedirs(output_folder)
        
        stack_masks = sitk.GetImageFromArray(slices_of_masks)
        print(stack_masks.GetSize())

        output_name = 'segmented.nrrd'

        output = os.path.join(output_folder, output_name)

        if os.path.isfile(output):
            os.remove(output)

        sitk.WriteImage(stack_masks, output)

    folders = []

BO56
 folder: T2
(512, 512, 24)
 folder: T2ROI
(512, 512, 24)
BO122
 folder: T2AX
(512, 512, 16)
 folder: T2ROI
(512, 512, 16)
BO16
 folder: T2AX
(512, 512, 24)
 folder: T2ROI
(512, 512, 24)
BO32
 folder: T2AX
(512, 512, 32)
 folder: T2ROI
(512, 512, 32)
BO52
 folder: T2
(512, 512, 40)
 folder: T2ROI
(512, 512, 40)
BO43
 folder: T2AX
(512, 512, 24)
 folder: T2ROI
(512, 512, 24)
BO90
 folder: T25mm
(512, 512, 30)
 folder: T2ROI
(512, 512, 30)
BO50
 folder: T2
(512, 512, 24)
 folder: T2ROI
(512, 512, 24)
BO64
 folder: T2
(512, 512, 24)
 folder: T2ROI
(512, 512, 24)
BO61
 folder: T2
(512, 512, 52)
 folder: T2ROI
(512, 512, 52)
BO68
 folder: T2
(512, 512, 42)
 folder: T2ROI
(512, 512, 42)
BO6
 folder: T2AX
(512, 512, 24)
 folder: T2ROI
(512, 512, 24)
BO63
 folder: T2
(512, 512, 25)
 folder: T2ROI
(512, 512, 25)
BO49
 folder: T2
(512, 512, 31)
 folder: T2ROI
(512, 512, 31)
BO71
 folder: T2
(512, 512, 20)
 folder: T2ROI
(512, 512, 20)
BO74
 folder: T2
(512, 512, 20)
 folder: T2ROI
(512, 512,

In [5]:
import pickle
from radiomics import featureextractor
import pandas as pd

In [6]:
print(good_patients)

['BO56', 'BO122', 'BO16', 'BO32', 'BO52', 'BO43', 'BO90', 'BO50', 'BO64', 'BO61', 'BO68', 'BO6', 'BO63', 'BO49', 'BO71', 'BO74', 'BO47', 'BO75', 'BO15', 'BO78', 'BO45', 'BO18', 'BO101', 'BO1', 'BO42', 'BO26', 'BO66', 'BO33', 'BO82', 'BO38', 'BO11', 'BO85', 'BO2', 'BO107', 'BO29', 'BO51', 'BO60', 'BO76', 'BO31', 'BO48', 'BO109', 'BO44', 'BO35']


In [7]:
params  = '../extras/Params.yaml'
extractor = featureextractor.RadiomicsFeatureExtractor(params)

features = {}

for patient in good_patients:

    dirs = glob.glob(src + '/' + patient + '/*_NRRD')
    
    
    for directory in dirs:

        original = sitk.ReadImage(directory + "/original.nrrd")
        segmented = sitk.ReadImage(directory + "/segmented.nrrd")

        folder_name = os.path.split(directory)[1]
        fold_prefix = folder_name.split('_')[0]

        features[patient, fold_prefix] = extractor.execute(original, segmented)

INFO:radiomics.featureextractor:Loading parameter file ../extras/Params.yaml
INFO:radiomics.featureextractor:Calculating features with label: 255
INFO:radiomics.featureextractor:Loading image and mask
INFO:radiomics.featureextractor:Computing shape
INFO:radiomics.featureextractor:Adding image type "Original" with custom settings: {}
INFO:radiomics.featureextractor:Calculating features for original image
INFO:radiomics.featureextractor:Computing firstorder
INFO:radiomics.featureextractor:Computing glcm
INFO:radiomics.featureextractor:Computing glrlm
INFO:radiomics.featureextractor:Computing glszm
INFO:radiomics.featureextractor:Computing gldm
INFO:radiomics.featureextractor:Calculating features with label: 255
INFO:radiomics.featureextractor:Loading image and mask
INFO:radiomics.featureextractor:Computing shape
INFO:radiomics.featureextractor:Adding image type "Original" with custom settings: {}
INFO:radiomics.featureextractor:Calculating features for original image
INFO:radiomics.featu

In [8]:
with open('../data/features/'  + 'features_extra.pickle', 'wb') as file:
     pickle.dump(features, file)

In [9]:
with open('../data/features/'  + 'features_extra.pickle', 'rb') as file:
     features = pickle.load(file)

In [10]:
dict_list = list(features)
feature_names = list(sorted(filter ( lambda k: k.startswith("original_"), features[dict_list[0]] )))

print('NUMEBR OF CASE_ID: ', len(dict_list))
print('NUMEBR OF FEATURES: ', len(feature_names))
print(dict_list)

NUMEBR OF CASE_ID:  45
NUMEBR OF FEATURES:  100
[('BO56', 'T2'), ('BO56', 'T25mm'), ('BO122', 'T2AX'), ('BO16', 'T2AX'), ('BO32', 'T2AX'), ('BO52', 'T2'), ('BO43', 'T2AX'), ('BO90', 'T25mm'), ('BO50', 'T2'), ('BO64', 'T2'), ('BO61', 'T2'), ('BO68', 'T2'), ('BO6', 'T2AX'), ('BO63', 'T2'), ('BO49', 'T2'), ('BO71', 'T2'), ('BO74', 'T2'), ('BO47', 'T2'), ('BO75', 'T2'), ('BO15', 'T2AX'), ('BO78', 'T2'), ('BO45', 'T2'), ('BO18', 'T2AX'), ('BO101', 'T2AX'), ('BO1', 'T2AX'), ('BO42', 'T2AX'), ('BO26', 'T2AX'), ('BO66', 'T2'), ('BO33', 'T2AX'), ('BO82', 'T2'), ('BO38', 'T2AXAlta'), ('BO38', 'T2AXbassa'), ('BO11', 'T2AX'), ('BO85', 'T2'), ('BO2', 'T2AX'), ('BO107', 'T2AX'), ('BO29', 'T2AX'), ('BO51', 'T2'), ('BO60', 'T2'), ('BO76', 'T2'), ('BO31', 't2DEF'), ('BO48', 'T2'), ('BO109', 'T2AX'), ('BO44', 'T2AX'), ('BO35', 'T2AX')]


In [11]:
sorted_list = sorted(dict_list, key=lambda x: int(x[0].replace('BO', '')))
print(sorted_list)

[('BO1', 'T2AX'), ('BO2', 'T2AX'), ('BO6', 'T2AX'), ('BO11', 'T2AX'), ('BO15', 'T2AX'), ('BO16', 'T2AX'), ('BO18', 'T2AX'), ('BO26', 'T2AX'), ('BO29', 'T2AX'), ('BO31', 't2DEF'), ('BO32', 'T2AX'), ('BO33', 'T2AX'), ('BO35', 'T2AX'), ('BO38', 'T2AXAlta'), ('BO38', 'T2AXbassa'), ('BO42', 'T2AX'), ('BO43', 'T2AX'), ('BO44', 'T2AX'), ('BO45', 'T2'), ('BO47', 'T2'), ('BO48', 'T2'), ('BO49', 'T2'), ('BO50', 'T2'), ('BO51', 'T2'), ('BO52', 'T2'), ('BO56', 'T2'), ('BO56', 'T25mm'), ('BO60', 'T2'), ('BO61', 'T2'), ('BO63', 'T2'), ('BO64', 'T2'), ('BO66', 'T2'), ('BO68', 'T2'), ('BO71', 'T2'), ('BO74', 'T2'), ('BO75', 'T2'), ('BO76', 'T2'), ('BO78', 'T2'), ('BO82', 'T2'), ('BO85', 'T2'), ('BO90', 'T25mm'), ('BO101', 'T2AX'), ('BO107', 'T2AX'), ('BO109', 'T2AX'), ('BO122', 'T2AX')]


In [12]:
sorted_ID = list(map(lambda x: x[0], sorted_list))
print(sorted_ID)

['BO1', 'BO2', 'BO6', 'BO11', 'BO15', 'BO16', 'BO18', 'BO26', 'BO29', 'BO31', 'BO32', 'BO33', 'BO35', 'BO38', 'BO38', 'BO42', 'BO43', 'BO44', 'BO45', 'BO47', 'BO48', 'BO49', 'BO50', 'BO51', 'BO52', 'BO56', 'BO56', 'BO60', 'BO61', 'BO63', 'BO64', 'BO66', 'BO68', 'BO71', 'BO74', 'BO75', 'BO76', 'BO78', 'BO82', 'BO85', 'BO90', 'BO101', 'BO107', 'BO109', 'BO122']


In [13]:
samples = np.zeros((len(sorted_list), len(feature_names)))

for k, case_id in enumerate(sorted_list):
    a = np.array([])
    for feature_name in feature_names:
        a = np.append(a, features[case_id][feature_name])
    samples[k, ...] = a

#for possible NaNs
samples = np.nan_to_num(samples)

samples.shape

(45, 100)

In [14]:
d = pd.DataFrame(data=samples, columns=feature_names, index=sorted_ID)

d

Unnamed: 0,original_firstorder_10Percentile,original_firstorder_90Percentile,original_firstorder_Energy,original_firstorder_Entropy,original_firstorder_InterquartileRange,original_firstorder_Kurtosis,original_firstorder_Maximum,original_firstorder_Mean,original_firstorder_MeanAbsoluteDeviation,original_firstorder_Median,...,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,original_shape_Maximum2DDiameterSlice,original_shape_Maximum3DDiameter,original_shape_MeshVolume,original_shape_MinorAxisLength,original_shape_Sphericity,original_shape_SurfaceArea,original_shape_SurfaceVolumeRatio,original_shape_VoxelVolume
BO1,429.0,798.0,1336950000.0,4.536893,192.0,4.809975,1370.0,589.569839,120.627757,557.0,...,42.047592,45.099889,57.140179,58.00862,3550.416667,35.631913,0.358332,3140.879027,0.884651,3594.0
BO2,237.0,390.0,1990333000.0,3.402873,75.0,5.854507,723.0,311.722498,49.870963,306.0,...,100.079968,139.014388,145.784087,161.198635,19389.291667,80.24634,0.171401,20363.155899,1.050227,19553.0
BO6,171.0,303.0,2730424000.0,3.096831,66.0,3.970963,562.0,234.0975,41.070879,229.0,...,90.005555,141.056726,155.75622,156.2498,47304.041667,74.309648,0.192545,32851.185525,0.694469,47395.0
BO11,154.0,293.0,2823765000.0,3.169558,65.0,7.140266,744.0,217.377486,44.028965,208.0,...,100.244701,94.04786,101.237345,110.154437,55595.208333,81.328868,0.24749,28463.243811,0.511973,55512.0
BO15,340.0,570.0,22314180000.0,3.91217,116.0,4.471278,1120.0,451.199374,71.950989,444.0,...,145.344419,141.088625,144.100659,157.981012,105132.416667,100.491254,0.218123,49386.421549,0.469754,105109.0
BO16,138.0,244.0,348327100.0,2.764315,55.0,3.543087,399.0,187.399852,32.846181,182.0,...,57.008771,90.022219,96.462428,97.411498,9376.083333,51.375241,0.26221,8200.670904,0.874637,9451.0
BO18,222.0,370.0,155615400.0,3.188206,78.0,2.582487,443.0,297.086268,44.511454,297.0,...,35.014283,33.015148,36.891733,37.749172,1677.0,27.527861,0.370662,1841.599478,1.098151,1704.0
BO26,191.0,303.0,10072570000.0,2.917379,57.0,4.066159,610.0,247.838891,35.250953,247.0,...,142.014084,138.014492,144.069428,145.196419,158485.625,110.110423,0.248722,56941.572313,0.359285,158607.0
BO29,223.0,417.0,1859944000.0,3.613234,109.0,2.597569,580.0,317.326669,61.116935,314.0,...,82.05486,95.634722,82.462113,99.362971,17205.916667,63.084556,0.159622,20191.865984,1.173542,17507.0
BO31,493.0,1068.0,10149750000.0,5.202772,309.0,4.431866,2467.0,783.858711,181.5298,780.0,...,75.166482,117.004273,117.153745,118.156676,15058.708333,55.602673,0.238019,12389.802684,0.822767,15217.0
