In [5]:
from radiomics import featureextractor, glrlm
import os
import sys
import SimpleITK as sitk
import pandas as pd
import pprint
from multiprocessing import Manager, Lock, Pool, cpu_count
import time

In [2]:
def progress(count, total, status=''):
    """Redraw a single-line text progress bar on stdout.

    The line starts with a carriage return so that successive calls
    overwrite each other instead of scrolling.

    Args:
        count: Number of items processed so far.
        total: Number of items expected overall. A value <= 0 is rendered
            as an empty bar at 0.0% instead of raising ZeroDivisionError.
        status: Free text appended after the percentage.
    """
    bar_len = 40
    total = float(total)

    if total > 0:
        filled_len = int(round(bar_len * count / total))
        percents = round(100.0 * count / total, 1)
    else:
        # Nothing to process: avoid dividing by zero
        filled_len, percents = 0, 0.0

    # Keep the bar exactly bar_len cells wide even if count overshoots total
    filled_len = max(0, min(bar_len, filled_len))
    bar = '█' * filled_len + '░' * (bar_len - filled_len)

    sys.stdout.write(f'\r|{bar}| {percents}% ... {status}')
    sys.stdout.flush()

In [3]:
# Filesystem locations used by the rest of the notebook:
#   survival_data - csv file holding the survival table
#   OUTDIR        - directory that receives the generated csv
#   DATADIR       - directory containing one sub-directory per scan
survival_data = '/path/to/file/.../brats/survival_data.csv'
OUTDIR = '/path/to/file'
DATADIR = '/path/to/file'

In [4]:
# Path to the extractor parameter file and construction of the extractor.
# The extractor built here is the one whose settings are printed in the
# following cell and that is used to compute the features of every scan.
# NOTE(review): more recent PyRadiomics releases appear to name this class
# `RadiomicsFeatureExtractor` (no "s") - confirm against the installed version.
params = '/path/to/file/my_params.yaml'
extractor = featureextractor.RadiomicsFeaturesExtractor(params)
# Alternative: build the extractor without a parameter file
# extractor = featureextractor.RadiomicsFeaturesExtractor()

In [5]:
# Show the extractor's current setup (it can be changed by modifying my_params.yaml)
report = [
    ('Extraction parameters:', extractor.settings),
    ('\nEnabled filters:', extractor._enabledImagetypes),
    ('\nEnabled features:', extractor._enabledFeatures),
]
for heading, content in report:
    print(heading)
    pprint.pprint(content, indent=2)

Extraction parameters:
{ 'additionalInfo': True,
  'binWidth': 25,
  'distances': [1],
  'force2D': False,
  'force2Ddimension': 0,
  'interpolator': 'sitkBSpline',
  'label': 1,
  'minimumROIDimensions': 1,
  'minimumROISize': None,
  'normalize': False,
  'normalizeScale': 1,
  'padDistance': 5,
  'preCrop': False,
  'removeOutliers': None,
  'resampledPixelSpacing': None,
  'resegmentRange': None,
  'weightingNorm': None}

Enabled filters:
{ 'Exponential': {},
  'Gradient': {},
  'LoG': {'sigma': [1.0, 3.0]},
  'Logarithm': {},
  'Original': {},
  'Square': {},
  'SquareRoot': {},
  'Wavelet': {'binWidth': 30}}

Enabled features:
{ 'firstorder': None,
  'glcm': [ 'Autocorrelation',
            'JointAverage',
            'ClusterProminence',
            'ClusterShade',
            'ClusterTendency',
            'Contrast',
            'Correlation',
            'DifferenceAverage',
            'DifferenceEntropy',
            'DifferenceVariance',
            'JointEnergy',
        

In [6]:
# List the entries of DATADIR and keep only those whose name starts with
# 'Brats18', discarding anything else that may live in that directory.
# The prefix is dataset specific and may need adapting for other datasets.
g_samples = [entry for entry in os.listdir(DATADIR) if entry.startswith('Brats18')]

In [7]:
# Read the survival csv into a DataFrame and collect the sample names it lists
survival = pd.read_csv(survival_data)
sub_names = list(survival['BraTS18ID'])

In [3]:
#Function to segment the array in order to utilize multiprocessing

def segment(array, parts):
    """Yield up to ``parts`` contiguous, non-empty slices that together cover ``array``.

    Slice boundaries are placed at ``floor(k * len(array) / parts)`` using
    integer arithmetic, so the slices differ in length by at most one element
    and no floating-point accumulation is involved.

    Args:
        array: Any sliceable sequence supporting ``len()``.
        parts: Desired number of slices; must be a positive integer.

    Yields:
        Consecutive slices of ``array`` in their original order. Fewer than
        ``parts`` slices are produced when ``array`` has fewer elements than
        ``parts``; nothing is produced for an empty ``array``.

    Raises:
        ValueError: If ``parts`` is smaller than 1. Because this is a
            generator, the error surfaces on the first iteration.
    """
    if parts < 1:
        raise ValueError(f'parts must be a positive integer, got {parts!r}')

    size = len(array)
    for index in range(parts):
        start = index * size // parts
        stop = (index + 1) * size // parts
        # Skip empty slices (happens when there are fewer items than parts)
        if start < stop:
            yield array[start:stop]

In [9]:
#Calculates the features of the brain scans
def _first_int_or_none(column):
    """Return the first value of ``column`` as an int, or None if it is absent or NaN."""
    if len(column) == 0:
        return None
    value = column.iloc[0]
    return None if pd.isna(value) else int(value)


def calculate_feature(samples):
    """Extract the features of every sample in ``samples`` and publish them.

    For each sample the segmentation image and the four sequence images are
    read from ``DATADIR/<sample>/``, ``extractor.execute`` is run once per
    sequence, and every returned entry whose key does not start with
    ``general_`` is stored under ``<key>_<sequence>``. Age, survival and the
    sample name are added, and the resulting dict is appended to the shared
    ``features_man`` list.

    Relies on these notebook-level names: ``survival``, ``DATADIR``,
    ``extractor``, ``features_man``, ``g_samples`` and ``progress``.

    Args:
        samples: Iterable of sample names (sub-directory names of DATADIR).

    Returns:
        None. Results are delivered through ``features_man``.
    """
    for sample in samples:

        # Gets the row of the sample and gets age and survival.
        # If age or survival are not available (sample missing from the csv,
        # or value is NaN) the values are set to None
        row = survival.loc[survival['BraTS18ID'] == sample]
        age = _first_int_or_none(row['Age'])
        surv = _first_int_or_none(row['Survival'])

        # Reads the segmentation image with SimpleITK; it is handed to the
        # extractor unchanged (no thresholding is applied here)
        label = sitk.ReadImage(os.path.join(DATADIR, sample, f'{sample}_seg.nii.gz'))

        # Gets the features for every scan
        feature = {}
        for seq in ['t1', 't1ce', 't2', 'flair']:    #could change with OPBG
            image = sitk.ReadImage(os.path.join(DATADIR, sample, f'{sample}_{seq}.nii.gz'))
            result = extractor.execute(image, label)

            # Keep everything except the "general_" entries, tagged with the sequence
            for key, value in result.items():
                if not key.startswith("general_"):
                    feature[key + f'_{seq}'] = value

        # Adds age, survival and sample name to the feature list
        feature['age'] = age
        feature['surv'] = surv
        feature['sample'] = sample

        # A freshly constructed Lock() is private to the process that creates
        # it and therefore cannot serialise anything across workers, so no
        # lock is used. The append is a single call on the shared list, and
        # the number of finished samples is read back from that list instead
        # of a separately incremented counter (which could lose updates).
        features_man.append(feature)
        # Progress bar for better visualization
        progress(len(features_man), len(g_samples), status=f'Extracting Features ... {sample}')
            

In [10]:
with Manager() as manager:
    # Manager-backed objects shared with the worker processes:
    # the list collecting the per-sample feature dicts and an integer counter
    features_man = manager.list()
    count = manager.Value('i', 0)

    t0 = time.time()

    with Pool(cpu_count()) as pool:
        # Submit one asynchronous job per slice of the sample list
        tasks = []
        for part in segment(g_samples, cpu_count()):
            tasks.append(pool.apply_async(calculate_feature, args=(part,)))

        # Block until every job has finished; get() re-raises worker errors
        for task in tasks:
            task.get()

        # Copy the shared list into a plain list while the manager is alive
        features_all = list(features_man)

    print(f' ... {round(time.time() - t0, 2)}s')

|██████████░░░░░░░░░░░░░░░░░░░░░░░░░░░░░░| 25.3% ... Extracting Features ... Brats18_TCIA10_346_1

  lrlgle = numpy.sum((self.P_glrlm * (jvector[None, :, None] ** 2) / (ivector[:, None, None] ** 2)),
  lrlgle = numpy.sum((self.P_glrlm * (jvector[None, :, None] ** 2) / (ivector[:, None, None] ** 2)),
  lglre = numpy.sum((pg / (ivector[:, None] ** 2)), 0) / Nz
  srlgle = numpy.sum((self.P_glrlm / ((ivector[:, None, None] ** 2) * (jvector[None, :, None] ** 2))),
  srlgle = numpy.sum((self.P_glrlm / ((ivector[:, None, None] ** 2) * (jvector[None, :, None] ** 2))),
  lilae = numpy.sum(self.P_glszm * (jvector[None, :] ** 2) / (ivector[:, None] ** 2)) / Nz
  lilae = numpy.sum(self.P_glszm * (jvector[None, :] ** 2) / (ivector[:, None] ** 2)) / Nz
  lie = numpy.sum(pg / (ivector ** 2)) / Nz
  lisae = numpy.sum(self.P_glszm / ((ivector[:, None] ** 2) * (jvector[None, :] ** 2))) / Nz
  lisae = numpy.sum(self.P_glszm / ((ivector[:, None] ** 2) * (jvector[None, :] ** 2))) / Nz
  ldlgle = numpy.sum(self.P_gldm * (jvector[None, :] ** 2) / (ivector[:, None] ** 2)) / Nz
  ldlgle = numpy.sum(self.P_g

|██████████████░░░░░░░░░░░░░░░░░░░░░░░░░░| 36.0% ... Extracting Features ... Brats18_TCIA10_629_1

  lrlgle = numpy.sum((self.P_glrlm * (jvector[None, :, None] ** 2) / (ivector[:, None, None] ** 2)),
  lrlgle = numpy.sum((self.P_glrlm * (jvector[None, :, None] ** 2) / (ivector[:, None, None] ** 2)),
  lglre = numpy.sum((pg / (ivector[:, None] ** 2)), 0) / Nz
  srlgle = numpy.sum((self.P_glrlm / ((ivector[:, None, None] ** 2) * (jvector[None, :, None] ** 2))),
  srlgle = numpy.sum((self.P_glrlm / ((ivector[:, None, None] ** 2) * (jvector[None, :, None] ** 2))),
  lilae = numpy.sum(self.P_glszm * (jvector[None, :] ** 2) / (ivector[:, None] ** 2)) / Nz
  lilae = numpy.sum(self.P_glszm * (jvector[None, :] ** 2) / (ivector[:, None] ** 2)) / Nz
  lie = numpy.sum(pg / (ivector ** 2)) / Nz
  lisae = numpy.sum(self.P_glszm / ((ivector[:, None] ** 2) * (jvector[None, :] ** 2))) / Nz
  lisae = numpy.sum(self.P_glszm / ((ivector[:, None] ** 2) * (jvector[None, :] ** 2))) / Nz
  ldlgle = numpy.sum(self.P_gldm * (jvector[None, :] ** 2) / (ivector[:, None] ** 2)) / Nz
  ldlgle = numpy.sum(self.P_g

|█████████████████████████████████░░░░░░░| 82.7% ... Extracting Features ... Brats18_TCIA10_644_1

  lrlgle = numpy.sum((self.P_glrlm * (jvector[None, :, None] ** 2) / (ivector[:, None, None] ** 2)),
  lrlgle = numpy.sum((self.P_glrlm * (jvector[None, :, None] ** 2) / (ivector[:, None, None] ** 2)),
  lglre = numpy.sum((pg / (ivector[:, None] ** 2)), 0) / Nz
  srlgle = numpy.sum((self.P_glrlm / ((ivector[:, None, None] ** 2) * (jvector[None, :, None] ** 2))),
  srlgle = numpy.sum((self.P_glrlm / ((ivector[:, None, None] ** 2) * (jvector[None, :, None] ** 2))),
  lilae = numpy.sum(self.P_glszm * (jvector[None, :] ** 2) / (ivector[:, None] ** 2)) / Nz
  lilae = numpy.sum(self.P_glszm * (jvector[None, :] ** 2) / (ivector[:, None] ** 2)) / Nz
  lie = numpy.sum(pg / (ivector ** 2)) / Nz
  lisae = numpy.sum(self.P_glszm / ((ivector[:, None] ** 2) * (jvector[None, :] ** 2))) / Nz
  lisae = numpy.sum(self.P_glszm / ((ivector[:, None] ** 2) * (jvector[None, :] ** 2))) / Nz
  ldlgle = numpy.sum(self.P_gldm * (jvector[None, :] ** 2) / (ivector[:, None] ** 2)) / Nz
  ldlgle = numpy.sum(self.P_g

|████████████████████████████████████████| 100.0% ... Extracting Features ... Brats18_TCIA10_637_1 ... 605.82s


In [11]:
# Assemble the feature table: one single-row frame per sample, concatenated
# once at the end. Concatenating onto the growing frame inside the loop
# copies all previously added rows on every iteration (quadratic cost).
t0 = time.time()

# Generation of the rows that make up the csv file
rows = []
for i, feature in enumerate(features_all):
    progress(i + 1, len(features_all), status='Writing features to disk')
    rows.append(pd.DataFrame(feature, index=[i]))

# pd.concat raises on an empty list, so fall back to an empty frame
df = pd.concat(rows) if rows else pd.DataFrame()

print(f' ... {round(time.time() - t0, 2)}s')

# Saving the csv file to the specified path
df.to_csv(os.path.join(OUTDIR, 'HGG_full.csv'))

|████████████████████████████████████████| 100.0% ... Writing features to disk ... 16.21s


In [12]:
# Display the assembled feature table (one row per extracted sample)
df

Unnamed: 0,original_shape_Elongation_t1,original_shape_Flatness_t1,original_shape_LeastAxis_t1,original_shape_MajorAxis_t1,original_shape_Maximum2DDiameterColumn_t1,original_shape_Maximum2DDiameterRow_t1,original_shape_Maximum2DDiameterSlice_t1,original_shape_Maximum3DDiameter_t1,original_shape_MinorAxis_t1,original_shape_Sphericity_t1,...,gradient_gldm_LargeDependenceEmphasis_flair,gradient_gldm_LargeDependenceHighGrayLevelEmphasis_flair,gradient_gldm_LargeDependenceLowGrayLevelEmphasis_flair,gradient_gldm_LowGrayLevelEmphasis_flair,gradient_gldm_SmallDependenceEmphasis_flair,gradient_gldm_SmallDependenceHighGrayLevelEmphasis_flair,gradient_gldm_SmallDependenceLowGrayLevelEmphasis_flair,age,surv,sample
0,0.859293,0.472908,10.934087,23.120976,21.213203,28.844410,19.235384,32.939338,19.867693,0.296505,...,93.709382,109.778032,91.394562,0.855406,0.065322,0.216538,0.039544,,,Brats18_TCIA13_653_1
1,0.866290,0.501836,13.434651,26.771009,37.735925,28.284271,26.627054,41.593269,23.191446,0.239142,...,32.387075,332.035310,5.553936,0.198197,0.134303,3.715464,0.028742,,,Brats18_TCIA09_312_1
2,0.755805,0.598468,16.574364,27.694660,27.658633,47.095647,49.497475,50.685304,20.931749,0.441835,...,299.918558,376.804183,280.813886,0.834803,0.023494,0.106719,0.012819,,,Brats18_TCIA10_449_1
3,0.807621,0.600040,15.748353,26.245517,28.160256,28.792360,30.675723,31.192948,21.196427,0.694198,...,332.456719,719.936617,250.014407,0.667348,0.010390,0.049565,0.005074,,,Brats18_2013_16_1
4,0.828242,0.621108,24.436927,39.344085,41.436699,44.721360,37.947332,46.195238,32.586442,0.674545,...,174.676728,512.193677,120.849096,0.472414,0.046263,1.512628,0.007328,,,Brats18_TCIA10_266_1
5,0.720288,0.575729,27.343621,47.493922,41.109610,59.682493,55.542776,61.204575,34.209304,0.572253,...,347.059430,600.375983,301.185661,0.684681,0.020191,0.635871,0.004703,,,Brats18_TCIA12_466_1
6,0.823293,0.651742,29.098382,44.647086,47.507894,55.072679,45.221676,56.692151,36.757635,0.687487,...,346.187508,627.965469,291.428430,0.645751,0.018428,0.391621,0.004453,,,Brats18_TCIA13_630_1
7,0.702176,0.605063,35.280154,58.308196,51.623638,46.529560,54.671748,56.780278,40.942637,0.296694,...,230.189248,553.715024,179.486462,0.539736,0.036880,0.755513,0.009531,,,Brats18_2013_0_1
8,0.841307,0.634258,30.833516,48.613516,51.623638,55.803226,49.497475,56.169387,40.898893,0.765215,...,592.437635,622.152675,585.612432,0.940775,0.005255,0.022291,0.002631,,,Brats18_2013_24_1
9,0.849219,0.701774,12.873150,18.343718,17.464249,18.867962,20.000000,20.346990,15.577831,0.415493,...,4.786260,2495.434421,0.027403,0.007745,0.570217,566.267711,0.005240,,,Brats18_TCIA09_620_1
