In [1]:
%pip install pyradiomics dicom_numpy pydicom plotly matplotlib scikit-image simpleITK pynrrd dicom2nifti NiBabel NiLearn openpyxl pydicom-seg pandarallel

Note: you may need to restart the kernel to use updated packages.


In [11]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import dicom_numpy
import pydicom as dicom

import dicom2nifti
import nibabel as nib
import nilearn as nil
import scipy.ndimage as ndi
import SimpleITK as sitk
import os

from radiomics import featureextractor

from tqdm import tqdm

from pandarallel import pandarallel

import re

tqdm.pandas()
pandarallel.initialize(progress_bar=True,nb_workers= 12)

INFO: Pandarallel will run on 12 workers.
INFO: Pandarallel will use standard multiprocessing data transfer (pipe) to transfer data between the main process and workers.

https://nalepae.github.io/pandarallel/troubleshooting/


In [3]:
annotation_boxes = pd.read_csv("segmentation_annotations_NIFTI.csv")#.set_index("Patient_ID")

DATASET_PATH = '../../Processed NIFTI Dataset/'
# SEGMENTED_DATASET_PATH = '../../Segmented Dataset/'

In [4]:
annotation_boxes

Unnamed: 0,Patient_ID,Start Row,End Row,Start Column,End Column,Start Slice,End Slice
0,Breast_MRI_001,234,271,308,341,89,112
1,Breast_MRI_002,251,294,108,136,59,72
2,Breast_MRI_003,351,412,82,139,96,108
3,Breast_MRI_004,262,280,193,204,86,95
4,Breast_MRI_005,188,213,138,178,76,122
...,...,...,...,...,...,...,...
917,Breast_MRI_918,345,395,338,395,62,85
918,Breast_MRI_919,285,312,369,397,98,109
919,Breast_MRI_920,172,193,337,355,87,101
920,Breast_MRI_921,328,374,404,446,97,121


In [5]:
filenames = ['pre.img.gz', 'post_1.img.gz']

In [6]:
# generate segmentation masks for each patient in the directory

def segment_row(x):
    import SimpleITK as sitk
    import os
    import numpy as np

    filenames = ['pre.img.gz', 'post_1.img.gz']
    DATASET_PATH = '../../Processed NIFTI Dataset/'


    patient_id = x['Patient_ID']
    patient_path = os.path.join(DATASET_PATH, patient_id)

    row1 = x['Start Row']
    row2 = x['End Row']

    col1 = x['Start Column']
    col2 = x['End Column']

    slice1 = x['Start Slice']
    slice2 = x['End Slice']

    if(os.path.isdir(patient_path)):
        # find files with paths here
        img_path = os.path.join(patient_path, filenames[1])   #load pre path and generate segments from this
        img = sitk.ReadImage(img_path)

        
        #filter instantiation : Otsu filtering
        otsu_filter = sitk.OtsuThresholdImageFilter()
        otsu_filter.SetInsideValue(0)
        otsu_filter.SetOutsideValue(1)

        # segmentation and getting back array
        seg = otsu_filter.Execute(img)
        seg = sitk.GetArrayFromImage(seg)

        #mask generation
        mask = np.zeros(seg.shape)
        mask[slice1:slice2, row1:row2, col1:col2] = seg[slice1:slice2, row1:row2, col1:col2]

        segment = sitk.GetImageFromArray(mask)
        segment.CopyInformation(img)

        sitk.WriteImage(segment, os.path.join(patient_path, 'segmentation.img.gz'))

    
annotation_boxes.parallel_apply(segment_row, axis = 1)


VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=47), Label(value='0 / 47'))), HBox…

0      None
1      None
2      None
3      None
4      None
       ... 
917    None
918    None
919    None
920    None
921    None
Length: 922, dtype: object

In [7]:
feat_ext = featureextractor.RadiomicsFeatureExtractor()
feat_ext.settings['minimumROIDimensions'] = 2
feat_ext.settings['normalize'] = True
feat_ext.settings['additionalInfo'] = False
feat_ext.settings['correctMask'] = True
feat_ext.settings

{'minimumROIDimensions': 2,
 'minimumROISize': None,
 'normalize': True,
 'normalizeScale': 1,
 'removeOutliers': None,
 'resampledPixelSpacing': None,
 'interpolator': 'sitkBSpline',
 'preCrop': False,
 'padDistance': 5,
 'distances': [1],
 'force2D': False,
 'force2Ddimension': 0,
 'resegmentRange': None,
 'label': 1,
 'additionalInfo': False,
 'correctMask': True}

In [14]:
def get_features(x):
    from radiomics import featureextractor
    import SimpleITK as sitk
    import os
    import numpy as np

    feat_ext = featureextractor.RadiomicsFeatureExtractor()
    feat_ext.settings['minimumROIDimensions'] = 2
    feat_ext.settings['normalize'] = True
    feat_ext.settings['additionalInfo'] = False
    feat_ext.settings['correctMask'] = True
    feat_ext.settings

    filenames = ['pre.img.gz', 'post_1.img.gz']
    DATASET_PATH = '../../Processed NIFTI Dataset/'

    
    patient_id = x['Patient_ID']
    file_path = os.path.join(DATASET_PATH, patient_id, 'post_1.img.gz')                # only for post1
    segmentation_path = os.path.join(DATASET_PATH, patient_id, 'segmentation.img.gz')  # segmentation

    img = sitk.ReadImage(file_path)
    smt = sitk.ReadImage(segmentation_path)

    smt.SetOrigin(img.GetOrigin())

    feats = feat_ext.execute(img, smt)
    feats['patient'] = patient_id
    feats['sequence'] = 'post_1'

    return feats

In [15]:
pyradiomics_features = annotation_boxes.parallel_apply(get_features, axis = 1)

VBox(children=(HBox(children=(IntProgress(value=0, description='0.00%', max=77), Label(value='0 / 77'))), HBox…

In [16]:
# import pickle as pkl

# with open("pyrad_feats.pkl", 'wb') as file:
#     pkl.dump(pyradiomics_features, file)

In [24]:
myfeatures = pyradiomics_features.tolist()
feature_df = pd.DataFrame()

for i in myfeatures:
    temp = {}
    for key, value in i.items():
        if(isinstance(value, str)):
            temp[key] = value
        else:
            temp[key] = value.item()

    ft_df = pd.DataFrame(temp, index = [0])
    feature_df = pd.concat([feature_df, ft_df])

In [28]:
feature_df = feature_df.set_index(['patient', 'sequence']).reset_index()

feature_df

Unnamed: 0,patient,sequence,original_shape_Elongation,original_shape_Flatness,original_shape_LeastAxisLength,original_shape_MajorAxisLength,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,original_shape_Maximum2DDiameterSlice,original_shape_Maximum3DDiameter,...,original_glszm_SmallAreaHighGrayLevelEmphasis,original_glszm_SmallAreaLowGrayLevelEmphasis,original_glszm_ZoneEntropy,original_glszm_ZonePercentage,original_glszm_ZoneVariance,original_ngtdm_Busyness,original_ngtdm_Coarseness,original_ngtdm_Complexity,original_ngtdm_Contrast,original_ngtdm_Strength
0,Breast_MRI_001,post_1,0.807005,0.729780,23.614309,32.358102,36.073737,38.431136,39.309346,45.798426,...,5.741796e-01,5.741796e-01,2.136700e+00,0.003961,4.009263e+06,0.0,1000000.0,0.0,0.0,0.0
1,Breast_MRI_002,post_1,0.641558,0.577887,17.378564,30.072592,23.148662,29.852205,29.748492,33.784677,...,6.011205e-01,6.011205e-01,1.823026e+00,0.001401,6.054147e+06,0.0,1000000.0,0.0,0.0,0.0
2,Breast_MRI_003,post_1,0.756223,0.272616,13.050754,47.872295,40.488107,43.126056,56.606869,57.665740,...,5.064866e-01,5.064866e-01,2.281373e+00,0.001484,1.663605e+07,0.0,1000000.0,0.0,0.0,0.0
3,Breast_MRI_004,post_1,0.718139,0.610317,8.730117,14.304230,11.420813,14.972284,14.477716,16.540987,...,3.501278e-07,3.501278e-07,-3.203427e-16,0.000592,0.000000e+00,0.0,1000000.0,0.0,0.0,0.0
4,Breast_MRI_005,post_1,0.687760,0.436019,22.539193,51.693090,55.175581,49.674566,36.443449,58.274413,...,5.000000e-01,5.000000e-01,1.000000e+00,0.000047,4.436710e+08,0.0,1000000.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
917,Breast_MRI_918,post_1,0.886914,0.668151,28.553043,42.734408,45.886501,41.977361,51.383808,56.016573,...,3.266470e-10,3.266470e-10,-3.203427e-16,0.000018,0.000000e+00,0.0,1000000.0,0.0,0.0,0.0
918,Breast_MRI_919,post_1,0.757419,0.596730,12.864613,21.558522,21.066007,20.543932,25.375225,27.268354,...,4.503200e-01,4.503200e-01,1.921928e+00,0.000938,4.485585e+06,0.0,1000000.0,0.0,0.0,0.0
919,Breast_MRI_920,post_1,0.872632,0.837806,15.199560,18.142097,19.038279,20.649190,20.505122,24.348411,...,4.626802e-08,4.626802e-08,-3.203427e-16,0.000215,0.000000e+00,0.0,1000000.0,0.0,0.0,0.0
920,Breast_MRI_921,post_1,0.855874,0.735409,30.453436,41.410193,43.076382,45.448460,48.143552,54.398662,...,4.008642e-01,4.008642e-01,2.197160e+00,0.000230,1.504081e+08,0.0,1000000.0,0.0,0.0,0.0


In [29]:
feature_df.to_csv("pyradiomics_extraction.csv", index = False)