In [1]:
# Install the third-party packages this notebook imports into the kernel's environment.
# A kernel restart may be required afterwards before the new packages can be imported.
%pip install pyradiomics dicom_numpy pydicom plotly matplotlib scikit-image simpleITK pynrrd dicom2nifti NiBabel NiLearn openpyxl pydicom-seg

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import dicom_numpy
import pydicom as dicom

import dicom2nifti
import nibabel as nib
import nilearn as nil
import scipy.ndimage as ndi
import SimpleITK as sitk
import os

from radiomics import featureextractor

from tqdm import tqdm



In [4]:
# Root folder of the dataset; later cells treat each entry in it as one patient folder.
DATASET_PATH = '../../New Dataset/'
# SEGMENTED_DATASET_PATH = '../../Segmented Dataset/'

# Bounding-box annotations loaded from the spreadsheet and re-indexed by
# "Patient ID" so a patient's box can be fetched with `.loc[patient_id]`.
annotation_boxes = (
    pd.read_excel("../../Simple Path Dataset/Annotation_Boxes.xlsx")
    .set_index("Patient ID")
)

In [6]:
# Display the annotation table (one row per Patient ID, columns are the box bounds).
annotation_boxes

Unnamed: 0_level_0,Start Row,End Row,Start Column,End Column,Start Slice,End Slice
Patient ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Breast_MRI_001,234,271,308,341,89,112
Breast_MRI_002,251,294,108,136,59,72
Breast_MRI_003,351,412,82,139,96,108
Breast_MRI_004,262,280,193,204,86,95
Breast_MRI_005,188,213,138,178,76,122
...,...,...,...,...,...,...
Breast_MRI_918,345,395,338,395,62,85
Breast_MRI_919,285,312,369,397,98,109
Breast_MRI_920,172,193,337,355,87,101
Breast_MRI_921,328,374,404,446,97,121


In [5]:
# Sanity check: look up the bounding box of one patient by its index label.
annotation_boxes.loc['Breast_MRI_001']

Start Row       234
End Row         271
Start Column    308
End Column      341
Start Slice      89
End Slice       112
Name: Breast_MRI_001, dtype: int64

In [12]:
# Image files expected inside every patient folder. filenames[0] is the volume
# whose shape/affine the segmentation mask is built from; features are extracted
# from every file listed here.
filenames = ['pre.nii.gz', 'post_1.nii.gz']

In [18]:
# Generate a box-shaped segmentation mask for each patient folder in DATASET_PATH
# and save it next to the images as 'segmentation.nii.gz'.

# sorted() makes the processing order deterministic (os.listdir order is arbitrary).
for item in tqdm(sorted(os.listdir(DATASET_PATH))):
    patient_path = os.path.join(DATASET_PATH, item)

    # Skip anything that is not a patient folder *before* touching the annotation
    # table: looking up a stray file name (e.g. a hidden OS file) in
    # annotation_boxes would raise KeyError and abort the whole run.
    if not os.path.isdir(patient_path):
        continue

    # A patient folder without an annotation row still raises KeyError here on
    # purpose: that is a data problem that should be surfaced, not hidden.
    box = annotation_boxes.loc[item]
    row1, row2 = box['Start Row'], box['End Row']
    col1, col2 = box['Start Column'], box['End Column']
    slice1, slice2 = box['Start Slice'], box['End Slice']

    # The first entry of `filenames` is the reference volume: the mask takes its
    # shape and affine from it.
    nifti_img = nib.load(os.path.join(patient_path, filenames[0]))

    # A binary label only needs uint8; the default float64 array is eight times
    # larger in memory and on disk.
    mask = np.zeros(nifti_img.shape, dtype=np.uint8)

    # NOTE(review): this assumes the NIfTI array axes are ordered (row, column,
    # slice) exactly like the annotation columns, and that the annotation indices
    # are 0-based with an exclusive end -- confirm against the dataset docs and
    # the DICOM->NIfTI conversion used.
    mask[row1:row2, col1:col2, slice1:slice2] = 1

    segment = nib.Nifti1Image(mask, affine=nifti_img.affine)
    nib.save(segment, os.path.join(patient_path, 'segmentation.nii.gz'))

100%|██████████| 922/922 [15:48<00:00,  1.03s/it]


In [19]:
# Build a radiomics feature extractor with the library defaults, then override
# three settings in one go (same keys, values and order as assigning them one by one).
feat_ext = featureextractor.RadiomicsFeatureExtractor()
feat_ext.settings.update({
    'minimumROIDimensions': 2,
    'normalize': True,
    'additionalInfo': False,
})
# Show the effective settings as the cell output.
feat_ext.settings

{'minimumROIDimensions': 2,
 'minimumROISize': None,
 'normalize': True,
 'normalizeScale': 1,
 'removeOutliers': None,
 'resampledPixelSpacing': None,
 'interpolator': 'sitkBSpline',
 'preCrop': False,
 'padDistance': 5,
 'distances': [1],
 'force2D': False,
 'force2Ddimension': 0,
 'resegmentRange': None,
 'label': 1,
 'additionalInfo': False}

In [35]:
# Extract radiomics features for every (patient, sequence) pair and collect them
# into one table with a row per pair plus 'patient' and 'sequence' columns.

# Rows are accumulated in a plain list and turned into a DataFrame once at the
# end; concatenating onto a DataFrame inside the loop copies the whole table on
# every iteration and leaves every row with the same index label 0.
feature_rows = []

# sorted() makes the row order deterministic (os.listdir order is arbitrary).
for patient in tqdm(sorted(os.listdir(DATASET_PATH))):
    patient_dir = os.path.join(DATASET_PATH, patient)

    # Only patient folders contain images; skip stray files in the dataset root.
    if not os.path.isdir(patient_dir):
        continue

    # The segmentation is the same for all sequences of a patient, so read it once.
    segmentation_path = os.path.join(patient_dir, 'segmentation.nii.gz')
    smt = sitk.ReadImage(segmentation_path)

    for filename in filenames:
        # get features for pre and post_1
        file_path = os.path.join(patient_dir, filename)
        img = sitk.ReadImage(file_path)

        # Force the mask to share the current image's origin before extraction.
        # NOTE(review): presumably this works around a geometry-mismatch check
        # between image and mask; only the origin is copied (not spacing or
        # direction) -- confirm the volumes really are voxel-aligned.
        smt.SetOrigin(img.GetOrigin())

        feats = feat_ext.execute(img, smt)

        # Convert array-like results to plain Python scalars; values that
        # already are plain Python objects are kept as they are.
        row = {
            key: (value.item() if hasattr(value, 'item') else value)
            for key, value in feats.items()
        }
        row['patient'] = patient
        row['sequence'] = filename.split('.nii.gz')[0]
        feature_rows.append(row)

features = pd.DataFrame(feature_rows)

  0%|          | 0/922 [00:00<?, ?it/s]GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
  0%|          | 1/922 [00:01<29:59,  1.95s/it]GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
  0%|          | 2/922 [00:04<31:35,  2.06s/it]GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
  0%|          | 3/922 [00:06<34:37,  2.26s/it]GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
  0%|          | 4/922 [00:08<34:42,  2.27s/it]GLCM is symmetrical, 

In [36]:
# Persist the extracted feature table; index=False leaves the DataFrame index out of the file.
features.to_csv("pyradiomics_extraction.csv", index = False)

Unnamed: 0,original_shape_Elongation,original_shape_Flatness,original_shape_LeastAxisLength,original_shape_MajorAxisLength,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,original_shape_Maximum2DDiameterSlice,original_shape_Maximum3DDiameter,original_shape_MeshVolume,original_shape_MinorAxisLength,...,original_glszm_ZoneEntropy,original_glszm_ZonePercentage,original_glszm_ZoneVariance,original_ngtdm_Busyness,original_ngtdm_Coarseness,original_ngtdm_Complexity,original_ngtdm_Contrast,original_ngtdm_Strength,patient,sequence
0,0.891808,0.850437,29.186299,34.319179,38.431136,36.073737,39.309346,46.240422,19914.791496,30.606121,...,1.750000e+00,0.000285,8.618192e+07,5.497338,0.091060,0.000782,3.011166e-07,0.092604,Breast_MRI_001,pre
0,0.891808,0.850437,29.186299,34.319179,38.431136,36.073737,39.309346,46.240422,19914.791496,30.606121,...,-3.203427e-16,0.000036,0.000000e+00,0.000000,1000000.000000,0.000000,0.000000e+00,0.000000,Breast_MRI_001,post_1
0,0.668996,0.650923,18.931009,29.083312,29.852205,23.148662,29.748492,33.784677,6966.458183,19.456628,...,2.978075e+00,0.005367,2.374784e+06,288.409155,0.001006,0.127011,1.063641e-02,0.001006,Breast_MRI_002,pre
0,0.668996,0.650923,18.931009,29.083312,29.852205,23.148662,29.748492,33.784677,6966.458183,19.456628,...,1.943752e+00,0.001278,1.153687e+07,13.650935,0.018427,0.006934,2.849699e-05,0.018260,Breast_MRI_002,post_1
0,0.934408,0.286810,13.808210,48.144081,43.126056,40.488107,56.606869,57.665740,19467.935987,44.986213,...,2.846439e+00,0.000240,1.041128e+08,1140.457544,0.000980,0.048920,7.407411e-03,0.000972,Breast_MRI_003,pre
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.877183,0.856791,15.749673,18.382163,20.649190,19.038279,20.505122,24.348411,3033.167788,16.124515,...,-3.203427e-16,0.000189,0.000000e+00,0.000000,1000000.000000,0.000000,0.000000e+00,0.000000,Breast_MRI_920,post_1
0,0.913000,0.800835,33.226543,41.489900,45.448460,43.076382,48.143552,55.603011,33924.801358,37.880295,...,1.971225e+00,0.002027,1.360858e+07,6049.519483,0.000420,0.102645,2.014567e-02,0.000420,Breast_MRI_921,pre
0,0.913000,0.800835,33.226543,41.489900,45.448460,43.076382,48.143552,55.603011,33924.801358,37.880295,...,2.541725e+00,0.001294,2.233828e+07,2876.276646,0.000583,0.073943,1.326310e-02,0.000583,Breast_MRI_921,post_1
0,0.945533,0.798995,9.708553,12.150957,12.641847,14.031840,13.078416,16.022136,881.013461,11.489125,...,2.753434e+00,0.007778,1.507122e+05,73.646825,0.004476,0.248217,3.370219e-02,0.004456,Breast_MRI_922,pre
