In [1]:
%pip install pyradiomics dicom_numpy pydicom plotly matplotlib scikit-image simpleITK pynrrd dicom2nifti NiBabel NiLearn openpyxl pydicom-seg

Note: you may need to restart the kernel to use updated packages.


In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import dicom_numpy
import pydicom as dicom

import dicom2nifti
import nibabel as nib
import nilearn as nil
import scipy.ndimage as ndi
import SimpleITK as sitk
import os

from radiomics import featureextractor

from tqdm import tqdm



In [17]:
annotation_boxes = pd.read_csv("segmentation_annotations_NIFTI.csv").set_index("Patient_ID")

DATASET_PATH = '../../New Dataset/'
# SEGMENTED_DATASET_PATH = '../../Segmented Dataset/'

In [18]:
annotation_boxes

Unnamed: 0_level_0,Start Row,End Row,Start Column,End Column,Start Slice,End Slice
Patient_ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
Breast_MRI_001,308,341,234,271,48,71
Breast_MRI_002,108,136,251,294,70,83
Breast_MRI_003,82,139,351,412,48,60
Breast_MRI_004,193,204,262,280,69,78
Breast_MRI_005,138,178,188,213,38,84
...,...,...,...,...,...,...
Breast_MRI_918,338,395,345,395,107,130
Breast_MRI_919,369,397,285,312,83,94
Breast_MRI_920,337,355,172,193,59,73
Breast_MRI_921,404,446,328,374,53,77


In [19]:
annotation_boxes.loc['Breast_MRI_001']

Start Row       308
End Row         341
Start Column    234
End Column      271
Start Slice      48
End Slice        71
Name: Breast_MRI_001, dtype: int64

In [20]:
filenames = ['pre.nii.gz', 'post_1.nii.gz']

In [22]:
# generate segmentation masks for each patient in the directory

for item in tqdm(os.listdir(DATASET_PATH)):
    patient_path = os.path.join(DATASET_PATH, item)

    x = annotation_boxes.loc[item]
    row1 = x['Start Row']
    row2 = x['End Row']

    col1 = x['Start Column']
    col2 = x['End Column']

    slice1 = x['Start Slice']
    slice2 = x['End Slice']

    if(os.path.isdir(patient_path)):
        # find files with paths here
        nifti_img = nib.load(os.path.join(patient_path, filenames[0]))   #load pre path and generate segments from this
        mask = np.zeros(nifti_img.dataobj.shape)
        mask[row1:row2, col1:col2, slice1:slice2] = 1
        segment = nib.Nifti1Image(mask, affine = nifti_img.affine)
        nib.save(segment, os.path.join(patient_path, 'segmentation.nii.gz'))

100%|██████████| 922/922 [15:20<00:00,  1.00it/s]


In [23]:
feat_ext = featureextractor.RadiomicsFeatureExtractor()
feat_ext.settings['minimumROIDimensions'] = 2
feat_ext.settings['normalize'] = True
feat_ext.settings['additionalInfo'] = False
feat_ext.settings

{'minimumROIDimensions': 2,
 'minimumROISize': None,
 'normalize': True,
 'normalizeScale': 1,
 'removeOutliers': None,
 'resampledPixelSpacing': None,
 'interpolator': 'sitkBSpline',
 'preCrop': False,
 'padDistance': 5,
 'distances': [1],
 'force2D': False,
 'force2Ddimension': 0,
 'resegmentRange': None,
 'label': 1,
 'additionalInfo': False}

In [24]:
features = pd.DataFrame()

for patient in tqdm(os.listdir(DATASET_PATH)):
    segmentation_path = os.path.join(DATASET_PATH, patient, 'segmentation.nii.gz')
    for filename in filenames:
        # get features for pre and post_1
        file_path = os.path.join(DATASET_PATH, patient, filename)
        img = sitk.ReadImage(file_path)
        smt = sitk.ReadImage(segmentation_path)

        smt.SetOrigin(img.GetOrigin())
        feats = feat_ext.execute(img, smt)
        for key, value in feats.items():
            feats[key] = value.item()
        ft_df = pd.DataFrame(feats, index = [0])
        ft_df['patient'] = patient
        ft_df['sequence'] = filename.split('.nii.gz')[0]
        features = pd.concat([features, ft_df])

  0%|          | 0/922 [00:00<?, ?it/s]GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
  0%|          | 1/922 [00:01<29:52,  1.95s/it]GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
  0%|          | 2/922 [00:03<30:00,  1.96s/it]GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
  0%|          | 3/922 [00:06<31:52,  2.08s/it]GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
  0%|          | 4/922 [00:08<31:54,  2.09s/it]GLCM is symmetrical, 

In [25]:
features.to_csv("pyradiomics_extraction.csv", index = False)

In [26]:
features

Unnamed: 0,original_shape_Elongation,original_shape_Flatness,original_shape_LeastAxisLength,original_shape_MajorAxisLength,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,original_shape_Maximum2DDiameterSlice,original_shape_Maximum3DDiameter,original_shape_MeshVolume,original_shape_MinorAxisLength,...,original_glszm_ZoneEntropy,original_glszm_ZonePercentage,original_glszm_ZoneVariance,original_ngtdm_Busyness,original_ngtdm_Coarseness,original_ngtdm_Complexity,original_ngtdm_Contrast,original_ngtdm_Strength,patient,sequence
0,0.891808,0.850437,29.186299,34.319179,36.073737,38.431136,39.309346,46.240422,19914.791496,30.606121,...,2.825564,0.003846,4.996251e+06,1060.390698,0.000323,0.220287,0.032468,0.000324,Breast_MRI_001,pre
0,0.891808,0.850437,29.186299,34.319179,36.073737,38.431136,39.309346,46.240422,19914.791496,30.606121,...,2.580368,0.004380,4.745944e+06,714.483384,0.000444,0.160234,0.019571,0.000443,Breast_MRI_001,post_1
0,0.668996,0.650923,18.931009,29.083312,23.148662,29.852205,29.748492,33.784677,6966.458183,19.456628,...,3.147323,0.002108,5.759789e+06,302.684948,0.000982,0.130170,0.012262,0.000985,Breast_MRI_002,pre
0,0.668996,0.650923,18.931009,29.083312,23.148662,29.852205,29.748492,33.784677,6966.458183,19.456628,...,3.339987,0.003194,4.028316e+06,253.258853,0.001130,0.113070,0.008739,0.001129,Breast_MRI_002,post_1
0,0.934408,0.286810,13.808210,48.144081,40.488107,43.126056,56.606869,57.665740,19467.935987,44.986213,...,2.909565,0.005536,6.875739e+06,356.064866,0.000750,0.063894,0.002623,0.000748,Breast_MRI_003,pre
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
0,0.877183,0.856791,15.749673,18.382163,19.038279,20.649190,20.505122,24.348411,3033.167788,16.124515,...,1.811278,0.001512,3.036683e+06,4.777973,0.052622,0.007182,0.000027,0.053438,Breast_MRI_920,post_1
0,0.913000,0.800835,33.226543,41.489900,43.076382,45.448460,48.143552,55.603011,33924.801358,37.880295,...,3.258646,0.001186,3.581583e+07,258.667361,0.001018,0.042386,0.001378,0.001013,Breast_MRI_921,pre
0,0.913000,0.800835,33.226543,41.489900,43.076382,45.448460,48.143552,55.603011,33924.801358,37.880295,...,3.144687,0.001208,3.514451e+07,259.701406,0.001015,0.042512,0.001414,0.001006,Breast_MRI_921,post_1
0,0.945533,0.798995,9.708553,12.150957,14.031840,12.641847,13.078416,16.022136,881.013461,11.489125,...,1.921928,0.002778,5.076704e+05,3.463405,0.073097,0.015201,0.000122,0.075003,Breast_MRI_922,pre
