# Notebook performing radiomics extraction

Radiomics features are extracted from the nodule volumes using the PyRadiomics package.

In [1]:
import pandas as pd
import matplotlib.pyplot as plt
from radiomics import featureextractor, getTestCase
import SimpleITK as sitk
import numpy as np
import glob
from pathlib import Path

In [2]:
%load_ext autoreload
%autoreload 2
from LIDC_Dataset import LIDC_Dataset

In [3]:
# Root folder of the dataset; every output folder of this notebook is created below it.
data_path = "dataset"

# PyRadiomics feature classes to extract, processed one class at a time.
feature_list = [
    "glrlm",
    "shape",
    "firstorder",
    "glcm",
    "glszm",
    "ngtdm",
    "gldm",
]

In [4]:
# Options forwarded unchanged to LIDC_Dataset.
# NOTE(review): the conversion cell reads the volume from sample[0] and the mask
# from sample[2], which presumably relies on return_mask=True - confirm against
# LIDC_Dataset.
dataset_kwargs = dict(
    train_mode=False,
    apply_mask=False,
    return_mask=True,
    full_vol=True,
)
data = LIDC_Dataset(data_dir=data_path, **dataset_kwargs)

The PyRadiomics package requires the nodule volumes (stored as .pt files) to be in .nrrd format,
so they need to be converted first:

In [5]:
# Create the output folders: converted volumes, converted masks, and the folder
# that will later hold the extracted feature tables.
for subdir in ("crops_pyradiomics", "masks_pyradiomics", "radiomics_features"):
    Path(f"{data_path}/{subdir}").mkdir(parents=True, exist_ok=True)

# Convert every nodule volume and its mask to an 8-bit .nrrd file.
for i in range(len(data)):
    # Index the dataset only once per nodule: each data[i] call loads the sample
    # again, and taking image and mask from the same load guarantees they belong
    # together.
    sample = data[i]

    # Element 0 of the sample is the volume, element 2 the mask; the leading
    # axis is dropped with [0] and the values are scaled by 255 before the uint8 cast.
    # NOTE(review): assumes both are stored in the [0, 1] range - confirm in LIDC_Dataset.
    image = np.array(sample[0][0]*255).astype(np.uint8)
    mask = np.array(sample[2][0]*255).astype(np.uint8)

    img = sitk.GetImageFromArray(image)
    msk = sitk.GetImageFromArray(mask)

    # Files are numbered from 1 and zero-padded to four digits, with identical
    # names in both folders so that an image and its mask can be paired by name.
    file_name = f"{str(i+1).zfill(4)}.nrrd"
    img_path = f"{data_path}/crops_pyradiomics/{file_name}"
    msk_path = f"{data_path}/masks_pyradiomics/{file_name}"

    sitk.WriteImage(img, img_path)
    sitk.WriteImage(msk, msk_path)

In [6]:
# Glob patterns matching every file in the converted-volume and converted-mask folders.
imgs_path = f"{data_path}/crops_pyradiomics/*"
masks_path = f"{data_path}/masks_pyradiomics/*"

# Sort both listings so that position k in `imgs` and position k in `masks`
# can be used together (the two folders are expected to hold matching file names).
imgs = sorted(glob.glob(imgs_path))
masks = sorted(glob.glob(masks_path))

In [7]:
# Sanity check: the first five image paths and the first five mask paths,
# separated by a "***" line.
print(imgs[:5], "***", masks[:5], sep="\n")

['dataset/crops_pyradiomics/0001.nrrd', 'dataset/crops_pyradiomics/0002.nrrd', 'dataset/crops_pyradiomics/0003.nrrd', 'dataset/crops_pyradiomics/0004.nrrd', 'dataset/crops_pyradiomics/0005.nrrd']
***
['dataset/masks_pyradiomics/0001.nrrd', 'dataset/masks_pyradiomics/0002.nrrd', 'dataset/masks_pyradiomics/0003.nrrd', 'dataset/masks_pyradiomics/0004.nrrd', 'dataset/masks_pyradiomics/0005.nrrd']


In [12]:
# The masks are multiplied by 255 in the conversion cell, so 255 is the voxel
# value PyRadiomics is told to treat as the region of interest.
settings = {"label":255}
extractor = featureextractor.RadiomicsFeatureExtractor(**settings)

for feat in feature_list:
    print(feat)
    # Enable exactly one feature class per pass so that every class is written
    # to its own table / pickle file.
    extractor.disableAllFeatures()
    extractor.enableFeatureClassByName(feat)

    # Column names are taken from the first case that extracts successfully
    # (no separate probing run on case 0, which was redundant work and would
    # abort the whole cell if that one case failed).
    feature_names = None
    feature_rows = []
    # Position in `imgs` of every row that was kept. Used as the DataFrame index
    # so that rows stay traceable to their case - and comparable across feature
    # classes - even when some cases fail and are skipped.
    case_indices = []

    for i in range(len(imgs)):
        try:
            feature_vector = extractor.execute(imgs[i], masks[i])
            if feature_names is None:
                # Keep only the features of the current class; the result also
                # contains other entries that do not start with this prefix.
                feature_names = [x for x in feature_vector.keys() if x.startswith(f"original_{feat}")]
            row = [feature_vector[x] for x in feature_names]
        except Exception as e:
            # Best effort: report the failing case and continue with the next one.
            print(i, e)
            continue
        feature_rows.append(row)
        case_indices.append(i)

    # Without failures the index is 0..n-1, exactly as before; with failures the
    # skipped case numbers are simply missing from the index.
    df = pd.DataFrame(feature_rows, columns=feature_names, index=case_indices)
    df.to_pickle(f"{data_path}/radiomics_features/{feat}_radiomics_df.pkl")

glrlm
shape
firstorder


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated


glcm


GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Average, only 1 needs to be calculated
GLCM is symmetrical, therefore Sum Average = 2 * Joint Avera

glszm
ngtdm
gldm


In [23]:
# Example result: load the first-order feature table written by the extraction
# cell and display its first rows.
firstorder_pickle = f"{data_path}/radiomics_features/firstorder_radiomics_df.pkl"
dataframe = pd.read_pickle(firstorder_pickle)
dataframe.head()

Unnamed: 0,original_firstorder_10Percentile,original_firstorder_90Percentile,original_firstorder_Energy,original_firstorder_Entropy,original_firstorder_InterquartileRange,original_firstorder_Kurtosis,original_firstorder_Maximum,original_firstorder_MeanAbsoluteDeviation,original_firstorder_Mean,original_firstorder_Median,original_firstorder_Minimum,original_firstorder_Range,original_firstorder_RobustMeanAbsoluteDeviation,original_firstorder_RootMeanSquared,original_firstorder_Skewness,original_firstorder_TotalEnergy,original_firstorder_Uniformity,original_firstorder_Variance
0,35.0,223.0,166020995.0,3.270020900855951,89.0,2.3424774698263664,255.0,53.47941565556968,140.68443992445154,152.0,0.0,255.0,38.82385779160951,155.3075688479153,-0.4384766917100197,166020995.0,0.1141251370507772,4328.329304593332
1,18.0,239.0,137719910.0,3.364814401559609,159.0,1.5709351774021028,255.0,73.6749226739048,131.85252808988764,132.0,0.0,255.0,62.75286062993119,155.49390658786297,-0.0607995162021719,137719910.0,0.1027202955356015,6793.265822260447
2,24.300000000000004,223.7,10057943.0,3.3998627342177343,120.5,1.8289970644308933,255.0,63.37777474216242,124.1797520661157,126.5,0.0,255.0,50.352009449918114,144.15572817943152,-0.0084332454439072,10057943.0,0.0969196093163035,5360.2631437401815
3,7.800000000000011,223.0,19881863.0,3.283675849430537,133.0,1.9811409926118144,255.0,66.86110480316675,100.57038242473556,86.0,0.0,255.0,53.44131459736452,127.18989154403954,0.4654761472431378,19881863.0,0.1161230395626178,6062.866689926981
4,29.100000000000005,204.1,860539.0,3.0448032930271376,88.0,2.3554521014923795,249.0,48.91632373113856,109.75925925925924,112.0,0.0,249.0,32.954648526077094,126.23750396537238,0.2307135389101746,860539.0,0.1392318244170096,3888.8124142661177
