# Radiomics Feature Extraction

Extracting features from PET and CT images.

In [1]:
import os
import nrrd
import utils 
import radiomics

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from feature_extraction import feature_extractor

%matplotlib inline

In [2]:
# Directories holding the image stacks and the corresponding masks for each
# modality (CT and PET). Keep exactly one group of four assignments active;
# the other groups are retained as commented-out alternatives.

# Alternative (inactive): the `*_nrrd` directories.
#path_ct_imagedir = './../../data_source/images/ct_nrrd/'
#path_ct_masksdir = './../../data_source/images/masks_nrrd/'
#path_pet_imagedir = './../../data_source/images/pet_nrrd/'
#path_pet_masksdir = './../../data_source/images/masks_nrrd/'

# Alternative (inactive): the `*_removed_broken_images` directories.
#path_ct_imagedir = './../../data_source/images/ct_removed_broken_images/'
#path_ct_masksdir = './../../data_source/images/masks_removed_broken_images/'
#path_pet_imagedir = './../../data_source/images/pet_removed_broken_images/'
#path_pet_masksdir = './../../data_source/images/masks_removed_broken_images/'

# Active: the `*_removed_broken_slices` directories. Unlike the alternatives
# above, CT and PET use separate mask directories here (`*_ct_size` and
# `*_pet_size`).
path_ct_imagedir = './../../data_source/images/ct_removed_broken_slices/'
path_ct_masksdir = './../../data_source/images/masks_removed_broken_slices_ct_size/'
path_pet_imagedir = './../../data_source/images/pet_removed_broken_slices/'
path_pet_masksdir = './../../data_source/images/masks_removed_broken_slices_pet_size/'

In [3]:
# NB: Load the correct image data set before calculating features, i.e. the
# image directories that are active in this notebook must belong to the same
# data set as the parameter and feature files activated below.
#
# `ct_param_files` and `ct_feature_files` are consumed pairwise (entry i of
# one list goes with entry i of the other), so both lists must keep the same
# length and ordering: first-order 32/64/128, then texture 32/64/128.

# Parameter (YAML) files configuring each CT feature extraction run.
ct_param_files = [
    # Alternative (inactive): `original_images` configurations.
    #'./parameter_files/firstorder_original_images/ct32_firstorder_original_images_config.yaml',
    #'./parameter_files/firstorder_original_images/ct64_firstorder_original_images_config.yaml',
    #'./parameter_files/firstorder_original_images/ct128_firstorder_original_images_config.yaml',
    #'./parameter_files/texture_original_images/ct32_texture_original_images_config.yaml',
    #'./parameter_files/texture_original_images/ct64_texture_original_images_config.yaml',
    #'./parameter_files/texture_original_images/ct128_texture_original_images_config.yaml',
    
    # Alternative (inactive): `removed_broken_images` configurations.
    #'./parameter_files/firstorder_removed_broken_images/ct32_firstorder_removed_broken_images_config.yaml',
    #'./parameter_files/firstorder_removed_broken_images/ct64_firstorder_removed_broken_images_config.yaml',
    #'./parameter_files/firstorder_removed_broken_images/ct128_firstorder_removed_broken_images_config.yaml',
    #'./parameter_files/texture_removed_broken_images/ct32_texture_removed_broken_images_config.yaml',
    #'./parameter_files/texture_removed_broken_images/ct64_texture_removed_broken_images_config.yaml',
    #'./parameter_files/texture_removed_broken_images/ct128_texture_removed_broken_images_config.yaml',
    
    # Active: `removed_broken_slices` configurations.
    './parameter_files/firstorder_removed_broken_slices/ct32_firstorder_removed_broken_slices_config.yaml',
    './parameter_files/firstorder_removed_broken_slices/ct64_firstorder_removed_broken_slices_config.yaml',
    './parameter_files/firstorder_removed_broken_slices/ct128_firstorder_removed_broken_slices_config.yaml',
    './parameter_files/texture_removed_broken_slices/ct32_texture_removed_broken_slices_config.yaml',
    './parameter_files/texture_removed_broken_slices/ct64_texture_removed_broken_slices_config.yaml',
    './parameter_files/texture_removed_broken_slices/ct128_texture_removed_broken_slices_config.yaml',
]
# Destination CSV files for the extracted CT features, in the same order as
# `ct_param_files`.
ct_feature_files = [
    # Alternative (inactive): `original_images` results.
    #'./../../data_source/radiomic_features/firstorder_original_images/ct32_firstorder_original_images.csv',
    #'./../../data_source/radiomic_features/firstorder_original_images/ct64_firstorder_original_images.csv',
    #'./../../data_source/radiomic_features/firstorder_original_images/ct128_firstorder_original_images.csv',
    #'./../../data_source/radiomic_features/texture_original_images/ct32_texture_original_images.csv',
    #'./../../data_source/radiomic_features/texture_original_images/ct64_texture_original_images.csv',
    #'./../../data_source/radiomic_features/texture_original_images/ct128_texture_original_images.csv',
    
    # Alternative (inactive): `removed_broken_images` results.
    #'./../../data_source/radiomic_features/firstorder_removed_broken_images/ct32_firstorder_removed_broken_images.csv',
    #'./../../data_source/radiomic_features/firstorder_removed_broken_images/ct64_firstorder_removed_broken_images.csv',
    #'./../../data_source/radiomic_features/firstorder_removed_broken_images/ct128_firstorder_removed_broken_images.csv',
    #'./../../data_source/radiomic_features/texture_removed_broken_images/ct32_texture_removed_broken_images.csv',
    #'./../../data_source/radiomic_features/texture_removed_broken_images/ct64_texture_removed_broken_images.csv',
    #'./../../data_source/radiomic_features/texture_removed_broken_images/ct128_texture_removed_broken_images.csv',
    
    # Active: `removed_broken_slices` results.
    './../../data_source/radiomic_features/firstorder_removed_broken_slices/ct32_firstorder_removed_broken_slices.csv',
    './../../data_source/radiomic_features/firstorder_removed_broken_slices/ct64_firstorder_removed_broken_slices.csv',
    './../../data_source/radiomic_features/firstorder_removed_broken_slices/ct128_firstorder_removed_broken_slices.csv',
    './../../data_source/radiomic_features/texture_removed_broken_slices/ct32_texture_removed_broken_slices.csv',
    './../../data_source/radiomic_features/texture_removed_broken_slices/ct64_texture_removed_broken_slices.csv',
    './../../data_source/radiomic_features/texture_removed_broken_slices/ct128_texture_removed_broken_slices.csv',
]

In [4]:
# NB: Load the correct image data set before calculating features, i.e. the
# image directories that are active in this notebook must belong to the same
# data set as the parameter and feature files activated below.
#
# `pet_param_files` and `pet_feature_files` are consumed pairwise (entry i of
# one list goes with entry i of the other), so both lists must keep the same
# length and ordering.

# Parameter (YAML) files configuring each PET feature extraction run.
pet_param_files = [
    # Alternative (inactive): shape configuration, listed once per shape
    # feature file in `pet_feature_files`.
    #'./parameter_files/shape/shape_config.yaml',
    #'./parameter_files/shape/shape_config.yaml',
    #'./parameter_files/shape/shape_config.yaml',
    
    # Alternative (inactive): `original_images` configurations.
    #'./parameter_files/firstorder_original_images/pet32_firstorder_original_images_config.yaml',
    #'./parameter_files/firstorder_original_images/pet64_firstorder_original_images_config.yaml',
    #'./parameter_files/firstorder_original_images/pet128_firstorder_original_images_config.yaml',
    #'./parameter_files/texture_original_images/pet32_texture_original_images_config.yaml',
    #'./parameter_files/texture_original_images/pet64_texture_original_images_config.yaml',
    #'./parameter_files/texture_original_images/pet128_texture_original_images_config.yaml',
    
    # Alternative (inactive): `removed_broken_images` configurations.
    #'./parameter_files/firstorder_removed_broken_images/pet32_firstorder_removed_broken_images_config.yaml',
    #'./parameter_files/firstorder_removed_broken_images/pet64_firstorder_removed_broken_images_config.yaml',
    #'./parameter_files/firstorder_removed_broken_images/pet128_firstorder_removed_broken_images_config.yaml',
    #'./parameter_files/texture_removed_broken_images/pet32_texture_removed_broken_images_config.yaml',
    #'./parameter_files/texture_removed_broken_images/pet64_texture_removed_broken_images_config.yaml',
    #'./parameter_files/texture_removed_broken_images/pet128_texture_removed_broken_images_config.yaml',
    
    # Active: `removed_broken_slices` configurations.
    './parameter_files/firstorder_removed_broken_slices/pet32_firstorder_removed_broken_slices_config.yaml',
    './parameter_files/firstorder_removed_broken_slices/pet64_firstorder_removed_broken_slices_config.yaml',
    './parameter_files/firstorder_removed_broken_slices/pet128_firstorder_removed_broken_slices_config.yaml',
    './parameter_files/texture_removed_broken_slices/pet32_texture_removed_broken_slices_config.yaml',
    './parameter_files/texture_removed_broken_slices/pet64_texture_removed_broken_slices_config.yaml',
    './parameter_files/texture_removed_broken_slices/pet128_texture_removed_broken_slices_config.yaml',
]
# Destination CSV files for the extracted PET features, in the same order as
# `pet_param_files`.
pet_feature_files = [
    # Alternative (inactive): shape feature results, one file per mask set.
    #'./../../data_source/radiomic_features/shape/shape_original_masks.csv',
    #'./../../data_source/radiomic_features/shape/shape_removed_broken_images.csv',
    #'./../../data_source/radiomic_features/shape/shape_removed_broken_slices.csv',
    
    # Alternative (inactive): `original_images` results.
    #'./../../data_source/radiomic_features/firstorder_original_images/pet32_firstorder_original_images.csv',
    #'./../../data_source/radiomic_features/firstorder_original_images/pet64_firstorder_original_images.csv',
    #'./../../data_source/radiomic_features/firstorder_original_images/pet128_firstorder_original_images.csv',
    #'./../../data_source/radiomic_features/texture_original_images/pet32_texture_original_images.csv',
    #'./../../data_source/radiomic_features/texture_original_images/pet64_texture_original_images.csv',
    #'./../../data_source/radiomic_features/texture_original_images/pet128_texture_original_images.csv',
    
    # Alternative (inactive): `removed_broken_images` results.
    #'./../../data_source/radiomic_features/firstorder_removed_broken_images/pet32_firstorder_removed_broken_images.csv',
    #'./../../data_source/radiomic_features/firstorder_removed_broken_images/pet64_firstorder_removed_broken_images.csv',
    #'./../../data_source/radiomic_features/firstorder_removed_broken_images/pet128_firstorder_removed_broken_images.csv',
    #'./../../data_source/radiomic_features/texture_removed_broken_images/pet32_texture_removed_broken_images.csv',
    #'./../../data_source/radiomic_features/texture_removed_broken_images/pet64_texture_removed_broken_images.csv',
    #'./../../data_source/radiomic_features/texture_removed_broken_images/pet128_texture_removed_broken_images.csv',
    
    # Active: `removed_broken_slices` results.
    './../../data_source/radiomic_features/firstorder_removed_broken_slices/pet32_firstorder_removed_broken_slices.csv',
    './../../data_source/radiomic_features/firstorder_removed_broken_slices/pet64_firstorder_removed_broken_slices.csv',
    './../../data_source/radiomic_features/firstorder_removed_broken_slices/pet128_firstorder_removed_broken_slices.csv',
    './../../data_source/radiomic_features/texture_removed_broken_slices/pet32_texture_removed_broken_slices.csv',
    './../../data_source/radiomic_features/texture_removed_broken_slices/pet64_texture_removed_broken_slices.csv',
    './../../data_source/radiomic_features/texture_removed_broken_slices/pet128_texture_removed_broken_slices.csv',
]

In [5]:
# Collect the image/mask path pairs for both modalities with identical
# settings: CT first, then PET.
paths_ct_images, paths_pet_images = (
    utils.sample_paths(image_dir, masks_dir, target_format='nrrd')
    for image_dir, masks_dir in (
        (path_ct_imagedir, path_ct_masksdir),
        (path_pet_imagedir, path_pet_masksdir),
    )
)

In [6]:
# Sanity check: number of entries collected for CT and for PET, in that order.
tuple(len(paths) for paths in (paths_ct_images, paths_pet_images))

(196, 196)

## Setup

In [7]:
# Texture features are shown to be more stable after GL normalizations.
# Ref: Voxel size and gray level normalization of CT radiomic features in lung cancer.
def z_score_transform(image):
    """Standardise an image to zero mean and unit standard deviation.

    NaN entries are ignored when estimating the mean and the standard
    deviation and remain NaN in the result. The input is not modified.

    Args:
        image: Array-like of gray levels.

    Returns:
        A new array holding the z-scored gray levels.
    """
    voxels = np.copy(image)
    centre = np.nanmean(voxels)
    # The small offset avoids division by zero for constant images.
    spread = np.nanstd(voxels) + 1e-10
    return (voxels - centre) / spread


def bin_widths(path_images, nbins, n=3, z_scoring=False):
    """Estimate a fixed gray-level bin width shared by all image stacks.

    For every image/mask pair the minimum and maximum gray level inside the
    masked (tumor) region are determined. The bin width is the mean of the
    per-stack maxima (rounded up) minus the mean of the per-stack minima
    (rounded down), divided by the number of bins. A fixed bin width (and not
    a fixed number of bins per stack) is used to compare texture features
    across stacks.

    Args:
        path_images: Iterable of mappings with the keys 'Image' and 'Mask',
            each holding a path that is read with `nrrd.read`.
        nbins: Number of gray-level bins the averaged range is divided into.
        n: Unused; kept so that existing calls remain valid.
        z_scoring: If True, the masked region of each stack is z-scored with
            `z_score_transform` before its extrema are taken.

    Returns:
        The estimated bin width.
    """
    img_max, img_min = [], []
    for image_path in path_images:

        image, _header = nrrd.read(image_path['Image'])
        mask, _header = nrrd.read(image_path['Mask'])
        # Calc GL discr. from tumor region only. Background is identified from
        # the mask itself (not from `image * mask == 0`), so zero-valued voxels
        # inside the tumor still contribute to the extrema. `np.where` with NaN
        # yields a floating point array, which also makes this work for
        # integer-typed stacks that cannot hold NaN.
        cropped_image = np.where(mask == 0, np.nan, image * mask)

        if z_scoring:
            cropped_image = z_score_transform(cropped_image)

        img_max.append(np.ceil(np.nanmax(cropped_image)))
        img_min.append(np.floor(np.nanmin(cropped_image)))

    return (np.mean(img_max) - np.mean(img_min)) / nbins

In [8]:
# NOTE (PET bin widths, without z-scoring):
# * Calculate the gray-level (GL) discretization from the tumor region only, to best preserve tumor texture.
# * The data set with removed slices requires its own GL bin widths.

#print(bin_widths(paths_pet_images, 32))
#print(bin_widths(paths_pet_images, 64)) 
#print(bin_widths(paths_pet_images, 128))

In [9]:
# TODO (CT bin widths, without z-scoring):
# * Calculate the gray-level (GL) discretization from the tumor region only, to best preserve tumor texture.
# * The data set with removed slices requires its own GL bin widths.

#print(bin_widths(paths_ct_images, 32))
#print(bin_widths(paths_ct_images, 64)) 
#print(bin_widths(paths_ct_images, 128))

In [10]:
# TODO (CT bin widths, with z-scoring):
# * Calculate the gray-level (GL) discretization from the tumor region only, to best preserve tumor texture.
# * The data set with removed slices requires its own GL bin widths.

#print(bin_widths(paths_ct_images, 32, z_scoring=True))
#print(bin_widths(paths_ct_images, 64, z_scoring=True))
#print(bin_widths(paths_ct_images, 128, z_scoring=True))

## Calculate features

In [11]:
# zip() stops at the shorter list, so a parameter file without a matching
# feature file (or vice versa) would be skipped silently. Fail loudly instead.
assert len(ct_param_files) == len(ct_feature_files), (
    'Got {} CT parameter files but {} CT feature files.'.format(
        len(ct_param_files), len(ct_feature_files)
    )
)

# Run one feature extraction per (parameter file, feature file) pair on the
# CT image/mask paths.
for ct_param_file, ct_feature_file in zip(ct_param_files, ct_feature_files):

    # Sanity check: the feature file must be named after the parameter file.
    # The whole file stem is compared (with the '_config' suffix of the
    # parameter file removed), so that e.g. a 'removed_broken_images'
    # configuration cannot be paired with a 'removed_broken_slices' result.
    _param_file_ref = os.path.splitext(os.path.basename(ct_param_file))[0]
    if _param_file_ref.endswith('_config'):
        _param_file_ref = _param_file_ref[:-len('_config')]
    _feature_file_ref = os.path.splitext(os.path.basename(ct_feature_file))[0]
    assert _feature_file_ref.startswith(_param_file_ref), (
        'Parameter file {!r} does not match feature file {!r}.'.format(
            ct_param_file, ct_feature_file
        )
    )

    # NOTE(review): `n_jobs=None` presumably lets `feature_extractor` pick the
    # number of workers itself - confirm against its implementation.
    feature_extractor(
        param_file=ct_param_file,
        paths_to_images_and_masks=paths_ct_images,
        verbose=1,
        path_to_results=ct_feature_file,
        n_jobs=None,
        drop_missing=True,
        variance_thresh=0.0
    )

Initiated feature extraction.


[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:    6.4s
[Parallel(n_jobs=3)]: Done 196 out of 196 | elapsed:   21.7s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:    3.8s
[Parallel(n_jobs=3)]: Done 196 out of 196 | elapsed:   14.5s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  82 tasks      | elapsed:    6.4s
[Parallel(n_jobs=3)]: Done 196 out of 196 | elapsed:   15.3s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:    7.2s
[Parallel(n_jobs=3)]: Done 196 out of 196 | elapsed:   25.8s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:    6.2s
[Parallel(n_jobs=3)]: Done 196 out of 196 | elapsed:   24.7s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:    8.6s
[Parallel(n_jobs=3)]: Done 196 out of 196 | elapsed:   27.9s finished


In [12]:
# zip() stops at the shorter list, so a parameter file without a matching
# feature file (or vice versa) would be skipped silently. Fail loudly instead.
assert len(pet_param_files) == len(pet_feature_files), (
    'Got {} PET parameter files but {} PET feature files.'.format(
        len(pet_param_files), len(pet_feature_files)
    )
)

# Run one feature extraction per (parameter file, feature file) pair on the
# PET image/mask paths.
for pet_param_file, pet_feature_file in zip(pet_param_files, pet_feature_files):

    # Sanity check: the feature file must be named after the parameter file.
    # The whole file stem is compared (with the '_config' suffix of the
    # parameter file removed), so that e.g. a 'removed_broken_images'
    # configuration cannot be paired with a 'removed_broken_slices' result.
    # A prefix match is used so that the shared 'shape_config.yaml' matches
    # every 'shape_*.csv' feature file.
    param_file_ref = os.path.splitext(os.path.basename(pet_param_file))[0]
    if param_file_ref.endswith('_config'):
        param_file_ref = param_file_ref[:-len('_config')]
    feature_file_ref = os.path.splitext(os.path.basename(pet_feature_file))[0]
    assert feature_file_ref.startswith(param_file_ref), (
        'Parameter file {!r} does not match feature file {!r}.'.format(
            pet_param_file, pet_feature_file
        )
    )

    # NOTE(review): `n_jobs=None` presumably lets `feature_extractor` pick the
    # number of workers itself - confirm against its implementation.
    feature_extractor(
        param_file=pet_param_file,
        paths_to_images_and_masks=paths_pet_images,
        verbose=1,
        path_to_results=pet_feature_file,
        n_jobs=None,
        drop_missing=True,
        variance_thresh=0.0
    )

[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  82 tasks      | elapsed:    6.7s
[Parallel(n_jobs=3)]: Done 196 out of 196 | elapsed:   15.7s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  82 tasks      | elapsed:    6.3s
[Parallel(n_jobs=3)]: Done 196 out of 196 | elapsed:   14.9s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  82 tasks      | elapsed:    6.1s
[Parallel(n_jobs=3)]: Done 196 out of 196 | elapsed:   14.5s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:    5.8s
[Parallel(n_jobs=3)]: Done 196 out of 196 | elapsed:   25.9s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:    7.4s
[Parallel(n_jobs=3)]: Done 196 out of 196 | elapsed:   42.5s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:   26.1s
[Parallel(n_jobs=3)]: Done 196 out of 196 | elapsed:  1.6min finished
