# Radiomics Feature Extraction

Extracting features from PET and CT images.

In [1]:
import os
import nrrd
import utils 
import radiomics

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from feature_extraction import feature_extractor

%matplotlib inline

In [2]:
# Image and mask directories, one (image, mask) pair per modality.
# Active data set: volumes with the broken slices removed. The masks come in
# two variants, named after the modality whose size they follow.
# To run on the original volumes instead, use 'ct_nrrd/' and 'pet_nrrd/' as
# image directories and 'masks_nrrd/' as the mask directory for both.
(path_ct_imagedir, path_ct_masksdir) = (
    './../../data_source/images/ct_removed_broken_slices/',
    './../../data_source/images/masks_removed_broken_slices_ct_size/',
)
(path_pet_imagedir, path_pet_masksdir) = (
    './../../data_source/images/pet_removed_broken_slices/',
    './../../data_source/images/masks_removed_broken_slices_pet_size/',
)

In [3]:
# NB: the image directories configured above must point at the same data set
# as the files listed here before any features are calculated.
#
# Each entry pairs a parameter file with the CSV its features are written to,
# so the two can never drift out of order. For the original (unfiltered)
# images, replace 'removed_broken_slices' with 'original_images' throughout.
_ct_param_feature_pairs = [
    (
        './parameter_files/firstorder_removed_broken_slices/ct32_firstorder_removed_broken_slices_config.yaml',
        './../../data_source/radiomic_features/firstorder_removed_broken_slices/ct32_firstorder_removed_broken_slices.csv',
    ),
    (
        './parameter_files/firstorder_removed_broken_slices/ct64_firstorder_removed_broken_slices_config.yaml',
        './../../data_source/radiomic_features/firstorder_removed_broken_slices/ct64_firstorder_removed_broken_slices.csv',
    ),
    (
        './parameter_files/firstorder_removed_broken_slices/ct128_firstorder_removed_broken_slices_config.yaml',
        './../../data_source/radiomic_features/firstorder_removed_broken_slices/ct128_firstorder_removed_broken_slices.csv',
    ),
    (
        './parameter_files/texture_removed_broken_slices/ct32_texture_removed_broken_slices_config.yaml',
        './../../data_source/radiomic_features/texture_removed_broken_slices/ct32_texture_removed_broken_slices.csv',
    ),
    (
        './parameter_files/texture_removed_broken_slices/ct64_texture_removed_broken_slices_config.yaml',
        './../../data_source/radiomic_features/texture_removed_broken_slices/ct64_texture_removed_broken_slices.csv',
    ),
    (
        './parameter_files/texture_removed_broken_slices/ct128_texture_removed_broken_slices_config.yaml',
        './../../data_source/radiomic_features/texture_removed_broken_slices/ct128_texture_removed_broken_slices.csv',
    ),
]

ct_param_files = [param_path for param_path, _ in _ct_param_feature_pairs]

# NOTE(review): not referenced by any other cell visible in this notebook.
path_to_results = ['./../../resultsAss/shape_features.csv']

ct_feature_files = [feature_path for _, feature_path in _ct_param_feature_pairs]

In [15]:
# NB: the image directories configured above must point at the same data set
# as the files listed here before any features are calculated.
#
# Each entry pairs a parameter file with the CSV its features are written to.
# Only the shape features are selected in this run. Other available pairings
# follow these naming patterns:
#   shape on the original masks:
#     './parameter_files/shape/shape_config.yaml'
#       -> './../../data_source/radiomic_features/shape/shape_original_masks.csv'
#   first-order / texture, with <family> in {firstorder, texture},
#   <set> in {original_images, removed_broken_slices}, <bins> in {32, 64, 128}:
#     './parameter_files/<family>_<set>/pet<bins>_<family>_<set>_config.yaml'
#       -> './../../data_source/radiomic_features/<family>_<set>/pet<bins>_<family>_<set>.csv'
_pet_param_feature_pairs = [
    (
        './parameter_files/shape/shape_config.yaml',
        './../../data_source/radiomic_features/shape/shape_removed_broken_slices.csv',
    ),
]

pet_param_files = [param_path for param_path, _ in _pet_param_feature_pairs]
pet_feature_files = [feature_path for _, feature_path in _pet_param_feature_pairs]

In [3]:
# Collect the samples for each modality from its image and mask directory,
# restricted to files in NRRD format.
# Presumably each returned entry maps 'Image' and 'Mask' to file paths, which
# is how the entries are indexed further down -- verify against utils.sample_paths.
paths_ct_images = utils.sample_paths(path_ct_imagedir, path_ct_masksdir, target_format='nrrd')
paths_pet_images = utils.sample_paths(path_pet_imagedir, path_pet_masksdir, target_format='nrrd')

In [5]:
# Display the image path and mask path of the first CT sample to confirm
# that the two belong to the same patient.
paths_ct_images[0]['Image'], paths_ct_images[0]['Mask']

('./../../data_source/images/ct_removed_broken_slices/P002CT.nrrd',
 './../../data_source/images/masks_removed_broken_slices_ct_size/P002mask.nrrd')

In [6]:
# Sanity check: display the number of CT samples and PET samples found,
# in that order, so a mismatch between the modalities is visible at a glance.
len(paths_ct_images), len(paths_pet_images)

(187, 187)

## Setup

In [5]:
def z_score_image(image):
    """Standardise an image to zero mean and unit standard deviation.

    Mean and standard deviation are computed over every element of
    ``image``. A small constant (1e-12) is added to the standard deviation
    so that a constant image gives zeros rather than a division by zero.

    Args:
        image: Array-like of intensities.

    Returns:
        The standardised intensities, same shape as ``image``.
    """
    centred = image - np.mean(image)
    spread = np.std(image) + 1e-12
    return centred / spread


def bin_widths(path_images, nbins, n=3, z_scoring=False):
    """Estimate the fixed bin width that yields about ``nbins`` intensity bins.

    For every sample the intensity range inside its mask is measured, with the
    maximum rounded up and the minimum rounded down to whole numbers. The bin
    width is the difference between the mean maximum and the mean minimum over
    all samples, divided by ``nbins``.

    Args:
        path_images: Iterable of mappings holding an ``'Image'`` and a
            ``'Mask'`` entry, each a path that ``nrrd.read`` can load.
        nbins: Desired number of bins; must be positive.
        n: Unused. Kept so that existing calls stay valid.
        z_scoring: If True, every image is passed through ``z_score_image``
            (statistics over the whole image) before its range is measured.

    Returns:
        The bin width as a float.

    Raises:
        ValueError: If ``nbins`` is not positive, ``path_images`` is empty,
            an image and its mask differ in shape, or a mask selects no voxels.
    """
    if nbins <= 0:
        raise ValueError(f'nbins must be positive, got {nbins!r}')

    samples = list(path_images)
    if not samples:
        raise ValueError('path_images is empty; cannot estimate a bin width')

    img_max, img_min = [], []
    for image_path in samples:

        image, _ = nrrd.read(image_path['Image'])
        mask, _ = nrrd.read(image_path['Mask'])

        if image.shape != mask.shape:
            raise ValueError(
                f"Image {image_path['Image']!r} has shape {image.shape} but "
                f"mask {image_path['Mask']!r} has shape {mask.shape}"
            )

        if z_scoring:
            image = z_score_image(image)

        # Select the region of interest through the mask itself. Multiplying
        # by the mask and discarding zeros would also discard genuine
        # zero-valued voxels inside the region, and writing NaN into the
        # product fails for integer-typed volumes.
        roi_values = image[mask != 0]
        if roi_values.size == 0:
            raise ValueError(f"Mask {image_path['Mask']!r} selects no voxels")

        # nanmax/nanmin keep the tolerance for NaN voxels inside the region.
        img_max.append(np.ceil(np.nanmax(roi_values)))
        img_min.append(np.floor(np.nanmin(roi_values)))

    return (np.mean(img_max) - np.mean(img_min)) / nbins

In [6]:
# Bin widths for the PET images at each discretisation level.
for n_bins in (32, 64, 128):
    print(f'PET {n_bins}: {bin_widths(paths_pet_images, n_bins)}')

PET 32: 0.40240641711229946
PET 64: 0.20120320855614973
PET 128: 0.10060160427807487


In [7]:
# Bin widths for the CT images at each discretisation level.
for n_bins in (32, 64, 128):
    print(f'CT {n_bins}: {bin_widths(paths_ct_images, n_bins)}')

CT 32: 39.07286096256684
CT 64: 19.53643048128342
CT 128: 9.76821524064171


In [None]:
# Bin widths for the CT images after z-scoring each image (z_scoring=True),
# reported under the "CT texture" label for 32, 64 and 128 bins.
print(f'CT texture 32: {bin_widths(paths_ct_images, 32, z_scoring=True)}')
print(f'CT  texture 64: {bin_widths(paths_ct_images, 64, z_scoring=True)}')
print(f'CT texture 128: {bin_widths(paths_ct_images, 128, z_scoring=True)}')

CT texture 32: 0.1303475935828877
CT  texture 64: 0.06517379679144385


## Calculate features

In [11]:
# zip() stops at the shorter list, so an unpaired entry would be skipped
# without any notice. Fail loudly instead.
assert len(ct_param_files) == len(ct_feature_files), (
    f'{len(ct_param_files)} CT parameter files but '
    f'{len(ct_feature_files)} CT feature files'
)

for ct_param_file, ct_feature_file in zip(ct_param_files, ct_feature_files):

    # Sanity check: the first three '_'-separated tokens of both file names
    # (e.g. 'ct32_firstorder_removed') must agree, so that features are never
    # written to a file named after a different configuration.
    _param_file_ref = '_'.join(os.path.basename(ct_param_file).split('_')[:3])
    _feature_file_ref = '_'.join(os.path.basename(ct_feature_file).split('_')[:3])
    assert _param_file_ref == _feature_file_ref, (
        f'Parameter file {ct_param_file!r} does not match '
        f'feature file {ct_feature_file!r}'
    )

    # One extraction run over all CT samples per parameter file; the result
    # is written to the paired feature file.
    # NOTE(review): n_jobs=None presumably lets feature_extractor choose the
    # worker count (the recorded log shows 3 workers) -- confirm in its source.
    feature_extractor(
        param_file=ct_param_file,
        paths_to_images_and_masks=paths_ct_images,
        verbose=1,
        path_to_results=ct_feature_file,
        n_jobs=None,
        drop_missing=True,
        variance_thresh=0.0
    )

Initiated feature extraction.


[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:    5.5s
[Parallel(n_jobs=3)]: Done 187 out of 187 | elapsed:   14.2s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  82 tasks      | elapsed:    5.2s
[Parallel(n_jobs=3)]: Done 182 out of 187 | elapsed:   11.5s remaining:    0.3s
[Parallel(n_jobs=3)]: Done 187 out of 187 | elapsed:   11.6s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  82 tasks      | elapsed:    5.1s
[Parallel(n_jobs=3)]: Done 182 out of 187 | elapsed:   11.4s remaining:    0.3s
[Parallel(n_jobs=3)]: Done 187 out of 187 | elapsed:   11.5s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:    4.6s
[Parallel(n_jobs=3)]: Done 187 out of 187 | elapsed:   20.2s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:    5.4s
[Parallel(n_jobs=3)]: Done 187 out of 187 | elapsed:   22.8s finished
[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.


Initiated feature extraction.


[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:    9.0s
[Parallel(n_jobs=3)]: Done 187 out of 187 | elapsed:   35.2s finished


In [16]:
# zip() stops at the shorter list, so an unpaired entry would be skipped
# without any notice. Fail loudly instead.
assert len(pet_param_files) == len(pet_feature_files), (
    f'{len(pet_param_files)} PET parameter files but '
    f'{len(pet_feature_files)} PET feature files'
)

for pet_param_file, pet_feature_file in zip(pet_param_files, pet_feature_files):

    # Sanity check: the feature file must be named after its parameter file.
    # The name stems are compared by prefix rather than by a fixed number of
    # '_'-separated tokens, so the check also holds for the shape
    # configuration ('shape_config.yaml' -> 'shape_removed_broken_slices.csv')
    # where a token-by-token comparison cannot match.
    param_file_ref = os.path.basename(pet_param_file)
    if param_file_ref.endswith('_config.yaml'):
        param_file_ref = param_file_ref[:-len('_config.yaml')]
    else:
        param_file_ref = os.path.splitext(param_file_ref)[0]
    feature_file_ref = os.path.splitext(os.path.basename(pet_feature_file))[0]
    assert feature_file_ref.startswith(param_file_ref), (
        f'Parameter file {pet_param_file!r} does not match '
        f'feature file {pet_feature_file!r}'
    )

    # One extraction run over all PET samples per parameter file; the result
    # is written to the paired feature file.
    # NOTE(review): n_jobs=None presumably lets feature_extractor choose the
    # worker count (the recorded log shows 3 workers) -- confirm in its source.
    feature_extractor(
        param_file=pet_param_file,
        paths_to_images_and_masks=paths_pet_images,
        verbose=1,
        path_to_results=pet_feature_file,
        n_jobs=None,
        drop_missing=True,
        variance_thresh=0.0
    )

Initiated feature extraction.


[Parallel(n_jobs=3)]: Using backend LokyBackend with 3 concurrent workers.
[Parallel(n_jobs=3)]: Done  44 tasks      | elapsed:   12.5s
[Parallel(n_jobs=3)]: Done 187 out of 187 | elapsed:   34.9s finished
