# Post-processing of Extracted Radiomic Features

* Uniquely label features.
* Remove features constant across gray level discretizations.

In [1]:
import sys
sys.path.append('./../utils/')

import os
import re
import glob

import numpy as np
import pandas as pd

In [2]:
def features_independent_of_gl(df, gl_bins=None, thresh=1e-7):
    """Collapse features that are constant across gray level quantifications.

    `df` is expected to contain the same feature label once per gray level
    discretization (e.g. three frames concatenated column-wise). For every
    distinct label the per-row variance across its columns is summed:

    * If the sum is below `thresh`, the feature is kept once under its
      original label and the label is reported as dropped.
    * Otherwise each column is kept and renamed `<label>_<bins>bins`, pairing
      columns with `gl_bins` by position.
    * A label that occurs only once cannot be compared across
      discretizations; it is passed through unchanged and not reported.

    Args:
        df: DataFrame whose column labels repeat once per discretization.
        gl_bins: Bin counts in the same order as the repeated columns.
            Defaults to (32, 64, 128).
        thresh: Upper bound on the summed row-wise variance for a feature to
            count as constant.

    Returns:
        A tuple `(features, to_drop)`: the relabelled DataFrame (columns in
        sorted label order) and the list of labels found to be constant.

    Raises:
        IndexError: If a varying feature has fewer columns than `gl_bins`.
    """
    if gl_bins is None:
        # Avoid a shared mutable default argument.
        gl_bins = (32, 64, 128)

    to_drop = []
    output = {}
    for label in np.unique(df.columns):
        # A boolean mask always yields a DataFrame, even when the label
        # occurs only once (plain `df.loc[:, label]` would return a Series).
        features = df.loc[:, df.columns == label]

        if features.shape[1] < 2:
            # Nothing to compare against: keep the single column as is.
            output[str(label)] = features.iloc[:, 0]
            continue

        # Built-in sum propagates NaN, so a feature with missing values is
        # never classified as constant.
        if sum(features.var(axis=1)) < thresh:
            output[str(label)] = features.iloc[:, 0]
            to_drop.append(label)
        else:
            for num, gl_bin in enumerate(gl_bins):
                output[f'{label}_{gl_bin}bins'] = features.iloc[:, num]

    return pd.DataFrame(output), to_drop

In [3]:
# Path configuration.
#
# Each group below has three alternatives, one per dataset variant
# (original images / removed broken images / removed broken slices). The
# active, uncommented line selects the "removed broken slices" variant; to
# switch variant, change the active line in every group consistently.

# Image directory; its file names are parsed for the patient IDs.
#path_to_dir = './../../data_source/images/ct_nrrd/'
#path_to_dir = './../../data_source/images/ct_removed_broken_images/'
path_to_dir = './../../data_source/images/ct_removed_broken_slices/'


# Output: LRR target restricted to the selected patients.
#path_to_lrr = './../../data_source/to_analysis/original_images/lrr_original_images.csv'
#path_to_lrr = './../../data_source/to_analysis/removed_broken_images/lrr_removed_broken_images.csv'
path_to_lrr = './../../data_source/to_analysis/removed_broken_slices/lrr_removed_broken_slices.csv'

# Output: DFS target restricted to the selected patients.
#path_to_dfs = './../../data_source/to_analysis/original_images/dfs_original_images.csv'
#path_to_dfs = './../../data_source/to_analysis/removed_broken_images/dfs_removed_broken_images.csv'
path_to_dfs = './../../data_source/to_analysis/removed_broken_slices/dfs_removed_broken_slices.csv'


# Output: feature matrix with the CT columns removed.
#path_no_ct = './../../data_source/to_analysis/original_images/no_ct_original_images.csv'
#path_no_ct = './../../data_source/to_analysis/removed_broken_images/no_ct_removed_broken_images.csv'
path_no_ct = './../../data_source/to_analysis/removed_broken_slices/no_ct_removed_broken_slices.csv'


# Output: full feature matrix (clinical + radiomic + PET parameters).
#path_to_feature_matrix = './../../data_source/to_analysis/original_images/all_features_original_images.csv'
#path_to_feature_matrix = './../../data_source/to_analysis/removed_broken_images/all_features_removed_broken_images.csv'
path_to_feature_matrix = './../../data_source/to_analysis/removed_broken_slices/all_features_removed_broken_slices.csv'


# Output: CT + PET texture features only.
#path_to_texture_features = './../../data_source/to_analysis/original_images/texture_original_images.csv'
#path_to_texture_features = './../../data_source/to_analysis/removed_broken_images/texture_removed_broken_images.csv'
path_to_texture_features = './../../data_source/to_analysis/removed_broken_slices/texture_removed_broken_slices.csv'


# Output: CT + PET first-order features only.
#path_to_firstorder_features = './../../data_source/to_analysis/original_images/firstorder_original_images.csv'
#path_to_firstorder_features = './../../data_source/to_analysis/removed_broken_images/firstorder_removed_broken_images.csv'
path_to_firstorder_features = './../../data_source/to_analysis/removed_broken_slices/firstorder_removed_broken_slices.csv'


# Input: extracted shape features.
#path_to_shape_features = './../../data_source/radiomic_features/shape/shape_original_masks.csv'
#path_to_shape_features = './../../data_source/radiomic_features/shape/shape_removed_broken_images.csv'
path_to_shape_features = './../../data_source/radiomic_features/shape/shape_removed_broken_slices.csv'


# Input: CT first-order features at 32 / 64 / 128 gray level bins.
#path_ct_firstorder_32 = './../../data_source/radiomic_features/firstorder_original_images/ct32_firstorder_original_images.csv'
#path_ct_firstorder_64 = './../../data_source/radiomic_features/firstorder_original_images/ct64_firstorder_original_images.csv'
#path_ct_firstorder_128 = './../../data_source/radiomic_features/firstorder_original_images/ct128_firstorder_original_images.csv'
#path_ct_firstorder_32 = './../../data_source/radiomic_features/firstorder_removed_broken_images/ct32_firstorder_removed_broken_images.csv'
#path_ct_firstorder_64 = './../../data_source/radiomic_features/firstorder_removed_broken_images/ct64_firstorder_removed_broken_images.csv'
#path_ct_firstorder_128 = './../../data_source/radiomic_features/firstorder_removed_broken_images/ct128_firstorder_removed_broken_images.csv'
path_ct_firstorder_32 = './../../data_source/radiomic_features/firstorder_removed_broken_slices/ct32_firstorder_removed_broken_slices.csv'
path_ct_firstorder_64 = './../../data_source/radiomic_features/firstorder_removed_broken_slices/ct64_firstorder_removed_broken_slices.csv'
path_ct_firstorder_128 = './../../data_source/radiomic_features/firstorder_removed_broken_slices/ct128_firstorder_removed_broken_slices.csv'


# Input: PET first-order features at 32 / 64 / 128 gray level bins.
#path_pet_firstorder_32 = './../../data_source/radiomic_features/firstorder_original_images/pet32_firstorder_original_images.csv'
#path_pet_firstorder_64 = './../../data_source/radiomic_features/firstorder_original_images/pet64_firstorder_original_images.csv'
#path_pet_firstorder_128 = './../../data_source/radiomic_features/firstorder_original_images/pet128_firstorder_original_images.csv'
#path_pet_firstorder_32 = './../../data_source/radiomic_features/firstorder_removed_broken_images/pet32_firstorder_removed_broken_images.csv'
#path_pet_firstorder_64 = './../../data_source/radiomic_features/firstorder_removed_broken_images/pet64_firstorder_removed_broken_images.csv'
#path_pet_firstorder_128 = './../../data_source/radiomic_features/firstorder_removed_broken_images/pet128_firstorder_removed_broken_images.csv'
path_pet_firstorder_32 = './../../data_source/radiomic_features/firstorder_removed_broken_slices/pet32_firstorder_removed_broken_slices.csv'
path_pet_firstorder_64 = './../../data_source/radiomic_features/firstorder_removed_broken_slices/pet64_firstorder_removed_broken_slices.csv'
path_pet_firstorder_128 = './../../data_source/radiomic_features/firstorder_removed_broken_slices/pet128_firstorder_removed_broken_slices.csv'


# Input: CT texture features at 32 / 64 / 128 gray level bins.
#path_ct_texture_32 = './../../data_source/radiomic_features/texture_original_images/ct32_texture_original_images.csv'
#path_ct_texture_64 = './../../data_source/radiomic_features/texture_original_images/ct64_texture_original_images.csv'
#path_ct_texture_128 = './../../data_source/radiomic_features/texture_original_images/ct128_texture_original_images.csv'
#path_ct_texture_32 = './../../data_source/radiomic_features/texture_removed_broken_images/ct32_texture_removed_broken_images.csv'
#path_ct_texture_64 = './../../data_source/radiomic_features/texture_removed_broken_images/ct64_texture_removed_broken_images.csv'
#path_ct_texture_128 = './../../data_source/radiomic_features/texture_removed_broken_images/ct128_texture_removed_broken_images.csv'
path_ct_texture_32 = './../../data_source/radiomic_features/texture_removed_broken_slices/ct32_texture_removed_broken_slices.csv'
path_ct_texture_64 = './../../data_source/radiomic_features/texture_removed_broken_slices/ct64_texture_removed_broken_slices.csv'
path_ct_texture_128 = './../../data_source/radiomic_features/texture_removed_broken_slices/ct128_texture_removed_broken_slices.csv'


# Input: PET texture features at 32 / 64 / 128 gray level bins.
# The active lines previously pointed at the `ct*_texture_...` files, which
# made the PET texture features a copy of the CT texture features. They now
# follow the `pet*` naming used by the other dataset variants.
#path_pet_texture_32 = './../../data_source/radiomic_features/texture_original_images/pet32_texture_original_images.csv'
#path_pet_texture_64 = './../../data_source/radiomic_features/texture_original_images/pet64_texture_original_images.csv'
#path_pet_texture_128 = './../../data_source/radiomic_features/texture_original_images/pet128_texture_original_images.csv'
#path_pet_texture_32 = './../../data_source/radiomic_features/texture_removed_broken_images/pet32_texture_removed_broken_images.csv'
#path_pet_texture_64 = './../../data_source/radiomic_features/texture_removed_broken_images/pet64_texture_removed_broken_images.csv'
#path_pet_texture_128 = './../../data_source/radiomic_features/texture_removed_broken_images/pet128_texture_removed_broken_images.csv'
path_pet_texture_32 = './../../data_source/radiomic_features/texture_removed_broken_slices/pet32_texture_removed_broken_slices.csv'
path_pet_texture_64 = './../../data_source/radiomic_features/texture_removed_broken_slices/pet64_texture_removed_broken_slices.csv'
path_pet_texture_128 = './../../data_source/radiomic_features/texture_removed_broken_slices/pet128_texture_removed_broken_slices.csv'

    
# Collect one integer patient ID per entry in `path_to_dir`, taken from the
# first run of digits in the file name. The listing is sorted so the ID order
# is deterministic.
patient_id = []
for fname in sorted(os.listdir(path_to_dir)):
    digits = re.search(r'\d+', fname)
    if digits is None:
        # File names without any digits carry no patient ID and are skipped
        # explicitly, instead of relying on a bare `except` that would also
        # hide unrelated errors.
        continue
    patient_id.append(int(digits.group()))
len(patient_id)

196

# Shape Features

In [4]:
# NOTE: Using original tumor volume despite removing slices.
# Load the shape features; the first CSV column is read as the index.
shape = pd.read_csv(path_to_shape_features, index_col=0)
# Replace the index read from file with the patient IDs.
# NOTE(review): this assumes the CSV rows are in the same order as
# `patient_id` -- confirm against the extraction step.
shape.index = patient_id

# Preview the first rows.
shape.head()

Unnamed: 0,original_shape_Elongation,original_shape_Flatness,original_shape_LeastAxisLength,original_shape_MajorAxisLength,original_shape_Maximum2DDiameterColumn,original_shape_Maximum2DDiameterRow,original_shape_Maximum2DDiameterSlice,original_shape_Maximum3DDiameter,original_shape_MeshVolume,original_shape_MinorAxisLength,original_shape_Sphericity,original_shape_SurfaceArea,original_shape_SurfaceVolumeRatio,original_shape_VoxelVolume
2,0.738882,0.723925,27.060529,37.380273,42.953463,45.54119,43.65776,46.411206,17098.541667,27.619612,0.660413,4860.055715,0.284238,17142.0
4,0.7969,0.629917,19.845151,31.504408,39.446166,36.400549,30.364453,39.560081,9626.916667,25.105855,0.700069,3126.087371,0.324724,9661.0
5,0.600926,0.53514,22.515072,42.073251,46.957428,43.965896,32.802439,47.339202,16175.708333,25.282894,0.761164,4063.633046,0.251218,16214.0
8,0.784571,0.414247,30.263897,73.057649,75.133215,81.859636,66.753277,84.291162,69280.083333,57.318945,0.519631,15698.615155,0.226596,69354.0
10,0.69032,0.539743,19.449801,36.035312,34.176015,38.832976,33.837849,43.874822,10705.375,24.875896,0.642079,3658.448414,0.341739,10749.0


In [5]:
# (rows, columns) of the shape feature table.
shape.shape

(196, 14)

# CT First Order

In [6]:
# Read the CT first-order features for each gray level discretization; the
# first CSV column becomes the index.
ct_firstorder_32, ct_firstorder_64, ct_firstorder_128 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        path_ct_firstorder_32,
        path_ct_firstorder_64,
        path_ct_firstorder_128,
    )
)

# Place the three tables side by side (32, 64, 128 bins, in that order) and
# index the rows by patient ID.
raw_ct_firstorder = pd.concat(
    [ct_firstorder_32, ct_firstorder_64, ct_firstorder_128], axis=1
)
raw_ct_firstorder.index = patient_id

In [7]:
# Dimensions of the three CT first-order tables (32, 64, 128 bins).
tuple(
    frame.shape
    for frame in (ct_firstorder_32, ct_firstorder_64, ct_firstorder_128)
)

((196, 18), (196, 18), (196, 18))

In [8]:
# Relabel the CT first-order features per gray level discretization;
# `ct_firstorder_dropped` lists the labels found constant across them.
ct_firstorder, ct_firstorder_dropped = features_independent_of_gl(raw_ct_firstorder)

# PET First Order

In [9]:
# Read the PET first-order features for each gray level discretization; the
# first CSV column becomes the index.
pet_firstorder_32, pet_firstorder_64, pet_firstorder_128 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        path_pet_firstorder_32,
        path_pet_firstorder_64,
        path_pet_firstorder_128,
    )
)

# Place the three tables side by side (32, 64, 128 bins, in that order) and
# index the rows by patient ID.
raw_pet_firstorder = pd.concat(
    [pet_firstorder_32, pet_firstorder_64, pet_firstorder_128], axis=1
)
raw_pet_firstorder.index = patient_id

In [10]:
# Dimensions of the three PET first-order tables (32, 64, 128 bins).
tuple(
    frame.shape
    for frame in (pet_firstorder_32, pet_firstorder_64, pet_firstorder_128)
)

((196, 18), (196, 18), (196, 18))

In [11]:
# Relabel the PET first-order features per gray level discretization;
# `pet_firstorder_dropped` lists the labels found constant across them.
pet_firstorder, pet_firstorder_dropped = features_independent_of_gl(raw_pet_firstorder)

# CT Texture

In [12]:
# Read the CT texture features for each gray level discretization; the first
# CSV column becomes the index.
ct_texture_32, ct_texture_64, ct_texture_128 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        path_ct_texture_32,
        path_ct_texture_64,
        path_ct_texture_128,
    )
)

# Place the three tables side by side (32, 64, 128 bins, in that order) and
# index the rows by patient ID.
raw_ct_texture = pd.concat(
    [ct_texture_32, ct_texture_64, ct_texture_128], axis=1
)
raw_ct_texture.index = patient_id

In [13]:
# Dimensions of the three CT texture tables (32, 64, 128 bins).
tuple(
    frame.shape
    for frame in (ct_texture_32, ct_texture_64, ct_texture_128)
)

((196, 75), (196, 75), (196, 75))

In [14]:
# Relabel the CT texture features per gray level discretization;
# `ct_texture_dropped` lists the labels found constant across them.
ct_texture, ct_texture_dropped = features_independent_of_gl(raw_ct_texture)
# Display the labels that were found constant.
ct_texture_dropped

[]

# PET Texture

In [15]:
# Read the PET texture features for each gray level discretization; the first
# CSV column becomes the index.
pet_texture_32, pet_texture_64, pet_texture_128 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        path_pet_texture_32,
        path_pet_texture_64,
        path_pet_texture_128,
    )
)

# Place the three tables side by side (32, 64, 128 bins, in that order) and
# index the rows by patient ID.
raw_pet_texture = pd.concat(
    [pet_texture_32, pet_texture_64, pet_texture_128], axis=1
)
raw_pet_texture.index = patient_id

In [16]:
# Dimensions of the three PET texture tables (32, 64, 128 bins).
tuple(
    frame.shape
    for frame in (pet_texture_32, pet_texture_64, pet_texture_128)
)

((196, 75), (196, 75), (196, 75))

In [17]:
# Relabel the PET texture features per gray level discretization;
# `pet_texture_dropped` lists the labels found constant across them.
pet_texture, pet_texture_dropped = features_independent_of_gl(raw_pet_texture)

# Concatenate Features

In [18]:
# Join first-order and texture features per modality, then prefix every
# column with its modality so CT and PET labels stay distinct.
ct_features = pd.concat([ct_firstorder, ct_texture], axis=1)
ct_features.columns = ct_features.columns.map('CT_{}'.format)

pet_features = pd.concat([pet_firstorder, pet_texture], axis=1)
pet_features.columns = pet_features.columns.map('PET_{}'.format)

# Full radiomic feature table: shape + CT + PET, indexed by patient ID.
radiomics_features = pd.concat([shape, ct_features, pet_features], axis=1)
radiomics_features.index = patient_id

radiomics_features.shape

(196, 508)

In [19]:
# Load the clinical variables, the PET parameters and the two outcome
# targets (LRR, DFS); the first CSV column of each becomes the index.
clinical_features, pet_params, lrr, dfs = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        './../../data_source/to_analysis/clinical_params.csv',
        './../../data_source/to_analysis/pet_params.csv',
        './../../data_source/to_analysis/target_lrr.csv',
        './../../data_source/to_analysis/target_dfs.csv',
    )
)

# HACK: overwrite the indices read from file with the IDs stored on disk.
tmp_patient_id = np.load('./../../data_source/patient_id.npy')
clinical_features.index = tmp_patient_id
pet_params.index = tmp_patient_id
lrr.index = tmp_patient_id
dfs.index = tmp_patient_id

# Keep only the rows for the patients in `patient_id`, in that order.
clinical_features = clinical_features.loc[patient_id]
pet_params = pet_params.loc[patient_id]
lrr = lrr.loc[patient_id]
dfs = dfs.loc[patient_id]

# Persist the targets restricted to the selected patients.
lrr.to_csv(path_to_lrr)
dfs.to_csv(path_to_dfs)

tuple(target.shape for target in (lrr, dfs))

((196, 1), (196, 1))

In [20]:
# Full feature matrix: clinical variables, radiomic features, PET parameters.
X = pd.concat([clinical_features, radiomics_features, pet_params], axis=1)
X.shape

(196, 553)

In [21]:
# Write the full feature matrix to disk.
X.to_csv(path_to_feature_matrix)

In [22]:
# Texture-only dataset, used to compare artifact handling methods.
# NOTE(review): the columns of `ct_texture` / `pet_texture` are renamed in
# place, so running this cell a second time prefixes them twice.
ct_texture.columns = ct_texture.columns.map('CT_{}'.format)
pet_texture.columns = pet_texture.columns.map('PET_{}'.format)

texture_features = pd.concat([ct_texture, pet_texture], axis=1)
texture_features.to_csv(path_to_texture_features)

texture_features.shape

(196, 450)

In [23]:
# First-order-only dataset, used to compare artifact handling methods.
# NOTE(review): the columns of `ct_firstorder` / `pet_firstorder` are renamed
# in place, so running this cell a second time prefixes them twice.
ct_firstorder.columns = ct_firstorder.columns.map('CT_{}'.format)
pet_firstorder.columns = pet_firstorder.columns.map('PET_{}'.format)

firstorder_features = pd.concat([ct_firstorder, pet_firstorder], axis=1)
firstorder_features.to_csv(path_to_firstorder_features)

firstorder_features.shape

(196, 44)

In [24]:
# No CT features.
# Select the CT columns by the `CT_` prefix this notebook adds to them. The
# pattern is anchored so that a column merely containing "CT" somewhere in
# its name is not mistaken for a CT feature.
X_CT = X.filter(regex=r'^CT_')
# Drop by explicit column labels rather than passing the DataFrame itself.
X_not_CT = X.drop(X_CT.columns, axis=1)
X_not_CT.to_csv(path_no_ct)

X_CT.shape

(196, 247)