# Post-processing of Extracted Radiomic Features

* Uniquely label features.
* Collapse features that are constant across gray level discretizations into a single column.

In [132]:
import sys
sys.path.append('./../utils/')

import os
import re
import glob

import numpy as np
import pandas as pd

In [133]:
def features_independent_of_gl(df, gl_bins=(32, 64, 128), thresh=1e-7):
    """Collapse features that do not vary with the gray level discretization.

    Columns of ``df`` that share a label are treated as the same feature
    computed at different discretizations, ordered like ``gl_bins``.

    Args:
        df: Feature table in which a label may occur several times, e.g.
            the column-wise concatenation of one table per discretization.
        gl_bins: Bin counts used to suffix the columns of varying features;
            must be ordered like the repeated columns in ``df``.
        thresh: A feature counts as independent of the discretization if
            its row-wise variances across the repeated columns sum to less
            than this value.

    Returns:
        A tuple ``(features, independent)``. ``features`` is a DataFrame
        with unique column labels: ``<label>`` for features that were
        collapsed into their first column and ``<label>_<bins>bins`` for
        features that vary. ``independent`` lists the labels that were
        found to be constant across discretizations.

    Raises:
        IndexError: If a varying feature has fewer columns than there are
            entries in ``gl_bins``.
    """
    to_drop = []
    output = {}
    columns = df.columns
    for label in np.unique(columns):
        # A boolean mask always yields a DataFrame, also for labels that
        # occur only once (``df.loc[:, label]`` would return a Series there
        # and break the row-wise variance below).
        features = df.loc[:, columns == label]
        if features.shape[1] == 1:
            # Nothing to compare against: keep the column under its label
            # without reporting it as independent of the discretization.
            output[str(label)] = features.iloc[:, 0]
            continue
        # NaN variances propagate, so a feature with undefined variance is
        # never collapsed.
        total_variance = features.var(axis=1).sum(skipna=False)
        if total_variance < thresh:
            # Identical across discretizations: one copy is sufficient.
            output[str(label)] = features.iloc[:, 0]
            to_drop.append(label)
        else:
            for num, gl_bin in enumerate(gl_bins):
                output[f'{label}_{gl_bin}bins'] = features.iloc[:, num]

    return pd.DataFrame(output), to_drop

In [134]:
#patient_id = np.load('./../../data_source/patient_id.npy')

# HACK: Recover the patient IDs from the image file names instead of loading
# them from `patient_id.npy`.
import os 
import re

path_to_dir = './../../data_source/images/ct_removed_broken_images/'
patient_id = []
# NOTE(review): the file names are sorted as strings; the IDs are later
# assigned positionally as row labels, so this order presumably has to match
# the row order of the feature tables -- confirm.
for fname in sorted(os.listdir(path_to_dir)):
    numbers = re.findall(r'\d+', fname)
    # Skip directory entries without a number in their name. Testing for this
    # explicitly replaces a bare `except: pass`, which would also have hidden
    # unrelated errors.
    if numbers:
        patient_id.append(int(numbers[0]))
len(patient_id)

195

# Shape Features

In [135]:
# NOTE: Using original tumor volume despite removing slices.
# The rows are relabelled with the patient IDs in their current order.
shape = pd.read_csv(
    #'./../../data_source/radiomic_features/shape/shape_original_masks.csv',
    './../../data_source/radiomic_features/shape/shape_removed_broken_images.csv',
    index_col=0
).set_index(pd.Index(patient_id))

In [136]:
# Sanity check: (number of rows, number of shape feature columns).
shape.shape

(195, 14)

# CT First Order

In [138]:
# Read the CT first order feature tables, one per gray level discretization.
# The commented-out paths point to the features of the original images.
ct_firstorder_32, ct_firstorder_64, ct_firstorder_128 = [
    pd.read_csv(path_to_csv, index_col=0)
    for path_to_csv in (
        #'./../../data_source/radiomic_features/firstorder_original_images/ct32_firstorder_original_images.csv',
        './../../data_source/radiomic_features/firstorder_removed_broken_images/ct32_firstorder_removed_broken_images_config.csv',
        #'./../../data_source/radiomic_features/firstorder_original_images/ct64_firstorder_original_images.csv',
        './../../data_source/radiomic_features/firstorder_removed_broken_images/ct64_firstorder_removed_broken_images_config.csv',
        #'./../../data_source/radiomic_features/firstorder_original_images/ct128_firstorder_original_images.csv',
        './../../data_source/radiomic_features/firstorder_removed_broken_images/ct128_firstorder_removed_broken_images_config.csv',
    )
]

# Place the three tables side by side (columns keep their labels, so a label
# shared by the tables occurs once per table) and label the rows with the
# patient IDs.
raw_ct_firstorder = pd.concat(
    [ct_firstorder_32, ct_firstorder_64, ct_firstorder_128],
    axis=1
)
raw_ct_firstorder.index = patient_id

In [139]:
# Sanity check: compare the shapes of the three CT first order tables.
ct_firstorder_32.shape, ct_firstorder_64.shape, ct_firstorder_128.shape

((195, 18), (195, 18), (195, 18))

In [140]:
# Collapse the CT first order features that are constant across the gray level
# discretizations; the second return value lists their labels.
ct_firstorder, ct_firstorder_dropped = features_independent_of_gl(raw_ct_firstorder)

# PET First Order

In [141]:
# Read the PET first order feature tables, one per gray level discretization.
# The commented-out paths point to the features of the original images.
pet_firstorder_32, pet_firstorder_64, pet_firstorder_128 = [
    pd.read_csv(path_to_csv, index_col=0)
    for path_to_csv in (
        #'./../../data_source/radiomic_features/firstorder_original_images/pet32_firstorder_original_images.csv',
        './../../data_source/radiomic_features/firstorder_removed_broken_images/pet32_firstorder_removed_broken_images_config.csv',
        #'./../../data_source/radiomic_features/firstorder_original_images/pet64_firstorder_original_images.csv',
        './../../data_source/radiomic_features/firstorder_removed_broken_images/pet64_firstorder_removed_broken_images_config.csv',
        #'./../../data_source/radiomic_features/firstorder_original_images/pet128_firstorder_original_images.csv',
        './../../data_source/radiomic_features/firstorder_removed_broken_images/pet128_firstorder_removed_broken_images_config.csv',
    )
]

# Place the three tables side by side (columns keep their labels, so a label
# shared by the tables occurs once per table) and label the rows with the
# patient IDs.
raw_pet_firstorder = pd.concat(
    [pet_firstorder_32, pet_firstorder_64, pet_firstorder_128],
    axis=1
)
raw_pet_firstorder.index = patient_id

In [142]:
# Sanity check: compare the shapes of the three PET first order tables.
pet_firstorder_32.shape, pet_firstorder_64.shape, pet_firstorder_128.shape

((195, 18), (195, 18), (195, 18))

In [143]:
# Collapse the PET first order features that are constant across the gray
# level discretizations; the second return value lists their labels.
pet_firstorder, pet_firstorder_dropped = features_independent_of_gl(raw_pet_firstorder)

# CT Texture

In [144]:
# Read the CT texture feature tables, one per gray level discretization.
# The commented-out paths point to the features of the original images.
ct_texture_32, ct_texture_64, ct_texture_128 = [
    pd.read_csv(path_to_csv, index_col=0)
    for path_to_csv in (
        #'./../../data_source/radiomic_features/texture_original_images/ct32_texture_original_images.csv',
        './../../data_source/radiomic_features/texture_removed_broken_images/ct32_texture_removed_broken_images_config.csv',
        #'./../../data_source/radiomic_features/texture_original_images/ct64_texture_original_images.csv',
        './../../data_source/radiomic_features/texture_removed_broken_images/ct64_texture_removed_broken_images_config.csv',
        #'./../../data_source/radiomic_features/texture_original_images/ct128_texture_original_images.csv',
        './../../data_source/radiomic_features/texture_removed_broken_images/ct128_texture_removed_broken_images_config.csv',
    )
]

# Place the three tables side by side (columns keep their labels, so a label
# shared by the tables occurs once per table) and label the rows with the
# patient IDs.
raw_ct_texture = pd.concat(
    [ct_texture_32, ct_texture_64, ct_texture_128],
    axis=1
)
raw_ct_texture.index = patient_id

In [145]:
# Sanity check: compare the shapes of the three CT texture tables.
ct_texture_32.shape, ct_texture_64.shape, ct_texture_128.shape

((195, 18), (195, 18), (195, 18))

In [146]:
# Collapse the CT texture features that are constant across the gray level
# discretizations and display the labels of the collapsed features.
# NOTE(review): the output recorded for this cell lists only
# `original_firstorder_*` labels, and the texture tables have 18 columns just
# like the first order tables -- confirm that the texture CSV files really
# contain texture features.
ct_texture, ct_texture_dropped = features_independent_of_gl(raw_ct_texture)
ct_texture_dropped

['original_firstorder_10Percentile',
 'original_firstorder_90Percentile',
 'original_firstorder_Energy',
 'original_firstorder_InterquartileRange',
 'original_firstorder_Kurtosis',
 'original_firstorder_Maximum',
 'original_firstorder_Mean',
 'original_firstorder_MeanAbsoluteDeviation',
 'original_firstorder_Median',
 'original_firstorder_Minimum',
 'original_firstorder_Range',
 'original_firstorder_RobustMeanAbsoluteDeviation',
 'original_firstorder_RootMeanSquared',
 'original_firstorder_Skewness',
 'original_firstorder_TotalEnergy',
 'original_firstorder_Variance']

# PET Texture

In [147]:
# Read the PET texture feature tables, one per gray level discretization.
# FIX: these tables used to be read from the `ct*_texture_...` files, which
# made the PET texture features a copy of the CT texture features.
# NOTE(review): the `pet*` file names follow the naming of the PET first order
# files -- confirm that they exist under this name.
pet_texture_32 = pd.read_csv(
    #'./../../data_source/radiomic_features/texture_original_images/pet32_texture_original_images.csv',
    './../../data_source/radiomic_features/texture_removed_broken_images/pet32_texture_removed_broken_images_config.csv',
    index_col=0
)
pet_texture_64 = pd.read_csv(
    #'./../../data_source/radiomic_features/texture_original_images/pet64_texture_original_images.csv',
    './../../data_source/radiomic_features/texture_removed_broken_images/pet64_texture_removed_broken_images_config.csv',
    index_col=0
)
pet_texture_128 = pd.read_csv(
    #'./../../data_source/radiomic_features/texture_original_images/pet128_texture_original_images.csv',
    './../../data_source/radiomic_features/texture_removed_broken_images/pet128_texture_removed_broken_images_config.csv',
    index_col=0
)
# Place the three tables side by side and label the rows with the patient IDs.
raw_pet_texture = pd.concat(
    (
        pet_texture_32,
        pet_texture_64,
        pet_texture_128
    ),
    axis=1
)
raw_pet_texture.index = patient_id

In [148]:
# Sanity check: compare the shapes of the three PET texture tables.
pet_texture_32.shape, pet_texture_64.shape, pet_texture_128.shape

((195, 18), (195, 18), (195, 18))

In [149]:
# Collapse the PET texture features that are constant across the gray level
# discretizations; the second return value lists their labels.
pet_texture, pet_texture_dropped = features_independent_of_gl(raw_pet_texture)

# Concatenate Features

In [150]:
# Join first order and texture features per modality.
ct_features = pd.concat([ct_firstorder, ct_texture], axis=1)
pet_features = pd.concat([pet_firstorder, pet_texture], axis=1)

# Prefix every column label with its modality.
ct_features.columns = ct_features.columns.map('CT_{}'.format)
pet_features.columns = pet_features.columns.map('PET_{}'.format)

# Full radiomics table: shape, CT and PET features, rows labelled by patient ID.
radiomics_features = pd.concat([shape, ct_features, pet_features], axis=1)
radiomics_features.index = patient_id

radiomics_features.shape

(195, 102)

In [151]:
# Concat with clinical variables and PET params.
clinical_features = pd.read_csv('./../../data_source/to_analysis/clinical_params.csv', index_col=0)
pet_params = pd.read_csv('./../../data_source/to_analysis/pet_params.csv', index_col=0)
lrr = pd.read_csv('./../../data_source/to_analysis/target_lrr.csv', index_col=0)
# FIX: this used to read `target_lrr.csv` a second time, so the DFS target
# written below was a copy of the LRR target.
# NOTE(review): the name `target_dfs.csv` is inferred from the name of the LRR
# file -- confirm that the DFS target is stored under this name.
dfs = pd.read_csv('./../../data_source/to_analysis/target_dfs.csv', index_col=0)


# HACK: Label the rows of the tables with the stored patient IDs and keep only
# the patients in `patient_id`, in that order.
_patient_id = np.load('./../../data_source/patient_id.npy')

clinical_features.index = _patient_id
pet_params.index = _patient_id

clinical_features = clinical_features.loc[patient_id, :]
pet_params = pet_params.loc[patient_id, :]

lrr.index = _patient_id
lrr = lrr.loc[patient_id, :]

dfs.index = _patient_id
dfs = dfs.loc[patient_id, :]

# Store the targets of the selected patients.
lrr.to_csv('./../../data_source/to_analysis/removed_broken_images/lrr_removed_broken_images.csv')
dfs.to_csv('./../../data_source/to_analysis/removed_broken_images/dfs_removed_broken_images.csv')

#clinical_features.index = patient_id
#pet_params.index = patient_id

In [152]:
# Feature matrix: clinical variables, radiomic features and PET parameters
# side by side.
X = pd.concat([clinical_features, radiomics_features, pet_params], axis=1)
X.shape

(195, 147)

In [153]:
# Store the feature matrix; the commented-out path is the target used for the
# original images.
#X.to_csv('./../../data_source/to_analysis/original_images/all_features_original_images.csv')
X.to_csv('./../../data_source/to_analysis/removed_broken_images/all_features_removed_broken_images.csv')

In [154]:
# Texture-only dataset for comparing the artifact handling methods.
# NOTE: the tables are relabelled in place, so running this cell again adds
# the modality prefix a second time.
ct_texture.columns = ct_texture.columns.map('CT_{}'.format)
pet_texture.columns = pet_texture.columns.map('PET_{}'.format)

texture_features = pd.concat([ct_texture, pet_texture], axis=1)
#texture_features.to_csv('./../../data_source/to_analysis/original_images/texture_original_images.csv')
texture_features.to_csv('./../../data_source/to_analysis/removed_broken_images/texture_removed_broken_images.csv')

In [155]:
# First-order-only dataset for comparing the artifact handling methods.
# NOTE: the tables are relabelled in place, so running this cell again adds
# the modality prefix a second time.
ct_firstorder.columns = ct_firstorder.columns.map('CT_{}'.format)
pet_firstorder.columns = pet_firstorder.columns.map('PET_{}'.format)

firstorder_features = pd.concat([ct_firstorder, pet_firstorder], axis=1)
#firstorder_features.to_csv('./../../data_source/to_analysis/original_images/firstorder_original_images.csv')
firstorder_features.to_csv('./../../data_source/to_analysis/removed_broken_images/firstorder_removed_broken_images.csv')