# Post-processing of Extracted Radiomic Features

* Uniquely label features.
* Remove features constant across gray level discretizations.

In [1]:
import sys
sys.path.append('./../utils/')

import os
import re
import glob

import numpy as np
import pandas as pd

In [2]:
def features_independent_of_gl(df, gl_bins=(32, 64, 128), thresh=1e-7):
    """Identify features constant across gray level discretizations.

    Columns of ``df`` that share a label are treated as the same feature
    computed at different gray level (GL) discretizations, in the order
    given by ``gl_bins``.

    Args:
        df: Data frame in which every feature label occurs once per
            discretization, i.e. ``len(gl_bins)`` times.
        gl_bins: Number of bins of each discretization, ordered like the
            repeated columns in ``df``.
        thresh: A feature counts as constant if its variance across the
            discretizations, summed over all rows, is below this value.

    Returns:
        A tuple ``(features, to_drop)``. ``features`` is a data frame holding
        one column (keeping the original label) for each constant feature
        and one column labelled ``<label>_<bins>bins`` per discretization
        for every other feature; labels are processed in sorted order.
        ``to_drop`` lists the labels of the constant features.

    Raises:
        ValueError: If a label does not occur exactly ``len(gl_bins)`` times.
    """
    # Materialise once so any iterable is accepted and can be measured.
    gl_bins = tuple(gl_bins)

    to_drop = []
    output = {}
    for label in np.unique(df.columns):
        # Select with a boolean mask: unlike `df.loc[:, label]` this yields
        # a data frame even when the label occurs only once.
        features = df.loc[:, df.columns == label]
        if features.shape[1] != len(gl_bins):
            raise ValueError(
                f'Feature {label!r} occurs {features.shape[1]} times, but '
                f'{len(gl_bins)} gray level discretizations were given.'
            )
        # Constant across discretizations: keep a single copy of the feature.
        # The built-in `sum` propagates NaN, so a feature with an undefined
        # row variance is never classified as constant.
        if sum(features.var(axis=1)) < thresh:
            output[str(label)] = features.iloc[:, 0]
            to_drop.append(label)
        else:
            # Otherwise keep every version, tagged with its bin count.
            for num, gl_bin in enumerate(gl_bins):
                output[f'{label}_{gl_bin}bins'] = features.iloc[:, num]

    return pd.DataFrame(output), to_drop

In [3]:
# Load the array stored in patient_id.npy; it is assigned below as the row
# index of every feature table.
patient_id = np.load('./../../data_source/patient_id.npy')

# Shape Features

In [4]:
# Read the shape feature table, taking the first CSV column as row labels.
shape = pd.read_csv(
    filepath_or_buffer='./../../data_source/radiomic_features/shape_original_images/shape_original_masks.csv',
    index_col=0,
)
# Relabel the rows with the `patient_id` array.
shape.index = patient_id

In [5]:
# Display the (rows, columns) of the shape feature table.
shape.shape

(198, 14)

# CT First Order

In [6]:
# Read the CT first-order table of each gray level discretization
# (32, 64 and 128 bins), taking the first CSV column as row labels.
ct_firstorder_32, ct_firstorder_64, ct_firstorder_128 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        './../../data_source/radiomic_features/firstorder_original_images/ct32_firstorder_original_images.csv',
        './../../data_source/radiomic_features/firstorder_original_images/ct64_firstorder_original_images.csv',
        './../../data_source/radiomic_features/firstorder_original_images/ct128_firstorder_original_images.csv',
    )
)

# Place the tables side by side: a label shared by the tables then appears
# once per table in the combined frame.
raw_ct_firstorder = pd.concat(
    [ct_firstorder_32, ct_firstorder_64, ct_firstorder_128], axis=1
)
raw_ct_firstorder.index = patient_id

In [7]:
# Display the shape of each CT first-order table so the three
# discretizations can be compared.
ct_firstorder_32.shape, ct_firstorder_64.shape, ct_firstorder_128.shape

((198, 18), (198, 18), (198, 18))

In [8]:
# Keep one column for each CT first-order feature that is constant across
# the discretizations and bin-suffixed columns for the others; the second
# return value lists the labels of the constant features.
ct_firstorder, ct_firstorder_dropped = features_independent_of_gl(raw_ct_firstorder)

# PET First Order

In [9]:
# Read the PET first-order table of each gray level discretization
# (32, 64 and 128 bins), taking the first CSV column as row labels.
pet_firstorder_32, pet_firstorder_64, pet_firstorder_128 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        './../../data_source/radiomic_features/firstorder_original_images/pet32_firstorder_original_images.csv',
        './../../data_source/radiomic_features/firstorder_original_images/pet64_firstorder_original_images.csv',
        './../../data_source/radiomic_features/firstorder_original_images/pet128_firstorder_original_images.csv',
    )
)

# Place the tables side by side: a label shared by the tables then appears
# once per table in the combined frame.
raw_pet_firstorder = pd.concat(
    [pet_firstorder_32, pet_firstorder_64, pet_firstorder_128], axis=1
)
raw_pet_firstorder.index = patient_id

In [10]:
# Display the shape of each PET first-order table so the three
# discretizations can be compared.
pet_firstorder_32.shape, pet_firstorder_64.shape, pet_firstorder_128.shape

((198, 18), (198, 18), (198, 18))

In [11]:
# Keep one column for each PET first-order feature that is constant across
# the discretizations and bin-suffixed columns for the others; the second
# return value lists the labels of the constant features.
pet_firstorder, pet_firstorder_dropped = features_independent_of_gl(raw_pet_firstorder)

# CT Texture

In [12]:
# Read the CT texture table of each gray level discretization
# (32, 64 and 128 bins), taking the first CSV column as row labels.
ct_texture_32, ct_texture_64, ct_texture_128 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        './../../data_source/radiomic_features/texture_original_images/ct32_texture_original_images.csv',
        './../../data_source/radiomic_features/texture_original_images/ct64_texture_original_images.csv',
        './../../data_source/radiomic_features/texture_original_images/ct128_texture_original_images.csv',
    )
)

# Place the tables side by side: a label shared by the tables then appears
# once per table in the combined frame.
raw_ct_texture = pd.concat(
    [ct_texture_32, ct_texture_64, ct_texture_128], axis=1
)
raw_ct_texture.index = patient_id

In [13]:
# Display the shape of each CT texture table so the three discretizations
# can be compared.
ct_texture_32.shape, ct_texture_64.shape, ct_texture_128.shape

((198, 75), (198, 75), (198, 75))

In [14]:
# Keep one column for each CT texture feature that is constant across the
# discretizations and bin-suffixed columns for the others.
ct_texture, ct_texture_dropped = features_independent_of_gl(raw_ct_texture)
# Display the labels of the features found to be constant.
ct_texture_dropped

[]

# PET Texture

In [15]:
# Read the PET texture table of each gray level discretization
# (32, 64 and 128 bins), taking the first CSV column as row labels.
pet_texture_32, pet_texture_64, pet_texture_128 = (
    pd.read_csv(csv_path, index_col=0)
    for csv_path in (
        './../../data_source/radiomic_features/texture_original_images/pet32_texture_original_images.csv',
        './../../data_source/radiomic_features/texture_original_images/pet64_texture_original_images.csv',
        './../../data_source/radiomic_features/texture_original_images/pet128_texture_original_images.csv',
    )
)

# Place the tables side by side: a label shared by the tables then appears
# once per table in the combined frame.
raw_pet_texture = pd.concat(
    [pet_texture_32, pet_texture_64, pet_texture_128], axis=1
)
raw_pet_texture.index = patient_id

In [16]:
# Display the shape of each PET texture table so the three discretizations
# can be compared.
pet_texture_32.shape, pet_texture_64.shape, pet_texture_128.shape

((198, 75), (198, 75), (198, 75))

In [17]:
# Keep one column for each PET texture feature that is constant across the
# discretizations and bin-suffixed columns for the others.
pet_texture, pet_texture_dropped = features_independent_of_gl(raw_pet_texture)
# Display the labels of the features found to be constant.
pet_texture_dropped

[]

# Concatenate Features

In [18]:
# CT: join the first-order and texture columns, then prefix every column
# label with the modality.
ct_features = pd.concat([ct_firstorder, ct_texture], axis=1)
ct_features.columns = list(map('CT_{}'.format, ct_features.columns))

# PET: same treatment as CT.
pet_features = pd.concat([pet_firstorder, pet_texture], axis=1)
pet_features.columns = list(map('PET_{}'.format, pet_features.columns))

# Shape columns keep their labels; CT and PET columns follow them.
radiomics_features = pd.concat([shape, ct_features, pet_features], axis=1)
radiomics_features.index = patient_id

radiomics_features.shape

(198, 508)

In [19]:
# Concat with clinical variables and PET params.
clinical_features = pd.read_csv('./../../data_source/to_analysis/clinical_params.csv', index_col=0)
pet_params = pd.read_csv('./../../data_source/to_analysis/pet_params.csv', index_col=0)

clinical_features.index = patient_id
pet_params.index = patient_id

In [20]:
# Full feature matrix: clinical variables, radiomic features, PET parameters.
X = pd.concat([clinical_features, radiomics_features, pet_params], axis=1)
X.shape

(198, 553)

In [21]:
# Write the combined feature matrix to CSV.
X.to_csv('./../../data_source/to_analysis/all_features_original_images.csv')

In [22]:
# Create texture features dataset for comparison of artifact handling method.
ct_texture.columns = ['CT_{}'.format(col) for col in ct_texture.columns]
pet_texture.columns = ['PET_{}'.format(col) for col in pet_texture.columns]

texture_features = pd.concat((ct_texture, pet_texture), axis=1)
texture_features.to_csv('./../../data_source/to_analysis/original_images/texture_original_images.csv')

In [23]:
# Create firstorder features dataset for comparison of artifact handling method.
ct_firstorder.columns = ['CT_{}'.format(col) for col in ct_firstorder.columns]
pet_firstorder.columns = ['PET_{}'.format(col) for col in pet_firstorder.columns]

firstorder_features = pd.concat((ct_firstorder, pet_firstorder), axis=1)
firstorder_features.to_csv('./../../data_source/to_analysis/original_images/firstorder_original_images.csv')