# Post processing of Extracted Radiomic Features 

# TODO: Remove features constant across gray levels!!!

In [1]:
# Intention Hassan transform: Render feature independent of gray level discretization.

In [2]:
import sys
sys.path.append('./../utils/')

import os
import re

import numpy as np
import pandas as pd

import ioutil

from transforms import Whitening
from processing import PostProcessor

from sklearn.preprocessing import StandardScaler

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
true_index = np.load('./../../../data_source/patient_id.npy')

removeables = [38, 45, 82]
true_index = np.array([idx for idx in true_index if not idx in removeables])

In [4]:
# Globals.
FILTER_TYPE = 'original'

## Process Raw Features

In [5]:
shape_features = [
    './../../../data_source/radiomic_features/shape/shape_features.csv'
]
ct_firstorder_features = [
    './../../../data_source/radiomic_features/firstorder/ct32_firstorder_no_filter_config.csv',
    './../../../data_source/radiomic_features/firstorder/ct64_firstorder_no_filter_config.csv',
    './../../../data_source/radiomic_features/firstorder/ct128_firstorder_no_filter_config.csv',
]
pet_firstorder_features = [
    './../../../data_source/radiomic_features/firstorder/pet32_firstorder_no_filter_config.csv',
    './../../../data_source/radiomic_features/firstorder/pet64_firstorder_no_filter_config.csv',
    './../../../data_source/radiomic_features/firstorder/pet128_firstorder_no_filter_config.csv',
]
ct_texture_features = [
    './../../../data_source/radiomic_features/texture/ct32_texture_no_filter_config.csv',
    './../../../data_source/radiomic_features/texture/ct64_texture_no_filter_config.csv',
    './../../../data_source/radiomic_features/texture/ct128_texture_no_filter_config.csv',
]
pet_texture_features = [
    './../../../data_source/radiomic_features/texture/pet32_texture_no_filter_config.csv',
    './../../../data_source/radiomic_features/texture/pet64_texture_no_filter_config.csv',
    './../../../data_source/radiomic_features/texture/pet128_texture_no_filter_config.csv',
]

# Shape features

In [6]:
shape_processsor = PostProcessor(
    path_to_features=shape_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/shape/errors/'
)

In [7]:
shape_processsor.process(drop_redundant=True, drop_missing=True)
shape_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/shape'
)

<processing.PostProcessor at 0x10bac2f60>

# First order features

In [8]:
ct_firstorder_processsor = PostProcessor(
    path_to_features=ct_firstorder_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/firstorder/errors/'
)
pet_firstorder_processsor = PostProcessor(
    path_to_features=pet_firstorder_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/firstorder/errors/'
)

In [9]:
ct_firstorder_processsor.process(drop_redundant=True, drop_missing=True)
ct_firstorder_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/firstorder/'
)
pet_firstorder_processsor.process(drop_redundant=True, drop_missing=True)
pet_firstorder_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/firstorder/'
)

<processing.PostProcessor at 0x120aa4860>

In [18]:
# Remove firstorder features constant across GL discretization.
for col in ct_firstorder_processsor.concatenated.columns:
    features = ct_firstorder_processsor.concatenated.filter(regex=col)
    print(features.var(axis=0))

original_firstorder_10Percentile    8173.310416
original_firstorder_10Percentile    8173.310416
original_firstorder_10Percentile    8173.310416
dtype: float64
original_firstorder_90Percentile    628.901416
original_firstorder_90Percentile    628.901416
original_firstorder_90Percentile    628.901416
dtype: float64
original_firstorder_Energy    1.089917e+21
original_firstorder_Energy    1.089917e+21
original_firstorder_Energy    1.089917e+21
dtype: float64
original_firstorder_Entropy    0.142421
original_firstorder_Entropy    0.259093
original_firstorder_Entropy    0.311371
dtype: float64
original_firstorder_InterquartileRange    320.726458
original_firstorder_InterquartileRange    320.726458
original_firstorder_InterquartileRange    320.726458
dtype: float64
original_firstorder_Kurtosis    647.799718
original_firstorder_Kurtosis    647.799718
original_firstorder_Kurtosis    647.799718
dtype: float64
original_firstorder_Maximum    16440.217107
original_firstorder_Maximum    16440.217107


In [23]:
features.var(axis=1)

2      7.754818e-26
4      0.000000e+00
5      0.000000e+00
8      0.000000e+00
10     0.000000e+00
11     0.000000e+00
12     0.000000e+00
13     0.000000e+00
14     0.000000e+00
15     0.000000e+00
16     0.000000e+00
18     0.000000e+00
21     3.101927e-25
22     0.000000e+00
23     0.000000e+00
24     0.000000e+00
25     0.000000e+00
26     0.000000e+00
27     0.000000e+00
29     0.000000e+00
30     0.000000e+00
31     0.000000e+00
32     0.000000e+00
34     0.000000e+00
35     0.000000e+00
36     0.000000e+00
37     4.846761e-27
39     1.240771e-24
40     0.000000e+00
42     0.000000e+00
           ...     
213    0.000000e+00
215    4.963084e-24
216    0.000000e+00
217    0.000000e+00
218    3.101927e-25
220    0.000000e+00
222    0.000000e+00
223    0.000000e+00
224    0.000000e+00
225    1.985233e-23
228    0.000000e+00
229    0.000000e+00
230    0.000000e+00
231    0.000000e+00
232    0.000000e+00
233    0.000000e+00
239    0.000000e+00
240    0.000000e+00
241    0.000000e+00


# Texture features

In [29]:
ct_texture_processsor = PostProcessor(
    path_to_features=ct_texture_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/texture/errors/'
)
pet_texture_processsor = PostProcessor(
    path_to_features=pet_texture_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/texture/errors/'
)

In [30]:
ct_texture_processsor.process(drop_redundant=True, drop_missing=True)
ct_texture_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/texture/'
)
pet_texture_processsor.process(drop_redundant=True, drop_missing=True)
pet_texture_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/texture/'
)

<processing.PostProcessor at 0x126c48f60>

## Concatenate Features

In [31]:
all_ct_features = pd.concat(
    (
        ct_firstorder_processsor.concatenated, 
        ct_texture_processsor.concatenated
    ),
    axis=1
)
all_pet_features = pd.concat(
    (
        ct_firstorder_processsor.concatenated, 
        ct_texture_processsor.concatenated
    ),
    axis=1
)
all_ct_features.shape, all_pet_features.shape

((195, 276), (195, 276))

In [32]:
all_ct_features.columns = ['CT_{}'.format(col) for col in all_ct_features.columns]
all_pet_features.columns = ['PET_{}'.format(col) for col in all_pet_features.columns]

In [33]:
clinical = pd.read_csv('./../../../data_source/to_analysis/clinical_params.csv', index_col=0)
pet_params = pd.read_csv('./../../../data_source/to_analysis/pet_params.csv', index_col=0)

In [34]:
X = pd.concat(
    (
        shape_processsor.concatenated, 
        clinical, 
        all_ct_features, 
        all_pet_features, 
        pet_params
    ), axis=1
)
np.shape(X)

(198, 610)

In [35]:
X.to_csv('./../../../data_source/to_analysis/anomaly_filtered_concat.csv')