# Post processing of Extracted Radiomic Features 

**TODO:** 
* Remove features constant across gray levels.
* Include the GL discretization level in the names of features that vary across GL.

In [1]:
import sys
sys.path.append('./../utils/')

import os
import re

import numpy as np
import pandas as pd

import ioutil

from transforms import Whitening
from processing import PostProcessor

from sklearn.preprocessing import StandardScaler

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [2]:
def zero_var_filter(df, gl_bins=('32', '64', '128'), thresh=1e-15):
    """Collapse features that do not vary across gray-level discretizations.

    Columns of ``df`` that share a label are treated as one feature computed
    once per entry of ``gl_bins``. For each distinct label the per-row
    variance across those columns is summed over all rows:

    * if the sum is below ``thresh`` only the first column is kept, under
      the plain label;
    * otherwise column ``i`` is kept as ``'<label>_<gl_bins[i]>bins'``.

    A label that occurs only once is kept unchanged under its plain label.

    Args:
        df: DataFrame whose column labels may be repeated, one repetition
            per gray-level discretization, in the order given by ``gl_bins``.
        gl_bins: Bin labels used as name suffixes. Columns are matched to
            bins by position, so columns beyond ``len(gl_bins)`` are not
            copied to the output, and a label with fewer columns than bins
            raises ``IndexError``.
        thresh: Upper bound on the summed row variance below which a feature
            counts as constant across discretizations.

    Returns:
        A DataFrame with uniquely named columns, ordered by sorted label
        (``np.unique`` sorts the labels).
    """
    output = {}
    for label in np.unique(df.columns):

        features = df.loc[:, label]

        # A label occurring once selects a Series; it has no second axis to
        # take a variance over, so keep it as-is instead of failing.
        if features.ndim == 1:
            output[str(label)] = features
            continue

        total_variance = sum(features.var(axis=1))
        if total_variance < thresh:
            # Identical across discretizations: one copy is enough.
            output[str(label)] = features.iloc[:, 0]
        else:
            for num, gl_bin in enumerate(gl_bins):
                output[f'{label}_{gl_bin}bins'] = features.iloc[:, num]

    return pd.DataFrame(output)

In [3]:
# Index values loaded from disk (presumably patient identifiers, going by the
# file name — confirm).
true_index = np.load('./../../../data_source/patient_id.npy')
# Entries excluded from `true_index` before any feature table is built.
removeables = [38, 45, 82]
true_index = np.array(
    [idx for idx in true_index if idx not in removeables]
)

In [4]:
# Globals.
# NOTE(review): THRESH is not referenced by any cell visible in this notebook
# (the zero_var_filter calls pass thresh=1e-15 literally) — confirm whether it
# is still needed or was meant to be used there.
THRESH = 1e-10
# Passed as `filter_type` to every PostProcessor constructed in this notebook.
FILTER_TYPE = 'original'

# Shape features

In [5]:
# Path(s) to the extracted shape feature table(s); a single file here, kept in
# a list because it is passed as `path_to_features` to PostProcessor.
shape_features = [
    './../../../data_source/radiomic_features/shape/shape_features.csv'
]

In [6]:
# Post-processor for the shape features.
# NOTE(review): the meaning of `indices`, `filter_type` and `error_dir` is
# defined in processing.PostProcessor, which is not visible here.
shape_processsor = PostProcessor(
    path_to_features=shape_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/shape/errors/'
)

In [7]:
# Process the shape features, then write the result to the shape analysis
# directory. process() must run before to_file() and before any later read of
# `shape_processsor.concatenated`.
shape_processsor.process(drop_redundant=True, drop_missing=True)
# Last expression of the cell: its return value is what the notebook displays.
shape_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/shape'
)

<processing.PostProcessor at 0x1124d82e8>

# First order features

In [8]:
# First-order feature tables for CT, one file per gray-level discretization
# (32, 64 and 128, per the file names). The list order matches the `gl_bins`
# order passed to zero_var_filter.
ct_firstorder_features = [
    './../../../data_source/radiomic_features/firstorder/ct32_firstorder_no_filter_config.csv',
    './../../../data_source/radiomic_features/firstorder/ct64_firstorder_no_filter_config.csv',
    './../../../data_source/radiomic_features/firstorder/ct128_firstorder_no_filter_config.csv',
]
# Same layout for PET.
pet_firstorder_features = [
    './../../../data_source/radiomic_features/firstorder/pet32_firstorder_no_filter_config.csv',
    './../../../data_source/radiomic_features/firstorder/pet64_firstorder_no_filter_config.csv',
    './../../../data_source/radiomic_features/firstorder/pet128_firstorder_no_filter_config.csv',
]

In [9]:
# One post-processor per modality for the first-order features.
# NOTE(review): both processors are given the same `error_dir` — confirm that
# PostProcessor does not overwrite one modality's error output with the other's.
ct_firstorder_processsor = PostProcessor(
    path_to_features=ct_firstorder_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/firstorder/errors/'
)
pet_firstorder_processsor = PostProcessor(
    path_to_features=pet_firstorder_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/firstorder/errors/'
)

In [10]:
# Process and persist the first-order features, CT first and then PET.
# NOTE(review): both modalities are written to the same directory — confirm
# that to_file() derives distinct file names so the PET output does not replace
# the CT output.
ct_firstorder_processsor.process(drop_redundant=True, drop_missing=True)
ct_firstorder_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/firstorder/'
)
pet_firstorder_processsor.process(drop_redundant=True, drop_missing=True)
# Last expression of the cell: its return value is what the notebook displays.
pet_firstorder_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/firstorder/'
)

<processing.PostProcessor at 0x127460fd0>

In [11]:
# Collapse first-order features that are identical across the three gray-level
# discretizations, and suffix the remaining ones with their bin count.
# NOTE(review): `thresh` is passed literally as 1e-15 while the THRESH global
# is 1e-10 — confirm which value is intended.
ct_firstorder = zero_var_filter(
    ct_firstorder_processsor.concatenated,
    gl_bins=['32', '64', '128'], 
    thresh=1e-15
)
pet_firstorder = zero_var_filter(
    pet_firstorder_processsor.concatenated,
    gl_bins=['32', '64', '128'], 
    thresh=1e-15
)

# Texture features

In [12]:
# Texture feature tables for CT, one file per gray-level discretization
# (32, 64 and 128, per the file names). The list order matches the `gl_bins`
# order passed to zero_var_filter.
ct_texture_features = [
    './../../../data_source/radiomic_features/texture/ct32_texture_no_filter_config.csv',
    './../../../data_source/radiomic_features/texture/ct64_texture_no_filter_config.csv',
    './../../../data_source/radiomic_features/texture/ct128_texture_no_filter_config.csv',
]
# Same layout for PET.
pet_texture_features = [
    './../../../data_source/radiomic_features/texture/pet32_texture_no_filter_config.csv',
    './../../../data_source/radiomic_features/texture/pet64_texture_no_filter_config.csv',
    './../../../data_source/radiomic_features/texture/pet128_texture_no_filter_config.csv',
]

In [13]:
# One post-processor per modality for the texture features.
# NOTE(review): both processors are given the same `error_dir` — confirm that
# PostProcessor does not overwrite one modality's error output with the other's.
ct_texture_processsor = PostProcessor(
    path_to_features=ct_texture_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/texture/errors/'
)
pet_texture_processsor = PostProcessor(
    path_to_features=pet_texture_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/texture/errors/'
)

In [14]:
# Process and persist the texture features, CT first and then PET.
# NOTE(review): both modalities are written to the same directory — confirm
# that to_file() derives distinct file names so the PET output does not replace
# the CT output.
ct_texture_processsor.process(drop_redundant=True, drop_missing=True)
ct_texture_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/texture/'
)
pet_texture_processsor.process(drop_redundant=True, drop_missing=True)
# Last expression of the cell: its return value is what the notebook displays.
pet_texture_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/texture/'
)

<processing.PostProcessor at 0x12747aeb8>

In [15]:
# Collapse texture features that are identical across the three gray-level
# discretizations, and suffix the remaining ones with their bin count.
# NOTE(review): `thresh` is passed literally as 1e-15 while the THRESH global
# is 1e-10 — confirm which value is intended.
ct_texture = zero_var_filter(
    ct_texture_processsor.concatenated,
    gl_bins=['32', '64', '128'], 
    thresh=1e-15
)
pet_texture = zero_var_filter(
    pet_texture_processsor.concatenated,
    gl_bins=['32', '64', '128'], 
    thresh=1e-15
)

## Concatenate Features

In [16]:
# Join the first-order and texture tables column-wise, once per modality.
all_ct_features = pd.concat([ct_firstorder, ct_texture], axis=1)
all_pet_features = pd.concat([pet_firstorder, pet_texture], axis=1)

# Shown by the notebook as the cell result.
(all_ct_features.shape, all_pet_features.shape)

((195, 248), (195, 244))

In [17]:
# Prefix every column with its modality so CT and PET features keep distinct
# names once the two tables are concatenated side by side.
all_ct_features.columns = list(map('CT_{}'.format, all_ct_features.columns))
all_pet_features.columns = list(map('PET_{}'.format, all_pet_features.columns))

In [18]:
# Load the clinical and PET parameter tables, using the first CSV column as the
# row index.
_clinical = pd.read_csv('./../../../data_source/to_analysis/clinical_params.csv', index_col=0)
_pet_params = pd.read_csv('./../../../data_source/to_analysis/pet_params.csv', index_col=0)

# Keep only the rows labelled by `true_index`, in that order, so these tables
# cover the same entries as the feature tables.
clinical = _clinical.loc[true_index, :]
pet_params = _pet_params.loc[true_index, :]

In [20]:
# Sanity check: display the shapes of all tables about to be concatenated.
shape_processsor.concatenated.shape, all_ct_features.shape, all_pet_features.shape, clinical.shape, pet_params.shape

((195, 13), (195, 248), (195, 244), (195, 42), (195, 3))

In [21]:
# Build the full feature matrix by placing shape, clinical, CT, PET and PET
# parameter tables side by side (aligned on the row index).
X = pd.concat(
    [
        shape_processsor.concatenated,
        clinical,
        all_ct_features,
        all_pet_features,
        pet_params,
    ],
    axis=1,
)
# Shown by the notebook as the cell result.
X.shape

(195, 550)

In [22]:
# Persist the combined feature matrix (row index included) for the analysis step.
X.to_csv('./../../../data_source/to_analysis/anomaly_filtered_concat.csv')