# Post processing of Extracted Radiomic Features 

**TODO:** 
* Remove features constant across gray levels.
* Include GL discretiztion level in features varying across GL.

In [57]:
import sys
sys.path.append('./../utils/')

import os
import re

import numpy as np
import pandas as pd

import ioutil

from transforms import Whitening
from processing import PostProcessor

from sklearn.preprocessing import StandardScaler

import seaborn as sns
import matplotlib.pyplot as plt
%matplotlib inline

In [58]:
true_index = np.load('./../../../data_source/patient_id.npy')

removeables = [38, 45, 82]
true_index = np.array([idx for idx in true_index if not idx in removeables])

In [59]:
# Globals.
THRESH = 1e-10
FILTER_TYPE = 'original'

## Process Raw Features

In [60]:
shape_features = [
    './../../../data_source/radiomic_features/shape/shape_features.csv'
]
ct_firstorder_features = [
    './../../../data_source/radiomic_features/firstorder/ct32_firstorder_no_filter_config.csv',
    './../../../data_source/radiomic_features/firstorder/ct64_firstorder_no_filter_config.csv',
    './../../../data_source/radiomic_features/firstorder/ct128_firstorder_no_filter_config.csv',
]
pet_firstorder_features = [
    './../../../data_source/radiomic_features/firstorder/pet32_firstorder_no_filter_config.csv',
    './../../../data_source/radiomic_features/firstorder/pet64_firstorder_no_filter_config.csv',
    './../../../data_source/radiomic_features/firstorder/pet128_firstorder_no_filter_config.csv',
]
ct_texture_features = [
    './../../../data_source/radiomic_features/texture/ct32_texture_no_filter_config.csv',
    './../../../data_source/radiomic_features/texture/ct64_texture_no_filter_config.csv',
    './../../../data_source/radiomic_features/texture/ct128_texture_no_filter_config.csv',
]
pet_texture_features = [
    './../../../data_source/radiomic_features/texture/pet32_texture_no_filter_config.csv',
    './../../../data_source/radiomic_features/texture/pet64_texture_no_filter_config.csv',
    './../../../data_source/radiomic_features/texture/pet128_texture_no_filter_config.csv',
]

# Shape features

In [62]:
shape_processsor = PostProcessor(
    path_to_features=shape_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/shape/errors/'
)

In [63]:
shape_processsor.process(drop_redundant=True, drop_missing=True)
shape_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/shape'
)

<processing.PostProcessor at 0x116ee8240>

# First order features

In [64]:
ct_firstorder_processsor = PostProcessor(
    path_to_features=ct_firstorder_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/firstorder/errors/'
)
pet_firstorder_processsor = PostProcessor(
    path_to_features=pet_firstorder_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/firstorder/errors/'
)

In [65]:
ct_firstorder_processsor.process(drop_redundant=True, drop_missing=True)
ct_firstorder_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/firstorder/'
)
pet_firstorder_processsor.process(drop_redundant=True, drop_missing=True)
pet_firstorder_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/firstorder/'
)

<processing.PostProcessor at 0x11e18cef0>

# Texture features

In [29]:
ct_texture_processsor = PostProcessor(
    path_to_features=ct_texture_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/texture/errors/'
)
pet_texture_processsor = PostProcessor(
    path_to_features=pet_texture_features, 
    indices=true_index, 
    filter_type=FILTER_TYPE, 
    error_dir='./../../../data_source/to_analysis/texture/errors/'
)

In [30]:
ct_texture_processsor.process(drop_redundant=True, drop_missing=True)
ct_texture_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/texture/'
)
pet_texture_processsor.process(drop_redundant=True, drop_missing=True)
pet_texture_processsor.to_file(
    path_to_dir='./../../../data_source/to_analysis/texture/'
)

<processing.PostProcessor at 0x126c48f60>

## Concatenate Features

In [31]:
all_ct_features = pd.concat(
    (
        ct_firstorder_processsor.concatenated, 
        ct_texture_processsor.concatenated
    ),
    axis=1
)
all_pet_features = pd.concat(
    (
        ct_firstorder_processsor.concatenated, 
        ct_texture_processsor.concatenated
    ),
    axis=1
)
all_ct_features.shape, all_pet_features.shape

((195, 276), (195, 276))

In [32]:
all_ct_features.columns = ['CT_{}'.format(col) for col in all_ct_features.columns]
all_pet_features.columns = ['PET_{}'.format(col) for col in all_pet_features.columns]

In [33]:
clinical = pd.read_csv('./../../../data_source/to_analysis/clinical_params.csv', index_col=0)
pet_params = pd.read_csv('./../../../data_source/to_analysis/pet_params.csv', index_col=0)

In [34]:
X = pd.concat(
    (
        shape_processsor.concatenated, 
        clinical, 
        all_ct_features, 
        all_pet_features, 
        pet_params
    ), axis=1
)
np.shape(X)

(198, 610)

In [35]:
X.to_csv('./../../../data_source/to_analysis/anomaly_filtered_concat.csv')