# Feature Extraction

In [None]:
import pandas as pd
import numpy as np
import nibabel as nib
from scipy import ndimage as nd
from scipy import stats
import time

## load images into dataframe

In [None]:
# Ground-truth table: a header-less CSV with one (file id, label) pair per row.
df = pd.read_csv(
    '../csvs/GT_Training.csv',
    header=None,
    names=['file', 'label'],
)

# Remove every single-quote character from the file ids.
df['file'] = df['file'].map(lambda name: name.replace("'", ''))

# Open the '.nii.gz' file stored under ../data/ for each file id.
df['img'] = df['file'].map(lambda name: nib.load('../data/' + str(name) + '.nii.gz'))

## create columns in dataframe for new features

In [None]:
# 27 histogram-bin column names: 'hist000-100', 'hist100-200', ..., 'hist2600-2700'.
bin_labels = ['hist' + str(x-1) + '00-' + str(x) + '00' for x in range(1,28)]

# Full, ordered list of feature columns: 18 scalar features followed by the bins.
new_features = 'x y z xyz xcom ycom zcom max xmax ymax zmax mean median std variance skew kurtosis entropy'.split() + bin_labels

# Append only the columns the frame does not have yet, so re-running this cell
# does not add the feature columns a second time. No blanket exception handler:
# if `df` is missing or the reindex fails, the error should surface here rather
# than later when the feature loop writes into columns that do not exist.
missing_features = [name for name in new_features if name not in df.columns]
if missing_features:
    df = df.reindex(columns=df.columns.tolist() + missing_features)
print(len(new_features),'new features')

## calculate features for each example image

In [None]:
# Compute the feature vector of every image and store it in the image's own row.
# Progress and timing are printed after every 100 images.
start = time.time()
start100 = start
# Series.items() yields each row label together with its image, so the result
# row is addressed directly by label. This avoids building a boolean mask over
# the whole index on every iteration and does not require the index to be
# 0..n-1 in enumerate order.
for i, (row_label, img) in enumerate(df.img.items()):
    if i % 100 == 0 and i != 0:
        now = time.time()
        print('i = {:<10d}time for last 100 = {:<10.3g}total time = {:<10.3g}'.format(i,now-start100,now-start))
        start100 = now

    # caching='unchanged' stops nibabel from keeping the decoded float array
    # inside the image object; with the default, every volume would stay in
    # memory for as long as df.img holds the image.
    data = img.get_fdata(caching='unchanged')

    # Volume dimensions and voxel count (the unpacking requires a 3-D image).
    x,y,z = img.shape
    xyz = x*y*z

    # Intensity-weighted centre of mass and location of the maximum value.
    xcom,ycom,zcom = nd.center_of_mass(data)
    min_,max_,(xmin,ymin,zmin),(xmax,ymax,zmax) = nd.extrema(data)

    # 27 equal-width bins between 0 and 2700, normalised to sum to 1.
    histogram = nd.histogram(data,0,2700,27)
    histogram = histogram/histogram.sum()

    # Summary statistics over all voxels (axis=None flattens the volume).
    mean = nd.mean(data)
    median = nd.median(data)
    std = nd.standard_deviation(data)
    var = nd.variance(data)
    skew = stats.skew(data,axis=None)
    kurtosis = stats.kurtosis(data,axis=None)
    entropy = stats.entropy(histogram)

    # Order must match `new_features`: 18 scalars followed by the 27 bins.
    features = [x,y,z,xyz,xcom,ycom,zcom,max_,xmax,ymax,zmax,mean,median,std,var,skew,kurtosis,entropy] + list(histogram)
    df.loc[row_label,new_features] = features

print('Done with {:d} rows. Total time = {:<10.3g}'.format(len(df),time.time()-start))

## save data

In [None]:
# Write the table to CSV without the 'img' column.
df.drop(columns=['img']).to_csv('../new_output/features.csv')