In [1]:
import tensorflow as tf
from keras.backend.tensorflow_backend import set_session
# Limit this process to 18% of each visible GPU's memory, then install the
# resulting TensorFlow session as the one the Keras backend uses.
config = tf.ConfigProto(
    gpu_options=tf.GPUOptions(per_process_gpu_memory_fraction=0.18)
)
set_session(tf.Session(config=config))

Using TensorFlow backend.


In [2]:
from keras.models import load_model
import numpy as np
import pandas as pd
import os
import time

In [3]:
def feature_vect(model, PATH_VOXELS, patient):
    """Summarise the model's predictions for one patient as a flat feature vector.

    The patient's ``.npz`` archive is read from ``PATH_VOXELS``; its ``vox``
    array is passed to ``model.predict`` and the first four outputs (treated
    here as malignancy, spiculation, lobulation and diameter, in that order)
    are reduced over axis 0 to a maximum and a standard deviation each.
    Location features come from ``cents`` divided element-wise by ``shape``.

    Parameters
    ----------
    model : object
        Anything exposing ``predict(x=...)`` whose result can be indexed 0..3.
    PATH_VOXELS : str
        Directory holding the per-patient archives (trailing separator optional).
    patient : str
        File name of the archive inside ``PATH_VOXELS``.

    Returns
    -------
    numpy.ndarray
        Concatenation, in order, of: the four maxima, the four standard
        deviations, the normalised centroid at the arg-max of output 0, and
        the per-axis standard deviation of all normalised centroids.
        NOTE(review): this is 14 values when every output has a single column
        and centroids have three coordinates -- the layout the column list in
        ``score_model`` expects; confirm against the trained model.
    """
    # The context manager releases the archive's file handle as soon as the
    # arrays are materialised; os.path.join tolerates a missing trailing slash.
    with np.load(os.path.join(PATH_VOXELS, patient)) as patient_array:
        voxels = patient_array['vox']
        centroids = patient_array['cents']
        shape = patient_array['shape']

    preds = np.array(model.predict(x=voxels))
    # Flat arg-max over output 0; it is used below as a row index into the
    # centroids, which presumes output 0 has one value per candidate.
    ixs = np.argmax(preds[0])

    xmax_malig = np.max(preds[0], axis=0)
    xmax_spiculation = np.max(preds[1], axis=0)
    xmax_lobulation = np.max(preds[2], axis=0)
    xmax_diameter = np.max(preds[3], axis=0)

    xsd_malig = np.std(preds[0], axis=0)
    xsd_spiculation = np.std(preds[1], axis=0)
    xsd_lobulation = np.std(preds[2], axis=0)
    xsd_diameter = np.std(preds[3], axis=0)

    normalized_locs = centroids.astype('float32') / shape.astype('float32')

    # The std-dev group now really contains xsd_lobulation; previously the
    # lobulation maximum was concatenated a second time in this slot.
    feats = np.concatenate([
        xmax_malig, xmax_spiculation, xmax_lobulation, xmax_diameter,
        xsd_malig, xsd_spiculation, xsd_lobulation, xsd_diameter,
        normalized_locs[ixs], normalized_locs.std(axis=0),
    ])
    return feats

def score_model(PATH_MODEL,PATH_VOXELS, file_name):
    """Compute the feature vector of every patient and save them as one CSV.

    Loads the Keras model at ``PATH_MODEL``, calls ``feature_vect`` for every
    entry returned by ``os.listdir(PATH_VOXELS)`` and writes a table indexed
    by file name to ``file_name + '.csv'``. Progress is printed for every
    50th patient, and the elapsed time (measured after the model has been
    loaded) is printed at the end. An empty directory produces a CSV that
    contains only the header row.

    The seventh column is named ``'xsd_lobulation'`` to match the value
    ``feature_vect`` puts in that slot (it was previously labelled
    ``'xmax_lobulation'``).

    Parameters
    ----------
    PATH_MODEL : str
        Path handed to ``load_model``.
    PATH_VOXELS : str
        Directory containing one archive per patient.
    file_name : str
        Output path without the ``.csv`` extension.
    """
    model_v24 = load_model(PATH_MODEL)

    start = time.time()
    patients = os.listdir(PATH_VOXELS)
    print ("patient numbers: ", len(patients))

    col=['max_malig','max_spiculation','max_lobulation','max_diameter',
         'xsd_malig', 'xsd_spiculation', 'xsd_lobulation','xsd_diameter',
         'loc_from_malig_x','loc_from_malig_y','loc_from_malig_z',
         'std_locs_x','std_locs_y','std_locs_z']

    all_features = []
    for num, patient in enumerate(patients):
        all_features.append(feature_vect(model_v24,PATH_VOXELS, patient))
        if num%50==0:
            # patient[:-4] drops the last four characters (the extension).
            print ("Patient %4d:" %num, patient[:-4])

    # Stack once after the loop instead of re-stacking on every iteration;
    # np.stack rejects an empty list, so fall back to a zero-row matrix.
    if all_features:
        X = np.stack(all_features)
    else:
        X = np.empty((0, len(col)))

    df = pd.DataFrame(data=X,index=patients, columns=col)
    df.to_csv(file_name + '.csv')
    print ("It took %d seconds"%(time.time()-start))

In [None]:
def get_feature_matrix(model_num, stage_num):
    """Score one model on one stage's voxel set and save the feature matrix.

    Derives the model file, the voxel directory and the output name from the
    two integers, then hands them to ``score_model``.

    Parameters
    ----------
    model_num : int
        Number substituted into the model file name.
    stage_num : int
        Number substituted into the voxel directory name.
    """
    model_file = "../Models/LUNA_model_v2_%d.h5" % model_num
    voxel_dir = '../../data/stage%d_voxels_mask/' % stage_num
    output_stem = 'feature_matrix_model%d_stage%d' % (model_num, stage_num)
    score_model(
        PATH_MODEL=model_file,
        PATH_VOXELS=voxel_dir,
        file_name=output_stem,
    )

In [None]:
# Write feature_matrix_model2_stage2.csv: model 2 scored on the stage-2 voxels.
get_feature_matrix(model_num=2, stage_num=2)

  return cls(**config)


patient numbers:  210
Patient    0: 5a06ebc438b934a360a5e469a6874505
Patient   50: 9f6c458b4276ddbc0ebaa374037bb099
Patient  100: 97434189865e083f8f553e3a922b927a


In [None]:
# Write feature_matrix_model1_stage2.csv: model 1 scored on the stage-2 voxels.
get_feature_matrix(model_num=1, stage_num=2)