In [20]:
import os, sys, json, glob
import pandas as pd
import numpy as np

# if recursion error occurs set recursion limit to higher value
# NOTE: per the Python documentation for sys.setrecursionlimit, a too-high
# limit can lead to a crash instead of a RecursionError, so this value is a
# workaround rather than a guarantee.
sys.setrecursionlimit(10000000)

# set path
# Root directory: it is searched recursively for input CSV files and is also
# where fill_db writes its per-subject output files by default.
path = '../results_bissenbay_assignmet2_codModByAlfredo/'

def _neighbor_positions(data):
    """Return, for each row of *data*, the row positions of its neighbours.

    Two rows are neighbours when the Manhattan distance between their
    (x, y, z) values is exactly 1 and their 'voxel_id' values differ.
    Positions are returned in ascending order.
    """
    xs = data['x'].to_numpy()
    ys = data['y'].to_numpy()
    zs = data['z'].to_numpy()
    ids = data['voxel_id'].to_numpy()

    neighbors = []
    for pos in range(len(data)):
        # Same operand order as |dx| + |dy| + |dz| so float inputs compare
        # identically; plain arrays avoid building a DataFrame per voxel.
        distance = np.abs(xs - xs[pos]) + np.abs(ys - ys[pos]) + np.abs(zs - zs[pos])
        mask = (distance == 1) & (ids != ids[pos])
        neighbors.append(np.flatnonzero(mask).tolist())
    return neighbors


def _collect_component(start, neighbors, visited):
    """Return all row positions connected to *start*, marking them visited.

    Uses an explicit stack instead of recursion, so the size of a component
    is not bounded by the interpreter's recursion limit.
    """
    visited[start] = True
    pending = [start]
    members = []
    while pending:
        current = pending.pop()
        members.append(current)
        for candidate in neighbors[current]:
            if not visited[candidate]:
                visited[candidate] = True
                pending.append(candidate)
    return members


def _json_default(o):
    """Convert numpy integers, which the json module cannot encode itself."""
    if isinstance(o, np.integer):
        return int(o)
    raise TypeError('Object of type ' + type(o).__name__ + ' is not JSON serializable')


def fill_db(file, patient_id, image_type, image_time, base_path=None):
    """Find connected voxel components in one CSV and write the results.

    Reads *file*, treats its first column as 'voxel_id', and links every
    voxel to the voxels at Manhattan distance 1. Two files are written next
    to each other under ``<base_path>/<patient_id>/<image_type>/``:

    * ``<patient_id>_<image_type>_<image_time>_features.csv`` - the input
      table plus a 'neighbor_voxels' column (lists of row labels);
    * ``<patient_id>_<image_type>_<image_time>.json`` - subject, image time,
      the components and their count.

    A line is printed for every component that contains at least one voxel
    with six neighbours.

    Args:
        file: Path of the CSV to read; it must provide 'x', 'y', 'z' columns.
        patient_id: Subject identifier, used in output paths and the JSON.
        image_type: Image type, used in output paths and the printed line.
        image_time: Time point, used in output paths and the JSON.
        base_path: Root of the output tree. Defaults to the module-level
            ``path`` so existing callers keep their behaviour.

    The target directory is not created here; it has to exist already.
    """
    if base_path is None:
        base_path = path

    file_stem = '_'.join((patient_id, image_type, image_time))
    path_to_save = os.path.join(base_path, patient_id, image_type) + '/' + file_stem

    # preprocessing
    data = pd.read_csv(file)
    # Rename by assigning a new column list; writing into `columns.values`
    # mutates the Index's backing array in place, which pandas does not support.
    data.columns = ['voxel_id'] + list(data.columns[1:])

    labels = data.index.tolist()
    coords = list(zip(data['x'].tolist(), data['y'].tolist(), data['z'].tolist()))

    # find neighbor voxels
    neighbors = _neighbor_positions(data)
    # Build the object column in one step rather than seeding it with ''
    # and overwriting cell by cell with lists.
    data['neighbor_voxels'] = pd.Series(
        [[labels[pos] for pos in row] for row in neighbors],
        index=data.index,
        dtype=object,
    )

    # save csv with a new column neighbor_voxels
    data.to_csv(path_to_save + '_features.csv')

    # find components
    visited = [False] * len(labels)
    components = {}
    for start in range(len(labels)):
        if visited[start]:
            continue

        members = _collect_component(start, neighbors, visited)
        voxels = sorted((labels[pos], coords[pos]) for pos in members)

        # a voxel with all six neighbours present counts as internal
        internal_voxels = sum(1 for pos in members if len(neighbors[pos]) == 6)
        total_voxels = len(voxels)
        external_voxels = total_voxels - internal_voxels

        component = {
            'voxels': voxels,
            'number_of_voxels_in_component': total_voxels,
            'number_of_internal_voxels': internal_voxels,
            'number_of_external_voxels': external_voxels,
            'percentage_of_internal_voxels': round((internal_voxels / total_voxels) * 100, 2),
            'percentage_of_external_voxels': round((external_voxels / total_voxels) * 100, 2),
        }

        component_name = 'component_' + str(len(components) + 1)
        if component['percentage_of_internal_voxels'] > 0:
            print(patient_id, image_type, image_time, component_name)

        components[component_name] = component

    # create db data
    db = {
        'subject': patient_id,
        'image': image_time,
        'components': components,
        'number_of_components': len(components),
    }

    with open(path_to_save + '.json', 'w') as outfile:
        json.dump(db, outfile, default=_json_default)
    
# Process every CSV found below `path`. The subject id, image type and time
# point are the first three '_'-separated fields of the file name; reading
# them from the basename (instead of fixed character offsets into the full
# path) keeps this working if `path` or the field widths change.
# NOTE(review): this glob also matches the '*_features.csv' files that fill_db
# writes into the same tree - confirm whether re-runs should pick those up.
for file in sorted(glob.glob(os.path.join(path, '**/*.csv'), recursive=True)):
    file_stem = os.path.splitext(os.path.basename(file))[0]
    patient_id, image_type, image_time = file_stem.split('_')[:3]
    fill_db(file, patient_id, image_type, image_time)

S01 T2 t01 component_6
S01 T2 t01 component_99
S01 T2 t01 component_207
S01 T2 t01 component_221
S01 T2 t01 component_399
S01 T2 t01 component_414
S01 T2 t01 component_462
S01 T2 t01 component_466
S01 T2 t01 component_494
S01 T2 t18 component_9
S01 T2 t18 component_136
S01 T2 t18 component_357
S01 T2 t18 component_392
S01 T2 t18 component_510
S02 T2 t01 component_15
S02 T2 t01 component_224
S02 T2 t01 component_349
S02 T2 t01 component_463
S02 T2 t01 component_516
