In [22]:
from collections import OrderedDict


class FolderStructure:
    """Plan the files of an output folder and render the tree as an HTML string.

    ``subs`` maps an input file name to a metadata dict. The keys read here are
    ``sid``, ``desc`` and ``name`` for every entry, plus ``fname`` and ``path``
    for ``.h5`` entries. Each entry is routed to one of the ``components``
    buckets depending on its file name; ``layout`` then holds the rendered tree.

    ``.h5`` entries are opened with ``h.File`` and inspected with
    ``sim.check_params``; both names must be available at module/notebook level
    (``import h5py as h`` and the project's ``simulations_h5`` module) before
    such entries are processed.

    Args:
        path: location of the output folder (stored, not touched on disk here).
        subs: mapping ``file name -> metadata dict``. May be omitted, in which
            case nothing is populated and only the empty skeleton is rendered.
    """

    # HTML line templates, one per nesting depth of the rendered tree.
    FOLD = '&emsp;&emsp;|___{}/<br>'
    SUBFOLD = '&emsp;&emsp;&emsp;&emsp;|___{}/<br>'
    SUBFILE = '&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;|___{}<br>'
    FILE = '&emsp;&emsp;&emsp;&emsp;|___{}<br>'
    MAIN_FILE = '|___{}<br>'

    def __init__(self, path, subs=None):
        self.path = path
        self.subs = {} if subs is None else subs

        self.components = OrderedDict({
            'code': [],
            'coord': [],
            'eq': [],
            'param': [],
            'subjects': {},
            'files': ['CHANGES.txt', 'dataset_description.json', 'participants.tsv', 'README.txt']
        })

        # Kept only so the attribute still exists; nothing in this class reads it.
        self.struct = None
        self.layout = ''
        self.default_format = 'sub-{}_desc-{}_{}.{}'
        self.coord_format = 'desc-{}_{}.{}'
        self.populate()

    def common_structure(self, v, name=None):
        """Return the ``.tsv``/``.json`` file-name pair for one subject-level entry.

        Args:
            v: metadata dict providing ``sid`` and ``desc`` (and ``name`` when
                ``name`` is not given).
            name: optional override for the name part of the file names.
        """
        name = v['name'] if name is None else name
        return [self.default_format.format(v['sid'], v['desc'], name, 'tsv'),
                self.default_format.format(v['sid'], v['desc'], name, 'json')]

    def coord_structure(self, v):
        """Return the four ``nodes``/``labels`` file names for entry ``v`` (uses ``v['desc']``)."""
        return [self.coord_format.format(v['desc'], 'nodes', 'tsv'),
                self.coord_format.format(v['desc'], 'nodes', 'json'),
                self.coord_format.format(v['desc'], 'labels', 'tsv'),
                self.coord_format.format(v['desc'], 'labels', 'json')]

    def _subject(self, sid):
        """Return the bucket dict of subject ``sid``, creating it on first use."""
        return self.components['subjects'].setdefault(
            sid, {'net': [], 'ts': [], 'spatial': []})

    def _populate_h5(self, v, subject):
        """Route one ``.h5`` entry to the subject's ``net``/``coord`` buckets or to ``param``."""
        name = v['fname'].split('_')[0].lower()

        # The context manager closes the HDF5 handle once the file has been inspected.
        with h.File(v['path']) as file:
            if sim.check_params(file):
                subject['net'] += self.common_structure(v, 'weights')
                subject['net'] += self.common_structure(v, 'distances')
                self.components['coord'] += self.coord_structure(v)
            elif len(file.keys()) > 0:
                # Files without any top-level key contribute nothing.
                self.components['param'] += [self.coord_format.format(v['desc'], name, 'xml'),
                                             self.coord_format.format(v['desc'], name, 'json')]

    def populate(self):
        """Fill ``components`` from ``subs`` and rebuild ``layout``."""
        for k, v in self.subs.items():
            # Every entry registers its subject, even when no branch below matches.
            subject = self._subject(v['sid'])

            if k in ('weights.txt', 'tract_lengths.txt', 'distances.txt'):
                subject['net'] += self.common_structure(v)
            elif k in ('centres.txt', 'centers.txt'):
                self.components['coord'] += self.coord_structure(v)
            elif k.endswith('.mat'):
                subject['ts'] += self.common_structure(v)
            elif k.endswith('.h5'):
                self._populate_h5(v, subject)
        self.create_layout()

    def join(self, files, form='files'):
        """Render ``files`` as consecutive HTML lines.

        ``form='files'`` uses the file template; any other value uses the
        deeper-indented sub-file template.
        """
        template = self.FILE if form == 'files' else self.SUBFILE
        return ''.join(template.format(f) for f in files)

    def create_layout(self):
        """Render ``components`` into ``self.layout`` (a single HTML string)."""
        # Built in a local list so repeated calls start from scratch.
        layout = ['|___ output/<br>']

        for k, v in self.components.items():
            if len(v) == 0:
                layout.append(self.FOLD.format(k))
            elif isinstance(v, list) and k != 'files':
                layout += [self.FOLD.format(k), self.join(v)]
            elif isinstance(v, dict):
                for sid, groups in v.items():
                    layout.append(self.FOLD.format(f'sub-{sid}'))
                    for group in ('net', 'ts', 'spatial'):
                        layout += [self.SUBFOLD.format(group),
                                   self.join(groups[group], 'subfile')]

        # The top-level files are listed last, at root depth.
        layout += [self.MAIN_FILE.format(x) for x in self.components['files']]
        self.layout = ''.join(layout)
            

In [45]:
# Start from an empty file index: the constructor iterates `subs` with
# .items(), so it is passed explicitly as an empty mapping here. The cells
# below fill struct.components by hand.
struct = FolderStructure('../output', subs={})

In [46]:
# Root folder of the input data. This is the only place the machine-specific
# location appears; change it here to run the notebook elsewhere.
DATA_DIR = 'C:\\Users\\dinar\\Desktop\\gsoc\\data'
_TXT_DIR = 'txt_files\\2'
_SIM_DIR = 'simulations\\1'


def _file_entry(fname, sid, sep, folder, name=None):
    """Build the metadata dict of one input file.

    Args:
        fname: file name including its extension.
        sid: subject id string.
        sep: column separator of the file (``None`` for ``.h5`` files).
        folder: folder of the file, relative to ``DATA_DIR``.
        name: value of the ``name`` field; defaults to ``fname`` without extension.

    Returns:
        dict with the keys ``fname, sid, sep, desc, path, ext, name`` (in that order).
    """
    stem, ext = fname.rsplit('.', 1)
    return {'fname': fname, 'sid': sid, 'sep': sep, 'desc': 'default',
            'path': '\\'.join((DATA_DIR, folder, fname)),
            'ext': ext, 'name': stem if name is None else name}


# File index keyed by file name: three connectivity text files of subject 01
# followed by the simulation .h5 files of subject 03.
wdc = {entry['fname']: entry for entry in (
    _file_entry('centres.txt', '01', '\t', _TXT_DIR),
    _file_entry('tract_lengths.txt', '01', '\\s', _TXT_DIR, name='distances'),
    _file_entry('weights.txt', '01', '\\s', _TXT_DIR),
    _file_entry('BurstConfiguration_a29116b38d05429588535667ce4d1eee.h5', '03', None, _SIM_DIR),
    _file_entry('Connectivity_6b28b6c61836452bb52cbd18538a7775.h5', '03', None, _SIM_DIR + '\\datatypes'),
    _file_entry('Generic2dOscillator_4e4d2edc01e2448192baf025336e0d73.h5', '03', None, _SIM_DIR),
    _file_entry('HeunDeterministicViewModel_4007260a59a443cfbc076eb4c231c6f5.h5', '03', None, _SIM_DIR),
    _file_entry('Linear_f639e3b95c48436dae5ffaa29072963e.h5', '03', None, _SIM_DIR),
    _file_entry('SimulatorAdapterModel_994a4afbef39461abf2a4feaf9761ca0.h5', '03', None, _SIM_DIR),
    _file_entry('SubSampleViewModel_cd2fe7f44797434b9ae2cc3418a9f091.h5', '03', None, _SIM_DIR),
)}


# File-name templates used by the helper functions below.
# default_format is filled with (sid, desc, name, extension).
default_format = 'sub-{}_desc-{}_{}.{}'
# coord_format is filled with (desc, name, extension).
coord_format = 'desc-{}_{}.{}'


def common_structure(v, name=None):
    """Return the ``.tsv`` and ``.json`` file names of one subject-level entry.

    ``v`` supplies ``sid`` and ``desc``; the name part comes from ``name`` when
    given, otherwise from ``v['name']``.
    """
    if name is None:
        name = v['name']
    return [default_format.format(v['sid'], v['desc'], name, extension)
            for extension in ('tsv', 'json')]


def coord_structure(v):
    """Return the ``nodes`` and ``labels`` file names (``.tsv`` then ``.json`` each) for ``v['desc']``."""
    return [coord_format.format(v['desc'], kind, extension)
            for kind in ('nodes', 'labels')
            for extension in ('tsv', 'json')]



# Route every file of `wdc` into the matching bucket of struct.components.
for k, v in wdc.items():
    sid = v['sid']
    # setdefault registers the subject on first sight and returns its buckets,
    # so no second membership check is needed further down.
    subject = struct.components['subjects'].setdefault(
        sid, {'net': [], 'ts': [], 'spatial': []})

    if k in ('weights.txt', 'tract_lengths.txt', 'distances.txt'):
        subject['net'] += common_structure(v)
    elif k in ('centres.txt', 'centers.txt'):
        struct.components['coord'] += coord_structure(v)
    elif k.endswith('.mat'):
        subject['ts'] += common_structure(v)
    elif k.endswith('.h5'):
        name = v['fname'].split('_')[0].lower()

        # Open the HDF5 file in a context manager so the handle is closed
        # as soon as the file has been inspected.
        with h.File(v['path']) as file:
            # sim.check_params comes from the project module; a truthy result
            # routes the file to the weights/distances/coord entries.
            if sim.check_params(file):
                subject['net'] += common_structure(v, 'weights')
                subject['net'] += common_structure(v, 'distances')
                struct.components['coord'] += coord_structure(v)
            elif len(file.keys()) > 0:
                # Files without any top-level key contribute nothing.
                struct.components['param'] += [coord_format.format(v['desc'], name, 'xml'),
                                               coord_format.format(v['desc'], name, 'json')]

In [47]:
# Display the current contents of struct.components.
struct.components

OrderedDict([('code', []),
             ('coord',
              ['desc-default_nodes.tsv',
               'desc-default_nodes.json',
               'desc-default_labels.tsv',
               'desc-default_labels.json',
               'desc-default_nodes.tsv',
               'desc-default_nodes.json',
               'desc-default_labels.tsv',
               'desc-default_labels.json']),
             ('eq', []),
             ('param',
              ['desc-default_generic2doscillator.xml',
               'desc-default_generic2doscillator.json',
               'desc-default_linear.xml',
               'desc-default_linear.json',
               'desc-default_subsampleviewmodel.xml',
               'desc-default_subsampleviewmodel.json']),
             ('subjects',
              {'01': {'net': ['sub-01_desc-default_distances.tsv',
                 'sub-01_desc-default_distances.json',
                 'sub-01_desc-default_weights.tsv',
                 'sub-01_desc-default_weights.json'],
   

In [82]:
def join(files, form='files'):
    """Render ``files`` as one string of HTML lines.

    With ``form='files'`` each name is formatted with the module-level ``file``
    template; any other value selects the ``subfile`` template.
    """
    template = file if form == 'files' else subfile
    return ''.join([template.format(entry) for entry in files])


# HTML line templates for the rendered tree, ordered from the root outwards.
# Root level: the dataset's top-level files.
main_files = '|___{}<br>'
# Depth 1: folders directly under the output folder.
fold = '&emsp;&emsp;|___{}/<br>'
# Depth 2: files and folders inside a depth-1 folder.
file = '&emsp;&emsp;&emsp;&emsp;|___{}<br>'
subfold = '&emsp;&emsp;&emsp;&emsp;|___{}/<br>'
# Depth 3: files inside a depth-2 folder.
subfile = '&emsp;&emsp;&emsp;&emsp;&emsp;&emsp;|___{}<br>'

# Assemble the HTML fragments of the folder tree, starting with the root line.
structure = ['|___ output/<br>']

for section, content in struct.components.items():
    # An empty bucket is shown as a bare folder.
    if len(content) == 0:
        structure.append(fold.format(section))
        continue

    if isinstance(content, list) and section != 'files':
        # A folder line followed by all of its files as one fragment.
        structure.extend([fold.format(section), join(content)])
    elif isinstance(content, dict):
        # One folder per subject, each with net/ts/spatial sub-folders.
        for subject_id, groups in content.items():
            rows = [fold.format(f'sub-{subject_id}')]
            for group in ('net', 'ts', 'spatial'):
                rows += [subfold.format(group), join(groups[group], 'subfile')]
            structure.extend(rows)

# The 'files' bucket is skipped in the loop and listed last, at root level.
structure.extend([main_files.format(entry) for entry in struct.components['files']])
            

In [72]:
# Render only the 'files' bucket with the root-level main_files template.
''.join([main_files.format(x) for x in struct.components['files']])

'|___CHANGES.txt<br>|___dataset_description.json<br>|___participants.tsv<br>|___README.txt<br>'

In [83]:
# Display the list of HTML fragments collected in `structure`.
structure

['|___ output/<br>',
 '&emsp;&emsp;|___code/<br>',
 '&emsp;&emsp;|___coord/<br>',
 '&emsp;&emsp;&emsp;&emsp;|___desc-default_nodes.tsv<br>&emsp;&emsp;&emsp;&emsp;|___desc-default_nodes.json<br>&emsp;&emsp;&emsp;&emsp;|___desc-default_labels.tsv<br>&emsp;&emsp;&emsp;&emsp;|___desc-default_labels.json<br>&emsp;&emsp;&emsp;&emsp;|___desc-default_nodes.tsv<br>&emsp;&emsp;&emsp;&emsp;|___desc-default_nodes.json<br>&emsp;&emsp;&emsp;&emsp;|___desc-default_labels.tsv<br>&emsp;&emsp;&emsp;&emsp;|___desc-default_labels.json<br>',
 '&emsp;&emsp;|___eq/<br>',
 '&emsp;&emsp;|___param/<br>',
 '&emsp;&emsp;&emsp;&emsp;|___desc-default_generic2doscillator.xml<br>&emsp;&emsp;&emsp;&emsp;|___desc-default_generic2doscillator.json<br>&emsp;&emsp;&emsp;&emsp;|___desc-default_linear.xml<br>&emsp;&emsp;&emsp;&emsp;|___desc-default_linear.json<br>&emsp;&emsp;&emsp;&emsp;|___desc-default_subsampleviewmodel.xml<br>&emsp;&emsp;&emsp;&emsp;|___desc-default_subsampleviewmodel.json<br>',
 '&emsp;&emsp;|___sub-01/<

In [27]:
import sys
import os
# Add the parent directory to the module search path
# (presumably so the `incf` package imported in a later cell resolves; TODO confirm).
sys.path.append('..')

In [43]:
import h5py as h
from incf.preprocess import simulations_h5 as sim

# Folder of the simulation files. This is the only place the machine-specific
# location appears; change it here to run the notebook elsewhere.
SIM_DIR = 'C:\\Users\\dinar\\Desktop\\gsoc\\data\\simulations\\1'

# (file name without extension, containing folder) of every simulation file.
_h5_sources = [
    ('BurstConfiguration_a29116b38d05429588535667ce4d1eee', SIM_DIR),
    ('Connectivity_6b28b6c61836452bb52cbd18538a7775', SIM_DIR + '\\datatypes'),
    ('Generic2dOscillator_4e4d2edc01e2448192baf025336e0d73', SIM_DIR),
    ('HeunDeterministicViewModel_4007260a59a443cfbc076eb4c231c6f5', SIM_DIR),
    ('Linear_f639e3b95c48436dae5ffaa29072963e', SIM_DIR),
    ('SimulatorAdapterModel_994a4afbef39461abf2a4feaf9761ca0', SIM_DIR),
    ('SubSampleViewModel_cd2fe7f44797434b9ae2cc3418a9f091', SIM_DIR),
]

# File index keyed by file name; every entry belongs to subject 03.
h5 = {
    stem + '.h5': {'fname': stem + '.h5', 'sid': '03', 'sep': None, 'desc': 'default',
                   'path': folder + '\\' + stem + '.h5', 'ext': 'h5', 'name': stem}
    for stem, folder in _h5_sources
}

# Route every simulation file of `h5` into struct.components.
for k, v in h5.items():
    name = v['fname'].split('_')[0].lower()

    sid = v['sid']
    # setdefault registers the subject on first sight and returns its buckets.
    subject = struct.components['subjects'].setdefault(
        sid, {'net': [], 'ts': [], 'spatial': []})

    # Open the HDF5 file in a context manager so the handle is closed as soon
    # as the file has been inspected.
    with h.File(v['path']) as file:
        # sim.check_params comes from the project module; a truthy result
        # routes the file to the weights/distances/coord entries.
        if sim.check_params(file):
            subject['net'] += common_structure(v, 'weights')
            subject['net'] += common_structure(v, 'distances')
            struct.components['coord'] += coord_structure(v)
        elif len(file.keys()) > 0:
            # Files without any top-level key contribute nothing.
            struct.components['param'] += [coord_format.format(v['desc'], name, 'xml'),
                                           coord_format.format(v['desc'], name, 'json')]
    
    

In [None]:
<KeysViewHDF5 []>
<KeysViewHDF5 ['areas', 'centres', 'cortical', 'hemispheres', 'orientations', 'region_labels', 'tract_lengths', 'weights']>
<KeysViewHDF5 ['I', 'a', 'alpha', 'b', 'beta', 'c', 'd', 'e', 'f', 'g', 'gamma', 'tau']>
<KeysViewHDF5 []>
<KeysViewHDF5 ['a', 'b']>
<KeysViewHDF5 []>
<KeysViewHDF5 ['variables_of_interest']>


# distances, weights & centres
{'centres.txt': {'fname': 'centres.txt', 'sid': '01', 'sep': '\t', 'desc': 'default', 'path': 'C:\\Users\\dinar\\Desktop\\gsoc\\data\\txt_files\\2\\centres.txt', 'ext': 'txt', 'name': 'centres'}, 

'tract_lengths.txt': {'fname': 'tract_lengths.txt', 'sid': '01', 'sep': '\\s', 'desc': 'default', 'path': 'C:\\Users\\dinar\\Desktop\\gsoc\\data\\txt_files\\2\\tract_lengths.txt', 'ext': 'txt', 'name': 'distances'}, 

'weights.txt': {'fname': 'weights.txt', 'sid': '01', 'sep': '\\s', 'desc': 'default', 'path': 'C:\\Users\\dinar\\Desktop\\gsoc\\data\\txt_files\\2\\weights.txt', 'ext': 'txt', 'name': 'weights'}}