This notebook crawls through the experiment folders and dumps an "architecture.pkl" file into each model directory.

In [1]:
import os
import pickle

import re

In [2]:
# Work from the project's src directory: the '../experiments/' path built in
# get_model_paths is relative to the current working directory.
cd ../../src

/Users/cock/kDrive/PhD/Projects/Labs/beerslaw-lab/src


In [3]:
def get_model_paths(experiment_name:str):
    '''
    Returns a list of the paths where models were saved.

    A folder counts as a model folder when it holds a file whose name
    contains 'model_training.csv'. Any file whose full path contains
    'exclude' is ignored.

    Args
        experiment_name: folder name, relative to '../experiments/' (which is
            itself relative to the current working directory)

    Returns
        sorted list of unique model folder paths (no trailing separator)
    '''
    marker = 'model_training.csv'
    experiment_path = '../experiments/' + experiment_name

    model_dirs = set()
    for dirpath, _dirnames, filenames in os.walk(experiment_path):
        for filename in filenames:
            # Match on the file name itself so that a directory whose name
            # happens to contain the marker does not pull in unrelated files.
            if marker not in filename:
                continue
            file_path = os.path.join(dirpath, filename)
            if 'exclude' in file_path:
                continue
            # dirname (rather than splitting on '/') works with any path
            # separator and drops a trailing separator from the walk root.
            model_dirs.add(os.path.dirname(file_path))

    # Sorted so the processing order is reproducible between runs.
    return sorted(model_dirs)

In [21]:
# One pattern per hyper-parameter encoded in a model folder name such as
# 'ctLSTM_nlayers3_ncells[4, 8, 4]_drop02_optimadam_lossauc_bs16_ep200long short term memory'.
# Raw strings avoid invalid-escape warnings; '[A-Za-z_]' spells out the
# letters and underscore explicitly instead of the over-broad '[A-z]' range,
# which also matches '[', '\', ']', '^' and '`'.
_ARCHITECTURE_PATTERNS = {
    'cell_type': re.compile(r'ct([A-Za-z_]*)_'),
    'n_layers': re.compile(r'[A-Za-z_]_nlayers([0-9]+)_'),
    'n_cells': re.compile(r'.*ncells\[([0-9,\s]+)\]'),
    'dropout': re.compile(r'.*drop([0-9.]+)'),
    'optimiser': re.compile(r'.*optim([A-Za-z_]+)_loss'),
    'batch_size': re.compile(r'.*bs([0-9]+)_'),
    'epochs': re.compile(r'.*ep([0-9]+)long'),
}


def _find_architecture_field(field, model_name):
    """Return the captured text for `field`, or raise IndexError if absent."""
    matches = _ARCHITECTURE_PATTERNS[field].findall(model_name)
    if not matches:
        # IndexError is kept (it is what `findall(...)[0]` raised before),
        # but now says which field is missing and in which name.
        raise IndexError(
            "could not find '" + field + "' in model name '" + model_name + "'"
        )
    return matches[0]


def extract_features(model_name: str):
    """
    Retrieves the architecture details from the name path.

    Args
        model_name: name of the model folder (last path component), e.g.
            'ctLSTM_nlayers1_ncells[8]_drop02_optimadam_lossauc_bs16_ep200long short term memory'

    Returns
        dictionary with the parameters. cell_type, n_layers, n_cells, dropout,
        optimiser, batch_size and epochs are parsed from the name; the
        remaining entries (padding_value, loss, shuffle, verbose,
        early_stopping) are fixed values.

    Raises
        IndexError: if one of the parsed fields is not present in model_name
    """
    cell_type = _find_architecture_field('cell_type', model_name)
    n_layers = _find_architecture_field('n_layers', model_name)

    # The list may be written '[4, 8, 4]' or '[4,8,4]'; int() ignores the
    # surrounding whitespace, so splitting on the bare comma covers both.
    raw_cells = _find_architecture_field('n_cells', model_name)
    n_cells = [int(cell) for cell in raw_cells.split(',') if cell.strip()]

    # Dropout is normally encoded without the decimal point ('02' -> 0.2).
    # If the name already carries a point ('0.2'), use it unchanged.
    dropout = _find_architecture_field('dropout', model_name)
    if '.' not in dropout:
        dropout = dropout[0] + '.' + dropout[1:]

    optimiser = _find_architecture_field('optimiser', model_name)
    batch_size = _find_architecture_field('batch_size', model_name)
    epochs = _find_architecture_field('epochs', model_name)

    settings = {
        'cell_type': cell_type,
        'n_layers': int(n_layers),
        'n_cells': n_cells,
        'dropout': float(dropout),
        'optimiser': optimiser,
        'batch_size': int(batch_size),
        'epochs': int(epochs),
        'padding_value': -1,
        'loss': 'auc',
        'shuffle': True,
        'verbose': 1,
        'early_stopping': False,
    }
    return settings

In [22]:
def dump_architecture(model_path:str):
    """
    Reads the path, retrieves the architecture, and dumps the file there

    Args
        model_path: path of the model folder; its last component is the
            model name that encodes the architecture

    The settings dictionary returned by extract_features is pickled to
    '<model_path>/architecture.pkl'.
    """
    # normpath drops a trailing separator, which would otherwise leave an
    # empty model name; basename/join avoid hard-coding '/'.
    model_name = os.path.basename(os.path.normpath(model_path))
    architecture = extract_features(model_name)
    with open(os.path.join(model_path, 'architecture.pkl'), 'wb') as fp:
        pickle.dump(architecture, fp)

In [23]:
def dump_all_architectures(experiment_name:str):
    """
    Dumps the architecture file of every model folder returned by
    get_model_paths for the given experiment.
    """
    for model_dir in get_model_paths(experiment_name):
        dump_architecture(model_dir)

In [24]:
# Dump the architecture file for every model folder found under
# '../experiments/incremental/second round/'.
dump_all_architectures('incremental/second round/')

ctLSTM_nlayers3_ncells[4, 8, 4]_drop00_optimadam_lossauc_bs16_ep200long short term memory
0.0
ctLSTM_nlayers2_ncells[8, 16]_drop00_optimadam_lossauc_bs16_ep200long short term memory
0.0
ctLSTM_nlayers1_ncells[8]_drop00_optimadam_lossauc_bs16_ep200long short term memory
0.0
ctLSTM_nlayers1_ncells[8]_drop02_optimadam_lossauc_bs16_ep200long short term memory
0.2
ctLSTM_nlayers3_ncells[4, 8, 4]_drop02_optimadam_lossauc_bs16_ep200long short term memory
0.2


In [9]:
# Same call as the cell above, run again on the same experiment folder.
dump_all_architectures('incremental/second round/')