In [1]:
import itertools
import os
from pathlib import Path

import h5py
import numpy as np
import pandas as pd
import sh
import yaml

In [6]:
# Sweep definition: every list below is one axis of the grid. The full grid is
# the cartesian product of all axes, repeated `num_trials` times.
conv_layer_types = ['standard'] # implemented
motif_pooling_types = ['se_block', 'softconv', None] # implemented (do I need to have None?)
spatial_pooling_types = ['attention_pool', 'max_pool',] # implemented
base_models = ['nam', 'cam'] # implemented
activations = ['relu', 'elu', 'exponential'] # implemented
pool_sizes = [0, 2, 4, 10, 25] # implemented
num_trials = 5

# Run options. With `save = False` the cell only builds `configs` in memory and
# touches nothing on disk.
save = False
dataset_name = 'hepg2' # implemented
smoke_test = True
log_wandb = False

# Root of the experiment tree; only read/written when `save` is True.
base_dir = "/home/chandana/projects/hominin/experiments/lenti-mpra"

configs = []   # one config dict per (grid point, trial)
commands = []  # one shell command line per config, written out at the end

# itertools.product iterates with the right-most axis varying fastest, which is
# the same order as the equivalent nested for-loops (trial is the inner loop).
sweep = itertools.product(
    conv_layer_types,
    motif_pooling_types,
    spatial_pooling_types,
    base_models,
    activations,
    pool_sizes,
    range(num_trials),
)

for (conv_layer_type, motif_pooling_type, spatial_pooling_type,
     base_model, activation, pool_size, trial) in sweep:

    # A fresh dict (and fresh inner lists) per run so entries never alias.
    config = {
        'base_model': base_model,
        'conv1_activation': activation,
        'conv1_batchnorm': True,
        'motif_pooling_type': motif_pooling_type,
        'conv1_dropout': 0.1,
        'conv1_filters': 128,
        'conv1_kernel_size': 19,
        'spatial_pooling_type': spatial_pooling_type,
        'conv1_pool_size': pool_size,
        'conv_layer_type': conv_layer_type,
        'dense_activation': 'relu',
        'dense_batchnorm': True,
        'dense_dropout': [0.3, 0.3],
        'dense_units': [256, 256],
        # NOTE(review): yaml.dump writes a tuple with a python-specific tag,
        # which yaml.safe_load rejects -- confirm how execute_hominin.py
        # loads config.yaml before changing this to a list.
        'input_shape': (230, 4),
        'mha_d_model': 96,
        'mha_dropout': 0.2,
        'mha_head_type': 'pooled',
        'mha_heads': 8,
        'mha_layernorm': True,
        'output_activation': 'linear',
        'output_shape': 1
    }
    configs.append(config)

    if not save:
        continue

    # One directory per run; a motif_pooling_type of None becomes the literal
    # path component "None".
    save_dir = (
        f"{base_dir}/"
        f"{base_model}/"
        f"{conv_layer_type}/"
        f"{motif_pooling_type}/"
        f"{spatial_pooling_type}/"
        f"pool_size={pool_size}/"
        f"{activation}/"
        f"trial_{trial}"
    )
    Path(save_dir).mkdir(parents=True, exist_ok=True)

    with open(os.path.join(save_dir, 'config.yaml'), 'w') as file:
        yaml.dump(config, file)

    commands.append(" ".join([
        "python execute_hominin.py",
        f"--config_file={save_dir}/config.yaml",
        f"--dataset_name={dataset_name}",
        f"--smoke_test={smoke_test}",
        f"--log_wandb={log_wandb}",
        "--gpu=None",
    ]) + "\n")

if save:
    # Opening with "w" truncates any previous commands.txt, so no separate
    # delete step is needed (and a missing file on the first run is fine).
    # The context manager guarantees the handle is flushed and closed.
    Path(base_dir).mkdir(parents=True, exist_ok=True)
    with open(f"{base_dir}/commands.txt", "w") as output_file:
        output_file.writelines(commands)

count = len(configs)
print(count)

900


In [3]:
# Number of runs assigned to each machine, in the order
# comet (0.25 of the sweep), citra (0.25), elzar (0.5).
# Relies on `count` from the sweep-generation cell above.

tuple(count * share for share in (0.25, 0.25, 0.5))

(450.0, 450.0, 900.0)