Programmatic way to generate config files for a given hyperparameter sweep.

In [1]:
import os
import yaml
import itertools

In [2]:
# Specify a directory for the experiment.
# The sweep directory is <parent>/<experiment name>.

sweep_experiment_name = "sweep_1"
sweep_experiment_dir_parent = "/home/jupyter/"
sweep_experiment_dir = os.path.join(sweep_experiment_dir_parent, sweep_experiment_name)

# exist_ok=True keeps the cell safe to re-run and avoids the race between
# checking for the directory and creating it.
os.makedirs(sweep_experiment_dir, exist_ok=True)

In [3]:
# Model type used for this sweep; it is stored in the 'model' entry of the
# sweep config and selects the model-specific parameters.

model_type = "eskmeans"

In [4]:
# workhorse to generate combinations of parameters

def generate_choice_combinations(d):
    """Enumerate every combination of parameter choices.

    Args:
        d: dict mapping each parameter name to an iterable (typically a
            list) of candidate values. The keys must be mutually sortable,
            e.g. all strings.

    Returns:
        A dict ``{i: {key: value}}`` with one entry per element of the
        Cartesian product of the candidate values, so it has
        N = product of the number of candidates per key entries.
        Combinations are numbered from 0, with keys taken in sorted order
        and the last key varying fastest. An empty ``d`` yields ``{0: {}}``;
        a key with no candidates yields ``{}``.
    """
    sorted_keys = sorted(d)
    candidate_values = [d[key] for key in sorted_keys]
    # itertools.product walks the values directly, so no index bookkeeping
    # is needed and any iterable of candidates is accepted.
    return {
        i: dict(zip(sorted_keys, combination))
        for i, combination in enumerate(itertools.product(*candidate_values))
    }

In [15]:
# General sweep parameters.
# Every key maps to a list of candidate values, so a fixed setting is
# written as a one-element list.

sweep_config = {
    'input_vars': [['AccX', 'AccY', 'AccZ', 'Depth']],
    'metadata_fp': ['/home/jupyter/behavior_data_local/data/formatted/ladds_seals/dataset_metadata.yaml'],
    'model': [model_type],  # do not change
    'output_parent_dir': [sweep_experiment_dir],
    'test_data_fp_glob': [['/home/jupyter/behavior_data_local/data/formatted/ladds_seals/clip_data/*.npy']],
    'train_data_fp_glob': [['/home/jupyter/behavior_data_local/data/formatted/ladds_seals/clip_data/*.npy']],
    'num_clusters': [8, 20],
    'read_latents': [True],  # should be either [True] or [False]
    'train_data_latents_fp_glob': [['/home/jupyter/behavior_benchmarks_outputs/ladds_seals/whiten_seals/latents/*.npy']],
    'test_data_latents_fp_glob': [['/home/jupyter/behavior_benchmarks_outputs/ladds_seals/whiten_seals/latents/*.npy']],
}

# Shallow copy, so keys added to the summary do not appear in sweep_config.
summary = dict(sweep_config)

In [16]:
# Specify model-specific parameters.
# Every key maps to a list of candidate values to sweep over.

if model_type == 'eskmeans':
    sweep_model_config = {
        'boundary_init_lambda': [30.0],
        'embed_length': [10],
        'landmark_hop_size': [5],
        'n_epochs': [6],
        'n_landmarks_max': [40],
        'batch_size': [1],
        'time_power_term': [.95, .8],
    }

    summary['eskmeans_config'] = sweep_model_config
else:
    # Fail here instead of leaving sweep_model_config undefined (or stale
    # from an earlier run of this cell) for the code that uses it later.
    raise ValueError(
        "No model-specific sweep parameters are defined for model_type "
        + repr(model_type)
    )

In [17]:
# Evaluation parameters: each key maps to a list of candidate values.
sweep_evaluation_config = {'boundary_tolerance_sec': [1.]}

# Record the evaluation settings in the sweep summary as well.
summary['evaluation'] = sweep_evaluation_config

In [18]:
# Describe what the experiment is about, then write the summary to
# <sweep directory>/<sweep name>_summary.yaml.

summary['summary'] = "First sweep with eskmeans"
target_filename = f"{sweep_experiment_name}_summary.yaml"
target_fp = os.path.join(sweep_experiment_dir, target_filename)
with open(target_fp, 'w') as summary_file:
    yaml.dump(summary, summary_file)

In [19]:
# Expand each sub-dictionary into its own set of parameter combinations.
sweep_model_cartesian = generate_choice_combinations(sweep_model_config)
sweep_evaluation_cartesian = generate_choice_combinations(sweep_evaluation_config)

In [20]:
# Add the expanded sub-dictionaries to the main sweep dict; each combination
# becomes one candidate value for its key.

model_config_key_name = f"{model_type}_config"
sweep_config[model_config_key_name] = list(sweep_model_cartesian.values())

sweep_config['evaluation'] = list(sweep_evaluation_cartesian.values())

In [21]:
# Expand the full sweep dict into one config per combination of candidate values.
sweep_config_cartesian = generate_choice_combinations(sweep_config)

In [22]:
# Name each config "<sweep name>_<index>" using its combination index.
for index, config in sweep_config_cartesian.items():
    config['experiment_name'] = f"{sweep_experiment_name}_{index}"

In [23]:
# Write each config to <sweep directory>/<experiment name>.yaml and collect
# the file paths in config_fps.

config_fps = []
for config in sweep_config_cartesian.values():
    target_filename = f"{config['experiment_name']}.yaml"
    target_fp = os.path.join(sweep_experiment_dir, target_filename)
    config_fps.append(target_fp)
    with open(target_fp, 'w') as config_file:
        yaml.dump(config, config_file)

In [27]:
# Generate the command line that runs every config, one after another.
import shlex

# Quote each path so that a sweep directory containing spaces or shell
# metacharacters still produces a valid command; paths made only of safe
# characters are left unchanged by shlex.quote.
output = "".join(
    "python train_model.py --config " + shlex.quote(config_fp) + "; "
    for config_fp in config_fps
)

print(output)
# Example of a single command:
#python train_model.py --config /home/jupyter/behavior_benchmarks/behavior_benchmarks/example_config/example_config_eskmeans.yaml

python train_model.py --config /home/jupyter/sweep_1/sweep_1_0.yaml; python train_model.py --config /home/jupyter/sweep_1/sweep_1_1.yaml; python train_model.py --config /home/jupyter/sweep_1/sweep_1_2.yaml; python train_model.py --config /home/jupyter/sweep_1/sweep_1_3.yaml; 
