Notebook which sets up a final set of experiments, after performing hyperparameter selection.

In [1]:
import os
import yaml
import itertools
import numpy as np
from pathlib import Path
import BEBE.evaluation.evaluation as evaluation
from tqdm import tqdm

In [2]:
# Specify directories

##########
# User-editable settings for this notebook.
final_experiment_name = "gulls_wavelet_kmeans"
model_selection_dir = Path('/home/jupyter/behavior_benchmarks_experiments/gulls_wavelet_kmeans_model_selection') # Directory where hyperparameter search was performed
final_experiment_dir_parent = "/home/jupyter/behavior_benchmarks_experiments"
n_experiments = 3 # Number of trial config files written for the final experiment
#########

# Built with os.path.join from two strings, so this is a plain str; it is
# later stored in the YAML config and joined with the trial file names.
final_experiment_dir = os.path.join(final_experiment_dir_parent, final_experiment_name)

# exist_ok=True creates the directory (and any missing parents) in a single
# call, so re-running the cell is harmless and there is no gap between an
# existence check and the creation.
os.makedirs(final_experiment_dir, exist_ok=True)

In [1]:
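# Legacy (disabled; kept for reference only):
# # Re-run evaluation in the new style for every experiment whose saved
# # evaluation file has no 'individual_scores' entry.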
# for x in tqdm(model_selection_dir.glob('*/config.yaml')):
#     with open(x, 'r') as f:
#         config = yaml.safe_load(f)
#     if config['unsupervised']:
#         eval_fp = Path(x.parent, 'dev_eval.yaml')
#     else:
#         eval_fp = Path(x.parent, 'val_eval.yaml')
#     if os.path.exists(eval_fp):
#         with open(eval_fp, 'r') as f:
#             evaluation_file = yaml.safe_load(f)
#         if 'individual_scores' not in evaluation_file:
#             evaluation.generate_evaluations(config)

In [7]:
# Choose hyperparameters based on best average macro f1 score (on dev or val set, for unsupervised or supervised models, respectively)

# The config of an experiment directory <name>/ is read from the sibling file
# <name>.yaml. Each candidate's own config decides which evaluation file
# counts for it, so this cell works on a fresh kernel and does not depend on
# a `config` variable left behind by another cell.
best_experiment = None
best_f1 = -1
for eval_filename in ('dev_eval.yaml', 'val_eval.yaml'):
    # sorted() makes the visiting order (and therefore tie-breaking) deterministic.
    for eval_fp in sorted(model_selection_dir.glob('**/' + eval_filename)):
        candidate_config_fp = str(eval_fp.parent) + '.yaml'
        if not os.path.exists(candidate_config_fp):
            # Without its config the experiment could not be copied below anyway.
            print("Skipping " + str(eval_fp) + ": no config found at " + candidate_config_fp)
            continue
        with open(candidate_config_fp, 'r') as f:
            candidate_config = yaml.safe_load(f)

        # Unsupervised models are scored on the dev set, supervised ones on the val set.
        if candidate_config['unsupervised']:
            expected_eval_filename = 'dev_eval.yaml'
        else:
            expected_eval_filename = 'val_eval.yaml'
        if eval_filename != expected_eval_filename:
            continue

        with open(eval_fp, 'r') as f:
            scores = yaml.safe_load(f)
        mean_f1 = np.mean(scores['individual_scores']['macro_f1s'])
        # Strict comparison: on a tie the first candidate visited is kept.
        if mean_f1 > best_f1:
            best_f1 = mean_f1
            best_experiment = eval_fp.parent

if best_experiment is None:
    raise FileNotFoundError(
        "No usable dev_eval.yaml / val_eval.yaml found under " + str(model_selection_dir)
    )

# Copy selected hyperparameters

selected_config_fp = str(best_experiment) + '.yaml'
with open(selected_config_fp, 'r') as f:
    config = yaml.safe_load(f)

config['use_val_in_train'] = True # Enforce we train on all available data (only applies to supervised models)
config['output_parent_dir'] = final_experiment_dir

# Save off config files for final experiments

# One YAML file per trial; the files differ only in 'experiment_name'.
config_fps = []
for i in range(n_experiments):
    experiment_name = 'trial_' + str(i)
    config['experiment_name'] = experiment_name
    target_filename = experiment_name + '.yaml'
    target_fp = os.path.join(final_experiment_dir, target_filename)
    with open(target_fp, 'w') as file:
        yaml.dump(config, file)
    config_fps.append(target_fp)

In [8]:
# Generate command line to run these experiments

# One "python full_experiment.py --config <path>; " fragment per saved config,
# concatenated in order so the whole string can be pasted into a shell.
commands = ["python full_experiment.py --config " + fp + "; " for fp in config_fps]
output = "".join(commands)

print(output)

python full_experiment.py --config /home/jupyter/behavior_benchmarks_experiments/gulls_wavelet_kmeans/trial_0.yaml; python full_experiment.py --config /home/jupyter/behavior_benchmarks_experiments/gulls_wavelet_kmeans/trial_1.yaml; python full_experiment.py --config /home/jupyter/behavior_benchmarks_experiments/gulls_wavelet_kmeans/trial_2.yaml; 
