diff --git a/docs/tutorials/tuning.md b/docs/tutorials/tuning.md
index 0865a2e9b..d0189d839 100644
--- a/docs/tutorials/tuning.md
+++ b/docs/tutorials/tuning.md
@@ -76,6 +76,8 @@ To enable hyperparameter tuning, add the following `tuning` section to your `con
 tuning:
   # Output directory for tuning results.
   output_dir: 'results'
+  # Directory where tuning checkpoints are saved.
+  checkpoint_dir: 'checkpoint'
   # Number of gpu hours allowed for tuning.
   gpu_hours: 1.0
   # Number of fixed trials to tune hyperparameters (override gpu_hours).
@@ -92,6 +94,7 @@ tuning:
 Let's detail the new parameters:
 
 - `output_dir`: Directory where tuning results, visualizations, and best parameters will be saved.
+- `checkpoint_dir`: Directory where the tuning checkpoint is saved after each trial. It allows resuming a previous tuning run in case of a crash.
 - `gpu_hours`: Estimated total GPU time available for tuning, in hours. Given this time, the script will automatically compute for how many training trials we can tune hyperparameters. By default, `gpu_hours` is set to 1.
 - `n_trials`: Number of training trials for tuning. If provided, it will override `gpu_hours` and tune the model for exactly `n_trial` trials.
 - `two_phase_tuning`: If True, performs a two-phase tuning. In the first phase, all hyperparameters are tuned, and in the second phase, the top half (based on importance) are fine-tuned while freezing others. By default, `two_phase_tuning` is False.
@@ -252,6 +255,7 @@ package:
 # -> python -m edsnlp.tune --config configs/config.yml
 tuning:
   output_dir: 'results'
+  checkpoint_dir: 'checkpoint'
   gpu_hours: 40.0
   two_phase_tuning: True
   metric: "ner.micro.f"
diff --git a/edsnlp/tune.py b/edsnlp/tune.py
index 48d305a24..20a21e33d 100644
--- a/edsnlp/tune.py
+++ b/edsnlp/tune.py
@@ -7,6 +7,7 @@
 import sys
 from typing import Dict, List, Optional, Tuple, Union

+import joblib
 import optuna
 import optuna.visualization as vis
 from configobj import ConfigObj
@@ -17,14 +18,19 @@
 from optuna.pruners import MedianPruner
 from pydantic import BaseModel, confloat, conint
 from ruamel.yaml import YAML
+from transformers.utils.logging import ERROR, set_verbosity

 from edsnlp.training.trainer import GenericScorer, registry, train

 app = Cli(pretty_exceptions_show_locals=False)

+# Disable the transformers library's warning logs.
+set_verbosity(ERROR)
+
 logger = logging.getLogger(__name__)

 DEFAULT_GPU_HOUR = 1.0
+CHECKPOINT = "study.pkl"


 class HyperparameterConfig(BaseModel):
@@ -284,7 +290,9 @@ def on_validation_callback(all_metrics):
     return score


-def optimize(config_path, tuned_parameters, n_trials, metric, study=None):
+def optimize(
+    config_path, tuned_parameters, n_trials, metric, checkpoint_dir, study=None
+):
     def objective(trial):
         return objective_with_param(config_path, tuned_parameters, trial, metric)

@@ -293,10 +301,29 @@ def objective(trial):
             direction="maximize",
             pruner=MedianPruner(n_startup_trials=5, n_warmup_steps=2),
         )
-    study.optimize(objective, n_trials=n_trials)
+    study.optimize(
+        objective, n_trials=n_trials, callbacks=[save_checkpoint(checkpoint_dir)]
+    )
     return study


+def save_checkpoint(checkpoint_dir):
+    def callback(study: optuna.study.Study, trial: optuna.trial.FrozenTrial):
+        checkpoint_file = os.path.join(checkpoint_dir, CHECKPOINT)
+        logger.info(f"Saving checkpoint to {checkpoint_file}")
+        joblib.dump(study, checkpoint_file)
+
+    return callback
+
+
+def load_checkpoint(checkpoint_dir) -> Optional[optuna.study.Study]:
+    checkpoint_file = os.path.join(checkpoint_dir, CHECKPOINT)
+    if os.path.exists(checkpoint_file):
+        logger.info(f"Loading study checkpoint from {checkpoint_file}")
+        return joblib.load(checkpoint_file)
+    return None
+
+
 def process_results(
     study,
     output_dir,
@@ -376,17 +403,38 @@ def write_final_config(output_dir, config_path, tuned_parameters, best_params):
     config.write()


+def parse_study_summary(output_dir):
+    file_path = os.path.join(output_dir, "results_summary.txt")
+    with open(file_path, "r") as f:
+        lines = f.readlines()
+
+    sections = {"Params:": {}, "Importances:": {}}
+    current = None
+
+    for line in lines:
+        line = line.strip()
+        if line in sections:
+            current = sections[line]
+        elif current is not None and line:
+            key, value = map(str.strip, line.split(":", 1))
+            current[key] = float(value)
+
+    return sections["Params:"], sections["Importances:"]
+
+
 def tune_two_phase(
     config: Dict,
     config_path: str,
     hyperparameters: Dict[str, Dict],
     output_dir: str,
+    checkpoint_dir: str,
     n_trials: int,
     viz: bool,
     metric: Tuple[str],
     study: Optional[optuna.study.Study] = None,
     is_fixed_n_trials: bool = False,
     gpu_hours: float = 1.0,
+    skip_phase_1: bool = False,
 ) -> None:
     """
     Perform two-phase hyperparameter tuning using Optuna.
@@ -406,6 +454,8 @@
     output_dir : str
         Directory where tuning results, visualizations, and best parameters
         will be saved.
+    checkpoint_dir : str
+        Directory where the tuning checkpoint is saved after each trial.
     n_trials : int
         The total number of trials to execute across both tuning phases.
         This number will be split between the two phases, with approximately half
@@ -423,17 +473,41 @@
         trials pruned in phase 1, we raise n_trials to compensate. Default is False.
     gpu_hours : float, optional
         Total GPU time available for tuning, in hours. Default is 1 hour.
+    skip_phase_1 : bool, optional
+        Whether to skip phase 1 (used when resuming from a checkpoint).
+        Default is False.
""" - n_trials_2 = n_trials // 2 - n_trials_1 = n_trials - n_trials_2 output_dir_phase_1 = os.path.join(output_dir, "phase_1") output_dir_phase_2 = os.path.join(output_dir, "phase_2") - logger.info(f"Phase 1: Tuning all hyperparameters ({n_trials_1} trials).") - study = optimize(config, hyperparameters, n_trials_1, metric, study=study) - best_params_phase_1, importances = process_results( - study, output_dir_phase_1, viz, config, config_path, hyperparameters - ) + if str(config_path).endswith("yaml") or str(config_path).endswith("yml"): + config_path_phase_2 = os.path.join(output_dir_phase_1, "config.yml") + else: + config_path_phase_2 = os.path.join(output_dir_phase_1, "config.cfg") + + if not skip_phase_1: + n_trials_2 = n_trials // 2 + n_trials_1 = n_trials - n_trials_2 + + logger.info(f"Phase 1: Tuning all hyperparameters ({n_trials_1} trials).") + study = optimize( + config, + hyperparameters, + n_trials_1, + metric, + checkpoint_dir, + study, + ) + best_params_phase_1, importances = process_results( + study, output_dir_phase_1, viz, config, config_path, hyperparameters + ) + if not is_fixed_n_trials: + n_trials_2 = compute_remaining_n_trials_possible(study, gpu_hours) + + else: + n_trials_2 = n_trials + logger.info("Skipping already tuned phase 1") + best_params_phase_1, importances = parse_study_summary(output_dir_phase_1) hyperparameters_to_keep = list(importances.keys())[ : math.ceil(len(importances) / 2) @@ -445,6 +519,7 @@ def tune_two_phase( if key in hyperparameters_to_keep or (value.get("alias") and value["alias"] in hyperparameters_to_keep) } + hyperparameters_frozen = { key: value for key, value in hyperparameters.items() @@ -456,20 +531,20 @@ def tune_two_phase( config, hyperparameters_frozen, values=best_params_phase_1 ) - if not is_fixed_n_trials: - n_trials_2 = compute_remaining_n_trials_possible(study, gpu_hours) - logger.info( f"Phase 2: Tuning {hyperparameters_to_keep} hyperparameters " f"({n_trials_2} trials). Other hyperparameters frozen to best values." ) + study = optimize( - updated_config, hyperparameters_phase_2, n_trials_2, metric, study=study + updated_config, + hyperparameters_phase_2, + n_trials_2, + metric, + checkpoint_dir, + study, ) - if str(config_path).endswith("yaml") or str(config_path).endswith("yml"): - config_path_phase_2 = os.path.join(output_dir_phase_1, "config.yml") - else: - config_path_phase_2 = os.path.join(output_dir_phase_1, "config.cfg") + process_results( study, output_dir_phase_2, @@ -521,6 +596,7 @@ def tune( config_meta: Dict, hyperparameters: Dict[str, HyperparameterConfig], output_dir: str, + checkpoint_dir: str, gpu_hours: confloat(gt=0) = DEFAULT_GPU_HOUR, n_trials: conint(gt=0) = None, two_phase_tuning: bool = False, @@ -548,6 +624,8 @@ def tune( output_dir : str Directory where tuning results, visualizations, and best parameters will be saved. + checkpoint_dir : str, + Path to save the checkpoint file. gpu_hours : float, optional Total GPU time available for tuning, in hours. Default is 1 hour. 
     n_trials : int, optional
@@ -568,13 +646,35 @@
     hyperparameters = {key: value.to_dict() for key, value in hyperparameters.items()}
     set_seed(seed)
     metric = split_path(metric)
-    study = None
+    study = load_checkpoint(checkpoint_dir)
+    elapsed_trials = 0
+    skip_phase_1 = False
     is_fixed_n_trials = n_trials is not None

+    if study:
+        elapsed_trials = len(study.trials)
+        logger.info(f"Elapsed trials: {elapsed_trials}")
+
     if not is_fixed_n_trials:
-        logger.info(f"Computing number of trials for {gpu_hours} hours of GPU.")
-        study = optimize(config, hyperparameters, n_trials=1, metric=metric)
-        n_trials = compute_n_trials(gpu_hours, compute_time_per_trial(study)) - 1
+        if not study:
+            logger.info(f"Computing number of trials for {gpu_hours} hours of GPU.")
+            study = optimize(
+                config,
+                hyperparameters,
+                n_trials=1,
+                metric=metric,
+                checkpoint_dir=checkpoint_dir,
+            )
+            n_trials = compute_n_trials(gpu_hours, compute_time_per_trial(study)) - 1
+        else:
+            n_trials = compute_n_trials(
+                gpu_hours, compute_time_per_trial(study, ema=True)
+            )
+
+    if elapsed_trials >= (n_trials / 2):
+        skip_phase_1 = True
+
+    n_trials = max(0, n_trials - elapsed_trials)

     logger.info(f"Number of trials: {n_trials}")

@@ -585,16 +685,25 @@
             config_path,
             hyperparameters,
             output_dir,
+            checkpoint_dir,
             n_trials,
             viz,
             metric=metric,
             study=study,
             is_fixed_n_trials=is_fixed_n_trials,
             gpu_hours=gpu_hours,
+            skip_phase_1=skip_phase_1,
         )
     else:
         logger.info("Starting single-phase tuning.")
-        study = optimize(config, hyperparameters, n_trials, metric, study=study)
+        study = optimize(
+            config,
+            hyperparameters,
+            n_trials,
+            metric,
+            checkpoint_dir,
+            study,
+        )
         if not is_fixed_n_trials:
             n_trials = compute_remaining_n_trials_possible(study, gpu_hours)
             if n_trials > 0:
@@ -602,9 +711,23 @@
                     f"As some trials were pruned, perform tuning for {n_trials} "
                     "more trials to fully use GPU time budget."
                 )
-                study = optimize(config, hyperparameters, n_trials, metric, study=study)
+                study = optimize(
+                    config,
+                    hyperparameters,
+                    n_trials,
+                    metric,
+                    checkpoint_dir,
+                    study,
+                )
     process_results(study, output_dir, viz, config, config_path, hyperparameters)
+    logger.info(
+        f"Tuning completed. Results available in {output_dir}. Deleting checkpoint."
+    )
+    checkpoint_file = os.path.join(checkpoint_dir, CHECKPOINT)
+    if os.path.exists(checkpoint_file):
+        os.remove(checkpoint_file)
+


 if __name__ == "__main__":
     app()
diff --git a/tests/tuning/test_checkpoints/single_phase_gpu_hour/study_.pkl b/tests/tuning/test_checkpoints/single_phase_gpu_hour/study_.pkl
new file mode 100644
index 000000000..41662e072
Binary files /dev/null and b/tests/tuning/test_checkpoints/single_phase_gpu_hour/study_.pkl differ
diff --git a/tests/tuning/test_checkpoints/single_phase_n_trials/study_.pkl b/tests/tuning/test_checkpoints/single_phase_n_trials/study_.pkl
new file mode 100644
index 000000000..69f4d0eab
Binary files /dev/null and b/tests/tuning/test_checkpoints/single_phase_n_trials/study_.pkl differ
diff --git a/tests/tuning/test_checkpoints/two_phase_gpu_hour/config.yml b/tests/tuning/test_checkpoints/two_phase_gpu_hour/config.yml
new file mode 100644
index 000000000..b481e64e8
--- /dev/null
+++ b/tests/tuning/test_checkpoints/two_phase_gpu_hour/config.yml
@@ -0,0 +1,127 @@
+# My usefull comment
+# 🤖 PIPELINE DEFINITION
+nlp:
+  "@core": pipeline
+  lang: eds
+
+  components:
+    normalizer:
+      '@factory': eds.normalizer
+
+    sentencizer:
+      '@factory': eds.sentences
+
+    ner:
+      '@factory': eds.ner_crf
+      mode: "joint"
+      target_span_getter: "gold_spans"
+      # Set spans as both to ents and in separate `ent.label` groups
+      span_setter: [ "ents", "*" ]
+      infer_span_setter: true
+
+      embedding:
+        '@factory': eds.text_cnn
+        kernel_sizes: [ 3 ]
+
+        embedding:
+          '@factory': eds.transformer
+          model: hf-internal-testing/tiny-bert
+          window: 128
+          stride: 96
+          new_tokens: [ [ "(?:\\n\\s*)*\\n", "⏎" ] ]
+
+    qualifier:
+      '@factory': eds.span_classifier
+      attributes: { "_.negation": [ "sosy" ], "_.unit": [ "measure" ] }
+      span_getter: ["ents", "gold_spans"]
+
+      embedding:
+        '@factory': eds.span_pooler
+
+        embedding:  # ${ nlp.components.ner.embedding }
+          '@factory': eds.text_cnn
+          kernel_sizes: [ 3 ]
+
+          embedding:
+            '@factory': eds.transformer
+            model: hf-internal-testing/tiny-bert
+            window: 128
+            stride: 96
+
+# 📈 SCORERS
+scorer:
+  speed: true
+  qual:
+    '@metrics': eds.span_attributes
+    span_getter: ${nlp.components.qualifier.span_getter}
+    qualifiers: ${nlp.components.qualifier.attributes}
+  ner:
+    '@metrics': eds.ner_exact
+    span_getter: ${nlp.components.ner.target_span_getter}
+
+# 🎛️ OPTIMIZER
+optimizer:
+  "@core": optimizer
+  optim: AdamW
+  module: ${ nlp }
+  groups:
+    "^transformer": false
+    ".*":
+      lr:
+        "@schedules": linear
+        start_value: 1e-3
+        max_value: 2e-3
+        warmup_rate: 0.1
+        total_steps: ${ train.max_steps }
+
+# 📚 DATA
+train_data:
+  - data:
+      '@readers': standoff
+      path: tests/training/dataset/
+      converter:
+        - '@factory': eds.standoff_dict2doc
+          span_setter : 'gold_spans'
+          span_attributes : ['sosy', 'unit', 'negation']
+          bool_attributes : ['negation']  # default standoff to doc converter
+        - '@factory': eds.sentences
+          nlp: ${nlp}
+        - '@factory': eds.split
+          nlp: null
+          max_length: 2000
+          regex: '\n\n+'
+    shuffle: dataset
+    batch_size: 8 docs
+    pipe_names: [ "ner" ]
+  - data:
+      '@readers': standoff
+      path: tests/training/dataset/
+      converter:
+        - '@factory': eds.standoff_dict2doc
+          span_setter : 'gold_spans'
+          span_attributes : ['sosy', 'unit', 'negation']
+          bool_attributes : ['negation']  # default standoff to doc converter
+    shuffle: dataset
+    batch_size: 16 spans
+    pipe_names: [ "qualifier" ]
+
+val_data:
+  '@readers': standoff
+  path: tests/training/dataset/
+  converter:
+    - '@factory': eds.standoff_dict2doc
+      span_setter : 'gold_spans'
+      span_attributes : ['sosy', 'unit', 'negation']
+      bool_attributes : ['negation']  # default standoff to doc converter
+
+# 🚀 TRAIN SCRIPT OPTIONS
+train:
+  nlp: ${ nlp }
+  train_data: ${ train_data }
+  val_data: ${ val_data }
+  max_steps: 5
+  validation_interval: 2
+  max_grad_norm: 1.0
+  scorer: ${ scorer }
+  num_workers: 0
+  optimizer: ${ optimizer }
diff --git a/tests/tuning/test_checkpoints/two_phase_gpu_hour/results_summary.txt b/tests/tuning/test_checkpoints/two_phase_gpu_hour/results_summary.txt
new file mode 100644
index 000000000..f706c7cac
--- /dev/null
+++ b/tests/tuning/test_checkpoints/two_phase_gpu_hour/results_summary.txt
@@ -0,0 +1,13 @@
+Study Summary
+==================
+Best trial: 2
+
+Value: 0.7674011016524788
+
+Params:
+    start_value: 0.00017235427021406453
+    warmup_rate: 0.1
+
+Importances:
+    start_value: 0.7
+    warmup_rate: 0.3
diff --git a/tests/tuning/test_checkpoints/two_phase_gpu_hour/study_.pkl b/tests/tuning/test_checkpoints/two_phase_gpu_hour/study_.pkl
new file mode 100644
index 000000000..f6a911771
Binary files /dev/null and b/tests/tuning/test_checkpoints/two_phase_gpu_hour/study_.pkl differ
diff --git a/tests/tuning/test_checkpoints/two_phase_n_trials/config.yml b/tests/tuning/test_checkpoints/two_phase_n_trials/config.yml
new file mode 100644
index 000000000..b481e64e8
--- /dev/null
+++ b/tests/tuning/test_checkpoints/two_phase_n_trials/config.yml
@@ -0,0 +1,127 @@
+# My usefull comment
+# 🤖 PIPELINE DEFINITION
+nlp:
+  "@core": pipeline
+  lang: eds
+
+  components:
+    normalizer:
+      '@factory': eds.normalizer
+
+    sentencizer:
+      '@factory': eds.sentences
+
+    ner:
+      '@factory': eds.ner_crf
+      mode: "joint"
+      target_span_getter: "gold_spans"
+      # Set spans as both to ents and in separate `ent.label` groups
+      span_setter: [ "ents", "*" ]
+      infer_span_setter: true
+
+      embedding:
+        '@factory': eds.text_cnn
+        kernel_sizes: [ 3 ]
+
+        embedding:
+          '@factory': eds.transformer
+          model: hf-internal-testing/tiny-bert
+          window: 128
+          stride: 96
+          new_tokens: [ [ "(?:\\n\\s*)*\\n", "⏎" ] ]
+
+    qualifier:
+      '@factory': eds.span_classifier
+      attributes: { "_.negation": [ "sosy" ], "_.unit": [ "measure" ] }
+      span_getter: ["ents", "gold_spans"]
+
+      embedding:
+        '@factory': eds.span_pooler
+
+        embedding:  # ${ nlp.components.ner.embedding }
+          '@factory': eds.text_cnn
+          kernel_sizes: [ 3 ]
+
+          embedding:
+            '@factory': eds.transformer
+            model: hf-internal-testing/tiny-bert
+            window: 128
+            stride: 96
+
+# 📈 SCORERS
+scorer:
+  speed: true
+  qual:
+    '@metrics': eds.span_attributes
+    span_getter: ${nlp.components.qualifier.span_getter}
+    qualifiers: ${nlp.components.qualifier.attributes}
+  ner:
+    '@metrics': eds.ner_exact
+    span_getter: ${nlp.components.ner.target_span_getter}
+
+# 🎛️ OPTIMIZER
+optimizer:
+  "@core": optimizer
+  optim: AdamW
+  module: ${ nlp }
+  groups:
+    "^transformer": false
+    ".*":
+      lr:
+        "@schedules": linear
+        start_value: 1e-3
+        max_value: 2e-3
+        warmup_rate: 0.1
+        total_steps: ${ train.max_steps }
+
+# 📚 DATA
+train_data:
+  - data:
+      '@readers': standoff
+      path: tests/training/dataset/
+      converter:
+        - '@factory': eds.standoff_dict2doc
+          span_setter : 'gold_spans'
+          span_attributes : ['sosy', 'unit', 'negation']
+          bool_attributes : ['negation']  # default standoff to doc converter
+        - '@factory': eds.sentences
+          nlp: ${nlp}
+        - '@factory': eds.split
+          nlp: null
+          max_length: 2000
+          regex: '\n\n+'
+    shuffle: dataset
+    batch_size: 8 docs
+    pipe_names: [ "ner" ]
+  - data:
+      '@readers': standoff
+      path: tests/training/dataset/
+      converter:
+        - '@factory': eds.standoff_dict2doc
+          span_setter : 'gold_spans'
+          span_attributes : ['sosy', 'unit', 'negation']
+          bool_attributes : ['negation']  # default standoff to doc converter
+    shuffle: dataset
+    batch_size: 16 spans
+    pipe_names: [ "qualifier" ]
+
+val_data:
+  '@readers': standoff
+  path: tests/training/dataset/
+  converter:
+    - '@factory': eds.standoff_dict2doc
+      span_setter : 'gold_spans'
+      span_attributes : ['sosy', 'unit', 'negation']
+      bool_attributes : ['negation']  # default standoff to doc converter
+
+# 🚀 TRAIN SCRIPT OPTIONS
+train:
+  nlp: ${ nlp }
+  train_data: ${ train_data }
+  val_data: ${ val_data }
+  max_steps: 5
+  validation_interval: 2
+  max_grad_norm: 1.0
+  scorer: ${ scorer }
+  num_workers: 0
+  optimizer: ${ optimizer }
diff --git a/tests/tuning/test_checkpoints/two_phase_n_trials/results_summary.txt b/tests/tuning/test_checkpoints/two_phase_n_trials/results_summary.txt
new file mode 100644
index 000000000..f706c7cac
--- /dev/null
+++ b/tests/tuning/test_checkpoints/two_phase_n_trials/results_summary.txt
@@ -0,0 +1,13 @@
+Study Summary
+==================
+Best trial: 2
+
+Value: 0.7674011016524788
+
+Params:
+    start_value: 0.00017235427021406453
+    warmup_rate: 0.1
+
+Importances:
+    start_value: 0.7
+    warmup_rate: 0.3
diff --git a/tests/tuning/test_checkpoints/two_phase_n_trials/study_.pkl b/tests/tuning/test_checkpoints/two_phase_n_trials/study_.pkl
new file mode 100644
index 000000000..c54ab0e96
Binary files /dev/null and b/tests/tuning/test_checkpoints/two_phase_n_trials/study_.pkl differ
diff --git a/tests/tuning/test_end_to_end.py b/tests/tuning/test_end_to_end.py
index 1febb4e82..b9c32c068 100644
--- a/tests/tuning/test_end_to_end.py
+++ b/tests/tuning/test_end_to_end.py
@@ -54,7 +54,8 @@ def assert_results(output_dir):

 @pytest.mark.parametrize("n_trials", [7, None])
 @pytest.mark.parametrize("two_phase_tuning", [True, False])
-def test_tune(tmpdir, n_trials, two_phase_tuning):
+@pytest.mark.parametrize("start_from_checkpoint", [True, False])
+def test_tune(tmpdir, n_trials, two_phase_tuning, start_from_checkpoint):
     config_meta = {"config_path": ["tests/tuning/config.yml"]}
     hyperparameters = {
         "optimizer.groups.'.*'.lr.start_value": {
@@ -73,26 +74,62 @@ def test_tune(tmpdir, n_trials, two_phase_tuning, start_from_checkpoint):
         },
     }
     output_dir = "./results"
-    gpu_hours = 0.015
-    seed = 42
-    metric = "ner.micro.f"
-    tune(
-        config_meta=config_meta,
-        hyperparameters=hyperparameters,
-        output_dir=output_dir,
-        gpu_hours=gpu_hours,
-        n_trials=n_trials,
-        two_phase_tuning=two_phase_tuning,
-        seed=seed,
-        metric=metric,
-    )
-    if two_phase_tuning:
-        phase_1_dir = os.path.join(output_dir, "phase_1")
-        phase_2_dir = os.path.join(output_dir, "phase_2")
-        assert_results(phase_1_dir)
-        assert_results(phase_2_dir)
-    else:
-        assert_results(output_dir)
-
-    shutil.rmtree(output_dir)
-    shutil.rmtree("./artifacts")
+    try:
+        if start_from_checkpoint:
+            if two_phase_tuning:
+                if n_trials is None:
+                    checkpoint_dir = (
+                        "./tests/tuning/test_checkpoints/two_phase_gpu_hour"
+                    )
+                else:
+                    checkpoint_dir = (
+                        "./tests/tuning/test_checkpoints/two_phase_n_trials"
+                    )
+                summary_src = os.path.join(checkpoint_dir, "results_summary.txt")
+                summary_dst = os.path.join(output_dir, "phase_1/results_summary.txt")
+                config_src = os.path.join(checkpoint_dir, "config.yml")
+                config_dst = os.path.join(output_dir, "phase_1/config.yml")
+                os.makedirs(os.path.join(output_dir, "phase_1"))
+                shutil.copy(summary_src, summary_dst)
+                shutil.copy(config_src, config_dst)
+            else:
+                if n_trials is None:
+                    checkpoint_dir = (
"./tests/tuning/test_checkpoints/single_phase_gpu_hour" + ) + else: + checkpoint_dir = ( + "./tests/tuning/test_checkpoints/single_phase_n_trials" + ) + study_src = os.path.join(checkpoint_dir, "study_.pkl") + study_dst = os.path.join(checkpoint_dir, "study.pkl") + shutil.copy(study_src, study_dst) + + else: + checkpoint_dir = "./tests/tuning/test_checkpoints" + + gpu_hours = 0.015 + seed = 42 + metric = "ner.micro.f" + tune( + config_meta=config_meta, + hyperparameters=hyperparameters, + output_dir=output_dir, + checkpoint_dir=checkpoint_dir, + gpu_hours=gpu_hours, + n_trials=n_trials, + two_phase_tuning=two_phase_tuning, + seed=seed, + metric=metric, + ) + if two_phase_tuning: + phase_1_dir = os.path.join(output_dir, "phase_1") + phase_2_dir = os.path.join(output_dir, "phase_2") + if not start_from_checkpoint: + assert_results(phase_1_dir) + assert_results(phase_2_dir) + else: + assert_results(output_dir) + finally: + shutil.rmtree(output_dir) + shutil.rmtree("./artifacts") diff --git a/tests/tuning/test_tuning.py b/tests/tuning/test_tuning.py index b5c1d603e..9967f6934 100644 --- a/tests/tuning/test_tuning.py +++ b/tests/tuning/test_tuning.py @@ -212,22 +212,33 @@ def test_compute_remaining_n_trials_possible(study): def test_optimize(mock_objective_with_param, mock_optimize_study, has_study, study): mock_objective_with_param.return_value = 0.9 metric = ("ner", "micro", "f") + checkpoint_dir = "./checkpoint" if has_study: - def pass_fn(obj, n_trials): + def pass_fn(obj, n_trials, callbacks): pass study.optimize = pass_fn study = optimize( - "config_path", tuned_parameters={}, n_trials=1, metric=metric, study=study + "config_path", + tuned_parameters={}, + n_trials=1, + metric=metric, + checkpoint_dir=checkpoint_dir, + study=study, ) assert isinstance(study, Mock) assert len(study.trials) == 3 else: study = optimize( - "config_path", tuned_parameters={}, n_trials=1, metric=metric, study=None + "config_path", + tuned_parameters={}, + n_trials=1, + metric=metric, + checkpoint_dir=checkpoint_dir, + study=None, ) assert isinstance(study, optuna.study.Study) assert len(study.trials) == 0 @@ -260,7 +271,8 @@ def test_tune( "param1": {"type": "float", "low": 0.0, "high": 1.0}, "param2": {"type": "float", "low": 0.0, "high": 1.0}, } - output_dir = "fake_output_dir" + output_dir = "output_dir" + checkpoint_dir = "checkpoint_dir" gpu_hours = 0.25 seed = 42 @@ -268,6 +280,7 @@ def test_tune( config_meta=config_meta, hyperparameters=hyperparameters, output_dir=output_dir, + checkpoint_dir=checkpoint_dir, gpu_hours=gpu_hours, n_trials=n_trials, two_phase_tuning=two_phase_tuning,