From a48d7f3bf6fd45d1630010ea59884a4798307de1 Mon Sep 17 00:00:00 2001 From: Peter Martigny Date: Wed, 29 May 2019 11:36:51 -0700 Subject: [PATCH] add the possibility to cache objects and use them in different experiment settings --- .../mlp_parameter_tuning.py | 22 ++++++-- .../mlp_parameter_tuning_cache.json | 11 ++++ .../mlp_parameter_tuning_uncached.json | 50 +++++++++++++++++++ transfer_nlp/runner/experiment_runner.py | 32 +++++++++--- 4 files changed, 104 insertions(+), 11 deletions(-) create mode 100644 experiments/deep_learning_with_pytorch/mlp_parameter_tuning_cache.json create mode 100644 experiments/deep_learning_with_pytorch/mlp_parameter_tuning_uncached.json diff --git a/experiments/deep_learning_with_pytorch/mlp_parameter_tuning.py b/experiments/deep_learning_with_pytorch/mlp_parameter_tuning.py index 9eb2de8..ecfbc02 100644 --- a/experiments/deep_learning_with_pytorch/mlp_parameter_tuning.py +++ b/experiments/deep_learning_with_pytorch/mlp_parameter_tuning.py @@ -31,8 +31,20 @@ def report(self, name: str, experiment: ExperimentConfig, report_dir: Path): parent_dir = Path(__file__).parent home_env = str(Path.home() / 'work/transfer-nlp-data') date = '_'.join(str(datetime.today()).split(' ')) - ExperimentRunner.run_all(experiment=parent_dir / 'mlp_parameter_tuning.json', - experiment_config=parent_dir / 'mlp_parameter_tuning.cfg', - report_dir=f"{home_env}/mlp_parameter_fine_tuning/{date}", - trainer_config_name='trainer', - reporter_config_name='reporter', HOME=home_env) \ No newline at end of file + + # # Uncomment to run the sequential Runner without caching read-only objects + # ExperimentRunner.run_all(experiment=parent_dir / 'mlp_parameter_tuning.json', + # experiment_config=parent_dir / 'mlp_parameter_tuning.cfg', + # report_dir=f"{home_env}/mlp_parameter_fine_tuning/{date}", + # trainer_config_name='trainer', + # reporter_config_name='reporter', HOME=home_env) + # + # + # # Uncomment to run the sequential Runner with caching read-only objects + # ExperimentRunner.run_all(experiment=parent_dir / 'mlp_parameter_tuning_uncached.json', + # experiment_config=parent_dir / 'mlp_parameter_tuning.cfg', + # report_dir=f"{home_env}/mlp_parameter_fine_tuning/{date}", + # trainer_config_name='trainer', + # reporter_config_name='reporter', + # experiment_cache=parent_dir / 'mlp_parameter_tuning_cache.json', + # HOME=home_env) diff --git a/experiments/deep_learning_with_pytorch/mlp_parameter_tuning_cache.json b/experiments/deep_learning_with_pytorch/mlp_parameter_tuning_cache.json new file mode 100644 index 0000000..cf97923 --- /dev/null +++ b/experiments/deep_learning_with_pytorch/mlp_parameter_tuning_cache.json @@ -0,0 +1,11 @@ +{ + "my_dataset_splits": { + "_name": "SurnamesDatasetMLP", + "data_file": "$HOME/surnames/surnames_with_splits.csv", + "batch_size": 128, + "vectorizer": { + "_name": "SurnamesVectorizerMLP", + "data_file": "$HOME/surnames/surnames_with_splits.csv" + } + } +} \ No newline at end of file diff --git a/experiments/deep_learning_with_pytorch/mlp_parameter_tuning_uncached.json b/experiments/deep_learning_with_pytorch/mlp_parameter_tuning_uncached.json new file mode 100644 index 0000000..b5b90e2 --- /dev/null +++ b/experiments/deep_learning_with_pytorch/mlp_parameter_tuning_uncached.json @@ -0,0 +1,50 @@ +{ + "model": { + "_name": "MultiLayerPerceptron", + "hidden_dim": "$hidden_dim", + "data": "$my_dataset_splits" + }, + "optimizer": { + "_name": "Adam", + "lr": "$lr", + "params": { + "_name": "TrainableParameters" + } + }, + "scheduler": { + "_name": 
"ReduceLROnPlateau", + "patience": 1, + "mode": "min", + "factor": 0.5 + }, + "trainer": { + "_name": "BasicTrainer", + "model": "$model", + "dataset_splits": "$my_dataset_splits", + "loss": { + "_name": "CrossEntropyLoss" + }, + "optimizer": "$optimizer", + "gradient_clipping": 0.25, + "num_epochs": 5, + "seed": 1337, + "regularizer": { + "_name": "L1" + }, + "tensorboard_logs": "$HOME/surnames/tensorboard/mlp", + "metrics": { + "accuracy": { + "_name": "Accuracy" + }, + "loss": { + "_name": "LossMetric", + "loss_fn": { + "_name": "CrossEntropyLoss" + } + } + } + }, + "reporter": { + "_name": "MyReporter" + } +} \ No newline at end of file diff --git a/transfer_nlp/runner/experiment_runner.py b/transfer_nlp/runner/experiment_runner.py index a0e5a7c..a964832 100644 --- a/transfer_nlp/runner/experiment_runner.py +++ b/transfer_nlp/runner/experiment_runner.py @@ -2,6 +2,7 @@ import json import logging from collections import OrderedDict +from copy import deepcopy from pathlib import Path from typing import Dict, Any, Union @@ -11,9 +12,9 @@ ConfigEnv = Dict[str, Any] -def load_config(p: Path) -> Dict[str, ConfigEnv]: - def get_val(cfg:configparser.ConfigParser, section: str, key): +def load_config(p: Path) -> Dict[str, ConfigEnv]: + def get_val(cfg: configparser.ConfigParser, section: str, key): try: return cfg.getint(section, key) except ValueError: @@ -43,6 +44,7 @@ def get_val(cfg:configparser.ConfigParser, section: str, key): return rv + class ExperimentRunner: """ Run an experiment several times with varying configurations. @@ -51,7 +53,7 @@ class ExperimentRunner: """ @staticmethod - def _capture_logs(report_path:Path): + def _capture_logs(report_path: Path): logger = logging.getLogger('') handler = logging.FileHandler(str(report_path / 'runner.log')) fmt = logging.Formatter('%(asctime)s %(levelname)s: %(message)s') # TODO configurable? 
@@ -65,7 +67,7 @@ def _stop_log_capture(handler):
         logger.removeHandler(handler)
 
     @staticmethod
-    def _write_config(cfg_name: str, experiment:Dict, cfg:ConfigEnv, exp_report_path:Path):
+    def _write_config(cfg_name: str, experiment: Dict, cfg: ConfigEnv, exp_report_path: Path, experiment_cache: Dict = None):
         """duplicate the config used to run the experiment in the report directory to preserve history"""
         config = configparser.ConfigParser({}, OrderedDict)
         config.optionxform = str
@@ -79,12 +81,17 @@ def _write_config(cfg_name: str, experiment:Dict, cfg:ConfigEnv, exp_report_path
         with (exp_report_path / 'experiment.json').open('w') as expfile:
             json.dump(experiment, expfile, indent=4)
 
+        if experiment_cache:
+            with (exp_report_path / 'experiment_cache.json').open('w') as expfile:
+                json.dump(experiment_cache, expfile, indent=4)
+
     @staticmethod
     def run_all(experiment: Union[str, Path, Dict],
                 experiment_config: Union[str, Path],
                 report_dir: Union[str, Path],
                 trainer_config_name: str = 'trainer',
                 reporter_config_name: str = 'reporter',
+                experiment_cache: Union[str, Path, Dict] = None,
                 **env_vars) -> None:
         """
         :param experiment: the experiment config
@@ -104,6 +111,12 @@ def run_all(experiment: Union[str, Path, Dict],
         report_path = Path(report_dir)
         report_path.mkdir(parents=True)
 
+        experiment_config_cache = {}
+        if experiment_cache:
+            logging.info("#" * 5 + " Building read-only objects and caching them for reuse across experiment settings " + "#" * 5)
+            experiment_config_cache = ExperimentConfig(experiment_cache, **env_vars)
+            logging.info("#" * 5 + " Read-only objects are built and cached for reuse across experiment settings " + "#" * 5)
+
         for exp_name, env in envs.items():
             exp_report_path = report_path / exp_name
             exp_report_path.mkdir()
@@ -112,12 +125,19 @@ def run_all(experiment: Union[str, Path, Dict],
                 logging.info('running %s', exp_name)
                 all_vars = dict(env_vars)
                 all_vars.update(env)
-                experiment_config = ExperimentConfig(experiment, **all_vars)
+
+                exp = deepcopy(experiment)
+                if experiment_cache:
+                    exp = ExperimentConfig.load_experiment_json(exp)
+                    exp.update(experiment_config_cache)
+
+                experiment_config = ExperimentConfig(exp, **all_vars)
                 trainer: TrainerABC = experiment_config[trainer_config_name]
                 reporter: ReporterABC = experiment_config[reporter_config_name]
                 trainer.train()
                 exp_json = ExperimentConfig.load_experiment_json(experiment)
-                ExperimentRunner._write_config(exp_name, exp_json, all_vars, exp_report_path)
+                exp_cache_json = ExperimentConfig.load_experiment_json(experiment_cache) if experiment_cache else None
+                ExperimentRunner._write_config(exp_name, exp_json, all_vars, exp_report_path, exp_cache_json)
                 reporter.report(exp_name, experiment_config, exp_report_path)
             finally:
                 ExperimentRunner._stop_log_capture(log_handler)
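
Note (not part of the patch): a minimal usage sketch of the new experiment_cache argument, mirroring the commented-out call in mlp_parameter_tuning.py above. It assumes the two JSON files shown in this patch sit next to the calling script, that mlp_parameter_tuning.cfg defines one section per experiment setting (supplying values such as $hidden_dim and $lr referenced in the uncached config), and that HOME points at the data directory; the report_dir value is a placeholder.

    from pathlib import Path

    from transfer_nlp.runner.experiment_runner import ExperimentRunner

    parent_dir = Path(__file__).parent
    home_env = str(Path.home() / 'work/transfer-nlp-data')

    # Objects declared in the cache config (here, the dataset splits and their vectorizer)
    # are built once and reused across every experiment section of the .cfg file;
    # everything in the uncached config is rebuilt for each experiment setting.
    ExperimentRunner.run_all(experiment=parent_dir / 'mlp_parameter_tuning_uncached.json',
                             experiment_config=parent_dir / 'mlp_parameter_tuning.cfg',
                             report_dir=f"{home_env}/mlp_parameter_fine_tuning/example_run",
                             trainer_config_name='trainer',
                             reporter_config_name='reporter',
                             experiment_cache=parent_dir / 'mlp_parameter_tuning_cache.json',
                             HOME=home_env)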