Merge pull request #48 from feedly/feature/cache-readonly
Add the possibility to cache objects and use them in different experiment settings
petermartigny committed Jun 1, 2019
2 parents 23192c5 + a48d7f3 commit 5271e50
Showing 4 changed files with 104 additions and 11 deletions.
22 changes: 17 additions & 5 deletions experiments/deep_learning_with_pytorch/mlp_parameter_tuning.py
@@ -31,8 +31,20 @@ def report(self, name: str, experiment: ExperimentConfig, report_dir: Path):
parent_dir = Path(__file__).parent
home_env = str(Path.home() / 'work/transfer-nlp-data')
date = '_'.join(str(datetime.today()).split(' '))
ExperimentRunner.run_all(experiment=parent_dir / 'mlp_parameter_tuning.json',
experiment_config=parent_dir / 'mlp_parameter_tuning.cfg',
report_dir=f"{home_env}/mlp_parameter_fine_tuning/{date}",
trainer_config_name='trainer',
reporter_config_name='reporter', HOME=home_env)

# # Uncomment to run the sequential Runner without caching read-only objects
# ExperimentRunner.run_all(experiment=parent_dir / 'mlp_parameter_tuning.json',
# experiment_config=parent_dir / 'mlp_parameter_tuning.cfg',
# report_dir=f"{home_env}/mlp_parameter_fine_tuning/{date}",
# trainer_config_name='trainer',
# reporter_config_name='reporter', HOME=home_env)
#
#
# # Uncomment to run the sequential Runner with caching read-only objects
# ExperimentRunner.run_all(experiment=parent_dir / 'mlp_parameter_tuning_uncached.json',
# experiment_config=parent_dir / 'mlp_parameter_tuning.cfg',
# report_dir=f"{home_env}/mlp_parameter_fine_tuning/{date}",
# trainer_config_name='trainer',
# reporter_config_name='reporter',
# experiment_cache=parent_dir / 'mlp_parameter_tuning_cache.json',
# HOME=home_env)
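The commented blocks above contrast the two sequential modes. In short, caching is opt-in: pass experiment_cache and keep only the run-dependent objects in the experiment JSON. A minimal sketch, assuming the file names from this diff, with illustrative values for report_dir and HOME:

from pathlib import Path

from transfer_nlp.runner.experiment_runner import ExperimentRunner

parent_dir = Path(__file__).parent

# The cache JSON holds objects shared by every run; the experiment JSON
# references them (e.g. "$my_dataset_splits") instead of rebuilding them
# for each parameter setting.
ExperimentRunner.run_all(experiment=parent_dir / 'mlp_parameter_tuning_uncached.json',
                         experiment_config=parent_dir / 'mlp_parameter_tuning.cfg',
                         report_dir='/tmp/mlp_reports',  # illustrative
                         experiment_cache=parent_dir / 'mlp_parameter_tuning_cache.json',
                         HOME='/tmp/transfer-nlp-data')  # illustrative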
11 changes: 11 additions & 0 deletions experiments/deep_learning_with_pytorch/mlp_parameter_tuning_cache.json
@@ -0,0 +1,11 @@
{
"my_dataset_splits": {
"_name": "SurnamesDatasetMLP",
"data_file": "$HOME/surnames/surnames_with_splits.csv",
"batch_size": 128,
"vectorizer": {
"_name": "SurnamesVectorizerMLP",
"data_file": "$HOME/surnames/surnames_with_splits.csv"
}
}
}
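Everything in this new file is built exactly once, before the sweep starts, and then shared across runs. The _name key names the registered class to instantiate, and $HOME is filled in from the keyword arguments passed to the runner. A small sketch of building and inspecting the cache on its own, assuming ExperimentConfig lives under transfer_nlp.plugins.config and supports item access (the runner diff below relies on both):

from transfer_nlp.plugins.config import ExperimentConfig  # assumed module path

# '$HOME' inside the JSON is substituted from the kwarg below.
cache = ExperimentConfig('mlp_parameter_tuning_cache.json', HOME='/tmp/transfer-nlp-data')
splits = cache['my_dataset_splits']  # the fully built dataset, reusable across runs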
50 changes: 50 additions & 0 deletions experiments/deep_learning_with_pytorch/mlp_parameter_tuning_uncached.json
@@ -0,0 +1,50 @@
{
"model": {
"_name": "MultiLayerPerceptron",
"hidden_dim": "$hidden_dim",
"data": "$my_dataset_splits"
},
"optimizer": {
"_name": "Adam",
"lr": "$lr",
"params": {
"_name": "TrainableParameters"
}
},
"scheduler": {
"_name": "ReduceLROnPlateau",
"patience": 1,
"mode": "min",
"factor": 0.5
},
"trainer": {
"_name": "BasicTrainer",
"model": "$model",
"dataset_splits": "$my_dataset_splits",
"loss": {
"_name": "CrossEntropyLoss"
},
"optimizer": "$optimizer",
"gradient_clipping": 0.25,
"num_epochs": 5,
"seed": 1337,
"regularizer": {
"_name": "L1"
},
"tensorboard_logs": "$HOME/surnames/tensorboard/mlp",
"metrics": {
"accuracy": {
"_name": "Accuracy"
},
"loss": {
"_name": "LossMetric",
"loss_fn": {
"_name": "CrossEntropyLoss"
}
}
}
},
"reporter": {
"_name": "MyReporter"
}
}
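In this experiment file, $my_dataset_splits is satisfied by the cache, while $hidden_dim and $lr vary per run: they come from the sections of mlp_parameter_tuning.cfg, which load_config (diff below) parses with configparser, one section per experiment. A sketch of that lookup with an illustrative section name, assuming the truncated body of get_val falls back from int to float to the raw string:

import configparser

cfg = configparser.ConfigParser()
cfg.read_string("""
[small_lr]
hidden_dim = 100
lr = 0.001
""")

def get_val(cfg, section, key):
    try:
        return cfg.getint(section, key)        # '100' -> 100
    except ValueError:
        try:
            return cfg.getfloat(section, key)  # '0.001' -> 0.001
        except ValueError:
            return cfg.get(section, key)       # anything else stays a string

env = {key: get_val(cfg, 'small_lr', key) for key in cfg['small_lr']}
# env == {'hidden_dim': 100, 'lr': 0.001}; run_all merges this into env_vars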
32 changes: 26 additions & 6 deletions transfer_nlp/runner/experiment_runner.py
@@ -2,6 +2,7 @@
import json
import logging
from collections import OrderedDict
from copy import deepcopy
from pathlib import Path
from typing import Dict, Any, Union

@@ -11,9 +12,9 @@

ConfigEnv = Dict[str, Any]

def load_config(p: Path) -> Dict[str, ConfigEnv]:

def get_val(cfg:configparser.ConfigParser, section: str, key):
def load_config(p: Path) -> Dict[str, ConfigEnv]:
def get_val(cfg: configparser.ConfigParser, section: str, key):
try:
return cfg.getint(section, key)
except ValueError:
@@ -43,6 +44,7 @@ def get_val(cfg:configparser.ConfigParser, section: str, key):

return rv


class ExperimentRunner:
"""
Run an experiment several times with varying configurations.
@@ -51,7 +53,7 @@ class ExperimentRunner:
"""

@staticmethod
def _capture_logs(report_path:Path):
def _capture_logs(report_path: Path):
logger = logging.getLogger('')
handler = logging.FileHandler(str(report_path / 'runner.log'))
fmt = logging.Formatter('%(asctime)s %(levelname)s: %(message)s') # TODO configurable?
@@ -65,7 +67,7 @@ def _stop_log_capture(handler):
logger.removeHandler(handler)

@staticmethod
def _write_config(cfg_name: str, experiment:Dict, cfg:ConfigEnv, exp_report_path:Path):
def _write_config(cfg_name: str, experiment: Dict, cfg: ConfigEnv, exp_report_path: Path, experiment_cache: Dict = None):
"""duplicate the config used to run the experiment in the report directory to preserve history"""
config = configparser.ConfigParser({}, OrderedDict)
config.optionxform = str
@@ -79,12 +81,17 @@ def _write_config(cfg_name: str, experiment:Dict, cfg:ConfigEnv, exp_report_path
with (exp_report_path / 'experiment.json').open('w') as expfile:
json.dump(experiment, expfile, indent=4)

if experiment_cache:
with (exp_report_path / 'experiment_cache.json').open('w') as expfile:
json.dump(experiment_cache, expfile, indent=4)

@staticmethod
def run_all(experiment: Union[str, Path, Dict],
experiment_config: Union[str, Path],
report_dir: Union[str, Path],
trainer_config_name: str = 'trainer',
reporter_config_name: str = 'reporter',
experiment_cache: Union[str, Path, Dict] = None,
**env_vars) -> None:
"""
:param experiment: the experiment config
@@ -104,6 +111,12 @@ def run_all(experiment: Union[str, Path, Dict],
report_path = Path(report_dir)
report_path.mkdir(parents=True)

experiment_config_cache = {}
if experiment_cache:
logging.info("#" * 5 + " Building a set of read-only objects and caching them for use in different experiment settings " + "#" * 5)
experiment_config_cache = ExperimentConfig(experiment_cache, **env_vars)
logging.info("#" * 5 + " Read-only objects are built and cached for use in different experiment settings " + "#" * 5)

for exp_name, env in envs.items():
exp_report_path = report_path / exp_name
exp_report_path.mkdir()
@@ -112,12 +125,19 @@
logging.info('running %s', exp_name)
all_vars = dict(env_vars)
all_vars.update(env)
experiment_config = ExperimentConfig(experiment, **all_vars)

exp = deepcopy(experiment)
if experiment_cache:
exp = ExperimentConfig.load_experiment_json(exp)
exp.update(experiment_config_cache)

experiment_config = ExperimentConfig(exp, **all_vars)
trainer: TrainerABC = experiment_config[trainer_config_name]
reporter: ReporterABC = experiment_config[reporter_config_name]
trainer.train()
exp_json = ExperimentConfig.load_experiment_json(experiment)
ExperimentRunner._write_config(exp_name, exp_json, all_vars, exp_report_path)
exp_cache_json = ExperimentConfig.load_experiment_json(experiment_cache) if experiment_cache else None
ExperimentRunner._write_config(exp_name, exp_json, all_vars, exp_report_path, exp_cache_json)
reporter.report(exp_name, experiment_config, exp_report_path)
finally:
ExperimentRunner._stop_log_capture(log_handler)
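Condensing the loop above: each run operates on a fresh deep copy of the experiment with the pre-built cache spliced in, so the read-only objects are constructed once rather than once per parameter setting. A sketch of the flow, with names taken from this diff and log capture and report writing elided:

from copy import deepcopy

cache = ExperimentConfig(experiment_cache, **env_vars)        # built once
for exp_name, env in load_config(experiment_config).items():  # one cfg section per run
    exp = ExperimentConfig.load_experiment_json(deepcopy(experiment))
    exp.update(cache)                         # inject the cached read-only objects
    config = ExperimentConfig(exp, **{**env_vars, **env})
    config['trainer'].train()
    config['reporter'].report(exp_name, config, report_path / exp_name)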
