From 1050c721a6d6eda2b3a0821d0b47277d09aa3882 Mon Sep 17 00:00:00 2001 From: Fedor Ignatov Date: Wed, 15 Jun 2022 12:58:54 +0300 Subject: [PATCH 1/5] remove: deprecated [validate|test]_best and to_validate --- .../classifiers/paraphraser_rubert.json | 3 +-- deeppavlov/core/commands/train.py | 25 +------------------ 2 files changed, 2 insertions(+), 26 deletions(-) diff --git a/deeppavlov/configs/classifiers/paraphraser_rubert.json b/deeppavlov/configs/classifiers/paraphraser_rubert.json index 21f3dad019..bdc03382cd 100644 --- a/deeppavlov/configs/classifiers/paraphraser_rubert.json +++ b/deeppavlov/configs/classifiers/paraphraser_rubert.json @@ -46,8 +46,7 @@ "validation_patience": 7, "val_every_n_batches": 50, "log_every_n_batches": 50, - "validate_best": true, - "test_best": true, + "evaluation_targets": ["valid", "test"], "class_name": "torch_trainer" }, "metadata": { diff --git a/deeppavlov/core/commands/train.py b/deeppavlov/core/commands/train.py index c3466fc403..1a28a5d062 100644 --- a/deeppavlov/core/commands/train.py +++ b/deeppavlov/core/commands/train.py @@ -70,7 +70,6 @@ def train_evaluate_model_from_config(config: Union[str, Path, dict], iterator: Union[DataLearningIterator, DataFittingIterator] = None, *, to_train: bool = True, evaluation_targets: Optional[Iterable[str]] = None, - to_validate: Optional[bool] = None, download: bool = False, start_epoch_num: Optional[int] = None, recursive: bool = False) -> Dict[str, Dict[str, float]]: @@ -98,22 +97,11 @@ def train_evaluate_model_from_config(config: Union[str, Path, dict], if 'train' not in config: log.warning('Train config is missing. Populating with default values') - train_config = config.get('train') + train_config = config.get('train', {}) if start_epoch_num is not None: train_config['start_epoch_num'] = start_epoch_num - if 'evaluation_targets' not in train_config and ('validate_best' in train_config - or 'test_best' in train_config): - log.warning('"validate_best" and "test_best" parameters are deprecated.' - ' Please, use "evaluation_targets" list instead') - - train_config['evaluation_targets'] = [] - if train_config.pop('validate_best', True): - train_config['evaluation_targets'].append('valid') - if train_config.pop('test_best', True): - train_config['evaluation_targets'].append('test') - trainer_class = get_model(train_config.pop('class_name', 'nn_trainer')) trainer = trainer_class(config['chainer'], **train_config) @@ -123,17 +111,6 @@ def train_evaluate_model_from_config(config: Union[str, Path, dict], res = {} if iterator is not None: - if to_validate is not None: - if evaluation_targets is None: - log.warning('"to_validate" parameter is deprecated and will be removed in future versions.' - ' Please, use "evaluation_targets" list instead') - evaluation_targets = ['test'] - if to_validate: - evaluation_targets.append('valid') - else: - log.warning('Both "evaluation_targets" and "to_validate" parameters are specified.' - ' "to_validate" is deprecated and will be ignored') - res = trainer.evaluate(iterator, evaluation_targets, print_reports=True) trainer.get_chainer().destroy() From f72881415c360d61b87db4cc347253ccbfed3217 Mon Sep 17 00:00:00 2001 From: Fedor Ignatov Date: Thu, 16 Jun 2022 10:03:04 +0300 Subject: [PATCH 2/5] refactor: training print replaced with log.info --- deeppavlov/core/commands/train.py | 2 +- deeppavlov/core/trainers/fit_trainer.py | 8 +++----- deeppavlov/core/trainers/nn_trainer.py | 4 ++-- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/deeppavlov/core/commands/train.py b/deeppavlov/core/commands/train.py index 1a28a5d062..89e45061d5 100644 --- a/deeppavlov/core/commands/train.py +++ b/deeppavlov/core/commands/train.py @@ -111,7 +111,7 @@ def train_evaluate_model_from_config(config: Union[str, Path, dict], res = {} if iterator is not None: - res = trainer.evaluate(iterator, evaluation_targets, print_reports=True) + res = trainer.evaluate(iterator, evaluation_targets) trainer.get_chainer().destroy() res = {k: v['metrics'] for k, v in res.items()} diff --git a/deeppavlov/core/trainers/fit_trainer.py b/deeppavlov/core/trainers/fit_trainer.py index 0378560564..41a3ed77ed 100644 --- a/deeppavlov/core/trainers/fit_trainer.py +++ b/deeppavlov/core/trainers/fit_trainer.py @@ -240,15 +240,14 @@ def test(self, data: Iterable[Tuple[Collection[Any], Collection[Any]]], return report - def evaluate(self, iterator: DataLearningIterator, evaluation_targets: Optional[Iterable[str]] = None, *, - print_reports: bool = True) -> Dict[str, dict]: + def evaluate(self, iterator: DataLearningIterator, + evaluation_targets: Optional[Iterable[str]] = None) -> Dict[str, dict]: """ Run :meth:`test` on multiple data types using provided data iterator Args: iterator: :class:`~deeppavlov.core.data.data_learning_iterator.DataLearningIterator` used for evaluation evaluation_targets: iterable of data types to evaluate on - print_reports: a flag used to print evaluation reports as json lines Returns: a dictionary with data types as keys and evaluation reports as values @@ -263,7 +262,6 @@ def evaluate(self, iterator: DataLearningIterator, evaluation_targets: Optional[ data_gen = iterator.gen_batches(self.batch_size, data_type=data_type, shuffle=False) report = self.test(data_gen) res[data_type] = report - if print_reports: - print(json.dumps({data_type: report}, ensure_ascii=False, cls=NumpyArrayEncoder)) + log.info(json.dumps({data_type: report}, ensure_ascii=False, cls=NumpyArrayEncoder)) return res diff --git a/deeppavlov/core/trainers/nn_trainer.py b/deeppavlov/core/trainers/nn_trainer.py index 6f6fd8b4bf..5797820c39 100644 --- a/deeppavlov/core/trainers/nn_trainer.py +++ b/deeppavlov/core/trainers/nn_trainer.py @@ -217,7 +217,7 @@ def _validate(self, iterator: DataLearningIterator, self._send_event(event_name='after_validation', data=report) report = {'valid': report} - print(json.dumps(report, ensure_ascii=False, cls=NumpyArrayEncoder)) + log.info(json.dumps(report, ensure_ascii=False, cls=NumpyArrayEncoder)) self.validation_number += 1 def _log(self, iterator: DataLearningIterator, @@ -257,7 +257,7 @@ def _log(self, iterator: DataLearningIterator, self._send_event(event_name='after_train_log', data=report) report = {'train': report} - print(json.dumps(report, ensure_ascii=False, cls=NumpyArrayEncoder)) + log.info(json.dumps(report, ensure_ascii=False, cls=NumpyArrayEncoder)) def _send_event(self, event_name: str, data: Optional[dict] = None) -> None: report = { From 03e66944b4fd2a7dbf4c7e58108e35cfb41e13e3 Mon Sep 17 00:00:00 2001 From: Fedor Ignatov Date: Tue, 21 Jun 2022 09:08:56 +0300 Subject: [PATCH 3/5] feat: tensorboard summary writer from pytorch --- deeppavlov/core/common/log_events.py | 53 +++++++++++++++++++++++++ deeppavlov/core/trainers/fit_trainer.py | 47 +--------------------- deeppavlov/core/trainers/nn_trainer.py | 31 ++++++--------- 3 files changed, 67 insertions(+), 64 deletions(-) create mode 100644 deeppavlov/core/common/log_events.py diff --git a/deeppavlov/core/common/log_events.py b/deeppavlov/core/common/log_events.py new file mode 100644 index 0000000000..82b7e130d9 --- /dev/null +++ b/deeppavlov/core/common/log_events.py @@ -0,0 +1,53 @@ +# Copyright 2019 Neural Networks and Deep Learning lab, MIPT +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from logging import getLogger +from typing import Optional +from deeppavlov.core.commands.utils import expand_path + +log = getLogger(__name__) + + +class TBWriter: + def __init__(self, tensorboard_log_dir: str): + # TODO: After adding wandb logger, create common parent class for both loggers + from torch.utils.tensorboard import SummaryWriter + tensorboard_log_dir = expand_path(tensorboard_log_dir) + self.tb_train_writer = SummaryWriter(str(tensorboard_log_dir / 'train_log')) + self.tb_valid_writer = SummaryWriter(str(tensorboard_log_dir / 'valid_log')) + + # TODO: find how to write Summary + def write_train(self, tag, scalar_value, global_step): + self.tb_train_writer.add_scalar(tag, scalar_value, global_step) + + def write_valid(self, tag, scalar_value, global_step): + self.tb_valid_writer.add_scalar(tag, scalar_value, global_step) + + def flush(self): + self.tb_train_writer.flush() + self.tb_valid_writer.flush() + + +def get_tb_writer(tensorboard_log_dir: Optional[str]) -> Optional[TBWriter]: + try: + if tensorboard_log_dir is not None: + tb_writer = TBWriter(tensorboard_log_dir) + else: + tb_writer = None + except ImportError: + log.error('Failed to import SummaryWriter from torch.utils.tensorboard.Failed to initialize Tensorboard ' + 'logger. Install appropriate Pytorch version to use this logger or remove tensorboard_log_dir ' + 'parameter from the train parameters list in the configuration file.') + tb_writer = None + return tb_writer diff --git a/deeppavlov/core/trainers/fit_trainer.py b/deeppavlov/core/trainers/fit_trainer.py index 41a3ed77ed..57dc6b4d15 100644 --- a/deeppavlov/core/trainers/fit_trainer.py +++ b/deeppavlov/core/trainers/fit_trainer.py @@ -17,11 +17,9 @@ import time from itertools import islice from logging import getLogger -from pathlib import Path from typing import Tuple, Dict, Union, Optional, Iterable, Any, Collection from deeppavlov.core.commands.infer import build_model -from deeppavlov.core.commands.utils import expand_path from deeppavlov.core.common.chainer import Chainer from deeppavlov.core.common.params import from_params from deeppavlov.core.common.registry import register @@ -50,8 +48,6 @@ class FitTrainer: evaluation_targets: data types on which to evaluate trained pipeline (default is ``('valid', 'test')``) show_examples: a flag used to print inputs, expected outputs and predicted outputs for the last batch in evaluation logs (default is ``False``) - tensorboard_log_dir: path to a directory where tensorboard logs can be stored, ignored if None - (default is ``None``) max_test_batches: maximum batches count for pipeline testing and evaluation, ignored if negative (default is ``-1``) **kwargs: additional parameters whose names will be logged but otherwise ignored @@ -61,7 +57,6 @@ def __init__(self, chainer_config: dict, *, batch_size: int = -1, metrics: Iterable[Union[str, dict]] = ('accuracy',), evaluation_targets: Iterable[str] = ('valid', 'test'), show_examples: bool = False, - tensorboard_log_dir: Optional[Union[str, Path]] = None, max_test_batches: int = -1, **kwargs) -> None: if kwargs: @@ -72,23 +67,7 @@ def __init__(self, chainer_config: dict, *, batch_size: int = -1, self.metrics = parse_metrics(metrics, self._chainer.in_y, self._chainer.out_params) self.evaluation_targets = tuple(evaluation_targets) self.show_examples = show_examples - self.max_test_batches = None if max_test_batches < 0 else max_test_batches - - self.tensorboard_log_dir: Optional[Path] = tensorboard_log_dir - if tensorboard_log_dir is not None: - try: - # noinspection PyPackageRequirements - # noinspection PyUnresolvedReferences - import tensorflow - except ImportError: - log.warning('TensorFlow could not be imported, so tensorboard log directory' - f'`{self.tensorboard_log_dir}` will be ignored') - self.tensorboard_log_dir = None - else: - self.tensorboard_log_dir = expand_path(tensorboard_log_dir) - self._tf = tensorflow - self._built = False self._saved = False self._loaded = False @@ -110,37 +89,15 @@ def fit_chainer(self, iterator: Union[DataFittingIterator, DataLearningIterator] targets = [targets] if self.batch_size > 0 and callable(getattr(component, 'partial_fit', None)): - writer = None - for i, (x, y) in enumerate(iterator.gen_batches(self.batch_size, shuffle=False)): preprocessed = self._chainer.compute(x, y, targets=targets) # noinspection PyUnresolvedReferences - result = component.partial_fit(*preprocessed) - - if result is not None and self.tensorboard_log_dir is not None: - if writer is None: - writer = self._tf.summary.FileWriter(str(self.tensorboard_log_dir / - f'partial_fit_{component_index}_log')) - for name, score in result.items(): - summary = self._tf.Summary() - summary.value.add(tag='partial_fit/' + name, simple_value=score) - writer.add_summary(summary, i) - writer.flush() + component.partial_fit(*preprocessed) else: preprocessed = self._chainer.compute(*iterator.get_instances(), targets=targets) if len(targets) == 1: preprocessed = [preprocessed] - result: Optional[Dict[str, Iterable[float]]] = component.fit(*preprocessed) - - if result is not None and self.tensorboard_log_dir is not None: - writer = self._tf.summary.FileWriter(str(self.tensorboard_log_dir / - f'fit_log_{component_index}')) - for name, scores in result.items(): - for i, score in enumerate(scores): - summary = self._tf.Summary() - summary.value.add(tag='fit/' + name, simple_value=score) - writer.add_summary(summary, i) - writer.flush() + component.fit(*preprocessed) component.save() diff --git a/deeppavlov/core/trainers/nn_trainer.py b/deeppavlov/core/trainers/nn_trainer.py index 5797820c39..eb729a375d 100644 --- a/deeppavlov/core/trainers/nn_trainer.py +++ b/deeppavlov/core/trainers/nn_trainer.py @@ -25,7 +25,7 @@ from deeppavlov.core.data.data_learning_iterator import DataLearningIterator from deeppavlov.core.trainers.fit_trainer import FitTrainer from deeppavlov.core.trainers.utils import parse_metrics, NumpyArrayEncoder - +from deeppavlov.core.common.log_events import get_tb_writer log = getLogger(__name__) @@ -105,8 +105,7 @@ def __init__(self, chainer_config: dict, *, log_every_n_batches: int = -1, log_every_n_epochs: int = -1, log_on_k_batches: int = 1, **kwargs) -> None: super().__init__(chainer_config, batch_size=batch_size, metrics=metrics, evaluation_targets=evaluation_targets, - show_examples=show_examples, tensorboard_log_dir=tensorboard_log_dir, - max_test_batches=max_test_batches, **kwargs) + show_examples=show_examples, max_test_batches=max_test_batches, **kwargs) if train_metrics is None: self.train_metrics = self.metrics else: @@ -145,10 +144,7 @@ def _improved(op): self.last_result = {} self.losses = [] self.start_time: Optional[float] = None - - if self.tensorboard_log_dir is not None: - self.tb_train_writer = self._tf.summary.FileWriter(str(self.tensorboard_log_dir / 'train_log')) - self.tb_valid_writer = self._tf.summary.FileWriter(str(self.tensorboard_log_dir / 'valid_log')) + self.tb_writer = get_tb_writer(tensorboard_log_dir) def save(self) -> None: if self._loaded: @@ -174,14 +170,13 @@ def _validate(self, iterator: DataLearningIterator, metrics = list(report['metrics'].items()) - if tensorboard_tag is not None and self.tensorboard_log_dir is not None: - summary = self._tf.Summary() - for name, score in metrics: - summary.value.add(tag=f'{tensorboard_tag}/{name}', simple_value=score) + if tensorboard_tag is not None and self.tb_writer is not None: if tensorboard_index is None: tensorboard_index = self.train_batches_seen - self.tb_valid_writer.add_summary(summary, tensorboard_index) - self.tb_valid_writer.flush() + for name, score in metrics: + self.tb_writer.write_valid(tag=f'{tensorboard_tag}/{name}', scalar_value=score, + global_step=tensorboard_index) + self.tb_writer.flush() m_name, score = metrics[0] @@ -246,13 +241,11 @@ def _log(self, iterator: DataLearningIterator, self.losses.clear() metrics.append(('loss', report['loss'])) - if metrics and self.tensorboard_log_dir is not None: - summary = self._tf.Summary() - + if metrics and self.tb_writer is not None: for name, score in metrics: - summary.value.add(tag=f'{tensorboard_tag}/{name}', simple_value=score) - self.tb_train_writer.add_summary(summary, tensorboard_index) - self.tb_train_writer.flush() + self.tb_writer.write_train(tag=f'{tensorboard_tag}/{name}', scalar_value=score, + global_step=tensorboard_index) + self.tb_writer.flush() self._send_event(event_name='after_train_log', data=report) From 5212778ee5b14ff26c7298bee599f739d5cbfddb Mon Sep 17 00:00:00 2001 From: Fedor Ignatov Date: Tue, 21 Jun 2022 17:42:21 +0300 Subject: [PATCH 4/5] feat: separated logger for training reports --- deeppavlov/core/trainers/fit_trainer.py | 3 ++- deeppavlov/core/trainers/nn_trainer.py | 5 +++-- deeppavlov/utils/settings/log_config.json | 16 ++++++++++++++++ 3 files changed, 21 insertions(+), 3 deletions(-) diff --git a/deeppavlov/core/trainers/fit_trainer.py b/deeppavlov/core/trainers/fit_trainer.py index 57dc6b4d15..758dea532f 100644 --- a/deeppavlov/core/trainers/fit_trainer.py +++ b/deeppavlov/core/trainers/fit_trainer.py @@ -29,6 +29,7 @@ from deeppavlov.core.trainers.utils import Metric, parse_metrics, prettify_metrics, NumpyArrayEncoder log = getLogger(__name__) +report_log = getLogger('train_report') @register('fit_trainer') @@ -219,6 +220,6 @@ def evaluate(self, iterator: DataLearningIterator, data_gen = iterator.gen_batches(self.batch_size, data_type=data_type, shuffle=False) report = self.test(data_gen) res[data_type] = report - log.info(json.dumps({data_type: report}, ensure_ascii=False, cls=NumpyArrayEncoder)) + report_log.info(json.dumps({data_type: report}, ensure_ascii=False, cls=NumpyArrayEncoder)) return res diff --git a/deeppavlov/core/trainers/nn_trainer.py b/deeppavlov/core/trainers/nn_trainer.py index eb729a375d..f642d752e0 100644 --- a/deeppavlov/core/trainers/nn_trainer.py +++ b/deeppavlov/core/trainers/nn_trainer.py @@ -27,6 +27,7 @@ from deeppavlov.core.trainers.utils import parse_metrics, NumpyArrayEncoder from deeppavlov.core.common.log_events import get_tb_writer log = getLogger(__name__) +report_log = getLogger('train_report') @register('nn_trainer') @@ -212,7 +213,7 @@ def _validate(self, iterator: DataLearningIterator, self._send_event(event_name='after_validation', data=report) report = {'valid': report} - log.info(json.dumps(report, ensure_ascii=False, cls=NumpyArrayEncoder)) + report_log.info(json.dumps(report, ensure_ascii=False, cls=NumpyArrayEncoder)) self.validation_number += 1 def _log(self, iterator: DataLearningIterator, @@ -250,7 +251,7 @@ def _log(self, iterator: DataLearningIterator, self._send_event(event_name='after_train_log', data=report) report = {'train': report} - log.info(json.dumps(report, ensure_ascii=False, cls=NumpyArrayEncoder)) + report_log.info(json.dumps(report, ensure_ascii=False, cls=NumpyArrayEncoder)) def _send_event(self, event_name: str, data: Optional[dict] = None) -> None: report = { diff --git a/deeppavlov/utils/settings/log_config.json b/deeppavlov/utils/settings/log_config.json index d04d78125e..515384a42f 100644 --- a/deeppavlov/utils/settings/log_config.json +++ b/deeppavlov/utils/settings/log_config.json @@ -23,6 +23,13 @@ ], "propagate": true }, + "train_report": { + "level": "INFO", + "handlers": [ + "train_handler" + ], + "propagate": true + }, "filelock": { "level": "WARNING", "handlers": [ @@ -39,6 +46,9 @@ "uvicorn_fmt": { "format": "%(asctime)s %(message)s", "datefmt": "%Y-%m-%d %H:%M:%S" + }, + "message": { + "format": "%(message)s" } }, "handlers": { @@ -66,6 +76,12 @@ "formatter": "uvicorn_fmt", "stream": "ext://sys.stdout", "filters": ["probeFilter"] + }, + "train_handler": { + "class": "logging.StreamHandler", + "level": "INFO", + "formatter": "message", + "stream": "ext://sys.stdout" } }, "filters": { From a56f31d10b56c75250dac02d0a71a67290124612 Mon Sep 17 00:00:00 2001 From: Fedor Ignatov Date: Wed, 22 Jun 2022 09:06:50 +0300 Subject: [PATCH 5/5] refactor: error message in deeppavlov/core/common/log_events.py Co-authored-by: yurakuratov <9271630+yurakuratov@users.noreply.github.com> --- deeppavlov/core/common/log_events.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/deeppavlov/core/common/log_events.py b/deeppavlov/core/common/log_events.py index 82b7e130d9..f6d3c88cbb 100644 --- a/deeppavlov/core/common/log_events.py +++ b/deeppavlov/core/common/log_events.py @@ -46,7 +46,7 @@ def get_tb_writer(tensorboard_log_dir: Optional[str]) -> Optional[TBWriter]: else: tb_writer = None except ImportError: - log.error('Failed to import SummaryWriter from torch.utils.tensorboard.Failed to initialize Tensorboard ' + log.error('Failed to import SummaryWriter from torch.utils.tensorboard. Failed to initialize Tensorboard ' 'logger. Install appropriate Pytorch version to use this logger or remove tensorboard_log_dir ' 'parameter from the train parameters list in the configuration file.') tb_writer = None