refactor: train logging (#1572)
Co-authored-by: yurakuratov <9271630+yurakuratov@users.noreply.github.com>
IgnatovFedor and yurakuratov committed Jun 22, 2022
1 parent 29222c6 commit 46dbb02
Showing 6 changed files with 93 additions and 98 deletions.
3 changes: 1 addition & 2 deletions deeppavlov/configs/classifiers/paraphraser_rubert.json
@@ -46,8 +46,7 @@
     "validation_patience": 7,
     "val_every_n_batches": 50,
     "log_every_n_batches": 50,
-    "validate_best": true,
-    "test_best": true,
+    "evaluation_targets": ["valid", "test"],
     "class_name": "torch_trainer"
   },
   "metadata": {
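
The two removed boolean flags map directly onto the new "evaluation_targets" list. A minimal migration sketch (a hypothetical helper, not part of this commit) that mirrors the compatibility shim deleted from train.py below:

def migrate_train_config(train_config: dict) -> dict:
    # Mirror of the removed shim: 'validate_best'/'test_best' defaulted to True.
    targets = []
    if train_config.pop('validate_best', True):
        targets.append('valid')
    if train_config.pop('test_best', True):
        targets.append('test')
    train_config.setdefault('evaluation_targets', targets)
    return train_config
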
27 changes: 2 additions & 25 deletions deeppavlov/core/commands/train.py
@@ -70,7 +70,6 @@ def train_evaluate_model_from_config(config: Union[str, Path, dict],
                                      iterator: Union[DataLearningIterator, DataFittingIterator] = None, *,
                                      to_train: bool = True,
                                      evaluation_targets: Optional[Iterable[str]] = None,
-                                     to_validate: Optional[bool] = None,
                                      download: bool = False,
                                      start_epoch_num: Optional[int] = None,
                                      recursive: bool = False) -> Dict[str, Dict[str, float]]:
@@ -98,22 +97,11 @@ def train_evaluate_model_from_config(config: Union[str, Path, dict],
 
     if 'train' not in config:
         log.warning('Train config is missing. Populating with default values')
-    train_config = config.get('train')
+    train_config = config.get('train', {})
 
     if start_epoch_num is not None:
         train_config['start_epoch_num'] = start_epoch_num
 
-    if 'evaluation_targets' not in train_config and ('validate_best' in train_config
-                                                     or 'test_best' in train_config):
-        log.warning('"validate_best" and "test_best" parameters are deprecated.'
-                    ' Please, use "evaluation_targets" list instead')
-
-        train_config['evaluation_targets'] = []
-        if train_config.pop('validate_best', True):
-            train_config['evaluation_targets'].append('valid')
-        if train_config.pop('test_best', True):
-            train_config['evaluation_targets'].append('test')
-
     trainer_class = get_model(train_config.pop('class_name', 'nn_trainer'))
     trainer = trainer_class(config['chainer'], **train_config)
 
@@ -123,18 +111,7 @@ def train_evaluate_model_from_config(config: Union[str, Path, dict],
     res = {}
 
     if iterator is not None:
-        if to_validate is not None:
-            if evaluation_targets is None:
-                log.warning('"to_validate" parameter is deprecated and will be removed in future versions.'
-                            ' Please, use "evaluation_targets" list instead')
-                evaluation_targets = ['test']
-                if to_validate:
-                    evaluation_targets.append('valid')
-            else:
-                log.warning('Both "evaluation_targets" and "to_validate" parameters are specified.'
-                            ' "to_validate" is deprecated and will be ignored')
-
-        res = trainer.evaluate(iterator, evaluation_targets, print_reports=True)
+        res = trainer.evaluate(iterator, evaluation_targets)
         trainer.get_chainer().destroy()
 
     res = {k: v['metrics'] for k, v in res.items()}
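
With the shims gone, callers select evaluation sets explicitly. A usage sketch, assuming the config path shown in this commit; values are illustrative:

from deeppavlov.core.commands.train import train_evaluate_model_from_config

# Returns a dict like {'valid': {...}, 'test': {...}} of metric values,
# since the function maps each report to its 'metrics' entry (see above).
res = train_evaluate_model_from_config(
    'deeppavlov/configs/classifiers/paraphraser_rubert.json',
    to_train=True,
    evaluation_targets=['valid', 'test'],  # replaces the removed to_validate flag
)
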
53 changes: 53 additions & 0 deletions deeppavlov/core/common/log_events.py
@@ -0,0 +1,53 @@
+# Copyright 2019 Neural Networks and Deep Learning lab, MIPT
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from logging import getLogger
+from typing import Optional
+from deeppavlov.core.commands.utils import expand_path
+
+log = getLogger(__name__)
+
+
+class TBWriter:
+    def __init__(self, tensorboard_log_dir: str):
+        # TODO: After adding wandb logger, create common parent class for both loggers
+        from torch.utils.tensorboard import SummaryWriter
+        tensorboard_log_dir = expand_path(tensorboard_log_dir)
+        self.tb_train_writer = SummaryWriter(str(tensorboard_log_dir / 'train_log'))
+        self.tb_valid_writer = SummaryWriter(str(tensorboard_log_dir / 'valid_log'))
+
+    # TODO: find how to write Summary
+    def write_train(self, tag, scalar_value, global_step):
+        self.tb_train_writer.add_scalar(tag, scalar_value, global_step)
+
+    def write_valid(self, tag, scalar_value, global_step):
+        self.tb_valid_writer.add_scalar(tag, scalar_value, global_step)
+
+    def flush(self):
+        self.tb_train_writer.flush()
+        self.tb_valid_writer.flush()
+
+
+def get_tb_writer(tensorboard_log_dir: Optional[str]) -> Optional[TBWriter]:
+    try:
+        if tensorboard_log_dir is not None:
+            tb_writer = TBWriter(tensorboard_log_dir)
+        else:
+            tb_writer = None
+    except ImportError:
+        log.error('Failed to import SummaryWriter from torch.utils.tensorboard. Failed to initialize Tensorboard '
+                  'logger. Install appropriate Pytorch version to use this logger or remove tensorboard_log_dir '
+                  'parameter from the train parameters list in the configuration file.')
+        tb_writer = None
+    return tb_writer
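
Callers are expected to treat the returned writer as optional, as nn_trainer.py does below. A minimal usage sketch; the directory and tag names are illustrative:

from deeppavlov.core.common.log_events import get_tb_writer

tb_writer = get_tb_writer('my_model/logs')  # hypothetical log directory
if tb_writer is not None:  # None when torch.utils.tensorboard is unavailable
    tb_writer.write_train(tag='every_n_batches/loss', scalar_value=0.42, global_step=100)
    tb_writer.write_valid(tag='every_n_batches/accuracy', scalar_value=0.87, global_step=100)
    tb_writer.flush()
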
56 changes: 6 additions & 50 deletions deeppavlov/core/trainers/fit_trainer.py
@@ -17,11 +17,9 @@
 import time
 from itertools import islice
 from logging import getLogger
-from pathlib import Path
 from typing import Tuple, Dict, Union, Optional, Iterable, Any, Collection
 
 from deeppavlov.core.commands.infer import build_model
-from deeppavlov.core.commands.utils import expand_path
 from deeppavlov.core.common.chainer import Chainer
 from deeppavlov.core.common.params import from_params
 from deeppavlov.core.common.registry import register
@@ -31,6 +29,7 @@
 from deeppavlov.core.trainers.utils import Metric, parse_metrics, prettify_metrics, NumpyArrayEncoder
 
 log = getLogger(__name__)
+report_log = getLogger('train_report')
 
 
 @register('fit_trainer')
@@ -50,8 +49,6 @@ class FitTrainer:
         evaluation_targets: data types on which to evaluate trained pipeline (default is ``('valid', 'test')``)
         show_examples: a flag used to print inputs, expected outputs and predicted outputs for the last batch
             in evaluation logs (default is ``False``)
-        tensorboard_log_dir: path to a directory where tensorboard logs can be stored, ignored if None
-            (default is ``None``)
         max_test_batches: maximum batches count for pipeline testing and evaluation, ignored if negative
             (default is ``-1``)
         **kwargs: additional parameters whose names will be logged but otherwise ignored
@@ -61,7 +58,6 @@ def __init__(self, chainer_config: dict, *, batch_size: int = -1,
                  metrics: Iterable[Union[str, dict]] = ('accuracy',),
                  evaluation_targets: Iterable[str] = ('valid', 'test'),
                  show_examples: bool = False,
-                 tensorboard_log_dir: Optional[Union[str, Path]] = None,
                  max_test_batches: int = -1,
                  **kwargs) -> None:
         if kwargs:
@@ -72,23 +68,7 @@ def __init__(self, chainer_config: dict, *, batch_size: int = -1,
         self.metrics = parse_metrics(metrics, self._chainer.in_y, self._chainer.out_params)
         self.evaluation_targets = tuple(evaluation_targets)
         self.show_examples = show_examples
-
         self.max_test_batches = None if max_test_batches < 0 else max_test_batches
-
-        self.tensorboard_log_dir: Optional[Path] = tensorboard_log_dir
-        if tensorboard_log_dir is not None:
-            try:
-                # noinspection PyPackageRequirements
-                # noinspection PyUnresolvedReferences
-                import tensorflow
-            except ImportError:
-                log.warning('TensorFlow could not be imported, so tensorboard log directory'
-                            f'`{self.tensorboard_log_dir}` will be ignored')
-                self.tensorboard_log_dir = None
-            else:
-                self.tensorboard_log_dir = expand_path(tensorboard_log_dir)
-                self._tf = tensorflow
-
         self._built = False
         self._saved = False
         self._loaded = False
@@ -110,37 +90,15 @@ def fit_chainer(self, iterator: Union[DataFittingIterator, DataLearningIterator]
                 targets = [targets]
 
             if self.batch_size > 0 and callable(getattr(component, 'partial_fit', None)):
-                writer = None
-
                 for i, (x, y) in enumerate(iterator.gen_batches(self.batch_size, shuffle=False)):
                     preprocessed = self._chainer.compute(x, y, targets=targets)
                     # noinspection PyUnresolvedReferences
-                    result = component.partial_fit(*preprocessed)
-
-                    if result is not None and self.tensorboard_log_dir is not None:
-                        if writer is None:
-                            writer = self._tf.summary.FileWriter(str(self.tensorboard_log_dir /
-                                                                     f'partial_fit_{component_index}_log'))
-                        for name, score in result.items():
-                            summary = self._tf.Summary()
-                            summary.value.add(tag='partial_fit/' + name, simple_value=score)
-                            writer.add_summary(summary, i)
-                        writer.flush()
+                    component.partial_fit(*preprocessed)
             else:
                 preprocessed = self._chainer.compute(*iterator.get_instances(), targets=targets)
                 if len(targets) == 1:
                     preprocessed = [preprocessed]
-                result: Optional[Dict[str, Iterable[float]]] = component.fit(*preprocessed)
-
-                if result is not None and self.tensorboard_log_dir is not None:
-                    writer = self._tf.summary.FileWriter(str(self.tensorboard_log_dir /
-                                                              f'fit_log_{component_index}'))
-                    for name, scores in result.items():
-                        for i, score in enumerate(scores):
-                            summary = self._tf.Summary()
-                            summary.value.add(tag='fit/' + name, simple_value=score)
-                            writer.add_summary(summary, i)
-                    writer.flush()
+                component.fit(*preprocessed)
 
             component.save()
@@ -240,15 +198,14 @@ def test(self, data: Iterable[Tuple[Collection[Any], Collection[Any]]],
 
         return report
 
-    def evaluate(self, iterator: DataLearningIterator, evaluation_targets: Optional[Iterable[str]] = None, *,
-                 print_reports: bool = True) -> Dict[str, dict]:
+    def evaluate(self, iterator: DataLearningIterator,
+                 evaluation_targets: Optional[Iterable[str]] = None) -> Dict[str, dict]:
         """
         Run :meth:`test` on multiple data types using provided data iterator
 
         Args:
             iterator: :class:`~deeppavlov.core.data.data_learning_iterator.DataLearningIterator` used for evaluation
             evaluation_targets: iterable of data types to evaluate on
-            print_reports: a flag used to print evaluation reports as json lines
 
         Returns:
             a dictionary with data types as keys and evaluation reports as values
@@ -263,7 +220,6 @@ def evaluate(self, iterator: DataLearningIterator, evaluation_targets: Optional[
             data_gen = iterator.gen_batches(self.batch_size, data_type=data_type, shuffle=False)
             report = self.test(data_gen)
             res[data_type] = report
-            if print_reports:
-                print(json.dumps({data_type: report}, ensure_ascii=False, cls=NumpyArrayEncoder))
+            report_log.info(json.dumps({data_type: report}, ensure_ascii=False, cls=NumpyArrayEncoder))
 
         return res
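
Since reports now go through the 'train_report' logger instead of print(), user code can reroute them with the standard logging machinery. A sketch, with an illustrative file name:

import logging

report_log = logging.getLogger('train_report')
file_handler = logging.FileHandler('train_report.jsonl')  # hypothetical destination
file_handler.setFormatter(logging.Formatter('%(message)s'))  # keep bare JSON lines
report_log.addHandler(file_handler)
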
36 changes: 15 additions & 21 deletions deeppavlov/core/trainers/nn_trainer.py
@@ -25,8 +25,9 @@
 from deeppavlov.core.data.data_learning_iterator import DataLearningIterator
 from deeppavlov.core.trainers.fit_trainer import FitTrainer
 from deeppavlov.core.trainers.utils import parse_metrics, NumpyArrayEncoder
-
+from deeppavlov.core.common.log_events import get_tb_writer
 log = getLogger(__name__)
+report_log = getLogger('train_report')
 
 
 @register('nn_trainer')
@@ -105,8 +106,7 @@ def __init__(self, chainer_config: dict, *,
                  log_every_n_batches: int = -1, log_every_n_epochs: int = -1, log_on_k_batches: int = 1,
                  **kwargs) -> None:
         super().__init__(chainer_config, batch_size=batch_size, metrics=metrics, evaluation_targets=evaluation_targets,
-                         show_examples=show_examples, tensorboard_log_dir=tensorboard_log_dir,
-                         max_test_batches=max_test_batches, **kwargs)
+                         show_examples=show_examples, max_test_batches=max_test_batches, **kwargs)
         if train_metrics is None:
             self.train_metrics = self.metrics
         else:
@@ -145,10 +145,7 @@ def _improved(op):
         self.last_result = {}
         self.losses = []
         self.start_time: Optional[float] = None
-
-        if self.tensorboard_log_dir is not None:
-            self.tb_train_writer = self._tf.summary.FileWriter(str(self.tensorboard_log_dir / 'train_log'))
-            self.tb_valid_writer = self._tf.summary.FileWriter(str(self.tensorboard_log_dir / 'valid_log'))
+        self.tb_writer = get_tb_writer(tensorboard_log_dir)
 
     def save(self) -> None:
         if self._loaded:
@@ -174,14 +171,13 @@ def _validate(self, iterator: DataLearningIterator,
 
         metrics = list(report['metrics'].items())
 
-        if tensorboard_tag is not None and self.tensorboard_log_dir is not None:
-            summary = self._tf.Summary()
-            for name, score in metrics:
-                summary.value.add(tag=f'{tensorboard_tag}/{name}', simple_value=score)
+        if tensorboard_tag is not None and self.tb_writer is not None:
             if tensorboard_index is None:
                 tensorboard_index = self.train_batches_seen
-            self.tb_valid_writer.add_summary(summary, tensorboard_index)
-            self.tb_valid_writer.flush()
+            for name, score in metrics:
+                self.tb_writer.write_valid(tag=f'{tensorboard_tag}/{name}', scalar_value=score,
+                                           global_step=tensorboard_index)
+            self.tb_writer.flush()
 
         m_name, score = metrics[0]
 
@@ -217,7 +213,7 @@ def _validate(self, iterator: DataLearningIterator,
 
         self._send_event(event_name='after_validation', data=report)
         report = {'valid': report}
-        print(json.dumps(report, ensure_ascii=False, cls=NumpyArrayEncoder))
+        report_log.info(json.dumps(report, ensure_ascii=False, cls=NumpyArrayEncoder))
         self.validation_number += 1
 
     def _log(self, iterator: DataLearningIterator,
@@ -246,18 +242,16 @@ def _log(self, iterator: DataLearningIterator,
             self.losses.clear()
             metrics.append(('loss', report['loss']))
 
-        if metrics and self.tensorboard_log_dir is not None:
-            summary = self._tf.Summary()
-
+        if metrics and self.tb_writer is not None:
             for name, score in metrics:
-                summary.value.add(tag=f'{tensorboard_tag}/{name}', simple_value=score)
-            self.tb_train_writer.add_summary(summary, tensorboard_index)
-            self.tb_train_writer.flush()
+                self.tb_writer.write_train(tag=f'{tensorboard_tag}/{name}', scalar_value=score,
+                                           global_step=tensorboard_index)
+            self.tb_writer.flush()
 
         self._send_event(event_name='after_train_log', data=report)
 
         report = {'train': report}
-        print(json.dumps(report, ensure_ascii=False, cls=NumpyArrayEncoder))
+        report_log.info(json.dumps(report, ensure_ascii=False, cls=NumpyArrayEncoder))
 
     def _send_event(self, event_name: str, data: Optional[dict] = None) -> None:
         report = {
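
nn_trainer still accepts tensorboard_log_dir in the train config; only the backend changed from TensorFlow FileWriters to the torch-based TBWriter. An illustrative train section as a Python dict (values are placeholders):

train_config = {
    'class_name': 'nn_trainer',
    'batch_size': 64,
    'evaluation_targets': ['valid', 'test'],
    'log_every_n_batches': 50,
    'tensorboard_log_dir': 'logs',  # now consumed by get_tb_writer
}
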
16 changes: 16 additions & 0 deletions deeppavlov/utils/settings/log_config.json
@@ -23,6 +23,13 @@
             ],
             "propagate": true
         },
+        "train_report": {
+            "level": "INFO",
+            "handlers": [
+                "train_handler"
+            ],
+            "propagate": true
+        },
         "filelock": {
             "level": "WARNING",
             "handlers": [
@@ -39,6 +46,9 @@
         "uvicorn_fmt": {
             "format": "%(asctime)s %(message)s",
             "datefmt": "%Y-%m-%d %H:%M:%S"
+        },
+        "message": {
+            "format": "%(message)s"
         }
     },
     "handlers": {
@@ -66,6 +76,12 @@
             "formatter": "uvicorn_fmt",
             "stream": "ext://sys.stdout",
             "filters": ["probeFilter"]
+        },
+        "train_handler": {
+            "class": "logging.StreamHandler",
+            "level": "INFO",
+            "formatter": "message",
+            "stream": "ext://sys.stdout"
         }
     },
     "filters": {
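
Together, the 'train_report' logger, 'train_handler', and the bare 'message' formatter reproduce the old print() output on stdout. A sketch of the resulting behaviour, assuming this settings file has already been applied with logging.config.dictConfig:

import json
import logging

# The 'message' formatter adds no timestamp or level prefix, so the
# handler emits the report JSON verbatim.
logging.getLogger('train_report').info(
    json.dumps({'valid': {'metrics': {'accuracy': 0.9}}})  # illustrative report
)
# stdout: {"valid": {"metrics": {"accuracy": 0.9}}}
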
