deeppavlov · Ihab-Asaad · Nov 23, 2021 · Nov 23, 2021 · Nov 30, 2021 · Nov 30, 2021
diff --git a/deeppavlov/configs/classifiers/sentiment_twitter.json b/deeppavlov/configs/classifiers/sentiment_twitter.json
@@ -73,8 +73,8 @@
         ],
         "filters_cnn": 256,
         "optimizer": "Adam",
-        "learning_rate": 0.01,
-        "learning_rate_decay": 0.1,
+        "learning_rate": 0.1,
+        "learning_rate_decay": 0.01,
         "loss": "binary_crossentropy",
         "last_layer_activation": "softmax",
         "coef_reg_cnn": 1e-3,
@@ -107,7 +107,10 @@
       "f1_macro",
       {
         "name": "roc_auc",
-        "inputs": ["y_onehot", "y_pred_probas"]
+        "inputs": [
+          "y_onehot",
+          "y_pred_probas"
+        ]
-        "inputs": [
-          "y_onehot",
-          "y_pred_probas"
-        ]
+        "inputs": ["y_onehot", "y_pred_probas"]
-        "inputs": [
-          "y_onehot",
-          "y_pred_probas"
-        ]
+        "inputs": ["y_onehot", "y_pred_probas"]
       }
     ],
     "validation_patience": 5,
@@ -119,7 +122,31 @@
       "valid",
       "test"
     ],
-    "class_name": "nn_trainer"
+    "class_name": "nn_trainer",
+    "logger": [
+      {
+        "name": "TensorboardLogger",
+        "log_dir": "{MODELS_PATH}/sentiment_twitter/Tensorboard_logs"
+      },
+      {
+        "name": "StdLogger"
+      },
+      {
+        "name": "WandbLogger",
+        "API_Key":"40-chars API KEY",
+        "init":{
+          "project": "Tuning Hyperparameters",
+          "group": "Tuning lr & lr_decay",
+          "job_type":"lr=0.01, lr_decay=0.01",
+          "config": {
+          "description": "add any hyperparameter you want to monitor, architecture description, ...",
+          "learning_rate": 0.02,
+          "architecture": "CNN",
+          "dataset": "sentiment_twitter_data"
+          }
+        }
+      }
+    ]
   },
   "metadata": {
     "variables": {
@@ -128,6 +155,10 @@
       "MODELS_PATH": "{ROOT_PATH}/models",
       "MODEL_PATH": "{MODELS_PATH}/classifiers/sentiment_twitter_v6"
     },
+    "requirements": [
+      "{DEEPPAVLOV_PATH}/requirements/tf.txt",
+      "{DEEPPAVLOV_PATH}/requirements/fasttext.txt"
+    ],
     "download": [
       {
         "url": "http://files.deeppavlov.ai/datasets/sentiment_twitter_data.tar.gz",

diff --git a/deeppavlov/core/common/logging/logging_class.py b/deeppavlov/core/common/logging/logging_class.py
@@ -0,0 +1,143 @@
+# Copyright 2022 Neural Networks and Deep Learning lab, MIPT
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import time
+import datetime
+from itertools import islice
+from abc import ABC, abstractmethod
+from typing import List, Tuple
+from logging import getLogger
+
+from deeppavlov.core.data.data_learning_iterator import DataLearningIterator
+from deeppavlov.core.trainers.nn_trainer import NNTrainer
+
+
+log = getLogger(__name__)
+
+
+class TrainLogger(ABC):
+    """An abstract class for logging metrics during training process."""
+
+    def get_report(self, nn_trainer: NNTrainer, iterator: DataLearningIterator, type: str = None) -> dict:
+        """
+        Get report about current process.
+
+        For the 'valid' type, 'get_report' also updates the patience counter and the best score on validation
+        data, and saves the model on the first validation and whenever that score improves.
+
+        Args:
+            nn_trainer: 'NNTrainer' object contains parameters required for preparing the report.
+            iterator: :class:`~deeppavlov.core.data.data_learning_iterator.DataLearningIterator` used for evaluation
+            type: if "train" returns report about training process, "valid" returns report about validation process.
+
+        Returns:
+            dict contains data about current 'type' process.
+
+        Raises:
+            ValueError: if 'type' is neither "train" nor "valid".
+
+        """
+        if type == "train":
+            if nn_trainer.log_on_k_batches == 0:
+                report = {"time_spent": str(datetime.timedelta(
+                            seconds=round(time.time() - nn_trainer.start_time + 0.5)))}
+            else:
+                data = islice(iterator.gen_batches(nn_trainer.batch_size, data_type="train", shuffle=True),
+                    nn_trainer.log_on_k_batches,)
+                report = nn_trainer.test(
+                    data, nn_trainer.train_metrics, start_time=nn_trainer.start_time
+                )
+
+            report.update(
+                {
+                    "epochs_done": nn_trainer.epoch,
+                    "batches_seen": nn_trainer.train_batches_seen,
+                    "train_examples_seen": nn_trainer.examples,
+                }
+            )
+
+            report.update(nn_trainer.last_result)
+            if nn_trainer.losses:
+                # Report the mean of the losses accumulated since the last report, then reset them.
+                report["loss"] = sum(nn_trainer.losses) / len(nn_trainer.losses)
+                nn_trainer.losses.clear()
+
+        elif type == "valid":
+            report = nn_trainer.test(
+                iterator.gen_batches(
+                    nn_trainer.batch_size, data_type="valid", shuffle=False
+                ),
+                start_time=nn_trainer.start_time,
+            )
+
+            report["epochs_done"] = nn_trainer.epoch
+            report["batches_seen"] = nn_trainer.train_batches_seen
+            report["train_examples_seen"] = nn_trainer.examples
+
+            metrics: List[Tuple[str, float]] = list(report["metrics"].items())
+
+            # The first reported metric is the one compared against the best score.
+            m_name, score = metrics[0]
+
+            # Update the patience
+            if nn_trainer.score_best is None:
+                nn_trainer.patience = 0
+            else:
+                if nn_trainer.improved(score, nn_trainer.score_best):
+                    nn_trainer.patience = 0
+                else:
+                    nn_trainer.patience += 1
+
+            # Run the validation model-saving logic
+            if nn_trainer._is_initial_validation():
+                log.info("Initial best {} of {}".format(m_name, score))
+                nn_trainer.score_best = score
+            elif nn_trainer._is_first_validation() and nn_trainer.score_best is None:
+                log.info("First best {} of {}".format(m_name, score))
+                nn_trainer.score_best = score
+                log.info("Saving model")
+                nn_trainer.save()
+            elif nn_trainer.improved(score, nn_trainer.score_best):
+                log.info("Improved best {} of {}".format(m_name, score))
+                nn_trainer.score_best = score
+                log.info("Saving model")
+                nn_trainer.save()
+            else:
+                log.info(
+                    "Did not improve on the {} of {}".format(
+                        m_name, nn_trainer.score_best
+                    )
+                )
+
+            report["impatience"] = nn_trainer.patience
+            if nn_trainer.validation_patience > 0:
+                report["patience_limit"] = nn_trainer.validation_patience
+
+            nn_trainer.validation_number += 1
+
+        else:
+            # Without this branch an unknown type would fail with an UnboundLocalError on return.
+            raise ValueError(f"Unknown report type {type!r}, expected 'train' or 'valid'")
+
+        return report
+
+    @abstractmethod
+    def __call__(self, nn_trainer: NNTrainer, iterator: DataLearningIterator, type: str = None,
+                 report: dict = None, **kwargs) -> dict:
+        """Log a report about the current process and return it; must be implemented by subclasses."""
+        raise NotImplementedError
+
+    def close(self) -> None:
+        """Finalize logging; subclasses are expected to override this method."""
+        raise NotImplementedError
diff --git a/deeppavlov/core/common/logging/std_logger.py b/deeppavlov/core/common/logging/std_logger.py
@@ -0,0 +1,54 @@
+from typing import Dict
+from logging import getLogger
+import json
+
+from deeppavlov.core.data.data_learning_iterator import DataLearningIterator
+from deeppavlov.core.trainers.nn_trainer import NNTrainer
+from deeppavlov.core.trainers.utils import NumpyArrayEncoder
+from deeppavlov.core.common.logging.logging_class import TrainLogger
+
+log = getLogger(__name__)
+
+
+class StdLogger(TrainLogger):
+    """
+    Logger that writes reports about the current training and validation processes to stdout.
+
+    Args:
+        stdlogging (bool): when True, every report is logged to stdout; an instance created with
+            stdlogging = False only prepares and returns reports, so it can be used for validation process.
+        **kwargs: additional parameters, accepted and ignored.
+    """
+
+    def __init__(self, stdlogging: bool = True, **kwargs) -> None:
+        self.stdlogging = stdlogging
+
+    def __call__(self, nn_trainer: NNTrainer, iterator: DataLearningIterator, type: str = None, report: Dict = None,
+                 **kwargs) -> dict:
+        """
+        Prepare the report for the current process (unless one is given) and log it to stdout.
+
+        Args:
+            nn_trainer: NNTrainer object contains parameters required for preparing report.
+            iterator: :class:`~deeppavlov.core.data.data_learning_iterator.DataLearningIterator` used for evaluation.
+            type: process type, "train" for the training process or "valid" for the validation process.
+            report: dictionary with current process information; if None, it is built with 'get_report'.
+            **kwargs: additional parameters, accepted and ignored.
+
+        Returns:
+            the report dict; it is also logged to stdout when 'stdlogging' is True.
+
+        """
+        current_report = report
+        if current_report is None:
+            current_report = self.get_report(nn_trainer=nn_trainer, iterator=iterator, type=type)
+        if not self.stdlogging:
+            return current_report
+        message = json.dumps({type: current_report}, ensure_ascii=False, cls=NumpyArrayEncoder)
+        log.info(message)
+        return current_report
+
+    @staticmethod
+    def close():
+        """Log that logging to stdout has finished."""
+        log.info("Logging to Stdout completed")
diff --git a/deeppavlov/core/common/logging/tensorboard_logger.py b/deeppavlov/core/common/logging/tensorboard_logger.py
@@ -0,0 +1,107 @@
+from pathlib import Path
+from typing import List, Tuple, Optional, Dict
+from logging import getLogger
+
+from deeppavlov.core.commands.utils import expand_path
+
+from deeppavlov.core.data.data_learning_iterator import DataLearningIterator
+from deeppavlov.core.trainers.nn_trainer import NNTrainer
+from deeppavlov.core.trainers.fit_trainer import FitTrainer
+from deeppavlov.core.common.logging.logging_class import TrainLogger
+
+log = getLogger(__name__)
+
+
+class TensorboardLogger(TrainLogger):
+    """
+    TensorboardLogger class for logging to tensorboard.
+
+    Args:
+        fit_trainer: FitTrainer object; on successful import the TensorFlow module is stored in its '_tf' attribute.
+        log_dir (Path): path to local folder to log data into; if None, nothing is written to tensorboard.
+
+    """
+
+    def __init__(self, fit_trainer: FitTrainer, log_dir: Path = None) -> None:
+        # Defaults describe a disabled logger, so '__call__' stays safe when the setup below is skipped.
+        self.train_log_dir = None
+        self.valid_log_dir = None
+        self.tb_train_writer = None
+        self.tb_valid_writer = None
+        if log_dir is None:
+            return
+        try:
+            # noinspection PyPackageRequirements
+            # noinspection PyUnresolvedReferences
+            import tensorflow as tf
+        except ImportError:
+            log.warning('TensorFlow could not be imported, so tensorboard log directory '
+                        f'`{log_dir}` will be ignored')
+        else:
+            log_dir = expand_path(log_dir)
+            fit_trainer._tf = tf
+            self.train_log_dir = str(log_dir / 'train_log')
+            self.valid_log_dir = str(log_dir / 'valid_log')
+            self.tb_train_writer = tf.summary.FileWriter(self.train_log_dir)
+            self.tb_valid_writer = tf.summary.FileWriter(self.valid_log_dir)
+
+    def __call__(self, nn_trainer: NNTrainer, iterator: DataLearningIterator, type: str = None,
+                 tensorboard_tag: Optional[str] = None, tensorboard_index: Optional[int] = None,
+                 report: Dict = None, **kwargs) -> dict:
+        """
+        Log metrics to tensorboard: for the 'train' logging type into log_dir/train_log,
+        for any other logging type (validation) into log_dir/valid_log.
+
+        Args:
+            nn_trainer: NNTrainer object contains parameters required for preparing the report.
+            iterator: :class:`~deeppavlov.core.data.data_learning_iterator.DataLearningIterator` used for evaluation
+            type: process type, "train" logs the training process, anything else logs the validation process.
+            tensorboard_tag: one of two options : 'every_n_batches', 'every_n_epochs'
+            tensorboard_index: value of 'train_batches_seen' or 'epoch', matching the 'tensorboard_tag' option.
+            report: dictionary contains current process information, if None, use 'get_report' method to get it.
+            **kwargs: additional parameters, accepted and ignored.
+
+        Returns:
+            dict with the report whose metrics were considered for logging to tensorboard.
+
+        """
+        if report is None:
+            report = self.get_report(
+                nn_trainer=nn_trainer, iterator=iterator, type=type
+            )
+
+        if type == "train":
+            metrics: List[Tuple[str, float]] = list(
+                report.get("metrics", {}).items()
+            ) + list(nn_trainer.last_result.items())
+            if report.get("loss", None) is not None:
+                metrics.append(("loss", report["loss"]))
+
+            if metrics and self.train_log_dir is not None:
+                summary = nn_trainer._tf.Summary()
+
+                for name, score in metrics:
+                    summary.value.add(
+                        tag=f"{tensorboard_tag}/{name}", simple_value=score
+                    )
+                self.tb_train_writer.add_summary(summary, tensorboard_index)
+                self.tb_train_writer.flush()
+        else:
+            metrics = list(report["metrics"].items())
+            if tensorboard_tag is not None and self.valid_log_dir is not None:
+                summary = nn_trainer._tf.Summary()
+                for name, score in metrics:
+                    summary.value.add(
+                        tag=f"{tensorboard_tag}/{name}", simple_value=score
+                    )
+                if tensorboard_index is None:
+                    # Fall back to the number of seen training batches as the step value.
+                    tensorboard_index = nn_trainer.train_batches_seen
+                self.tb_valid_writer.add_summary(summary, tensorboard_index)
+                self.tb_valid_writer.flush()
+        return report
+
+    @staticmethod
+    def close():
+        """Log that logging to tensorboard has finished."""
+        log.info("Logging to Tensorboard completed")