From 578b062a7c8f5ec105d46deecb5fa57154516166 Mon Sep 17 00:00:00 2001
From: Franziska Zimmermann
Date: Mon, 8 Mar 2021 17:15:38 +0100
Subject: [PATCH 1/6] 2120-improved-tensorboard-logging, builds on top of
 2117-more-flexibility-on-main-metric

---
 flair/models/sequence_tagger_model.py     | 12 +++++--
 flair/models/text_classification_model.py |  8 +++--
 flair/trainers/trainer.py                 | 43 +++++++++++++++++++++--
 flair/training_utils.py                   |  3 +-
 4 files changed, 57 insertions(+), 9 deletions(-)

diff --git a/flair/models/sequence_tagger_model.py b/flair/models/sequence_tagger_model.py
index 1ba08bc4a..7530f3841 100644
--- a/flair/models/sequence_tagger_model.py
+++ b/flair/models/sequence_tagger_model.py
@@ -2,7 +2,7 @@
 import sys
 
 from pathlib import Path
-from typing import List, Union, Optional, Dict
+from typing import List, Union, Optional, Dict, Tuple
 from warnings import warn
 
 import numpy as np
@@ -518,7 +518,8 @@ def evaluate(
         embedding_storage_mode: str = "none",
         mini_batch_size: int = 32,
         num_workers: int = 8,
-        wsd_evaluation: bool = False
+        wsd_evaluation: bool = False,
+        main_score_type: Tuple[str, str] = ("micro avg", 'f1-score')
     ) -> (Result, float):
 
         # read Dataset into data loader (if list of sentences passed, make Dataset first)
@@ -604,6 +605,10 @@ def evaluate(
             classification_report = metrics.classification_report(y_true, y_pred, digits=4,
                                                                    target_names=target_names, zero_division=1, labels=labels_to_report)
+            classification_report_dict = metrics.classification_report(y_true, y_pred, digits=4,
+                                                                       target_names=target_names, zero_division=0,
+                                                                       output_dict=True)
+
         # get scores
         micro_f_score = round(
             metrics.fbeta_score(y_true, y_pred, beta=self.beta, average='micro', labels=labels_to_report), 4)
@@ -624,10 +629,11 @@ def evaluate(
             log_line = f"\t{accuracy_score}"
 
         result = Result(
-            main_score=micro_f_score,
+            main_score=classification_report_dict[main_score_type[0]][main_score_type[1]],
             log_line=log_line,
             log_header=log_header,
             detailed_results=detailed_result,
+            classification_report=classification_report_dict
         )
 
         return result, eval_loss
diff --git a/flair/models/text_classification_model.py b/flair/models/text_classification_model.py
index 57404adeb..3e95153f1 100644
--- a/flair/models/text_classification_model.py
+++ b/flair/models/text_classification_model.py
@@ -1,6 +1,6 @@
 import logging
 from pathlib import Path
-from typing import List, Union, Dict, Optional, Set
+from typing import List, Union, Dict, Optional, Set, Tuple
 
 import torch
 import torch.nn as nn
@@ -255,6 +255,7 @@ def evaluate(
         embedding_storage_mode: str = "none",
         mini_batch_size: int = 32,
         num_workers: int = 8,
+        main_score_type: Tuple[str, str] = ("micro avg", 'f1-score')
     ) -> (Result, float):
 
         # read Dataset into data loader (if list of sentences passed, make Dataset first)
@@ -340,6 +341,8 @@ def evaluate(
                 target_names.append(self.label_dictionary.get_item_for_index(i))
             classification_report = metrics.classification_report(y_true, y_pred, digits=4,
                                                                   target_names=target_names, zero_division=0)
+            classification_report_dict = metrics.classification_report(y_true, y_pred, digits=4,
+                                                                       target_names=target_names, zero_division=0, output_dict=True)
             # get scores
             micro_f_score = round(metrics.fbeta_score(y_true, y_pred, beta=self.beta, average='micro', zero_division=0),
                                   4)
@@ -370,10 +373,11 @@ def evaluate(
                 f"{accuracy_score}"
 
         result = Result(
-            main_score=micro_f_score,
+            main_score=classification_report_dict[main_score_type[0]][main_score_type[1]],
             log_line=log_line,
             log_header=log_header,
             detailed_results=detailed_result,
+            classification_report=classification_report_dict
         )
 
         eval_loss /= batch_count
diff --git a/flair/trainers/trainer.py b/flair/trainers/trainer.py
index b42d9e191..1e66baf94 100644
--- a/flair/trainers/trainer.py
+++ b/flair/trainers/trainer.py
@@ -6,7 +6,7 @@
 import datetime
 import sys
 import inspect
-
+import os
 import torch
 from torch.optim.sgd import SGD
 from torch.utils.data.dataset import ConcatDataset
@@ -45,6 +45,8 @@ def __init__(
         optimizer: torch.optim.Optimizer = SGD,
         epoch: int = 0,
         use_tensorboard: bool = False,
+        tensorboard_log_dir=None,
+        metrics_for_tensorboard=[]
     ):
         """
         Initialize a model trainer
@@ -53,12 +55,16 @@ def __init__(
        :param optimizer: The optimizer to use (typically SGD or Adam)
        :param epoch: The starting epoch (normally 0 but could be higher if you continue training model)
        :param use_tensorboard: If True, writes out tensorboard information
+       :param tensorboard_log_dir: Directory into which tensorboard log files will be written
+       :param metrics_for_tensorboard: List of tuples that specify which metrics (in addition to the main score) shall be plotted in tensorboard, e.g. [("macro avg", 'f1-score'), ("macro avg", 'precision')]
        """
        self.model: flair.nn.Model = model
        self.corpus: Corpus = corpus
        self.optimizer: torch.optim.Optimizer = optimizer
        self.epoch: int = epoch
        self.use_tensorboard: bool = use_tensorboard
+       self.tensorboard_log_dir = tensorboard_log_dir
+       self.metrics_for_tensorboard = metrics_for_tensorboard
 
     def train(
         self,
@@ -93,6 +99,8 @@ def train(
         eval_on_train_fraction=0.0,
         eval_on_train_shuffle=False,
         save_model_at_each_epoch=False,
+        main_score_type=("micro avg", 'f1-score'),
+        tensorboard_comment='',
         **kwargs,
     ) -> dict:
         """
@@ -127,15 +135,20 @@ def train(
         :param eval_on_train_shuffle: if True the train data fraction is determined on the start of training
         and kept fixed during training, otherwise it's sampled at beginning of each epoch
         :param save_model_at_each_epoch: If True, at each epoch the thus far trained model will be saved
+        :param main_score_type: Type of metric to use for best model tracking and learning rate scheduling (if dev data is available, otherwise loss will be used)
+        :param tensorboard_comment: Comment to use for tensorboard logging
         :param kwargs: Other arguments for the Optimizer
         :return:
         """
 
+        self.main_score_type = main_score_type
         if self.use_tensorboard:
             try:
                 from torch.utils.tensorboard import SummaryWriter
-
-                writer = SummaryWriter()
+                if self.tensorboard_log_dir is not None and not os.path.exists(self.tensorboard_log_dir):
+                    os.mkdir(self.tensorboard_log_dir)
+                writer = SummaryWriter(log_dir=self.tensorboard_log_dir, comment=tensorboard_comment)
+                log.info(f"tensorboard logging path is {self.tensorboard_log_dir}")
             except:
                 log_line(log)
                 log.warning(
@@ -331,6 +344,8 @@ def train(
                 )
 
                 previous_learning_rate = learning_rate
+                if self.use_tensorboard:
+                    writer.add_scalar("learning_rate", learning_rate, self.epoch)
 
                 # stop training if learning rate becomes too small
                 if (not isinstance(lr_scheduler, OneCycleLR)) and learning_rate < min_learning_rate:
@@ -445,6 +460,7 @@ def train(
                         mini_batch_size=mini_batch_chunk_size,
                         num_workers=num_workers,
                         embedding_storage_mode=embeddings_storage_mode,
+                        main_score_type=self.main_score_type
                     )
                     result_line += f"\t{train_eval_result.log_line}"
@@ -457,6 +473,7 @@ def train(
                         mini_batch_size=mini_batch_chunk_size,
                         num_workers=num_workers,
                         embedding_storage_mode=embeddings_storage_mode,
+                        main_score_type=self.main_score_type
                     )
                     result_line += (
f"\t{train_part_loss}\t{train_part_eval_result.log_line}" @@ -464,6 +481,12 @@ def train( log.info( f"TRAIN_SPLIT : loss {train_part_loss} - score {round(train_part_eval_result.main_score, 4)}" ) + if self.use_tensorboard: + for (metric_class_avg_type, metric_type) in self.metrics_for_tensorboard: + writer.add_scalar( + f"train_{metric_class_avg_type}_{metric_type}", train_part_eval_result.classification_report[metric_class_avg_type][metric_type], self.epoch + ) + if log_dev: dev_eval_result, dev_loss = self.model.evaluate( @@ -472,6 +495,7 @@ def train( num_workers=num_workers, out_path=base_path / "dev.tsv", embedding_storage_mode=embeddings_storage_mode, + main_score_type=self.main_score_type ) result_line += f"\t{dev_loss}\t{dev_eval_result.log_line}" log.info( @@ -492,6 +516,11 @@ def train( writer.add_scalar( "dev_score", dev_eval_result.main_score, self.epoch ) + for (metric_class_avg_type, metric_type) in self.metrics_for_tensorboard: + writer.add_scalar( + f"dev_{metric_class_avg_type}_{metric_type}", + dev_eval_result.classification_report[metric_class_avg_type][metric_type], self.epoch + ) if log_test: test_eval_result, test_loss = self.model.evaluate( @@ -500,6 +529,7 @@ def train( num_workers=num_workers, out_path=base_path / "test.tsv", embedding_storage_mode=embeddings_storage_mode, + main_score_type=self.main_score_type ) result_line += f"\t{test_loss}\t{test_eval_result.log_line}" log.info( @@ -514,6 +544,11 @@ def train( writer.add_scalar( "test_score", test_eval_result.main_score, self.epoch ) + for (metric_class_avg_type, metric_type) in self.metrics_for_tensorboard: + writer.add_scalar( + f"test_{metric_class_avg_type}_{metric_type}", + test_eval_result.classification_report[metric_class_avg_type][metric_type], self.epoch + ) # determine learning rate annealing through scheduler. 
                # Use auxiliary metric for AnnealOnPlateau
                 if log_dev and isinstance(lr_scheduler, AnnealOnPlateau):
@@ -671,6 +706,7 @@ def final_test(
             num_workers=num_workers,
             out_path=base_path / "test.tsv",
             embedding_storage_mode="none",
+            main_score_type=self.main_score_type
         )
 
         test_results: Result = test_results
@@ -689,6 +725,7 @@ def final_test(
                     num_workers=num_workers,
                     out_path=base_path / f"{subcorpus.name}-test.tsv",
                     embedding_storage_mode="none",
+                    main_score_type=self.main_score_type
                 )
                 log.info(subcorpus.name)
                 log.info(subcorpus_results.log_line)
diff --git a/flair/training_utils.py b/flair/training_utils.py
index 731cb2215..01426e889 100644
--- a/flair/training_utils.py
+++ b/flair/training_utils.py
@@ -18,12 +18,13 @@ class Result(object):
 
     def __init__(
-        self, main_score: float, log_header: str, log_line: str, detailed_results: str
+        self, main_score: float, log_header: str, log_line: str, detailed_results: str, classification_report: dict
     ):
         self.main_score: float = main_score
         self.log_header: str = log_header
         self.log_line: str = log_line
         self.detailed_results: str = detailed_results
+        self.classification_report: dict = classification_report
 
 
 class Metric(object):

From 7e0d9ade15c60b512b0afac54711c4d6928eb5f2 Mon Sep 17 00:00:00 2001
From: Franziska Zimmermann
Date: Fri, 19 Mar 2021 09:18:25 +0100
Subject: [PATCH 2/6] Undo changes to sequence tagger

---
 flair/models/sequence_tagger_model.py | 11 +++--------
 flair/training_utils.py               |  2 +-
 2 files changed, 4 insertions(+), 9 deletions(-)

diff --git a/flair/models/sequence_tagger_model.py b/flair/models/sequence_tagger_model.py
index 7530f3841..17e5ca444 100644
--- a/flair/models/sequence_tagger_model.py
+++ b/flair/models/sequence_tagger_model.py
@@ -518,8 +518,7 @@ def evaluate(
         embedding_storage_mode: str = "none",
         mini_batch_size: int = 32,
         num_workers: int = 8,
-        wsd_evaluation: bool = False,
-        main_score_type: Tuple[str, str] = ("micro avg", 'f1-score')
+        wsd_evaluation: bool = False
     ) -> (Result, float):
 
         # read Dataset into data loader (if list of sentences passed, make Dataset first)
@@ -605,9 +604,6 @@ def evaluate(
             classification_report = metrics.classification_report(y_true, y_pred, digits=4,
                                                                    target_names=target_names, zero_division=1, labels=labels_to_report)
-            classification_report_dict = metrics.classification_report(y_true, y_pred, digits=4,
-                                                                       target_names=target_names, zero_division=0,
-                                                                       output_dict=True)
 
         # get scores
         micro_f_score = round(
             metrics.fbeta_score(y_true, y_pred, beta=self.beta, average='micro', labels=labels_to_report), 4)
@@ -629,11 +625,10 @@ def evaluate(
             log_line = f"\t{accuracy_score}"
 
         result = Result(
-            main_score=classification_report_dict[main_score_type[0]][main_score_type[1]],
+            main_score=micro_f_score,
             log_line=log_line,
             log_header=log_header,
-            detailed_results=detailed_result,
-            classification_report=classification_report_dict
+            detailed_results=detailed_result
         )
 
         return result, eval_loss
diff --git a/flair/training_utils.py b/flair/training_utils.py
index 01426e889..615972821 100644
--- a/flair/training_utils.py
+++ b/flair/training_utils.py
@@ -18,7 +18,7 @@ class Result(object):
 
     def __init__(
-        self, main_score: float, log_header: str, log_line: str, detailed_results: str, classification_report: dict
+        self, main_score: float, log_header: str, log_line: str, detailed_results: str, classification_report: dict = None
     ):
         self.main_score: float = main_score
         self.log_header: str = log_header
         self.log_line: str = log_line

From 471c0600ff2912967dae5dd0593b5dc9b5b94377 Mon Sep 17 00:00:00 2001
From: Franziska Zimmermann
Date: Fri, 19 Mar 2021 12:25:05 +0100
Subject: [PATCH 3/6] Fix bug in call of evaluate

---
 flair/models/sequence_tagger_model.py     |  3 ++-
 flair/models/similarity_learning_model.py |  1 +
 flair/models/text_regression_model.py     |  1 +
 flair/trainers/trainer.py                 | 12 ++++++++----
 4 files changed, 12 insertions(+), 5 deletions(-)

diff --git a/flair/models/sequence_tagger_model.py b/flair/models/sequence_tagger_model.py
index 17e5ca444..019923bbd 100644
--- a/flair/models/sequence_tagger_model.py
+++ b/flair/models/sequence_tagger_model.py
@@ -518,7 +518,8 @@ def evaluate(
         embedding_storage_mode: str = "none",
         mini_batch_size: int = 32,
         num_workers: int = 8,
-        wsd_evaluation: bool = False
+        wsd_evaluation: bool = False,
+        **kwargs
     ) -> (Result, float):
 
         # read Dataset into data loader (if list of sentences passed, make Dataset first)
diff --git a/flair/models/similarity_learning_model.py b/flair/models/similarity_learning_model.py
index d64a1d710..1b4d17f76 100644
--- a/flair/models/similarity_learning_model.py
+++ b/flair/models/similarity_learning_model.py
@@ -276,6 +276,7 @@ def evaluate(
         embedding_storage_mode="none",
         mini_batch_size=32,
         num_workers=8,
+        **kwargs
     ) -> (Result, float):
 
         # assumes that for each data pair there's at least one embedding per modality
diff --git a/flair/models/text_regression_model.py b/flair/models/text_regression_model.py
index a6290d7da..dbaa3d32f 100644
--- a/flair/models/text_regression_model.py
+++ b/flair/models/text_regression_model.py
@@ -104,6 +104,7 @@ def evaluate(
         embedding_storage_mode: str = "none",
         mini_batch_size: int = 32,
         num_workers: int = 8,
+        **kwargs
     ) -> (Result, float):
 
         # read Dataset into data loader (if list of sentences passed, make Dataset first)
diff --git a/flair/trainers/trainer.py b/flair/trainers/trainer.py
index 9a318aba3..b692dfb34 100644
--- a/flair/trainers/trainer.py
+++ b/flair/trainers/trainer.py
@@ -31,7 +31,7 @@
     AnnealOnPlateau,
 )
 from torch.optim.lr_scheduler import OneCycleLR
-from flair.models import SequenceTagger
+from flair.models import SequenceTagger, TextClassifier
 import random
 
 log = logging.getLogger("flair")
@@ -137,13 +137,17 @@ def train(
         :param save_model_each_k_epochs: Each k epochs, a model state will be written out. If set to '5', a model will
         be saved each 5 epochs. Default is 0 which means no model saving.
         :param save_model_epoch_step: Each save_model_epoch_step'th epoch the thus far trained model will be saved
-        :param main_score_type: Type of metric to use for best model tracking and learning rate scheduling (if dev data is available, otherwise loss will be used)
+        :param main_score_type: Type of metric to use for best model tracking and learning rate scheduling (if dev data is available, otherwise loss will be used), currently only applicable for text_classification_model
         :param tensorboard_comment: Comment to use for tensorboard logging
         :param kwargs: Other arguments for the Optimizer
         :return:
         """
-
-        self.main_score_type = main_score_type
+        if isinstance(self.model, TextClassifier):
+            self.main_score_type = main_score_type
+        else:
+            if main_score_type is not None:
+                warnings.warn("Choosing a main score type during training is currently only possible for text_classification_model. Will use default main score type instead of specified one.")
+            self.main_score_type = None
         if self.use_tensorboard:
             try:

From 967bdc15f2d9d8b8f545689534f3204bb8ffc655 Mon Sep 17 00:00:00 2001
From: Franziska Zimmermann
Date: Fri, 19 Mar 2021 12:50:56 +0100
Subject: [PATCH 4/6] Fix import

---
 flair/trainers/trainer.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/flair/trainers/trainer.py b/flair/trainers/trainer.py
index b692dfb34..79a9a1ad9 100644
--- a/flair/trainers/trainer.py
+++ b/flair/trainers/trainer.py
@@ -6,6 +6,7 @@
 import datetime
 import sys
 import inspect
+import warnings
 import os
 import torch
 from torch.optim.sgd import SGD

From cc32c6f8f19e1ef4568481fcb18b65fdd5a0b85c Mon Sep 17 00:00:00 2001
From: Franziska Zimmermann
Date: Fri, 19 Mar 2021 16:03:07 +0100
Subject: [PATCH 5/6] Renaming parameter of trainer

---
 flair/trainers/trainer.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/flair/trainers/trainer.py b/flair/trainers/trainer.py
index 79a9a1ad9..5b72c44b0 100644
--- a/flair/trainers/trainer.py
+++ b/flair/trainers/trainer.py
@@ -100,7 +100,7 @@ def train(
         eval_on_train_fraction=0.0,
         eval_on_train_shuffle=False,
         save_model_each_k_epochs: int = 0,
-        main_score_type=("micro avg", 'f1-score'),
+        classification_main_metric=("micro avg", 'f1-score'),
         tensorboard_comment='',
         **kwargs,
     ) -> dict:
@@ -138,16 +138,16 @@ def train(
         :param save_model_each_k_epochs: Each k epochs, a model state will be written out. If set to '5', a model will
         be saved each 5 epochs. Default is 0 which means no model saving.
         :param save_model_epoch_step: Each save_model_epoch_step'th epoch the thus far trained model will be saved
-        :param main_score_type: Type of metric to use for best model tracking and learning rate scheduling (if dev data is available, otherwise loss will be used), currently only applicable for text_classification_model
+        :param classification_main_metric: Type of metric to use for best model tracking and learning rate scheduling (if dev data is available, otherwise loss will be used), currently only applicable for text_classification_model
         :param tensorboard_comment: Comment to use for tensorboard logging
         :param kwargs: Other arguments for the Optimizer
         :return:
         """
         if isinstance(self.model, TextClassifier):
-            self.main_score_type = main_score_type
+            self.main_score_type = classification_main_metric
         else:
-            if main_score_type is not None:
-                warnings.warn("Choosing a main score type during training is currently only possible for text_classification_model. Will use default main score type instead of specified one.")
+            if classification_main_metric is not None:
+                warnings.warn("Specification of the main score type is only implemented for the text classifier. Defaulting to the main score type of the selected model.")
             self.main_score_type = None
         if self.use_tensorboard:
             try:

From 065cf02073bfb746bfa127e140ead30a8709adc1 Mon Sep 17 00:00:00 2001
From: "Braune, Fabienne, Dr. (K-FIL-1/2)"
Date: Fri, 19 Mar 2021 16:11:19 +0100
Subject: [PATCH 6/6] Removed unused Tuple

---
 flair/models/sequence_tagger_model.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/flair/models/sequence_tagger_model.py b/flair/models/sequence_tagger_model.py
index 019923bbd..c8543f400 100644
--- a/flair/models/sequence_tagger_model.py
+++ b/flair/models/sequence_tagger_model.py
@@ -2,7 +2,7 @@
 import sys
 
 from pathlib import Path
-from typing import List, Union, Optional, Dict, Tuple
+from typing import List, Union, Optional, Dict
 from warnings import warn
 
 import numpy as np
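
Usage sketch for the options introduced in this series, assuming all six
patches are applied. The TREC_6 corpus, the distilbert embedding and the
output paths below are illustrative placeholders, not part of the patches:

    # Hypothetical end-to-end example (placeholders: corpus, embedding, paths).
    from flair.datasets import TREC_6
    from flair.embeddings import TransformerDocumentEmbeddings
    from flair.models import TextClassifier
    from flair.trainers import ModelTrainer

    corpus = TREC_6()
    classifier = TextClassifier(
        document_embeddings=TransformerDocumentEmbeddings("distilbert-base-uncased"),
        label_dictionary=corpus.make_label_dictionary(),
    )

    trainer = ModelTrainer(
        classifier,
        corpus,
        use_tensorboard=True,
        # patch 1: directory for the tensorboard event files
        tensorboard_log_dir="tensorboard_logs",
        # patch 1: extra (row, column) entries of sklearn's
        # classification_report dict to plot alongside the main score
        metrics_for_tensorboard=[("macro avg", "f1-score"), ("macro avg", "precision")],
    )

    trainer.train(
        "resources/classifiers/trec",
        max_epochs=10,
        # patch 5 name (patches 1-3 called it main_score_type): which
        # (row, column) of the classification_report dict becomes the main score
        classification_main_metric=("macro avg", "f1-score"),
        # patch 1: passed through to SummaryWriter's comment argument
        tensorboard_comment="trec-macro-f1",
    )

    # evaluate() on the patched TextClassifier accepts the metric pair directly
    # and attaches the full report dict to the Result (patch 1):
    result, eval_loss = classifier.evaluate(
        corpus.test, main_score_type=("macro avg", "f1-score")
    )
    print(result.main_score)
    print(result.classification_report["macro avg"]["precision"])

Note that metrics_for_tensorboard and classification_main_metric both index
into the dict returned by sklearn's classification_report(output_dict=True),
so any (row, column) pair that report contains, including per-class rows, can
be used.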