Small refactorings in ModelTrainer and SequenceTagger evaluation logic #2184

Merged · 3 commits · Mar 24, 2021
63 changes: 33 additions & 30 deletions flair/models/sequence_tagger_model.py
@@ -408,6 +408,10 @@ def _requires_span_F1_evaluation(self) -> bool:
         for item in self.tag_dictionary.get_items():
             if item.startswith('B-'):
                 span_F1 = True
+            if item == 'O':
+                span_F1 = True
+            if item == '':
+                span_F1 = True
         return span_F1
 
     def _evaluate_with_span_F1(self, data_loader, embedding_storage_mode, mini_batch_size, out_path):
@@ -511,25 +515,7 @@ def _evaluate_with_span_F1(self, data_loader, embedding_storage_mode, mini_batch
 
         return result, eval_loss
 
-    def evaluate(
-            self,
-            sentences: Union[List[Sentence], Dataset],
-            out_path: Union[str, Path] = None,
-            embedding_storage_mode: str = "none",
-            mini_batch_size: int = 32,
-            num_workers: int = 8,
-            wsd_evaluation: bool = False,
-            **kwargs
-    ) -> (Result, float):
-
-        # read Dataset into data loader (if list of sentences passed, make Dataset first)
-        if not isinstance(sentences, Dataset):
-            sentences = SentenceDataset(sentences)
-        data_loader = DataLoader(sentences, batch_size=mini_batch_size, num_workers=num_workers)
-
-        # if span F1 needs to be used, use separate eval method
-        if self._requires_span_F1_evaluation() and not wsd_evaluation:
-            return self._evaluate_with_span_F1(data_loader, embedding_storage_mode, mini_batch_size, out_path)
+    def _evaluate_with_regular_F1(self, data_loader, embedding_storage_mode, mini_batch_size, out_path):
 
         # else, use scikit-learn to evaluate
         y_true = []
@@ -560,13 +546,7 @@ def evaluate(
                         y_true.append(labels.add_item(gold_tag))
 
                         # add predicted tag
-                        if wsd_evaluation:
-                            if gold_tag == 'O':
-                                predicted_tag = 'O'
-                            else:
-                                predicted_tag = token.get_tag('predicted').value
-                        else:
-                            predicted_tag = token.get_tag('predicted').value
+                        predicted_tag = token.get_tag('predicted').value
 
                         y_pred.append(labels.add_item(predicted_tag))
 
@@ -605,7 +585,6 @@ def evaluate(
         classification_report = metrics.classification_report(y_true, y_pred, digits=4, target_names=target_names,
                                                                zero_division=1, labels=labels_to_report)
 
-
         # get scores
         micro_f_score = round(
             metrics.fbeta_score(y_true, y_pred, beta=self.beta, average='micro', labels=labels_to_report), 4)
@@ -633,6 +612,28 @@ def evaluate(
         )
         return result, eval_loss
 
+    def evaluate(
+            self,
+            sentences: Union[List[Sentence], Dataset],
+            out_path: Union[str, Path] = None,
+            embedding_storage_mode: str = "none",
+            mini_batch_size: int = 32,
+            num_workers: int = 8,
+            wsd_evaluation: bool = False,
+            **kwargs
+    ) -> (Result, float):
+
+        # read Dataset into data loader (if list of sentences passed, make Dataset first)
+        if not isinstance(sentences, Dataset):
+            sentences = SentenceDataset(sentences)
+        data_loader = DataLoader(sentences, batch_size=mini_batch_size, num_workers=num_workers)
+
+        # depending on whether span F1 needs to be used, use separate eval method
+        if self._requires_span_F1_evaluation():
+            return self._evaluate_with_span_F1(data_loader, embedding_storage_mode, mini_batch_size, out_path)
+        else:
+            return self._evaluate_with_regular_F1(data_loader, embedding_storage_mode, mini_batch_size, out_path)
+
     def forward_loss(
         self, data_points: Union[List[Sentence], Sentence], sort=True
     ) -> torch.tensor:
@@ -1148,10 +1149,12 @@ def _fetch_model(model_name) -> str:
 
             # output information
             log.info("-" * 80)
-            log.info(f"The model key '{model_name}' now maps to 'https://huggingface.co/{hf_model_name}' on the HuggingFace ModelHub")
+            log.info(
+                f"The model key '{model_name}' now maps to 'https://huggingface.co/{hf_model_name}' on the HuggingFace ModelHub")
             log.info(f" - The most current version of the model is automatically downloaded from there.")
             if model_name in hu_model_map:
-                log.info(f" - (you can alternatively manually download the original model at {hu_model_map[model_name]})")
+                log.info(
+                    f" - (you can alternatively manually download the original model at {hu_model_map[model_name]})")
             log.info("-" * 80)
 
             # use mapped name instead
@@ -1229,7 +1232,7 @@ def _fetch_model(model_name) -> str:
                 log.error(f" -> Please check https://huggingface.co/models?filter=flair for all available models.")
                 log.error(f" -> Alternatively, point to a model file on your local drive.")
                 log.error("-" * 80)
-                Path(flair.cache_root / 'models' / model_folder).rmdir() # remove folder again if not valid
+                Path(flair.cache_root / 'models' / model_folder).rmdir()  # remove folder again if not valid
 
         return model_path
 
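For reviewers skimming the first file: the net effect of the sequence_tagger_model.py changes is that evaluate() becomes a thin dispatcher over two private helpers, _evaluate_with_span_F1 and _evaluate_with_regular_F1, with the wsd_evaluation special case dropped from the dispatch. Below is a minimal, runnable sketch of that pattern; the TaggerSketch class and its stand-in bodies are hypothetical, while the method names and the tag-set check mirror the diff above.

from typing import List, Tuple


class TaggerSketch:
    """Hypothetical stand-in for SequenceTagger, reduced to the dispatch logic."""

    def __init__(self, tag_items: List[str]):
        self.tag_items = tag_items

    def _requires_span_F1_evaluation(self) -> bool:
        # Mirrors the extended check above: span F1 applies when the tag set
        # contains a 'B-' prefixed tag, an 'O' tag, or an empty tag.
        span_F1 = False
        for item in self.tag_items:
            if item.startswith('B-'):
                span_F1 = True
            if item == 'O':
                span_F1 = True
            if item == '':
                span_F1 = True
        return span_F1

    def _evaluate_with_span_F1(self) -> Tuple[str, float]:
        return "span F1 result", 0.0  # stand-in body

    def _evaluate_with_regular_F1(self) -> Tuple[str, float]:
        return "token-level F1 result", 0.0  # stand-in body

    def evaluate(self) -> Tuple[str, float]:
        # One dispatch point instead of a branch buried in a long method.
        if self._requires_span_F1_evaluation():
            return self._evaluate_with_span_F1()
        else:
            return self._evaluate_with_regular_F1()


print(TaggerSketch(['B-PER', 'I-PER', 'O']).evaluate())  # -> span F1 path
print(TaggerSketch(['NOUN', 'VERB', 'DET']).evaluate())  # -> regular F1 path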
45 changes: 23 additions & 22 deletions flair/trainers/trainer.py
@@ -61,7 +61,7 @@ def __init__(
         self.epoch: int = epoch
         self.use_tensorboard: bool = use_tensorboard
 
-    def initialize_best_dev_score(self,log_dev):
+    def initialize_best_dev_score(self, log_dev):
         """
         Initialize the best score the model has seen so far.
         The score is the loss if we don't have dev data and main_score_type otherwise.
@@ -75,24 +75,24 @@ def initialize_best_dev_score(self,log_dev):
             self.score_mode_for_best_model_saving = "min"
             self.best_dev_score_seen = 100000000000
 
-    def check_for_best_score(self,score_value_for_best_model_saving):
+    def check_for_best_score(self, score_value_for_best_model_saving):
         """
         Check whether score_value_for_best_model_saving is better than the best score the trainer has seen so far.
         The score is the loss if we don't have dev data and main_score_type otherwise.
         :param score_value_for_best_model_saving: The current epoch score
         :return: boolean indicating whether score_value_for_best_model_saving is better than the best score the trainer has seen so far
         """
 
-        if self.score_mode_for_best_model_saving=="max":
-            if self.best_dev_score_seen<score_value_for_best_model_saving:
+        if self.score_mode_for_best_model_saving == "max":
+            if self.best_dev_score_seen < score_value_for_best_model_saving:
                 found_best_model = True
-                self.best_dev_score_seen=score_value_for_best_model_saving
+                self.best_dev_score_seen = score_value_for_best_model_saving
             else:
                 found_best_model = False
         else:
-            if self.best_dev_score_seen>score_value_for_best_model_saving:
+            if self.best_dev_score_seen > score_value_for_best_model_saving:
                 found_best_model = True
-                self.best_dev_score_seen=score_value_for_best_model_saving
+                self.best_dev_score_seen = score_value_for_best_model_saving
             else:
                 found_best_model = False
         return found_best_model
@@ -131,7 +131,7 @@ def check_for_and_delete_previous_best_models(base_path, save_checkpoint):
 
     def get_best_model_path(self, base_path, check_model_existance=False):
         all_best_model_names = [filename for filename in os.listdir(base_path) if
-                            filename.startswith("best-model_epoch")]
+                                filename.startswith("best-model_epoch")]
         if check_model_existance:
             if len(all_best_model_names) > 0:
                 assert len(all_best_model_names) == 1, "There should be at most one best model saved at any time."
@@ -222,12 +222,9 @@ def train(
         :param kwargs: Other arguments for the Optimizer
         :return:
         """
-        if isinstance(self.model, TextClassifier):
-            self.main_score_type=classification_main_metric
-        else:
-            if classification_main_metric is not None:
-                warnings.warn("Specification of main score type only implemented for text classifier. Defaulting to main score type of selected model.")
-            self.main_score_type = None
+
+        main_score_type = classification_main_metric if isinstance(self.model, TextClassifier) else None
+
         if self.use_tensorboard:
             try:
                 from torch.utils.tensorboard import SummaryWriter
@@ -546,7 +543,7 @@ def train(
                         mini_batch_size=mini_batch_chunk_size,
                         num_workers=num_workers,
                         embedding_storage_mode=embeddings_storage_mode,
-                        main_score_type=self.main_score_type
+                        main_score_type=main_score_type
                     )
                     result_line += f"\t{train_eval_result.log_line}"
 
@@ -559,7 +556,7 @@ def train(
                         mini_batch_size=mini_batch_chunk_size,
                         num_workers=num_workers,
                         embedding_storage_mode=embeddings_storage_mode,
-                        main_score_type=self.main_score_type
+                        main_score_type=main_score_type
                     )
                     result_line += (
                         f"\t{train_part_loss}\t{train_part_eval_result.log_line}"
@@ -575,7 +572,7 @@ def train(
                         num_workers=num_workers,
                         out_path=base_path / "dev.tsv",
                         embedding_storage_mode=embeddings_storage_mode,
-                        main_score_type=self.main_score_type
+                        main_score_type=main_score_type
                     )
                     result_line += f"\t{dev_loss}\t{dev_eval_result.log_line}"
                     log.info(
@@ -604,7 +601,7 @@ def train(
                         num_workers=num_workers,
                         out_path=base_path / "test.tsv",
                         embedding_storage_mode=embeddings_storage_mode,
-                        main_score_type=self.main_score_type
+                        main_score_type=main_score_type
                     )
                     result_line += f"\t{test_loss}\t{test_eval_result.log_line}"
                     log.info(
@@ -725,7 +722,7 @@ def train(
 
             # test best model if test data is present
             if self.corpus.test and not train_with_test:
-                final_score = self.final_test(base_path, mini_batch_chunk_size, num_workers)
+                final_score = self.final_test(base_path, mini_batch_chunk_size, num_workers, main_score_type)
             else:
                 final_score = 0
                 log.info("Test data not provided setting final score to 0")
@@ -755,7 +752,11 @@ def load_checkpoint(cls, checkpoint: Union[Path, str], corpus: Corpus):
         return model
 
     def final_test(
-        self, base_path: Union[Path, str], eval_mini_batch_size: int, num_workers: int = 8
+        self,
+        base_path: Union[Path, str],
+        eval_mini_batch_size: int,
+        num_workers: int = 8,
+        main_score_type: str = None,
     ):
         if type(base_path) is str:
             base_path = Path(base_path)
@@ -776,7 +777,7 @@ def final_test(
             num_workers=num_workers,
             out_path=base_path / "test.tsv",
             embedding_storage_mode="none",
-            main_score_type=self.main_score_type
+            main_score_type=main_score_type
         )
 
         test_results: Result = test_results
@@ -795,7 +796,7 @@ def final_test(
                     num_workers=num_workers,
                     out_path=base_path / f"{subcorpus.name}-test.tsv",
                     embedding_storage_mode="none",
-                    main_score_type=self.main_score_type
+                    main_score_type=main_score_type
                 )
                 log.info(subcorpus.name)
                 log.info(subcorpus_results.log_line)
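And for trainer.py: the trainer no longer keeps self.main_score_type as instance state; train() computes a local value once and threads it explicitly through every evaluate() and final_test() call. A runnable sketch of that state-to-parameter change follows; TrainerSketch and the dummy TextClassifier are hypothetical stand-ins, while the isinstance guard and the parameter threading follow the diff.

class TextClassifier:
    """Hypothetical stand-in for flair.models.TextClassifier."""


class TrainerSketch:
    """Hypothetical stand-in for ModelTrainer, reduced to the score-type plumbing."""

    def __init__(self, model):
        self.model = model

    def train(self, classification_main_metric=None):
        # Computed once as a local: only text classifiers support a
        # configurable main metric; every other model falls back to None.
        main_score_type = classification_main_metric if isinstance(self.model, TextClassifier) else None
        return self.final_test(main_score_type=main_score_type)

    def final_test(self, main_score_type=None):
        # Receives the value explicitly instead of reading trainer state,
        # so it can also be called on its own with a chosen score type.
        return f"evaluating with main_score_type={main_score_type}"


print(TrainerSketch(TextClassifier()).train(("micro avg", "f1-score")))
print(TrainerSketch(object()).train(("micro avg", "f1-score")))  # -> None for non-classifiers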