diff --git a/farm/data_handler/processor.py b/farm/data_handler/processor.py index ab40c5e3f..a50489bfb 100644 --- a/farm/data_handler/processor.py +++ b/farm/data_handler/processor.py @@ -2101,7 +2101,7 @@ def _convert_answers(self, baskets): label_idxs[i][0] = -100 # TODO remove this hack also from featurization label_idxs[i][1] = -100 break # Break loop around answers, so the error message is not shown multiple times - elif answer_indices != answer_text.strip(): + elif answer_indices.strip() != answer_text.strip(): logger.warning(f"""Answer using start/end indices is '{answer_indices}' while gold label text is '{answer_text}'.\n Example will not be converted for training/evaluation.""") error_in_answer = True diff --git a/farm/modeling/tokenization.py b/farm/modeling/tokenization.py index b70be74f5..f186633e8 100644 --- a/farm/modeling/tokenization.py +++ b/farm/modeling/tokenization.py @@ -567,7 +567,7 @@ def tokenize_batch_question_answering(pre_baskets, tokenizer, indices): baskets = [] # # Tokenize texts in batch mode texts = [d["context"] for d in pre_baskets] - tokenized_docs_batch = tokenizer.batch_encode_plus(texts, return_offsets_mapping=True, return_special_tokens_mask=True, add_special_tokens=False) + tokenized_docs_batch = tokenizer.batch_encode_plus(texts, return_offsets_mapping=True, return_special_tokens_mask=True, add_special_tokens=False, verbose=False) # Extract relevant data tokenids_batch = tokenized_docs_batch["input_ids"]