diff --git a/changelog.md b/changelog.md index 195989205..629f010c4 100644 --- a/changelog.md +++ b/changelog.md @@ -98,6 +98,11 @@ - The loss of `eds.ner_crf` is now computed as the mean over the words instead of the sum. This change is compatible with multi-gpu training. - Having multiple stats keys matching a batching pattern now warns instead of raising an error. +### Changed + +- The loss of `eds.ner_crf` is now computed as the mean over the words instead of the sum. This change is compatible with multi-gpu training. +- Having multiple stats keys matching a batching pattern now warns instead of raising an error. + ### Fixed - Support packaging with poetry 2.0 diff --git a/edsnlp/metrics/dep_parsing.py b/edsnlp/metrics/dep_parsing.py index 5247a483f..1937e457b 100644 --- a/edsnlp/metrics/dep_parsing.py +++ b/edsnlp/metrics/dep_parsing.py @@ -17,7 +17,9 @@ def dependency_parsing_metric( The examples to score, either a tuple of (golds, preds) or a list of spacy.training.Example objects filter_expr : Optional[str] - The filter expression to use to filter the documents + The filter expression to use to filter the documents. The expression + should be a valid Python test and use the variable `doc` to refer to + the reference (gold) document.
Returns ------- diff --git a/edsnlp/pipes/trainable/ner_crf/ner_crf.py b/edsnlp/pipes/trainable/ner_crf/ner_crf.py index eec07aa57..842c02992 100644 --- a/edsnlp/pipes/trainable/ner_crf/ner_crf.py +++ b/edsnlp/pipes/trainable/ner_crf/ner_crf.py @@ -454,6 +454,9 @@ def collate(self, preps) -> NERBatchInput: k: sum(v) for k, v in preps["stats"].items() if not k.startswith("__") }, } + collated["stats"] = { + k: sum(v) for k, v in preps["stats"].items() if not k.startswith("__") + } lengths = [length for sample in preps["lengths"] for length in sample] max_len = max(lengths) if "targets" in preps: diff --git a/tests/training/dep_parser_config.yml b/tests/training/dep_parser_config.yml index 607ba9754..d95ea6c32 100644 --- a/tests/training/dep_parser_config.yml +++ b/tests/training/dep_parser_config.yml @@ -23,6 +23,7 @@ scorer: speed: false dep: '@metrics': "eds.dep_parsing" + filter_expr: "doc.text != ''" # 🎛️ OPTIMIZER optimizer: