README.md: 2 additions & 2 deletions
@@ -34,13 +34,13 @@ Check out our interactive [demo](https://aphp.github.io/edsnlp/demo/) !
You can install EDS-NLP via `pip`. We recommend pinning the library version in your projects, or using a strict package manager like [Poetry](https://python-poetry.org/).

```shell
-pip install edsnlp==0.17.2
+pip install edsnlp==0.18.0
```

or, if you want to use the trainable components (using PyTorch):

```shell
pip install "edsnlp[ml]==0.17.2"
pip install "edsnlp[ml]==0.18.0"
```

### A first pipeline
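After pinning the new release, a quick sanity check that the upgrade took effect (a minimal sketch, assuming `edsnlp` is importable in the current environment):

```python
import edsnlp

# Should print "0.18.0" once the pinned upgrade is installed
print(edsnlp.__version__)
```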
changelog.md: 4 additions & 4 deletions
@@ -1,8 +1,8 @@
# Changelog

-## Unreleased
+## v0.18.0 (2025-07-02)

-## Added
+### Added

- Added support for multiple loggers (`tensorboard`, `wandb`, `comet_ml`, `aim`, `mlflow`, `clearml`, `dvclive`, `csv`, `json`, `rich`) in `edsnlp.train` via the `logger` parameter. Default is [`json` and `rich`] for backward compatibility.
- Sub batch sizes for gradient accumulation can now be defined as simple "splits" of the original batch, e.g. `batch_size = 10000 tokens` and `sub_batch_size = 5 splits` to accumulate batches of 2000 tokens.
@@ -12,7 +12,7 @@
- New `Training a span classifier` tutorial, and reorganized deep-learning docs
- `ScheduledOptimizer` now warns when a parameter selector does not match any parameter.

-## Fixed
+### Fixed

- `use_section` in `eds.history` should now correctly handle cases when there are other sections following history sections.
- Added clickable snippets in the documentation for more registered functions
@@ -22,7 +22,7 @@
- :ambulance: Until now, `post_init` was applied **after** the instantiation of the optimizer: if the model discovered new labels, and therefore changed its parameter tensors to reflect that, these new tensors were not taken into account by the optimizer, which could lead to subpar performance. Now, `post_init` is applied **before** the optimizer is instantiated, so that the optimizer can correctly handle the new tensors.
- Added missing entry points for readers and writers in the registry, including `write_parquet` and support for `polars` in `pyproject.toml`. Now all implemented readers and writers are correctly registered as entry points.

-## Changed
+### Changed

- Section cues in `eds.history` are now the section titles, not the full section.
- :boom: Validation metrics are now found under the root field `validation` in the training logs (e.g. `metrics['validation']['ner']['micro']['f']`)
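In practice, the multi-logger entry above might look like the following. This is a hedged sketch rather than the PR's own example; `nlp`, `train_data` and `val_data` are assumed to be built as in the training tutorials:

```python
from edsnlp.training import train

# Per the changelog, `logger` accepts any of: tensorboard, wandb, comet_ml,
# aim, mlflow, clearml, dvclive, csv, json, rich.
# The default, ["json", "rich"], preserves the previous behavior.
nlp = train(
    nlp=nlp,                # assumed: a pipeline built as in the tutorials
    train_data=train_data,  # assumed: a TrainingData instance
    val_data=val_data,      # assumed: validation documents
    logger=["csv", "tensorboard", "rich"],
)
```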
docs/index.md: 2 additions & 2 deletions
@@ -15,13 +15,13 @@ Check out our interactive [demo](https://aphp.github.io/edsnlp/demo/) !
You can install EDS-NLP via `pip`. We recommend pinning the library version in your projects, or using a strict package manager like [Poetry](https://python-poetry.org/).

```{: data-md-color-scheme="slate" }
-pip install edsnlp==0.17.2
+pip install edsnlp==0.18.0
```

or, if you want to use the trainable components (using PyTorch):

```{: data-md-color-scheme="slate" }
pip install "edsnlp[ml]==0.17.2"
pip install "edsnlp[ml]==0.18.0"
```

### A first pipeline
docs/tutorials/training-ner.md: 11 additions & 8 deletions
@@ -233,7 +233,7 @@ Visit the [`edsnlp.train` documentation][edsnlp.training.trainer.train]…
import edsnlp
from edsnlp.training import train, ScheduledOptimizer, TrainingData
from edsnlp.metrics.ner import NerExactMetric
-from edsnlp.training.loggers import CSVLogger, RichLogger, WandbLogger
+from edsnlp.training.loggers import CSVLogger, RichLogger, WandBLogger
import edsnlp.pipes as eds
import torch

@@ -242,6 +242,7 @@ Visit the [`edsnlp.train` documentation][edsnlp.training.trainer.train]…
nlp.add_pipe(
    # The NER pipe will be a CRF model
    eds.ner_crf(
+        name="ner",
        mode="joint",
        target_span_getter="gold_spans",
        # Set spans as both to ents and in separate `ent.label` groups
@@ -280,19 +281,21 @@ Visit the [`edsnlp.train` documentation][edsnlp.training.trainer.train]…
    optim=torch.optim.Adam,
    module=nlp,
    total_steps=max_steps,
-    groups={
-        "^transformer": {
-            "lr": {"@schedules": "linear", "warmup_rate": 0.1, "start_value": 0 "max_value": 5e-5,},
+    groups=[
+        {
+            "selector": "transformer",
+            "lr": {"@schedules": "linear", "warmup_rate": 0.1, "start_value": 0, "max_value": 5e-5,},
        },
-        "": {
-            "lr": {"@schedules": "linear", "warmup_rate": 0.1, "start_value": 3e-4 "max_value": 3e-4,},
+        {
+            "selector": ".*",
+            "lr": {"@schedules": "linear", "warmup_rate": 0.1, "start_value": 3e-4, "max_value": 3e-4,},
        },
-    },
+    ],
)

#
loggers = [
-    CSVLogger(),
+    CSVLogger.draft(),  # draft as we will let the train function specify the logging_dir
    RichLogger(
        fields={
            "step": {},
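Taken together, the new `groups` format replaces the regex-keyed dict with a list of dicts carrying an explicit `selector` field. Below is a standalone sketch of the resulting call, with schedule values copied from the diff; `nlp` and `max_steps` are assumed from the surrounding tutorial:

```python
import torch
from edsnlp.training import ScheduledOptimizer

optimizer = ScheduledOptimizer(
    optim=torch.optim.Adam,
    module=nlp,             # assumed: the pipeline defined earlier in the tutorial
    total_steps=max_steps,  # assumed: defined earlier in the tutorial
    groups=[
        {
            # Warmed-up, lower learning rate for the pretrained transformer
            "selector": "transformer",
            "lr": {"@schedules": "linear", "warmup_rate": 0.1, "start_value": 0, "max_value": 5e-5},
        },
        {
            # Catch-all group for every remaining trainable parameter
            "selector": ".*",
            "lr": {"@schedules": "linear", "warmup_rate": 0.1, "start_value": 3e-4, "max_value": 3e-4},
        },
    ],
)
```

Presumably parameters are claimed by the first selector that matches them, which is why the catch-all `".*"` group comes last.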
docs/tutorials/training-span-classifier.md: 6 additions & 4 deletions
@@ -265,24 +265,26 @@ Visit the [`edsnlp.train` documentation][edsnlp.training.trainer.train]…
# 🎛️ OPTIMIZER (here it will be the same as the default one)
optimizer = ScheduledOptimizer.draft( # (2)!
optim=torch.optim.AdamW,
-    groups={
-        "biopsy_classifier[.]embedding": {
+    groups=[
+        {
+            "selector": "biopsy_classifier[.]embedding",
            "lr": {
                "@schedules": "linear",
                "warmup_rate": 0.1,
                "start_value": 0.,
                "max_value": 5e-5,
            },
        },
-        ".*": {
+        {
+            "selector": ".*",
            "lr": {
                "@schedules": "linear",
                "warmup_rate": 0.1,
                "start_value": 3e-4,
                "max_value": 3e-4,
            },
        },
-    }
+    ]
)

# 🚀 TRAIN
edsnlp/__init__.py: 1 addition & 1 deletion
@@ -15,7 +15,7 @@
import edsnlp.pipes
from . import reducers

__version__ = "0.17.2"
__version__ = "0.18.0"

BASE_DIR = Path(__file__).parent

edsnlp/training/trainer.py: 27 additions & 16 deletions
@@ -676,6 +676,14 @@ def train(
        total_steps=max_steps,
    )

+    for td in train_data:
+        if not (td.pipe_names is None or td.pipe_names <= trainable_pipe_names):
+            raise ValueError(
+                f"Training data pipe names {td.pipe_names} should be a subset of "
+                f"the trainable pipe names {trainable_pipe_names}, or left to None "
+                f"to use this dataset for all trainable components."
+            )

    for phase_i, pipe_names in enumerate(phases):
        trained_pipes_local: Dict[str, TorchComponent] = {
            n: nlp.get_pipe(n) for n in pipe_names
@@ -688,6 +696,14 @@
            if td.pipe_names is None or set(td.pipe_names) & set(pipe_names)
        ]

+        if len(phase_training_data) == 0:
+            raise ValueError(
+                f"No training data found for phase {phase_i + 1} with components "
+                f"{', '.join(pipe_names)}. Make sure that these components are "
+                f"listed in the 'pipe_names' attribute of at least one of the "
+                f"provided training datasets."
+            )

        with nlp.select_pipes(disable=trainable_pipe_names - set(pipe_names)):
            accelerator.print(f"Phase {phase_i + 1}: training {', '.join(pipe_names)}")
            set_seed(seed)
@@ -700,37 +716,32 @@
                grad_params.add(param)
                param.requires_grad_(has_grad_param)

-            accelerator.print(
-                "Optimizing groups:"
-                + "".join(
-                    "\n - {} weight tensors ({:,} parameters){}".format(
+            accelerator.print("Optimizing groups:")
+            for g in optim.param_groups:
+                accelerator.print(
+                    " - {} weight tensors ({:,} parameters){}".format(
                        len([p for p in g["params"] if p in grad_params]),
                        sum([p.numel() for p in g["params"] if p in grad_params]),
                        ": " + " & ".join(g.get("selectors", "*"))
                        if "selectors" in g
                        else "",
                    )
-                    for g in optim.param_groups
                )
-            )
            accelerator.print(
                f"Keeping frozen {len(all_params - grad_params):} weight tensors "
                f"({sum(p.numel() for p in all_params - grad_params):,} parameters)"
            )

            nlp.train(True)

-            iterator = iter(
-                zip(
-                    *(
-                        td(nlp, device).set_processing(
-                            num_cpu_workers=num_workers,
-                            process_start_method="spawn",
-                        )
-                        for td in phase_training_data
-                    )
-                )
-            )
+            phase_datasets = [
+                td(nlp, device).set_processing(
+                    num_cpu_workers=num_workers,
+                    process_start_method="spawn",
+                )
+                for td in phase_training_data
+            ]
+            iterator = iter(zip(*(phase_datasets)))
            (accel_optim, trained_pipes) = accelerator.prepare(optim, trained_pipes)
            if hasattr(accel_optim.optimizer, "initialize"):
                accel_optim.optimizer.initialize()
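The two new checks fail fast on misconfiguration: each dataset's `pipe_names` must be a subset of the pipeline's trainable pipes (or `None` to target all of them), and every training phase must be covered by at least one dataset. A hedged sketch of a conforming dataset follows; parameter names besides `pipe_names` are sketched from the tutorials and should be treated as assumptions:

```python
from edsnlp.training import TrainingData

# `pipe_names` must name trainable components of `nlp`, or be None
# to use this dataset for all of them (enforced by the new check).
td = TrainingData(
    data=train_docs,          # assumed: a stream of annotated documents
    batch_size="2000 words",  # assumed: batching spec as in the tutorials
    shuffle="dataset",        # assumed
    pipe_names=["ner"],
)
```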
tests/test_entrypoints.py: 10 additions & 5 deletions
@@ -25,8 +25,6 @@ def test_entrypoints():


def test_readers_and_writers_entrypoints():
-    import importlib.metadata
-
    # Map of expected entry points for readers and writers
    expected_readers = {
        "spark": "from_spark",
@@ -47,9 +45,16 @@
"polars": "to_polars",
"parquet": "write_parquet",
}
eps = importlib.metadata.entry_points()
readers = {ep.name for ep in eps.select(group="edsnlp_readers")}
writers = {ep.name for ep in eps.select(group="edsnlp_writers")}
eps = entry_points()
if hasattr(eps, "select"):
readers_eps = eps.select(group="edsnlp_readers")
writers_eps = eps.select(group="edsnlp_writers")
else:
readers_eps = eps.get("edsnlp_readers", [])
writers_eps = eps.get("edsnlp_writers", [])

readers = {ep.name for ep in readers_eps}
writers = {ep.name for ep in writers_eps}
for name in expected_readers:
assert name in readers, f"Reader entry point '{name}' is missing"
for name in expected_writers:
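With these entry points registered, every implemented reader and writer is discoverable by name, and the `parquet` pair called out in the changelog can be used directly. A short sketch, with hypothetical file paths:

```python
import edsnlp

# "notes.parquet" and "out.parquet" are hypothetical paths.
docs = edsnlp.data.read_parquet("notes.parquet", converter="omop")
edsnlp.data.write_parquet(docs, "out.parquet")
```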