From 7cfcf4e8582d9f4fdf21bff9b85fc8ccc75e6d8b Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Perceval=20Wajsb=C3=BCrt?= <perceval.wajsburt@aphp.fr>
Date: Thu, 14 Nov 2024 15:35:01 +0100
Subject: [PATCH 1/4] fix: support nlp-less components

---
 docs/concepts/pipeline.md                | 34 +++++++++++++-
 docs/scripts/clickable_snippets.py       |  4 +-
 docs/tutorials/make-a-training-script.md |  1 +
 edsnlp/core/pipeline.py                  | 33 +++++--------
 edsnlp/core/registries.py                | 33 ++++++++++++-
 edsnlp/data/converters.py                |  3 +-
 tests/test_pipeline.py                   | 60 +++++++++++++++++++++++-
 tests/training/qlf_config.yml            |  2 +
 8 files changed, 140 insertions(+), 30 deletions(-)
diff --git a/docs/concepts/pipeline.md b/docs/concepts/pipeline.md
index 23b83919b..8d9fcda5f 100644
--- a/docs/concepts/pipeline.md
+++ b/docs/concepts/pipeline.md
@@ -60,12 +60,42 @@ To create your first EDS-NLP pipeline, run the following code. We provide severa
     nlp.add_pipe("eds.negation")
     ```
 
-=== "From a config file"
+=== "From a YAML config file"
+
+    You can also create a pipeline from a configuration file. This is useful when you plan on changing the pipeline configuration often.
+
+    ```{ .yaml title="config.yml" }
+    nlp:
+      "@core": pipeline
+      lang: eds
+      components:
+        sentences:
+          "@factory": eds.sentences
+
+        matcher:
+          "@factory": eds.matcher
+          regex:
+            smoker: ["fume", "clope"]
+
+        negation:
+          "@factory": eds.negation
+    ```
+
+    and then load the pipeline with:
+
+    ```{ .python .no-check }
+    import edsnlp
+
+    nlp = edsnlp.load("config.yml")
+    ```
+
+=== "From an INI config file"
 
     You can also create a pipeline from a configuration file. This is useful when you plan on changing the pipeline configuration often.
 
     ```{ .cfg title="config.cfg" }
     [nlp]
+    @core = "pipeline"
     lang = "eds"
     pipeline = ["sentences", "matcher", "negation"]
 
@@ -100,7 +130,7 @@ from pathlib import Path
 nlp("Le patient ne fume pas")
 
 # Processing multiple documents
-model.pipe([text1, text2])
+nlp.pipe([text1, text2])
 ```
 
 For more information on how to use the pipeline, refer to the [Inference](/inference) page.
diff --git a/docs/scripts/clickable_snippets.py b/docs/scripts/clickable_snippets.py
index 4704d7cff..2b901448a 100644
--- a/docs/scripts/clickable_snippets.py
+++ b/docs/scripts/clickable_snippets.py
@@ -184,7 +184,7 @@ def replace_link(match):
 
         # Re-insert soups into the output
         for soup, start, end in reversed(soups):
-            output = output[:start] + str(soup) + output[end:]
+            output = output[:start] + str(soup.find("code")) + output[end:]
 
         output = regex.sub(HREF_REGEX, replace_link, output)
 
@@ -202,7 +202,7 @@ def convert_html_to_code(
         cls, html_content: str
     ) -> Tuple[BeautifulSoup, str, list, list]:
         pre_html_content = "<pre>" + html_content + "</pre>"
-        soup = BeautifulSoup(pre_html_content, "html5lib")
+        soup = list(BeautifulSoup(pre_html_content, "html5lib").children)[0]
         code_element = soup.find("code")
 
         line_lengths = [0]
diff --git a/docs/tutorials/make-a-training-script.md b/docs/tutorials/make-a-training-script.md
index 806122872..29b4130ef 100644
--- a/docs/tutorials/make-a-training-script.md
+++ b/docs/tutorials/make-a-training-script.md
@@ -395,6 +395,7 @@ print(nlp.config.to_yaml_str())
 
 ```yaml title="config.yml"
 nlp:
+  "@core": "pipeline"
   lang: "eds"
   components:
     ner:
diff --git a/edsnlp/core/pipeline.py b/edsnlp/core/pipeline.py
index 74f47a726..d92f99925 100644
--- a/edsnlp/core/pipeline.py
+++ b/edsnlp/core/pipeline.py
@@ -1,7 +1,6 @@
 import contextlib
 import functools
 import importlib
-import inspect
 import os
 import re
 import shutil
@@ -10,6 +9,7 @@
 import sysconfig
 import warnings
 from enum import Enum
+from inspect import Parameter, signature
 from pathlib import Path
 from types import FunctionType
 from typing import (
@@ -105,7 +105,7 @@ def __init__(
         vocab_config: Type[BaseDefaults] = None,
         meta: Dict[str, Any] = None,
         pipeline: Optional[Sequence[str]] = None,
-        components: Dict[str, CurriedFactory] = {},
+        components: Dict[str, Any] = {},
         disable: AsList[str] = EMPTY_LIST,
         enable: AsList[str] = EMPTY_LIST,
         exclude: AsList = EMPTY_LIST,
@@ -232,17 +232,18 @@ def create_pipe(
         Pipe
         """
         try:
-            curried: CurriedFactory = Config(
+            pipe = Config(
                 {
                     "@factory": factory,
                     **(config if config is not None else {}),
                 }
             ).resolve(registry=registry)
-            if name is None:
-                name = inspect.signature(curried.factory).parameters.get("name").default
-            if name is None or name == inspect.Parameter.empty:
-                name = factory
-            pipe = curried.instantiate(nlp=self, path=(name,))
+            if isinstance(pipe, CurriedFactory):
+                if name is None:
+                    name = signature(pipe.factory).parameters.get("name").default
+                if name is None or name == Parameter.empty:
+                    name = factory
+                pipe = pipe.instantiate(nlp=self, path=(name,))
         except ConfitValidationError as e:
             raise e.with_traceback(None)
         return pipe
@@ -413,8 +414,8 @@ def pipe(
         inputs: Iterable[Union[str, Doc]]
             The inputs to create the Docs from, or Docs directly.
         n_process: int
-            Deprecated. Use the ".set(num_cpu_workers=n_process)" method on the returned
-            data stream instead.
+            Deprecated. Use the ".set_processing(num_cpu_workers=n_process)" method
+            on the returned data stream instead.
             The number of parallel workers to use. If 0, the operations will be
             executed sequentially.
 
@@ -589,16 +590,6 @@ def _add_pipes(
         enable: Container[str],
         disable: Container[str],
     ):
-        # Since components are actually resolved as curried factories,
-        # we need to instantiate them here
-        for name, component in components.items():
-            if not isinstance(component, CurriedFactory):
-                raise ValueError(
-                    f"Component {repr(name)} is not instantiable (got {component}). "
-                    f"Please make sure that you didn't forget to add a '@factory' "
-                    f"key to the component config."
-                )
-
         try:
             components = CurriedFactory.instantiate(components, nlp=self)
         except ConfitValidationError as e:
@@ -1215,7 +1206,7 @@ def load(
         elif is_package:
             # Load as package
             available_kwargs = {"overrides": overrides, **pipe_selection}
-            signature_kwargs = inspect.signature(module.load).parameters
+            signature_kwargs = signature(module.load).parameters
             kwargs = {
                 name: available_kwargs[name]
                 for name in signature_kwargs
diff --git a/edsnlp/core/registries.py b/edsnlp/core/registries.py
index c3b9a5409..8628b5f4a 100644
--- a/edsnlp/core/registries.py
+++ b/edsnlp/core/registries.py
@@ -75,12 +75,38 @@ def maybe_nlp(self) -> Union["CurriedFactory", Any]:
         -------
         Union["CurriedFactory", Any]
         """
+        from edsnlp.core.pipeline import Pipeline, PipelineProtocol
+
         sig = inspect.signature(self.factory)
-        # and sig.parameters["nlp"].default is sig.empty
-        if "nlp" not in sig.parameters or "nlp" in self.kwargs:
+        if (
+            not (
+                "nlp" in sig.parameters
+                and (
+                    sig.parameters["nlp"].default is sig.empty
+                    or sig.parameters["nlp"].annotation in (Pipeline, PipelineProtocol)
+                )
+            )
+            or "nlp" in self.kwargs
+        ) and not self.search_curried_factory(self.kwargs):
             return self.factory(**self.kwargs)
         return self
 
+    @classmethod
+    def search_curried_factory(cls, obj):
+        if isinstance(obj, CurriedFactory):
+            return obj
+        elif isinstance(obj, dict):
+            for value in obj.values():
+                result = cls.search_curried_factory(value)
+                if result is not None:
+                    return result
+        elif isinstance(obj, (tuple, list, set)):
+            for value in obj:
+                result = cls.search_curried_factory(value)
+                if result is not None:
+                    return result
+        return None
+
     def instantiate(
         obj: Any,
         nlp: "edsnlp.Pipeline",
@@ -177,6 +203,9 @@ def __getattr__(self, name):
             raise AttributeError(name)
         self._raise_curried_factory_error()
 
+    def __repr__(self):
+        return f"CurriedFactory({self.factory})"
+
 
 glob = []
 
diff --git a/edsnlp/data/converters.py b/edsnlp/data/converters.py
index c8c262354..1bf1e6d2b 100644
--- a/edsnlp/data/converters.py
+++ b/edsnlp/data/converters.py
@@ -644,7 +644,6 @@ def __call__(self, doc):
 def get_dict2doc_converter(
     converter: Union[str, Callable], kwargs
 ) -> Tuple[Callable, Dict]:
-    kwargs_to_init = False
     if not callable(converter):
         available = edsnlp.registry.factory.get_available()
         try:
@@ -666,7 +665,7 @@ def get_dict2doc_converter(
                 f"Cannot find converter for format {converter}. "
                 f"Available converters are {', '.join(available)}"
             )
-    if isinstance(converter, type) or kwargs_to_init:
+    if isinstance(converter, type):
         return converter(**kwargs), {}
     return converter, validate_kwargs(converter, kwargs)
 
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index 9c0ff82c1..d4734498d 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -130,8 +130,8 @@ def test_disk_serialization(tmp_path, ml_nlp):
 [components.ner]
 @factory = "eds.ner_crf"
 embedding = ${components.transformer}
-target_span_getter = ["ents", "ner-preds"]
 mode = "independent"
+target_span_getter = ["ents", "ner-preds"]
 labels = ["PERSON", "GIFT"]
 infer_span_setter = false
 window = 40
@@ -254,6 +254,41 @@ def test_config_validation_error():
     assert "got 'error-mode'" in str(e.value)
 
 
+@edsnlp.registry.factory.register("test_wrapper", spacy_compatible=False)
+class WrapperComponent:
+    def __init__(self, *, copy_list, copy_dict, sub):
+        pass
+
+
+fail_config_sub = """
+nlp:
+    lang: "eds"
+    components:
+        wrapper:
+            "@factory": "test_wrapper"
+
+            copy_list:
+                - ${nlp.components.wrapper.sub}
+
+            copy_dict:
+                key: ${nlp.components.wrapper.sub}
+
+            sub:
+                "@factory": "eds.matcher"
+                terms: 100.0  # clearly wrong
+
+        matcher_copy: ${nlp.components.wrapper.sub}
+"""
+
+
+def test_config_sub_validation_error():
+    with pytest.raises(ConfitValidationError):
+        Pipeline.from_config(Config.from_yaml_str(fail_config_sub))
+
+    fix = {"nlp": {"components": {"wrapper": {"sub": {"terms": {"pattern": ["ok"]}}}}}}
+    Pipeline.from_config(Config.from_yaml_str(fail_config_sub).merge(fix))
+
+
 def test_add_pipe_validation_error():
     model = edsnlp.blank("eds")
     with pytest.raises(ConfitValidationError) as e:
@@ -407,3 +442,26 @@ def test_repr(frozen_ml_nlp):
   "ner": eds.ner_crf
 })"""
         )
+
+
+@edsnlp.registry.factory.register("test_nlp_less", spacy_compatible=False)
+class NlpLessComponent:
+    def __init__(self, nlp=None, name: str = "nlp_less", *, value: int):
+        self.value = value
+        self.name = name
+
+    def __call__(self, doc):
+        return doc
+
+
+def test_nlp_less_component():
+    component = NlpLessComponent(value=42)
+    assert component.value == 42
+
+    config = """
+[component]
+@factory = "test_nlp_less"
+value = 42
+"""
+    component = Config.from_str(config).resolve(registry=registry)["component"]
+    assert component.value == 42
diff --git a/tests/training/qlf_config.yml b/tests/training/qlf_config.yml
index 960ad857b..884a8e349 100644
--- a/tests/training/qlf_config.yml
+++ b/tests/training/qlf_config.yml
@@ -1,5 +1,7 @@
 # 🤖 PIPELINE DEFINITION
 nlp:
+  "@core": pipeline
+
   lang: eds
 
   components:

From 359c9d5b62ee99d903c3ab59d7e892b166e562e2 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Perceval=20Wajsb=C3=BCrt?= <perceval.wajsburt@aphp.fr>
Date: Fri, 15 Nov 2024 02:08:09 +0100
Subject: [PATCH 2/4] fix: redirect measurements import to quantities

---
 edsnlp/__init__.py | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/edsnlp/__init__.py b/edsnlp/__init__.py
index 152cfa253..3ab460806 100644
--- a/edsnlp/__init__.py
+++ b/edsnlp/__init__.py
@@ -52,6 +52,10 @@ def find_spec(self, fullname, path, target=None):  # pragma: no cover
             new_name = fullname.replace("span_qualifier", "span_classifier")
             spec = importlib.util.spec_from_loader(fullname, AliasLoader(new_name))
             return spec
+        if "measurements" in fullname.split("."):
+            new_name = fullname.replace("measurements", "quantities")
+            spec = importlib.util.spec_from_loader(fullname, AliasLoader(new_name))
+            return spec
 
 
 class AliasLoader(importlib.abc.Loader):

From f33afd770c8c40358564b1cb5ce95e86cb7296f7 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Perceval=20Wajsb=C3=BCrt?= <perceval.wajsburt@aphp.fr>
Date: Thu, 14 Nov 2024 21:17:05 +0100
Subject: [PATCH 3/4] ci: test build only with pytorch cpu

---
 .github/workflows/release.yml    | 2 +-
 .github/workflows/test-build.yml | 3 +++
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml
index b6b9198d0..a2b733941 100644
--- a/.github/workflows/release.yml
+++ b/.github/workflows/release.yml
@@ -33,7 +33,7 @@ jobs:
         uses: pypa/cibuildwheel@v2.21.3
         env:
           CIBW_ARCHS_MACOS: "x86_64 arm64"
-          PIP_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu"
+          CIBW_ENVIRONMENT: PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
 
       - uses: actions/upload-artifact@v4
         with:
diff --git a/.github/workflows/test-build.yml b/.github/workflows/test-build.yml
index ac64a28b3..569849669 100644
--- a/.github/workflows/test-build.yml
+++ b/.github/workflows/test-build.yml
@@ -4,6 +4,7 @@
 
 name: Test Build
 
+
 on:
   workflow_dispatch:
   pull_request:
@@ -27,6 +28,8 @@ jobs:
         uses: pypa/cibuildwheel@v2.16.5
         env:
           CIBW_ARCHS_MACOS: "x86_64 arm64"
+          CIBW_ENVIRONMENT: PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu
+
 
   build_sdist:
     name: Build source distribution

From 733a7fe2e3d737e95514ba3e3cf70a5762688445 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?Perceval=20Wajsb=C3=BCrt?= <perceval.wajsburt@aphp.fr>
Date: Thu, 14 Nov 2024 12:53:11 +0100
Subject: [PATCH 4/4] chore: bump version to 0.14.0

---
 README.md          | 4 ++--
 changelog.md       | 2 +-
 docs/index.md      | 4 ++--
 edsnlp/__init__.py | 2 +-
 pyproject.toml     | 2 +-
 5 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/README.md b/README.md
index 6f5602224..f30dca7d7 100644
--- a/README.md
+++ b/README.md
@@ -34,13 +34,13 @@ Check out our interactive [demo](https://aphp.github.io/edsnlp/demo/) !
 You can install EDS-NLP via `pip`. We recommend pinning the library version in your projects, or use a strict package manager like [Poetry](https://python-poetry.org/).
 
 ```shell
-pip install edsnlp==0.13.1
+pip install edsnlp==0.14.0
 ```
 
 or if you want to use the trainable components (using pytorch)
 
 ```shell
-pip install "edsnlp[ml]==0.13.1"
+pip install "edsnlp[ml]==0.14.0"
 ```
 
 ### A first pipeline
diff --git a/changelog.md b/changelog.md
index 9836cadd0..133f20fc3 100644
--- a/changelog.md
+++ b/changelog.md
@@ -1,6 +1,6 @@
 # Changelog
 
-## Unreleased
+## v0.14.0 (2024-11-14)
 
 ### Added
 
diff --git a/docs/index.md b/docs/index.md
index 546abc9fe..e3ac71610 100644
--- a/docs/index.md
+++ b/docs/index.md
@@ -15,13 +15,13 @@ Check out our interactive [demo](https://aphp.github.io/edsnlp/demo/) !
 You can install EDS-NLP via `pip`. We recommend pinning the library version in your projects, or use a strict package manager like [Poetry](https://python-poetry.org/).
 
 ```{: data-md-color-scheme="slate" }
-pip install edsnlp==0.13.1
+pip install edsnlp==0.14.0
 ```
 
 or if you want to use the trainable components (using pytorch)
 
 ```{: data-md-color-scheme="slate" }
-pip install "edsnlp[ml]==0.13.1"
+pip install "edsnlp[ml]==0.14.0"
 ```
 
 ### A first pipeline
diff --git a/edsnlp/__init__.py b/edsnlp/__init__.py
index 3ab460806..620685214 100644
--- a/edsnlp/__init__.py
+++ b/edsnlp/__init__.py
@@ -15,7 +15,7 @@
 import edsnlp.pipes
 from . import reducers
 
-__version__ = "0.13.1"
+__version__ = "0.14.0"
 
 BASE_DIR = Path(__file__).parent
 
diff --git a/pyproject.toml b/pyproject.toml
index a071d4088..eb8e7dfb7 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -14,7 +14,7 @@ dependencies = [
     "pysimstring>=1.2.1",
     "regex",
     "spacy>=3.2,<3.8",
-    "confit>=0.5.5",
+    "confit>=0.7.0",
     "tqdm",
     "umls-downloader>=0.1.1",
     "numpy>=1.15.0,<1.23.2; python_version<'3.8'",