Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion .github/workflows/release.yml
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,7 @@ jobs:
uses: pypa/cibuildwheel@v2.21.3
env:
CIBW_ARCHS_MACOS: "x86_64 arm64"
PIP_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu"
CIBW_ENVIRONMENT: PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu

- uses: actions/upload-artifact@v4
with:
Expand Down
3 changes: 3 additions & 0 deletions .github/workflows/test-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@

name: Test Build


on:
workflow_dispatch:
pull_request:
Expand All @@ -27,6 +28,8 @@ jobs:
uses: pypa/cibuildwheel@v2.16.5
env:
CIBW_ARCHS_MACOS: "x86_64 arm64"
CIBW_ENVIRONMENT: PIP_EXTRA_INDEX_URL=https://download.pytorch.org/whl/cpu


build_sdist:
name: Build source distribution
Expand Down
4 changes: 2 additions & 2 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,13 +34,13 @@ Check out our interactive [demo](https://aphp.github.io/edsnlp/demo/) !
You can install EDS-NLP via `pip`. We recommend pinning the library version in your projects, or using a strict package manager like [Poetry](https://python-poetry.org/).

```shell
pip install edsnlp==0.13.1
pip install edsnlp==0.14.0
```

or if you want to use the trainable components (using pytorch)

```shell
pip install "edsnlp[ml]==0.13.1"
pip install "edsnlp[ml]==0.14.0"
```

### A first pipeline
Expand Down
2 changes: 1 addition & 1 deletion changelog.md
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# Changelog

## Unreleased
## v0.14.0 (2024-11-14)

### Added

Expand Down
34 changes: 32 additions & 2 deletions docs/concepts/pipeline.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,12 +60,42 @@ To create your first EDS-NLP pipeline, run the following code. We provide severa
nlp.add_pipe("eds.negation")
```

=== "From a config file"
=== "From a YAML config file"

You can also create a pipeline from a configuration file. This is useful when you plan on changing the pipeline configuration often.

```{ .yaml title="config.yml" }
nlp:
"@core": pipeline
lang: eds
components:
sentences:
"@factory": eds.sentences

matcher:
"@factory": eds.matcher
regex:
smoker: ["fume", "clope"]

negation:
"@factory": eds.negation
```

and then load the pipeline with:

```{ .python .no-check }
import edsnlp

nlp = edsnlp.load("config.yml")
```

=== "From an INI config file"

You can also create a pipeline from a configuration file. This is useful when you plan on changing the pipeline configuration often.

```{ .cfg title="config.cfg" }
[nlp]
@core = "pipeline"
lang = "eds"
pipeline = ["sentences", "matcher", "negation"]

Expand Down Expand Up @@ -100,7 +130,7 @@ from pathlib import Path
nlp("Le patient ne fume pas")

# Processing multiple documents
model.pipe([text1, text2])
nlp.pipe([text1, text2])
```

For more information on how to use the pipeline, refer to the [Inference](/inference) page.
Expand Down
4 changes: 2 additions & 2 deletions docs/index.md
Original file line number Diff line number Diff line change
Expand Up @@ -15,13 +15,13 @@ Check out our interactive [demo](https://aphp.github.io/edsnlp/demo/) !
You can install EDS-NLP via `pip`. We recommend pinning the library version in your projects, or using a strict package manager like [Poetry](https://python-poetry.org/).

```{: data-md-color-scheme="slate" }
pip install edsnlp==0.13.1
pip install edsnlp==0.14.0
```

or if you want to use the trainable components (using pytorch)

```{: data-md-color-scheme="slate" }
pip install "edsnlp[ml]==0.13.1"
pip install "edsnlp[ml]==0.14.0"
```

### A first pipeline
Expand Down
4 changes: 2 additions & 2 deletions docs/scripts/clickable_snippets.py
Original file line number Diff line number Diff line change
Expand Up @@ -184,7 +184,7 @@ def replace_link(match):

# Re-insert soups into the output
for soup, start, end in reversed(soups):
output = output[:start] + str(soup) + output[end:]
output = output[:start] + str(soup.find("code")) + output[end:]

output = regex.sub(HREF_REGEX, replace_link, output)

Expand All @@ -202,7 +202,7 @@ def convert_html_to_code(
cls, html_content: str
) -> Tuple[BeautifulSoup, str, list, list]:
pre_html_content = "<pre>" + html_content + "</pre>"
soup = BeautifulSoup(pre_html_content, "html5lib")
soup = list(BeautifulSoup(pre_html_content, "html5lib").children)[0]
code_element = soup.find("code")

line_lengths = [0]
Expand Down
1 change: 1 addition & 0 deletions docs/tutorials/make-a-training-script.md
Original file line number Diff line number Diff line change
Expand Up @@ -395,6 +395,7 @@ print(nlp.config.to_yaml_str())

```yaml title="config.yml"
nlp:
"@core": "pipeline"
lang: "eds"
components:
ner:
Expand Down
6 changes: 5 additions & 1 deletion edsnlp/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,7 @@
import edsnlp.pipes
from . import reducers

__version__ = "0.13.1"
__version__ = "0.14.0"

BASE_DIR = Path(__file__).parent

Expand Down Expand Up @@ -52,6 +52,10 @@ def find_spec(self, fullname, path, target=None): # pragma: no cover
new_name = fullname.replace("span_qualifier", "span_classifier")
spec = importlib.util.spec_from_loader(fullname, AliasLoader(new_name))
return spec
if "measurements" in fullname.split("."):
new_name = fullname.replace("measurements", "quantities")
spec = importlib.util.spec_from_loader(fullname, AliasLoader(new_name))
return spec


class AliasLoader(importlib.abc.Loader):
Expand Down
33 changes: 12 additions & 21 deletions edsnlp/core/pipeline.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,6 @@
import contextlib
import functools
import importlib
import inspect
import os
import re
import shutil
Expand All @@ -10,6 +9,7 @@
import sysconfig
import warnings
from enum import Enum
from inspect import Parameter, signature
from pathlib import Path
from types import FunctionType
from typing import (
Expand Down Expand Up @@ -105,7 +105,7 @@ def __init__(
vocab_config: Type[BaseDefaults] = None,
meta: Dict[str, Any] = None,
pipeline: Optional[Sequence[str]] = None,
components: Dict[str, CurriedFactory] = {},
components: Dict[str, Any] = {},
disable: AsList[str] = EMPTY_LIST,
enable: AsList[str] = EMPTY_LIST,
exclude: AsList = EMPTY_LIST,
Expand Down Expand Up @@ -232,17 +232,18 @@ def create_pipe(
Pipe
"""
try:
curried: CurriedFactory = Config(
pipe = Config(
{
"@factory": factory,
**(config if config is not None else {}),
}
).resolve(registry=registry)
if name is None:
name = inspect.signature(curried.factory).parameters.get("name").default
if name is None or name == inspect.Parameter.empty:
name = factory
pipe = curried.instantiate(nlp=self, path=(name,))
if isinstance(pipe, CurriedFactory):
if name is None:
name = signature(pipe.factory).parameters.get("name").default
if name is None or name == Parameter.empty:
name = factory
pipe = pipe.instantiate(nlp=self, path=(name,))
except ConfitValidationError as e:
raise e.with_traceback(None)
return pipe
Expand Down Expand Up @@ -413,8 +414,8 @@ def pipe(
inputs: Iterable[Union[str, Doc]]
The inputs to create the Docs from, or Docs directly.
n_process: int
Deprecated. Use the ".set(num_cpu_workers=n_process)" method on the returned
data stream instead.
Deprecated. Use the ".set_processing(num_cpu_workers=n_process)" method
on the returned data stream instead.
The number of parallel workers to use. If 0, the operations will be
executed sequentially.

Expand Down Expand Up @@ -589,16 +590,6 @@ def _add_pipes(
enable: Container[str],
disable: Container[str],
):
# Since components are actually resolved as curried factories,
# we need to instantiate them here
for name, component in components.items():
if not isinstance(component, CurriedFactory):
raise ValueError(
f"Component {repr(name)} is not instantiable (got {component}). "
f"Please make sure that you didn't forget to add a '@factory' "
f"key to the component config."
)

try:
components = CurriedFactory.instantiate(components, nlp=self)
except ConfitValidationError as e:
Expand Down Expand Up @@ -1215,7 +1206,7 @@ def load(
elif is_package:
# Load as package
available_kwargs = {"overrides": overrides, **pipe_selection}
signature_kwargs = inspect.signature(module.load).parameters
signature_kwargs = signature(module.load).parameters
kwargs = {
name: available_kwargs[name]
for name in signature_kwargs
Expand Down
33 changes: 31 additions & 2 deletions edsnlp/core/registries.py
Original file line number Diff line number Diff line change
Expand Up @@ -75,12 +75,38 @@ def maybe_nlp(self) -> Union["CurriedFactory", Any]:
-------
Union["CurriedFactory", Any]
"""
from edsnlp.core.pipeline import Pipeline, PipelineProtocol

sig = inspect.signature(self.factory)
# and sig.parameters["nlp"].default is sig.empty
if "nlp" not in sig.parameters or "nlp" in self.kwargs:
if (
not (
"nlp" in sig.parameters
and (
sig.parameters["nlp"].default is sig.empty
or sig.parameters["nlp"].annotation in (Pipeline, PipelineProtocol)
)
)
or "nlp" in self.kwargs
) and not self.search_curried_factory(self.kwargs):
return self.factory(**self.kwargs)
return self

@classmethod
def search_curried_factory(cls, obj):
    """
    Look for a `CurriedFactory` nested anywhere inside `obj`.

    The search is depth-first. Only the values of dicts and the items of
    tuples, lists and sets are visited; dict keys and any other kind of
    object are not traversed.

    Parameters
    ----------
    obj: Any
        The object (possibly a nested structure) to inspect.

    Returns
    -------
    Optional[CurriedFactory]
        The first `CurriedFactory` encountered, or None if there is none.
    """
    if isinstance(obj, CurriedFactory):
        return obj

    # Pick what to walk through; anything that is not a supported
    # container cannot hold a curried factory from our point of view.
    if isinstance(obj, dict):
        children = obj.values()
    elif isinstance(obj, (tuple, list, set)):
        children = obj
    else:
        return None

    for child in children:
        found = cls.search_curried_factory(child)
        if found is not None:
            return found
    return None

def instantiate(
obj: Any,
nlp: "edsnlp.Pipeline",
Expand Down Expand Up @@ -177,6 +203,9 @@ def __getattr__(self, name):
raise AttributeError(name)
self._raise_curried_factory_error()

def __repr__(self):
    """Return a short debug representation that shows the wrapped factory."""
    return f"CurriedFactory({self.factory})"


glob = []

Expand Down
3 changes: 1 addition & 2 deletions edsnlp/data/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -644,7 +644,6 @@ def __call__(self, doc):
def get_dict2doc_converter(
converter: Union[str, Callable], kwargs
) -> Tuple[Callable, Dict]:
kwargs_to_init = False
if not callable(converter):
available = edsnlp.registry.factory.get_available()
try:
Expand All @@ -666,7 +665,7 @@ def get_dict2doc_converter(
f"Cannot find converter for format {converter}. "
f"Available converters are {', '.join(available)}"
)
if isinstance(converter, type) or kwargs_to_init:
if isinstance(converter, type):
return converter(**kwargs), {}
return converter, validate_kwargs(converter, kwargs)

Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@ dependencies = [
"pysimstring>=1.2.1",
"regex",
"spacy>=3.2,<3.8",
"confit>=0.5.5",
"confit>=0.7.0",
"tqdm",
"umls-downloader>=0.1.1",
"numpy>=1.15.0,<1.23.2; python_version<'3.8'",
Expand Down
Loading
Loading