Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
20 changes: 15 additions & 5 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,7 @@ jobs:
strategy:
fail-fast: true
matrix:
python-version: ["3.7", "3.8", "3.9", "3.10"]
python-version: ["3.7", "3.8", "3.9", "3.10", "3.11", "3.12"]
steps:
- uses: actions/checkout@v2

Expand Down Expand Up @@ -71,20 +71,30 @@ jobs:
python-version: ${{ matrix.python-version }}
cache: 'pip'

- name: Install dependencies
run: |
pip install poetry
pip install -e '.[dev]' pytest-xdist pip
if: matrix.python-version != '3.9' && matrix.python-version != '3.10' && matrix.python-version != '3.11' && matrix.python-version != '3.12'
# uv venv
# source .venv/bin/activate
# uv pip install -e '.[dev]' pytest-xdist pip

- name: Install dependencies
run: |
pip install poetry
pip install -e '.[dev,setup]' pytest-xdist pip
if: matrix.python-version != '3.10'
if: matrix.python-version == '3.9'
# uv venv
# source .venv/bin/activate
# uv pip install -e '.[dev,setup]' pytest-xdist pip
# uv pip install -e '.[dev]' pytest-xdist pip

- name: Install dependencies
run: |
pip install poetry
pip install -e '.[dev-no-ml,setup]' pytest-xdist pip
if: matrix.python-version == '3.10'
pip install -e '.[dev-no-ml]' pytest-xdist pip
# skip ML tests for 3.10, 3.11 and 3.12
if: matrix.python-version == '3.10' || matrix.python-version == '3.11' || matrix.python-version == '3.12'

- name: Test with Pytest on Python ${{ matrix.python-version }}
env:
Expand Down
11 changes: 11 additions & 0 deletions changelog.md
Original file line number Diff line number Diff line change
@@ -1,5 +1,16 @@
# Changelog

## Unreleased

### Added

- Support for numpy>=2.0, and formal support for Python 3.11 and Python 3.12

### Fixed

- `edsnlp.package` now correctly detects whether a project uses an old-style poetry pyproject or a PEP621 pyproject.toml.
- PEP621 projects containing nested directories (e.g., "my_project/pipes/foo.py") are now supported.

## v0.16.0 (2025-03-26)

### Added
Expand Down
2 changes: 1 addition & 1 deletion edsnlp/core/pipeline.py
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@ def __init__(
create_tokenizer: Optional[Callable[[Self], Tokenizer]] = None,
vocab: Union[bool, Vocab] = True,
batch_size: Optional[int] = None,
vocab_config: Type[BaseDefaults] = None,
vocab_config: Optional[Type[BaseDefaults]] = None,
meta: Dict[str, Any] = None,
pipeline: Optional[Sequence[str]] = None,
components: Dict[str, Any] = {},
Expand Down
4 changes: 3 additions & 1 deletion edsnlp/data/converters.py
Original file line number Diff line number Diff line change
Expand Up @@ -89,7 +89,9 @@ def validate_kwargs(func, kwargs):
model = vd.init_model_instance(
**{k: v for k, v in kwargs.items() if k in spec.args}
)
fields = model.__fields__ if pydantic.__version__ < "2" else model.model_fields
fields = (
model.__fields__ if pydantic.__version__ < "2" else vd.model.model_fields
)
d = {
k: v
for k, v in model.__dict__.items()
Expand Down
60 changes: 36 additions & 24 deletions edsnlp/package.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,15 +6,7 @@
import tempfile
import warnings
from pathlib import Path
from typing import (
TYPE_CHECKING,
Any,
Dict,
Mapping,
Optional,
Sequence,
Union,
)
from typing import TYPE_CHECKING, Any, Dict, Mapping, Optional, Sequence, Union

import build
import confit
Expand Down Expand Up @@ -288,7 +280,7 @@ def make_src_dir(self):
logger.info(f"SKIP {rel}")


class PoetryPackager(Packager):
class OldStylePoetryPackager(Packager):
def __init__(
self,
*,
Expand Down Expand Up @@ -326,7 +318,13 @@ def __init__(
"requires": ["hatchling"],
"build-backend": "hatchling.build",
},
"tool": {"hatch": {"build": {}}},
"tool": {
"hatch": {
"build": {},
# in case the user provides a git dependency for example
"metadata": {"allow-direct-references": True},
}
},
"project": {
"name": model_package,
"version": version,
Expand Down Expand Up @@ -458,7 +456,7 @@ def __init__(
)


class SetuptoolsPackager(Packager):
class StandardPackager(Packager):
def __init__(
self,
*,
Expand Down Expand Up @@ -499,7 +497,13 @@ def __init__(
"requires": ["hatchling"],
"build-backend": "hatchling.build",
},
"tool": {"hatch": {"build": {}}},
"tool": {
"hatch": {
"build": {},
# in case the user provides a git dependency for example
"metadata": {"allow-direct-references": True},
},
},
"project": {
"name": model_package,
"version": version,
Expand All @@ -523,7 +527,10 @@ def __init__(
packages = sorted([p for p in packages if p])
file_paths = []
for package in packages:
file_paths.extend((root_dir / package).rglob("*"))
for path in (root_dir / package).rglob("*"):
if "__pycache__" in path.parts or path.is_dir():
continue
file_paths.append(path)

new_pyproject["tool"]["hatch"]["build"] = {
"packages": [*packages, artifacts_name],
Expand Down Expand Up @@ -570,7 +577,7 @@ def package(
dist_dir: Path = Path("dist"),
artifacts_name: ModuleName = "artifacts",
check_dependencies: bool = False,
project_type: Optional[Literal["poetry", "setuptools"]] = None,
project_type: Optional[str] = None,
version: Optional[str] = None,
metadata: Optional[Dict[str, Any]] = {},
distributions: Optional[AsList[Literal["wheel", "sdist"]]] = ["wheel"],
Expand Down Expand Up @@ -601,21 +608,26 @@ def package(
if pyproject_path.exists():
pyproject = toml.loads((root_dir / "pyproject.toml").read_text())

package_managers = {"setuptools", "poetry", "hatch", "pdm"} & set(
(pyproject or {}).get("tool", {})
)
package_managers = package_managers or {"setuptools"} # default
try:
_ = pyproject["tool"]["poetry"]["name"]
inferred_project_type = "old-style-poetry"
except (KeyError, TypeError):
inferred_project_type = "standard"

try:
if project_type is None:
[project_type] = package_managers
project_type = inferred_project_type
packager_cls = {
"poetry": PoetryPackager,
"setuptools": SetuptoolsPackager,
"old-style-poetry": OldStylePoetryPackager,
"standard": StandardPackager,
# for backward compatibility
"poetry": OldStylePoetryPackager,
"setuptools": StandardPackager,
}[project_type]
except Exception: # pragma: no cover
raise ValueError(
"Could not infer project type, only poetry and setuptools based projects "
"are supported for now"
f"Could not process project type {project_type!r} only old-style poetry "
f"and PEP 621 pyproject.toml formats are supported for now."
)
packager = packager_cls(
pyproject=pyproject,
Expand Down
9 changes: 4 additions & 5 deletions edsnlp/pipes/core/endlines/model.py
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,6 @@

import numpy as np
import pandas as pd
from numpy.lib.function_base import iterable
from pandas.api.types import CategoricalDtype
from pandas.core.groupby import DataFrameGroupBy
from spacy.strings import StringStore
Expand Down Expand Up @@ -239,9 +238,9 @@ def _convert_A(self, df: pd.DataFrame, col: str) -> pd.DataFrame:
df[new_col] = df[col].astype(cat_type_A)
df[new_col] = df[new_col].cat.codes
# Ensure that not known values are coded as OTHER
df.loc[
~df[col].isin(self.vocabulary["A3A4"].keys()), new_col
] = self.vocabulary["A3A4"]["OTHER"]
df.loc[~df[col].isin(self.vocabulary["A3A4"].keys()), new_col] = (
self.vocabulary["A3A4"]["OTHER"]
)
return df

def _convert_B(self, df: pd.DataFrame, col: str) -> pd.DataFrame:
Expand Down Expand Up @@ -594,7 +593,7 @@ def _retrieve_lines(cls, dfg: DataFrameGroupBy) -> DataFrameGroupBy:
return dfg

@classmethod
def _create_vocabulary(cls, x: iterable) -> dict:
def _create_vocabulary(cls, x: Iterable) -> dict:
"""Function to create a vocabulary for attributes in the training set.

Parameters
Expand Down
14 changes: 13 additions & 1 deletion edsnlp/pipes/misc/dates/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,12 +4,24 @@

import pydantic
from pandas._libs.tslibs.nattype import NaTType
from pydantic import BaseModel, Field, root_validator, validator
from pydantic import BaseModel, Field
from pytz import timezone
from spacy.tokens import Span

from edsnlp.pipes.misc.dates.patterns.relative import specific_dict

# Compatibility shim: expose the `validator` / `root_validator` names whether
# or not the installed pydantic provides `field_validator` / `model_validator`.
try:
    from pydantic import field_validator, model_validator
except ImportError:
    # The newer decorators are unavailable: use the ones pydantic itself
    # ships under the legacy names.
    from pydantic import root_validator, validator
else:

    def validator(x, allow_reuse=True, pre=False):
        """Legacy-style wrapper around `field_validator`.

        Parameters
        ----------
        x
            Field name forwarded as the positional argument of
            `field_validator`.
        allow_reuse
            Accepted so legacy call sites keep working; not used.
        pre
            When truthy the validator is registered with ``mode="before"``,
            otherwise with ``mode="after"``.

        Returns
        -------
        The decorator returned by `field_validator`.
        """
        if pre:
            return field_validator(x, mode="before")
        return field_validator(x, mode="after")

    def root_validator(allow_reuse=True, pre=False):
        """Legacy-style wrapper around `model_validator`.

        Parameters
        ----------
        allow_reuse
            Accepted so legacy call sites keep working; not used.
        pre
            When truthy the validator is registered with ``mode="before"``,
            otherwise with ``mode="after"``.

        Returns
        -------
        The decorator returned by `model_validator`.
        """
        if pre:
            return model_validator(mode="before")
        return model_validator(mode="after")


class Direction(str, Enum):
FUTURE = "future"
Expand Down
1 change: 0 additions & 1 deletion edsnlp/pipes/ner/scores/charlson/factory.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
"eds.charlson",
assigns=["doc.ents", "doc.spans"],
deprecated=[
"eds.charlson",
"charlson",
],
)
Expand Down
8 changes: 6 additions & 2 deletions edsnlp/tune.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
import joblib
import optuna
import optuna.visualization as vis
import pydantic
from configobj import ConfigObj
from confit import Cli, Config
from confit.utils.collections import split_path
Expand Down Expand Up @@ -49,6 +50,9 @@ class HyperparameterConfig(BaseModel):
class Config:
extra = "forbid"

if pydantic.VERSION < "2":
model_dump = BaseModel.dict

def to_dict(self) -> dict:
"""
Convert the hyperparameter configuration to a dictionary.
Expand All @@ -57,7 +61,7 @@ def to_dict(self) -> dict:
Returns:
dict: A dictionary representation of the hyperparameter configuration.
"""
return self.dict(exclude_unset=True, exclude_defaults=True)
return self.model_dump(exclude_unset=True, exclude_defaults=True)


def setup_logging():
Expand Down Expand Up @@ -598,7 +602,7 @@ def tune(
output_dir: str,
checkpoint_dir: str,
gpu_hours: confloat(gt=0) = DEFAULT_GPU_HOUR,
n_trials: conint(gt=0) = None,
n_trials: Optional[conint(gt=0)] = None,
two_phase_tuning: bool = False,
seed: int = 42,
metric="ner.micro.f",
Expand Down
38 changes: 19 additions & 19 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -13,13 +13,16 @@ dependencies = [
"pytz",
"pysimstring>=1.2.1",
"regex",
"spacy>=3.2,<3.8",
"thinc<8.2.5", # we don't need thinc but spacy depends on it, and 8.2.5 causes binary issues
# spacy doesn't provide binaries for python<3.9 from 3.8.2 so we need to cap it ourselves
"spacy>=3.2,<3.8.2; python_version<'3.9'",
"spacy>=3.8.5,<4.0.0; python_version>='3.9'",
# thinc doesn't provide binaries for python<3.9 from 8.2.5 so we need to cap it ourselves
"thinc<8.2.5; python_version<'3.9'",
"thinc>=8.2.5; python_version>='3.9'",
"confit>=0.7.3",
"tqdm",
"umls-downloader>=0.1.1",
"numpy>=1.15.0,<1.23.2; python_version<'3.8'",
"numpy>=1.15.0,<2.0.0; python_version>='3.8'",
"numpy>=1.15.0",
"pandas>=1.1.0; python_version<'3.8'",
"pandas>=1.4.0; python_version>='3.8'",
"typing-extensions>=4.0.0",
Expand Down Expand Up @@ -49,8 +52,7 @@ dev-no-ml = [
"pyspark",
"polars",

"mlconjug3<3.9.0",
"scikit-learn>=1.0.0",
"scikit-learn",

"edsnlp[docs-no-ml]",
]
Expand All @@ -75,7 +77,7 @@ docs-no-ml = [
ml = [
"rich-logger>=0.3.1",
"torch>=1.13.0",
"foldedtensor>=0.3.4",
"foldedtensor>=0.4.0",
"safetensors>=0.3.0; python_version>='3.8'",
"safetensors>=0.3.0,<0.5.0; python_version<'3.8'",
"transformers>=4.0.0,<5.0.0",
Expand All @@ -92,10 +94,11 @@ dev = [
"plotly>=5.18.0", # required by optuna viz
"ruamel.yaml>=0.18.0",
"configobj>=5.0.9",

"scikit-learn",
]
setup = [
"typer"
"mlconjug3<3.9.0", # bug https://github.com/Ars-Linguistica/mlconjug3/pull/506
"numpy<2", # mlconjug3 has a scikit-learn dependency that doesn't support numpy 2 yet
]

[project.urls]
Expand Down Expand Up @@ -312,7 +315,11 @@ where = ["."]
requires = [
"setuptools",
"cython>=0.25",
"spacy>=3.2,<3.8",
"spacy>=3.2,!=3.8.2; python_version<'3.9'",
"spacy>=3.2,!=3.8.2,<4.0.0; python_version>='3.9'",
# thinc doesn't provide binaries for python<3.9 from 8.2.5 so we need to cap it ourselves
"thinc<8.2.5; python_version<'3.9'",
"thinc>=8.2.5; python_version>='3.9'",
# to update from https://github.com/scipy/oldest-supported-numpy/blob/main/setup.cfg
# while setting numpy >= 1.15.0 due to spacy reqs
"numpy==1.15.0; python_version=='3.7' and platform_machine not in 'arm64|aarch64|loongarch64' and platform_system!='AIX' and platform_python_implementation != 'PyPy'",
Expand All @@ -324,19 +331,12 @@ requires = [
"numpy==1.19.0; python_version=='3.6' and platform_machine!='loongarch64' and platform_python_implementation=='PyPy'",
"numpy==1.19.2; python_version=='3.7' and platform_machine=='aarch64' and platform_system!='AIX' and platform_python_implementation != 'PyPy'",
"numpy==1.19.2; python_version=='3.8' and platform_machine=='aarch64' and platform_python_implementation != 'PyPy'",
"numpy==1.19.3; python_version=='3.9' and platform_machine=='arm64' and platform_system=='Windows' and platform_python_implementation != 'PyPy'",
"numpy==1.19.3; python_version=='3.9' and platform_system not in 'OS400' and platform_machine not in 'arm64|loongarch64' and platform_python_implementation != 'PyPy'",
"numpy==1.20.0; python_version=='3.7' and platform_machine!='loongarch64' and platform_python_implementation=='PyPy'",
"numpy==1.21.0; python_version=='3.7' and platform_machine=='arm64' and platform_system=='Darwin' and platform_python_implementation!='PyPy'",
"numpy==1.21.0; python_version=='3.8' and platform_machine=='arm64' and platform_system=='Darwin' and platform_python_implementation!='PyPy'",
"numpy==1.21.0; python_version=='3.9' and platform_machine=='arm64' and platform_system=='Darwin' and platform_python_implementation!='PyPy'",
"numpy==1.21.6; python_version=='3.10' and platform_machine!='loongarch64'",
"numpy==1.22.2; platform_machine=='loongarch64' and python_version>='3.8' and python_version<'3.11' and platform_python_implementation!='PyPy'",
"numpy==1.22.2; python_version>='3.8' and python_version<'3.9' and platform_machine=='loongarch64' and platform_python_implementation!='PyPy'",
"numpy==1.22.2; python_version=='3.8' and platform_machine!='loongarch64' and platform_python_implementation=='PyPy'",
"numpy==1.23.2; python_version=='3.11'",
"numpy==1.23.3; python_version=='3.9' and platform_system=='OS400' and platform_machine!='loongarch64' and platform_python_implementation!='PyPy'",
"numpy==1.25.0; python_version=='3.9' and platform_python_implementation=='PyPy'",
"numpy==1.26.1; python_version=='3.12'",
"numpy>=2.0; python_version>='3.9'",
]
build-backend = "setuptools.build_meta"

Expand Down
1 change: 1 addition & 0 deletions tests/tuning/config.yml
Original file line number Diff line number Diff line change
Expand Up @@ -125,3 +125,4 @@ train:
scorer: ${ scorer }
num_workers: 0
optimizer: ${ optimizer }
cpu: true
Loading
Loading