Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
128 changes: 65 additions & 63 deletions .github/workflows/tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,9 +7,11 @@ on:
branches: [master]

env:
# UV_INDEX_STRATEGY: "unsafe-first-match"
# UV_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu"
UV_INDEX_STRATEGY: "unsafe-first-match"
UV_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu"
PIP_EXTRA_INDEX_URL: "https://download.pytorch.org/whl/cpu"
UV_SYSTEM_PYTHON: 1
BRANCH_NAME: ${{ github.head_ref || github.ref_name }}

jobs:
linting:
Expand All @@ -22,7 +24,7 @@ jobs:
# required to grab the history of the PR
fetch-depth: 0

- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
with:
cache: 'pip'

Expand All @@ -46,19 +48,6 @@ jobs:
path: ~/.data/
key: resources

# - name: Cache pip
# uses: actions/cache@v3
# with:
# path: ~/.cache/pip
# key: ${{ runner.os }}-python-${{ matrix.python-version }}-pip

- run: echo WEEK=$(date +%V) >>$GITHUB_ENV
shell: bash

# - uses: hynek/setup-cached-uv@v1
# with:
# cache-suffix: -tests-${{ matrix.python-version }}-${{ env.WEEK }}

- name: Set up Java
uses: actions/setup-java@v2
with:
Expand All @@ -72,48 +61,28 @@ jobs:
cache: 'pip'

- name: Install dependencies
run: |
pip install poetry
pip install -e '.[dev]' pytest-xdist pip
run: pip install -e ".[dev]"
if: matrix.python-version != '3.9' && matrix.python-version != '3.10' && matrix.python-version != '3.11' && matrix.python-version != '3.12'
# uv venv
# source .venv/bin/activate
# uv pip install -e '.[dev]' pytest-xdist pip

- name: Install dependencies
run: |
pip install poetry
pip install -e '.[dev,setup]' pytest-xdist pip
run: pip install -e ".[dev,setup]"
if: matrix.python-version == '3.9'
# uv venv
# source .venv/bin/activate
# uv pip install -e '.[dev]' pytest-xdist pip

- name: Install dependencies
run: |
pip install poetry
pip install -e '.[dev-no-ml]' pytest-xdist pip
# skip ML tests for 3.10, 3.11 and 3.12
run: pip install -e ".[dev-no-ml]"
if: matrix.python-version == '3.10' || matrix.python-version == '3.11' || matrix.python-version == '3.12'

- name: Test with Pytest on Python ${{ matrix.python-version }}
env:
UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }}
run: |
coverage run -m pytest --ignore tests/test_docs.py # -n auto
# coverage combine
# mv .coverage .coverage.${{ matrix.python-version }}
# source .venv/bin/activate
run: coverage run -m pytest --ignore tests/test_docs.py
if: matrix.python-version != '3.9'

- name: Test with Pytest on Python ${{ matrix.python-version }}
env:
UMLS_API_KEY: ${{ secrets.UMLS_API_KEY }}
run: |
coverage run -m pytest # -n auto
# coverage combine
# mv .coverage .coverage.${{ matrix.python-version }}
# source .venv/bin/activate
run: coverage run -m pytest
if: matrix.python-version == '3.9'

- name: Upload coverage data
Expand All @@ -137,33 +106,72 @@ jobs:

documentation:
name: Documentation
runs-on: ubuntu-22.04
runs-on: ubuntu-latest
steps:
- uses: actions/checkout@v2

- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
with:
python-version: "3.9"
python-version: 3.9
cache: 'pip'

- run: echo WEEK=$(date +%V) >>$GITHUB_ENV
shell: bash
- name: Install dependencies
run: pip install -e ".[docs]"

# - uses: hynek/setup-cached-uv@v1
# with:
# cache-suffix: -docs-${{ matrix.python-version }}-${{ env.WEEK }}
- name: Set up Git
run: |
git config user.name ${{ github.actor }}
git config user.email ${{ github.actor }}@users.noreply.github.com
echo Current branch: $BRANCH_NAME

- name: Install dependencies
- name: Build documentation
run: |
pip install '.[docs]'
# uv venv
# uv pip install '.[docs]'
mike deploy --no-redirect --rebase --update-aliases $BRANCH_NAME latest
mike set-default $BRANCH_NAME

- name: Put content of gh-pages to public folder
run: rm -rf public && mkdir public && git archive gh-pages | tar -x -C ./public/

- name: Build documentation
- name: Set up Vercel
run: npm install --global vercel@latest

- name: Pull Vercel environment
run: vercel pull --yes --environment=preview --token=${{ secrets.VERCEL_TOKEN }}

- name: Create new vercel project linked to this branch
run: vercel project add edsnlp-$BRANCH_NAME --token=${{ secrets.VERCEL_TOKEN }}

- name: Link public folder to the (maybe) new vercel project
run: vercel link --cwd public --project edsnlp-$BRANCH_NAME --yes --token=${{ secrets.VERCEL_TOKEN }}

- name: Deploy to Vercel
run: vercel deploy public/ --yes --token=${{ secrets.VERCEL_TOKEN }} --archive=tgz --prod > deployment-url.txt

- name: Post the documentation link
env:
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
run: |
mkdocs build --clean
# source .venv/bin/activate
URL=https://edsnlp-$BRANCH_NAME.vercel.app/
COMMENT_BODY="## Docs preview URL\n\n$URL\n\n"
HEADER="Authorization: token $GITHUB_TOKEN"
PR_COMMENTS_URL="https://api.github.com/repos/${{ github.repository }}/issues/${{ github.event.pull_request.number }}/comments"

# Fetch existing comments to find if one from this workflow already exists
COMMENTS=$(curl -s -H "$HEADER" "$PR_COMMENTS_URL")
COMMENT_ID=$(echo "$COMMENTS" | jq -r '.[] | select(.user.login == "github-actions[bot]" and (.body | startswith("## Docs preview URL"))) | .id')

# Check if we have a comment ID, if so, update it, otherwise create a new one
if [[ "$COMMENT_ID" ]]; then
# Update existing comment
curl -s -X PATCH -H "$HEADER" -H "Content-Type: application/json" -d "{\"body\": \"$COMMENT_BODY\"}" "https://api.github.com/repos/${{ github.repository }}/issues/comments/$COMMENT_ID"
else
# Post new comment
curl -s -X POST -H "$HEADER" -H "Content-Type: application/json" -d "{\"body\": \"$COMMENT_BODY\"}" "$PR_COMMENTS_URL"
fi

if [ $status -ne 0 ]; then
exit $status
fi

simple-installation:
name: Simple installation
Expand All @@ -175,21 +183,15 @@ jobs:
steps:
- uses: actions/checkout@v2

- uses: actions/setup-python@v4
- uses: actions/setup-python@v5
with:
python-version: ${{ matrix.python-version }}
cache: 'pip'

- run: echo WEEK=$(date +%V) >>$GITHUB_ENV
shell: bash

# - uses: hynek/setup-cached-uv@v1
# with:
# cache-suffix: -simple-install-${{ matrix.python-version }}-${{ env.WEEK }}

- name: Install library
run: |
pip install ".[ml]" pytest
pytest tests/pipelines/test_pipelines.py
# uv venv
# uv pip install .
2 changes: 2 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@ site/
*.cpp
*.so
*.c
public/

# Unit test / coverage reports
htmlcov/
Expand Down Expand Up @@ -71,3 +72,4 @@ _build/
docs/reference
docs/changelog.md
docs/contributing.md
.vercel
2 changes: 1 addition & 1 deletion .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ repos:
# ruff
- repo: https://github.com/charliermarsh/ruff-pre-commit
# Ruff version.
rev: 'v0.6.4'
rev: 'v0.9.6'
hooks:
- id: ruff
args: ['--config', 'pyproject.toml', '--fix', '--show-fixes']
Expand Down
13 changes: 12 additions & 1 deletion changelog.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,13 +5,24 @@
### Added

- Support for numpy>2.0, and formal support for Python 3.11 and Python 3.12
- Expose the default patterns of `eds.negation`, `eds.hypothesis`, `eds.family`, `eds.history` and `eds.reported_speech` under an `eds.negation.default_patterns` attribute
- Added a `context_getter` SpanGetter argument to the `eds.matcher` class to only retrieve entities inside the spans returned by the getter
- Added a `filter_expr` parameter to scorers to filter the documents to score
- Added a new `required` field to `eds.contextual_matcher` assign patterns to only match if the required field has been found, and an `include` parameter (similar to `exclude`) to search for required patterns without assigning them to the entity
- Added context strings (e.g., "words[0:5] | sent[0:1]") to the `eds.contextual_matcher` component to allow for more complex patterns in the selection of the window around the trigger spans.
- Include and exclude patterns in the contextual matcher now dismiss matches that occur inside the anchor pattern (e.g. "anti" exclude pattern for anchor pattern "antibiotics" will not match the "anti" part of "antibiotics")
- Pull Requests will now build a publicly accessible preview of the docs

### Changed
- Improve the contextual matcher documentation.

### Fixed

- `edsnlp.package` now correctly detects if a project uses an old-style poetry pyproject or a PEP621 pyproject.toml.
- PEP621 projects containing nested directories (e.g., "my_project/pipes/foo.py") are now supported.
- Try several paths to find current pip executable
- Compatibility with Optuna 4.3.0
- The parameter "value_extract" of `eds.score` now correctly handles lists of patterns.
- "Zero variance" errors raised when computing param tuning importance are now caught and converted into warnings

## v0.16.0 (2025-03-26)

Expand Down
8 changes: 4 additions & 4 deletions docs/assets/fragments/alcohol-examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -73,12 +73,12 @@
spans = doc.spans["alcohol"]

spans
# Out: [Alcoolism non sevré]
# Out: [Alcoolisme non sevré]

span = spans[0]

span._.detailed_status
# Out: None # "sevré" is negated, so no "ABTINENCE" status
span._.detailed_status  # "sevré" is negated, so no "ABSTINENCE" status
# Out: None
```


Expand All @@ -90,7 +90,7 @@
spans = doc.spans["alcohol"]

spans
# Out: [Alcool: 0]
# Out: [Alcool]

span = spans[0]

Expand Down
2 changes: 1 addition & 1 deletion docs/assets/fragments/peptic-ulcer-disease-examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@
spans = doc.spans["peptic_ulcer_disease"]

spans
# Out: [ulcères]
# Out: [gastrique: blabla blabla blabla blabla blabla quelques ulcères]

span = spans[0]

Expand Down
4 changes: 2 additions & 2 deletions docs/assets/fragments/tobacco-examples.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@
spans = doc.spans["tobacco"]

spans
# Out: [Tabac: 0]
# Out: [Tabac]

span = spans[0]

Expand All @@ -77,7 +77,7 @@
# Out: True

span._.assigned
# Out: {'zero_after': [0]}
# Out: {'zero_after': 0}
```


Expand Down
17 changes: 17 additions & 0 deletions docs/assets/stylesheets/extra.css
Original file line number Diff line number Diff line change
Expand Up @@ -190,3 +190,20 @@ a.discrete-link {
font-size: 1rem;
align-content: center;
}

.doc-param-details .subdoc {
padding: 0;
box-shadow: none;
border-color: var(--md-typeset-table-color);
}

.doc-param-details .subdoc > div > div > div> table {
padding: 0;
box-shadow: none;
border: none;
}

.doc-param-details .subdoc > summary {
margin: 0;
font-weight: normal;
}
Loading
Loading