diff --git a/.github/workflows/argilla-server.workflow.yml b/.github/workflows/argilla-server.workflow.yml
new file mode 100644
index 0000000000..6be922a2e4
--- /dev/null
+++ b/.github/workflows/argilla-server.workflow.yml
@@ -0,0 +1,168 @@
+name: Build `argilla-server` package
+
+concurrency:
+  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
+  cancel-in-progress: true
+
+on:
+  workflow_dispatch:
+
+  pull_request:
+    paths:
+      - 'argilla-server/**'
+    types:
+      - opened
+      - edited
+      - reopened
+      - synchronize
+      - ready_for_review
+
+jobs:
+
+  build:
+    name: Build `argilla-server` package
+    runs-on: ubuntu-latest
+
+    defaults:
+      run:
+        shell: bash -l {0}
+        working-directory: ./argilla-server
+
+    services:
+      search_engine:
+        image: docker.elastic.co/elasticsearch/elasticsearch:8.8.2
+        ports:
+          - 9200:9200
+        env:
+          discovery.type: single-node
+          xpack.security.enabled: false
+      # NOTE: postgres service will not be executed by default.
+      # It will only be executed if the postgresDockerImage input is provided.
+      postgres:
+        image: postgres:14
+        env:
+          POSTGRES_HOST: localhost
+          POSTGRES_USER: postgres
+          POSTGRES_PASSWORD: postgres
+          POSTGRES_DB: argilla
+        options: >-
+          --health-cmd pg_isready
+          --health-interval 10s
+          --health-timeout 5s
+          --health-retries 5
+        ports:
+          - 5432:5432
+
+    env:
+      ARGILLA_ENABLE_TELEMETRY: 0
+
+    steps:
+      - name: Checkout Code 🛎
+        uses: actions/checkout@v4
+
+      - name: Setup PDM
+        uses: pdm-project/setup-pdm@v4
+        with:
+          python-version-file: argilla-server/pyproject.toml
+          cache-dependency-path: argilla-server/pdm.lock
+          cache: true
+
+      - name: Install dependencies
+        run: pdm install
+
+      - name: Run tests 📈
+        run: |
+          ARGILLA_DATABASE_URL=postgresql://postgres:postgres@localhost:5432/argilla
+          pdm test tests/unit -vs --cov=argilla_server --cov-report=xml:coverage.xml
+
+      - name: Upload test coverage
+        uses: codecov/codecov-action@v4
+        with:
+          file: coverage.xml
+
+      - name: Build package
+        run: |
+          pdm build
+
+      - name: Upload artifact
+        uses: actions/upload-artifact@v4
+        with:
+          name: argilla-server
+          path: argilla-server/dist
+
+  build_server_docker_image:
+    name: Build Argilla server docker image
+    uses: ./.github/workflows/argilla-server.build-push-docker.yml
+    if: github.event_name == 'pull_request' && github.event.pull_request.draft == false
+    needs:
+      - build
+    with:
+      download-python-package: true
+      image-name: argilla/argilla-server
+      dockerfile: argilla-server/docker/server/Dockerfile
+      context: argilla-server/docker/server
+      readme: argilla-server/README.md
+      platforms: linux/amd64,linux/arm64
+    secrets: inherit
+
+  build_quickstart_docker_image:
+    name: Build Argilla quickstart docker image
+    uses: ./.github/workflows/argilla-server.build-push-docker.yml
+    needs: build_server_docker_image
+    if: github.event_name == 'pull_request' && github.event.pull_request.draft == false
+    with:
+      download-python-package: false
+      image-name: argilla/argilla-quickstart
+      dockerfile: argilla-server/docker/quickstart/Dockerfile
+      context: argilla-server/docker/quickstart
+      readme: argilla-server/docker/quickstart/README.md
+      platforms: linux/amd64,linux/arm64
+      build-args: |
+        ARGILLA_VERSION=${{ needs.build_server_docker_image.outputs.version }}
+    secrets: inherit
+
+  # This job will publish argilla-server python package into PyPI repository
+  publish_release:
+    name: Publish Release
+    runs-on: ubuntu-latest
+    if: ${{ github.event_name == 'release' }}
+
+    needs:
+      - build
+      - build_server_docker_image
+      - build_quickstart_docker_image
+
+    permissions:
+      # This permission is needed for private repositories.
+      # contents: read
+      # IMPORTANT: this permission is mandatory for trusted publishing on PyPI
+      id-token: write
+
+    defaults:
+      run:
+        shell: bash -l {0}
+        working-directory: ./argilla-server
+
+    steps:
+      - name: Checkout Code 🛎
+        uses: actions/checkout@v4
+
+      - name: Download python package
+        uses: actions/download-artifact@v4
+        with:
+          name: argilla-server
+          path: argilla-server/dist
+
+      - name: Setup PDM
+        uses: pdm-project/setup-pdm@v4
+        with:
+          cache: true
+
+      - name: Publish Package to PyPI test environment 🥪
+        run: pdm publish --no-build --repository testpypi
+
+      - name: Test Installing 🍿
+        run: pip install --index-url https://test.pypi.org/simple --no-deps argilla-server==${GITHUB_REF#refs/*/v}
+
+      - name: Publish Package to PyPI 🥩
+        run: pdm publish --no-build
diff --git a/argilla-server/src/argilla_server/apis/v1/controllers/search.py b/argilla-server/src/argilla_server/apis/v1/controllers/search.py
index dd7d2c3138..fe6c763139 100644
--- a/argilla-server/src/argilla_server/apis/v1/controllers/search.py
+++ b/argilla-server/src/argilla_server/apis/v1/controllers/search.py
@@ -134,5 +134,4 @@ async def search(
     dataset_id: UUID,
     search_query: RecordsSearchQuery,
 ):
-
     return RecordsSearchResponse(query=search_query)
diff --git a/argilla-server/src/argilla_server/bulk/records_bulk.py b/argilla-server/src/argilla_server/bulk/records_bulk.py
index 043b9dc7a4..dc4f67d138 100644
--- a/argilla-server/src/argilla_server/bulk/records_bulk.py
+++ b/argilla-server/src/argilla_server/bulk/records_bulk.py
@@ -43,7 +43,6 @@


 class CreateRecordsBulk:
-
     def __init__(self, db: AsyncSession, search_engine: SearchEngine):
         self._db = db
         self._search_engine = search_engine
@@ -73,7 +72,6 @@ async def create_records_bulk(self, dataset: Dataset, bulk_create: RecordsBulkCr
         return RecordsBulk(items=records)

     async def _upsert_records_relationships(self, records: List[Record], records_create: List[RecordCreate]) -> None:
-
         records_and_suggestions = list(zip(records, [r.suggestions for r in records_create]))
         records_and_responses = list(zip(records, [r.responses for r in records_create]))
         records_and_vectors = list(zip(records, [r.vectors for r in records_create]))
@@ -87,7 +85,6 @@
     async def _upsert_records_suggestions(
         self, records_and_suggestions: List[Tuple[Record, List[SuggestionCreate]]]
     ) -> List[Suggestion]:
-
         upsert_many_suggestions = []
         for idx, (record, suggestions) in enumerate(records_and_suggestions):
             try:
@@ -118,7 +115,6 @@ async def _upsert_records_suggestions(
     async def _upsert_records_responses(
         self, records_and_responses: List[Tuple[Record, List[UserResponseCreate]]]
     ) -> List[Response]:
-
         user_ids = [response.user_id for _, responses in records_and_responses for response in responses or []]
         users_by_id = await fetch_users_by_ids_as_dict(self._db, user_ids)

@@ -147,7 +143,6 @@ async def _upsert_records_responses(
     async def _upsert_records_vectors(
         self, records_and_vectors: List[Tuple[Record, Dict[str, List[float]]]]
     ) -> List[Vector]:
-
         upsert_many_vectors = []
         for idx, (record, vectors) in enumerate(records_and_vectors):
             try:
@@ -176,9 +171,7 @@ def _metadata_is_set(self, record_create: RecordCreate) -> bool:


 class UpsertRecordsBulk(CreateRecordsBulk):
-
     async def upsert_records_bulk(self, dataset: Dataset, bulk_upsert: RecordsBulkUpsert) -> RecordsBulkWithUpdateInfo:
-
         found_records = await self._fetch_existing_dataset_records(dataset, bulk_upsert.items)
         # found_records is passed to the validator to avoid querying the database again, but ideally, it should be
         # computed inside the validator
@@ -220,7 +213,6 @@ async def _fetch_existing_dataset_records(
         dataset: Dataset,
         records_upsert: List[RecordUpsert],
     ) -> Dict[Union[str, UUID], Record]:
-
         records_by_external_id = await fetch_records_by_external_ids_as_dict(
             self._db, dataset, [r.external_id for r in records_upsert]
         )
diff --git a/argilla-server/src/argilla_server/contexts/records.py b/argilla-server/src/argilla_server/contexts/records.py
index da31701551..0762bb4488 100644
--- a/argilla-server/src/argilla_server/contexts/records.py
+++ b/argilla-server/src/argilla_server/contexts/records.py
@@ -25,7 +25,6 @@
 async def list_dataset_records_by_ids(
     db: AsyncSession, dataset_id: UUID, record_ids: Sequence[UUID]
 ) -> Sequence[Record]:
-
     query = select(Record).filter(Record.id.in_(record_ids), Record.dataset_id == dataset_id)
     return (await db.execute(query)).unique().scalars().all()

@@ -33,7 +32,6 @@
 async def list_dataset_records_by_external_ids(
     db: AsyncSession, dataset_id: UUID, external_ids: Sequence[str]
 ) -> Sequence[Record]:
-
     query = (
         select(Record)
         .filter(Record.external_id.in_(external_ids), Record.dataset_id == dataset_id)
diff --git a/argilla-server/src/argilla_server/search_engine/commons.py b/argilla-server/src/argilla_server/search_engine/commons.py
index 572c48b071..97f7d2697d 100644
--- a/argilla-server/src/argilla_server/search_engine/commons.py
+++ b/argilla-server/src/argilla_server/search_engine/commons.py
@@ -33,6 +33,7 @@
 )
 from argilla_server.search_engine.base import (
     AndFilter,
+    FieldFilterScope,
     Filter,
     FilterScope,
     FloatMetadataFilter,
@@ -43,6 +44,7 @@
     MetadataFilterScope,
     MetadataMetrics,
     Order,
+    QueryStringFilter,
     RangeFilter,
     RecordFilterScope,
     ResponseFilterScope,
@@ -56,8 +58,6 @@
     TermsMetadataMetrics,
     TextQuery,
     UserResponseStatusFilter,
-    FieldFilterScope,
-    QueryStringFilter,
 )

 ALL_RESPONSES_STATUSES_FIELD = "all_responses_statuses"
diff --git a/argilla-server/tests/profiling/__init__.py b/argilla-server/tests/profiling/__init__.py
index e69de29bb2..55be41799b 100644
--- a/argilla-server/tests/profiling/__init__.py
+++ b/argilla-server/tests/profiling/__init__.py
@@ -0,0 +1,13 @@
+# Copyright 2021-present, the Recognai S.L. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
diff --git a/argilla-server/tests/profiling/conftest.py b/argilla-server/tests/profiling/conftest.py
index 4801f7ef87..d2cdc291b1 100644
--- a/argilla-server/tests/profiling/conftest.py
+++ b/argilla-server/tests/profiling/conftest.py
@@ -1,5 +1,18 @@
-import pytest
+# Copyright 2021-present, the Recognai S.L. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import pytest

 from argilla_server.settings import settings

diff --git a/argilla-server/tests/profiling/test_records_bulk.py b/argilla-server/tests/profiling/test_records_bulk.py
index db4db3128e..a53daff93e 100644
--- a/argilla-server/tests/profiling/test_records_bulk.py
+++ b/argilla-server/tests/profiling/test_records_bulk.py
@@ -1,17 +1,31 @@
-from typing import Union
+# Copyright 2021-present, the Recognai S.L. team.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.

-import pytest
-from pyinstrument import Profiler
-from sqlalchemy.ext.asyncio import AsyncSession
+from typing import Union

 import argilla_server.apis.v1.handlers.datasets.records_bulk
+import pytest
 from argilla_server.contexts import datasets
 from argilla_server.models import Dataset, Question, VectorSettings
-from argilla_server.schemas.v1.records import RecordsCreate, RecordCreate
+from argilla_server.schemas.v1.records import RecordCreate, RecordsCreate
 from argilla_server.schemas.v1.records_bulk import RecordsBulkCreate
 from argilla_server.schemas.v1.responses import UserDraftResponseCreate
 from argilla_server.schemas.v1.suggestions import SuggestionCreate
 from argilla_server.search_engine import ElasticSearchEngine
+from pyinstrument import Profiler
+from sqlalchemy.ext.asyncio import AsyncSession
+
 from tests.factories import UserFactory
 from tests.unit.api.v1.datasets.records.records_bulk.test_dataset_records_bulk import TestDatasetRecordsBulk
@@ -30,9 +44,7 @@ def _get_dataset_vector_settings_by_name(dataset: Dataset, name: str) -> Union[

 @pytest.mark.asyncio
 class TestUpsertRecordsBulk:
-
     async def test_profiling_for_create_records(self, db: AsyncSession, elasticsearch_config: dict):
-
         engine = ElasticSearchEngine(config=elasticsearch_config, number_of_replicas=0, number_of_shards=1)

         dataset = await TestDatasetRecordsBulk().test_dataset()
diff --git a/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk.py b/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk.py
index a59ef01569..0661cdc690 100644
--- a/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk.py
+++ b/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk.py
@@ -31,7 +31,6 @@

 @pytest.mark.asyncio
 class TestDatasetRecordsBulk:
-
     def url(self, dataset_id: UUID) -> str:
         return f"/api/v1/datasets/{dataset_id}/records/bulk"

diff --git a/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk_with_responses.py b/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk_with_responses.py
index 156154db45..c3f2ea0299 100644
--- a/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk_with_responses.py
+++ b/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk_with_responses.py
@@ -34,7 +34,6 @@

 @pytest.mark.asyncio
 class TestDatasetRecordsBulkWithResponses:
-
     def url(self, dataset_id: UUID) -> str:
         return f"/api/v1/datasets/{dataset_id}/records/bulk"

@@ -361,7 +360,6 @@ async def _configure_dataset_fields(self, dataset: Dataset):
         await dataset.awaitable_attrs.fields

     async def _configure_dataset_questions(self, dataset: Dataset):
-
         await LabelSelectionQuestionFactory.create(
             dataset=dataset,
             name="label",
diff --git a/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk_with_suggestions.py b/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk_with_suggestions.py
index c6028a4871..9d91d1fc35 100644
--- a/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk_with_suggestions.py
+++ b/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk_with_suggestions.py
@@ -33,7 +33,6 @@

 @pytest.mark.asyncio
 class TestDatasetRecordsBulkWithSuggestions:
-
     def url(self, dataset_id: UUID) -> str:
         return f"/api/v1/datasets/{dataset_id}/records/bulk"

diff --git a/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk_with_vectors.py b/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk_with_vectors.py
index 37ed298866..dee8f9d194 100644
--- a/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk_with_vectors.py
+++ b/argilla-server/tests/unit/api/v1/datasets/records/records_bulk/test_dataset_records_bulk_with_vectors.py
@@ -31,7 +31,6 @@

 @pytest.mark.asyncio
 class TestDatasetRecordsBulkWithVectors:
-
     def url(self, dataset_id: UUID) -> str:
         return f"/api/v1/datasets/{dataset_id}/records/bulk"

diff --git a/argilla-server/tests/unit/search_engine/test_commons.py b/argilla-server/tests/unit/search_engine/test_commons.py
index 53229197c3..e8b542e6b3 100644
--- a/argilla-server/tests/unit/search_engine/test_commons.py
+++ b/argilla-server/tests/unit/search_engine/test_commons.py
@@ -28,7 +28,7 @@
     TextQuery,
     UserResponseStatusFilter,
 )
-from argilla_server.search_engine.base import QueryStringFilter, FieldFilterScope
+from argilla_server.search_engine.base import FieldFilterScope, QueryStringFilter
 from argilla_server.search_engine.commons import (
     ALL_RESPONSES_STATUSES_FIELD,
     BaseElasticAndOpenSearchEngine,
diff --git a/argilla-server/tests/unit/validators/test_records_bulk.py b/argilla-server/tests/unit/validators/test_records_bulk.py
index 64b3f69cee..061551e58b 100644
--- a/argilla-server/tests/unit/validators/test_records_bulk.py
+++ b/argilla-server/tests/unit/validators/test_records_bulk.py
@@ -24,7 +24,6 @@

 @pytest.mark.asyncio
 class TestRecordsBulkValidators:
-
     async def configure_dataset(self) -> Dataset:
         dataset = await DatasetFactory.create(status="ready")

diff --git a/tests/unit/client/feedback/integrations/huggingface/test_dataset.py b/tests/unit/client/feedback/integrations/huggingface/test_dataset.py
index 84c467bfa4..8d222df3a6 100644
--- a/tests/unit/client/feedback/integrations/huggingface/test_dataset.py
+++ b/tests/unit/client/feedback/integrations/huggingface/test_dataset.py
@@ -15,6 +15,7 @@
 from typing import Any, Dict

 import pytest
+
 from argilla import SuggestionSchema
 from argilla.client.feedback.dataset.local.dataset import FeedbackDataset
 from argilla.client.feedback.integrations.huggingface.dataset import HuggingFaceDatasetMixin