Skip to content

Commit

Permalink
ci: Define a clean workflow for argilla-server
Browse files Browse the repository at this point in the history
  • Loading branch information
frascuchon committed May 1, 2024
1 parent 23ffc5f commit 18bfffb
Show file tree
Hide file tree
Showing 15 changed files with 218 additions and 28 deletions.
168 changes: 168 additions & 0 deletions .github/workflows/argilla-server.workflow.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,168 @@
name: Build `argilla-server` package

concurrency:
  # One run per PR (or per ref for manual dispatches); newer runs cancel older ones.
  group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }}
  cancel-in-progress: true

on:
  workflow_dispatch:

  pull_request:
    paths:
      - 'argilla-server/**'
    types:
      - opened
      - edited
      - reopened
      - synchronize
      - ready_for_review

jobs:
  build:
    name: Build `argilla-server` package
    runs-on: ubuntu-latest

    defaults:
      run:
        shell: bash -l {0}
        working-directory: ./argilla-server

    services:
      search_engine:
        image: docker.elastic.co/elasticsearch/elasticsearch:8.8.2
        ports:
          # Quoted: colon-separated scalars can be misread by YAML 1.1 parsers.
          - "9200:9200"
        env:
          discovery.type: single-node
          # Env values must be strings; an unquoted `false` is a YAML boolean.
          xpack.security.enabled: "false"
      # NOTE(review): GitHub Actions cannot start a job service conditionally,
      # so this postgres container ALWAYS runs. The previous comment claiming it
      # only runs when a `postgresDockerImage` input is provided was stale —
      # confirm whether postgres is actually needed for the unit-test run.
      postgres:
        image: postgres:14
        env:
          POSTGRES_HOST: localhost
          POSTGRES_USER: postgres
          POSTGRES_PASSWORD: postgres
          POSTGRES_DB: argilla
        options: >-
          --health-cmd pg_isready
          --health-interval 10s
          --health-timeout 5s
          --health-retries 5
        ports:
          - "5432:5432"

    env:
      # Disable telemetry during CI runs; quoted so the value stays a string.
      ARGILLA_ENABLE_TELEMETRY: "0"

    steps:
      - name: Checkout Code 🛎
        uses: actions/checkout@v4

      - name: Setup PDM
        uses: pdm-project/setup-pdm@v4
        with:
          python-version-file: argilla-server/pyproject.toml
          cache-dependency-path: argilla-server/pdm.lock
          cache: true

      - name: Install dependencies
        run: pdm install

      - name: Run tests 📈
        # FIX: the original script put `ARGILLA_DATABASE_URL=...` on its own
        # line inside `run`, which only created an unexported shell variable
        # that the following `pdm test` command never saw. Passing it through
        # the step's `env` map makes it visible to the test process.
        env:
          ARGILLA_DATABASE_URL: postgresql://postgres:postgres@localhost:5432/argilla
        run: pdm test tests/unit -vs --cov=argilla_server --cov-report=xml:coverage.xml

      - name: Upload test coverage
        uses: codecov/codecov-action@v4
        with:
          # FIX: codecov-action v4 takes `files` (plural); `file` is deprecated.
          files: coverage.xml

      - name: Build package
        run: pdm build

      - name: Upload artifact
        uses: actions/upload-artifact@v4
        with:
          name: argilla-server
          path: argilla-server/dist

  build_server_docker_image:
    name: Build Argilla server docker image
    uses: ./.github/workflows/argilla-server.build-push-docker.yml
    if: github.event_name == 'pull_request' && github.event.pull_request.draft == false
    needs:
      - build
    with:
      download-python-package: true
      image-name: argilla/argilla-server
      dockerfile: argilla-server/docker/server/Dockerfile
      context: argilla-server/docker/server
      readme: argilla-server/README.md
      platforms: linux/amd64,linux/arm64
    secrets: inherit

  build_quickstart_docker_image:
    name: Build Argilla quickstart docker image
    uses: ./.github/workflows/argilla-server.build-push-docker.yml
    needs: build_server_docker_image
    if: github.event_name == 'pull_request' && github.event.pull_request.draft == false
    with:
      download-python-package: false
      image-name: argilla/argilla-quickstart
      dockerfile: argilla-server/docker/quickstart/Dockerfile
      context: argilla-server/docker/quickstart
      readme: argilla-server/docker/quickstart/README.md
      platforms: linux/amd64,linux/arm64
      build-args: |
        ARGILLA_VERSION=${{ needs.build_server_docker_image.outputs.version }}
    secrets: inherit

  # This job publishes the argilla-server python package to PyPI.
  # NOTE(review): this workflow's `on:` block has no `release` trigger, so
  # `github.event_name == 'release'` can never be true here — this job is
  # currently unreachable dead code. Moreover, the docker-image jobs it
  # `needs` are gated on `pull_request`, so they would be skipped on a release
  # event anyway (which would also skip this job). Confirm whether a
  # `release:` trigger (plus adjusted job conditions) should be added, or
  # whether publishing belongs in a separate release workflow.
  publish_release:
    name: Publish Release
    runs-on: ubuntu-latest
    if: ${{ github.event_name == 'release' }}

    needs:
      - build
      - build_server_docker_image
      - build_quickstart_docker_image

    permissions:
      # This permission is needed for private repositories.
      # contents: read
      # IMPORTANT: this permission is mandatory for trusted publishing on PyPI
      id-token: write

    defaults:
      run:
        shell: bash -l {0}
        working-directory: ./argilla-server

    steps:
      - name: Checkout Code 🛎
        uses: actions/checkout@v4

      - name: Download python package
        uses: actions/download-artifact@v4
        with:
          name: argilla-server
          path: argilla-server/dist

      - name: Setup PDM
        uses: pdm-project/setup-pdm@v4
        with:
          cache: true

      - name: Publish Package to PyPI test environment 🥪
        run: pdm publish --no-build --repository testpypi

      - name: Test Installing 🍿
        # Derives the version from the release tag (refs/tags/vX.Y.Z -> X.Y.Z).
        run: pip install --index-url https://test.pypi.org/simple --no-deps argilla-server==${GITHUB_REF#refs/*/v}

      - name: Publish Package to PyPI 🥩
        run: pdm publish --no-build
Original file line number Diff line number Diff line change
Expand Up @@ -134,5 +134,4 @@ async def search(
dataset_id: UUID,
search_query: RecordsSearchQuery,
):

return RecordsSearchResponse(query=search_query)
8 changes: 0 additions & 8 deletions argilla-server/src/argilla_server/bulk/records_bulk.py
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,6 @@


class CreateRecordsBulk:

def __init__(self, db: AsyncSession, search_engine: SearchEngine):
self._db = db
self._search_engine = search_engine
Expand Down Expand Up @@ -73,7 +72,6 @@ async def create_records_bulk(self, dataset: Dataset, bulk_create: RecordsBulkCr
return RecordsBulk(items=records)

async def _upsert_records_relationships(self, records: List[Record], records_create: List[RecordCreate]) -> None:

records_and_suggestions = list(zip(records, [r.suggestions for r in records_create]))
records_and_responses = list(zip(records, [r.responses for r in records_create]))
records_and_vectors = list(zip(records, [r.vectors for r in records_create]))
Expand All @@ -87,7 +85,6 @@ async def _upsert_records_relationships(self, records: List[Record], records_cre
async def _upsert_records_suggestions(
self, records_and_suggestions: List[Tuple[Record, List[SuggestionCreate]]]
) -> List[Suggestion]:

upsert_many_suggestions = []
for idx, (record, suggestions) in enumerate(records_and_suggestions):
try:
Expand Down Expand Up @@ -118,7 +115,6 @@ async def _upsert_records_suggestions(
async def _upsert_records_responses(
self, records_and_responses: List[Tuple[Record, List[UserResponseCreate]]]
) -> List[Response]:

user_ids = [response.user_id for _, responses in records_and_responses for response in responses or []]
users_by_id = await fetch_users_by_ids_as_dict(self._db, user_ids)

Expand Down Expand Up @@ -147,7 +143,6 @@ async def _upsert_records_responses(
async def _upsert_records_vectors(
self, records_and_vectors: List[Tuple[Record, Dict[str, List[float]]]]
) -> List[Vector]:

upsert_many_vectors = []
for idx, (record, vectors) in enumerate(records_and_vectors):
try:
Expand Down Expand Up @@ -176,9 +171,7 @@ def _metadata_is_set(self, record_create: RecordCreate) -> bool:


class UpsertRecordsBulk(CreateRecordsBulk):

async def upsert_records_bulk(self, dataset: Dataset, bulk_upsert: RecordsBulkUpsert) -> RecordsBulkWithUpdateInfo:

found_records = await self._fetch_existing_dataset_records(dataset, bulk_upsert.items)
# found_records is passed to the validator to avoid querying the database again, but ideally, it should be
# computed inside the validator
Expand Down Expand Up @@ -220,7 +213,6 @@ async def _fetch_existing_dataset_records(
dataset: Dataset,
records_upsert: List[RecordUpsert],
) -> Dict[Union[str, UUID], Record]:

records_by_external_id = await fetch_records_by_external_ids_as_dict(
self._db, dataset, [r.external_id for r in records_upsert]
)
Expand Down
2 changes: 0 additions & 2 deletions argilla-server/src/argilla_server/contexts/records.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,15 +25,13 @@
async def list_dataset_records_by_ids(
db: AsyncSession, dataset_id: UUID, record_ids: Sequence[UUID]
) -> Sequence[Record]:

query = select(Record).filter(Record.id.in_(record_ids), Record.dataset_id == dataset_id)
return (await db.execute(query)).unique().scalars().all()


async def list_dataset_records_by_external_ids(
db: AsyncSession, dataset_id: UUID, external_ids: Sequence[str]
) -> Sequence[Record]:

query = (
select(Record)
.filter(Record.external_id.in_(external_ids), Record.dataset_id == dataset_id)
Expand Down
4 changes: 2 additions & 2 deletions argilla-server/src/argilla_server/search_engine/commons.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@
)
from argilla_server.search_engine.base import (
AndFilter,
FieldFilterScope,
Filter,
FilterScope,
FloatMetadataFilter,
Expand All @@ -43,6 +44,7 @@
MetadataFilterScope,
MetadataMetrics,
Order,
QueryStringFilter,
RangeFilter,
RecordFilterScope,
ResponseFilterScope,
Expand All @@ -56,8 +58,6 @@
TermsMetadataMetrics,
TextQuery,
UserResponseStatusFilter,
FieldFilterScope,
QueryStringFilter,
)

ALL_RESPONSES_STATUSES_FIELD = "all_responses_statuses"
Expand Down
13 changes: 13 additions & 0 deletions argilla-server/tests/profiling/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
# Copyright 2021-present, the Recognai S.L. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
15 changes: 14 additions & 1 deletion argilla-server/tests/profiling/conftest.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,18 @@
import pytest
# Copyright 2021-present, the Recognai S.L. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from argilla_server.settings import settings


Expand Down
26 changes: 19 additions & 7 deletions argilla-server/tests/profiling/test_records_bulk.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,31 @@
from typing import Union
# Copyright 2021-present, the Recognai S.L. team.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import pytest
from pyinstrument import Profiler
from sqlalchemy.ext.asyncio import AsyncSession
from typing import Union

import argilla_server.apis.v1.handlers.datasets.records_bulk
import pytest
from argilla_server.contexts import datasets
from argilla_server.models import Dataset, Question, VectorSettings
from argilla_server.schemas.v1.records import RecordsCreate, RecordCreate
from argilla_server.schemas.v1.records import RecordCreate, RecordsCreate
from argilla_server.schemas.v1.records_bulk import RecordsBulkCreate
from argilla_server.schemas.v1.responses import UserDraftResponseCreate
from argilla_server.schemas.v1.suggestions import SuggestionCreate
from argilla_server.search_engine import ElasticSearchEngine
from pyinstrument import Profiler
from sqlalchemy.ext.asyncio import AsyncSession

from tests.factories import UserFactory
from tests.unit.api.v1.datasets.records.records_bulk.test_dataset_records_bulk import TestDatasetRecordsBulk

Expand All @@ -30,9 +44,7 @@ def _get_dataset_vector_settings_by_name(dataset: Dataset, name: str) -> Union["

@pytest.mark.asyncio
class TestUpsertRecordsBulk:

async def test_profiling_for_create_records(self, db: AsyncSession, elasticsearch_config: dict):

engine = ElasticSearchEngine(config=elasticsearch_config, number_of_replicas=0, number_of_shards=1)
dataset = await TestDatasetRecordsBulk().test_dataset()

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

@pytest.mark.asyncio
class TestDatasetRecordsBulk:

def url(self, dataset_id: UUID) -> str:
return f"/api/v1/datasets/{dataset_id}/records/bulk"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,6 @@

@pytest.mark.asyncio
class TestDatasetRecordsBulkWithResponses:

def url(self, dataset_id: UUID) -> str:
return f"/api/v1/datasets/{dataset_id}/records/bulk"

Expand Down Expand Up @@ -361,7 +360,6 @@ async def _configure_dataset_fields(self, dataset: Dataset):
await dataset.awaitable_attrs.fields

async def _configure_dataset_questions(self, dataset: Dataset):

await LabelSelectionQuestionFactory.create(
dataset=dataset,
name="label",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,6 @@

@pytest.mark.asyncio
class TestDatasetRecordsBulkWithSuggestions:

def url(self, dataset_id: UUID) -> str:
return f"/api/v1/datasets/{dataset_id}/records/bulk"

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -31,7 +31,6 @@

@pytest.mark.asyncio
class TestDatasetRecordsBulkWithVectors:

def url(self, dataset_id: UUID) -> str:
return f"/api/v1/datasets/{dataset_id}/records/bulk"

Expand Down
Loading

0 comments on commit 18bfffb

Please sign in to comment.