diff --git a/CHANGELOG.md b/CHANGELOG.md index 5ab62bf7fd..3c168ef59d 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -25,6 +25,7 @@ These are the section headers that we use: - API v1 responses returning `Response` schema now always include `record_id` as attribute. ([#4482](https://github.com/argilla-io/argilla/pull/4482)) - API v1 responses returning `Question` schema now always include `dataset_id` attribute. ([#4487](https://github.com/argilla-io/argilla/pull/4487)) - API v1 responses returning `Field` schema now always include `dataset_id` attribute. ([#4488](https://github.com/argilla-io/argilla/pull/4488)) +- API v1 responses returning `MetadataProperty` schema now always include `dataset_id` attribute. ([#4489](https://github.com/argilla-io/argilla/pull/4489)) ### Changed diff --git a/src/argilla/server/apis/v1/handlers/datasets/datasets.py b/src/argilla/server/apis/v1/handlers/datasets/datasets.py index 626c932948..1e5ccb4b2b 100644 --- a/src/argilla/server/apis/v1/handlers/datasets/datasets.py +++ b/src/argilla/server/apis/v1/handlers/datasets/datasets.py @@ -30,14 +30,12 @@ DatasetMetrics, Datasets, DatasetUpdate, - MetadataProperties, - MetadataProperty, - MetadataPropertyCreate, VectorSettings, VectorSettingsCreate, VectorsSettings, ) from argilla.server.schemas.v1.fields import Field, FieldCreate, Fields +from argilla.server.schemas.v1.metadata_properties import MetadataProperties, MetadataProperty, MetadataPropertyCreate from argilla.server.schemas.v1.questions import Question, QuestionCreate, Questions from argilla.server.search_engine import ( SearchEngine, diff --git a/src/argilla/server/contexts/datasets.py b/src/argilla/server/contexts/datasets.py index c4585a6481..8965b97468 100644 --- a/src/argilla/server/contexts/datasets.py +++ b/src/argilla/server/contexts/datasets.py @@ -53,13 +53,12 @@ from argilla.server.models.suggestions import SuggestionCreateWithRecordId from argilla.server.schemas.v1.datasets import ( DatasetCreate, - MetadataPropertyCreate, ) from argilla.server.schemas.v1.datasets import ( VectorSettings as VectorSettingsSchema, ) from argilla.server.schemas.v1.fields import FieldCreate -from argilla.server.schemas.v1.metadata_properties import MetadataPropertyUpdate +from argilla.server.schemas.v1.metadata_properties import MetadataPropertyCreate, MetadataPropertyUpdate from argilla.server.schemas.v1.questions import QuestionCreate from argilla.server.schemas.v1.records import ( RecordCreate, diff --git a/src/argilla/server/schemas/v1/datasets.py b/src/argilla/server/schemas/v1/datasets.py index b31f17639c..b6a62d0ebe 100644 --- a/src/argilla/server/schemas/v1/datasets.py +++ b/src/argilla/server/schemas/v1/datasets.py @@ -13,16 +13,15 @@ # limitations under the License. from datetime import datetime -from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union +from typing import List, Literal, Optional, Union from uuid import UUID from fastapi import Query -from argilla.server.enums import DatasetStatus, MetadataPropertyType, SimilarityOrder, SortOrder -from argilla.server.pydantic_v1 import BaseModel, PositiveInt, constr, root_validator -from argilla.server.pydantic_v1 import Field as PydanticField -from argilla.server.pydantic_v1.generics import GenericModel +from argilla.server.enums import DatasetStatus, SimilarityOrder, SortOrder +from argilla.server.pydantic_v1 import BaseModel, Field, PositiveInt, constr, root_validator from argilla.server.schemas.base import UpdateSchema +from argilla.server.schemas.v1.metadata_properties import MetadataPropertyName from argilla.server.schemas.v1.questions import QuestionName from argilla.server.schemas.v1.records import Record, RecordFilterScope from argilla.server.schemas.v1.responses import ResponseFilterScope @@ -39,21 +38,12 @@ DATASET_GUIDELINES_MIN_LENGTH = 1 DATASET_GUIDELINES_MAX_LENGTH = 10000 -METADATA_PROPERTY_CREATE_NAME_REGEX = r"^(?=.*[a-z0-9])[a-z0-9_-]+$" -METADATA_PROPERTY_CREATE_NAME_MIN_LENGTH = 1 -METADATA_PROPERTY_CREATE_NAME_MAX_LENGTH = 200 -METADATA_PROPERTY_CREATE_TITLE_MIN_LENGTH = 1 -METADATA_PROPERTY_CREATE_TITLE_MAX_LENGTH = 500 - VECTOR_SETTINGS_CREATE_NAME_REGEX = r"^(?=.*[a-z0-9])[a-z0-9_-]+$" VECTOR_SETTINGS_CREATE_NAME_MIN_LENGTH = 1 VECTOR_SETTINGS_CREATE_NAME_MAX_LENGTH = 200 VECTOR_SETTINGS_CREATE_TITLE_MIN_LENGTH = 1 VECTOR_SETTINGS_CREATE_TITLE_MAX_LENGTH = 500 -TERMS_METADATA_PROPERTY_VALUES_MIN_ITEMS = 1 -TERMS_METADATA_PROPERTY_VALUES_MAX_ITEMS = 250 - TERMS_FILTER_VALUES_MIN_ITEMS = 1 TERMS_FILTER_VALUES_MAX_ITEMS = 250 @@ -85,12 +75,12 @@ class Datasets(BaseModel): DatasetName = Annotated[ constr(regex=DATASET_NAME_REGEX, min_length=DATASET_NAME_MIN_LENGTH, max_length=DATASET_NAME_MAX_LENGTH), - PydanticField(..., description="Dataset name"), + Field(..., description="Dataset name"), ] DatasetGuidelines = Annotated[ constr(min_length=DATASET_GUIDELINES_MIN_LENGTH, max_length=DATASET_GUIDELINES_MAX_LENGTH), - PydanticField(..., description="Dataset guidelines"), + Field(..., description="Dataset guidelines"), ] @@ -151,12 +141,12 @@ class VectorsSettings(BaseModel): min_length=VECTOR_SETTINGS_CREATE_TITLE_MIN_LENGTH, max_length=VECTOR_SETTINGS_CREATE_TITLE_MAX_LENGTH, ), - PydanticField(..., description="The title of the vector settings"), + Field(..., description="The title of the vector settings"), ] class VectorSettingsCreate(BaseModel): - name: str = PydanticField( + name: str = Field( ..., regex=VECTOR_SETTINGS_CREATE_NAME_REGEX, min_length=VECTOR_SETTINGS_CREATE_NAME_MIN_LENGTH, @@ -167,107 +157,6 @@ class VectorSettingsCreate(BaseModel): dimensions: PositiveInt -NT = TypeVar("NT", int, float) - - -class NumericMetadataProperty(GenericModel, Generic[NT]): - min: Optional[NT] = None - max: Optional[NT] = None - - @root_validator(skip_on_failure=True) - def check_bounds(cls, values: Dict[str, Any]) -> Dict[str, Any]: - min = values.get("min") - max = values.get("max") - - if min is not None and max is not None and min >= max: - raise ValueError(f"'min' ({min}) must be lower than 'max' ({max})") - - return values - - -class TermsMetadataPropertyCreate(BaseModel): - type: Literal[MetadataPropertyType.terms] - values: Optional[List[str]] = PydanticField( - None, min_items=TERMS_METADATA_PROPERTY_VALUES_MIN_ITEMS, max_items=TERMS_METADATA_PROPERTY_VALUES_MAX_ITEMS - ) - - -class IntegerMetadataPropertyCreate(NumericMetadataProperty[int]): - type: Literal[MetadataPropertyType.integer] - - -class FloatMetadataPropertyCreate(NumericMetadataProperty[float]): - type: Literal[MetadataPropertyType.float] - - -MetadataPropertyName = Annotated[ - str, - PydanticField( - ..., - regex=METADATA_PROPERTY_CREATE_NAME_REGEX, - min_length=METADATA_PROPERTY_CREATE_NAME_MIN_LENGTH, - max_length=METADATA_PROPERTY_CREATE_NAME_MAX_LENGTH, - ), -] - -MetadataPropertyTitle = Annotated[ - constr(min_length=METADATA_PROPERTY_CREATE_TITLE_MIN_LENGTH, max_length=METADATA_PROPERTY_CREATE_TITLE_MAX_LENGTH), - PydanticField(..., description="The title of the metadata property"), -] - -MetadataPropertySettingsCreate = Annotated[ - Union[TermsMetadataPropertyCreate, IntegerMetadataPropertyCreate, FloatMetadataPropertyCreate], - PydanticField(..., discriminator="type"), -] - - -class MetadataPropertyCreate(BaseModel): - name: MetadataPropertyName - title: MetadataPropertyTitle - settings: MetadataPropertySettingsCreate - visible_for_annotators: bool = True - - -class TermsMetadataProperty(BaseModel): - type: Literal[MetadataPropertyType.terms] - values: Optional[List[str]] = None - - -class IntegerMetadataProperty(BaseModel): - type: Literal[MetadataPropertyType.integer] - min: Optional[int] = None - max: Optional[int] = None - - -class FloatMetadataProperty(BaseModel): - type: Literal[MetadataPropertyType.float] - min: Optional[float] = None - max: Optional[float] = None - - -MetadataPropertySettings = Annotated[ - Union[TermsMetadataProperty, IntegerMetadataProperty, FloatMetadataProperty], - PydanticField(..., discriminator="type"), -] - - -class MetadataProperty(BaseModel): - id: UUID - name: str - title: str - settings: MetadataPropertySettings - visible_for_annotators: bool - inserted_at: datetime - updated_at: datetime - - class Config: - orm_mode = True - - -class MetadataProperties(BaseModel): - items: List[MetadataProperty] - - class MetadataParsedQueryParam: def __init__(self, string: str): k, *v = string.split(":", maxsplit=1) @@ -277,7 +166,7 @@ def __init__(self, string: str): class MetadataQueryParams(BaseModel): - metadata: List[str] = PydanticField(Query([], pattern=r"^(?=.*[a-z0-9])[a-z0-9_-]+:(.+(,(.+))*)$")) + metadata: List[str] = Field(Query([], pattern=r"^(?=.*[a-z0-9])[a-z0-9_-]+:(.+(,(.+))*)$")) @property def metadata_parsed(self) -> List[MetadataParsedQueryParam]: @@ -321,16 +210,14 @@ class MetadataFilterScope(BaseModel): FilterScope = Annotated[ Union[RecordFilterScope, ResponseFilterScope, SuggestionFilterScope, MetadataFilterScope], - PydanticField(..., discriminator="entity"), + Field(..., discriminator="entity"), ] class TermsFilter(BaseModel): type: Literal["terms"] scope: FilterScope - values: List[str] = PydanticField( - ..., min_items=TERMS_FILTER_VALUES_MIN_ITEMS, max_items=TERMS_FILTER_VALUES_MAX_ITEMS - ) + values: List[str] = Field(..., min_items=TERMS_FILTER_VALUES_MIN_ITEMS, max_items=TERMS_FILTER_VALUES_MAX_ITEMS) class RangeFilter(BaseModel): @@ -352,13 +239,11 @@ def check_ge_and_le(cls, values: dict) -> dict: return values -Filter = Annotated[Union[TermsFilter, RangeFilter], PydanticField(..., discriminator="type")] +Filter = Annotated[Union[TermsFilter, RangeFilter], Field(..., discriminator="type")] class Filters(BaseModel): - and_: List[Filter] = PydanticField( - None, alias="and", min_items=FILTERS_AND_MIN_ITEMS, max_items=FILTERS_AND_MAX_ITEMS - ) + and_: List[Filter] = Field(None, alias="and", min_items=FILTERS_AND_MIN_ITEMS, max_items=FILTERS_AND_MAX_ITEMS) class Order(BaseModel): @@ -369,7 +254,7 @@ class Order(BaseModel): class SearchRecordsQuery(BaseModel): query: Optional[Query] filters: Optional[Filters] - sort: Optional[List[Order]] = PydanticField( + sort: Optional[List[Order]] = Field( None, min_items=SEARCH_RECORDS_QUERY_SORT_MIN_ITEMS, max_items=SEARCH_RECORDS_QUERY_SORT_MAX_ITEMS ) diff --git a/src/argilla/server/schemas/v1/metadata_properties.py b/src/argilla/server/schemas/v1/metadata_properties.py index 62b1fbf760..eaf208e00e 100644 --- a/src/argilla/server/schemas/v1/metadata_properties.py +++ b/src/argilla/server/schemas/v1/metadata_properties.py @@ -13,17 +13,28 @@ # limitations under the License. from datetime import datetime -from typing import Generic, List, Literal, Optional, TypeVar, Union +from typing import Annotated, Any, Dict, Generic, List, Literal, Optional, TypeVar, Union from uuid import UUID +from typing_extensions import Annotated + from argilla.server.enums import MetadataPropertyType -from argilla.server.pydantic_v1 import BaseModel, Field, validator +from argilla.server.pydantic_v1 import BaseModel, Field, constr, root_validator, validator from argilla.server.pydantic_v1.generics import GenericModel from argilla.server.schemas.base import UpdateSchema -from argilla.server.schemas.v1.datasets import MetadataPropertySettings, MetadataPropertyTitle FLOAT_METADATA_METRICS_PRECISION = 5 +METADATA_PROPERTY_CREATE_NAME_REGEX = r"^(?=.*[a-z0-9])[a-z0-9_-]+$" +METADATA_PROPERTY_CREATE_NAME_MIN_LENGTH = 1 +METADATA_PROPERTY_CREATE_NAME_MAX_LENGTH = 200 + +METADATA_PROPERTY_CREATE_TITLE_MIN_LENGTH = 1 +METADATA_PROPERTY_CREATE_TITLE_MAX_LENGTH = 500 + +TERMS_METADATA_PROPERTY_VALUES_MIN_ITEMS = 1 +TERMS_METADATA_PROPERTY_VALUES_MAX_ITEMS = 250 + try: from typing import Annotated except ImportError: @@ -67,6 +78,82 @@ def round_result(cls, v: float): ] +class TermsMetadataProperty(BaseModel): + type: Literal[MetadataPropertyType.terms] + values: Optional[List[str]] = None + + +class IntegerMetadataProperty(BaseModel): + type: Literal[MetadataPropertyType.integer] + min: Optional[int] = None + max: Optional[int] = None + + +class FloatMetadataProperty(BaseModel): + type: Literal[MetadataPropertyType.float] + min: Optional[float] = None + max: Optional[float] = None + + +MetadataPropertySettings = Annotated[ + Union[TermsMetadataProperty, IntegerMetadataProperty, FloatMetadataProperty], + Field(..., discriminator="type"), +] + + +MetadataPropertyName = Annotated[ + str, + Field( + ..., + regex=METADATA_PROPERTY_CREATE_NAME_REGEX, + min_length=METADATA_PROPERTY_CREATE_NAME_MIN_LENGTH, + max_length=METADATA_PROPERTY_CREATE_NAME_MAX_LENGTH, + ), +] + + +MetadataPropertyTitle = Annotated[ + constr(min_length=METADATA_PROPERTY_CREATE_TITLE_MIN_LENGTH, max_length=METADATA_PROPERTY_CREATE_TITLE_MAX_LENGTH), + Field(..., description="The title of the metadata property"), +] + + +class NumericMetadataProperty(GenericModel, Generic[NT]): + min: Optional[NT] = None + max: Optional[NT] = None + + @root_validator(skip_on_failure=True) + def check_bounds(cls, values: Dict[str, Any]) -> Dict[str, Any]: + min = values.get("min") + max = values.get("max") + + if min is not None and max is not None and min >= max: + raise ValueError(f"'min' ({min}) must be lower than 'max' ({max})") + + return values + + +class TermsMetadataPropertyCreate(BaseModel): + type: Literal[MetadataPropertyType.terms] + values: Optional[List[str]] = Field( + None, min_items=TERMS_METADATA_PROPERTY_VALUES_MIN_ITEMS, max_items=TERMS_METADATA_PROPERTY_VALUES_MAX_ITEMS + ) + + +class IntegerMetadataPropertyCreate(NumericMetadataProperty[int]): + type: Literal[MetadataPropertyType.integer] + + +class FloatMetadataPropertyCreate(NumericMetadataProperty[float]): + type: Literal[MetadataPropertyType.float] + + +MetadataPropertySettingsCreate = Annotated[ + Union[TermsMetadataPropertyCreate, IntegerMetadataPropertyCreate, FloatMetadataPropertyCreate], + Field(..., discriminator="type"), +] + + class MetadataProperty(BaseModel): id: UUID name: str @@ -81,6 +168,17 @@ class Config: orm_mode = True +class MetadataProperties(BaseModel): + items: List[MetadataProperty] + + +class MetadataPropertyCreate(BaseModel): + name: MetadataPropertyName + title: MetadataPropertyTitle + settings: MetadataPropertySettingsCreate + visible_for_annotators: bool = True + + class MetadataPropertyUpdate(UpdateSchema): title: Optional[MetadataPropertyTitle] visible_for_annotators: Optional[bool] diff --git a/tests/unit/server/api/v1/test_datasets.py b/tests/unit/server/api/v1/test_datasets.py index 3855e4e942..193b1b6cda 100644 --- a/tests/unit/server/api/v1/test_datasets.py +++ b/tests/unit/server/api/v1/test_datasets.py @@ -44,13 +44,15 @@ from argilla.server.schemas.v1.datasets import ( DATASET_GUIDELINES_MAX_LENGTH, DATASET_NAME_MAX_LENGTH, - METADATA_PROPERTY_CREATE_NAME_MAX_LENGTH, - METADATA_PROPERTY_CREATE_TITLE_MAX_LENGTH, - TERMS_METADATA_PROPERTY_VALUES_MAX_ITEMS, VECTOR_SETTINGS_CREATE_NAME_MAX_LENGTH, VECTOR_SETTINGS_CREATE_TITLE_MAX_LENGTH, ) from argilla.server.schemas.v1.fields import FIELD_CREATE_NAME_MAX_LENGTH, FIELD_CREATE_TITLE_MAX_LENGTH +from argilla.server.schemas.v1.metadata_properties import ( + METADATA_PROPERTY_CREATE_NAME_MAX_LENGTH, + METADATA_PROPERTY_CREATE_TITLE_MAX_LENGTH, + TERMS_METADATA_PROPERTY_VALUES_MAX_ITEMS, +) from argilla.server.schemas.v1.questions import ( QUESTION_CREATE_DESCRIPTION_MAX_LENGTH, QUESTION_CREATE_NAME_MAX_LENGTH, @@ -480,6 +482,7 @@ async def test_list_current_user_dataset_metadata_properties( "title": terms_property.title, "settings": {"type": "terms", "values": ["a", "b", "c"]}, "visible_for_annotators": True, + "dataset_id": str(terms_property.dataset_id), "inserted_at": terms_property.inserted_at.isoformat(), "updated_at": terms_property.updated_at.isoformat(), }, @@ -489,6 +492,7 @@ async def test_list_current_user_dataset_metadata_properties( "title": integer_property.title, "settings": {"type": "integer", "min": None, "max": None}, "visible_for_annotators": True, + "dataset_id": str(integer_property.dataset_id), "inserted_at": integer_property.inserted_at.isoformat(), "updated_at": integer_property.updated_at.isoformat(), }, @@ -498,6 +502,7 @@ async def test_list_current_user_dataset_metadata_properties( "title": float_property.title, "settings": {"type": "float", "min": None, "max": None}, "visible_for_annotators": True, + "dataset_id": str(float_property.dataset_id), "inserted_at": float_property.inserted_at.isoformat(), "updated_at": float_property.updated_at.isoformat(), }, @@ -1688,6 +1693,7 @@ async def test_create_dataset_metadata_property( "title": "title", "settings": expected_settings, "visible_for_annotators": True, + "dataset_id": str(dataset.id), "inserted_at": datetime.fromisoformat(response_body["inserted_at"]).isoformat(), "updated_at": datetime.fromisoformat(response_body["updated_at"]).isoformat(), } @@ -1720,6 +1726,7 @@ async def test_create_dataset_metadata_property_with_dataset_ready( assert response_body == { "id": str(UUID(response_body["id"])), "visible_for_annotators": True, + "dataset_id": str(dataset.id), "inserted_at": datetime.fromisoformat(response_body["inserted_at"]).isoformat(), "updated_at": datetime.fromisoformat(response_body["updated_at"]).isoformat(), **metadata_property_json, diff --git a/tests/unit/server/api/v1/test_metadata_properties.py b/tests/unit/server/api/v1/test_metadata_properties.py index 216bb1269f..4240fbb8b9 100644 --- a/tests/unit/server/api/v1/test_metadata_properties.py +++ b/tests/unit/server/api/v1/test_metadata_properties.py @@ -20,7 +20,7 @@ from argilla.server.constants import API_KEY_HEADER_NAME from argilla.server.enums import MetadataPropertyType, UserRole from argilla.server.models import MetadataProperty, UserRole -from argilla.server.schemas.v1.datasets import METADATA_PROPERTY_CREATE_TITLE_MAX_LENGTH +from argilla.server.schemas.v1.metadata_properties import METADATA_PROPERTY_CREATE_TITLE_MAX_LENGTH from argilla.server.search_engine import FloatMetadataMetrics, IntegerMetadataMetrics, TermsMetadataMetrics from sqlalchemy import func, select