Skip to content

Commit

Permalink
refactor: move MetadataProperty schemas to its own file and unify can…
Browse files Browse the repository at this point in the history
…onical MetadataProperty schema adding dataset_id (#4489)

# Description

This PR includes the following changes:
* Move `MetadataProperty`-related schemas for API v1 to their own file at
`schemas/v1/metadata_properties.py`.
* Use a single canonical `MetadataProperty` schema, removing
duplicated definitions.
* The `MetadataProperty` schema will always include `dataset_id` as an
attribute.

Refs #4407 

**Type of change**

(Please delete options that are not relevant. Remember to title the PR
according to the type of change)

- [ ] Bug fix (non-breaking change which fixes an issue)
- [x] New feature (non-breaking change which adds functionality)
- [ ] Breaking change (fix or feature that would cause existing
functionality to not work as expected)
- [x] Refactor (change restructuring the codebase without changing
functionality)
- [ ] Improvement (change adding some improvement to an existing
functionality)
- [ ] Documentation update

**How Has This Been Tested**

- [ ] Modifying and running unit tests.

**Checklist**

- [ ] I added relevant documentation
- [x] follows the style guidelines of this project
- [x] I did a self-review of my code
- [ ] I made corresponding changes to the documentation
- [ ] My changes generate no new warnings
- [ ] I have added tests that prove my fix is effective or that my
feature works
- [ ] I filled out [the contributor form](https://tally.so/r/n9XrxK)
(see text above)
- [x] I have added relevant notes to the CHANGELOG.md file (See
https://keepachangelog.com/)
  • Loading branch information
jfcalvo committed Jan 12, 2024
1 parent 701ed33 commit 5298f5f
Show file tree
Hide file tree
Showing 7 changed files with 129 additions and 141 deletions.
1 change: 1 addition & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,6 +25,7 @@ These are the section headers that we use:
- API v1 responses returning `Response` schema now always include `record_id` as attribute. ([#4482](https://github.com/argilla-io/argilla/pull/4482))
- API v1 responses returning `Question` schema now always include `dataset_id` attribute. ([#4487](https://github.com/argilla-io/argilla/pull/4487))
- API v1 responses returning `Field` schema now always include `dataset_id` attribute. ([#4488](https://github.com/argilla-io/argilla/pull/4488))
- API v1 responses returning `MetadataProperty` schema now always include `dataset_id` attribute. ([#4489](https://github.com/argilla-io/argilla/pull/4489))

### Changed

Expand Down
4 changes: 1 addition & 3 deletions src/argilla/server/apis/v1/handlers/datasets/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -30,14 +30,12 @@
DatasetMetrics,
Datasets,
DatasetUpdate,
MetadataProperties,
MetadataProperty,
MetadataPropertyCreate,
VectorSettings,
VectorSettingsCreate,
VectorsSettings,
)
from argilla.server.schemas.v1.fields import Field, FieldCreate, Fields
from argilla.server.schemas.v1.metadata_properties import MetadataProperties, MetadataProperty, MetadataPropertyCreate
from argilla.server.schemas.v1.questions import Question, QuestionCreate, Questions
from argilla.server.search_engine import (
SearchEngine,
Expand Down
3 changes: 1 addition & 2 deletions src/argilla/server/contexts/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -53,13 +53,12 @@
from argilla.server.models.suggestions import SuggestionCreateWithRecordId
from argilla.server.schemas.v1.datasets import (
DatasetCreate,
MetadataPropertyCreate,
)
from argilla.server.schemas.v1.datasets import (
VectorSettings as VectorSettingsSchema,
)
from argilla.server.schemas.v1.fields import FieldCreate
from argilla.server.schemas.v1.metadata_properties import MetadataPropertyUpdate
from argilla.server.schemas.v1.metadata_properties import MetadataPropertyCreate, MetadataPropertyUpdate
from argilla.server.schemas.v1.questions import QuestionCreate
from argilla.server.schemas.v1.records import (
RecordCreate,
Expand Down
143 changes: 14 additions & 129 deletions src/argilla/server/schemas/v1/datasets.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,16 +13,15 @@
# limitations under the License.

from datetime import datetime
from typing import Any, Dict, Generic, List, Literal, Optional, TypeVar, Union
from typing import List, Literal, Optional, Union
from uuid import UUID

from fastapi import Query

from argilla.server.enums import DatasetStatus, MetadataPropertyType, SimilarityOrder, SortOrder
from argilla.server.pydantic_v1 import BaseModel, PositiveInt, constr, root_validator
from argilla.server.pydantic_v1 import Field as PydanticField
from argilla.server.pydantic_v1.generics import GenericModel
from argilla.server.enums import DatasetStatus, SimilarityOrder, SortOrder
from argilla.server.pydantic_v1 import BaseModel, Field, PositiveInt, constr, root_validator
from argilla.server.schemas.base import UpdateSchema
from argilla.server.schemas.v1.metadata_properties import MetadataPropertyName
from argilla.server.schemas.v1.questions import QuestionName
from argilla.server.schemas.v1.records import Record, RecordFilterScope
from argilla.server.schemas.v1.responses import ResponseFilterScope
Expand All @@ -39,21 +38,12 @@
DATASET_GUIDELINES_MIN_LENGTH = 1
DATASET_GUIDELINES_MAX_LENGTH = 10000

METADATA_PROPERTY_CREATE_NAME_REGEX = r"^(?=.*[a-z0-9])[a-z0-9_-]+$"
METADATA_PROPERTY_CREATE_NAME_MIN_LENGTH = 1
METADATA_PROPERTY_CREATE_NAME_MAX_LENGTH = 200
METADATA_PROPERTY_CREATE_TITLE_MIN_LENGTH = 1
METADATA_PROPERTY_CREATE_TITLE_MAX_LENGTH = 500

VECTOR_SETTINGS_CREATE_NAME_REGEX = r"^(?=.*[a-z0-9])[a-z0-9_-]+$"
VECTOR_SETTINGS_CREATE_NAME_MIN_LENGTH = 1
VECTOR_SETTINGS_CREATE_NAME_MAX_LENGTH = 200
VECTOR_SETTINGS_CREATE_TITLE_MIN_LENGTH = 1
VECTOR_SETTINGS_CREATE_TITLE_MAX_LENGTH = 500

TERMS_METADATA_PROPERTY_VALUES_MIN_ITEMS = 1
TERMS_METADATA_PROPERTY_VALUES_MAX_ITEMS = 250

TERMS_FILTER_VALUES_MIN_ITEMS = 1
TERMS_FILTER_VALUES_MAX_ITEMS = 250

Expand Down Expand Up @@ -85,12 +75,12 @@ class Datasets(BaseModel):

DatasetName = Annotated[
constr(regex=DATASET_NAME_REGEX, min_length=DATASET_NAME_MIN_LENGTH, max_length=DATASET_NAME_MAX_LENGTH),
PydanticField(..., description="Dataset name"),
Field(..., description="Dataset name"),
]

DatasetGuidelines = Annotated[
constr(min_length=DATASET_GUIDELINES_MIN_LENGTH, max_length=DATASET_GUIDELINES_MAX_LENGTH),
PydanticField(..., description="Dataset guidelines"),
Field(..., description="Dataset guidelines"),
]


Expand Down Expand Up @@ -151,12 +141,12 @@ class VectorsSettings(BaseModel):
min_length=VECTOR_SETTINGS_CREATE_TITLE_MIN_LENGTH,
max_length=VECTOR_SETTINGS_CREATE_TITLE_MAX_LENGTH,
),
PydanticField(..., description="The title of the vector settings"),
Field(..., description="The title of the vector settings"),
]


class VectorSettingsCreate(BaseModel):
name: str = PydanticField(
name: str = Field(
...,
regex=VECTOR_SETTINGS_CREATE_NAME_REGEX,
min_length=VECTOR_SETTINGS_CREATE_NAME_MIN_LENGTH,
Expand All @@ -167,107 +157,6 @@ class VectorSettingsCreate(BaseModel):
dimensions: PositiveInt


NT = TypeVar("NT", int, float)


class NumericMetadataProperty(GenericModel, Generic[NT]):
min: Optional[NT] = None
max: Optional[NT] = None

@root_validator(skip_on_failure=True)
def check_bounds(cls, values: Dict[str, Any]) -> Dict[str, Any]:
min = values.get("min")
max = values.get("max")

if min is not None and max is not None and min >= max:
raise ValueError(f"'min' ({min}) must be lower than 'max' ({max})")

return values


class TermsMetadataPropertyCreate(BaseModel):
type: Literal[MetadataPropertyType.terms]
values: Optional[List[str]] = PydanticField(
None, min_items=TERMS_METADATA_PROPERTY_VALUES_MIN_ITEMS, max_items=TERMS_METADATA_PROPERTY_VALUES_MAX_ITEMS
)


class IntegerMetadataPropertyCreate(NumericMetadataProperty[int]):
type: Literal[MetadataPropertyType.integer]


class FloatMetadataPropertyCreate(NumericMetadataProperty[float]):
type: Literal[MetadataPropertyType.float]


MetadataPropertyName = Annotated[
str,
PydanticField(
...,
regex=METADATA_PROPERTY_CREATE_NAME_REGEX,
min_length=METADATA_PROPERTY_CREATE_NAME_MIN_LENGTH,
max_length=METADATA_PROPERTY_CREATE_NAME_MAX_LENGTH,
),
]

MetadataPropertyTitle = Annotated[
constr(min_length=METADATA_PROPERTY_CREATE_TITLE_MIN_LENGTH, max_length=METADATA_PROPERTY_CREATE_TITLE_MAX_LENGTH),
PydanticField(..., description="The title of the metadata property"),
]

MetadataPropertySettingsCreate = Annotated[
Union[TermsMetadataPropertyCreate, IntegerMetadataPropertyCreate, FloatMetadataPropertyCreate],
PydanticField(..., discriminator="type"),
]


class MetadataPropertyCreate(BaseModel):
name: MetadataPropertyName
title: MetadataPropertyTitle
settings: MetadataPropertySettingsCreate
visible_for_annotators: bool = True


class TermsMetadataProperty(BaseModel):
type: Literal[MetadataPropertyType.terms]
values: Optional[List[str]] = None


class IntegerMetadataProperty(BaseModel):
type: Literal[MetadataPropertyType.integer]
min: Optional[int] = None
max: Optional[int] = None


class FloatMetadataProperty(BaseModel):
type: Literal[MetadataPropertyType.float]
min: Optional[float] = None
max: Optional[float] = None


MetadataPropertySettings = Annotated[
Union[TermsMetadataProperty, IntegerMetadataProperty, FloatMetadataProperty],
PydanticField(..., discriminator="type"),
]


class MetadataProperty(BaseModel):
id: UUID
name: str
title: str
settings: MetadataPropertySettings
visible_for_annotators: bool
inserted_at: datetime
updated_at: datetime

class Config:
orm_mode = True


class MetadataProperties(BaseModel):
items: List[MetadataProperty]


class MetadataParsedQueryParam:
def __init__(self, string: str):
k, *v = string.split(":", maxsplit=1)
Expand All @@ -277,7 +166,7 @@ def __init__(self, string: str):


class MetadataQueryParams(BaseModel):
metadata: List[str] = PydanticField(Query([], pattern=r"^(?=.*[a-z0-9])[a-z0-9_-]+:(.+(,(.+))*)$"))
metadata: List[str] = Field(Query([], pattern=r"^(?=.*[a-z0-9])[a-z0-9_-]+:(.+(,(.+))*)$"))

@property
def metadata_parsed(self) -> List[MetadataParsedQueryParam]:
Expand Down Expand Up @@ -321,16 +210,14 @@ class MetadataFilterScope(BaseModel):

FilterScope = Annotated[
Union[RecordFilterScope, ResponseFilterScope, SuggestionFilterScope, MetadataFilterScope],
PydanticField(..., discriminator="entity"),
Field(..., discriminator="entity"),
]


class TermsFilter(BaseModel):
type: Literal["terms"]
scope: FilterScope
values: List[str] = PydanticField(
..., min_items=TERMS_FILTER_VALUES_MIN_ITEMS, max_items=TERMS_FILTER_VALUES_MAX_ITEMS
)
values: List[str] = Field(..., min_items=TERMS_FILTER_VALUES_MIN_ITEMS, max_items=TERMS_FILTER_VALUES_MAX_ITEMS)


class RangeFilter(BaseModel):
Expand All @@ -352,13 +239,11 @@ def check_ge_and_le(cls, values: dict) -> dict:
return values


Filter = Annotated[Union[TermsFilter, RangeFilter], PydanticField(..., discriminator="type")]
Filter = Annotated[Union[TermsFilter, RangeFilter], Field(..., discriminator="type")]


class Filters(BaseModel):
and_: List[Filter] = PydanticField(
None, alias="and", min_items=FILTERS_AND_MIN_ITEMS, max_items=FILTERS_AND_MAX_ITEMS
)
and_: List[Filter] = Field(None, alias="and", min_items=FILTERS_AND_MIN_ITEMS, max_items=FILTERS_AND_MAX_ITEMS)


class Order(BaseModel):
Expand All @@ -369,7 +254,7 @@ class Order(BaseModel):
class SearchRecordsQuery(BaseModel):
query: Optional[Query]
filters: Optional[Filters]
sort: Optional[List[Order]] = PydanticField(
sort: Optional[List[Order]] = Field(
None, min_items=SEARCH_RECORDS_QUERY_SORT_MIN_ITEMS, max_items=SEARCH_RECORDS_QUERY_SORT_MAX_ITEMS
)

Expand Down
Loading

0 comments on commit 5298f5f

Please sign in to comment.