Skip to content

Commit

Permalink
Merge pull request #111 from hCaptcha/ENG-187
Browse files Browse the repository at this point in the history
[ENG-187] Add Drag&Drop job type
  • Loading branch information
RafalBielickiIM committed Mar 21, 2024
2 parents a981bb9 + a3b9503 commit 7b69343
Show file tree
Hide file tree
Showing 11 changed files with 156 additions and 167 deletions.
21 changes: 21 additions & 0 deletions basemodels/constants.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
from enum import Enum

JOB_TYPES_FOR_CONTENT_TYPE_VALIDATION = [
"image_label_binary",
"image_label_multiple_choice",
Expand All @@ -16,3 +18,22 @@
"image/jpg",
"image/png",
]


# Base job types
class BaseJobTypesEnum(str, Enum):
    """Names of the base job (request) types.

    Every member mixes in ``str`` and its value is identical to its name, so
    a member compares equal to the plain job-type string, e.g.
    ``BaseJobTypesEnum.image_drag_drop == "image_drag_drop"``.
    """

    image_label_binary = "image_label_binary"
    image_label_multiple_choice = "image_label_multiple_choice"
    text_free_entry = "text_free_entry"
    text_label_multiple_span_select = "text_label_multiple_span_select"
    text_multiple_choice_one_option = "text_multiple_choice_one_option"
    text_multiple_choice_multiple_options = "text_multiple_choice_multiple_options"
    image_label_area_adjust = "image_label_area_adjust"
    image_label_area_select = "image_label_area_select"
    image_label_single_polygon = "image_label_single_polygon"
    image_label_multiple_polygons = "image_label_multiple_polygons"
    image_label_semantic_segmentation_one_option = "image_label_semantic_segmentation_one_option"
    image_label_semantic_segmentation_multiple_options = "image_label_semantic_segmentation_multiple_options"
    image_label_text = "image_label_text"
    image_drag_drop = "image_drag_drop"
    multi_challenge = "multi_challenge"
46 changes: 40 additions & 6 deletions basemodels/manifest/data/groundtruth.py
Original file line number Diff line number Diff line change
@@ -1,11 +1,12 @@
from typing import List, Optional, Union
from uuid import UUID

import requests
from pydantic.v1 import BaseModel, HttpUrl, ValidationError, conlist, validator, root_validator, Field
from pydantic.v1 import BaseModel, HttpUrl, ValidationError
from requests import RequestException
from typing_extensions import Literal

from basemodels.constants import SUPPORTED_CONTENT_TYPES
from basemodels.constants import SUPPORTED_CONTENT_TYPES, BaseJobTypesEnum


def create_wrapper_model(type):
Expand Down Expand Up @@ -78,6 +79,33 @@ class ILASGroundtruthEntry(BaseModel):
ilas_groundtruth_entry_type = List[List[ILASGroundtruthEntry]]
ILASGroundtruthEntryModel = create_wrapper_model(ilas_groundtruth_entry_type)


class IDDGroundtruthEntry(BaseModel):
    """One entity record inside an ``image_drag_drop`` groundtruth entry.

    The groundtruth value for that job type is validated as a list of these
    records.
    """

    # Entity identifier; must be parseable as a UUID.
    entity_name: UUID
    # Optional free-form type label. The explicit ``None`` default matches
    # what pydantic v1 already infers for a bare ``Optional`` annotation.
    entity_type: Optional[str] = None
    # Integer coordinates of the entity. The list length is not enforced here.
    entity_coords: List[int]


"""
Groundtruth file format for `image_drag_drop` job type
{
"81fb76f3-3906-4fbd-8168-9dff208860a5": [
{
"entity_name": "04606112-4b9d-455f-8f43-9cc1a9bca185",
"entity_type": "default",
"entity_coords": [275, 184]
}
]
}
"""
# Value side of an `image_drag_drop` groundtruth entry: a list of
# IDDGroundtruthEntry records, wrapped so it can be validated on its own.
idd_groundtruth_entry_type = List[IDDGroundtruthEntry]
IDDGroundtruthEntryModel = create_wrapper_model(idd_groundtruth_entry_type)

# Key side of an `image_drag_drop` groundtruth entry: the key must be a UUID.
idd_groundtruth_entry_key_type = UUID
IDDGroundtruthEntryKeyModel = create_wrapper_model(idd_groundtruth_entry_key_type)


class TLMSSGroundTruthEntry(BaseModel):
start: int
end: int
Expand Down Expand Up @@ -106,8 +134,10 @@ class TLMSSGroundTruthEntry(BaseModel):
"image_label_multiple_choice": ILMCGroundtruthEntryModel,
"image_label_area_select": ILASGroundtruthEntryModel,
"text_label_multiple_span_select": TLMSSGroundTruthEntryModel,
"image_drag_drop": IDDGroundtruthEntryModel,
}


def validate_content_type(uri: str) -> None:
"""Validate uri content type"""
try:
Expand All @@ -131,13 +161,17 @@ def validate_groundtruth_entry(
validate_image_content_type: bool,
):
"""Validate key & value of groundtruth entry based on request_type"""
GroundtruthEntryValueModel = groundtruth_entry_models_map.get(request_type)
groundtruth_entry_value_model_class = groundtruth_entry_models_map.get(request_type)
groundtruth_entry_key_model_class = GroundtruthEntryKeyModel

if GroundtruthEntryValueModel is None:
if groundtruth_entry_value_model_class is None:
return

validate_wrapper_model(GroundtruthEntryKeyModel, key)
validate_wrapper_model(GroundtruthEntryValueModel, value)
if request_type == BaseJobTypesEnum.image_drag_drop:
groundtruth_entry_key_model_class = IDDGroundtruthEntryKeyModel

validate_wrapper_model(groundtruth_entry_key_model_class, key)
validate_wrapper_model(groundtruth_entry_value_model_class, value)

if validate_image_content_type:
validate_content_type(key)
5 changes: 1 addition & 4 deletions basemodels/manifest/data/helpers.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,8 +16,5 @@ def validate_content_type(uri: str) -> None:
content_type = response.headers.get("Content-Type", "")
if content_type not in SUPPORTED_CONTENT_TYPES:
raise ValidationError(
[
ErrorWrapper(ValueError(f"Unsupported type {content_type}"), "answer_example_uri")
],
ExampleResourceModel
[ErrorWrapper(ValueError(f"Unsupported type {content_type}"), "answer_example_uri")], ExampleResourceModel
)
12 changes: 4 additions & 8 deletions basemodels/manifest/data/requester_question_example.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,19 +23,15 @@ def validate_requester_example_image(
raise ValueError(f"Not supported format for requester_question_example.")
except RequestException as e:
raise ValidationError(
[
ErrorWrapper(ValueError(f"could not retrieve requester example ({uri_val})"), "answer_example_uri")
],
ExampleResourceModel
[ErrorWrapper(ValueError(f"could not retrieve requester example ({uri_val})"), "answer_example_uri")],
ExampleResourceModel,
) from e
except ValidationError as e:
raise ValidationError(
[
ErrorWrapper(
ValueError(f"requester example image for {uri_val} has unsupported type"),
"answer_example_uri"
ValueError(f"requester example image for {uri_val} has unsupported type"), "answer_example_uri"
)
],
ExampleResourceModel
ExampleResourceModel,
) from e

13 changes: 7 additions & 6 deletions basemodels/manifest/data/requester_restricted_answer_set.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,20 +27,21 @@ def validate_requester_restricted_answer_set_uris(restricted_answer_set: dict) -
[
ErrorWrapper(
ValueError(f"could not retrieve requester restricted answer set example uri ({uri})"),
"answer_example_uri"
"answer_example_uri",
)
],
ExampleResourceModel
ExampleResourceModel,
) from e
except ValidationError as e:

raise ValidationError(
[
ErrorWrapper(
ValueError(f"requester restricted answer set example uri "
f"({uri}) content type failed validation"),
"answer_example_uri"
ValueError(
f"requester restricted answer set example uri " f"({uri}) content type failed validation"
),
"answer_example_uri",
)
],
ExampleResourceModel
ExampleResourceModel,
) from e
58 changes: 30 additions & 28 deletions basemodels/manifest/data/taskdata.py
Original file line number Diff line number Diff line change
@@ -1,17 +1,20 @@
from typing import Dict, Optional, Union, Any
from typing import Dict, Optional, Union, Any, List, Tuple
from uuid import UUID

import requests
from pydantic.v1 import BaseModel, HttpUrl, validate_model, ValidationError, validator, root_validator
from pydantic.v1 import HttpUrl, validate_model, ValidationError, validator, BaseModel, root_validator, AnyHttpUrl
from pydantic.v1.error_wrappers import ErrorWrapper
from requests import RequestException

from basemodels.constants import SUPPORTED_CONTENT_TYPES


# New type
class AtLeastTenCharUrl(HttpUrl):
    """``HttpUrl`` subtype whose minimum accepted length is raised to 10."""

    # NOTE(review): relies on pydantic v1 URL types honouring the
    # ``min_length`` class attribute during validation - confirm.
    min_length = 10
class Entity(BaseModel):
    """Entity configuration.

    Describes one item of the optional ``entities`` list on a taskdata entry.
    """

    # Entity identifier; must be parseable as a UUID.
    entity_id: UUID
    # HTTP(S) location of the entity resource.
    entity_uri: AnyHttpUrl
    # Exactly two integers.
    # NOTE(review): presumably an (x, y) position - confirm with the consumer.
    coords: Tuple[int, int]


class TaskDataEntry(BaseModel):
Expand All @@ -36,14 +39,28 @@ class TaskDataEntry(BaseModel):

task_key: Optional[UUID]
datapoint_uri: Optional[HttpUrl]
entities: Optional[List[Entity]]
datapoint_text: Optional[Dict[str, str]]
datapoint_hash: Optional[str]
metadata: Optional[Dict[str, Optional[Union[str, int, float, Dict[str, Any]]]]]

@validator("datapoint_uri", always=True)
def validate_datapoint_uri(cls, value):
    """Reject a ``datapoint_uri`` shorter than 10 characters.

    Registered with ``always=True``, so ``value`` may be ``None`` when the
    field is omitted; a missing or empty value is passed through unchanged.

    Returns:
        The unchanged ``value``.

    Raises:
        ValueError: if ``value`` is non-empty and shorter than 10 characters.
    """
    if value and len(value) < 10:
        # A validator must signal failure with ValueError so pydantic can wrap
        # it; building ValidationError from a bare message fails with
        # TypeError because the model argument is missing.
        raise ValueError("datapoint_uri need to be at least 10 char length.")
    return value

@root_validator
def validate_task_data(cls, values):
    """Require at least one of ``datapoint_uri`` or ``datapoint_text``.

    Returns:
        ``values`` unchanged when either field holds a truthy value.

    Raises:
        ValueError: when both fields are missing or empty; the message lists
            the keys present in ``values``.
    """
    if values.get("datapoint_uri") or values.get("datapoint_text"):
        return values

    raise ValueError(f"datapoint_uri is missing. {list(values.keys())}")

@validator("metadata")
def validate_metadata(cls, value):
if value is None:
Expand All @@ -57,20 +74,6 @@ def validate_metadata(cls, value):

return value

datapoint_hash: Optional[str]
metadata: Optional[Dict[str, Optional[Union[str, int, float, Dict[str, Any]]]]]

@root_validator
def validate_datapoint_text(cls, values):
    """Require at least one of ``datapoint_uri`` or ``datapoint_text``.

    Returns:
        ``values`` unchanged when either field holds a truthy value.

    Raises:
        ValueError: when both fields are missing or empty.
    """
    if values.get("datapoint_uri") or values.get("datapoint_text"):
        return values

    raise ValueError("datapoint_uri is missing.")


def validate_content_type(uri: str) -> None:
"""Validate uri content type"""
Expand All @@ -79,32 +82,31 @@ def validate_content_type(uri: str) -> None:
response.raise_for_status()
except RequestException as e:
raise ValidationError(
[
ErrorWrapper(ValueError(f"taskdata content type ({uri}) validation failed"), "datapoint_uri")
],
TaskDataEntry
[ErrorWrapper(ValueError(f"taskdata content type ({uri}) validation failed"), "datapoint_uri")],
TaskDataEntry,
) from e

content_type = response.headers.get("Content-Type", "")
if content_type not in SUPPORTED_CONTENT_TYPES:
raise ValidationError(
[
ErrorWrapper(
ValueError(f"taskdata entry datapoint_uri has unsupported type {content_type}"),
"datapoint_uri"
ValueError(f"taskdata entry datapoint_uri has unsupported type {content_type}"), "datapoint_uri"
)
],
TaskDataEntry
TaskDataEntry,
)


def validate_taskdata_entry(value: dict, validate_image_content_type: bool) -> None:
"""Validate taskdata entry"""
if not isinstance(value, dict):
raise ValidationError("taskdata entry should be dict", TaskDataEntry())

print("HERE")
print(value)
*_, validation_error = validate_model(TaskDataEntry, value)
if validation_error:
print(validation_error)
raise validation_error

if validate_image_content_type:
Expand Down
Loading

0 comments on commit 7b69343

Please sign in to comment.