diff --git a/README.md b/README.md
index 9b47d88..38f707e 100644
--- a/README.md
+++ b/README.md
@@ -2476,11 +2476,41 @@ Create object in the dataset.
 The types of objects that can be created are "image", "video", and "audio".
 There are type-specific methods, but they can be used in the same way.
 
+Created objects are automatically assigned to the "latest" dataset version.
+
 ```python
 dataset_object = client.create_dataset_object(
-    dataset_version_id="YOUR_DATASET_VERSION_ID",
+    dataset="YOUR_DATASET_NAME",
     name="brushwood_dog.jpg",
     file_path="./brushwood_dog.jpg",
+    tags=["dog"], # max 5 tags per dataset object.
+    annotations=[
+        {
+            "keypoints": [
+                {
+                    "value": [
+                        102.59,
+                        23.04,
+                        1
+                    ],
+                    "key": "head"
+                }
+            ],
+            "attributes": [
+                {
+                    "value": "Scottish field",
+                    "key": "kind"
+                }
+            ],
+            "confidenceScore": 0,
+            "rotation": 0,
+            "points": [
+                0
+            ],
+            "value": "dog",
+            "type": "bbox" # type can be 'bbox', 'segmentation'.
+        }
+    ]
 )
 ```
 
@@ -2495,7 +2525,46 @@ See API docs for details.
     'size': 6717,
     'height': 225,
     'width': 225,
-    'groupId': None,
+    'tags': [
+        'dog'
+    ],
+    "annotations": [
+        {
+            "id": "YOUR_DATASET_OBJECT_ANNOTATION_ID",
+            "type": "bbox",
+            "title": "dog",
+            "value": "dog",
+            "points": [
+                0
+            ],
+            "attributes": [
+                {
+                    "value": "Scottish field",
+                    "key": "kind",
+                    "name": "Kind",
+                    "type": "text"
+                }
+            ],
+            "keypoints": [
+                {
+                    "edges": [
+                        "right_shoulder",
+                        "left_shoulder"
+                    ],
+                    "value": [
+                        102.59,
+                        23.04,
+                        1
+                    ],
+                    "key": "head",
+                    "name": "Head"
+                }
+            ],
+            "rotation": 0,
+            "color": "#FF0000",
+            "confidenceScore": -1
+        }
+    ],
     'createdAt': '2022-10-30T08:32:20.748Z',
     'updatedAt': '2022-10-30T08:32:20.748Z'
 }
@@ -2518,20 +2587,28 @@ Success response is the same as when created.
 Get all dataset objects in the dataset. (Up to 1000 objects)
 
 ```python
-dataset_objects = client.get_dataset_objects(dataset_version_id="YOUR_DATASET_VERSION_ID")
+dataset_objects = client.get_dataset_objects(dataset="YOUR_DATASET_NAME")
 ```
 
 The success response is the same as when created, but it is an array.
 
-You can filter by keywords.
+You can filter by version and tags.
 
 ```python
 dataset_objects = client.get_dataset_objects(
-    dataset_version_id="YOUR_DATASET_VERSION_ID", keyword="dog"
+    dataset="YOUR_DATASET_NAME",
+    version="latest", # default is "latest"
+    tags=["cat"],
 )
 ```
 
-If you wish to retrieve more than 1000 data sets, please refer to the Task [sample code](#get-tasks).
+### Delete Dataset Object
+
+Delete a single dataset object.
+
+```python
+client.delete_dataset_object(dataset_object_id="YOUR_DATASET_OBJECT_ID")
+```
 
 ## Converter
 
diff --git a/examples/create_dataset_object.py b/examples/create_dataset_object.py
index 671ede2..44a2a78 100644
--- a/examples/create_dataset_object.py
+++ b/examples/create_dataset_object.py
@@ -5,7 +5,7 @@
 client = fastlabel.Client()
 
 dataset_object = client.create_dataset_object(
-    dataset_version_id="YOUR_DATASET_VERSION_ID",
+    dataset="YOUR_DATASET_NAME",
     name="NAME",
     file_path="FILE_PATH",
 )
diff --git a/examples/delete_dataset_object.py b/examples/delete_dataset_object.py
new file mode 100644
index 0000000..822b523
--- /dev/null
+++ b/examples/delete_dataset_object.py
@@ -0,0 +1,5 @@
+import fastlabel
+
+client = fastlabel.Client()
+
+client.delete_dataset_object(dataset_object_id="YOUR_DATASET_OBJECT_ID")
diff --git a/examples/get_dataset_objects.py b/examples/get_dataset_objects.py
index 1207d18..623b1b7 100644
--- a/examples/get_dataset_objects.py
+++ b/examples/get_dataset_objects.py
@@ -4,7 +4,5 @@
 
 client = fastlabel.Client()
 
-dataset_objects = client.get_dataset_objects(
-    dataset_version_id="YOUR_DATASET_VERSION_ID"
-)
+dataset_objects = client.get_dataset_objects(dataset="YOUR_DATASET_NAME")
 pprint(dataset_objects)
diff --git a/fastlabel/__init__.py b/fastlabel/__init__.py
index 5dc4735..1f475a8 100644
--- a/fastlabel/__init__.py
+++ b/fastlabel/__init__.py
@@ -3907,45 +3907,41 @@ def find_dataset_object(self, dataset_object_id: str) -> dict:
 
     def get_dataset_objects(
         self,
-        dataset_version_id: str,
-        keyword: str = None,
-        offset: int = None,
-        limit: int = 100,
+        dataset: str,
+        version: str = None,
+        tags: List[str] = [],
     ) -> list:
         """
         Returns a list of dataset objects.
 
-        Returns up to 1000 at a time, to get more, set offset as the starting position
-        to fetch.
-
-        dataset_version_id is dataset object in dataset version (Required).
-        keyword are search terms in the dataset object name (Optional).
-        offset is the starting position number to fetch (Optional).
-        limit is the max number to fetch (Optional).
+        dataset is the dataset name (Required).
+        version is the dataset version (Optional).
+        tags is a list of tags (Optional).
         """
-        if limit > 1000:
-            raise FastLabelInvalidException(
-                "Limit must be less than or equal to 1000.", 422
-            )
         endpoint = "dataset-objects"
-        params = {"datasetVersionId": dataset_version_id}
-        if keyword:
-            params["keyword"] = keyword
-        if offset:
-            params["offset"] = offset
-        if limit:
-            params["limit"] = limit
+        params = {"dataset": dataset}
+        if version:
+            params["version"] = version
+        if tags:
+            params["tags"] = tags
         return self.api.get_request(endpoint, params=params)
 
     def create_dataset_object(
-        self, dataset_version_id: str, name: str, file_path: str
+        self,
+        dataset: str,
+        name: str,
+        file_path: str,
+        tags: List[str] = [],
+        annotations: List[dict] = [],
     ) -> dict:
         """
         Create a dataset object.
 
-        dataset_version_id is dataset object in dataset version (Required).
+        dataset is the dataset name (Required).
         name is a unique identifier of dataset object in your dataset (Required).
         file_path is a path to data. (Required).
+        tags is a list of tags (Optional).
+        annotations is a list of annotations (Optional).
         """
         endpoint = "dataset-objects"
         if not utils.is_object_supported_size(file_path):
@@ -3953,12 +3949,23 @@
             "Supported object size is under 250 MB.", 422
             )
         payload = {
-            "datasetVersionId": dataset_version_id,
+            "dataset": dataset,
             "name": name,
-            "file": utils.base64_encode(file_path),
+            "filePath": utils.base64_encode(file_path),
         }
+        if tags:
+            payload["tags"] = tags
+        if annotations:
+            payload["annotations"] = annotations
         return self.api.post_request(endpoint, payload=payload)
 
+    def delete_dataset_object(self, dataset_object_id: str) -> None:
+        """
+        Delete a dataset object.
+        """
+        endpoint = "dataset-objects/" + dataset_object_id
+        self.api.delete_request(endpoint)
+
     def update_aws_s3_storage(
         self, project: str, bucket_name: str, bucket_region: str, prefix: str = None
     ) -> str:
diff --git a/fastlabel/api.py b/fastlabel/api.py
index cd026de..31db1ed 100644
--- a/fastlabel/api.py
+++ b/fastlabel/api.py
@@ -1,12 +1,12 @@
 import os
-import requests
 from typing import Union
 
+import requests
+
 from .exceptions import FastLabelException, FastLabelInvalidException
 
 
 class Api:
-
     base_url = "https://api.fastlabel.ai/v1/"
     access_token = None
 
@@ -124,6 +124,5 @@ def upload_zipfile(
         url: str,
         file_path: str,
     ):
-        files = {'file': open(file_path, 'rb')}
+        files = {"file": open(file_path, "rb")}
         return requests.put(url, files=files)
-
diff --git a/tests/test_client_dataset.py b/tests/test_client_dataset.py
index 8501e1a..181a2ed 100644
--- a/tests/test_client_dataset.py
+++ b/tests/test_client_dataset.py
@@ -6,12 +6,6 @@
 
 from fastlabel import Client
 
-OBJECT_SIGNED_URL_KEY = "objectSignedUrl"
-
-
-def remove_object_signed_url(d: dict) -> dict:
-    return {k: v for k, v in d.items() if k != OBJECT_SIGNED_URL_KEY}
-
 
 @pytest.fixture
 def client() -> Client:
@@ -57,7 +51,7 @@ def test_create_dataset_object(self, client: Client, testing_dataset: dict):
         target_file = Path(sys.path[0]) / "files/test_image.jpg"
         # Act
         dataset_object = client.create_dataset_object(
-            dataset_version_id=testing_dataset["version"]["id"],
+            dataset=testing_dataset["name"],
             name="test_image.jpg",
             file_path=str(target_file),
         )
@@ -67,13 +61,12 @@ def test_create_dataset_object(self, client: Client, testing_dataset: dict):
         assert dataset_object["size"] == 6717
         assert dataset_object["height"] == 225
         assert dataset_object["width"] == 225
-        assert dataset_object["groupId"] is None
 
     def test_find_dataset_object(self, client: Client, testing_dataset: dict):
         # Arrange
         target_file = Path(sys.path[0]) / "files/test_image.jpg"
         dataset_object = client.create_dataset_object(
-            dataset_version_id=testing_dataset["version"]["id"],
+            dataset=testing_dataset["name"],
             name="test_image.jpg",
             file_path=str(target_file),
         )
@@ -85,29 +78,26 @@ def test_get_dataset_object(self, client: Client, testing_dataset: dict):
 
         # Arrange
         target_file = Path(sys.path[0]) / "files/test_image.jpg"
-        dataset_object1 = client.create_dataset_object(
-            dataset_version_id=testing_dataset["version"]["id"],
+        client.create_dataset_object(
+            dataset=testing_dataset["name"],
             name="test_image1.jpg",
             file_path=str(target_file),
+            tags=["image1"],
         )
-        dataset_object2 = client.create_dataset_object(
-            dataset_version_id=testing_dataset["version"]["id"],
+        client.create_dataset_object(
+            dataset=testing_dataset["name"],
             name="test_image2.jpg",
             file_path=str(target_file),
+            tags=["image1"],
         )
         # Act
         results = client.get_dataset_objects(
-            dataset_version_id=testing_dataset["version"]["id"]
+            dataset=testing_dataset["name"],
+            tags=["image1"],
         )
         # Assert
         assert results is not None
         assert len(results) == 2
-        assert remove_object_signed_url(results[0]) == remove_object_signed_url(
-            dataset_object1
-        )
-        assert remove_object_signed_url(results[1]) == remove_object_signed_url(
-            dataset_object2
-        )
 
 
 class TestVideoDataset:
@@ -138,7 +128,7 @@ def test_create_dataset_object(self, client: Client, testing_dataset: dict):
         target_file = Path(sys.path[0]) / "files/test_video.mp4"
         # Act
         dataset_object = client.create_dataset_object(
-            dataset_version_id=testing_dataset["version"]["id"],
+            dataset=testing_dataset["name"],
             name="test_video.mp4",
             file_path=str(target_file),
         )
@@ -148,13 +138,12 @@ def test_create_dataset_object(self, client: Client, testing_dataset: dict):
         assert dataset_object["size"] == 534032
         assert dataset_object["height"] == 240
         assert dataset_object["width"] == 320
-        assert dataset_object["groupId"] is None
 
     def test_find_dataset_object(self, client: Client, testing_dataset: dict):
         # Arrange
         target_file = Path(sys.path[0]) / "files/test_video.mp4"
         dataset_object = client.create_dataset_object(
-            dataset_version_id=testing_dataset["version"]["id"],
+            dataset=testing_dataset["name"],
             name="test_video.mp4",
             file_path=str(target_file),
         )
@@ -166,29 +155,26 @@ def test_get_dataset_object(self, client: Client, testing_dataset: dict):
 
         # Arrange
         target_file = Path(sys.path[0]) / "files/test_video.mp4"
-        dataset_object1 = client.create_dataset_object(
-            dataset_version_id=testing_dataset["version"]["id"],
+        client.create_dataset_object(
+            dataset=testing_dataset["name"],
             name="test_video1.mp4",
             file_path=str(target_file),
+            tags=["video1"],
         )
-        dataset_object2 = client.create_dataset_object(
-            dataset_version_id=testing_dataset["version"]["id"],
+        client.create_dataset_object(
+            dataset=testing_dataset["name"],
             name="test_video2.mp4",
             file_path=str(target_file),
+            tags=["video1"],
         )
         # Act
         results = client.get_dataset_objects(
-            dataset_version_id=testing_dataset["version"]["id"]
+            dataset=testing_dataset["name"],
+            tags=["video1"],
         )
         # Assert
         assert results is not None
         assert len(results) == 2
-        assert remove_object_signed_url(results[0]) == remove_object_signed_url(
-            dataset_object1
-        )
-        assert remove_object_signed_url(results[1]) == remove_object_signed_url(
-            dataset_object2
-        )
 
 
 class TestAudioDataset:
@@ -220,7 +206,7 @@ def test_create_dataset_object(self, client: Client, testing_dataset: dict):
         target_file = Path(sys.path[0]) / "files/test_audio.mp3"
         # Act
         dataset_object = client.create_dataset_object(
-            dataset_version_id=testing_dataset["version"]["id"],
+            dataset=testing_dataset["name"],
             name="test_audio.mp3",
             file_path=str(target_file),
         )
@@ -230,13 +216,12 @@ def test_create_dataset_object(self, client: Client, testing_dataset: dict):
         assert dataset_object["size"] == 16182
         assert dataset_object["height"] == 0
         assert dataset_object["width"] == 0
-        assert dataset_object["groupId"] is None
 
     def test_find_dataset_object(self, client: Client, testing_dataset: dict):
         # Arrange
         target_file = Path(sys.path[0]) / "files/test_audio.mp3"
         dataset_object = client.create_dataset_object(
-            dataset_version_id=testing_dataset["version"]["id"],
+            dataset=testing_dataset["name"],
             name="test_audio.mp3",
             file_path=str(target_file),
         )
@@ -248,29 +233,26 @@ def test_get_dataset_object(self, client: Client, testing_dataset: dict):
 
         # Arrange
         target_file = Path(sys.path[0]) / "files/test_audio.mp3"
-        dataset_object1 = client.create_dataset_object(
-            dataset_version_id=testing_dataset["version"]["id"],
+        client.create_dataset_object(
+            dataset=testing_dataset["name"],
             name="test_audio1.mp3",
             file_path=str(target_file),
+            tags=["audio1"],
         )
-        dataset_object2 = client.create_dataset_object(
-            dataset_version_id=testing_dataset["version"]["id"],
+        client.create_dataset_object(
+            dataset=testing_dataset["name"],
             name="test_audio2.mp3",
             file_path=str(target_file),
+            tags=["audio1"],
         )
         # Act
         results = client.get_dataset_objects(
-            dataset_version_id=testing_dataset["version"]["id"]
+            dataset=testing_dataset["name"],
+            tags=["audio1"],
        )
         # Assert
         assert results is not None
         assert len(results) == 2
-        assert remove_object_signed_url(results[0]) == remove_object_signed_url(
-            dataset_object1
-        )
-        assert remove_object_signed_url(results[1]) == remove_object_signed_url(
-            dataset_object2
-        )
 
 
 class TestMixingDataset:
@@ -283,7 +265,7 @@ def test_create_dataset_object(self, client: Client, testing_dataset: dict):
         target_file = Path(sys.path[0]) / "files/test_other_file.txt"
         # Act
         dataset_object = client.create_dataset_object(
-            dataset_version_id=testing_dataset["version"]["id"],
+            dataset=testing_dataset["name"],
             name="test_other_file.txt",
             file_path=str(target_file),
         )
@@ -293,4 +275,3 @@ def test_create_dataset_object(self, client: Client, testing_dataset: dict):
         assert dataset_object["size"] == 3090
         assert dataset_object["height"] == 0
         assert dataset_object["width"] == 0
-        assert dataset_object["groupId"] is None
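Taken together, the changes above move the dataset-object API from version-id based calls to dataset-name based calls, add tag and annotation support on create, and introduce delete. The snippet below is a minimal end-to-end sketch of the new flow based only on the signatures in this diff; the dataset name, file path, and tag values are placeholders, and the clean-up loop assumes each returned object exposes an "id" field, which is not visible in the hunks above.

```python
import fastlabel

client = fastlabel.Client()

# Create an object; it is registered to the "latest" dataset version.
dataset_object = client.create_dataset_object(
    dataset="YOUR_DATASET_NAME",  # placeholder dataset name
    name="brushwood_dog.jpg",
    file_path="./brushwood_dog.jpg",
    tags=["dog"],  # optional, max 5 tags per object
)

# Fetch the objects of one version, narrowed down by tag.
dataset_objects = client.get_dataset_objects(
    dataset="YOUR_DATASET_NAME",
    version="latest",  # optional, defaults to "latest"
    tags=["dog"],      # optional
)

# Delete everything that matched the filter.
# Assumes the returned dicts carry an "id" field.
for obj in dataset_objects:
    client.delete_dataset_object(dataset_object_id=obj["id"])
```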