# Dataset

def find_dataset(self, dataset_id: str) -> dict:
    """
    Find a single dataset.

    dataset_id is the id of the dataset (Required).
    """
    endpoint = "datasets/" + dataset_id
    return self.api.get_request(endpoint)

def get_datasets(
    self,
    keyword: str = None,
    type: str = None,
    offset: int = None,
    limit: int = 100,
) -> list:
    """
    Return a list of datasets.

    Returns up to 1000 at a time; to get more, set offset as the starting
    position to fetch.

    keyword is a search term matched against the dataset slug (Optional).
    type is the type of your dataset: 'image', 'video' or 'audio' (Optional).
    offset is the starting position number to fetch (Optional).
    limit is the max number to fetch, up to 1000 (Optional).
    """
    if limit > 1000:
        raise FastLabelInvalidException(
            "Limit must be less than or equal to 1000.", 422
        )
    endpoint = "datasets"
    params = {}
    if keyword:
        params["keyword"] = keyword
    if type:
        params["type"] = type
    # "is not None" so an explicit offset of 0 is still forwarded.
    if offset is not None:
        params["offset"] = offset
    if limit:
        params["limit"] = limit
    return self.api.get_request(endpoint, params=params)

def create_dataset(
    self,
    type: str,
    name: str,
    slug: str,
) -> str:
    """
    Create a dataset.

    type can be 'image', 'video' or 'audio' (Required).
    name is the name of your dataset (Required).
    slug is the slug of your dataset (Required).
    """
    endpoint = "datasets"
    payload = {
        "type": type,
        "name": name,
        "slug": slug,
    }
    return self.api.post_request(endpoint, payload=payload)

def update_dataset(
    self,
    dataset_id: str,
    name: str = None,
) -> str:
    """
    Update a dataset.

    dataset_id is the id of the dataset (Required).
    name is the new name of your dataset (Optional).
    """
    endpoint = "datasets/" + dataset_id
    # Only send fields the caller actually provided; the original sent
    # {"name": None} when name was omitted.
    payload = {}
    if name:
        payload["name"] = name
    return self.api.put_request(endpoint, payload=payload)

def delete_dataset(self, dataset_id: str) -> None:
    """
    Delete a dataset.

    dataset_id is the id of the dataset (Required).
    """
    endpoint = "datasets/" + dataset_id
    self.api.delete_request(endpoint)

# Dataset Object

def find_dataset_object(self, dataset_object_id: str) -> dict:
    """
    Find a single dataset object.

    dataset_object_id is the id of the dataset object (Required).
    """
    endpoint = "dataset-objects/" + dataset_object_id
    return self.api.get_request(endpoint)

def get_dataset_objects(
    self,
    dataset_id: str = None,
    keyword: str = None,
    offset: int = None,
    limit: int = 100,
) -> list:
    """
    Return a list of dataset objects in a dataset.

    Returns up to 1000 at a time; to get more, set offset as the starting
    position to fetch.

    dataset_id is the id of the dataset to list objects from (Required).
    keyword is a search term matched against the dataset object name (Optional).
    offset is the starting position number to fetch (Optional).
    limit is the max number to fetch, up to 1000 (Optional).
    """
    if limit > 1000:
        raise FastLabelInvalidException(
            "Limit must be less than or equal to 1000.", 422
        )
    endpoint = "dataset-objects"
    params = {"datasetId": dataset_id}
    if keyword:
        params["keyword"] = keyword
    # "is not None" so an explicit offset of 0 is still forwarded.
    if offset is not None:
        params["offset"] = offset
    if limit:
        params["limit"] = limit
    return self.api.get_request(endpoint, params=params)

def _create_dataset_object(
    self, dataset_id: str, name: str, file_path: str, type: str
) -> str:
    """
    Shared request builder for the create_*_dataset_object methods.

    Callers are responsible for validating the file extension and size
    before delegating here.
    """
    endpoint = "dataset-objects"
    payload = {
        "datasetId": dataset_id,
        "name": name,
        "file": utils.base64_encode(file_path),
        "type": type,
    }
    return self.api.post_request(endpoint, payload=payload)

def create_image_dataset_object(
    self,
    dataset_id: str,
    name: str,
    file_path: str,
) -> str:
    """
    Create an image dataset object.

    dataset_id is the id of the dataset the object belongs to (Required).
    name is a unique identifier of the dataset object in your dataset (Required).
    file_path is a path to the data. Supported extensions are png, jpg,
    jpeg (Required).
    """
    # TODO: add jfif, pjpeg, pjp?
    if not utils.is_image_supported_ext(file_path):
        raise FastLabelInvalidException(
            "Supported extensions are png, jpg, jpeg.", 422
        )
    if not utils.is_image_supported_size(file_path):
        raise FastLabelInvalidException("Supported image size is under 20 MB.", 422)
    return self._create_dataset_object(dataset_id, name, file_path, "image")

def create_video_dataset_object(
    self,
    dataset_id: str,
    name: str,
    file_path: str,
) -> str:
    """
    Create a video dataset object.

    dataset_id is the id of the dataset the object belongs to (Required).
    name is a unique identifier of the dataset object in your dataset (Required).
    file_path is a path to the data. Supported extensions are mp4 (Required).
    """
    # TODO: add m4v, mov, avi?
    if not utils.is_video_supported_ext(file_path):
        raise FastLabelInvalidException("Supported extensions are mp4.", 422)
    if not utils.is_video_supported_size(file_path):
        raise FastLabelInvalidException(
            "Supported video size is under 250 MB.", 422
        )
    return self._create_dataset_object(dataset_id, name, file_path, "video")

def create_audio_dataset_object(
    self,
    dataset_id: str,
    name: str,
    file_path: str,
) -> str:
    """
    Create an audio dataset object.

    dataset_id is the id of the dataset the object belongs to (Required).
    name is a unique identifier of the dataset object in your dataset (Required).
    file_path is a path to the data. Supported extensions are mp3, wav,
    w4a (Required).
    """
    # TODO: add mp2?
    if not utils.is_audio_supported_ext(file_path):
        raise FastLabelInvalidException(
            "Supported extensions are mp3, wav and w4a.", 422
        )
    if not utils.is_audio_supported_size(file_path):
        raise FastLabelInvalidException(
            "Supported audio size is under 120 MB.", 422
        )
    return self._create_dataset_object(dataset_id, name, file_path, "audio")

def delete_dataset_objects(
    self, dataset_id: str, dataset_object_ids: List[str]
) -> None:
    """
    Delete multiple dataset objects from a dataset.

    dataset_id is the id of the dataset (Required).
    dataset_object_ids are the ids of the dataset objects to delete (Required).
    """
    endpoint = "dataset-objects/delete/multi"
    payload = {"datasetId": dataset_id, "datasetObjectIds": dataset_object_ids}
    self.api.post_request(endpoint, payload=payload)

def get_dataset_object_import_histories(
    self,
    dataset_id: str = None,
    offset: int = None,
    limit: int = 5,
) -> list:
    """
    Return a list of dataset object import histories for a dataset.

    Returns up to 1000 at a time; to get more, set offset as the starting
    position to fetch.

    dataset_id is the id of the dataset whose import histories to list (Required).
    offset is the starting position number to fetch (Optional).
    limit is the max number to fetch, up to 1000 (Optional).
    """
    if limit > 1000:
        raise FastLabelInvalidException(
            "Limit must be less than or equal to 1000.", 422
        )
    # Histories live under /imports/histories, not the plain
    # dataset-objects listing endpoint (fixed in PATCH 2/3 of this series).
    endpoint = "dataset-objects/imports/histories"
    params = {"datasetId": dataset_id}
    # "is not None" so an explicit offset of 0 is still forwarded.
    if offset is not None:
        params["offset"] = offset
    if limit:
        params["limit"] = limit
    return self.api.get_request(endpoint, params=params)
+## Dataset + +### Create Dataset + +Create a new dataset. + +```python +dataset = client.create_dataset( + name="Japanese Dogs", + slug="japanese_dogs", + type="image" +) +``` + +#### Response Dataset + +See API docs for details. + +```python +{ + 'id': 'YOUR_DATASET_ID', + 'name': 'Japanese Dogs', + 'slug': 'japanese_dogs', + 'type': 'image', + 'createdAt': '2022-10-31T02:20:00.248Z', + 'updatedAt': '2022-10-31T02:20:00.248Z' +} +``` + +### Find Dataset + +Find a single dataset. + +```python +dataset = client.find_dataset(dataset_id="YOUR_DATASET_ID") +``` + +Success response is the same as when created. + +### Get Dataset + +Get all datasets in the workspace. (Up to 1000 tasks) + +```python +datasets = client.get_datasets() +``` + +The success response is the same as when created, but it is an array. + +You can filter by type and keywords. + +```python +datasets = client.get_datasets( + type="image", # 'image', 'video', 'audio' + keyword="dog" +) +``` + +If you wish to retrieve more than 1000 data sets, please refer to the Task [sample code](#get%20tasks). + +### Update Dataset + +Update a single dataset. + +```python +dataset = client.update_dataset( + dataset_id="YOUR_DATASET_ID", name="World dogs" +) +``` + +Success response is the same as when created. + +### Delete Dataset + +Delete a single dataset. + +**⚠️ The dataset object and its associated tasks that dataset has will also be deleted, so check carefully before executing.** + +```python +client.delete_dataset(dataset_id="YOUR_DATASET_ID") +``` + +### Create Dataset Object + +Create object in the dataset. + +The types of objects that can be created are "image", "video", and "audio". +There are type-specific methods. but they can be used in the same way. + +```python +dataset_object = client.create_image_dataset_object( + dataset_id="YOUR_DATASET_ID", + name="brushwood_dog.jpg", + file_path="./brushwood_dog.jpg", +) +``` + +#### Response Dataset Object + +See API docs for details. 
+ +```python +{ + 'id': 'YOUR_DATASET_OBJECT_ID', + 'name': 'brushwood_dog.jpg', + 'size': 6717, + 'height': 225, + 'width': 225, + 'groupId': None, + 'createdAt': '2022-10-30T08:32:20.748Z', + 'updatedAt': '2022-10-30T08:32:20.748Z' +} +``` + +### Find Dataset Object + +Find a single dataset object. + +```python +dataset_object = client.find_dataset_object( + dataset_object_id="YOUR_DATASET_OBJECT_ID" +) +``` + +Success response is the same as when created. + +### Get Dataset Object + +Get all dataset object in the dataset. (Up to 1000 tasks) + +```python +dataset_objects = client.get_dataset_objects(dataset_id="YOUR_DATASET_ID") +``` + +The success response is the same as when created, but it is an array. + +You can filter by keywords. + +```python +dataset_objects = client.get_dataset_objects( + dataset_id="YOUR_DATASET_ID", keyword="dog" +) +``` + +If you wish to retrieve more than 1000 data sets, please refer to the Task [sample code](#get%20tasks). + +### Delete Dataset Object + +Delete a multi dataset objects. + +**⚠️ Related tasks will also be deleted, so please check them carefully before execution.** + +```python +client.delete_dataset_objects( + dataset_id="YOUR_DATASET_ID", + dataset_object_ids=[ + "YOUR_DATASET_OBJECT_ID_1", + "YOUR_DATASET_OBJECT_ID_2", + ], +) +``` + +### Get Import Histories For Dataset Object + +Get all import histories in the dataset. (Up to 1000 tasks) + +```python +datasets = client.get_dataset_object_import_histories( + dataset_id="YOUR_DATASET_ID" +) +``` + +#### Response Dataset Object Import Histories + +See API docs for details. + +```python +[ + { + 'id': 'YOUR_DATASET_OBJECT_IMPORT_HISTORY_ID', + 'type': 'local', + 'status': 'completed', + 'msgCode': 'none', + 'msgLevel': 'none', + 'userName': 'admin', + 'count': 1, + 'createdAt': '2022-10-30T08:31:31.588Z', + 'updatedAt': '2022-11-02T07:36:07.636Z' + } +] +``` + ## API Docs Check [this](https://api.fastlabel.ai/docs/) for further information.