# Dataset

def find_dataset(self, dataset_id: str) -> dict:
    """
    Find a single dataset.

    dataset_id is the id of the dataset (Required).
    """
    endpoint = "datasets/" + dataset_id
    return self.api.get_request(endpoint)

def get_datasets(
    self,
    keyword: str = None,
    type: str = None,
    offset: int = None,
    limit: int = 100,
) -> list:
    """
    Return a list of datasets.

    Returns up to 1000 at a time; to get more, set offset as the starting
    position to fetch.

    keyword is a search term matched against the dataset slug (Optional).
    type is the type of your dataset: 'image', 'video' or 'audio' (Optional).
    offset is the starting position number to fetch (Optional).
    limit is the max number to fetch, up to 1000 (Optional).
    """
    if limit > 1000:
        raise FastLabelInvalidException(
            "Limit must be less than or equal to 1000.", 422
        )
    endpoint = "datasets"
    params = {}
    if keyword:
        params["keyword"] = keyword
    if type:
        params["type"] = type
    # "is not None" so an explicit offset of 0 is still forwarded.
    if offset is not None:
        params["offset"] = offset
    if limit:
        params["limit"] = limit
    return self.api.get_request(endpoint, params=params)

def create_dataset(
    self,
    type: str,
    name: str,
    slug: str,
) -> str:
    """
    Create a dataset.

    type can be 'image', 'video' or 'audio' (Required).
    name is the name of your dataset (Required).
    slug is the slug of your dataset (Required).
    """
    endpoint = "datasets"
    payload = {
        "type": type,
        "name": name,
        "slug": slug,
    }
    return self.api.post_request(endpoint, payload=payload)

def update_dataset(
    self,
    dataset_id: str,
    name: str = None,
) -> str:
    """
    Update a dataset.

    dataset_id is the id of the dataset (Required).
    name is the new name of your dataset (Optional).
    """
    endpoint = "datasets/" + dataset_id
    # Only send fields the caller actually provided; the original sent
    # {"name": None} when name was omitted.
    payload = {}
    if name:
        payload["name"] = name
    return self.api.put_request(endpoint, payload=payload)

def delete_dataset(self, dataset_id: str) -> None:
    """
    Delete a dataset.

    dataset_id is the id of the dataset (Required).
    """
    endpoint = "datasets/" + dataset_id
    self.api.delete_request(endpoint)

# Dataset Object

def find_dataset_object(self, dataset_object_id: str) -> dict:
    """
    Find a single dataset object.

    dataset_object_id is the id of the dataset object (Required).
    """
    endpoint = "dataset-objects/" + dataset_object_id
    return self.api.get_request(endpoint)

def get_dataset_objects(
    self,
    dataset_id: str = None,
    keyword: str = None,
    offset: int = None,
    limit: int = 100,
) -> list:
    """
    Return a list of dataset objects in a dataset.

    Returns up to 1000 at a time; to get more, set offset as the starting
    position to fetch.

    dataset_id is the id of the dataset to list objects from (Required).
    keyword is a search term matched against the dataset object name (Optional).
    offset is the starting position number to fetch (Optional).
    limit is the max number to fetch, up to 1000 (Optional).
    """
    if limit > 1000:
        raise FastLabelInvalidException(
            "Limit must be less than or equal to 1000.", 422
        )
    endpoint = "dataset-objects"
    params = {"datasetId": dataset_id}
    if keyword:
        params["keyword"] = keyword
    # "is not None" so an explicit offset of 0 is still forwarded.
    if offset is not None:
        params["offset"] = offset
    if limit:
        params["limit"] = limit
    return self.api.get_request(endpoint, params=params)

def _create_dataset_object(
    self, dataset_id: str, name: str, file_path: str, type: str
) -> str:
    """
    Shared request builder for the create_*_dataset_object methods.

    Callers are responsible for validating the file extension and size
    before delegating here.
    """
    endpoint = "dataset-objects"
    payload = {
        "datasetId": dataset_id,
        "name": name,
        "file": utils.base64_encode(file_path),
        "type": type,
    }
    return self.api.post_request(endpoint, payload=payload)

def create_image_dataset_object(
    self,
    dataset_id: str,
    name: str,
    file_path: str,
) -> str:
    """
    Create an image dataset object.

    dataset_id is the id of the dataset the object belongs to (Required).
    name is a unique identifier of the dataset object in your dataset (Required).
    file_path is a path to the data. Supported extensions are png, jpg,
    jpeg (Required).
    """
    # TODO: add jfif, pjpeg, pjp?
    if not utils.is_image_supported_ext(file_path):
        raise FastLabelInvalidException(
            "Supported extensions are png, jpg, jpeg.", 422
        )
    if not utils.is_image_supported_size(file_path):
        raise FastLabelInvalidException("Supported image size is under 20 MB.", 422)
    return self._create_dataset_object(dataset_id, name, file_path, "image")

def create_video_dataset_object(
    self,
    dataset_id: str,
    name: str,
    file_path: str,
) -> str:
    """
    Create a video dataset object.

    dataset_id is the id of the dataset the object belongs to (Required).
    name is a unique identifier of the dataset object in your dataset (Required).
    file_path is a path to the data. Supported extensions are mp4 (Required).
    """
    # TODO: add m4v, mov, avi?
    if not utils.is_video_supported_ext(file_path):
        raise FastLabelInvalidException("Supported extensions are mp4.", 422)
    if not utils.is_video_supported_size(file_path):
        raise FastLabelInvalidException(
            "Supported video size is under 250 MB.", 422
        )
    return self._create_dataset_object(dataset_id, name, file_path, "video")

def create_audio_dataset_object(
    self,
    dataset_id: str,
    name: str,
    file_path: str,
) -> str:
    """
    Create an audio dataset object.

    dataset_id is the id of the dataset the object belongs to (Required).
    name is a unique identifier of the dataset object in your dataset (Required).
    file_path is a path to the data. Supported extensions are mp3, wav,
    w4a (Required).
    """
    # TODO: add mp2?
    if not utils.is_audio_supported_ext(file_path):
        raise FastLabelInvalidException(
            "Supported extensions are mp3, wav and w4a.", 422
        )
    if not utils.is_audio_supported_size(file_path):
        raise FastLabelInvalidException(
            "Supported audio size is under 120 MB.", 422
        )
    return self._create_dataset_object(dataset_id, name, file_path, "audio")

def delete_dataset_objects(
    self, dataset_id: str, dataset_object_ids: List[str]
) -> None:
    """
    Delete multiple dataset objects from a dataset.

    dataset_id is the id of the dataset (Required).
    dataset_object_ids are the ids of the dataset objects to delete (Required).
    """
    endpoint = "dataset-objects/delete/multi"
    payload = {"datasetId": dataset_id, "datasetObjectIds": dataset_object_ids}
    self.api.post_request(endpoint, payload=payload)

def get_dataset_object_import_histories(
    self,
    dataset_id: str = None,
    offset: int = None,
    limit: int = 5,
) -> list:
    """
    Return a list of dataset object import histories for a dataset.

    Returns up to 1000 at a time; to get more, set offset as the starting
    position to fetch.

    dataset_id is the id of the dataset whose import histories to list (Required).
    offset is the starting position number to fetch (Optional).
    limit is the max number to fetch, up to 1000 (Optional).
    """
    if limit > 1000:
        raise FastLabelInvalidException(
            "Limit must be less than or equal to 1000.", 422
        )
    # Histories live under /imports/histories, not the plain
    # dataset-objects listing endpoint (fixed in PATCH 2/3 of this series).
    endpoint = "dataset-objects/imports/histories"
    params = {"datasetId": dataset_id}
    # "is not None" so an explicit offset of 0 is still forwarded.
    if offset is not None:
        params["offset"] = offset
    if limit:
        params["limit"] = limit
    return self.api.get_request(endpoint, params=params)
+## Dataset + +### Create Dataset + +Create a new dataset. + +```python +dataset = client.create_dataset( + name="Japanese Dogs", + slug="japanese_dogs", + type="image" +) +``` + +#### Response Dataset + +See API docs for details. + +```python +{ + 'id': 'YOUR_DATASET_ID', + 'name': 'Japanese Dogs', + 'slug': 'japanese_dogs', + 'type': 'image', + 'createdAt': '2022-10-31T02:20:00.248Z', + 'updatedAt': '2022-10-31T02:20:00.248Z' +} +``` + +### Find Dataset + +Find a single dataset. + +```python +dataset = client.find_dataset(dataset_id="YOUR_DATASET_ID") +``` + +Success response is the same as when created. + +### Get Dataset + +Get all datasets in the workspace. (Up to 1000 tasks) + +```python +datasets = client.get_datasets() +``` + +The success response is the same as when created, but it is an array. + +You can filter by type and keywords. + +```python +datasets = client.get_datasets( + type="image", # 'image', 'video', 'audio' + keyword="dog" +) +``` + +If you wish to retrieve more than 1000 data sets, please refer to the Task [sample code](#get%20tasks). + +### Update Dataset + +Update a single dataset. + +```python +dataset = client.update_dataset( + dataset_id="YOUR_DATASET_ID", name="World dogs" +) +``` + +Success response is the same as when created. + +### Delete Dataset + +Delete a single dataset. + +**⚠️ The dataset object and its associated tasks that dataset has will also be deleted, so check carefully before executing.** + +```python +client.delete_dataset(dataset_id="YOUR_DATASET_ID") +``` + +### Create Dataset Object + +Create object in the dataset. + +The types of objects that can be created are "image", "video", and "audio". +There are type-specific methods. but they can be used in the same way. + +```python +dataset_object = client.create_image_dataset_object( + dataset_id="YOUR_DATASET_ID", + name="brushwood_dog.jpg", + file_path="./brushwood_dog.jpg", +) +``` + +#### Response Dataset Object + +See API docs for details. 
+ +```python +{ + 'id': 'YOUR_DATASET_OBJECT_ID', + 'name': 'brushwood_dog.jpg', + 'size': 6717, + 'height': 225, + 'width': 225, + 'groupId': None, + 'createdAt': '2022-10-30T08:32:20.748Z', + 'updatedAt': '2022-10-30T08:32:20.748Z' +} +``` + +### Find Dataset Object + +Find a single dataset object. + +```python +dataset_object = client.find_dataset_object( + dataset_object_id="YOUR_DATASET_OBJECT_ID" +) +``` + +Success response is the same as when created. + +### Get Dataset Object + +Get all dataset object in the dataset. (Up to 1000 tasks) + +```python +dataset_objects = client.get_dataset_objects(dataset_id="YOUR_DATASET_ID") +``` + +The success response is the same as when created, but it is an array. + +You can filter by keywords. + +```python +dataset_objects = client.get_dataset_objects( + dataset_id="YOUR_DATASET_ID", keyword="dog" +) +``` + +If you wish to retrieve more than 1000 data sets, please refer to the Task [sample code](#get%20tasks). + +### Delete Dataset Object + +Delete a multi dataset objects. + +**⚠️ Related tasks will also be deleted, so please check them carefully before execution.** + +```python +client.delete_dataset_objects( + dataset_id="YOUR_DATASET_ID", + dataset_object_ids=[ + "YOUR_DATASET_OBJECT_ID_1", + "YOUR_DATASET_OBJECT_ID_2", + ], +) +``` + +### Get Import Histories For Dataset Object + +Get all import histories in the dataset. (Up to 1000 tasks) + +```python +datasets = client.get_dataset_object_import_histories( + dataset_id="YOUR_DATASET_ID" +) +``` + +#### Response Dataset Object Import Histories + +See API docs for details. + +```python +[ + { + 'id': 'YOUR_DATASET_OBJECT_IMPORT_HISTORY_ID', + 'type': 'local', + 'status': 'completed', + 'msgCode': 'none', + 'msgLevel': 'none', + 'userName': 'admin', + 'count': 1, + 'createdAt': '2022-10-30T08:31:31.588Z', + 'updatedAt': '2022-11-02T07:36:07.636Z' + } +] +``` + ## API Docs Check [this](https://api.fastlabel.ai/docs/) for further information.