diff --git a/aixplain/enums/data_type.py b/aixplain/enums/data_type.py
index d13542f9..fa79d070 100644
--- a/aixplain/enums/data_type.py
+++ b/aixplain/enums/data_type.py
@@ -35,4 +35,4 @@ class DataType(Enum):
VIDEO = "video"
def __str__(self):
- return self._value_
\ No newline at end of file
+ return self._value_
diff --git a/aixplain/modules/metadata.py b/aixplain/modules/metadata.py
index f7fac5f5..07007ebe 100644
--- a/aixplain/modules/metadata.py
+++ b/aixplain/modules/metadata.py
@@ -43,6 +43,7 @@ def __init__(
file_extension: Optional[FileType] = None,
languages: List[Language] = [],
dsubtype: DataSubtype = DataSubtype.OTHER,
+ id: Optional[Text] = None,
**kwargs
) -> None:
"""MetaData Class
@@ -62,6 +63,7 @@ def __init__(
file_extension (Optional[FileType], optional): File extension (e.g. CSV, TXT, etc.). Defaults to None.
languages (List[Language], optional): List of languages which the data consists of. Defaults to [].
dsubtype (DataSubtype, optional): Data subtype (e.g., age, topic, race, split, etc.), used in datasets metadata. Defaults to Other.
+ id (Optional[Text], optional): Data ID. Defaults to None.
"""
self.name = name
if isinstance(dtype, str):
@@ -91,4 +93,5 @@ def __init__(
language = Language(language)
self.languages.append(language)
self.dsubtype = dsubtype
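+        # optional data ID (see docstring above; e.g., when referencing data already on the platform)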
+ self.id = id
self.kwargs = kwargs
diff --git a/aixplain/processes/data_onboarding/onboard_functions.py b/aixplain/processes/data_onboarding/onboard_functions.py
index 091458fd..35a64e12 100644
--- a/aixplain/processes/data_onboarding/onboard_functions.py
+++ b/aixplain/processes/data_onboarding/onboard_functions.py
@@ -97,11 +97,11 @@ def process_data_files(
-1,
0,
)
- if metadata.dtype in [DataType.AUDIO, DataType.IMAGE] or metadata.dsubtype == DataSubtype.INTERVAL:
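+    # label data is stored in JSON files, so label columns go through the media-file flow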
+ if metadata.dtype in [DataType.AUDIO, DataType.IMAGE, DataType.LABEL] or metadata.dsubtype == DataSubtype.INTERVAL:
files, data_column_idx, start_column_idx, end_column_idx, nrows = process_media_files.run(
metadata=metadata, paths=paths, folder=folder
)
- elif metadata.dtype in [DataType.TEXT, DataType.LABEL]:
+ elif metadata.dtype in [DataType.TEXT]:
files, data_column_idx, nrows = process_text_files.run(metadata=metadata, paths=paths, folder=folder)
return files, data_column_idx, start_column_idx, end_column_idx, nrows
diff --git a/aixplain/processes/data_onboarding/process_media_files.py b/aixplain/processes/data_onboarding/process_media_files.py
index 3f95b1e3..c0009eca 100644
--- a/aixplain/processes/data_onboarding/process_media_files.py
+++ b/aixplain/processes/data_onboarding/process_media_files.py
@@ -16,6 +16,7 @@
from pathlib import Path
from tqdm import tqdm
from typing import List, Tuple
+from urllib.parse import urlparse
AUDIO_MAX_SIZE = 50000000
IMAGE_TEXT_MAX_SIZE = 25000000
@@ -45,6 +46,15 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) ->
Returns:
Tuple[List[File], int, int, int]: list of s3 links; data, start and end columns index, and number of rows
"""
+    assert metadata.storage_type != StorageType.TEXT, (
+        f'Data Asset Onboarding Error: Column "{metadata.name}" of type "{metadata.dtype}" cannot be stored in text.'
+        + (" Label data should be stored in a JSON file." if metadata.dtype == DataType.LABEL else "")
+    )
+
# if files are stored locally, create a folder to store it
media_folder = Path(".")
if metadata.storage_type == StorageType.FILE:
@@ -95,6 +105,14 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) ->
assert (
os.path.getsize(media_path) <= AUDIO_MAX_SIZE
), f'Data Asset Onboarding Error: Local audio file "{media_path}" exceeds the size limit of 50 MB.'
+ elif metadata.dtype == DataType.LABEL:
+ assert (
+ os.path.getsize(media_path) <= IMAGE_TEXT_MAX_SIZE
+ ), f'Data Asset Onboarding Error: JSON file with labels "{media_path}" exceeds the size limit of 25 MB.'
+ _, extension = os.path.splitext(media_path)
+ assert (
+ extension == ".json"
+ ), f'Data Asset Onboarding Error: Label data should be stored in a JSON file and "{media_path}" is not one.'
else:
assert (
os.path.getsize(media_path) <= IMAGE_TEXT_MAX_SIZE
@@ -105,6 +123,12 @@ def run(metadata: MetaData, paths: List, folder: Path, batch_size: int = 100) ->
shutil.copy2(media_path, new_path)
batch.append(fname)
else:
+ if metadata.dtype == DataType.LABEL:
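+                # urlparse drops any query string, so the extension check applies to the URL path only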
+ path = urlparse(media_path).path
+ _, extension = os.path.splitext(path)
+ assert (
+ extension == ".json"
+ ), f'Data Asset Onboarding Error: Label data should be stored in a JSON file and "{media_path}" is not one.'
batch.append(media_path)
# crop intervals can not be used with interval data types
diff --git a/aixplain/processes/data_onboarding/process_text_files.py b/aixplain/processes/data_onboarding/process_text_files.py
index 48db3f4e..1ba7f47e 100644
--- a/aixplain/processes/data_onboarding/process_text_files.py
+++ b/aixplain/processes/data_onboarding/process_text_files.py
@@ -26,7 +26,7 @@ def process_text(content: str, storage_type: StorageType) -> Text:
Text: textual content
"""
if storage_type == StorageType.FILE:
- # Check the size of file and assert a limit of 50 MB
+ # Check the size of file and assert a limit of 25 MB
assert (
os.path.getsize(content) <= 25000000
), f'Data Asset Onboarding Error: Local text file "{content}" exceeds the size limit of 25 MB.'
diff --git a/docs/samples/label_dataset_onboarding/corpus/images/1.jpg b/docs/samples/label_dataset_onboarding/corpus/images/1.jpg
new file mode 100644
index 00000000..ae3d592c
Binary files /dev/null and b/docs/samples/label_dataset_onboarding/corpus/images/1.jpg differ
diff --git a/docs/samples/label_dataset_onboarding/corpus/images/2.png b/docs/samples/label_dataset_onboarding/corpus/images/2.png
new file mode 100644
index 00000000..ba23ab11
Binary files /dev/null and b/docs/samples/label_dataset_onboarding/corpus/images/2.png differ
diff --git a/docs/samples/label_dataset_onboarding/corpus/index.csv b/docs/samples/label_dataset_onboarding/corpus/index.csv
new file mode 100644
index 00000000..69ba347a
--- /dev/null
+++ b/docs/samples/label_dataset_onboarding/corpus/index.csv
@@ -0,0 +1,3 @@
+,images,labels
+0,corpus/images/1.jpg,corpus/labels/1.json
+1,corpus/images/2.png,corpus/labels/2.json
diff --git a/docs/samples/label_dataset_onboarding/corpus/labels/1.json b/docs/samples/label_dataset_onboarding/corpus/labels/1.json
new file mode 100644
index 00000000..6947447f
--- /dev/null
+++ b/docs/samples/label_dataset_onboarding/corpus/labels/1.json
@@ -0,0 +1,9 @@
+{
+ "data": "arcade",
+ "boundingBox": {
+ "top": 0,
+ "bottom": 0,
+ "left": 0,
+ "right": 0
+ }
+}
\ No newline at end of file
diff --git a/docs/samples/label_dataset_onboarding/corpus/labels/2.json b/docs/samples/label_dataset_onboarding/corpus/labels/2.json
new file mode 100644
index 00000000..b990cfd3
--- /dev/null
+++ b/docs/samples/label_dataset_onboarding/corpus/labels/2.json
@@ -0,0 +1,9 @@
+{
+ "data": "building",
+ "boundingBox": {
+ "top": 0,
+ "bottom": 0,
+ "left": 0,
+ "right": 0
+ }
+}
\ No newline at end of file
diff --git a/docs/samples/label_dataset_onboarding/label_dataset_onboarding.ipynb b/docs/samples/label_dataset_onboarding/label_dataset_onboarding.ipynb
new file mode 100644
index 00000000..f499dd51
--- /dev/null
+++ b/docs/samples/label_dataset_onboarding/label_dataset_onboarding.ipynb
@@ -0,0 +1,399 @@
+{
+ "cells": [
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "# Image Label Detection Dataset Onboarding\n",
+ "\n",
+ "This notebook demonstrates how to onboard a dataset with label data into aiXplain platform using its SDK."
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Credentials\n",
+ "\n",
+ "To use the aiXplain SDK, you may be registered in our platform and have an API key. The step-by-step on how to do it is better described [here](/docs/user/api_setup.md)."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import os\n",
+ "\n",
+ "os.environ[\"TEAM_API_KEY\"] = \"YOUR_TEAM_API_KEY_HERE\""
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Data\n",
+ "\n",
+ "In this example we will show how to onboard a sample dataset of images and their corresponding labels. To onboard it, the data needs to be depicted in a CSV file, which will be fed to the SDK. \n",
+ "\n",
+ "Label data should have be one or more elements in a JSON file according to one of the following structure:\n",
+ "\n",
+ "```json\n",
+ "{\n",
+ " \"data\": \"TEXT_AUDIO_LABEL\",\n",
+ " \"boundingBox\": {\n",
+ " \"start\": 0, // start character\n",
+ " \"end\": 0, // end character\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "{\n",
+ " \"data\": \"AUDIO_LABEL\",\n",
+ " \"boundingBox\": {\n",
+ " \"start\": 0, // start second\n",
+ " \"end\": 0 // end second\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "{\n",
+ " \"data\": \"IMAGE_LABEL\",\n",
+ " \"boundingBox\": {\n",
+ " \"top\": 0, // top percentage of the image\n",
+ " \"bottom\": 0, // bottom percentage of the image\n",
+ " \"left\": 0, // left percentage of the image\n",
+ " \"right\": 0 // right percentage of the image\n",
+ " }\n",
+ "}\n",
+ "\n",
+ "{\n",
+ " \"data\": \"VIDEO_LABEL\",\n",
+ " \"boundingBox\": {\n",
+ " \"start\": 0, // start second\n",
+ " \"end\": 0, // end second\n",
+ " \"top\": 0, // top percentage of the image\n",
+ " \"bottom\": 0, // bottom percentage of the image\n",
+ " \"left\": 0, // left percentage of the image\n",
+ " \"right\": 0 // right percentage of the image\n",
+ " }\n",
+ "}\n",
+ "```"
+ ]
+ },
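+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "As a minimal sketch (plain standard library, independent of the SDK), label files such as those under `corpus/labels/` could be generated like this:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import json\n",
+ "import os\n",
+ "\n",
+ "# hypothetical helper: writes a single image label with a full-image bounding box\n",
+ "def write_image_label(path, label):\n",
+ "    os.makedirs(os.path.dirname(path), exist_ok=True)\n",
+ "    with open(path, \"w\") as f:\n",
+ "        json.dump({\"data\": label, \"boundingBox\": {\"top\": 0, \"bottom\": 0, \"left\": 0, \"right\": 0}}, f, indent=1)\n",
+ "\n",
+ "write_image_label(\"corpus/labels/1.json\", \"arcade\")"
+ ]
+ },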
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "/var/folders/1p/jbswfpbs73q5qbbh78dzj5xm0000gn/T/ipykernel_47954/611755932.py:1: DeprecationWarning: \n",
+ "Pyarrow will become a required dependency of pandas in the next major release of pandas (pandas 3.0),\n",
+ "(to allow more performant data types, such as the Arrow string type, and better interoperability with other libraries)\n",
+ "but was not found to be installed on your system.\n",
+ "If this would cause problems for you,\n",
+ "please provide us feedback at https://github.com/pandas-dev/pandas/issues/54466\n",
+ " \n",
+ " import pandas as pd\n"
+ ]
+ },
+ {
+ "data": {
+ "text/html": [
+ "
\n",
+ "\n",
+ "
\n",
+ " \n",
+ " \n",
+ " | \n",
+ " Unnamed: 0 | \n",
+ " images | \n",
+ " labels | \n",
+ "
\n",
+ " \n",
+ " \n",
+ " \n",
+ " | 0 | \n",
+ " 0 | \n",
+ " corpus/images/1.jpg | \n",
+ " corpus/labels/1.json | \n",
+ "
\n",
+ " \n",
+ " | 1 | \n",
+ " 1 | \n",
+ " corpus/images/2.png | \n",
+ " corpus/labels/2.json | \n",
+ "
\n",
+ " \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ " Unnamed: 0 images labels\n",
+ "0 0 corpus/images/1.jpg corpus/labels/1.json\n",
+ "1 1 corpus/images/2.png corpus/labels/2.json"
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "import pandas as pd\n",
+ "\n",
+ "upload_file = \"corpus/index.csv\"\n",
+ "data = pd.read_csv(upload_file)\n",
+ "data"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Import\n",
+ "\n",
+ "Let's now import the necessary classes to onboard the corpus."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from aixplain.enums import DataType, DataSubtype, Function, Language, License, StorageType\n",
+ "from aixplain.factories import DatasetFactory\n",
+ "from aixplain.modules import MetaData"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "## Metadata\n",
+ "\n",
+ "Besides the CSV file, a schema must be fed to the SDK giving some information about the input and output data to be onboarded, such as: \n",
+ "\n",
+ "1. Data Name\n",
+ "2. Data Type: Audio, Text, Image, Video, Label, etc.\n",
+ "3. Storage Type: whether the data is depicted in the CSV (Text), in a local file (File) or in a public link (URL)\n",
+ "4. Start Column (optional): the column which depicts the beginning of the segment in the original file\n",
+ "5. End Column (optional): the column which depicts the end of the segment in the original file\n",
+ "6. Languages (optional): the languages depicted in the data"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's instantiate the metadata for the images:"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 4,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "image_meta = MetaData(\n",
+ " name=\"images\", \n",
+ " dtype=\"image\", \n",
+ " storage_type=\"file\", \n",
+ ")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Now for the labels...\n",
+ "\n",
+ "(See how we can use enumerations instead of strings to specify some information)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 5,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "label_meta = MetaData(\n",
+ " name=\"labels\", \n",
+ " dtype=DataType.LABEL, \n",
+ " storage_type=StorageType.FILE,\n",
+ ")"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Let's now create the schemas for the input and output data of the dataset. Since this is a image label detection dataset, the images will be set as the input and the labels as the output data."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 6,
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "input_schema = [image_meta]\n",
+ "output_schema = [label_meta]"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "Finally we can called the `create` method to onboard the dataset, specifying the name, description, license, path to the content files and schemas. \n",
+ "\n",
+ "See that a Dataset ID will be provided as response together with the status of the onboarding process."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 7,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ " Dataset's inputs onboard progress: 0%| | 0/1 [00:00, ?it/s]\n",
+ "\u001b[A\n",
+ " Dataset's inputs onboard progress: 100%|██████████| 1/1 [00:06<00:00, 6.71s/it]\n",
+ " Dataset's outputs onboard progress: 0%| | 0/1 [00:00, ?it/s]\n",
+ "\u001b[A\n",
+ " Dataset's outputs onboard progress: 100%|██████████| 1/1 [00:02<00:00, 2.51s/it]\n",
+ " Dataset's hypotheses onboard progress: 0it [00:00, ?it/s]\n",
+ " Dataset's meta onboard progress: 0it [00:00, ?it/s]\n"
+ ]
+ },
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "{'status': 'onboarding', 'asset_id': '6615453db2166233fe1ab291'}\n"
+ ]
+ }
+ ],
+ "source": [
+ "payload = DatasetFactory.create(\n",
+ " name=\"dataset_onboarding_demo\",\n",
+ " description=\"This is an image label detection corpus\",\n",
+ " license=License.MIT,\n",
+ " function=Function.IMAGE_LABEL_DETECTION,\n",
+ " content_path=upload_file,\n",
+ " input_schema=input_schema,\n",
+ " output_schema=output_schema\n",
+ ")\n",
+ "print(payload)"
+ ]
+ },
+ {
+ "attachments": {},
+ "cell_type": "markdown",
+ "metadata": {},
+ "source": [
+ "You can then check the dataset using the `get` method."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 8,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stderr",
+ "output_type": "stream",
+ "text": [
+ "INFO:root:Start service for GET Dataset - https://dev-platform-api.aixplain.com/sdk/datasets/6615453db2166233fe1ab291/overview - {'Authorization': 'Token 9136c08bf02b5552885b9f2a5e0fae517d81ff2fa6fe7084a3adb655c4aa7215', 'Content-Type': 'application/json'}\n"
+ ]
+ },
+ {
+ "data": {
+ "text/plain": [
+ "{'id': '6615453db2166233fe1ab291',\n",
+ " 'name': 'dataset_onboarding_demo',\n",
+ " 'description': 'This is an image label detection corpus',\n",
+ " 'supplier': 'aiXplain',\n",
+ " 'version': '1.0',\n",
+ " 'license': ,\n",
+ " 'privacy': ,\n",
+ " 'cost': 0,\n",
+ " 'onboard_status': ,\n",
+ " 'function': ,\n",
+ " 'source_data': {'images': },\n",
+ " 'target_data': {'labels': []},\n",
+ " 'hypotheses': {},\n",
+ " 'metadata': {},\n",
+ " 'tags': [],\n",
+ " 'length': None,\n",
+ " 'kwargs': {}}"
+ ]
+ },
+ "execution_count": 8,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "dataset = DatasetFactory.get(payload[\"asset_id\"])\n",
+ "dataset.__dict__"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "env",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.11.7"
+ },
+ "orig_nbformat": 4
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}
diff --git a/docs/user/user_doc.md b/docs/user/user_doc.md
index 5b19c273..400ad0d2 100644
--- a/docs/user/user_doc.md
+++ b/docs/user/user_doc.md
@@ -282,6 +282,9 @@ Using the aiXplain SDK, you can also onboard your dataset into the aiXplain plat
- Machine translation dataset directly from s3:
  - [![Open In Colab](https://colab.research.google.com/assets/colab-badge.svg)](https://colab.research.google.com/drive/1Asnjeq5JQ9pV6UUQ2Z20XtrjnoaFD0nf?usp=sharing)
+- Image label detection dataset:
+  - [Link](../samples/label_dataset_onboarding/label_dataset_onboarding.ipynb)
+
## FineTune
[FineTune](https://aixplain.com/platform/finetune) allows you to customize models by tuning them using your data and enhancing their performance. Set up and start fine-tuning with a few lines of code. Once fine-tuning is complete, the model will be deployed into your assets, ready for you to use.
diff --git a/tests/functional/pipelines/run_test.py b/tests/functional/pipelines/run_test.py
index e7af6c4e..e4389587 100644
--- a/tests/functional/pipelines/run_test.py
+++ b/tests/functional/pipelines/run_test.py
@@ -109,24 +109,24 @@ def test_run_multipipe_with_datasets(batchmode: bool):
assert response["status"] == "SUCCESS"
-@pytest.mark.parametrize("batchmode", [True, False])
-def test_run_segment_reconstruct(batchmode: bool):
+def test_run_segment_reconstruct():
pipeline = PipelineFactory.list(query="Segmentation/Reconstruction Functional Test - DO NOT DELETE")["results"][0]
- response = pipeline.run("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", **{"batchmode": batchmode})
+ response = pipeline.run("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav")
assert response["status"] == "SUCCESS"
output = response["data"][0]
assert output["label"] == "Output 1"
-@pytest.mark.parametrize("batchmode", [True, False])
-def test_run_metric(batchmode: bool):
+def test_run_metric():
pipeline = PipelineFactory.list(query="ASR Metric Functional Test - DO NOT DELETE")["results"][0]
- response = pipeline.run({
- "AudioInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav",
- "ReferenceInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt"
- }, **{"batchmode": batchmode})
-
+ response = pipeline.run(
+ {
+ "AudioInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav",
+ "ReferenceInput": "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt",
+ }
+ )
+
assert response["status"] == "SUCCESS"
assert len(response["data"]) == 2
assert response["data"][0]["label"] in ["TranscriptOutput", "ScoreOutput"]
@@ -134,34 +134,30 @@ def test_run_metric(batchmode: bool):
@pytest.mark.parametrize(
- "batchmode,input_data,output_data",
+ "input_data,output_data",
[
- (True, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput"),
- (False, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput"),
- (True, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput"),
- (False, "https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput")
- ]
+ ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.wav", "AudioOutput"),
+ ("https://aixplain-platform-assets.s3.amazonaws.com/samples/en/CPAC1x2.txt", "TextOutput"),
+ ],
)
-def test_run_router(batchmode: bool, input_data: str, output_data: str):
+def test_run_router(input_data: str, output_data: str):
pipeline = PipelineFactory.list(query="Router Test - DO NOT DELETE")["results"][0]
- response = pipeline.run(input_data, **{"batchmode": batchmode})
-
+ response = pipeline.run(input_data)
+
assert response["status"] == "SUCCESS"
assert response["data"][0]["label"] == output_data
@pytest.mark.parametrize(
- "batchmode,input_data,output_data",
+ "input_data,output_data",
[
- (True, "I love it.", "PositiveOutput"),
- (False, "I love it.", "PositiveOutput"),
- (True, "I hate it.", "NegativeOutput"),
- (False, "I hate it.", "NegativeOutput")
- ]
+ ("I love it.", "PositiveOutput"),
+ ("I hate it.", "NegativeOutput"),
+ ],
)
-def test_run_decision(batchmode: bool, input_data: str, output_data: str):
+def test_run_decision(input_data: str, output_data: str):
pipeline = PipelineFactory.list(query="Decision Test - DO NOT DELETE")["results"][0]
- response = pipeline.run(input_data, **{"batchmode": batchmode})
-
+ response = pipeline.run(input_data)
+
assert response["status"] == "SUCCESS"
- assert response["data"][0]["label"] == output_data
\ No newline at end of file
+ assert response["data"][0]["label"] == output_data